{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.998615597600369, "eval_steps": 500, "global_step": 5415, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005537609598523304, "grad_norm": 6.014165878295898, "learning_rate": 1.845018450184502e-08, "loss": 0.8713, "step": 1 }, { "epoch": 0.0011075219197046607, "grad_norm": 5.897510528564453, "learning_rate": 3.690036900369004e-08, "loss": 0.854, "step": 2 }, { "epoch": 0.0016612828795569913, "grad_norm": 5.930586338043213, "learning_rate": 5.5350553505535055e-08, "loss": 0.846, "step": 3 }, { "epoch": 0.0022150438394093214, "grad_norm": 6.160415172576904, "learning_rate": 7.380073800738008e-08, "loss": 0.8932, "step": 4 }, { "epoch": 0.0027688047992616522, "grad_norm": 5.772947311401367, "learning_rate": 9.22509225092251e-08, "loss": 0.8431, "step": 5 }, { "epoch": 0.0033225657591139826, "grad_norm": 6.1169753074646, "learning_rate": 1.1070110701107011e-07, "loss": 0.8839, "step": 6 }, { "epoch": 0.003876326718966313, "grad_norm": 6.122945785522461, "learning_rate": 1.2915129151291515e-07, "loss": 0.8815, "step": 7 }, { "epoch": 0.004430087678818643, "grad_norm": 6.044448375701904, "learning_rate": 1.4760147601476016e-07, "loss": 0.874, "step": 8 }, { "epoch": 0.004983848638670974, "grad_norm": 6.412347793579102, "learning_rate": 1.6605166051660518e-07, "loss": 0.9231, "step": 9 }, { "epoch": 0.0055376095985233045, "grad_norm": 5.997819423675537, "learning_rate": 1.845018450184502e-07, "loss": 0.8484, "step": 10 }, { "epoch": 0.006091370558375634, "grad_norm": 5.760068416595459, "learning_rate": 2.029520295202952e-07, "loss": 0.853, "step": 11 }, { "epoch": 0.006645131518227965, "grad_norm": 5.706671237945557, "learning_rate": 2.2140221402214022e-07, "loss": 0.8651, "step": 12 }, { "epoch": 0.007198892478080295, "grad_norm": 5.957831382751465, "learning_rate": 2.3985239852398526e-07, "loss": 0.8969, "step": 13 }, { "epoch": 0.007752653437932626, "grad_norm": 5.79080867767334, "learning_rate": 2.583025830258303e-07, "loss": 0.8537, "step": 14 }, { "epoch": 0.008306414397784956, "grad_norm": 5.811881065368652, "learning_rate": 2.767527675276753e-07, "loss": 0.882, "step": 15 }, { "epoch": 0.008860175357637286, "grad_norm": 5.7313923835754395, "learning_rate": 2.9520295202952033e-07, "loss": 0.871, "step": 16 }, { "epoch": 0.009413936317489617, "grad_norm": 5.502678394317627, "learning_rate": 3.136531365313653e-07, "loss": 0.8633, "step": 17 }, { "epoch": 0.009967697277341947, "grad_norm": 5.560994625091553, "learning_rate": 3.3210332103321035e-07, "loss": 0.8545, "step": 18 }, { "epoch": 0.010521458237194277, "grad_norm": 5.673508644104004, "learning_rate": 3.5055350553505534e-07, "loss": 0.8947, "step": 19 }, { "epoch": 0.011075219197046609, "grad_norm": 5.430346965789795, "learning_rate": 3.690036900369004e-07, "loss": 0.8717, "step": 20 }, { "epoch": 0.011628980156898939, "grad_norm": 5.326581001281738, "learning_rate": 3.874538745387454e-07, "loss": 0.8536, "step": 21 }, { "epoch": 0.012182741116751269, "grad_norm": 4.630413055419922, "learning_rate": 4.059040590405904e-07, "loss": 0.8057, "step": 22 }, { "epoch": 0.012736502076603599, "grad_norm": 4.3171539306640625, "learning_rate": 4.2435424354243545e-07, "loss": 0.7965, "step": 23 }, { "epoch": 0.01329026303645593, "grad_norm": 4.290299415588379, "learning_rate": 4.4280442804428044e-07, "loss": 0.8122, "step": 24 }, { "epoch": 0.01384402399630826, "grad_norm": 4.088802337646484, "learning_rate": 4.612546125461255e-07, "loss": 0.8019, "step": 25 }, { "epoch": 0.01439778495616059, "grad_norm": 4.310701370239258, "learning_rate": 4.797047970479705e-07, "loss": 0.784, "step": 26 }, { "epoch": 0.014951545916012922, "grad_norm": 4.261911869049072, "learning_rate": 4.981549815498156e-07, "loss": 0.8578, "step": 27 }, { "epoch": 0.015505306875865252, "grad_norm": 4.060521125793457, "learning_rate": 5.166051660516606e-07, "loss": 0.798, "step": 28 }, { "epoch": 0.01605906783571758, "grad_norm": 4.0544867515563965, "learning_rate": 5.350553505535055e-07, "loss": 0.8157, "step": 29 }, { "epoch": 0.01661282879556991, "grad_norm": 3.7097115516662598, "learning_rate": 5.535055350553506e-07, "loss": 0.8143, "step": 30 }, { "epoch": 0.01716658975542224, "grad_norm": 2.2734148502349854, "learning_rate": 5.719557195571956e-07, "loss": 0.7409, "step": 31 }, { "epoch": 0.01772035071527457, "grad_norm": 2.1705515384674072, "learning_rate": 5.904059040590407e-07, "loss": 0.7887, "step": 32 }, { "epoch": 0.018274111675126905, "grad_norm": 2.2246546745300293, "learning_rate": 6.088560885608857e-07, "loss": 0.7777, "step": 33 }, { "epoch": 0.018827872634979235, "grad_norm": 2.0905911922454834, "learning_rate": 6.273062730627306e-07, "loss": 0.7835, "step": 34 }, { "epoch": 0.019381633594831565, "grad_norm": 2.107123374938965, "learning_rate": 6.457564575645757e-07, "loss": 0.7691, "step": 35 }, { "epoch": 0.019935394554683895, "grad_norm": 2.001885414123535, "learning_rate": 6.642066420664207e-07, "loss": 0.7705, "step": 36 }, { "epoch": 0.020489155514536225, "grad_norm": 1.9182335138320923, "learning_rate": 6.826568265682657e-07, "loss": 0.716, "step": 37 }, { "epoch": 0.021042916474388555, "grad_norm": 1.967488408088684, "learning_rate": 7.011070110701107e-07, "loss": 0.7769, "step": 38 }, { "epoch": 0.021596677434240884, "grad_norm": 1.6966832876205444, "learning_rate": 7.195571955719557e-07, "loss": 0.7487, "step": 39 }, { "epoch": 0.022150438394093218, "grad_norm": 1.5496914386749268, "learning_rate": 7.380073800738008e-07, "loss": 0.7325, "step": 40 }, { "epoch": 0.022704199353945548, "grad_norm": 1.3778611421585083, "learning_rate": 7.564575645756458e-07, "loss": 0.7488, "step": 41 }, { "epoch": 0.023257960313797878, "grad_norm": 1.44842529296875, "learning_rate": 7.749077490774908e-07, "loss": 0.751, "step": 42 }, { "epoch": 0.023811721273650208, "grad_norm": 1.6650052070617676, "learning_rate": 7.933579335793358e-07, "loss": 0.7274, "step": 43 }, { "epoch": 0.024365482233502538, "grad_norm": 1.9042081832885742, "learning_rate": 8.118081180811808e-07, "loss": 0.7461, "step": 44 }, { "epoch": 0.024919243193354867, "grad_norm": 1.9027445316314697, "learning_rate": 8.302583025830259e-07, "loss": 0.7651, "step": 45 }, { "epoch": 0.025473004153207197, "grad_norm": 1.9083220958709717, "learning_rate": 8.487084870848709e-07, "loss": 0.7048, "step": 46 }, { "epoch": 0.02602676511305953, "grad_norm": 1.8612231016159058, "learning_rate": 8.671586715867159e-07, "loss": 0.7399, "step": 47 }, { "epoch": 0.02658052607291186, "grad_norm": 1.7182652950286865, "learning_rate": 8.856088560885609e-07, "loss": 0.7263, "step": 48 }, { "epoch": 0.02713428703276419, "grad_norm": 1.5920425653457642, "learning_rate": 9.040590405904059e-07, "loss": 0.7224, "step": 49 }, { "epoch": 0.02768804799261652, "grad_norm": 1.3176703453063965, "learning_rate": 9.22509225092251e-07, "loss": 0.6946, "step": 50 }, { "epoch": 0.02824180895246885, "grad_norm": 1.1884641647338867, "learning_rate": 9.40959409594096e-07, "loss": 0.6827, "step": 51 }, { "epoch": 0.02879556991232118, "grad_norm": 0.981661856174469, "learning_rate": 9.59409594095941e-07, "loss": 0.6775, "step": 52 }, { "epoch": 0.02934933087217351, "grad_norm": 0.8595092296600342, "learning_rate": 9.77859778597786e-07, "loss": 0.6763, "step": 53 }, { "epoch": 0.029903091832025844, "grad_norm": 0.8848995566368103, "learning_rate": 9.963099630996311e-07, "loss": 0.7397, "step": 54 }, { "epoch": 0.030456852791878174, "grad_norm": 0.8736474514007568, "learning_rate": 1.0147601476014762e-06, "loss": 0.6848, "step": 55 }, { "epoch": 0.031010613751730504, "grad_norm": 1.0269657373428345, "learning_rate": 1.0332103321033212e-06, "loss": 0.6997, "step": 56 }, { "epoch": 0.031564374711582834, "grad_norm": 0.9646909236907959, "learning_rate": 1.0516605166051662e-06, "loss": 0.675, "step": 57 }, { "epoch": 0.03211813567143516, "grad_norm": 0.9945862889289856, "learning_rate": 1.070110701107011e-06, "loss": 0.6631, "step": 58 }, { "epoch": 0.03267189663128749, "grad_norm": 0.9375670552253723, "learning_rate": 1.088560885608856e-06, "loss": 0.7203, "step": 59 }, { "epoch": 0.03322565759113982, "grad_norm": 0.8322911262512207, "learning_rate": 1.1070110701107011e-06, "loss": 0.6647, "step": 60 }, { "epoch": 0.03377941855099215, "grad_norm": 0.7018800973892212, "learning_rate": 1.1254612546125462e-06, "loss": 0.6661, "step": 61 }, { "epoch": 0.03433317951084448, "grad_norm": 0.7065979838371277, "learning_rate": 1.1439114391143912e-06, "loss": 0.6934, "step": 62 }, { "epoch": 0.03488694047069681, "grad_norm": 0.6821363568305969, "learning_rate": 1.1623616236162363e-06, "loss": 0.6344, "step": 63 }, { "epoch": 0.03544070143054914, "grad_norm": 0.7779387831687927, "learning_rate": 1.1808118081180813e-06, "loss": 0.6576, "step": 64 }, { "epoch": 0.03599446239040148, "grad_norm": 0.6340335607528687, "learning_rate": 1.1992619926199263e-06, "loss": 0.6484, "step": 65 }, { "epoch": 0.03654822335025381, "grad_norm": 0.6900181770324707, "learning_rate": 1.2177121771217714e-06, "loss": 0.6581, "step": 66 }, { "epoch": 0.03710198431010614, "grad_norm": 0.5423425436019897, "learning_rate": 1.2361623616236164e-06, "loss": 0.6424, "step": 67 }, { "epoch": 0.03765574526995847, "grad_norm": 0.5667786002159119, "learning_rate": 1.2546125461254613e-06, "loss": 0.635, "step": 68 }, { "epoch": 0.0382095062298108, "grad_norm": 0.5225174427032471, "learning_rate": 1.2730627306273063e-06, "loss": 0.6422, "step": 69 }, { "epoch": 0.03876326718966313, "grad_norm": 0.5604956150054932, "learning_rate": 1.2915129151291513e-06, "loss": 0.6262, "step": 70 }, { "epoch": 0.03931702814951546, "grad_norm": 0.5690175890922546, "learning_rate": 1.3099630996309964e-06, "loss": 0.6374, "step": 71 }, { "epoch": 0.03987078910936779, "grad_norm": 0.5327182412147522, "learning_rate": 1.3284132841328414e-06, "loss": 0.639, "step": 72 }, { "epoch": 0.04042455006922012, "grad_norm": 0.47235071659088135, "learning_rate": 1.3468634686346865e-06, "loss": 0.6392, "step": 73 }, { "epoch": 0.04097831102907245, "grad_norm": 0.4569340646266937, "learning_rate": 1.3653136531365315e-06, "loss": 0.6157, "step": 74 }, { "epoch": 0.04153207198892478, "grad_norm": 0.508716881275177, "learning_rate": 1.3837638376383765e-06, "loss": 0.6401, "step": 75 }, { "epoch": 0.04208583294877711, "grad_norm": 0.45128142833709717, "learning_rate": 1.4022140221402214e-06, "loss": 0.6399, "step": 76 }, { "epoch": 0.04263959390862944, "grad_norm": 0.4534916281700134, "learning_rate": 1.4206642066420664e-06, "loss": 0.6117, "step": 77 }, { "epoch": 0.04319335486848177, "grad_norm": 0.4419490396976471, "learning_rate": 1.4391143911439114e-06, "loss": 0.627, "step": 78 }, { "epoch": 0.043747115828334106, "grad_norm": 0.41959109902381897, "learning_rate": 1.4575645756457565e-06, "loss": 0.641, "step": 79 }, { "epoch": 0.044300876788186436, "grad_norm": 0.4353376030921936, "learning_rate": 1.4760147601476015e-06, "loss": 0.601, "step": 80 }, { "epoch": 0.044854637748038766, "grad_norm": 0.4534298777580261, "learning_rate": 1.4944649446494466e-06, "loss": 0.6615, "step": 81 }, { "epoch": 0.045408398707891096, "grad_norm": 0.42502716183662415, "learning_rate": 1.5129151291512916e-06, "loss": 0.6234, "step": 82 }, { "epoch": 0.045962159667743425, "grad_norm": 0.47156989574432373, "learning_rate": 1.5313653136531366e-06, "loss": 0.5905, "step": 83 }, { "epoch": 0.046515920627595755, "grad_norm": 0.40800225734710693, "learning_rate": 1.5498154981549817e-06, "loss": 0.6297, "step": 84 }, { "epoch": 0.047069681587448085, "grad_norm": 0.46074092388153076, "learning_rate": 1.5682656826568267e-06, "loss": 0.6547, "step": 85 }, { "epoch": 0.047623442547300415, "grad_norm": 0.4736429452896118, "learning_rate": 1.5867158671586716e-06, "loss": 0.6146, "step": 86 }, { "epoch": 0.048177203507152745, "grad_norm": 0.4554425776004791, "learning_rate": 1.6051660516605166e-06, "loss": 0.5941, "step": 87 }, { "epoch": 0.048730964467005075, "grad_norm": 0.3798641860485077, "learning_rate": 1.6236162361623616e-06, "loss": 0.6138, "step": 88 }, { "epoch": 0.049284725426857405, "grad_norm": 0.40926966071128845, "learning_rate": 1.6420664206642067e-06, "loss": 0.6175, "step": 89 }, { "epoch": 0.049838486386709735, "grad_norm": 0.38790079951286316, "learning_rate": 1.6605166051660517e-06, "loss": 0.6114, "step": 90 }, { "epoch": 0.050392247346562065, "grad_norm": 0.4059058129787445, "learning_rate": 1.6789667896678968e-06, "loss": 0.6082, "step": 91 }, { "epoch": 0.050946008306414395, "grad_norm": 0.4402030110359192, "learning_rate": 1.6974169741697418e-06, "loss": 0.602, "step": 92 }, { "epoch": 0.051499769266266725, "grad_norm": 0.4006226360797882, "learning_rate": 1.7158671586715868e-06, "loss": 0.6168, "step": 93 }, { "epoch": 0.05205353022611906, "grad_norm": 0.42007502913475037, "learning_rate": 1.7343173431734319e-06, "loss": 0.6096, "step": 94 }, { "epoch": 0.05260729118597139, "grad_norm": 0.3919573426246643, "learning_rate": 1.752767527675277e-06, "loss": 0.6246, "step": 95 }, { "epoch": 0.05316105214582372, "grad_norm": 0.3869551420211792, "learning_rate": 1.7712177121771217e-06, "loss": 0.5874, "step": 96 }, { "epoch": 0.05371481310567605, "grad_norm": 0.37819355726242065, "learning_rate": 1.7896678966789668e-06, "loss": 0.5989, "step": 97 }, { "epoch": 0.05426857406552838, "grad_norm": 0.3713648021221161, "learning_rate": 1.8081180811808118e-06, "loss": 0.5739, "step": 98 }, { "epoch": 0.05482233502538071, "grad_norm": 0.4035806357860565, "learning_rate": 1.8265682656826569e-06, "loss": 0.6209, "step": 99 }, { "epoch": 0.05537609598523304, "grad_norm": 0.4202962815761566, "learning_rate": 1.845018450184502e-06, "loss": 0.6282, "step": 100 }, { "epoch": 0.05592985694508537, "grad_norm": 0.39130738377571106, "learning_rate": 1.863468634686347e-06, "loss": 0.6178, "step": 101 }, { "epoch": 0.0564836179049377, "grad_norm": 0.37897393107414246, "learning_rate": 1.881918819188192e-06, "loss": 0.5922, "step": 102 }, { "epoch": 0.05703737886479003, "grad_norm": 0.3983903229236603, "learning_rate": 1.900369003690037e-06, "loss": 0.5759, "step": 103 }, { "epoch": 0.05759113982464236, "grad_norm": 0.4290478527545929, "learning_rate": 1.918819188191882e-06, "loss": 0.5899, "step": 104 }, { "epoch": 0.05814490078449469, "grad_norm": 0.43606919050216675, "learning_rate": 1.937269372693727e-06, "loss": 0.6209, "step": 105 }, { "epoch": 0.05869866174434702, "grad_norm": 0.37784290313720703, "learning_rate": 1.955719557195572e-06, "loss": 0.5913, "step": 106 }, { "epoch": 0.05925242270419935, "grad_norm": 0.44921353459358215, "learning_rate": 1.974169741697417e-06, "loss": 0.6156, "step": 107 }, { "epoch": 0.05980618366405169, "grad_norm": 0.3531395196914673, "learning_rate": 1.9926199261992622e-06, "loss": 0.5632, "step": 108 }, { "epoch": 0.06035994462390402, "grad_norm": 0.4078461527824402, "learning_rate": 2.0110701107011073e-06, "loss": 0.598, "step": 109 }, { "epoch": 0.06091370558375635, "grad_norm": 0.39814451336860657, "learning_rate": 2.0295202952029523e-06, "loss": 0.6144, "step": 110 }, { "epoch": 0.06146746654360868, "grad_norm": 0.3756044805049896, "learning_rate": 2.0479704797047974e-06, "loss": 0.5825, "step": 111 }, { "epoch": 0.06202122750346101, "grad_norm": 0.375185489654541, "learning_rate": 2.0664206642066424e-06, "loss": 0.5613, "step": 112 }, { "epoch": 0.06257498846331333, "grad_norm": 0.38839760422706604, "learning_rate": 2.0848708487084874e-06, "loss": 0.5918, "step": 113 }, { "epoch": 0.06312874942316567, "grad_norm": 0.37708091735839844, "learning_rate": 2.1033210332103325e-06, "loss": 0.5807, "step": 114 }, { "epoch": 0.063682510383018, "grad_norm": 0.3257189691066742, "learning_rate": 2.1217712177121775e-06, "loss": 0.5738, "step": 115 }, { "epoch": 0.06423627134287033, "grad_norm": 0.3924807012081146, "learning_rate": 2.140221402214022e-06, "loss": 0.5895, "step": 116 }, { "epoch": 0.06479003230272266, "grad_norm": 0.349293977022171, "learning_rate": 2.158671586715867e-06, "loss": 0.5674, "step": 117 }, { "epoch": 0.06534379326257499, "grad_norm": 0.37979549169540405, "learning_rate": 2.177121771217712e-06, "loss": 0.5878, "step": 118 }, { "epoch": 0.06589755422242732, "grad_norm": 0.37412866950035095, "learning_rate": 2.1955719557195573e-06, "loss": 0.5704, "step": 119 }, { "epoch": 0.06645131518227965, "grad_norm": 0.3867645561695099, "learning_rate": 2.2140221402214023e-06, "loss": 0.5778, "step": 120 }, { "epoch": 0.06700507614213198, "grad_norm": 0.38660353422164917, "learning_rate": 2.2324723247232473e-06, "loss": 0.5607, "step": 121 }, { "epoch": 0.0675588371019843, "grad_norm": 0.4165349006652832, "learning_rate": 2.2509225092250924e-06, "loss": 0.5887, "step": 122 }, { "epoch": 0.06811259806183664, "grad_norm": 0.3811813294887543, "learning_rate": 2.2693726937269374e-06, "loss": 0.5891, "step": 123 }, { "epoch": 0.06866635902168897, "grad_norm": 0.3916253447532654, "learning_rate": 2.2878228782287825e-06, "loss": 0.5971, "step": 124 }, { "epoch": 0.0692201199815413, "grad_norm": 0.3703649640083313, "learning_rate": 2.3062730627306275e-06, "loss": 0.5784, "step": 125 }, { "epoch": 0.06977388094139363, "grad_norm": 0.38154569268226624, "learning_rate": 2.3247232472324725e-06, "loss": 0.5907, "step": 126 }, { "epoch": 0.07032764190124596, "grad_norm": 0.37331727147102356, "learning_rate": 2.3431734317343176e-06, "loss": 0.5987, "step": 127 }, { "epoch": 0.07088140286109829, "grad_norm": 0.330460786819458, "learning_rate": 2.3616236162361626e-06, "loss": 0.5301, "step": 128 }, { "epoch": 0.07143516382095062, "grad_norm": 0.40529724955558777, "learning_rate": 2.3800738007380077e-06, "loss": 0.6028, "step": 129 }, { "epoch": 0.07198892478080296, "grad_norm": 0.40128666162490845, "learning_rate": 2.3985239852398527e-06, "loss": 0.5671, "step": 130 }, { "epoch": 0.07254268574065528, "grad_norm": 0.405517578125, "learning_rate": 2.4169741697416977e-06, "loss": 0.5867, "step": 131 }, { "epoch": 0.07309644670050762, "grad_norm": 0.3589847683906555, "learning_rate": 2.4354243542435428e-06, "loss": 0.5605, "step": 132 }, { "epoch": 0.07365020766035994, "grad_norm": 0.339797705411911, "learning_rate": 2.453874538745388e-06, "loss": 0.5584, "step": 133 }, { "epoch": 0.07420396862021228, "grad_norm": 0.4045381247997284, "learning_rate": 2.472324723247233e-06, "loss": 0.5799, "step": 134 }, { "epoch": 0.0747577295800646, "grad_norm": 0.38862118124961853, "learning_rate": 2.490774907749078e-06, "loss": 0.5685, "step": 135 }, { "epoch": 0.07531149053991694, "grad_norm": 0.40960919857025146, "learning_rate": 2.5092250922509225e-06, "loss": 0.5963, "step": 136 }, { "epoch": 0.07586525149976926, "grad_norm": 0.39562541246414185, "learning_rate": 2.527675276752768e-06, "loss": 0.5666, "step": 137 }, { "epoch": 0.0764190124596216, "grad_norm": 0.3510668873786926, "learning_rate": 2.5461254612546126e-06, "loss": 0.576, "step": 138 }, { "epoch": 0.07697277341947392, "grad_norm": 0.39916926622390747, "learning_rate": 2.564575645756458e-06, "loss": 0.6076, "step": 139 }, { "epoch": 0.07752653437932626, "grad_norm": 0.39452287554740906, "learning_rate": 2.5830258302583027e-06, "loss": 0.5729, "step": 140 }, { "epoch": 0.07808029533917858, "grad_norm": 0.40024876594543457, "learning_rate": 2.6014760147601477e-06, "loss": 0.576, "step": 141 }, { "epoch": 0.07863405629903092, "grad_norm": 0.38324975967407227, "learning_rate": 2.6199261992619928e-06, "loss": 0.563, "step": 142 }, { "epoch": 0.07918781725888324, "grad_norm": 0.36725160479545593, "learning_rate": 2.638376383763838e-06, "loss": 0.5851, "step": 143 }, { "epoch": 0.07974157821873558, "grad_norm": 0.36442968249320984, "learning_rate": 2.656826568265683e-06, "loss": 0.5911, "step": 144 }, { "epoch": 0.08029533917858792, "grad_norm": 0.38342612981796265, "learning_rate": 2.675276752767528e-06, "loss": 0.5885, "step": 145 }, { "epoch": 0.08084910013844024, "grad_norm": 0.3685580790042877, "learning_rate": 2.693726937269373e-06, "loss": 0.5848, "step": 146 }, { "epoch": 0.08140286109829258, "grad_norm": 0.42139074206352234, "learning_rate": 2.712177121771218e-06, "loss": 0.5871, "step": 147 }, { "epoch": 0.0819566220581449, "grad_norm": 0.391757607460022, "learning_rate": 2.730627306273063e-06, "loss": 0.5844, "step": 148 }, { "epoch": 0.08251038301799724, "grad_norm": 0.4042794704437256, "learning_rate": 2.749077490774908e-06, "loss": 0.55, "step": 149 }, { "epoch": 0.08306414397784956, "grad_norm": 0.38608673214912415, "learning_rate": 2.767527675276753e-06, "loss": 0.5705, "step": 150 }, { "epoch": 0.0836179049377019, "grad_norm": 0.40304553508758545, "learning_rate": 2.785977859778598e-06, "loss": 0.5639, "step": 151 }, { "epoch": 0.08417166589755422, "grad_norm": 0.4092898368835449, "learning_rate": 2.8044280442804427e-06, "loss": 0.5614, "step": 152 }, { "epoch": 0.08472542685740655, "grad_norm": 0.3676226735115051, "learning_rate": 2.822878228782288e-06, "loss": 0.5546, "step": 153 }, { "epoch": 0.08527918781725888, "grad_norm": 0.41524752974510193, "learning_rate": 2.841328413284133e-06, "loss": 0.5505, "step": 154 }, { "epoch": 0.08583294877711121, "grad_norm": 0.3458798825740814, "learning_rate": 2.8597785977859783e-06, "loss": 0.5711, "step": 155 }, { "epoch": 0.08638670973696354, "grad_norm": 0.3995174765586853, "learning_rate": 2.878228782287823e-06, "loss": 0.56, "step": 156 }, { "epoch": 0.08694047069681587, "grad_norm": 0.35882872343063354, "learning_rate": 2.8966789667896684e-06, "loss": 0.5434, "step": 157 }, { "epoch": 0.08749423165666821, "grad_norm": 0.41419973969459534, "learning_rate": 2.915129151291513e-06, "loss": 0.5832, "step": 158 }, { "epoch": 0.08804799261652053, "grad_norm": 0.3448088765144348, "learning_rate": 2.9335793357933584e-06, "loss": 0.5499, "step": 159 }, { "epoch": 0.08860175357637287, "grad_norm": 0.39340740442276, "learning_rate": 2.952029520295203e-06, "loss": 0.5883, "step": 160 }, { "epoch": 0.0891555145362252, "grad_norm": 0.4244903028011322, "learning_rate": 2.970479704797048e-06, "loss": 0.5734, "step": 161 }, { "epoch": 0.08970927549607753, "grad_norm": 0.42329955101013184, "learning_rate": 2.988929889298893e-06, "loss": 0.5688, "step": 162 }, { "epoch": 0.09026303645592985, "grad_norm": 0.39687106013298035, "learning_rate": 3.007380073800738e-06, "loss": 0.5692, "step": 163 }, { "epoch": 0.09081679741578219, "grad_norm": 0.39781761169433594, "learning_rate": 3.0258302583025832e-06, "loss": 0.5629, "step": 164 }, { "epoch": 0.09137055837563451, "grad_norm": 0.418769508600235, "learning_rate": 3.0442804428044283e-06, "loss": 0.582, "step": 165 }, { "epoch": 0.09192431933548685, "grad_norm": 0.41387853026390076, "learning_rate": 3.0627306273062733e-06, "loss": 0.5482, "step": 166 }, { "epoch": 0.09247808029533917, "grad_norm": 0.4213850498199463, "learning_rate": 3.0811808118081183e-06, "loss": 0.5706, "step": 167 }, { "epoch": 0.09303184125519151, "grad_norm": 0.39489662647247314, "learning_rate": 3.0996309963099634e-06, "loss": 0.5757, "step": 168 }, { "epoch": 0.09358560221504383, "grad_norm": 0.38952144980430603, "learning_rate": 3.1180811808118084e-06, "loss": 0.5306, "step": 169 }, { "epoch": 0.09413936317489617, "grad_norm": 0.3943084478378296, "learning_rate": 3.1365313653136535e-06, "loss": 0.5457, "step": 170 }, { "epoch": 0.0946931241347485, "grad_norm": 0.3926984369754791, "learning_rate": 3.1549815498154985e-06, "loss": 0.5554, "step": 171 }, { "epoch": 0.09524688509460083, "grad_norm": 0.3922779858112335, "learning_rate": 3.173431734317343e-06, "loss": 0.5549, "step": 172 }, { "epoch": 0.09580064605445317, "grad_norm": 0.40063416957855225, "learning_rate": 3.1918819188191886e-06, "loss": 0.5283, "step": 173 }, { "epoch": 0.09635440701430549, "grad_norm": 0.3800892233848572, "learning_rate": 3.210332103321033e-06, "loss": 0.5491, "step": 174 }, { "epoch": 0.09690816797415783, "grad_norm": 0.40492719411849976, "learning_rate": 3.2287822878228787e-06, "loss": 0.5618, "step": 175 }, { "epoch": 0.09746192893401015, "grad_norm": 0.3660856783390045, "learning_rate": 3.2472324723247233e-06, "loss": 0.5549, "step": 176 }, { "epoch": 0.09801568989386249, "grad_norm": 0.3793259561061859, "learning_rate": 3.2656826568265687e-06, "loss": 0.526, "step": 177 }, { "epoch": 0.09856945085371481, "grad_norm": 0.37546077370643616, "learning_rate": 3.2841328413284134e-06, "loss": 0.547, "step": 178 }, { "epoch": 0.09912321181356715, "grad_norm": 0.39530149102211, "learning_rate": 3.302583025830259e-06, "loss": 0.5532, "step": 179 }, { "epoch": 0.09967697277341947, "grad_norm": 0.3641144037246704, "learning_rate": 3.3210332103321034e-06, "loss": 0.5487, "step": 180 }, { "epoch": 0.1002307337332718, "grad_norm": 0.37714895606040955, "learning_rate": 3.3394833948339485e-06, "loss": 0.5581, "step": 181 }, { "epoch": 0.10078449469312413, "grad_norm": 0.36601969599723816, "learning_rate": 3.3579335793357935e-06, "loss": 0.5458, "step": 182 }, { "epoch": 0.10133825565297647, "grad_norm": 0.37721991539001465, "learning_rate": 3.3763837638376386e-06, "loss": 0.5428, "step": 183 }, { "epoch": 0.10189201661282879, "grad_norm": 0.3969361186027527, "learning_rate": 3.3948339483394836e-06, "loss": 0.5584, "step": 184 }, { "epoch": 0.10244577757268113, "grad_norm": 0.3982224762439728, "learning_rate": 3.4132841328413286e-06, "loss": 0.525, "step": 185 }, { "epoch": 0.10299953853253345, "grad_norm": 0.3633633255958557, "learning_rate": 3.4317343173431737e-06, "loss": 0.5444, "step": 186 }, { "epoch": 0.10355329949238579, "grad_norm": 0.3649335503578186, "learning_rate": 3.4501845018450187e-06, "loss": 0.5578, "step": 187 }, { "epoch": 0.10410706045223812, "grad_norm": 0.3850434720516205, "learning_rate": 3.4686346863468638e-06, "loss": 0.5264, "step": 188 }, { "epoch": 0.10466082141209045, "grad_norm": 0.37551894783973694, "learning_rate": 3.487084870848709e-06, "loss": 0.5644, "step": 189 }, { "epoch": 0.10521458237194278, "grad_norm": 0.39620092511177063, "learning_rate": 3.505535055350554e-06, "loss": 0.543, "step": 190 }, { "epoch": 0.1057683433317951, "grad_norm": 0.4034550189971924, "learning_rate": 3.523985239852399e-06, "loss": 0.5318, "step": 191 }, { "epoch": 0.10632210429164744, "grad_norm": 0.36161693930625916, "learning_rate": 3.5424354243542435e-06, "loss": 0.5315, "step": 192 }, { "epoch": 0.10687586525149977, "grad_norm": 0.418378084897995, "learning_rate": 3.560885608856089e-06, "loss": 0.5373, "step": 193 }, { "epoch": 0.1074296262113521, "grad_norm": 0.38483965396881104, "learning_rate": 3.5793357933579336e-06, "loss": 0.5445, "step": 194 }, { "epoch": 0.10798338717120443, "grad_norm": 0.36567217111587524, "learning_rate": 3.597785977859779e-06, "loss": 0.5287, "step": 195 }, { "epoch": 0.10853714813105676, "grad_norm": 0.4333740472793579, "learning_rate": 3.6162361623616237e-06, "loss": 0.5484, "step": 196 }, { "epoch": 0.10909090909090909, "grad_norm": 0.40417635440826416, "learning_rate": 3.634686346863469e-06, "loss": 0.5266, "step": 197 }, { "epoch": 0.10964467005076142, "grad_norm": 0.3633660078048706, "learning_rate": 3.6531365313653137e-06, "loss": 0.5139, "step": 198 }, { "epoch": 0.11019843101061375, "grad_norm": 0.40867626667022705, "learning_rate": 3.671586715867159e-06, "loss": 0.5389, "step": 199 }, { "epoch": 0.11075219197046608, "grad_norm": 0.3859797418117523, "learning_rate": 3.690036900369004e-06, "loss": 0.5558, "step": 200 }, { "epoch": 0.11130595293031842, "grad_norm": 0.42342278361320496, "learning_rate": 3.708487084870849e-06, "loss": 0.5368, "step": 201 }, { "epoch": 0.11185971389017074, "grad_norm": 0.4499402344226837, "learning_rate": 3.726937269372694e-06, "loss": 0.5544, "step": 202 }, { "epoch": 0.11241347485002308, "grad_norm": 0.3703411817550659, "learning_rate": 3.745387453874539e-06, "loss": 0.5359, "step": 203 }, { "epoch": 0.1129672358098754, "grad_norm": 0.3801664710044861, "learning_rate": 3.763837638376384e-06, "loss": 0.5525, "step": 204 }, { "epoch": 0.11352099676972774, "grad_norm": 0.4611019790172577, "learning_rate": 3.782287822878229e-06, "loss": 0.5512, "step": 205 }, { "epoch": 0.11407475772958006, "grad_norm": 0.3849778473377228, "learning_rate": 3.800738007380074e-06, "loss": 0.5405, "step": 206 }, { "epoch": 0.1146285186894324, "grad_norm": 0.40120190382003784, "learning_rate": 3.819188191881919e-06, "loss": 0.5461, "step": 207 }, { "epoch": 0.11518227964928472, "grad_norm": 0.3940730094909668, "learning_rate": 3.837638376383764e-06, "loss": 0.5447, "step": 208 }, { "epoch": 0.11573604060913706, "grad_norm": 0.34999459981918335, "learning_rate": 3.856088560885609e-06, "loss": 0.5477, "step": 209 }, { "epoch": 0.11628980156898938, "grad_norm": 0.3560105860233307, "learning_rate": 3.874538745387454e-06, "loss": 0.5419, "step": 210 }, { "epoch": 0.11684356252884172, "grad_norm": 0.40144410729408264, "learning_rate": 3.892988929889299e-06, "loss": 0.5381, "step": 211 }, { "epoch": 0.11739732348869404, "grad_norm": 0.4164940416812897, "learning_rate": 3.911439114391144e-06, "loss": 0.5493, "step": 212 }, { "epoch": 0.11795108444854638, "grad_norm": 0.3795572817325592, "learning_rate": 3.929889298892989e-06, "loss": 0.5377, "step": 213 }, { "epoch": 0.1185048454083987, "grad_norm": 0.4328853189945221, "learning_rate": 3.948339483394834e-06, "loss": 0.5232, "step": 214 }, { "epoch": 0.11905860636825104, "grad_norm": 0.4174976944923401, "learning_rate": 3.966789667896679e-06, "loss": 0.5493, "step": 215 }, { "epoch": 0.11961236732810338, "grad_norm": 0.34503090381622314, "learning_rate": 3.9852398523985245e-06, "loss": 0.5066, "step": 216 }, { "epoch": 0.1201661282879557, "grad_norm": 0.39394867420196533, "learning_rate": 4.003690036900369e-06, "loss": 0.5451, "step": 217 }, { "epoch": 0.12071988924780803, "grad_norm": 0.42402592301368713, "learning_rate": 4.0221402214022145e-06, "loss": 0.5657, "step": 218 }, { "epoch": 0.12127365020766036, "grad_norm": 0.4330940246582031, "learning_rate": 4.04059040590406e-06, "loss": 0.5219, "step": 219 }, { "epoch": 0.1218274111675127, "grad_norm": 0.37159669399261475, "learning_rate": 4.059040590405905e-06, "loss": 0.5185, "step": 220 }, { "epoch": 0.12238117212736502, "grad_norm": 0.3961015045642853, "learning_rate": 4.077490774907749e-06, "loss": 0.5444, "step": 221 }, { "epoch": 0.12293493308721735, "grad_norm": 0.46110835671424866, "learning_rate": 4.095940959409595e-06, "loss": 0.5906, "step": 222 }, { "epoch": 0.12348869404706968, "grad_norm": 0.3724175691604614, "learning_rate": 4.114391143911439e-06, "loss": 0.5377, "step": 223 }, { "epoch": 0.12404245500692201, "grad_norm": 0.35553663969039917, "learning_rate": 4.132841328413285e-06, "loss": 0.5704, "step": 224 }, { "epoch": 0.12459621596677434, "grad_norm": 0.4322466552257538, "learning_rate": 4.151291512915129e-06, "loss": 0.5436, "step": 225 }, { "epoch": 0.12514997692662666, "grad_norm": 0.3647211790084839, "learning_rate": 4.169741697416975e-06, "loss": 0.5294, "step": 226 }, { "epoch": 0.125703737886479, "grad_norm": 0.4180801212787628, "learning_rate": 4.1881918819188195e-06, "loss": 0.5443, "step": 227 }, { "epoch": 0.12625749884633133, "grad_norm": 0.37434136867523193, "learning_rate": 4.206642066420665e-06, "loss": 0.5419, "step": 228 }, { "epoch": 0.12681125980618366, "grad_norm": 0.38882380723953247, "learning_rate": 4.2250922509225096e-06, "loss": 0.5208, "step": 229 }, { "epoch": 0.127365020766036, "grad_norm": 0.4050264358520508, "learning_rate": 4.243542435424355e-06, "loss": 0.5297, "step": 230 }, { "epoch": 0.12791878172588833, "grad_norm": 0.38343679904937744, "learning_rate": 4.2619926199262e-06, "loss": 0.567, "step": 231 }, { "epoch": 0.12847254268574065, "grad_norm": 0.4973456859588623, "learning_rate": 4.280442804428044e-06, "loss": 0.5424, "step": 232 }, { "epoch": 0.12902630364559298, "grad_norm": 0.4452069103717804, "learning_rate": 4.29889298892989e-06, "loss": 0.5454, "step": 233 }, { "epoch": 0.12958006460544533, "grad_norm": 0.44286400079727173, "learning_rate": 4.317343173431734e-06, "loss": 0.5305, "step": 234 }, { "epoch": 0.13013382556529765, "grad_norm": 0.4046939015388489, "learning_rate": 4.33579335793358e-06, "loss": 0.5298, "step": 235 }, { "epoch": 0.13068758652514997, "grad_norm": 0.4939473569393158, "learning_rate": 4.354243542435424e-06, "loss": 0.5388, "step": 236 }, { "epoch": 0.1312413474850023, "grad_norm": 0.4631883203983307, "learning_rate": 4.37269372693727e-06, "loss": 0.547, "step": 237 }, { "epoch": 0.13179510844485465, "grad_norm": 0.4208291172981262, "learning_rate": 4.3911439114391145e-06, "loss": 0.5374, "step": 238 }, { "epoch": 0.13234886940470697, "grad_norm": 0.47946348786354065, "learning_rate": 4.40959409594096e-06, "loss": 0.5242, "step": 239 }, { "epoch": 0.1329026303645593, "grad_norm": 0.4278174936771393, "learning_rate": 4.428044280442805e-06, "loss": 0.552, "step": 240 }, { "epoch": 0.13345639132441162, "grad_norm": 0.3921195864677429, "learning_rate": 4.446494464944649e-06, "loss": 0.51, "step": 241 }, { "epoch": 0.13401015228426397, "grad_norm": 0.43443912267684937, "learning_rate": 4.464944649446495e-06, "loss": 0.5245, "step": 242 }, { "epoch": 0.1345639132441163, "grad_norm": 0.4103611409664154, "learning_rate": 4.483394833948339e-06, "loss": 0.5016, "step": 243 }, { "epoch": 0.1351176742039686, "grad_norm": 0.3711482882499695, "learning_rate": 4.501845018450185e-06, "loss": 0.5486, "step": 244 }, { "epoch": 0.13567143516382096, "grad_norm": 0.44851693511009216, "learning_rate": 4.520295202952029e-06, "loss": 0.5367, "step": 245 }, { "epoch": 0.1362251961236733, "grad_norm": 0.3751492500305176, "learning_rate": 4.538745387453875e-06, "loss": 0.5284, "step": 246 }, { "epoch": 0.1367789570835256, "grad_norm": 0.3972494900226593, "learning_rate": 4.5571955719557194e-06, "loss": 0.513, "step": 247 }, { "epoch": 0.13733271804337793, "grad_norm": 0.4134092628955841, "learning_rate": 4.575645756457565e-06, "loss": 0.5409, "step": 248 }, { "epoch": 0.13788647900323028, "grad_norm": 0.4445379674434662, "learning_rate": 4.5940959409594095e-06, "loss": 0.5417, "step": 249 }, { "epoch": 0.1384402399630826, "grad_norm": 0.4105472266674042, "learning_rate": 4.612546125461255e-06, "loss": 0.5415, "step": 250 }, { "epoch": 0.13899400092293493, "grad_norm": 0.5208163261413574, "learning_rate": 4.6309963099631e-06, "loss": 0.571, "step": 251 }, { "epoch": 0.13954776188278725, "grad_norm": 0.4505283236503601, "learning_rate": 4.649446494464945e-06, "loss": 0.5466, "step": 252 }, { "epoch": 0.1401015228426396, "grad_norm": 0.4458693861961365, "learning_rate": 4.66789667896679e-06, "loss": 0.5411, "step": 253 }, { "epoch": 0.14065528380249193, "grad_norm": 0.4541989862918854, "learning_rate": 4.686346863468635e-06, "loss": 0.5177, "step": 254 }, { "epoch": 0.14120904476234425, "grad_norm": 0.4078447222709656, "learning_rate": 4.704797047970481e-06, "loss": 0.5529, "step": 255 }, { "epoch": 0.14176280572219657, "grad_norm": 0.48063474893569946, "learning_rate": 4.723247232472325e-06, "loss": 0.5326, "step": 256 }, { "epoch": 0.14231656668204892, "grad_norm": 0.41401705145835876, "learning_rate": 4.741697416974171e-06, "loss": 0.5375, "step": 257 }, { "epoch": 0.14287032764190125, "grad_norm": 0.4332129955291748, "learning_rate": 4.760147601476015e-06, "loss": 0.5462, "step": 258 }, { "epoch": 0.14342408860175357, "grad_norm": 0.43630072474479675, "learning_rate": 4.778597785977861e-06, "loss": 0.5284, "step": 259 }, { "epoch": 0.14397784956160592, "grad_norm": 0.4409022331237793, "learning_rate": 4.797047970479705e-06, "loss": 0.543, "step": 260 }, { "epoch": 0.14453161052145824, "grad_norm": 0.4704098701477051, "learning_rate": 4.81549815498155e-06, "loss": 0.5688, "step": 261 }, { "epoch": 0.14508537148131057, "grad_norm": 0.4279260039329529, "learning_rate": 4.8339483394833955e-06, "loss": 0.5425, "step": 262 }, { "epoch": 0.1456391324411629, "grad_norm": 0.456147164106369, "learning_rate": 4.85239852398524e-06, "loss": 0.5274, "step": 263 }, { "epoch": 0.14619289340101524, "grad_norm": 0.4391512870788574, "learning_rate": 4.8708487084870856e-06, "loss": 0.5558, "step": 264 }, { "epoch": 0.14674665436086756, "grad_norm": 0.4548433721065521, "learning_rate": 4.88929889298893e-06, "loss": 0.5206, "step": 265 }, { "epoch": 0.14730041532071989, "grad_norm": 0.49633026123046875, "learning_rate": 4.907749077490776e-06, "loss": 0.5598, "step": 266 }, { "epoch": 0.1478541762805722, "grad_norm": 0.3930225372314453, "learning_rate": 4.92619926199262e-06, "loss": 0.5506, "step": 267 }, { "epoch": 0.14840793724042456, "grad_norm": 0.47617679834365845, "learning_rate": 4.944649446494466e-06, "loss": 0.5277, "step": 268 }, { "epoch": 0.14896169820027688, "grad_norm": 0.3669383227825165, "learning_rate": 4.96309963099631e-06, "loss": 0.5497, "step": 269 }, { "epoch": 0.1495154591601292, "grad_norm": 0.3772871792316437, "learning_rate": 4.981549815498156e-06, "loss": 0.5328, "step": 270 }, { "epoch": 0.15006922011998153, "grad_norm": 0.46807652711868286, "learning_rate": 5e-06, "loss": 0.5102, "step": 271 }, { "epoch": 0.15062298107983388, "grad_norm": 0.37675318121910095, "learning_rate": 5.018450184501845e-06, "loss": 0.5458, "step": 272 }, { "epoch": 0.1511767420396862, "grad_norm": 0.4200018644332886, "learning_rate": 5.03690036900369e-06, "loss": 0.5301, "step": 273 }, { "epoch": 0.15173050299953852, "grad_norm": 0.41210782527923584, "learning_rate": 5.055350553505536e-06, "loss": 0.5206, "step": 274 }, { "epoch": 0.15228426395939088, "grad_norm": 0.3770165741443634, "learning_rate": 5.0738007380073806e-06, "loss": 0.5187, "step": 275 }, { "epoch": 0.1528380249192432, "grad_norm": 0.3860195279121399, "learning_rate": 5.092250922509225e-06, "loss": 0.5357, "step": 276 }, { "epoch": 0.15339178587909552, "grad_norm": 0.37985536456108093, "learning_rate": 5.11070110701107e-06, "loss": 0.5318, "step": 277 }, { "epoch": 0.15394554683894784, "grad_norm": 0.40773189067840576, "learning_rate": 5.129151291512916e-06, "loss": 0.5313, "step": 278 }, { "epoch": 0.1544993077988002, "grad_norm": 0.35738620162010193, "learning_rate": 5.147601476014761e-06, "loss": 0.4937, "step": 279 }, { "epoch": 0.15505306875865252, "grad_norm": 0.4224122166633606, "learning_rate": 5.166051660516605e-06, "loss": 0.5311, "step": 280 }, { "epoch": 0.15560682971850484, "grad_norm": 0.44098031520843506, "learning_rate": 5.18450184501845e-06, "loss": 0.5427, "step": 281 }, { "epoch": 0.15616059067835716, "grad_norm": 0.3546939790248871, "learning_rate": 5.2029520295202954e-06, "loss": 0.5072, "step": 282 }, { "epoch": 0.15671435163820951, "grad_norm": 0.42525696754455566, "learning_rate": 5.221402214022141e-06, "loss": 0.5238, "step": 283 }, { "epoch": 0.15726811259806184, "grad_norm": 0.38778069615364075, "learning_rate": 5.2398523985239855e-06, "loss": 0.4922, "step": 284 }, { "epoch": 0.15782187355791416, "grad_norm": 0.41155168414115906, "learning_rate": 5.25830258302583e-06, "loss": 0.5109, "step": 285 }, { "epoch": 0.15837563451776648, "grad_norm": 0.40725284814834595, "learning_rate": 5.276752767527676e-06, "loss": 0.5168, "step": 286 }, { "epoch": 0.15892939547761883, "grad_norm": 0.4293622672557831, "learning_rate": 5.295202952029521e-06, "loss": 0.5337, "step": 287 }, { "epoch": 0.15948315643747116, "grad_norm": 0.3942197263240814, "learning_rate": 5.313653136531366e-06, "loss": 0.5125, "step": 288 }, { "epoch": 0.16003691739732348, "grad_norm": 0.39063695073127747, "learning_rate": 5.332103321033211e-06, "loss": 0.5363, "step": 289 }, { "epoch": 0.16059067835717583, "grad_norm": 0.39502644538879395, "learning_rate": 5.350553505535056e-06, "loss": 0.5181, "step": 290 }, { "epoch": 0.16114443931702815, "grad_norm": 0.40805134177207947, "learning_rate": 5.369003690036901e-06, "loss": 0.5357, "step": 291 }, { "epoch": 0.16169820027688048, "grad_norm": 0.45030874013900757, "learning_rate": 5.387453874538746e-06, "loss": 0.5208, "step": 292 }, { "epoch": 0.1622519612367328, "grad_norm": 0.3950265944004059, "learning_rate": 5.405904059040591e-06, "loss": 0.5511, "step": 293 }, { "epoch": 0.16280572219658515, "grad_norm": 0.4759574830532074, "learning_rate": 5.424354243542436e-06, "loss": 0.5115, "step": 294 }, { "epoch": 0.16335948315643747, "grad_norm": 0.4520640969276428, "learning_rate": 5.4428044280442805e-06, "loss": 0.5237, "step": 295 }, { "epoch": 0.1639132441162898, "grad_norm": 0.3990505337715149, "learning_rate": 5.461254612546126e-06, "loss": 0.5126, "step": 296 }, { "epoch": 0.16446700507614212, "grad_norm": 0.5856457948684692, "learning_rate": 5.4797047970479715e-06, "loss": 0.5364, "step": 297 }, { "epoch": 0.16502076603599447, "grad_norm": 0.40639254450798035, "learning_rate": 5.498154981549816e-06, "loss": 0.5326, "step": 298 }, { "epoch": 0.1655745269958468, "grad_norm": 0.39968302845954895, "learning_rate": 5.516605166051661e-06, "loss": 0.504, "step": 299 }, { "epoch": 0.16612828795569912, "grad_norm": 0.38437339663505554, "learning_rate": 5.535055350553506e-06, "loss": 0.5449, "step": 300 }, { "epoch": 0.16668204891555147, "grad_norm": 0.381281316280365, "learning_rate": 5.553505535055352e-06, "loss": 0.5337, "step": 301 }, { "epoch": 0.1672358098754038, "grad_norm": 0.39366796612739563, "learning_rate": 5.571955719557196e-06, "loss": 0.5198, "step": 302 }, { "epoch": 0.1677895708352561, "grad_norm": 0.42254263162612915, "learning_rate": 5.590405904059041e-06, "loss": 0.5136, "step": 303 }, { "epoch": 0.16834333179510844, "grad_norm": 0.449341744184494, "learning_rate": 5.6088560885608855e-06, "loss": 0.5305, "step": 304 }, { "epoch": 0.1688970927549608, "grad_norm": 0.4075009226799011, "learning_rate": 5.627306273062732e-06, "loss": 0.5556, "step": 305 }, { "epoch": 0.1694508537148131, "grad_norm": 0.4081745743751526, "learning_rate": 5.645756457564576e-06, "loss": 0.5292, "step": 306 }, { "epoch": 0.17000461467466543, "grad_norm": 0.4478996694087982, "learning_rate": 5.664206642066421e-06, "loss": 0.5235, "step": 307 }, { "epoch": 0.17055837563451776, "grad_norm": 0.43514391779899597, "learning_rate": 5.682656826568266e-06, "loss": 0.5529, "step": 308 }, { "epoch": 0.1711121365943701, "grad_norm": 0.3947388231754303, "learning_rate": 5.701107011070112e-06, "loss": 0.5139, "step": 309 }, { "epoch": 0.17166589755422243, "grad_norm": 0.4857195019721985, "learning_rate": 5.7195571955719566e-06, "loss": 0.5595, "step": 310 }, { "epoch": 0.17221965851407475, "grad_norm": 0.42614808678627014, "learning_rate": 5.738007380073801e-06, "loss": 0.5554, "step": 311 }, { "epoch": 0.17277341947392708, "grad_norm": 0.4709138572216034, "learning_rate": 5.756457564575646e-06, "loss": 0.5066, "step": 312 }, { "epoch": 0.17332718043377943, "grad_norm": 0.4472712576389313, "learning_rate": 5.77490774907749e-06, "loss": 0.5102, "step": 313 }, { "epoch": 0.17388094139363175, "grad_norm": 0.4009638726711273, "learning_rate": 5.793357933579337e-06, "loss": 0.5189, "step": 314 }, { "epoch": 0.17443470235348407, "grad_norm": 0.45421212911605835, "learning_rate": 5.811808118081181e-06, "loss": 0.5435, "step": 315 }, { "epoch": 0.17498846331333642, "grad_norm": 0.44733741879463196, "learning_rate": 5.830258302583026e-06, "loss": 0.547, "step": 316 }, { "epoch": 0.17554222427318875, "grad_norm": 0.39286553859710693, "learning_rate": 5.8487084870848706e-06, "loss": 0.481, "step": 317 }, { "epoch": 0.17609598523304107, "grad_norm": 0.40039610862731934, "learning_rate": 5.867158671586717e-06, "loss": 0.5218, "step": 318 }, { "epoch": 0.1766497461928934, "grad_norm": 0.41943153738975525, "learning_rate": 5.8856088560885615e-06, "loss": 0.5145, "step": 319 }, { "epoch": 0.17720350715274574, "grad_norm": 0.4935627579689026, "learning_rate": 5.904059040590406e-06, "loss": 0.5489, "step": 320 }, { "epoch": 0.17775726811259807, "grad_norm": 0.41662874817848206, "learning_rate": 5.922509225092251e-06, "loss": 0.5368, "step": 321 }, { "epoch": 0.1783110290724504, "grad_norm": 0.48724934458732605, "learning_rate": 5.940959409594096e-06, "loss": 0.5202, "step": 322 }, { "epoch": 0.1788647900323027, "grad_norm": 0.4591318964958191, "learning_rate": 5.959409594095942e-06, "loss": 0.5161, "step": 323 }, { "epoch": 0.17941855099215506, "grad_norm": 0.40617313981056213, "learning_rate": 5.977859778597786e-06, "loss": 0.517, "step": 324 }, { "epoch": 0.17997231195200739, "grad_norm": 0.43235328793525696, "learning_rate": 5.996309963099632e-06, "loss": 0.4998, "step": 325 }, { "epoch": 0.1805260729118597, "grad_norm": 0.440584659576416, "learning_rate": 6.014760147601476e-06, "loss": 0.4973, "step": 326 }, { "epoch": 0.18107983387171203, "grad_norm": 0.43344974517822266, "learning_rate": 6.033210332103322e-06, "loss": 0.5396, "step": 327 }, { "epoch": 0.18163359483156438, "grad_norm": 0.3824906647205353, "learning_rate": 6.0516605166051664e-06, "loss": 0.4928, "step": 328 }, { "epoch": 0.1821873557914167, "grad_norm": 0.4700775444507599, "learning_rate": 6.070110701107012e-06, "loss": 0.5211, "step": 329 }, { "epoch": 0.18274111675126903, "grad_norm": 0.3927646577358246, "learning_rate": 6.0885608856088565e-06, "loss": 0.504, "step": 330 }, { "epoch": 0.18329487771112138, "grad_norm": 0.40194234251976013, "learning_rate": 6.107011070110702e-06, "loss": 0.5049, "step": 331 }, { "epoch": 0.1838486386709737, "grad_norm": 0.4240693151950836, "learning_rate": 6.125461254612547e-06, "loss": 0.5109, "step": 332 }, { "epoch": 0.18440239963082602, "grad_norm": 0.45079389214515686, "learning_rate": 6.143911439114392e-06, "loss": 0.5128, "step": 333 }, { "epoch": 0.18495616059067835, "grad_norm": 0.4387524127960205, "learning_rate": 6.162361623616237e-06, "loss": 0.5376, "step": 334 }, { "epoch": 0.1855099215505307, "grad_norm": 0.5068715214729309, "learning_rate": 6.180811808118081e-06, "loss": 0.5311, "step": 335 }, { "epoch": 0.18606368251038302, "grad_norm": 0.4453672766685486, "learning_rate": 6.199261992619927e-06, "loss": 0.5233, "step": 336 }, { "epoch": 0.18661744347023534, "grad_norm": 0.44551554322242737, "learning_rate": 6.217712177121772e-06, "loss": 0.5233, "step": 337 }, { "epoch": 0.18717120443008767, "grad_norm": 0.43874621391296387, "learning_rate": 6.236162361623617e-06, "loss": 0.4896, "step": 338 }, { "epoch": 0.18772496538994002, "grad_norm": 0.4488990902900696, "learning_rate": 6.2546125461254615e-06, "loss": 0.5261, "step": 339 }, { "epoch": 0.18827872634979234, "grad_norm": 0.411893367767334, "learning_rate": 6.273062730627307e-06, "loss": 0.5082, "step": 340 }, { "epoch": 0.18883248730964466, "grad_norm": 0.39121779799461365, "learning_rate": 6.291512915129152e-06, "loss": 0.5383, "step": 341 }, { "epoch": 0.189386248269497, "grad_norm": 0.4125840365886688, "learning_rate": 6.309963099630997e-06, "loss": 0.5162, "step": 342 }, { "epoch": 0.18994000922934934, "grad_norm": 0.48363980650901794, "learning_rate": 6.328413284132842e-06, "loss": 0.5201, "step": 343 }, { "epoch": 0.19049377018920166, "grad_norm": 0.38090208172798157, "learning_rate": 6.346863468634686e-06, "loss": 0.5292, "step": 344 }, { "epoch": 0.19104753114905398, "grad_norm": 0.5109505653381348, "learning_rate": 6.3653136531365325e-06, "loss": 0.5253, "step": 345 }, { "epoch": 0.19160129210890633, "grad_norm": 0.4313194751739502, "learning_rate": 6.383763837638377e-06, "loss": 0.5145, "step": 346 }, { "epoch": 0.19215505306875866, "grad_norm": 0.454925537109375, "learning_rate": 6.402214022140222e-06, "loss": 0.5471, "step": 347 }, { "epoch": 0.19270881402861098, "grad_norm": 0.39554843306541443, "learning_rate": 6.420664206642066e-06, "loss": 0.5088, "step": 348 }, { "epoch": 0.1932625749884633, "grad_norm": 0.44809383153915405, "learning_rate": 6.439114391143913e-06, "loss": 0.5117, "step": 349 }, { "epoch": 0.19381633594831565, "grad_norm": 0.41464078426361084, "learning_rate": 6.457564575645757e-06, "loss": 0.5044, "step": 350 }, { "epoch": 0.19437009690816798, "grad_norm": 0.435523122549057, "learning_rate": 6.476014760147602e-06, "loss": 0.5126, "step": 351 }, { "epoch": 0.1949238578680203, "grad_norm": 0.44859009981155396, "learning_rate": 6.4944649446494466e-06, "loss": 0.5152, "step": 352 }, { "epoch": 0.19547761882787262, "grad_norm": 0.41101738810539246, "learning_rate": 6.512915129151291e-06, "loss": 0.5247, "step": 353 }, { "epoch": 0.19603137978772497, "grad_norm": 0.5181945562362671, "learning_rate": 6.5313653136531375e-06, "loss": 0.5356, "step": 354 }, { "epoch": 0.1965851407475773, "grad_norm": 0.41367462277412415, "learning_rate": 6.549815498154982e-06, "loss": 0.5011, "step": 355 }, { "epoch": 0.19713890170742962, "grad_norm": 0.4302010238170624, "learning_rate": 6.568265682656827e-06, "loss": 0.512, "step": 356 }, { "epoch": 0.19769266266728194, "grad_norm": 0.41611048579216003, "learning_rate": 6.586715867158671e-06, "loss": 0.5159, "step": 357 }, { "epoch": 0.1982464236271343, "grad_norm": 0.46597275137901306, "learning_rate": 6.605166051660518e-06, "loss": 0.4973, "step": 358 }, { "epoch": 0.19880018458698662, "grad_norm": 0.37931185960769653, "learning_rate": 6.623616236162362e-06, "loss": 0.5258, "step": 359 }, { "epoch": 0.19935394554683894, "grad_norm": 0.4379553198814392, "learning_rate": 6.642066420664207e-06, "loss": 0.5061, "step": 360 }, { "epoch": 0.1999077065066913, "grad_norm": 0.4163099527359009, "learning_rate": 6.660516605166052e-06, "loss": 0.5135, "step": 361 }, { "epoch": 0.2004614674665436, "grad_norm": 0.4104025065898895, "learning_rate": 6.678966789667897e-06, "loss": 0.522, "step": 362 }, { "epoch": 0.20101522842639594, "grad_norm": 0.45592719316482544, "learning_rate": 6.697416974169742e-06, "loss": 0.5317, "step": 363 }, { "epoch": 0.20156898938624826, "grad_norm": 0.38983023166656494, "learning_rate": 6.715867158671587e-06, "loss": 0.5322, "step": 364 }, { "epoch": 0.2021227503461006, "grad_norm": 0.4917503297328949, "learning_rate": 6.7343173431734325e-06, "loss": 0.5149, "step": 365 }, { "epoch": 0.20267651130595293, "grad_norm": 0.41790664196014404, "learning_rate": 6.752767527675277e-06, "loss": 0.5159, "step": 366 }, { "epoch": 0.20323027226580526, "grad_norm": 0.42450517416000366, "learning_rate": 6.771217712177123e-06, "loss": 0.4857, "step": 367 }, { "epoch": 0.20378403322565758, "grad_norm": 0.4047274887561798, "learning_rate": 6.789667896678967e-06, "loss": 0.4972, "step": 368 }, { "epoch": 0.20433779418550993, "grad_norm": 0.4356374442577362, "learning_rate": 6.808118081180813e-06, "loss": 0.5314, "step": 369 }, { "epoch": 0.20489155514536225, "grad_norm": 0.3555612862110138, "learning_rate": 6.826568265682657e-06, "loss": 0.5092, "step": 370 }, { "epoch": 0.20544531610521458, "grad_norm": 0.43003618717193604, "learning_rate": 6.845018450184503e-06, "loss": 0.5196, "step": 371 }, { "epoch": 0.2059990770650669, "grad_norm": 0.4151500463485718, "learning_rate": 6.863468634686347e-06, "loss": 0.5489, "step": 372 }, { "epoch": 0.20655283802491925, "grad_norm": 0.38493767380714417, "learning_rate": 6.881918819188193e-06, "loss": 0.5139, "step": 373 }, { "epoch": 0.20710659898477157, "grad_norm": 0.40599051117897034, "learning_rate": 6.9003690036900374e-06, "loss": 0.4989, "step": 374 }, { "epoch": 0.2076603599446239, "grad_norm": 0.424696683883667, "learning_rate": 6.918819188191882e-06, "loss": 0.5172, "step": 375 }, { "epoch": 0.20821412090447625, "grad_norm": 0.3935846984386444, "learning_rate": 6.9372693726937275e-06, "loss": 0.5178, "step": 376 }, { "epoch": 0.20876788186432857, "grad_norm": 0.45079800486564636, "learning_rate": 6.955719557195573e-06, "loss": 0.5089, "step": 377 }, { "epoch": 0.2093216428241809, "grad_norm": 0.4188171625137329, "learning_rate": 6.974169741697418e-06, "loss": 0.5134, "step": 378 }, { "epoch": 0.20987540378403322, "grad_norm": 0.5188259482383728, "learning_rate": 6.992619926199262e-06, "loss": 0.4975, "step": 379 }, { "epoch": 0.21042916474388557, "grad_norm": 0.43447771668434143, "learning_rate": 7.011070110701108e-06, "loss": 0.505, "step": 380 }, { "epoch": 0.2109829257037379, "grad_norm": 0.5638190507888794, "learning_rate": 7.029520295202953e-06, "loss": 0.512, "step": 381 }, { "epoch": 0.2115366866635902, "grad_norm": 0.4414464235305786, "learning_rate": 7.047970479704798e-06, "loss": 0.5114, "step": 382 }, { "epoch": 0.21209044762344254, "grad_norm": 0.4516310691833496, "learning_rate": 7.066420664206642e-06, "loss": 0.5304, "step": 383 }, { "epoch": 0.21264420858329489, "grad_norm": 0.524346113204956, "learning_rate": 7.084870848708487e-06, "loss": 0.5323, "step": 384 }, { "epoch": 0.2131979695431472, "grad_norm": 0.39934155344963074, "learning_rate": 7.103321033210333e-06, "loss": 0.5275, "step": 385 }, { "epoch": 0.21375173050299953, "grad_norm": 0.5000519156455994, "learning_rate": 7.121771217712178e-06, "loss": 0.5152, "step": 386 }, { "epoch": 0.21430549146285188, "grad_norm": 0.3891535997390747, "learning_rate": 7.1402214022140225e-06, "loss": 0.502, "step": 387 }, { "epoch": 0.2148592524227042, "grad_norm": 0.45847296714782715, "learning_rate": 7.158671586715867e-06, "loss": 0.532, "step": 388 }, { "epoch": 0.21541301338255653, "grad_norm": 0.407610148191452, "learning_rate": 7.1771217712177135e-06, "loss": 0.5168, "step": 389 }, { "epoch": 0.21596677434240885, "grad_norm": 0.507230281829834, "learning_rate": 7.195571955719558e-06, "loss": 0.507, "step": 390 }, { "epoch": 0.2165205353022612, "grad_norm": 0.44229981303215027, "learning_rate": 7.214022140221403e-06, "loss": 0.527, "step": 391 }, { "epoch": 0.21707429626211353, "grad_norm": 0.47391772270202637, "learning_rate": 7.232472324723247e-06, "loss": 0.5297, "step": 392 }, { "epoch": 0.21762805722196585, "grad_norm": 0.431949257850647, "learning_rate": 7.250922509225092e-06, "loss": 0.5156, "step": 393 }, { "epoch": 0.21818181818181817, "grad_norm": 0.4833814203739166, "learning_rate": 7.269372693726938e-06, "loss": 0.5316, "step": 394 }, { "epoch": 0.21873557914167052, "grad_norm": 0.4197404384613037, "learning_rate": 7.287822878228783e-06, "loss": 0.4954, "step": 395 }, { "epoch": 0.21928934010152284, "grad_norm": 0.48287898302078247, "learning_rate": 7.3062730627306275e-06, "loss": 0.5156, "step": 396 }, { "epoch": 0.21984310106137517, "grad_norm": 0.39599236845970154, "learning_rate": 7.324723247232473e-06, "loss": 0.4999, "step": 397 }, { "epoch": 0.2203968620212275, "grad_norm": 0.5040798187255859, "learning_rate": 7.343173431734318e-06, "loss": 0.5038, "step": 398 }, { "epoch": 0.22095062298107984, "grad_norm": 0.4108046293258667, "learning_rate": 7.361623616236163e-06, "loss": 0.4794, "step": 399 }, { "epoch": 0.22150438394093216, "grad_norm": 0.5476970672607422, "learning_rate": 7.380073800738008e-06, "loss": 0.5111, "step": 400 }, { "epoch": 0.2220581449007845, "grad_norm": 0.4698829650878906, "learning_rate": 7.398523985239853e-06, "loss": 0.4989, "step": 401 }, { "epoch": 0.22261190586063684, "grad_norm": 0.5354872941970825, "learning_rate": 7.416974169741698e-06, "loss": 0.4923, "step": 402 }, { "epoch": 0.22316566682048916, "grad_norm": 0.47291409969329834, "learning_rate": 7.435424354243543e-06, "loss": 0.4903, "step": 403 }, { "epoch": 0.22371942778034148, "grad_norm": 0.4179520010948181, "learning_rate": 7.453874538745388e-06, "loss": 0.5193, "step": 404 }, { "epoch": 0.2242731887401938, "grad_norm": 0.48921412229537964, "learning_rate": 7.472324723247233e-06, "loss": 0.5357, "step": 405 }, { "epoch": 0.22482694970004616, "grad_norm": 0.4155014753341675, "learning_rate": 7.490774907749078e-06, "loss": 0.5209, "step": 406 }, { "epoch": 0.22538071065989848, "grad_norm": 0.4507433772087097, "learning_rate": 7.509225092250923e-06, "loss": 0.4846, "step": 407 }, { "epoch": 0.2259344716197508, "grad_norm": 0.4087013900279999, "learning_rate": 7.527675276752768e-06, "loss": 0.5111, "step": 408 }, { "epoch": 0.22648823257960313, "grad_norm": 0.4164627194404602, "learning_rate": 7.5461254612546134e-06, "loss": 0.4836, "step": 409 }, { "epoch": 0.22704199353945548, "grad_norm": 0.49661538004875183, "learning_rate": 7.564575645756458e-06, "loss": 0.5513, "step": 410 }, { "epoch": 0.2275957544993078, "grad_norm": 0.4001203775405884, "learning_rate": 7.5830258302583035e-06, "loss": 0.4839, "step": 411 }, { "epoch": 0.22814951545916012, "grad_norm": 0.498279869556427, "learning_rate": 7.601476014760148e-06, "loss": 0.4535, "step": 412 }, { "epoch": 0.22870327641901245, "grad_norm": 0.43975603580474854, "learning_rate": 7.619926199261994e-06, "loss": 0.5387, "step": 413 }, { "epoch": 0.2292570373788648, "grad_norm": 0.4552561640739441, "learning_rate": 7.638376383763837e-06, "loss": 0.4892, "step": 414 }, { "epoch": 0.22981079833871712, "grad_norm": 0.4423806667327881, "learning_rate": 7.656826568265684e-06, "loss": 0.5075, "step": 415 }, { "epoch": 0.23036455929856944, "grad_norm": 0.46483469009399414, "learning_rate": 7.675276752767528e-06, "loss": 0.5058, "step": 416 }, { "epoch": 0.2309183202584218, "grad_norm": 0.4591600298881531, "learning_rate": 7.693726937269373e-06, "loss": 0.5252, "step": 417 }, { "epoch": 0.23147208121827412, "grad_norm": 0.4875623881816864, "learning_rate": 7.712177121771218e-06, "loss": 0.513, "step": 418 }, { "epoch": 0.23202584217812644, "grad_norm": 0.4506981372833252, "learning_rate": 7.730627306273064e-06, "loss": 0.5123, "step": 419 }, { "epoch": 0.23257960313797876, "grad_norm": 0.41740280389785767, "learning_rate": 7.749077490774908e-06, "loss": 0.4891, "step": 420 }, { "epoch": 0.23313336409783111, "grad_norm": 0.4461287558078766, "learning_rate": 7.767527675276753e-06, "loss": 0.5074, "step": 421 }, { "epoch": 0.23368712505768344, "grad_norm": 0.4380086362361908, "learning_rate": 7.785977859778598e-06, "loss": 0.4985, "step": 422 }, { "epoch": 0.23424088601753576, "grad_norm": 0.4485791325569153, "learning_rate": 7.804428044280444e-06, "loss": 0.5274, "step": 423 }, { "epoch": 0.23479464697738808, "grad_norm": 0.4031726121902466, "learning_rate": 7.822878228782289e-06, "loss": 0.503, "step": 424 }, { "epoch": 0.23534840793724043, "grad_norm": 0.4360205829143524, "learning_rate": 7.841328413284133e-06, "loss": 0.5253, "step": 425 }, { "epoch": 0.23590216889709276, "grad_norm": 0.4125031530857086, "learning_rate": 7.859778597785978e-06, "loss": 0.505, "step": 426 }, { "epoch": 0.23645592985694508, "grad_norm": 0.3649049699306488, "learning_rate": 7.878228782287824e-06, "loss": 0.4868, "step": 427 }, { "epoch": 0.2370096908167974, "grad_norm": 0.47657695412635803, "learning_rate": 7.896678966789669e-06, "loss": 0.517, "step": 428 }, { "epoch": 0.23756345177664975, "grad_norm": 0.40756967663764954, "learning_rate": 7.915129151291513e-06, "loss": 0.5099, "step": 429 }, { "epoch": 0.23811721273650208, "grad_norm": 0.4556548595428467, "learning_rate": 7.933579335793358e-06, "loss": 0.4885, "step": 430 }, { "epoch": 0.2386709736963544, "grad_norm": 0.3784118890762329, "learning_rate": 7.952029520295204e-06, "loss": 0.5183, "step": 431 }, { "epoch": 0.23922473465620675, "grad_norm": 0.4185868501663208, "learning_rate": 7.970479704797049e-06, "loss": 0.491, "step": 432 }, { "epoch": 0.23977849561605907, "grad_norm": 0.37511178851127625, "learning_rate": 7.988929889298894e-06, "loss": 0.4838, "step": 433 }, { "epoch": 0.2403322565759114, "grad_norm": 0.4160645604133606, "learning_rate": 8.007380073800738e-06, "loss": 0.4843, "step": 434 }, { "epoch": 0.24088601753576372, "grad_norm": 0.3915506899356842, "learning_rate": 8.025830258302584e-06, "loss": 0.5239, "step": 435 }, { "epoch": 0.24143977849561607, "grad_norm": 0.39466390013694763, "learning_rate": 8.044280442804429e-06, "loss": 0.5101, "step": 436 }, { "epoch": 0.2419935394554684, "grad_norm": 0.42799046635627747, "learning_rate": 8.062730627306274e-06, "loss": 0.5139, "step": 437 }, { "epoch": 0.24254730041532072, "grad_norm": 0.4251141846179962, "learning_rate": 8.08118081180812e-06, "loss": 0.5104, "step": 438 }, { "epoch": 0.24310106137517304, "grad_norm": 0.3934801518917084, "learning_rate": 8.099630996309965e-06, "loss": 0.4824, "step": 439 }, { "epoch": 0.2436548223350254, "grad_norm": 0.4069085419178009, "learning_rate": 8.11808118081181e-06, "loss": 0.4971, "step": 440 }, { "epoch": 0.2442085832948777, "grad_norm": 0.41914284229278564, "learning_rate": 8.136531365313654e-06, "loss": 0.5171, "step": 441 }, { "epoch": 0.24476234425473004, "grad_norm": 0.4145299792289734, "learning_rate": 8.154981549815498e-06, "loss": 0.4965, "step": 442 }, { "epoch": 0.24531610521458236, "grad_norm": 0.43759840726852417, "learning_rate": 8.173431734317345e-06, "loss": 0.5169, "step": 443 }, { "epoch": 0.2458698661744347, "grad_norm": 0.4000653624534607, "learning_rate": 8.19188191881919e-06, "loss": 0.5099, "step": 444 }, { "epoch": 0.24642362713428703, "grad_norm": 0.410854697227478, "learning_rate": 8.210332103321034e-06, "loss": 0.4736, "step": 445 }, { "epoch": 0.24697738809413936, "grad_norm": 0.3896498680114746, "learning_rate": 8.228782287822879e-06, "loss": 0.513, "step": 446 }, { "epoch": 0.2475311490539917, "grad_norm": 0.47462084889411926, "learning_rate": 8.247232472324725e-06, "loss": 0.5037, "step": 447 }, { "epoch": 0.24808491001384403, "grad_norm": 0.43424078822135925, "learning_rate": 8.26568265682657e-06, "loss": 0.4973, "step": 448 }, { "epoch": 0.24863867097369635, "grad_norm": 0.45642998814582825, "learning_rate": 8.284132841328414e-06, "loss": 0.5059, "step": 449 }, { "epoch": 0.24919243193354867, "grad_norm": 0.44411173462867737, "learning_rate": 8.302583025830259e-06, "loss": 0.4692, "step": 450 }, { "epoch": 0.24974619289340103, "grad_norm": 0.46147945523262024, "learning_rate": 8.321033210332105e-06, "loss": 0.54, "step": 451 }, { "epoch": 0.2502999538532533, "grad_norm": 0.4033389985561371, "learning_rate": 8.33948339483395e-06, "loss": 0.4889, "step": 452 }, { "epoch": 0.2508537148131057, "grad_norm": 0.46802592277526855, "learning_rate": 8.357933579335794e-06, "loss": 0.5219, "step": 453 }, { "epoch": 0.251407475772958, "grad_norm": 0.39269745349884033, "learning_rate": 8.376383763837639e-06, "loss": 0.5186, "step": 454 }, { "epoch": 0.25196123673281035, "grad_norm": 0.48569628596305847, "learning_rate": 8.394833948339484e-06, "loss": 0.5204, "step": 455 }, { "epoch": 0.25251499769266267, "grad_norm": 0.3741343319416046, "learning_rate": 8.41328413284133e-06, "loss": 0.4847, "step": 456 }, { "epoch": 0.253068758652515, "grad_norm": 0.5201795101165771, "learning_rate": 8.431734317343175e-06, "loss": 0.5177, "step": 457 }, { "epoch": 0.2536225196123673, "grad_norm": 0.41616517305374146, "learning_rate": 8.450184501845019e-06, "loss": 0.4897, "step": 458 }, { "epoch": 0.25417628057221964, "grad_norm": 0.4093177914619446, "learning_rate": 8.468634686346864e-06, "loss": 0.4779, "step": 459 }, { "epoch": 0.254730041532072, "grad_norm": 0.4024859368801117, "learning_rate": 8.48708487084871e-06, "loss": 0.4918, "step": 460 }, { "epoch": 0.25528380249192434, "grad_norm": 0.4327787458896637, "learning_rate": 8.505535055350555e-06, "loss": 0.5113, "step": 461 }, { "epoch": 0.25583756345177666, "grad_norm": 0.4263727366924286, "learning_rate": 8.5239852398524e-06, "loss": 0.4923, "step": 462 }, { "epoch": 0.256391324411629, "grad_norm": 0.4086902141571045, "learning_rate": 8.542435424354244e-06, "loss": 0.5028, "step": 463 }, { "epoch": 0.2569450853714813, "grad_norm": 0.4590758681297302, "learning_rate": 8.560885608856089e-06, "loss": 0.5114, "step": 464 }, { "epoch": 0.25749884633133363, "grad_norm": 0.4627887010574341, "learning_rate": 8.579335793357935e-06, "loss": 0.4692, "step": 465 }, { "epoch": 0.25805260729118595, "grad_norm": 0.4890758693218231, "learning_rate": 8.59778597785978e-06, "loss": 0.4933, "step": 466 }, { "epoch": 0.2586063682510383, "grad_norm": 0.5391367673873901, "learning_rate": 8.616236162361624e-06, "loss": 0.5139, "step": 467 }, { "epoch": 0.25916012921089066, "grad_norm": 0.47109338641166687, "learning_rate": 8.634686346863469e-06, "loss": 0.5139, "step": 468 }, { "epoch": 0.259713890170743, "grad_norm": 0.40664923191070557, "learning_rate": 8.653136531365315e-06, "loss": 0.4936, "step": 469 }, { "epoch": 0.2602676511305953, "grad_norm": 0.5189430713653564, "learning_rate": 8.67158671586716e-06, "loss": 0.4958, "step": 470 }, { "epoch": 0.2608214120904476, "grad_norm": 0.5668074488639832, "learning_rate": 8.690036900369004e-06, "loss": 0.5167, "step": 471 }, { "epoch": 0.26137517305029995, "grad_norm": 0.4698292315006256, "learning_rate": 8.708487084870849e-06, "loss": 0.4913, "step": 472 }, { "epoch": 0.26192893401015227, "grad_norm": 0.5675038695335388, "learning_rate": 8.726937269372693e-06, "loss": 0.5248, "step": 473 }, { "epoch": 0.2624826949700046, "grad_norm": 0.46395087242126465, "learning_rate": 8.74538745387454e-06, "loss": 0.51, "step": 474 }, { "epoch": 0.26303645592985697, "grad_norm": 0.5022774934768677, "learning_rate": 8.763837638376384e-06, "loss": 0.5119, "step": 475 }, { "epoch": 0.2635902168897093, "grad_norm": 0.4828384220600128, "learning_rate": 8.782287822878229e-06, "loss": 0.5339, "step": 476 }, { "epoch": 0.2641439778495616, "grad_norm": 0.4435117840766907, "learning_rate": 8.800738007380074e-06, "loss": 0.5013, "step": 477 }, { "epoch": 0.26469773880941394, "grad_norm": 0.4178343713283539, "learning_rate": 8.81918819188192e-06, "loss": 0.5115, "step": 478 }, { "epoch": 0.26525149976926626, "grad_norm": 0.4391827881336212, "learning_rate": 8.837638376383765e-06, "loss": 0.4951, "step": 479 }, { "epoch": 0.2658052607291186, "grad_norm": 0.38893136382102966, "learning_rate": 8.85608856088561e-06, "loss": 0.4951, "step": 480 }, { "epoch": 0.2663590216889709, "grad_norm": 0.45563074946403503, "learning_rate": 8.874538745387454e-06, "loss": 0.497, "step": 481 }, { "epoch": 0.26691278264882323, "grad_norm": 0.4171759784221649, "learning_rate": 8.892988929889298e-06, "loss": 0.4675, "step": 482 }, { "epoch": 0.2674665436086756, "grad_norm": 0.4795277416706085, "learning_rate": 8.911439114391145e-06, "loss": 0.5156, "step": 483 }, { "epoch": 0.26802030456852793, "grad_norm": 0.47675830125808716, "learning_rate": 8.92988929889299e-06, "loss": 0.5102, "step": 484 }, { "epoch": 0.26857406552838026, "grad_norm": 0.48056280612945557, "learning_rate": 8.948339483394834e-06, "loss": 0.5201, "step": 485 }, { "epoch": 0.2691278264882326, "grad_norm": 0.4314589202404022, "learning_rate": 8.966789667896679e-06, "loss": 0.4857, "step": 486 }, { "epoch": 0.2696815874480849, "grad_norm": 0.4170920252799988, "learning_rate": 8.985239852398525e-06, "loss": 0.5001, "step": 487 }, { "epoch": 0.2702353484079372, "grad_norm": 0.4651602804660797, "learning_rate": 9.00369003690037e-06, "loss": 0.4893, "step": 488 }, { "epoch": 0.27078910936778955, "grad_norm": 0.4666293263435364, "learning_rate": 9.022140221402214e-06, "loss": 0.4955, "step": 489 }, { "epoch": 0.2713428703276419, "grad_norm": 0.40739041566848755, "learning_rate": 9.040590405904059e-06, "loss": 0.5093, "step": 490 }, { "epoch": 0.27189663128749425, "grad_norm": 0.4839509129524231, "learning_rate": 9.059040590405905e-06, "loss": 0.4694, "step": 491 }, { "epoch": 0.2724503922473466, "grad_norm": 0.4284436106681824, "learning_rate": 9.07749077490775e-06, "loss": 0.4855, "step": 492 }, { "epoch": 0.2730041532071989, "grad_norm": 0.490162193775177, "learning_rate": 9.095940959409594e-06, "loss": 0.4973, "step": 493 }, { "epoch": 0.2735579141670512, "grad_norm": 0.46302422881126404, "learning_rate": 9.114391143911439e-06, "loss": 0.5165, "step": 494 }, { "epoch": 0.27411167512690354, "grad_norm": 0.4318949282169342, "learning_rate": 9.132841328413285e-06, "loss": 0.5074, "step": 495 }, { "epoch": 0.27466543608675587, "grad_norm": 0.45687544345855713, "learning_rate": 9.15129151291513e-06, "loss": 0.5116, "step": 496 }, { "epoch": 0.2752191970466082, "grad_norm": 0.45748987793922424, "learning_rate": 9.169741697416974e-06, "loss": 0.5227, "step": 497 }, { "epoch": 0.27577295800646057, "grad_norm": 0.46636664867401123, "learning_rate": 9.188191881918819e-06, "loss": 0.5377, "step": 498 }, { "epoch": 0.2763267189663129, "grad_norm": 0.39476945996284485, "learning_rate": 9.206642066420665e-06, "loss": 0.502, "step": 499 }, { "epoch": 0.2768804799261652, "grad_norm": 0.4684544503688812, "learning_rate": 9.22509225092251e-06, "loss": 0.4862, "step": 500 }, { "epoch": 0.27743424088601754, "grad_norm": 0.4879494607448578, "learning_rate": 9.243542435424355e-06, "loss": 0.4976, "step": 501 }, { "epoch": 0.27798800184586986, "grad_norm": 0.4374106228351593, "learning_rate": 9.2619926199262e-06, "loss": 0.4846, "step": 502 }, { "epoch": 0.2785417628057222, "grad_norm": 0.4415509104728699, "learning_rate": 9.280442804428046e-06, "loss": 0.5013, "step": 503 }, { "epoch": 0.2790955237655745, "grad_norm": 0.43773356080055237, "learning_rate": 9.29889298892989e-06, "loss": 0.4797, "step": 504 }, { "epoch": 0.2796492847254269, "grad_norm": 0.4449767768383026, "learning_rate": 9.317343173431735e-06, "loss": 0.508, "step": 505 }, { "epoch": 0.2802030456852792, "grad_norm": 0.3970884680747986, "learning_rate": 9.33579335793358e-06, "loss": 0.5171, "step": 506 }, { "epoch": 0.28075680664513153, "grad_norm": 0.5044116973876953, "learning_rate": 9.354243542435426e-06, "loss": 0.5065, "step": 507 }, { "epoch": 0.28131056760498385, "grad_norm": 0.40697282552719116, "learning_rate": 9.37269372693727e-06, "loss": 0.4961, "step": 508 }, { "epoch": 0.2818643285648362, "grad_norm": 0.42604711651802063, "learning_rate": 9.391143911439115e-06, "loss": 0.4936, "step": 509 }, { "epoch": 0.2824180895246885, "grad_norm": 0.4927046298980713, "learning_rate": 9.409594095940961e-06, "loss": 0.5338, "step": 510 }, { "epoch": 0.2829718504845408, "grad_norm": 0.3916417360305786, "learning_rate": 9.428044280442806e-06, "loss": 0.4946, "step": 511 }, { "epoch": 0.28352561144439314, "grad_norm": 0.43391603231430054, "learning_rate": 9.44649446494465e-06, "loss": 0.4938, "step": 512 }, { "epoch": 0.2840793724042455, "grad_norm": 0.47632765769958496, "learning_rate": 9.464944649446495e-06, "loss": 0.5136, "step": 513 }, { "epoch": 0.28463313336409785, "grad_norm": 0.41228413581848145, "learning_rate": 9.483394833948341e-06, "loss": 0.4996, "step": 514 }, { "epoch": 0.28518689432395017, "grad_norm": 0.4289187788963318, "learning_rate": 9.501845018450186e-06, "loss": 0.4902, "step": 515 }, { "epoch": 0.2857406552838025, "grad_norm": 0.4631807208061218, "learning_rate": 9.52029520295203e-06, "loss": 0.5079, "step": 516 }, { "epoch": 0.2862944162436548, "grad_norm": 0.414673775434494, "learning_rate": 9.538745387453875e-06, "loss": 0.4999, "step": 517 }, { "epoch": 0.28684817720350714, "grad_norm": 0.4362106919288635, "learning_rate": 9.557195571955722e-06, "loss": 0.5228, "step": 518 }, { "epoch": 0.28740193816335946, "grad_norm": 0.406316876411438, "learning_rate": 9.575645756457566e-06, "loss": 0.5104, "step": 519 }, { "epoch": 0.28795569912321184, "grad_norm": 0.45621275901794434, "learning_rate": 9.59409594095941e-06, "loss": 0.5088, "step": 520 }, { "epoch": 0.28850946008306416, "grad_norm": 0.4162580370903015, "learning_rate": 9.612546125461255e-06, "loss": 0.5059, "step": 521 }, { "epoch": 0.2890632210429165, "grad_norm": 0.4104117751121521, "learning_rate": 9.6309963099631e-06, "loss": 0.5287, "step": 522 }, { "epoch": 0.2896169820027688, "grad_norm": 0.4105607867240906, "learning_rate": 9.649446494464946e-06, "loss": 0.4884, "step": 523 }, { "epoch": 0.29017074296262113, "grad_norm": 0.43861809372901917, "learning_rate": 9.667896678966791e-06, "loss": 0.4919, "step": 524 }, { "epoch": 0.29072450392247345, "grad_norm": 0.3604418933391571, "learning_rate": 9.686346863468636e-06, "loss": 0.4833, "step": 525 }, { "epoch": 0.2912782648823258, "grad_norm": 0.4441142678260803, "learning_rate": 9.70479704797048e-06, "loss": 0.4997, "step": 526 }, { "epoch": 0.2918320258421781, "grad_norm": 0.41122907400131226, "learning_rate": 9.723247232472326e-06, "loss": 0.4848, "step": 527 }, { "epoch": 0.2923857868020305, "grad_norm": 0.4718834459781647, "learning_rate": 9.741697416974171e-06, "loss": 0.4957, "step": 528 }, { "epoch": 0.2929395477618828, "grad_norm": 0.4327332675457001, "learning_rate": 9.760147601476016e-06, "loss": 0.4994, "step": 529 }, { "epoch": 0.2934933087217351, "grad_norm": 0.48327648639678955, "learning_rate": 9.77859778597786e-06, "loss": 0.5172, "step": 530 }, { "epoch": 0.29404706968158745, "grad_norm": 0.4000093936920166, "learning_rate": 9.797047970479707e-06, "loss": 0.4897, "step": 531 }, { "epoch": 0.29460083064143977, "grad_norm": 0.4861779510974884, "learning_rate": 9.815498154981551e-06, "loss": 0.5215, "step": 532 }, { "epoch": 0.2951545916012921, "grad_norm": 0.3969844877719879, "learning_rate": 9.833948339483396e-06, "loss": 0.5088, "step": 533 }, { "epoch": 0.2957083525611444, "grad_norm": 0.4267115890979767, "learning_rate": 9.85239852398524e-06, "loss": 0.5014, "step": 534 }, { "epoch": 0.2962621135209968, "grad_norm": 0.3732796609401703, "learning_rate": 9.870848708487085e-06, "loss": 0.4876, "step": 535 }, { "epoch": 0.2968158744808491, "grad_norm": 0.4313034415245056, "learning_rate": 9.889298892988931e-06, "loss": 0.4991, "step": 536 }, { "epoch": 0.29736963544070144, "grad_norm": 0.4756830930709839, "learning_rate": 9.907749077490776e-06, "loss": 0.5153, "step": 537 }, { "epoch": 0.29792339640055376, "grad_norm": 0.4193405210971832, "learning_rate": 9.92619926199262e-06, "loss": 0.5105, "step": 538 }, { "epoch": 0.2984771573604061, "grad_norm": 0.47097155451774597, "learning_rate": 9.944649446494465e-06, "loss": 0.4947, "step": 539 }, { "epoch": 0.2990309183202584, "grad_norm": 0.5028030872344971, "learning_rate": 9.963099630996312e-06, "loss": 0.5124, "step": 540 }, { "epoch": 0.29958467928011073, "grad_norm": 0.4200828969478607, "learning_rate": 9.981549815498156e-06, "loss": 0.524, "step": 541 }, { "epoch": 0.30013844023996306, "grad_norm": 0.4470821022987366, "learning_rate": 1e-05, "loss": 0.5055, "step": 542 }, { "epoch": 0.30069220119981543, "grad_norm": 0.45730268955230713, "learning_rate": 9.999998960924952e-06, "loss": 0.5086, "step": 543 }, { "epoch": 0.30124596215966776, "grad_norm": 0.46103113889694214, "learning_rate": 9.999995843700237e-06, "loss": 0.5166, "step": 544 }, { "epoch": 0.3017997231195201, "grad_norm": 0.4381064176559448, "learning_rate": 9.999990648327153e-06, "loss": 0.5131, "step": 545 }, { "epoch": 0.3023534840793724, "grad_norm": 0.44501760601997375, "learning_rate": 9.999983374807859e-06, "loss": 0.5037, "step": 546 }, { "epoch": 0.3029072450392247, "grad_norm": 0.4123047888278961, "learning_rate": 9.999974023145375e-06, "loss": 0.5103, "step": 547 }, { "epoch": 0.30346100599907705, "grad_norm": 0.39611852169036865, "learning_rate": 9.999962593343593e-06, "loss": 0.5023, "step": 548 }, { "epoch": 0.30401476695892937, "grad_norm": 0.386686772108078, "learning_rate": 9.999949085407258e-06, "loss": 0.4739, "step": 549 }, { "epoch": 0.30456852791878175, "grad_norm": 0.48049017786979675, "learning_rate": 9.99993349934199e-06, "loss": 0.5165, "step": 550 }, { "epoch": 0.3051222888786341, "grad_norm": 0.4115730822086334, "learning_rate": 9.999915835154262e-06, "loss": 0.4874, "step": 551 }, { "epoch": 0.3056760498384864, "grad_norm": 0.4582667350769043, "learning_rate": 9.99989609285142e-06, "loss": 0.5216, "step": 552 }, { "epoch": 0.3062298107983387, "grad_norm": 0.4611881375312805, "learning_rate": 9.999874272441665e-06, "loss": 0.5157, "step": 553 }, { "epoch": 0.30678357175819104, "grad_norm": 0.4481215178966522, "learning_rate": 9.99985037393407e-06, "loss": 0.5068, "step": 554 }, { "epoch": 0.30733733271804337, "grad_norm": 0.4264558255672455, "learning_rate": 9.999824397338567e-06, "loss": 0.5028, "step": 555 }, { "epoch": 0.3078910936778957, "grad_norm": 0.42919620871543884, "learning_rate": 9.999796342665953e-06, "loss": 0.4941, "step": 556 }, { "epoch": 0.308444854637748, "grad_norm": 0.4522802233695984, "learning_rate": 9.999766209927886e-06, "loss": 0.5029, "step": 557 }, { "epoch": 0.3089986155976004, "grad_norm": 0.4787648618221283, "learning_rate": 9.999733999136892e-06, "loss": 0.5015, "step": 558 }, { "epoch": 0.3095523765574527, "grad_norm": 0.4384936988353729, "learning_rate": 9.999699710306358e-06, "loss": 0.4661, "step": 559 }, { "epoch": 0.31010613751730504, "grad_norm": 0.4574987292289734, "learning_rate": 9.999663343450536e-06, "loss": 0.4871, "step": 560 }, { "epoch": 0.31065989847715736, "grad_norm": 0.432535856962204, "learning_rate": 9.999624898584542e-06, "loss": 0.4887, "step": 561 }, { "epoch": 0.3112136594370097, "grad_norm": 0.42569294571876526, "learning_rate": 9.999584375724353e-06, "loss": 0.4778, "step": 562 }, { "epoch": 0.311767420396862, "grad_norm": 0.44739648699760437, "learning_rate": 9.999541774886815e-06, "loss": 0.5275, "step": 563 }, { "epoch": 0.31232118135671433, "grad_norm": 0.43573272228240967, "learning_rate": 9.999497096089628e-06, "loss": 0.5027, "step": 564 }, { "epoch": 0.3128749423165667, "grad_norm": 0.41702544689178467, "learning_rate": 9.999450339351368e-06, "loss": 0.5018, "step": 565 }, { "epoch": 0.31342870327641903, "grad_norm": 0.4418914020061493, "learning_rate": 9.999401504691463e-06, "loss": 0.4894, "step": 566 }, { "epoch": 0.31398246423627135, "grad_norm": 0.4346746802330017, "learning_rate": 9.999350592130218e-06, "loss": 0.5171, "step": 567 }, { "epoch": 0.3145362251961237, "grad_norm": 0.43304702639579773, "learning_rate": 9.999297601688785e-06, "loss": 0.4978, "step": 568 }, { "epoch": 0.315089986155976, "grad_norm": 0.431048721075058, "learning_rate": 9.999242533389192e-06, "loss": 0.513, "step": 569 }, { "epoch": 0.3156437471158283, "grad_norm": 0.4568383991718292, "learning_rate": 9.99918538725433e-06, "loss": 0.4806, "step": 570 }, { "epoch": 0.31619750807568064, "grad_norm": 0.41392019391059875, "learning_rate": 9.999126163307945e-06, "loss": 0.4839, "step": 571 }, { "epoch": 0.31675126903553297, "grad_norm": 0.43631383776664734, "learning_rate": 9.999064861574659e-06, "loss": 0.4803, "step": 572 }, { "epoch": 0.31730502999538535, "grad_norm": 0.39564868807792664, "learning_rate": 9.999001482079943e-06, "loss": 0.5007, "step": 573 }, { "epoch": 0.31785879095523767, "grad_norm": 0.40202629566192627, "learning_rate": 9.998936024850148e-06, "loss": 0.469, "step": 574 }, { "epoch": 0.31841255191509, "grad_norm": 0.4100625216960907, "learning_rate": 9.998868489912472e-06, "loss": 0.4941, "step": 575 }, { "epoch": 0.3189663128749423, "grad_norm": 0.4644562900066376, "learning_rate": 9.998798877294991e-06, "loss": 0.4914, "step": 576 }, { "epoch": 0.31952007383479464, "grad_norm": 0.3963070511817932, "learning_rate": 9.998727187026633e-06, "loss": 0.5003, "step": 577 }, { "epoch": 0.32007383479464696, "grad_norm": 0.3899221122264862, "learning_rate": 9.998653419137196e-06, "loss": 0.5159, "step": 578 }, { "epoch": 0.3206275957544993, "grad_norm": 0.4510885179042816, "learning_rate": 9.998577573657344e-06, "loss": 0.4863, "step": 579 }, { "epoch": 0.32118135671435166, "grad_norm": 0.4622509479522705, "learning_rate": 9.998499650618595e-06, "loss": 0.4752, "step": 580 }, { "epoch": 0.321735117674204, "grad_norm": 0.38509705662727356, "learning_rate": 9.998419650053339e-06, "loss": 0.4932, "step": 581 }, { "epoch": 0.3222888786340563, "grad_norm": 0.48007437586784363, "learning_rate": 9.998337571994827e-06, "loss": 0.4976, "step": 582 }, { "epoch": 0.32284263959390863, "grad_norm": 0.41753271222114563, "learning_rate": 9.998253416477173e-06, "loss": 0.5102, "step": 583 }, { "epoch": 0.32339640055376095, "grad_norm": 0.5796026587486267, "learning_rate": 9.998167183535355e-06, "loss": 0.4744, "step": 584 }, { "epoch": 0.3239501615136133, "grad_norm": 0.41997990012168884, "learning_rate": 9.99807887320521e-06, "loss": 0.501, "step": 585 }, { "epoch": 0.3245039224734656, "grad_norm": 0.5596761703491211, "learning_rate": 9.997988485523448e-06, "loss": 0.5007, "step": 586 }, { "epoch": 0.325057683433318, "grad_norm": 0.5405199527740479, "learning_rate": 9.997896020527634e-06, "loss": 0.5041, "step": 587 }, { "epoch": 0.3256114443931703, "grad_norm": 0.46261096000671387, "learning_rate": 9.997801478256199e-06, "loss": 0.4808, "step": 588 }, { "epoch": 0.3261652053530226, "grad_norm": 0.5463973879814148, "learning_rate": 9.997704858748438e-06, "loss": 0.4986, "step": 589 }, { "epoch": 0.32671896631287495, "grad_norm": 0.36433112621307373, "learning_rate": 9.997606162044508e-06, "loss": 0.4725, "step": 590 }, { "epoch": 0.32727272727272727, "grad_norm": 0.5191019773483276, "learning_rate": 9.997505388185434e-06, "loss": 0.4827, "step": 591 }, { "epoch": 0.3278264882325796, "grad_norm": 0.4364435374736786, "learning_rate": 9.997402537213097e-06, "loss": 0.5132, "step": 592 }, { "epoch": 0.3283802491924319, "grad_norm": 0.5169992446899414, "learning_rate": 9.997297609170244e-06, "loss": 0.5188, "step": 593 }, { "epoch": 0.32893401015228424, "grad_norm": 0.47668445110321045, "learning_rate": 9.99719060410049e-06, "loss": 0.5168, "step": 594 }, { "epoch": 0.3294877711121366, "grad_norm": 0.47450095415115356, "learning_rate": 9.997081522048309e-06, "loss": 0.4636, "step": 595 }, { "epoch": 0.33004153207198894, "grad_norm": 0.5263452529907227, "learning_rate": 9.996970363059035e-06, "loss": 0.4854, "step": 596 }, { "epoch": 0.33059529303184126, "grad_norm": 0.4111824631690979, "learning_rate": 9.996857127178875e-06, "loss": 0.4887, "step": 597 }, { "epoch": 0.3311490539916936, "grad_norm": 0.4888688921928406, "learning_rate": 9.996741814454887e-06, "loss": 0.5039, "step": 598 }, { "epoch": 0.3317028149515459, "grad_norm": 0.4543573558330536, "learning_rate": 9.996624424935002e-06, "loss": 0.4676, "step": 599 }, { "epoch": 0.33225657591139823, "grad_norm": 0.43162915110588074, "learning_rate": 9.99650495866801e-06, "loss": 0.4713, "step": 600 }, { "epoch": 0.33281033687125056, "grad_norm": 0.4799591600894928, "learning_rate": 9.996383415703565e-06, "loss": 0.4794, "step": 601 }, { "epoch": 0.33336409783110293, "grad_norm": 0.42775675654411316, "learning_rate": 9.996259796092183e-06, "loss": 0.5256, "step": 602 }, { "epoch": 0.33391785879095526, "grad_norm": 0.4614792466163635, "learning_rate": 9.996134099885244e-06, "loss": 0.4638, "step": 603 }, { "epoch": 0.3344716197508076, "grad_norm": 0.3941904604434967, "learning_rate": 9.996006327134992e-06, "loss": 0.4988, "step": 604 }, { "epoch": 0.3350253807106599, "grad_norm": 0.4777986407279968, "learning_rate": 9.995876477894533e-06, "loss": 0.4798, "step": 605 }, { "epoch": 0.3355791416705122, "grad_norm": 0.4468982517719269, "learning_rate": 9.995744552217836e-06, "loss": 0.4878, "step": 606 }, { "epoch": 0.33613290263036455, "grad_norm": 0.4928005635738373, "learning_rate": 9.995610550159733e-06, "loss": 0.5058, "step": 607 }, { "epoch": 0.3366866635902169, "grad_norm": 0.46784788370132446, "learning_rate": 9.995474471775922e-06, "loss": 0.4889, "step": 608 }, { "epoch": 0.3372404245500692, "grad_norm": 0.46869224309921265, "learning_rate": 9.995336317122956e-06, "loss": 0.5047, "step": 609 }, { "epoch": 0.3377941855099216, "grad_norm": 0.46808090806007385, "learning_rate": 9.99519608625826e-06, "loss": 0.4804, "step": 610 }, { "epoch": 0.3383479464697739, "grad_norm": 0.4167005121707916, "learning_rate": 9.995053779240118e-06, "loss": 0.4783, "step": 611 }, { "epoch": 0.3389017074296262, "grad_norm": 0.5025443434715271, "learning_rate": 9.994909396127675e-06, "loss": 0.4631, "step": 612 }, { "epoch": 0.33945546838947854, "grad_norm": 0.43069401383399963, "learning_rate": 9.994762936980944e-06, "loss": 0.5224, "step": 613 }, { "epoch": 0.34000922934933087, "grad_norm": 0.5125929713249207, "learning_rate": 9.994614401860793e-06, "loss": 0.5291, "step": 614 }, { "epoch": 0.3405629903091832, "grad_norm": 0.4868583083152771, "learning_rate": 9.994463790828965e-06, "loss": 0.4965, "step": 615 }, { "epoch": 0.3411167512690355, "grad_norm": 0.4669782817363739, "learning_rate": 9.994311103948051e-06, "loss": 0.46, "step": 616 }, { "epoch": 0.3416705122288879, "grad_norm": 0.4520213305950165, "learning_rate": 9.994156341281517e-06, "loss": 0.4855, "step": 617 }, { "epoch": 0.3422242731887402, "grad_norm": 0.560319185256958, "learning_rate": 9.993999502893685e-06, "loss": 0.485, "step": 618 }, { "epoch": 0.34277803414859254, "grad_norm": 0.5351271629333496, "learning_rate": 9.993840588849743e-06, "loss": 0.5178, "step": 619 }, { "epoch": 0.34333179510844486, "grad_norm": 0.5197262763977051, "learning_rate": 9.993679599215738e-06, "loss": 0.5019, "step": 620 }, { "epoch": 0.3438855560682972, "grad_norm": 0.4786757230758667, "learning_rate": 9.993516534058585e-06, "loss": 0.4645, "step": 621 }, { "epoch": 0.3444393170281495, "grad_norm": 0.4562654495239258, "learning_rate": 9.993351393446058e-06, "loss": 0.5012, "step": 622 }, { "epoch": 0.34499307798800183, "grad_norm": 0.46101251244544983, "learning_rate": 9.993184177446795e-06, "loss": 0.4653, "step": 623 }, { "epoch": 0.34554683894785415, "grad_norm": 0.5018407702445984, "learning_rate": 9.993014886130293e-06, "loss": 0.5029, "step": 624 }, { "epoch": 0.34610059990770653, "grad_norm": 0.42612800002098083, "learning_rate": 9.992843519566918e-06, "loss": 0.5064, "step": 625 }, { "epoch": 0.34665436086755885, "grad_norm": 0.4830513894557953, "learning_rate": 9.99267007782789e-06, "loss": 0.4814, "step": 626 }, { "epoch": 0.3472081218274112, "grad_norm": 0.45402246713638306, "learning_rate": 9.992494560985305e-06, "loss": 0.4706, "step": 627 }, { "epoch": 0.3477618827872635, "grad_norm": 0.4601718783378601, "learning_rate": 9.992316969112106e-06, "loss": 0.4906, "step": 628 }, { "epoch": 0.3483156437471158, "grad_norm": 0.484438419342041, "learning_rate": 9.992137302282108e-06, "loss": 0.4951, "step": 629 }, { "epoch": 0.34886940470696814, "grad_norm": 0.5233283042907715, "learning_rate": 9.991955560569985e-06, "loss": 0.5287, "step": 630 }, { "epoch": 0.34942316566682047, "grad_norm": 0.4269018769264221, "learning_rate": 9.991771744051275e-06, "loss": 0.489, "step": 631 }, { "epoch": 0.34997692662667285, "grad_norm": 0.4956077039241791, "learning_rate": 9.991585852802378e-06, "loss": 0.4992, "step": 632 }, { "epoch": 0.35053068758652517, "grad_norm": 0.44928738474845886, "learning_rate": 9.991397886900556e-06, "loss": 0.4905, "step": 633 }, { "epoch": 0.3510844485463775, "grad_norm": 0.399374395608902, "learning_rate": 9.991207846423933e-06, "loss": 0.4849, "step": 634 }, { "epoch": 0.3516382095062298, "grad_norm": 0.44912010431289673, "learning_rate": 9.991015731451496e-06, "loss": 0.4914, "step": 635 }, { "epoch": 0.35219197046608214, "grad_norm": 0.4500245153903961, "learning_rate": 9.990821542063093e-06, "loss": 0.4791, "step": 636 }, { "epoch": 0.35274573142593446, "grad_norm": 0.45403534173965454, "learning_rate": 9.990625278339435e-06, "loss": 0.4994, "step": 637 }, { "epoch": 0.3532994923857868, "grad_norm": 0.45706161856651306, "learning_rate": 9.990426940362095e-06, "loss": 0.491, "step": 638 }, { "epoch": 0.3538532533456391, "grad_norm": 0.45484066009521484, "learning_rate": 9.99022652821351e-06, "loss": 0.5224, "step": 639 }, { "epoch": 0.3544070143054915, "grad_norm": 0.37558048963546753, "learning_rate": 9.990024041976974e-06, "loss": 0.4702, "step": 640 }, { "epoch": 0.3549607752653438, "grad_norm": 0.5365707278251648, "learning_rate": 9.989819481736647e-06, "loss": 0.4746, "step": 641 }, { "epoch": 0.35551453622519613, "grad_norm": 0.4052101969718933, "learning_rate": 9.989612847577553e-06, "loss": 0.4817, "step": 642 }, { "epoch": 0.35606829718504845, "grad_norm": 0.468299001455307, "learning_rate": 9.989404139585575e-06, "loss": 0.4905, "step": 643 }, { "epoch": 0.3566220581449008, "grad_norm": 0.474132239818573, "learning_rate": 9.989193357847456e-06, "loss": 0.4878, "step": 644 }, { "epoch": 0.3571758191047531, "grad_norm": 0.4231448173522949, "learning_rate": 9.988980502450804e-06, "loss": 0.4798, "step": 645 }, { "epoch": 0.3577295800646054, "grad_norm": 0.4780554175376892, "learning_rate": 9.98876557348409e-06, "loss": 0.5075, "step": 646 }, { "epoch": 0.3582833410244578, "grad_norm": 0.418031245470047, "learning_rate": 9.988548571036642e-06, "loss": 0.5118, "step": 647 }, { "epoch": 0.3588371019843101, "grad_norm": 0.46332406997680664, "learning_rate": 9.988329495198654e-06, "loss": 0.4929, "step": 648 }, { "epoch": 0.35939086294416245, "grad_norm": 0.4104249179363251, "learning_rate": 9.988108346061181e-06, "loss": 0.5016, "step": 649 }, { "epoch": 0.35994462390401477, "grad_norm": 0.43490222096443176, "learning_rate": 9.987885123716138e-06, "loss": 0.4847, "step": 650 }, { "epoch": 0.3604983848638671, "grad_norm": 0.47612708806991577, "learning_rate": 9.987659828256306e-06, "loss": 0.4955, "step": 651 }, { "epoch": 0.3610521458237194, "grad_norm": 0.4448936879634857, "learning_rate": 9.987432459775321e-06, "loss": 0.5093, "step": 652 }, { "epoch": 0.36160590678357174, "grad_norm": 0.4985165297985077, "learning_rate": 9.987203018367686e-06, "loss": 0.5008, "step": 653 }, { "epoch": 0.36215966774342406, "grad_norm": 0.40945667028427124, "learning_rate": 9.986971504128764e-06, "loss": 0.4746, "step": 654 }, { "epoch": 0.36271342870327644, "grad_norm": 0.488266259431839, "learning_rate": 9.986737917154778e-06, "loss": 0.4882, "step": 655 }, { "epoch": 0.36326718966312876, "grad_norm": 0.3653155267238617, "learning_rate": 9.986502257542813e-06, "loss": 0.4885, "step": 656 }, { "epoch": 0.3638209506229811, "grad_norm": 0.5694623589515686, "learning_rate": 9.986264525390818e-06, "loss": 0.5087, "step": 657 }, { "epoch": 0.3643747115828334, "grad_norm": 0.4475906491279602, "learning_rate": 9.9860247207976e-06, "loss": 0.5115, "step": 658 }, { "epoch": 0.36492847254268573, "grad_norm": 0.4513932764530182, "learning_rate": 9.985782843862833e-06, "loss": 0.5006, "step": 659 }, { "epoch": 0.36548223350253806, "grad_norm": 0.5498886704444885, "learning_rate": 9.985538894687043e-06, "loss": 0.4594, "step": 660 }, { "epoch": 0.3660359944623904, "grad_norm": 0.4216246008872986, "learning_rate": 9.985292873371625e-06, "loss": 0.4831, "step": 661 }, { "epoch": 0.36658975542224276, "grad_norm": 0.4607003927230835, "learning_rate": 9.985044780018835e-06, "loss": 0.4868, "step": 662 }, { "epoch": 0.3671435163820951, "grad_norm": 0.4551302492618561, "learning_rate": 9.984794614731785e-06, "loss": 0.5108, "step": 663 }, { "epoch": 0.3676972773419474, "grad_norm": 0.4263835847377777, "learning_rate": 9.984542377614453e-06, "loss": 0.45, "step": 664 }, { "epoch": 0.3682510383017997, "grad_norm": 0.5184308886528015, "learning_rate": 9.984288068771673e-06, "loss": 0.4997, "step": 665 }, { "epoch": 0.36880479926165205, "grad_norm": 0.39354997873306274, "learning_rate": 9.984031688309145e-06, "loss": 0.4727, "step": 666 }, { "epoch": 0.3693585602215044, "grad_norm": 0.5060971975326538, "learning_rate": 9.983773236333433e-06, "loss": 0.4872, "step": 667 }, { "epoch": 0.3699123211813567, "grad_norm": 0.44652435183525085, "learning_rate": 9.983512712951951e-06, "loss": 0.4999, "step": 668 }, { "epoch": 0.370466082141209, "grad_norm": 0.4136661887168884, "learning_rate": 9.983250118272985e-06, "loss": 0.4827, "step": 669 }, { "epoch": 0.3710198431010614, "grad_norm": 0.4527342617511749, "learning_rate": 9.982985452405672e-06, "loss": 0.4913, "step": 670 }, { "epoch": 0.3715736040609137, "grad_norm": 0.41932180523872375, "learning_rate": 9.98271871546002e-06, "loss": 0.478, "step": 671 }, { "epoch": 0.37212736502076604, "grad_norm": 0.41613322496414185, "learning_rate": 9.982449907546892e-06, "loss": 0.4752, "step": 672 }, { "epoch": 0.37268112598061837, "grad_norm": 0.4363129436969757, "learning_rate": 9.982179028778012e-06, "loss": 0.5001, "step": 673 }, { "epoch": 0.3732348869404707, "grad_norm": 0.38035380840301514, "learning_rate": 9.981906079265963e-06, "loss": 0.4798, "step": 674 }, { "epoch": 0.373788647900323, "grad_norm": 0.4224567711353302, "learning_rate": 9.981631059124195e-06, "loss": 0.4861, "step": 675 }, { "epoch": 0.37434240886017534, "grad_norm": 0.4830438792705536, "learning_rate": 9.981353968467011e-06, "loss": 0.5257, "step": 676 }, { "epoch": 0.3748961698200277, "grad_norm": 0.41082051396369934, "learning_rate": 9.981074807409582e-06, "loss": 0.4908, "step": 677 }, { "epoch": 0.37544993077988004, "grad_norm": 0.40651124715805054, "learning_rate": 9.980793576067933e-06, "loss": 0.4782, "step": 678 }, { "epoch": 0.37600369173973236, "grad_norm": 0.39623332023620605, "learning_rate": 9.980510274558953e-06, "loss": 0.4701, "step": 679 }, { "epoch": 0.3765574526995847, "grad_norm": 0.4489481449127197, "learning_rate": 9.980224903000389e-06, "loss": 0.4908, "step": 680 }, { "epoch": 0.377111213659437, "grad_norm": 0.4004645049571991, "learning_rate": 9.979937461510852e-06, "loss": 0.4776, "step": 681 }, { "epoch": 0.37766497461928933, "grad_norm": 0.4569014310836792, "learning_rate": 9.979647950209812e-06, "loss": 0.5095, "step": 682 }, { "epoch": 0.37821873557914165, "grad_norm": 0.4393109679222107, "learning_rate": 9.979356369217597e-06, "loss": 0.4917, "step": 683 }, { "epoch": 0.378772496538994, "grad_norm": 0.4341040551662445, "learning_rate": 9.979062718655397e-06, "loss": 0.4934, "step": 684 }, { "epoch": 0.37932625749884635, "grad_norm": 0.43620020151138306, "learning_rate": 9.978766998645262e-06, "loss": 0.473, "step": 685 }, { "epoch": 0.3798800184586987, "grad_norm": 0.468645304441452, "learning_rate": 9.978469209310103e-06, "loss": 0.4902, "step": 686 }, { "epoch": 0.380433779418551, "grad_norm": 0.4507107734680176, "learning_rate": 9.978169350773688e-06, "loss": 0.4803, "step": 687 }, { "epoch": 0.3809875403784033, "grad_norm": 0.4300573766231537, "learning_rate": 9.977867423160648e-06, "loss": 0.4997, "step": 688 }, { "epoch": 0.38154130133825565, "grad_norm": 0.43265536427497864, "learning_rate": 9.977563426596476e-06, "loss": 0.488, "step": 689 }, { "epoch": 0.38209506229810797, "grad_norm": 0.5016294717788696, "learning_rate": 9.97725736120752e-06, "loss": 0.4927, "step": 690 }, { "epoch": 0.3826488232579603, "grad_norm": 0.40112683176994324, "learning_rate": 9.976949227120989e-06, "loss": 0.494, "step": 691 }, { "epoch": 0.38320258421781267, "grad_norm": 0.4804500341415405, "learning_rate": 9.976639024464953e-06, "loss": 0.5058, "step": 692 }, { "epoch": 0.383756345177665, "grad_norm": 0.430152952671051, "learning_rate": 9.976326753368345e-06, "loss": 0.4926, "step": 693 }, { "epoch": 0.3843101061375173, "grad_norm": 0.4978470206260681, "learning_rate": 9.976012413960948e-06, "loss": 0.4953, "step": 694 }, { "epoch": 0.38486386709736964, "grad_norm": 0.4628553092479706, "learning_rate": 9.975696006373417e-06, "loss": 0.4851, "step": 695 }, { "epoch": 0.38541762805722196, "grad_norm": 0.47951310873031616, "learning_rate": 9.975377530737256e-06, "loss": 0.4826, "step": 696 }, { "epoch": 0.3859713890170743, "grad_norm": 0.47239360213279724, "learning_rate": 9.975056987184837e-06, "loss": 0.4634, "step": 697 }, { "epoch": 0.3865251499769266, "grad_norm": 0.47593340277671814, "learning_rate": 9.974734375849382e-06, "loss": 0.4675, "step": 698 }, { "epoch": 0.38707891093677893, "grad_norm": 0.3690137565135956, "learning_rate": 9.974409696864983e-06, "loss": 0.4756, "step": 699 }, { "epoch": 0.3876326718966313, "grad_norm": 0.4830561578273773, "learning_rate": 9.974082950366587e-06, "loss": 0.4945, "step": 700 }, { "epoch": 0.38818643285648363, "grad_norm": 0.37991344928741455, "learning_rate": 9.973754136489995e-06, "loss": 0.4643, "step": 701 }, { "epoch": 0.38874019381633595, "grad_norm": 0.42852863669395447, "learning_rate": 9.973423255371875e-06, "loss": 0.5033, "step": 702 }, { "epoch": 0.3892939547761883, "grad_norm": 0.4650701880455017, "learning_rate": 9.97309030714975e-06, "loss": 0.4916, "step": 703 }, { "epoch": 0.3898477157360406, "grad_norm": 0.44632941484451294, "learning_rate": 9.972755291962003e-06, "loss": 0.4778, "step": 704 }, { "epoch": 0.3904014766958929, "grad_norm": 0.5014118552207947, "learning_rate": 9.972418209947879e-06, "loss": 0.4954, "step": 705 }, { "epoch": 0.39095523765574525, "grad_norm": 0.4999413788318634, "learning_rate": 9.972079061247477e-06, "loss": 0.489, "step": 706 }, { "epoch": 0.3915089986155976, "grad_norm": 0.5270202159881592, "learning_rate": 9.971737846001757e-06, "loss": 0.502, "step": 707 }, { "epoch": 0.39206275957544995, "grad_norm": 0.5036186575889587, "learning_rate": 9.97139456435254e-06, "loss": 0.4709, "step": 708 }, { "epoch": 0.39261652053530227, "grad_norm": 0.5798619985580444, "learning_rate": 9.9710492164425e-06, "loss": 0.4958, "step": 709 }, { "epoch": 0.3931702814951546, "grad_norm": 0.42804670333862305, "learning_rate": 9.970701802415182e-06, "loss": 0.4763, "step": 710 }, { "epoch": 0.3937240424550069, "grad_norm": 0.582588791847229, "learning_rate": 9.970352322414974e-06, "loss": 0.5289, "step": 711 }, { "epoch": 0.39427780341485924, "grad_norm": 0.45862504839897156, "learning_rate": 9.970000776587134e-06, "loss": 0.4839, "step": 712 }, { "epoch": 0.39483156437471156, "grad_norm": 0.5426108241081238, "learning_rate": 9.969647165077776e-06, "loss": 0.496, "step": 713 }, { "epoch": 0.3953853253345639, "grad_norm": 0.4305201470851898, "learning_rate": 9.969291488033867e-06, "loss": 0.5197, "step": 714 }, { "epoch": 0.39593908629441626, "grad_norm": 0.514569103717804, "learning_rate": 9.968933745603243e-06, "loss": 0.4883, "step": 715 }, { "epoch": 0.3964928472542686, "grad_norm": 0.42893746495246887, "learning_rate": 9.968573937934588e-06, "loss": 0.4694, "step": 716 }, { "epoch": 0.3970466082141209, "grad_norm": 0.51752769947052, "learning_rate": 9.968212065177449e-06, "loss": 0.4701, "step": 717 }, { "epoch": 0.39760036917397323, "grad_norm": 0.43449971079826355, "learning_rate": 9.967848127482234e-06, "loss": 0.4971, "step": 718 }, { "epoch": 0.39815413013382556, "grad_norm": 0.5628939270973206, "learning_rate": 9.967482125000204e-06, "loss": 0.4964, "step": 719 }, { "epoch": 0.3987078910936779, "grad_norm": 0.5420982837677002, "learning_rate": 9.967114057883482e-06, "loss": 0.4759, "step": 720 }, { "epoch": 0.3992616520535302, "grad_norm": 0.5550535917282104, "learning_rate": 9.966743926285048e-06, "loss": 0.4726, "step": 721 }, { "epoch": 0.3998154130133826, "grad_norm": 0.43458104133605957, "learning_rate": 9.966371730358737e-06, "loss": 0.4733, "step": 722 }, { "epoch": 0.4003691739732349, "grad_norm": 0.530353307723999, "learning_rate": 9.965997470259249e-06, "loss": 0.4916, "step": 723 }, { "epoch": 0.4009229349330872, "grad_norm": 0.369856595993042, "learning_rate": 9.965621146142134e-06, "loss": 0.4776, "step": 724 }, { "epoch": 0.40147669589293955, "grad_norm": 0.44323259592056274, "learning_rate": 9.965242758163806e-06, "loss": 0.4911, "step": 725 }, { "epoch": 0.4020304568527919, "grad_norm": 0.4158749580383301, "learning_rate": 9.964862306481536e-06, "loss": 0.4829, "step": 726 }, { "epoch": 0.4025842178126442, "grad_norm": 0.5062821507453918, "learning_rate": 9.964479791253446e-06, "loss": 0.4774, "step": 727 }, { "epoch": 0.4031379787724965, "grad_norm": 0.45740821957588196, "learning_rate": 9.964095212638522e-06, "loss": 0.5007, "step": 728 }, { "epoch": 0.40369173973234884, "grad_norm": 0.5239325761795044, "learning_rate": 9.96370857079661e-06, "loss": 0.5033, "step": 729 }, { "epoch": 0.4042455006922012, "grad_norm": 0.5229483842849731, "learning_rate": 9.96331986588841e-06, "loss": 0.4765, "step": 730 }, { "epoch": 0.40479926165205354, "grad_norm": 0.5127926468849182, "learning_rate": 9.962929098075475e-06, "loss": 0.4437, "step": 731 }, { "epoch": 0.40535302261190587, "grad_norm": 0.5349799990653992, "learning_rate": 9.962536267520222e-06, "loss": 0.4817, "step": 732 }, { "epoch": 0.4059067835717582, "grad_norm": 0.4531018137931824, "learning_rate": 9.962141374385925e-06, "loss": 0.4832, "step": 733 }, { "epoch": 0.4064605445316105, "grad_norm": 0.4675440192222595, "learning_rate": 9.96174441883671e-06, "loss": 0.4755, "step": 734 }, { "epoch": 0.40701430549146284, "grad_norm": 0.4419876039028168, "learning_rate": 9.961345401037565e-06, "loss": 0.5146, "step": 735 }, { "epoch": 0.40756806645131516, "grad_norm": 0.4267847537994385, "learning_rate": 9.960944321154336e-06, "loss": 0.4808, "step": 736 }, { "epoch": 0.40812182741116754, "grad_norm": 0.47326934337615967, "learning_rate": 9.960541179353722e-06, "loss": 0.4804, "step": 737 }, { "epoch": 0.40867558837101986, "grad_norm": 0.41619300842285156, "learning_rate": 9.96013597580328e-06, "loss": 0.4822, "step": 738 }, { "epoch": 0.4092293493308722, "grad_norm": 0.39907920360565186, "learning_rate": 9.959728710671426e-06, "loss": 0.4673, "step": 739 }, { "epoch": 0.4097831102907245, "grad_norm": 0.4305252134799957, "learning_rate": 9.959319384127432e-06, "loss": 0.4887, "step": 740 }, { "epoch": 0.41033687125057683, "grad_norm": 0.4708583950996399, "learning_rate": 9.958907996341425e-06, "loss": 0.5058, "step": 741 }, { "epoch": 0.41089063221042915, "grad_norm": 0.4422723352909088, "learning_rate": 9.95849454748439e-06, "loss": 0.4968, "step": 742 }, { "epoch": 0.4114443931702815, "grad_norm": 0.5180055499076843, "learning_rate": 9.958079037728172e-06, "loss": 0.5109, "step": 743 }, { "epoch": 0.4119981541301338, "grad_norm": 0.37570464611053467, "learning_rate": 9.957661467245466e-06, "loss": 0.4888, "step": 744 }, { "epoch": 0.4125519150899862, "grad_norm": 0.4045150876045227, "learning_rate": 9.957241836209825e-06, "loss": 0.4824, "step": 745 }, { "epoch": 0.4131056760498385, "grad_norm": 0.423125296831131, "learning_rate": 9.956820144795665e-06, "loss": 0.4836, "step": 746 }, { "epoch": 0.4136594370096908, "grad_norm": 0.4376266598701477, "learning_rate": 9.956396393178251e-06, "loss": 0.4654, "step": 747 }, { "epoch": 0.41421319796954315, "grad_norm": 0.40815943479537964, "learning_rate": 9.95597058153371e-06, "loss": 0.4832, "step": 748 }, { "epoch": 0.41476695892939547, "grad_norm": 0.4487222731113434, "learning_rate": 9.955542710039017e-06, "loss": 0.4784, "step": 749 }, { "epoch": 0.4153207198892478, "grad_norm": 0.4021719992160797, "learning_rate": 9.955112778872011e-06, "loss": 0.4769, "step": 750 }, { "epoch": 0.4158744808491001, "grad_norm": 0.4062252640724182, "learning_rate": 9.954680788211385e-06, "loss": 0.5011, "step": 751 }, { "epoch": 0.4164282418089525, "grad_norm": 0.38958245515823364, "learning_rate": 9.954246738236686e-06, "loss": 0.4932, "step": 752 }, { "epoch": 0.4169820027688048, "grad_norm": 0.4295423626899719, "learning_rate": 9.953810629128318e-06, "loss": 0.5308, "step": 753 }, { "epoch": 0.41753576372865714, "grad_norm": 0.40305688977241516, "learning_rate": 9.953372461067542e-06, "loss": 0.4899, "step": 754 }, { "epoch": 0.41808952468850946, "grad_norm": 0.39291995763778687, "learning_rate": 9.952932234236474e-06, "loss": 0.4954, "step": 755 }, { "epoch": 0.4186432856483618, "grad_norm": 0.3621155619621277, "learning_rate": 9.952489948818084e-06, "loss": 0.4884, "step": 756 }, { "epoch": 0.4191970466082141, "grad_norm": 0.3715658187866211, "learning_rate": 9.952045604996202e-06, "loss": 0.4934, "step": 757 }, { "epoch": 0.41975080756806643, "grad_norm": 0.42321643233299255, "learning_rate": 9.951599202955507e-06, "loss": 0.5078, "step": 758 }, { "epoch": 0.4203045685279188, "grad_norm": 0.430503785610199, "learning_rate": 9.951150742881538e-06, "loss": 0.485, "step": 759 }, { "epoch": 0.42085832948777113, "grad_norm": 0.39009761810302734, "learning_rate": 9.95070022496069e-06, "loss": 0.4678, "step": 760 }, { "epoch": 0.42141209044762346, "grad_norm": 0.45248129963874817, "learning_rate": 9.950247649380211e-06, "loss": 0.5053, "step": 761 }, { "epoch": 0.4219658514074758, "grad_norm": 0.3882608115673065, "learning_rate": 9.949793016328203e-06, "loss": 0.4566, "step": 762 }, { "epoch": 0.4225196123673281, "grad_norm": 0.39524534344673157, "learning_rate": 9.94933632599363e-06, "loss": 0.4895, "step": 763 }, { "epoch": 0.4230733733271804, "grad_norm": 0.38324663043022156, "learning_rate": 9.948877578566303e-06, "loss": 0.4984, "step": 764 }, { "epoch": 0.42362713428703275, "grad_norm": 0.43257179856300354, "learning_rate": 9.948416774236891e-06, "loss": 0.5, "step": 765 }, { "epoch": 0.42418089524688507, "grad_norm": 0.4389267861843109, "learning_rate": 9.94795391319692e-06, "loss": 0.4562, "step": 766 }, { "epoch": 0.42473465620673745, "grad_norm": 0.4605172872543335, "learning_rate": 9.947488995638765e-06, "loss": 0.4823, "step": 767 }, { "epoch": 0.42528841716658977, "grad_norm": 0.40570181608200073, "learning_rate": 9.947022021755663e-06, "loss": 0.4918, "step": 768 }, { "epoch": 0.4258421781264421, "grad_norm": 0.38202816247940063, "learning_rate": 9.946552991741702e-06, "loss": 0.4879, "step": 769 }, { "epoch": 0.4263959390862944, "grad_norm": 0.46935856342315674, "learning_rate": 9.946081905791825e-06, "loss": 0.4909, "step": 770 }, { "epoch": 0.42694970004614674, "grad_norm": 0.4655874967575073, "learning_rate": 9.945608764101829e-06, "loss": 0.5274, "step": 771 }, { "epoch": 0.42750346100599906, "grad_norm": 0.3889467120170593, "learning_rate": 9.945133566868366e-06, "loss": 0.4937, "step": 772 }, { "epoch": 0.4280572219658514, "grad_norm": 0.5222169160842896, "learning_rate": 9.944656314288944e-06, "loss": 0.4626, "step": 773 }, { "epoch": 0.42861098292570377, "grad_norm": 0.39404210448265076, "learning_rate": 9.944177006561918e-06, "loss": 0.492, "step": 774 }, { "epoch": 0.4291647438855561, "grad_norm": 0.5179499387741089, "learning_rate": 9.943695643886509e-06, "loss": 0.49, "step": 775 }, { "epoch": 0.4297185048454084, "grad_norm": 0.41033345460891724, "learning_rate": 9.943212226462782e-06, "loss": 0.4765, "step": 776 }, { "epoch": 0.43027226580526073, "grad_norm": 0.42040881514549255, "learning_rate": 9.94272675449166e-06, "loss": 0.4752, "step": 777 }, { "epoch": 0.43082602676511306, "grad_norm": 0.4476858377456665, "learning_rate": 9.94223922817492e-06, "loss": 0.5064, "step": 778 }, { "epoch": 0.4313797877249654, "grad_norm": 0.4600520730018616, "learning_rate": 9.941749647715198e-06, "loss": 0.5004, "step": 779 }, { "epoch": 0.4319335486848177, "grad_norm": 0.47403693199157715, "learning_rate": 9.941258013315969e-06, "loss": 0.4991, "step": 780 }, { "epoch": 0.43248730964467, "grad_norm": 0.39357924461364746, "learning_rate": 9.940764325181577e-06, "loss": 0.4868, "step": 781 }, { "epoch": 0.4330410706045224, "grad_norm": 0.4015486240386963, "learning_rate": 9.940268583517212e-06, "loss": 0.5106, "step": 782 }, { "epoch": 0.4335948315643747, "grad_norm": 0.4455430209636688, "learning_rate": 9.939770788528921e-06, "loss": 0.4851, "step": 783 }, { "epoch": 0.43414859252422705, "grad_norm": 0.3998722732067108, "learning_rate": 9.9392709404236e-06, "loss": 0.4551, "step": 784 }, { "epoch": 0.4347023534840794, "grad_norm": 0.38475504517555237, "learning_rate": 9.938769039409e-06, "loss": 0.4847, "step": 785 }, { "epoch": 0.4352561144439317, "grad_norm": 0.43542107939720154, "learning_rate": 9.93826508569373e-06, "loss": 0.4799, "step": 786 }, { "epoch": 0.435809875403784, "grad_norm": 0.414852112531662, "learning_rate": 9.937759079487245e-06, "loss": 0.4843, "step": 787 }, { "epoch": 0.43636363636363634, "grad_norm": 0.4013381600379944, "learning_rate": 9.937251020999858e-06, "loss": 0.4947, "step": 788 }, { "epoch": 0.4369173973234887, "grad_norm": 0.4529331624507904, "learning_rate": 9.936740910442732e-06, "loss": 0.4794, "step": 789 }, { "epoch": 0.43747115828334104, "grad_norm": 0.40163007378578186, "learning_rate": 9.936228748027886e-06, "loss": 0.4772, "step": 790 }, { "epoch": 0.43802491924319337, "grad_norm": 0.5136457085609436, "learning_rate": 9.935714533968188e-06, "loss": 0.4748, "step": 791 }, { "epoch": 0.4385786802030457, "grad_norm": 0.4144260585308075, "learning_rate": 9.935198268477364e-06, "loss": 0.4898, "step": 792 }, { "epoch": 0.439132441162898, "grad_norm": 0.4251055121421814, "learning_rate": 9.934679951769987e-06, "loss": 0.4911, "step": 793 }, { "epoch": 0.43968620212275034, "grad_norm": 0.4364539384841919, "learning_rate": 9.934159584061484e-06, "loss": 0.5139, "step": 794 }, { "epoch": 0.44023996308260266, "grad_norm": 0.43218812346458435, "learning_rate": 9.933637165568138e-06, "loss": 0.5037, "step": 795 }, { "epoch": 0.440793724042455, "grad_norm": 0.379526287317276, "learning_rate": 9.93311269650708e-06, "loss": 0.4952, "step": 796 }, { "epoch": 0.44134748500230736, "grad_norm": 0.41769886016845703, "learning_rate": 9.932586177096297e-06, "loss": 0.4978, "step": 797 }, { "epoch": 0.4419012459621597, "grad_norm": 0.3608790636062622, "learning_rate": 9.932057607554624e-06, "loss": 0.462, "step": 798 }, { "epoch": 0.442455006922012, "grad_norm": 0.408361554145813, "learning_rate": 9.931526988101752e-06, "loss": 0.4762, "step": 799 }, { "epoch": 0.44300876788186433, "grad_norm": 0.42425084114074707, "learning_rate": 9.93099431895822e-06, "loss": 0.4804, "step": 800 }, { "epoch": 0.44356252884171665, "grad_norm": 0.37080118060112, "learning_rate": 9.930459600345425e-06, "loss": 0.4908, "step": 801 }, { "epoch": 0.444116289801569, "grad_norm": 0.41937291622161865, "learning_rate": 9.92992283248561e-06, "loss": 0.4941, "step": 802 }, { "epoch": 0.4446700507614213, "grad_norm": 0.46354940533638, "learning_rate": 9.929384015601872e-06, "loss": 0.5055, "step": 803 }, { "epoch": 0.4452238117212737, "grad_norm": 0.38866958022117615, "learning_rate": 9.928843149918159e-06, "loss": 0.4549, "step": 804 }, { "epoch": 0.445777572681126, "grad_norm": 0.5625099539756775, "learning_rate": 9.928300235659272e-06, "loss": 0.4611, "step": 805 }, { "epoch": 0.4463313336409783, "grad_norm": 0.41278544068336487, "learning_rate": 9.927755273050863e-06, "loss": 0.4824, "step": 806 }, { "epoch": 0.44688509460083065, "grad_norm": 0.4545687139034271, "learning_rate": 9.927208262319431e-06, "loss": 0.4655, "step": 807 }, { "epoch": 0.44743885556068297, "grad_norm": 0.48922082781791687, "learning_rate": 9.926659203692336e-06, "loss": 0.4809, "step": 808 }, { "epoch": 0.4479926165205353, "grad_norm": 0.4419759213924408, "learning_rate": 9.926108097397777e-06, "loss": 0.4947, "step": 809 }, { "epoch": 0.4485463774803876, "grad_norm": 0.3896903395652771, "learning_rate": 9.925554943664815e-06, "loss": 0.4539, "step": 810 }, { "epoch": 0.44910013844023994, "grad_norm": 0.5475815534591675, "learning_rate": 9.924999742723355e-06, "loss": 0.4917, "step": 811 }, { "epoch": 0.4496538994000923, "grad_norm": 0.45117703080177307, "learning_rate": 9.924442494804157e-06, "loss": 0.4779, "step": 812 }, { "epoch": 0.45020766035994464, "grad_norm": 0.44480082392692566, "learning_rate": 9.923883200138829e-06, "loss": 0.4663, "step": 813 }, { "epoch": 0.45076142131979696, "grad_norm": 0.5667861700057983, "learning_rate": 9.92332185895983e-06, "loss": 0.4568, "step": 814 }, { "epoch": 0.4513151822796493, "grad_norm": 0.4547692537307739, "learning_rate": 9.922758471500471e-06, "loss": 0.4811, "step": 815 }, { "epoch": 0.4518689432395016, "grad_norm": 0.4028513729572296, "learning_rate": 9.922193037994912e-06, "loss": 0.4775, "step": 816 }, { "epoch": 0.45242270419935393, "grad_norm": 0.5037835836410522, "learning_rate": 9.921625558678165e-06, "loss": 0.4869, "step": 817 }, { "epoch": 0.45297646515920625, "grad_norm": 0.4722062945365906, "learning_rate": 9.921056033786091e-06, "loss": 0.4852, "step": 818 }, { "epoch": 0.45353022611905863, "grad_norm": 0.4528781473636627, "learning_rate": 9.920484463555401e-06, "loss": 0.5173, "step": 819 }, { "epoch": 0.45408398707891096, "grad_norm": 0.5305238366127014, "learning_rate": 9.919910848223659e-06, "loss": 0.5113, "step": 820 }, { "epoch": 0.4546377480387633, "grad_norm": 0.41338300704956055, "learning_rate": 9.919335188029274e-06, "loss": 0.4915, "step": 821 }, { "epoch": 0.4551915089986156, "grad_norm": 0.3910463750362396, "learning_rate": 9.91875748321151e-06, "loss": 0.4791, "step": 822 }, { "epoch": 0.4557452699584679, "grad_norm": 0.46771544218063354, "learning_rate": 9.918177734010476e-06, "loss": 0.4811, "step": 823 }, { "epoch": 0.45629903091832025, "grad_norm": 0.4240265488624573, "learning_rate": 9.917595940667135e-06, "loss": 0.4704, "step": 824 }, { "epoch": 0.45685279187817257, "grad_norm": 0.436620831489563, "learning_rate": 9.917012103423298e-06, "loss": 0.4645, "step": 825 }, { "epoch": 0.4574065528380249, "grad_norm": 0.5118358135223389, "learning_rate": 9.916426222521623e-06, "loss": 0.4734, "step": 826 }, { "epoch": 0.45796031379787727, "grad_norm": 0.39574703574180603, "learning_rate": 9.915838298205623e-06, "loss": 0.5206, "step": 827 }, { "epoch": 0.4585140747577296, "grad_norm": 0.44414225220680237, "learning_rate": 9.915248330719655e-06, "loss": 0.4687, "step": 828 }, { "epoch": 0.4590678357175819, "grad_norm": 0.46896684169769287, "learning_rate": 9.914656320308926e-06, "loss": 0.4847, "step": 829 }, { "epoch": 0.45962159667743424, "grad_norm": 0.37112754583358765, "learning_rate": 9.914062267219496e-06, "loss": 0.5015, "step": 830 }, { "epoch": 0.46017535763728656, "grad_norm": 0.3917914628982544, "learning_rate": 9.91346617169827e-06, "loss": 0.4944, "step": 831 }, { "epoch": 0.4607291185971389, "grad_norm": 0.43364277482032776, "learning_rate": 9.912868033993005e-06, "loss": 0.4976, "step": 832 }, { "epoch": 0.4612828795569912, "grad_norm": 0.40289461612701416, "learning_rate": 9.9122678543523e-06, "loss": 0.4816, "step": 833 }, { "epoch": 0.4618366405168436, "grad_norm": 0.3810864984989166, "learning_rate": 9.911665633025612e-06, "loss": 0.468, "step": 834 }, { "epoch": 0.4623904014766959, "grad_norm": 0.3839159905910492, "learning_rate": 9.911061370263242e-06, "loss": 0.4513, "step": 835 }, { "epoch": 0.46294416243654823, "grad_norm": 0.41530972719192505, "learning_rate": 9.910455066316339e-06, "loss": 0.4865, "step": 836 }, { "epoch": 0.46349792339640056, "grad_norm": 0.3826664090156555, "learning_rate": 9.9098467214369e-06, "loss": 0.4815, "step": 837 }, { "epoch": 0.4640516843562529, "grad_norm": 0.3536207973957062, "learning_rate": 9.909236335877773e-06, "loss": 0.4875, "step": 838 }, { "epoch": 0.4646054453161052, "grad_norm": 0.35841694474220276, "learning_rate": 9.908623909892651e-06, "loss": 0.4787, "step": 839 }, { "epoch": 0.4651592062759575, "grad_norm": 0.40058958530426025, "learning_rate": 9.90800944373608e-06, "loss": 0.4773, "step": 840 }, { "epoch": 0.46571296723580985, "grad_norm": 0.3887956142425537, "learning_rate": 9.907392937663446e-06, "loss": 0.5134, "step": 841 }, { "epoch": 0.46626672819566223, "grad_norm": 0.4068099856376648, "learning_rate": 9.906774391930991e-06, "loss": 0.4996, "step": 842 }, { "epoch": 0.46682048915551455, "grad_norm": 0.39956235885620117, "learning_rate": 9.906153806795799e-06, "loss": 0.4864, "step": 843 }, { "epoch": 0.4673742501153669, "grad_norm": 0.37425103783607483, "learning_rate": 9.905531182515803e-06, "loss": 0.4703, "step": 844 }, { "epoch": 0.4679280110752192, "grad_norm": 0.399742990732193, "learning_rate": 9.90490651934979e-06, "loss": 0.4592, "step": 845 }, { "epoch": 0.4684817720350715, "grad_norm": 0.4181869626045227, "learning_rate": 9.904279817557382e-06, "loss": 0.4966, "step": 846 }, { "epoch": 0.46903553299492384, "grad_norm": 0.4327099621295929, "learning_rate": 9.90365107739906e-06, "loss": 0.4942, "step": 847 }, { "epoch": 0.46958929395477617, "grad_norm": 0.4031676948070526, "learning_rate": 9.903020299136141e-06, "loss": 0.4828, "step": 848 }, { "epoch": 0.47014305491462854, "grad_norm": 0.363918274641037, "learning_rate": 9.902387483030802e-06, "loss": 0.4852, "step": 849 }, { "epoch": 0.47069681587448087, "grad_norm": 0.37986570596694946, "learning_rate": 9.901752629346058e-06, "loss": 0.4991, "step": 850 }, { "epoch": 0.4712505768343332, "grad_norm": 0.3815036714076996, "learning_rate": 9.901115738345774e-06, "loss": 0.4736, "step": 851 }, { "epoch": 0.4718043377941855, "grad_norm": 0.40065857768058777, "learning_rate": 9.900476810294659e-06, "loss": 0.5065, "step": 852 }, { "epoch": 0.47235809875403784, "grad_norm": 0.4268467426300049, "learning_rate": 9.899835845458271e-06, "loss": 0.4538, "step": 853 }, { "epoch": 0.47291185971389016, "grad_norm": 0.40337836742401123, "learning_rate": 9.899192844103016e-06, "loss": 0.4629, "step": 854 }, { "epoch": 0.4734656206737425, "grad_norm": 0.41758668422698975, "learning_rate": 9.898547806496143e-06, "loss": 0.4564, "step": 855 }, { "epoch": 0.4740193816335948, "grad_norm": 0.4044412672519684, "learning_rate": 9.897900732905751e-06, "loss": 0.4863, "step": 856 }, { "epoch": 0.4745731425934472, "grad_norm": 0.36383646726608276, "learning_rate": 9.89725162360078e-06, "loss": 0.4764, "step": 857 }, { "epoch": 0.4751269035532995, "grad_norm": 0.4307420551776886, "learning_rate": 9.896600478851023e-06, "loss": 0.4897, "step": 858 }, { "epoch": 0.47568066451315183, "grad_norm": 0.41352686285972595, "learning_rate": 9.895947298927114e-06, "loss": 0.4725, "step": 859 }, { "epoch": 0.47623442547300415, "grad_norm": 0.39586201310157776, "learning_rate": 9.895292084100532e-06, "loss": 0.4961, "step": 860 }, { "epoch": 0.4767881864328565, "grad_norm": 0.4154324233531952, "learning_rate": 9.894634834643606e-06, "loss": 0.487, "step": 861 }, { "epoch": 0.4773419473927088, "grad_norm": 0.3779523968696594, "learning_rate": 9.893975550829507e-06, "loss": 0.478, "step": 862 }, { "epoch": 0.4778957083525611, "grad_norm": 0.39752647280693054, "learning_rate": 9.893314232932257e-06, "loss": 0.4859, "step": 863 }, { "epoch": 0.4784494693124135, "grad_norm": 0.3547062575817108, "learning_rate": 9.892650881226714e-06, "loss": 0.4909, "step": 864 }, { "epoch": 0.4790032302722658, "grad_norm": 0.42254361510276794, "learning_rate": 9.891985495988592e-06, "loss": 0.4707, "step": 865 }, { "epoch": 0.47955699123211815, "grad_norm": 0.4629381597042084, "learning_rate": 9.891318077494444e-06, "loss": 0.4781, "step": 866 }, { "epoch": 0.48011075219197047, "grad_norm": 0.3880061209201813, "learning_rate": 9.890648626021666e-06, "loss": 0.4559, "step": 867 }, { "epoch": 0.4806645131518228, "grad_norm": 0.47073304653167725, "learning_rate": 9.889977141848505e-06, "loss": 0.4753, "step": 868 }, { "epoch": 0.4812182741116751, "grad_norm": 0.3962337076663971, "learning_rate": 9.889303625254048e-06, "loss": 0.4919, "step": 869 }, { "epoch": 0.48177203507152744, "grad_norm": 0.38412970304489136, "learning_rate": 9.888628076518231e-06, "loss": 0.4663, "step": 870 }, { "epoch": 0.48232579603137976, "grad_norm": 0.42063426971435547, "learning_rate": 9.887950495921832e-06, "loss": 0.4532, "step": 871 }, { "epoch": 0.48287955699123214, "grad_norm": 0.4145756959915161, "learning_rate": 9.887270883746471e-06, "loss": 0.4814, "step": 872 }, { "epoch": 0.48343331795108446, "grad_norm": 0.4072231352329254, "learning_rate": 9.88658924027462e-06, "loss": 0.4805, "step": 873 }, { "epoch": 0.4839870789109368, "grad_norm": 0.44742828607559204, "learning_rate": 9.885905565789586e-06, "loss": 0.4975, "step": 874 }, { "epoch": 0.4845408398707891, "grad_norm": 0.4994361996650696, "learning_rate": 9.885219860575525e-06, "loss": 0.4678, "step": 875 }, { "epoch": 0.48509460083064143, "grad_norm": 0.434209942817688, "learning_rate": 9.88453212491744e-06, "loss": 0.5002, "step": 876 }, { "epoch": 0.48564836179049375, "grad_norm": 0.4521613121032715, "learning_rate": 9.883842359101173e-06, "loss": 0.4966, "step": 877 }, { "epoch": 0.4862021227503461, "grad_norm": 0.520271360874176, "learning_rate": 9.883150563413411e-06, "loss": 0.4801, "step": 878 }, { "epoch": 0.48675588371019846, "grad_norm": 0.41456112265586853, "learning_rate": 9.882456738141685e-06, "loss": 0.4686, "step": 879 }, { "epoch": 0.4873096446700508, "grad_norm": 0.502713143825531, "learning_rate": 9.88176088357437e-06, "loss": 0.4777, "step": 880 }, { "epoch": 0.4878634056299031, "grad_norm": 0.4434688687324524, "learning_rate": 9.881063000000682e-06, "loss": 0.4899, "step": 881 }, { "epoch": 0.4884171665897554, "grad_norm": 0.48114466667175293, "learning_rate": 9.880363087710687e-06, "loss": 0.4626, "step": 882 }, { "epoch": 0.48897092754960775, "grad_norm": 0.49518144130706787, "learning_rate": 9.879661146995285e-06, "loss": 0.4841, "step": 883 }, { "epoch": 0.48952468850946007, "grad_norm": 0.4322006106376648, "learning_rate": 9.878957178146225e-06, "loss": 0.4732, "step": 884 }, { "epoch": 0.4900784494693124, "grad_norm": 0.43942296504974365, "learning_rate": 9.878251181456098e-06, "loss": 0.4576, "step": 885 }, { "epoch": 0.4906322104291647, "grad_norm": 0.4154396653175354, "learning_rate": 9.877543157218337e-06, "loss": 0.4633, "step": 886 }, { "epoch": 0.4911859713890171, "grad_norm": 0.49631446599960327, "learning_rate": 9.876833105727219e-06, "loss": 0.4855, "step": 887 }, { "epoch": 0.4917397323488694, "grad_norm": 0.3947916626930237, "learning_rate": 9.876121027277864e-06, "loss": 0.4774, "step": 888 }, { "epoch": 0.49229349330872174, "grad_norm": 0.3959406614303589, "learning_rate": 9.875406922166228e-06, "loss": 0.4856, "step": 889 }, { "epoch": 0.49284725426857406, "grad_norm": 0.37372249364852905, "learning_rate": 9.87469079068912e-06, "loss": 0.4782, "step": 890 }, { "epoch": 0.4934010152284264, "grad_norm": 0.4108045995235443, "learning_rate": 9.873972633144183e-06, "loss": 0.4895, "step": 891 }, { "epoch": 0.4939547761882787, "grad_norm": 0.40932077169418335, "learning_rate": 9.873252449829906e-06, "loss": 0.4983, "step": 892 }, { "epoch": 0.49450853714813103, "grad_norm": 0.38621556758880615, "learning_rate": 9.872530241045617e-06, "loss": 0.487, "step": 893 }, { "epoch": 0.4950622981079834, "grad_norm": 0.4013294577598572, "learning_rate": 9.871806007091491e-06, "loss": 0.472, "step": 894 }, { "epoch": 0.49561605906783573, "grad_norm": 0.40428489446640015, "learning_rate": 9.871079748268537e-06, "loss": 0.4814, "step": 895 }, { "epoch": 0.49616982002768806, "grad_norm": 0.40264445543289185, "learning_rate": 9.870351464878614e-06, "loss": 0.5019, "step": 896 }, { "epoch": 0.4967235809875404, "grad_norm": 0.44068869948387146, "learning_rate": 9.869621157224416e-06, "loss": 0.4641, "step": 897 }, { "epoch": 0.4972773419473927, "grad_norm": 0.4392201006412506, "learning_rate": 9.868888825609482e-06, "loss": 0.5054, "step": 898 }, { "epoch": 0.497831102907245, "grad_norm": 0.4300096929073334, "learning_rate": 9.868154470338189e-06, "loss": 0.4746, "step": 899 }, { "epoch": 0.49838486386709735, "grad_norm": 0.4665893316268921, "learning_rate": 9.867418091715761e-06, "loss": 0.4898, "step": 900 }, { "epoch": 0.4989386248269497, "grad_norm": 0.5322461128234863, "learning_rate": 9.866679690048254e-06, "loss": 0.4896, "step": 901 }, { "epoch": 0.49949238578680205, "grad_norm": 0.4108174741268158, "learning_rate": 9.865939265642575e-06, "loss": 0.4693, "step": 902 }, { "epoch": 0.5000461467466544, "grad_norm": 0.6025389432907104, "learning_rate": 9.865196818806461e-06, "loss": 0.476, "step": 903 }, { "epoch": 0.5005999077065066, "grad_norm": 0.40068456530570984, "learning_rate": 9.864452349848501e-06, "loss": 0.5122, "step": 904 }, { "epoch": 0.501153668666359, "grad_norm": 0.4258415699005127, "learning_rate": 9.863705859078115e-06, "loss": 0.4818, "step": 905 }, { "epoch": 0.5017074296262114, "grad_norm": 0.38699817657470703, "learning_rate": 9.862957346805568e-06, "loss": 0.4778, "step": 906 }, { "epoch": 0.5022611905860637, "grad_norm": 0.363513708114624, "learning_rate": 9.862206813341966e-06, "loss": 0.4723, "step": 907 }, { "epoch": 0.502814951545916, "grad_norm": 0.3957225978374481, "learning_rate": 9.86145425899925e-06, "loss": 0.4665, "step": 908 }, { "epoch": 0.5033687125057683, "grad_norm": 0.47229206562042236, "learning_rate": 9.860699684090204e-06, "loss": 0.4937, "step": 909 }, { "epoch": 0.5039224734656207, "grad_norm": 0.421001136302948, "learning_rate": 9.859943088928456e-06, "loss": 0.4815, "step": 910 }, { "epoch": 0.504476234425473, "grad_norm": 0.37605518102645874, "learning_rate": 9.859184473828465e-06, "loss": 0.4941, "step": 911 }, { "epoch": 0.5050299953853253, "grad_norm": 0.49998190999031067, "learning_rate": 9.858423839105537e-06, "loss": 0.5077, "step": 912 }, { "epoch": 0.5055837563451777, "grad_norm": 0.3762170672416687, "learning_rate": 9.857661185075815e-06, "loss": 0.4548, "step": 913 }, { "epoch": 0.50613751730503, "grad_norm": 0.44702792167663574, "learning_rate": 9.85689651205628e-06, "loss": 0.5012, "step": 914 }, { "epoch": 0.5066912782648824, "grad_norm": 0.399705708026886, "learning_rate": 9.856129820364752e-06, "loss": 0.4926, "step": 915 }, { "epoch": 0.5072450392247346, "grad_norm": 0.3888434171676636, "learning_rate": 9.855361110319892e-06, "loss": 0.4729, "step": 916 }, { "epoch": 0.507798800184587, "grad_norm": 0.42499759793281555, "learning_rate": 9.8545903822412e-06, "loss": 0.4711, "step": 917 }, { "epoch": 0.5083525611444393, "grad_norm": 0.47658205032348633, "learning_rate": 9.853817636449012e-06, "loss": 0.4801, "step": 918 }, { "epoch": 0.5089063221042917, "grad_norm": 0.3844057619571686, "learning_rate": 9.853042873264506e-06, "loss": 0.4748, "step": 919 }, { "epoch": 0.509460083064144, "grad_norm": 0.3782051205635071, "learning_rate": 9.852266093009695e-06, "loss": 0.4612, "step": 920 }, { "epoch": 0.5100138440239963, "grad_norm": 0.4963882565498352, "learning_rate": 9.851487296007434e-06, "loss": 0.5139, "step": 921 }, { "epoch": 0.5105676049838487, "grad_norm": 0.3985527455806732, "learning_rate": 9.850706482581413e-06, "loss": 0.4631, "step": 922 }, { "epoch": 0.511121365943701, "grad_norm": 0.452970951795578, "learning_rate": 9.849923653056161e-06, "loss": 0.4706, "step": 923 }, { "epoch": 0.5116751269035533, "grad_norm": 0.39081117510795593, "learning_rate": 9.849138807757049e-06, "loss": 0.4695, "step": 924 }, { "epoch": 0.5122288878634056, "grad_norm": 0.4107449948787689, "learning_rate": 9.848351947010277e-06, "loss": 0.4793, "step": 925 }, { "epoch": 0.512782648823258, "grad_norm": 0.453405499458313, "learning_rate": 9.847563071142894e-06, "loss": 0.5035, "step": 926 }, { "epoch": 0.5133364097831102, "grad_norm": 0.42526131868362427, "learning_rate": 9.846772180482774e-06, "loss": 0.4906, "step": 927 }, { "epoch": 0.5138901707429626, "grad_norm": 0.4160592257976532, "learning_rate": 9.84597927535864e-06, "loss": 0.4619, "step": 928 }, { "epoch": 0.514443931702815, "grad_norm": 0.45479467511177063, "learning_rate": 9.845184356100045e-06, "loss": 0.514, "step": 929 }, { "epoch": 0.5149976926626673, "grad_norm": 0.38659000396728516, "learning_rate": 9.844387423037381e-06, "loss": 0.4677, "step": 930 }, { "epoch": 0.5155514536225196, "grad_norm": 0.38427045941352844, "learning_rate": 9.84358847650188e-06, "loss": 0.4364, "step": 931 }, { "epoch": 0.5161052145823719, "grad_norm": 0.5658709406852722, "learning_rate": 9.842787516825602e-06, "loss": 0.4939, "step": 932 }, { "epoch": 0.5166589755422243, "grad_norm": 0.3712390661239624, "learning_rate": 9.841984544341459e-06, "loss": 0.4911, "step": 933 }, { "epoch": 0.5172127365020766, "grad_norm": 0.5703173875808716, "learning_rate": 9.841179559383182e-06, "loss": 0.4585, "step": 934 }, { "epoch": 0.5177664974619289, "grad_norm": 0.4473087191581726, "learning_rate": 9.840372562285351e-06, "loss": 0.495, "step": 935 }, { "epoch": 0.5183202584217813, "grad_norm": 0.44920778274536133, "learning_rate": 9.839563553383377e-06, "loss": 0.4736, "step": 936 }, { "epoch": 0.5188740193816336, "grad_norm": 0.4246853291988373, "learning_rate": 9.83875253301351e-06, "loss": 0.4805, "step": 937 }, { "epoch": 0.519427780341486, "grad_norm": 0.44855910539627075, "learning_rate": 9.837939501512834e-06, "loss": 0.4669, "step": 938 }, { "epoch": 0.5199815413013382, "grad_norm": 0.4346722662448883, "learning_rate": 9.837124459219266e-06, "loss": 0.5136, "step": 939 }, { "epoch": 0.5205353022611906, "grad_norm": 0.4333327114582062, "learning_rate": 9.836307406471569e-06, "loss": 0.4795, "step": 940 }, { "epoch": 0.5210890632210429, "grad_norm": 0.4037315249443054, "learning_rate": 9.835488343609327e-06, "loss": 0.4905, "step": 941 }, { "epoch": 0.5216428241808952, "grad_norm": 0.3863520622253418, "learning_rate": 9.83466727097297e-06, "loss": 0.498, "step": 942 }, { "epoch": 0.5221965851407476, "grad_norm": 0.3797200620174408, "learning_rate": 9.833844188903763e-06, "loss": 0.4679, "step": 943 }, { "epoch": 0.5227503461005999, "grad_norm": 0.4921037554740906, "learning_rate": 9.833019097743801e-06, "loss": 0.5118, "step": 944 }, { "epoch": 0.5233041070604523, "grad_norm": 0.38158801198005676, "learning_rate": 9.832191997836016e-06, "loss": 0.4606, "step": 945 }, { "epoch": 0.5238578680203045, "grad_norm": 0.42654716968536377, "learning_rate": 9.831362889524179e-06, "loss": 0.5044, "step": 946 }, { "epoch": 0.5244116289801569, "grad_norm": 0.427574098110199, "learning_rate": 9.830531773152889e-06, "loss": 0.4982, "step": 947 }, { "epoch": 0.5249653899400092, "grad_norm": 0.3904443383216858, "learning_rate": 9.829698649067583e-06, "loss": 0.4824, "step": 948 }, { "epoch": 0.5255191508998616, "grad_norm": 0.3758675158023834, "learning_rate": 9.828863517614533e-06, "loss": 0.4796, "step": 949 }, { "epoch": 0.5260729118597139, "grad_norm": 0.3632560074329376, "learning_rate": 9.828026379140847e-06, "loss": 0.4615, "step": 950 }, { "epoch": 0.5266266728195662, "grad_norm": 0.4011538624763489, "learning_rate": 9.827187233994462e-06, "loss": 0.4623, "step": 951 }, { "epoch": 0.5271804337794186, "grad_norm": 0.36223411560058594, "learning_rate": 9.826346082524152e-06, "loss": 0.4969, "step": 952 }, { "epoch": 0.5277341947392709, "grad_norm": 0.3913445770740509, "learning_rate": 9.825502925079527e-06, "loss": 0.4835, "step": 953 }, { "epoch": 0.5282879556991232, "grad_norm": 0.40164634585380554, "learning_rate": 9.824657762011026e-06, "loss": 0.4712, "step": 954 }, { "epoch": 0.5288417166589755, "grad_norm": 0.42099103331565857, "learning_rate": 9.823810593669924e-06, "loss": 0.4739, "step": 955 }, { "epoch": 0.5293954776188279, "grad_norm": 0.39911314845085144, "learning_rate": 9.822961420408334e-06, "loss": 0.5009, "step": 956 }, { "epoch": 0.5299492385786801, "grad_norm": 0.3946044445037842, "learning_rate": 9.822110242579194e-06, "loss": 0.4906, "step": 957 }, { "epoch": 0.5305029995385325, "grad_norm": 0.41038382053375244, "learning_rate": 9.821257060536279e-06, "loss": 0.524, "step": 958 }, { "epoch": 0.5310567604983849, "grad_norm": 0.40173232555389404, "learning_rate": 9.820401874634198e-06, "loss": 0.4983, "step": 959 }, { "epoch": 0.5316105214582372, "grad_norm": 0.3873099088668823, "learning_rate": 9.819544685228392e-06, "loss": 0.4856, "step": 960 }, { "epoch": 0.5321642824180896, "grad_norm": 0.43359097838401794, "learning_rate": 9.818685492675135e-06, "loss": 0.4859, "step": 961 }, { "epoch": 0.5327180433779418, "grad_norm": 0.4455109238624573, "learning_rate": 9.817824297331533e-06, "loss": 0.4705, "step": 962 }, { "epoch": 0.5332718043377942, "grad_norm": 0.45274171233177185, "learning_rate": 9.816961099555524e-06, "loss": 0.4544, "step": 963 }, { "epoch": 0.5338255652976465, "grad_norm": 0.40063250064849854, "learning_rate": 9.816095899705878e-06, "loss": 0.4706, "step": 964 }, { "epoch": 0.5343793262574988, "grad_norm": 0.4969136118888855, "learning_rate": 9.815228698142201e-06, "loss": 0.4716, "step": 965 }, { "epoch": 0.5349330872173512, "grad_norm": 0.435459166765213, "learning_rate": 9.814359495224926e-06, "loss": 0.4631, "step": 966 }, { "epoch": 0.5354868481772035, "grad_norm": 0.5502042174339294, "learning_rate": 9.81348829131532e-06, "loss": 0.5114, "step": 967 }, { "epoch": 0.5360406091370559, "grad_norm": 0.4521084725856781, "learning_rate": 9.81261508677548e-06, "loss": 0.507, "step": 968 }, { "epoch": 0.5365943700969081, "grad_norm": 0.41840437054634094, "learning_rate": 9.811739881968338e-06, "loss": 0.4832, "step": 969 }, { "epoch": 0.5371481310567605, "grad_norm": 0.39289429783821106, "learning_rate": 9.810862677257657e-06, "loss": 0.4859, "step": 970 }, { "epoch": 0.5377018920166128, "grad_norm": 0.4194715917110443, "learning_rate": 9.809983473008027e-06, "loss": 0.4945, "step": 971 }, { "epoch": 0.5382556529764652, "grad_norm": 0.4247782826423645, "learning_rate": 9.80910226958487e-06, "loss": 0.5016, "step": 972 }, { "epoch": 0.5388094139363175, "grad_norm": 0.4690609872341156, "learning_rate": 9.808219067354448e-06, "loss": 0.4868, "step": 973 }, { "epoch": 0.5393631748961698, "grad_norm": 0.4510354697704315, "learning_rate": 9.807333866683838e-06, "loss": 0.4688, "step": 974 }, { "epoch": 0.5399169358560222, "grad_norm": 0.3943024277687073, "learning_rate": 9.806446667940959e-06, "loss": 0.5041, "step": 975 }, { "epoch": 0.5404706968158745, "grad_norm": 0.48757925629615784, "learning_rate": 9.805557471494558e-06, "loss": 0.4567, "step": 976 }, { "epoch": 0.5410244577757268, "grad_norm": 0.4259018301963806, "learning_rate": 9.804666277714212e-06, "loss": 0.4836, "step": 977 }, { "epoch": 0.5415782187355791, "grad_norm": 0.3833393156528473, "learning_rate": 9.803773086970328e-06, "loss": 0.5003, "step": 978 }, { "epoch": 0.5421319796954315, "grad_norm": 0.41831091046333313, "learning_rate": 9.80287789963414e-06, "loss": 0.4623, "step": 979 }, { "epoch": 0.5426857406552839, "grad_norm": 0.38282907009124756, "learning_rate": 9.801980716077717e-06, "loss": 0.4452, "step": 980 }, { "epoch": 0.5432395016151361, "grad_norm": 0.4633399248123169, "learning_rate": 9.801081536673958e-06, "loss": 0.5274, "step": 981 }, { "epoch": 0.5437932625749885, "grad_norm": 0.4494279623031616, "learning_rate": 9.800180361796585e-06, "loss": 0.4878, "step": 982 }, { "epoch": 0.5443470235348408, "grad_norm": 0.36994776129722595, "learning_rate": 9.799277191820154e-06, "loss": 0.4754, "step": 983 }, { "epoch": 0.5449007844946931, "grad_norm": 0.4315449595451355, "learning_rate": 9.798372027120051e-06, "loss": 0.4911, "step": 984 }, { "epoch": 0.5454545454545454, "grad_norm": 0.43971362709999084, "learning_rate": 9.797464868072489e-06, "loss": 0.4688, "step": 985 }, { "epoch": 0.5460083064143978, "grad_norm": 0.37869274616241455, "learning_rate": 9.796555715054508e-06, "loss": 0.4587, "step": 986 }, { "epoch": 0.5465620673742501, "grad_norm": 0.4102703630924225, "learning_rate": 9.795644568443984e-06, "loss": 0.4618, "step": 987 }, { "epoch": 0.5471158283341024, "grad_norm": 0.43598347902297974, "learning_rate": 9.794731428619614e-06, "loss": 0.5067, "step": 988 }, { "epoch": 0.5476695892939548, "grad_norm": 0.42545703053474426, "learning_rate": 9.793816295960928e-06, "loss": 0.5206, "step": 989 }, { "epoch": 0.5482233502538071, "grad_norm": 0.5297684669494629, "learning_rate": 9.79289917084828e-06, "loss": 0.4643, "step": 990 }, { "epoch": 0.5487771112136595, "grad_norm": 0.47686538100242615, "learning_rate": 9.791980053662855e-06, "loss": 0.4761, "step": 991 }, { "epoch": 0.5493308721735117, "grad_norm": 0.42472562193870544, "learning_rate": 9.791058944786669e-06, "loss": 0.4916, "step": 992 }, { "epoch": 0.5498846331333641, "grad_norm": 0.5175829529762268, "learning_rate": 9.79013584460256e-06, "loss": 0.4689, "step": 993 }, { "epoch": 0.5504383940932164, "grad_norm": 0.4339579939842224, "learning_rate": 9.789210753494196e-06, "loss": 0.4771, "step": 994 }, { "epoch": 0.5509921550530688, "grad_norm": 0.43897461891174316, "learning_rate": 9.788283671846072e-06, "loss": 0.4776, "step": 995 }, { "epoch": 0.5515459160129211, "grad_norm": 0.44765639305114746, "learning_rate": 9.787354600043513e-06, "loss": 0.4711, "step": 996 }, { "epoch": 0.5520996769727734, "grad_norm": 0.5037674903869629, "learning_rate": 9.78642353847267e-06, "loss": 0.4582, "step": 997 }, { "epoch": 0.5526534379326258, "grad_norm": 0.436972439289093, "learning_rate": 9.785490487520517e-06, "loss": 0.4882, "step": 998 }, { "epoch": 0.553207198892478, "grad_norm": 0.46437013149261475, "learning_rate": 9.78455544757486e-06, "loss": 0.4691, "step": 999 }, { "epoch": 0.5537609598523304, "grad_norm": 0.3785422742366791, "learning_rate": 9.783618419024327e-06, "loss": 0.48, "step": 1000 }, { "epoch": 0.5543147208121827, "grad_norm": 0.41479936242103577, "learning_rate": 9.782679402258379e-06, "loss": 0.502, "step": 1001 }, { "epoch": 0.5548684817720351, "grad_norm": 0.41505759954452515, "learning_rate": 9.781738397667297e-06, "loss": 0.4751, "step": 1002 }, { "epoch": 0.5554222427318874, "grad_norm": 0.39144381880760193, "learning_rate": 9.780795405642192e-06, "loss": 0.4853, "step": 1003 }, { "epoch": 0.5559760036917397, "grad_norm": 0.454562783241272, "learning_rate": 9.779850426574999e-06, "loss": 0.4943, "step": 1004 }, { "epoch": 0.5565297646515921, "grad_norm": 0.38999703526496887, "learning_rate": 9.778903460858482e-06, "loss": 0.4584, "step": 1005 }, { "epoch": 0.5570835256114444, "grad_norm": 0.4089610278606415, "learning_rate": 9.777954508886225e-06, "loss": 0.4824, "step": 1006 }, { "epoch": 0.5576372865712967, "grad_norm": 0.4263215959072113, "learning_rate": 9.777003571052641e-06, "loss": 0.4703, "step": 1007 }, { "epoch": 0.558191047531149, "grad_norm": 0.3900358974933624, "learning_rate": 9.77605064775297e-06, "loss": 0.4723, "step": 1008 }, { "epoch": 0.5587448084910014, "grad_norm": 0.4521275460720062, "learning_rate": 9.775095739383276e-06, "loss": 0.4812, "step": 1009 }, { "epoch": 0.5592985694508538, "grad_norm": 0.37081149220466614, "learning_rate": 9.774138846340448e-06, "loss": 0.4768, "step": 1010 }, { "epoch": 0.559852330410706, "grad_norm": 0.39347171783447266, "learning_rate": 9.773179969022196e-06, "loss": 0.4895, "step": 1011 }, { "epoch": 0.5604060913705584, "grad_norm": 0.386059045791626, "learning_rate": 9.772219107827062e-06, "loss": 0.4771, "step": 1012 }, { "epoch": 0.5609598523304107, "grad_norm": 0.37710851430892944, "learning_rate": 9.771256263154406e-06, "loss": 0.4669, "step": 1013 }, { "epoch": 0.5615136132902631, "grad_norm": 0.3881578743457794, "learning_rate": 9.770291435404416e-06, "loss": 0.4752, "step": 1014 }, { "epoch": 0.5620673742501153, "grad_norm": 0.3915596008300781, "learning_rate": 9.769324624978106e-06, "loss": 0.4568, "step": 1015 }, { "epoch": 0.5626211352099677, "grad_norm": 0.45749250054359436, "learning_rate": 9.768355832277306e-06, "loss": 0.4837, "step": 1016 }, { "epoch": 0.5631748961698201, "grad_norm": 0.3609451353549957, "learning_rate": 9.76738505770468e-06, "loss": 0.4644, "step": 1017 }, { "epoch": 0.5637286571296724, "grad_norm": 0.42881205677986145, "learning_rate": 9.76641230166371e-06, "loss": 0.4984, "step": 1018 }, { "epoch": 0.5642824180895247, "grad_norm": 0.473361998796463, "learning_rate": 9.765437564558702e-06, "loss": 0.469, "step": 1019 }, { "epoch": 0.564836179049377, "grad_norm": 0.39516180753707886, "learning_rate": 9.764460846794785e-06, "loss": 0.4972, "step": 1020 }, { "epoch": 0.5653899400092294, "grad_norm": 0.4871281683444977, "learning_rate": 9.763482148777915e-06, "loss": 0.4541, "step": 1021 }, { "epoch": 0.5659437009690816, "grad_norm": 0.4321620464324951, "learning_rate": 9.762501470914865e-06, "loss": 0.5237, "step": 1022 }, { "epoch": 0.566497461928934, "grad_norm": 0.5116628408432007, "learning_rate": 9.761518813613236e-06, "loss": 0.4662, "step": 1023 }, { "epoch": 0.5670512228887863, "grad_norm": 0.44205552339553833, "learning_rate": 9.760534177281452e-06, "loss": 0.4673, "step": 1024 }, { "epoch": 0.5676049838486387, "grad_norm": 0.43510058522224426, "learning_rate": 9.759547562328752e-06, "loss": 0.4698, "step": 1025 }, { "epoch": 0.568158744808491, "grad_norm": 0.4751429855823517, "learning_rate": 9.758558969165207e-06, "loss": 0.482, "step": 1026 }, { "epoch": 0.5687125057683433, "grad_norm": 0.3840591311454773, "learning_rate": 9.757568398201705e-06, "loss": 0.4365, "step": 1027 }, { "epoch": 0.5692662667281957, "grad_norm": 0.41593286395072937, "learning_rate": 9.756575849849958e-06, "loss": 0.4822, "step": 1028 }, { "epoch": 0.569820027688048, "grad_norm": 0.4062572717666626, "learning_rate": 9.755581324522496e-06, "loss": 0.4819, "step": 1029 }, { "epoch": 0.5703737886479003, "grad_norm": 0.4050317704677582, "learning_rate": 9.754584822632675e-06, "loss": 0.4543, "step": 1030 }, { "epoch": 0.5709275496077526, "grad_norm": 0.43368610739707947, "learning_rate": 9.753586344594674e-06, "loss": 0.4627, "step": 1031 }, { "epoch": 0.571481310567605, "grad_norm": 0.3640391528606415, "learning_rate": 9.752585890823486e-06, "loss": 0.4713, "step": 1032 }, { "epoch": 0.5720350715274574, "grad_norm": 0.3953351378440857, "learning_rate": 9.751583461734932e-06, "loss": 0.4656, "step": 1033 }, { "epoch": 0.5725888324873096, "grad_norm": 0.3762674033641815, "learning_rate": 9.750579057745652e-06, "loss": 0.4792, "step": 1034 }, { "epoch": 0.573142593447162, "grad_norm": 0.37683016061782837, "learning_rate": 9.749572679273104e-06, "loss": 0.4741, "step": 1035 }, { "epoch": 0.5736963544070143, "grad_norm": 0.41023826599121094, "learning_rate": 9.748564326735572e-06, "loss": 0.4768, "step": 1036 }, { "epoch": 0.5742501153668667, "grad_norm": 0.42402681708335876, "learning_rate": 9.747554000552156e-06, "loss": 0.4843, "step": 1037 }, { "epoch": 0.5748038763267189, "grad_norm": 0.43029171228408813, "learning_rate": 9.746541701142777e-06, "loss": 0.4771, "step": 1038 }, { "epoch": 0.5753576372865713, "grad_norm": 0.43121105432510376, "learning_rate": 9.74552742892818e-06, "loss": 0.4781, "step": 1039 }, { "epoch": 0.5759113982464237, "grad_norm": 0.4319000542163849, "learning_rate": 9.744511184329924e-06, "loss": 0.479, "step": 1040 }, { "epoch": 0.576465159206276, "grad_norm": 0.47726520895957947, "learning_rate": 9.743492967770391e-06, "loss": 0.473, "step": 1041 }, { "epoch": 0.5770189201661283, "grad_norm": 0.4156806170940399, "learning_rate": 9.742472779672786e-06, "loss": 0.4687, "step": 1042 }, { "epoch": 0.5775726811259806, "grad_norm": 0.4566013514995575, "learning_rate": 9.741450620461125e-06, "loss": 0.4708, "step": 1043 }, { "epoch": 0.578126442085833, "grad_norm": 0.39063555002212524, "learning_rate": 9.740426490560251e-06, "loss": 0.466, "step": 1044 }, { "epoch": 0.5786802030456852, "grad_norm": 0.45334768295288086, "learning_rate": 9.739400390395824e-06, "loss": 0.4421, "step": 1045 }, { "epoch": 0.5792339640055376, "grad_norm": 0.39572519063949585, "learning_rate": 9.738372320394319e-06, "loss": 0.4725, "step": 1046 }, { "epoch": 0.57978772496539, "grad_norm": 0.43986865878105164, "learning_rate": 9.737342280983032e-06, "loss": 0.466, "step": 1047 }, { "epoch": 0.5803414859252423, "grad_norm": 0.40859347581863403, "learning_rate": 9.736310272590082e-06, "loss": 0.489, "step": 1048 }, { "epoch": 0.5808952468850946, "grad_norm": 0.40705233812332153, "learning_rate": 9.735276295644401e-06, "loss": 0.4995, "step": 1049 }, { "epoch": 0.5814490078449469, "grad_norm": 0.4699002504348755, "learning_rate": 9.734240350575741e-06, "loss": 0.4768, "step": 1050 }, { "epoch": 0.5820027688047993, "grad_norm": 0.360856294631958, "learning_rate": 9.733202437814673e-06, "loss": 0.451, "step": 1051 }, { "epoch": 0.5825565297646516, "grad_norm": 0.4462083578109741, "learning_rate": 9.73216255779258e-06, "loss": 0.5019, "step": 1052 }, { "epoch": 0.5831102907245039, "grad_norm": 0.42489951848983765, "learning_rate": 9.731120710941675e-06, "loss": 0.4723, "step": 1053 }, { "epoch": 0.5836640516843562, "grad_norm": 0.4075542986392975, "learning_rate": 9.730076897694975e-06, "loss": 0.4607, "step": 1054 }, { "epoch": 0.5842178126442086, "grad_norm": 0.4475225806236267, "learning_rate": 9.72903111848632e-06, "loss": 0.4954, "step": 1055 }, { "epoch": 0.584771573604061, "grad_norm": 0.3822975158691406, "learning_rate": 9.727983373750372e-06, "loss": 0.471, "step": 1056 }, { "epoch": 0.5853253345639132, "grad_norm": 0.36577412486076355, "learning_rate": 9.726933663922602e-06, "loss": 0.4501, "step": 1057 }, { "epoch": 0.5858790955237656, "grad_norm": 0.47545063495635986, "learning_rate": 9.725881989439299e-06, "loss": 0.454, "step": 1058 }, { "epoch": 0.5864328564836179, "grad_norm": 0.37499967217445374, "learning_rate": 9.724828350737574e-06, "loss": 0.4779, "step": 1059 }, { "epoch": 0.5869866174434702, "grad_norm": 0.4036925733089447, "learning_rate": 9.723772748255348e-06, "loss": 0.466, "step": 1060 }, { "epoch": 0.5875403784033225, "grad_norm": 0.37403643131256104, "learning_rate": 9.722715182431363e-06, "loss": 0.4572, "step": 1061 }, { "epoch": 0.5880941393631749, "grad_norm": 0.40160688757896423, "learning_rate": 9.721655653705176e-06, "loss": 0.4825, "step": 1062 }, { "epoch": 0.5886479003230273, "grad_norm": 0.36085695028305054, "learning_rate": 9.720594162517155e-06, "loss": 0.4776, "step": 1063 }, { "epoch": 0.5892016612828795, "grad_norm": 0.41617390513420105, "learning_rate": 9.719530709308493e-06, "loss": 0.4932, "step": 1064 }, { "epoch": 0.5897554222427319, "grad_norm": 0.3862336575984955, "learning_rate": 9.71846529452119e-06, "loss": 0.4661, "step": 1065 }, { "epoch": 0.5903091832025842, "grad_norm": 0.41136181354522705, "learning_rate": 9.717397918598064e-06, "loss": 0.4707, "step": 1066 }, { "epoch": 0.5908629441624366, "grad_norm": 0.41098248958587646, "learning_rate": 9.716328581982749e-06, "loss": 0.4823, "step": 1067 }, { "epoch": 0.5914167051222888, "grad_norm": 0.3866676390171051, "learning_rate": 9.715257285119693e-06, "loss": 0.4689, "step": 1068 }, { "epoch": 0.5919704660821412, "grad_norm": 0.38574522733688354, "learning_rate": 9.714184028454161e-06, "loss": 0.4758, "step": 1069 }, { "epoch": 0.5925242270419936, "grad_norm": 0.40090519189834595, "learning_rate": 9.713108812432228e-06, "loss": 0.474, "step": 1070 }, { "epoch": 0.5930779880018459, "grad_norm": 0.4093726575374603, "learning_rate": 9.712031637500787e-06, "loss": 0.4771, "step": 1071 }, { "epoch": 0.5936317489616982, "grad_norm": 0.3924410045146942, "learning_rate": 9.710952504107546e-06, "loss": 0.4799, "step": 1072 }, { "epoch": 0.5941855099215505, "grad_norm": 0.41381385922431946, "learning_rate": 9.709871412701024e-06, "loss": 0.47, "step": 1073 }, { "epoch": 0.5947392708814029, "grad_norm": 0.39116042852401733, "learning_rate": 9.708788363730555e-06, "loss": 0.4691, "step": 1074 }, { "epoch": 0.5952930318412551, "grad_norm": 0.40025070309638977, "learning_rate": 9.707703357646286e-06, "loss": 0.4338, "step": 1075 }, { "epoch": 0.5958467928011075, "grad_norm": 0.38308414816856384, "learning_rate": 9.706616394899177e-06, "loss": 0.4768, "step": 1076 }, { "epoch": 0.5964005537609599, "grad_norm": 0.38082870841026306, "learning_rate": 9.705527475941006e-06, "loss": 0.4788, "step": 1077 }, { "epoch": 0.5969543147208122, "grad_norm": 0.45329269766807556, "learning_rate": 9.704436601224359e-06, "loss": 0.4884, "step": 1078 }, { "epoch": 0.5975080756806646, "grad_norm": 0.4223858416080475, "learning_rate": 9.703343771202633e-06, "loss": 0.4966, "step": 1079 }, { "epoch": 0.5980618366405168, "grad_norm": 0.46564212441444397, "learning_rate": 9.702248986330046e-06, "loss": 0.486, "step": 1080 }, { "epoch": 0.5986155976003692, "grad_norm": 0.39236828684806824, "learning_rate": 9.70115224706162e-06, "loss": 0.454, "step": 1081 }, { "epoch": 0.5991693585602215, "grad_norm": 0.4203537702560425, "learning_rate": 9.700053553853194e-06, "loss": 0.4699, "step": 1082 }, { "epoch": 0.5997231195200738, "grad_norm": 0.4052623212337494, "learning_rate": 9.69895290716142e-06, "loss": 0.4761, "step": 1083 }, { "epoch": 0.6002768804799261, "grad_norm": 0.4321443736553192, "learning_rate": 9.697850307443755e-06, "loss": 0.5029, "step": 1084 }, { "epoch": 0.6008306414397785, "grad_norm": 0.46118634939193726, "learning_rate": 9.696745755158476e-06, "loss": 0.476, "step": 1085 }, { "epoch": 0.6013844023996309, "grad_norm": 0.4154890179634094, "learning_rate": 9.695639250764667e-06, "loss": 0.4765, "step": 1086 }, { "epoch": 0.6019381633594831, "grad_norm": 0.4137137234210968, "learning_rate": 9.694530794722226e-06, "loss": 0.4886, "step": 1087 }, { "epoch": 0.6024919243193355, "grad_norm": 0.42291855812072754, "learning_rate": 9.693420387491857e-06, "loss": 0.4662, "step": 1088 }, { "epoch": 0.6030456852791878, "grad_norm": 0.4419591724872589, "learning_rate": 9.692308029535082e-06, "loss": 0.4837, "step": 1089 }, { "epoch": 0.6035994462390402, "grad_norm": 0.4633064866065979, "learning_rate": 9.691193721314229e-06, "loss": 0.4671, "step": 1090 }, { "epoch": 0.6041532071988924, "grad_norm": 0.3861783742904663, "learning_rate": 9.690077463292437e-06, "loss": 0.4536, "step": 1091 }, { "epoch": 0.6047069681587448, "grad_norm": 0.41687700152397156, "learning_rate": 9.688959255933659e-06, "loss": 0.4836, "step": 1092 }, { "epoch": 0.6052607291185972, "grad_norm": 0.4089800715446472, "learning_rate": 9.687839099702653e-06, "loss": 0.4662, "step": 1093 }, { "epoch": 0.6058144900784495, "grad_norm": 0.37759214639663696, "learning_rate": 9.686716995064993e-06, "loss": 0.4592, "step": 1094 }, { "epoch": 0.6063682510383018, "grad_norm": 0.3644743859767914, "learning_rate": 9.685592942487056e-06, "loss": 0.473, "step": 1095 }, { "epoch": 0.6069220119981541, "grad_norm": 0.410145103931427, "learning_rate": 9.684466942436032e-06, "loss": 0.4938, "step": 1096 }, { "epoch": 0.6074757729580065, "grad_norm": 0.38548871874809265, "learning_rate": 9.68333899537992e-06, "loss": 0.4699, "step": 1097 }, { "epoch": 0.6080295339178587, "grad_norm": 0.360137015581131, "learning_rate": 9.682209101787532e-06, "loss": 0.4503, "step": 1098 }, { "epoch": 0.6085832948777111, "grad_norm": 0.3717727065086365, "learning_rate": 9.681077262128484e-06, "loss": 0.4629, "step": 1099 }, { "epoch": 0.6091370558375635, "grad_norm": 0.4310454726219177, "learning_rate": 9.679943476873201e-06, "loss": 0.478, "step": 1100 }, { "epoch": 0.6096908167974158, "grad_norm": 0.43745338916778564, "learning_rate": 9.678807746492922e-06, "loss": 0.4861, "step": 1101 }, { "epoch": 0.6102445777572681, "grad_norm": 0.3577141761779785, "learning_rate": 9.677670071459686e-06, "loss": 0.4903, "step": 1102 }, { "epoch": 0.6107983387171204, "grad_norm": 0.4437004625797272, "learning_rate": 9.676530452246347e-06, "loss": 0.4664, "step": 1103 }, { "epoch": 0.6113520996769728, "grad_norm": 0.4109301269054413, "learning_rate": 9.675388889326564e-06, "loss": 0.4816, "step": 1104 }, { "epoch": 0.6119058606368251, "grad_norm": 0.3534083962440491, "learning_rate": 9.674245383174808e-06, "loss": 0.4554, "step": 1105 }, { "epoch": 0.6124596215966774, "grad_norm": 0.4022260010242462, "learning_rate": 9.673099934266351e-06, "loss": 0.5164, "step": 1106 }, { "epoch": 0.6130133825565298, "grad_norm": 0.3903641700744629, "learning_rate": 9.671952543077278e-06, "loss": 0.4486, "step": 1107 }, { "epoch": 0.6135671435163821, "grad_norm": 0.4408094584941864, "learning_rate": 9.670803210084478e-06, "loss": 0.4684, "step": 1108 }, { "epoch": 0.6141209044762345, "grad_norm": 0.372468501329422, "learning_rate": 9.669651935765648e-06, "loss": 0.4636, "step": 1109 }, { "epoch": 0.6146746654360867, "grad_norm": 0.41390275955200195, "learning_rate": 9.668498720599294e-06, "loss": 0.4633, "step": 1110 }, { "epoch": 0.6152284263959391, "grad_norm": 0.37634769082069397, "learning_rate": 9.667343565064724e-06, "loss": 0.4867, "step": 1111 }, { "epoch": 0.6157821873557914, "grad_norm": 0.4454021155834198, "learning_rate": 9.66618646964206e-06, "loss": 0.4864, "step": 1112 }, { "epoch": 0.6163359483156438, "grad_norm": 0.34683549404144287, "learning_rate": 9.665027434812219e-06, "loss": 0.4752, "step": 1113 }, { "epoch": 0.616889709275496, "grad_norm": 0.3828405737876892, "learning_rate": 9.663866461056936e-06, "loss": 0.4723, "step": 1114 }, { "epoch": 0.6174434702353484, "grad_norm": 0.40305960178375244, "learning_rate": 9.662703548858747e-06, "loss": 0.4817, "step": 1115 }, { "epoch": 0.6179972311952008, "grad_norm": 0.3931417465209961, "learning_rate": 9.66153869870099e-06, "loss": 0.5015, "step": 1116 }, { "epoch": 0.618550992155053, "grad_norm": 0.35548749566078186, "learning_rate": 9.66037191106781e-06, "loss": 0.4468, "step": 1117 }, { "epoch": 0.6191047531149054, "grad_norm": 0.42970770597457886, "learning_rate": 9.659203186444163e-06, "loss": 0.503, "step": 1118 }, { "epoch": 0.6196585140747577, "grad_norm": 0.38652828335762024, "learning_rate": 9.658032525315808e-06, "loss": 0.4839, "step": 1119 }, { "epoch": 0.6202122750346101, "grad_norm": 0.40463656187057495, "learning_rate": 9.6568599281693e-06, "loss": 0.4882, "step": 1120 }, { "epoch": 0.6207660359944623, "grad_norm": 0.3732741177082062, "learning_rate": 9.655685395492011e-06, "loss": 0.4761, "step": 1121 }, { "epoch": 0.6213197969543147, "grad_norm": 0.3856641948223114, "learning_rate": 9.65450892777211e-06, "loss": 0.5027, "step": 1122 }, { "epoch": 0.6218735579141671, "grad_norm": 0.4222745895385742, "learning_rate": 9.653330525498571e-06, "loss": 0.4797, "step": 1123 }, { "epoch": 0.6224273188740194, "grad_norm": 0.3937954604625702, "learning_rate": 9.652150189161177e-06, "loss": 0.497, "step": 1124 }, { "epoch": 0.6229810798338717, "grad_norm": 0.3400292694568634, "learning_rate": 9.65096791925051e-06, "loss": 0.4418, "step": 1125 }, { "epoch": 0.623534840793724, "grad_norm": 0.4560067057609558, "learning_rate": 9.649783716257952e-06, "loss": 0.453, "step": 1126 }, { "epoch": 0.6240886017535764, "grad_norm": 0.3494074046611786, "learning_rate": 9.6485975806757e-06, "loss": 0.4601, "step": 1127 }, { "epoch": 0.6246423627134287, "grad_norm": 0.42945748567581177, "learning_rate": 9.647409512996746e-06, "loss": 0.4519, "step": 1128 }, { "epoch": 0.625196123673281, "grad_norm": 0.4227854609489441, "learning_rate": 9.646219513714884e-06, "loss": 0.5046, "step": 1129 }, { "epoch": 0.6257498846331334, "grad_norm": 0.4223552644252777, "learning_rate": 9.645027583324715e-06, "loss": 0.481, "step": 1130 }, { "epoch": 0.6263036455929857, "grad_norm": 0.42086929082870483, "learning_rate": 9.643833722321641e-06, "loss": 0.4893, "step": 1131 }, { "epoch": 0.6268574065528381, "grad_norm": 0.4123913049697876, "learning_rate": 9.642637931201867e-06, "loss": 0.4996, "step": 1132 }, { "epoch": 0.6274111675126903, "grad_norm": 0.39919522404670715, "learning_rate": 9.641440210462398e-06, "loss": 0.4526, "step": 1133 }, { "epoch": 0.6279649284725427, "grad_norm": 0.36298850178718567, "learning_rate": 9.640240560601045e-06, "loss": 0.4605, "step": 1134 }, { "epoch": 0.628518689432395, "grad_norm": 0.3541809022426605, "learning_rate": 9.639038982116418e-06, "loss": 0.4889, "step": 1135 }, { "epoch": 0.6290724503922474, "grad_norm": 0.38646525144577026, "learning_rate": 9.637835475507927e-06, "loss": 0.4811, "step": 1136 }, { "epoch": 0.6296262113520997, "grad_norm": 0.35858938097953796, "learning_rate": 9.636630041275787e-06, "loss": 0.4816, "step": 1137 }, { "epoch": 0.630179972311952, "grad_norm": 0.3921908140182495, "learning_rate": 9.635422679921012e-06, "loss": 0.4706, "step": 1138 }, { "epoch": 0.6307337332718044, "grad_norm": 0.4250425398349762, "learning_rate": 9.63421339194542e-06, "loss": 0.4984, "step": 1139 }, { "epoch": 0.6312874942316566, "grad_norm": 0.3526803255081177, "learning_rate": 9.633002177851624e-06, "loss": 0.4549, "step": 1140 }, { "epoch": 0.631841255191509, "grad_norm": 0.3527928292751312, "learning_rate": 9.631789038143042e-06, "loss": 0.4688, "step": 1141 }, { "epoch": 0.6323950161513613, "grad_norm": 0.379881888628006, "learning_rate": 9.630573973323893e-06, "loss": 0.4679, "step": 1142 }, { "epoch": 0.6329487771112137, "grad_norm": 0.35339245200157166, "learning_rate": 9.629356983899193e-06, "loss": 0.461, "step": 1143 }, { "epoch": 0.6335025380710659, "grad_norm": 0.3596797585487366, "learning_rate": 9.62813807037476e-06, "loss": 0.4827, "step": 1144 }, { "epoch": 0.6340562990309183, "grad_norm": 0.36624962091445923, "learning_rate": 9.626917233257211e-06, "loss": 0.45, "step": 1145 }, { "epoch": 0.6346100599907707, "grad_norm": 0.40163204073905945, "learning_rate": 9.62569447305396e-06, "loss": 0.4545, "step": 1146 }, { "epoch": 0.635163820950623, "grad_norm": 0.39504092931747437, "learning_rate": 9.624469790273226e-06, "loss": 0.5029, "step": 1147 }, { "epoch": 0.6357175819104753, "grad_norm": 0.4099467992782593, "learning_rate": 9.623243185424024e-06, "loss": 0.4576, "step": 1148 }, { "epoch": 0.6362713428703276, "grad_norm": 0.3917321264743805, "learning_rate": 9.622014659016165e-06, "loss": 0.5047, "step": 1149 }, { "epoch": 0.63682510383018, "grad_norm": 0.3726976215839386, "learning_rate": 9.620784211560264e-06, "loss": 0.4846, "step": 1150 }, { "epoch": 0.6373788647900323, "grad_norm": 0.40307995676994324, "learning_rate": 9.619551843567732e-06, "loss": 0.4592, "step": 1151 }, { "epoch": 0.6379326257498846, "grad_norm": 0.33782291412353516, "learning_rate": 9.618317555550776e-06, "loss": 0.475, "step": 1152 }, { "epoch": 0.638486386709737, "grad_norm": 0.46960699558258057, "learning_rate": 9.617081348022403e-06, "loss": 0.4784, "step": 1153 }, { "epoch": 0.6390401476695893, "grad_norm": 0.3820076584815979, "learning_rate": 9.615843221496422e-06, "loss": 0.4676, "step": 1154 }, { "epoch": 0.6395939086294417, "grad_norm": 0.3540625274181366, "learning_rate": 9.614603176487432e-06, "loss": 0.4788, "step": 1155 }, { "epoch": 0.6401476695892939, "grad_norm": 0.36396685242652893, "learning_rate": 9.613361213510833e-06, "loss": 0.4537, "step": 1156 }, { "epoch": 0.6407014305491463, "grad_norm": 0.40896907448768616, "learning_rate": 9.612117333082824e-06, "loss": 0.4925, "step": 1157 }, { "epoch": 0.6412551915089986, "grad_norm": 0.3908737599849701, "learning_rate": 9.610871535720396e-06, "loss": 0.4627, "step": 1158 }, { "epoch": 0.641808952468851, "grad_norm": 0.3939005136489868, "learning_rate": 9.609623821941343e-06, "loss": 0.4595, "step": 1159 }, { "epoch": 0.6423627134287033, "grad_norm": 0.36619076132774353, "learning_rate": 9.608374192264251e-06, "loss": 0.4717, "step": 1160 }, { "epoch": 0.6429164743885556, "grad_norm": 0.3893590569496155, "learning_rate": 9.607122647208505e-06, "loss": 0.4527, "step": 1161 }, { "epoch": 0.643470235348408, "grad_norm": 0.40434759855270386, "learning_rate": 9.605869187294281e-06, "loss": 0.4866, "step": 1162 }, { "epoch": 0.6440239963082602, "grad_norm": 0.4136258363723755, "learning_rate": 9.604613813042559e-06, "loss": 0.4645, "step": 1163 }, { "epoch": 0.6445777572681126, "grad_norm": 0.3619273900985718, "learning_rate": 9.603356524975107e-06, "loss": 0.4851, "step": 1164 }, { "epoch": 0.6451315182279649, "grad_norm": 0.38092610239982605, "learning_rate": 9.602097323614492e-06, "loss": 0.4734, "step": 1165 }, { "epoch": 0.6456852791878173, "grad_norm": 0.3718932271003723, "learning_rate": 9.600836209484077e-06, "loss": 0.4638, "step": 1166 }, { "epoch": 0.6462390401476696, "grad_norm": 0.3373836278915405, "learning_rate": 9.59957318310802e-06, "loss": 0.4719, "step": 1167 }, { "epoch": 0.6467928011075219, "grad_norm": 0.35758963227272034, "learning_rate": 9.59830824501127e-06, "loss": 0.4553, "step": 1168 }, { "epoch": 0.6473465620673743, "grad_norm": 0.4219372570514679, "learning_rate": 9.597041395719573e-06, "loss": 0.5079, "step": 1169 }, { "epoch": 0.6479003230272266, "grad_norm": 0.37445470690727234, "learning_rate": 9.595772635759473e-06, "loss": 0.4976, "step": 1170 }, { "epoch": 0.6484540839870789, "grad_norm": 0.4012693166732788, "learning_rate": 9.594501965658303e-06, "loss": 0.4893, "step": 1171 }, { "epoch": 0.6490078449469312, "grad_norm": 0.3536968231201172, "learning_rate": 9.593229385944192e-06, "loss": 0.4595, "step": 1172 }, { "epoch": 0.6495616059067836, "grad_norm": 0.39249539375305176, "learning_rate": 9.591954897146059e-06, "loss": 0.4768, "step": 1173 }, { "epoch": 0.650115366866636, "grad_norm": 0.38255855441093445, "learning_rate": 9.590678499793625e-06, "loss": 0.4527, "step": 1174 }, { "epoch": 0.6506691278264882, "grad_norm": 0.3499041795730591, "learning_rate": 9.589400194417395e-06, "loss": 0.4534, "step": 1175 }, { "epoch": 0.6512228887863406, "grad_norm": 0.4241704046726227, "learning_rate": 9.588119981548673e-06, "loss": 0.4781, "step": 1176 }, { "epoch": 0.6517766497461929, "grad_norm": 0.3731544613838196, "learning_rate": 9.586837861719556e-06, "loss": 0.4772, "step": 1177 }, { "epoch": 0.6523304107060452, "grad_norm": 0.3863711357116699, "learning_rate": 9.585553835462926e-06, "loss": 0.4726, "step": 1178 }, { "epoch": 0.6528841716658975, "grad_norm": 0.38817161321640015, "learning_rate": 9.584267903312468e-06, "loss": 0.4733, "step": 1179 }, { "epoch": 0.6534379326257499, "grad_norm": 0.35609275102615356, "learning_rate": 9.582980065802652e-06, "loss": 0.4764, "step": 1180 }, { "epoch": 0.6539916935856022, "grad_norm": 0.36368364095687866, "learning_rate": 9.58169032346874e-06, "loss": 0.4841, "step": 1181 }, { "epoch": 0.6545454545454545, "grad_norm": 0.3602033853530884, "learning_rate": 9.580398676846791e-06, "loss": 0.4806, "step": 1182 }, { "epoch": 0.6550992155053069, "grad_norm": 0.3985549211502075, "learning_rate": 9.57910512647365e-06, "loss": 0.4829, "step": 1183 }, { "epoch": 0.6556529764651592, "grad_norm": 0.42108821868896484, "learning_rate": 9.577809672886956e-06, "loss": 0.4744, "step": 1184 }, { "epoch": 0.6562067374250116, "grad_norm": 0.41022899746894836, "learning_rate": 9.576512316625139e-06, "loss": 0.4546, "step": 1185 }, { "epoch": 0.6567604983848638, "grad_norm": 0.35979998111724854, "learning_rate": 9.575213058227418e-06, "loss": 0.4661, "step": 1186 }, { "epoch": 0.6573142593447162, "grad_norm": 0.3554682433605194, "learning_rate": 9.573911898233805e-06, "loss": 0.4688, "step": 1187 }, { "epoch": 0.6578680203045685, "grad_norm": 0.4270106256008148, "learning_rate": 9.5726088371851e-06, "loss": 0.4807, "step": 1188 }, { "epoch": 0.6584217812644209, "grad_norm": 0.3596557378768921, "learning_rate": 9.571303875622895e-06, "loss": 0.4814, "step": 1189 }, { "epoch": 0.6589755422242732, "grad_norm": 0.3649778366088867, "learning_rate": 9.569997014089571e-06, "loss": 0.5072, "step": 1190 }, { "epoch": 0.6595293031841255, "grad_norm": 0.39609605073928833, "learning_rate": 9.5686882531283e-06, "loss": 0.4815, "step": 1191 }, { "epoch": 0.6600830641439779, "grad_norm": 0.3422112762928009, "learning_rate": 9.56737759328304e-06, "loss": 0.4591, "step": 1192 }, { "epoch": 0.6606368251038301, "grad_norm": 0.36193662881851196, "learning_rate": 9.566065035098542e-06, "loss": 0.4522, "step": 1193 }, { "epoch": 0.6611905860636825, "grad_norm": 0.3465469777584076, "learning_rate": 9.564750579120345e-06, "loss": 0.4885, "step": 1194 }, { "epoch": 0.6617443470235348, "grad_norm": 0.3898077607154846, "learning_rate": 9.563434225894777e-06, "loss": 0.4472, "step": 1195 }, { "epoch": 0.6622981079833872, "grad_norm": 0.42955687642097473, "learning_rate": 9.56211597596895e-06, "loss": 0.4791, "step": 1196 }, { "epoch": 0.6628518689432396, "grad_norm": 0.39834290742874146, "learning_rate": 9.560795829890772e-06, "loss": 0.48, "step": 1197 }, { "epoch": 0.6634056299030918, "grad_norm": 0.41766035556793213, "learning_rate": 9.559473788208935e-06, "loss": 0.4807, "step": 1198 }, { "epoch": 0.6639593908629442, "grad_norm": 0.3868374824523926, "learning_rate": 9.558149851472918e-06, "loss": 0.4686, "step": 1199 }, { "epoch": 0.6645131518227965, "grad_norm": 0.37969040870666504, "learning_rate": 9.556824020232988e-06, "loss": 0.4962, "step": 1200 }, { "epoch": 0.6650669127826488, "grad_norm": 0.4154088497161865, "learning_rate": 9.555496295040203e-06, "loss": 0.4547, "step": 1201 }, { "epoch": 0.6656206737425011, "grad_norm": 0.39192089438438416, "learning_rate": 9.554166676446405e-06, "loss": 0.4924, "step": 1202 }, { "epoch": 0.6661744347023535, "grad_norm": 0.44946104288101196, "learning_rate": 9.552835165004222e-06, "loss": 0.476, "step": 1203 }, { "epoch": 0.6667281956622059, "grad_norm": 0.3420163094997406, "learning_rate": 9.55150176126707e-06, "loss": 0.4566, "step": 1204 }, { "epoch": 0.6672819566220581, "grad_norm": 0.4199497401714325, "learning_rate": 9.550166465789153e-06, "loss": 0.4568, "step": 1205 }, { "epoch": 0.6678357175819105, "grad_norm": 0.4616442024707794, "learning_rate": 9.548829279125457e-06, "loss": 0.4669, "step": 1206 }, { "epoch": 0.6683894785417628, "grad_norm": 0.3496432602405548, "learning_rate": 9.54749020183176e-06, "loss": 0.4582, "step": 1207 }, { "epoch": 0.6689432395016152, "grad_norm": 0.46286725997924805, "learning_rate": 9.546149234464623e-06, "loss": 0.4828, "step": 1208 }, { "epoch": 0.6694970004614674, "grad_norm": 0.382727712392807, "learning_rate": 9.544806377581391e-06, "loss": 0.482, "step": 1209 }, { "epoch": 0.6700507614213198, "grad_norm": 0.38821330666542053, "learning_rate": 9.543461631740194e-06, "loss": 0.4547, "step": 1210 }, { "epoch": 0.6706045223811721, "grad_norm": 0.3799228072166443, "learning_rate": 9.54211499749995e-06, "loss": 0.4421, "step": 1211 }, { "epoch": 0.6711582833410245, "grad_norm": 0.3687892258167267, "learning_rate": 9.540766475420364e-06, "loss": 0.4946, "step": 1212 }, { "epoch": 0.6717120443008768, "grad_norm": 0.38328394293785095, "learning_rate": 9.539416066061917e-06, "loss": 0.4644, "step": 1213 }, { "epoch": 0.6722658052607291, "grad_norm": 0.35493841767311096, "learning_rate": 9.538063769985883e-06, "loss": 0.4564, "step": 1214 }, { "epoch": 0.6728195662205815, "grad_norm": 0.3737426698207855, "learning_rate": 9.536709587754317e-06, "loss": 0.4812, "step": 1215 }, { "epoch": 0.6733733271804337, "grad_norm": 0.3578367829322815, "learning_rate": 9.535353519930055e-06, "loss": 0.4692, "step": 1216 }, { "epoch": 0.6739270881402861, "grad_norm": 0.36108577251434326, "learning_rate": 9.533995567076719e-06, "loss": 0.4959, "step": 1217 }, { "epoch": 0.6744808491001384, "grad_norm": 0.420228511095047, "learning_rate": 9.53263572975872e-06, "loss": 0.474, "step": 1218 }, { "epoch": 0.6750346100599908, "grad_norm": 0.34940990805625916, "learning_rate": 9.531274008541243e-06, "loss": 0.4581, "step": 1219 }, { "epoch": 0.6755883710198431, "grad_norm": 0.3541697859764099, "learning_rate": 9.529910403990263e-06, "loss": 0.4628, "step": 1220 }, { "epoch": 0.6761421319796954, "grad_norm": 0.37076637148857117, "learning_rate": 9.528544916672533e-06, "loss": 0.4567, "step": 1221 }, { "epoch": 0.6766958929395478, "grad_norm": 0.3961624503135681, "learning_rate": 9.527177547155589e-06, "loss": 0.4665, "step": 1222 }, { "epoch": 0.6772496538994001, "grad_norm": 0.36980774998664856, "learning_rate": 9.525808296007755e-06, "loss": 0.4237, "step": 1223 }, { "epoch": 0.6778034148592524, "grad_norm": 0.3981638252735138, "learning_rate": 9.52443716379813e-06, "loss": 0.4827, "step": 1224 }, { "epoch": 0.6783571758191047, "grad_norm": 0.34644296765327454, "learning_rate": 9.523064151096597e-06, "loss": 0.453, "step": 1225 }, { "epoch": 0.6789109367789571, "grad_norm": 0.38678064942359924, "learning_rate": 9.521689258473826e-06, "loss": 0.4622, "step": 1226 }, { "epoch": 0.6794646977388095, "grad_norm": 0.3824797570705414, "learning_rate": 9.520312486501258e-06, "loss": 0.4754, "step": 1227 }, { "epoch": 0.6800184586986617, "grad_norm": 0.40293338894844055, "learning_rate": 9.518933835751122e-06, "loss": 0.4779, "step": 1228 }, { "epoch": 0.6805722196585141, "grad_norm": 0.3647953271865845, "learning_rate": 9.51755330679643e-06, "loss": 0.4892, "step": 1229 }, { "epoch": 0.6811259806183664, "grad_norm": 0.3927794396877289, "learning_rate": 9.516170900210968e-06, "loss": 0.4781, "step": 1230 }, { "epoch": 0.6816797415782188, "grad_norm": 0.3762713372707367, "learning_rate": 9.514786616569307e-06, "loss": 0.436, "step": 1231 }, { "epoch": 0.682233502538071, "grad_norm": 0.37481844425201416, "learning_rate": 9.513400456446795e-06, "loss": 0.4632, "step": 1232 }, { "epoch": 0.6827872634979234, "grad_norm": 0.3783378601074219, "learning_rate": 9.512012420419565e-06, "loss": 0.4484, "step": 1233 }, { "epoch": 0.6833410244577758, "grad_norm": 0.45682409405708313, "learning_rate": 9.510622509064525e-06, "loss": 0.4557, "step": 1234 }, { "epoch": 0.683894785417628, "grad_norm": 0.32191213965415955, "learning_rate": 9.509230722959364e-06, "loss": 0.4755, "step": 1235 }, { "epoch": 0.6844485463774804, "grad_norm": 0.4605029821395874, "learning_rate": 9.507837062682547e-06, "loss": 0.4941, "step": 1236 }, { "epoch": 0.6850023073373327, "grad_norm": 0.3845261037349701, "learning_rate": 9.506441528813326e-06, "loss": 0.4583, "step": 1237 }, { "epoch": 0.6855560682971851, "grad_norm": 0.37329262495040894, "learning_rate": 9.505044121931724e-06, "loss": 0.4757, "step": 1238 }, { "epoch": 0.6861098292570373, "grad_norm": 0.4102815091609955, "learning_rate": 9.503644842618547e-06, "loss": 0.4832, "step": 1239 }, { "epoch": 0.6866635902168897, "grad_norm": 0.4646155834197998, "learning_rate": 9.502243691455375e-06, "loss": 0.4701, "step": 1240 }, { "epoch": 0.687217351176742, "grad_norm": 0.4044606685638428, "learning_rate": 9.500840669024569e-06, "loss": 0.438, "step": 1241 }, { "epoch": 0.6877711121365944, "grad_norm": 0.5184035301208496, "learning_rate": 9.49943577590927e-06, "loss": 0.4573, "step": 1242 }, { "epoch": 0.6883248730964467, "grad_norm": 0.4301811754703522, "learning_rate": 9.498029012693389e-06, "loss": 0.4857, "step": 1243 }, { "epoch": 0.688878634056299, "grad_norm": 0.3979569673538208, "learning_rate": 9.496620379961622e-06, "loss": 0.456, "step": 1244 }, { "epoch": 0.6894323950161514, "grad_norm": 0.511087954044342, "learning_rate": 9.495209878299442e-06, "loss": 0.4552, "step": 1245 }, { "epoch": 0.6899861559760037, "grad_norm": 0.45219606161117554, "learning_rate": 9.49379750829309e-06, "loss": 0.5036, "step": 1246 }, { "epoch": 0.690539916935856, "grad_norm": 0.41253384947776794, "learning_rate": 9.492383270529593e-06, "loss": 0.456, "step": 1247 }, { "epoch": 0.6910936778957083, "grad_norm": 0.6051667928695679, "learning_rate": 9.49096716559675e-06, "loss": 0.4802, "step": 1248 }, { "epoch": 0.6916474388555607, "grad_norm": 0.42395636439323425, "learning_rate": 9.489549194083134e-06, "loss": 0.4765, "step": 1249 }, { "epoch": 0.6922011998154131, "grad_norm": 0.44442951679229736, "learning_rate": 9.488129356578102e-06, "loss": 0.4819, "step": 1250 }, { "epoch": 0.6927549607752653, "grad_norm": 0.49269047379493713, "learning_rate": 9.486707653671776e-06, "loss": 0.484, "step": 1251 }, { "epoch": 0.6933087217351177, "grad_norm": 0.3919488787651062, "learning_rate": 9.485284085955062e-06, "loss": 0.4956, "step": 1252 }, { "epoch": 0.69386248269497, "grad_norm": 0.4203889071941376, "learning_rate": 9.483858654019635e-06, "loss": 0.4893, "step": 1253 }, { "epoch": 0.6944162436548224, "grad_norm": 0.422599196434021, "learning_rate": 9.482431358457949e-06, "loss": 0.4811, "step": 1254 }, { "epoch": 0.6949700046146746, "grad_norm": 0.3625422418117523, "learning_rate": 9.481002199863228e-06, "loss": 0.4719, "step": 1255 }, { "epoch": 0.695523765574527, "grad_norm": 0.3744456171989441, "learning_rate": 9.479571178829476e-06, "loss": 0.4613, "step": 1256 }, { "epoch": 0.6960775265343794, "grad_norm": 0.3935151994228363, "learning_rate": 9.47813829595147e-06, "loss": 0.4523, "step": 1257 }, { "epoch": 0.6966312874942316, "grad_norm": 0.40260425209999084, "learning_rate": 9.476703551824755e-06, "loss": 0.4856, "step": 1258 }, { "epoch": 0.697185048454084, "grad_norm": 0.38715681433677673, "learning_rate": 9.475266947045655e-06, "loss": 0.4618, "step": 1259 }, { "epoch": 0.6977388094139363, "grad_norm": 0.37717095017433167, "learning_rate": 9.473828482211267e-06, "loss": 0.4668, "step": 1260 }, { "epoch": 0.6982925703737887, "grad_norm": 0.3315001428127289, "learning_rate": 9.47238815791946e-06, "loss": 0.4752, "step": 1261 }, { "epoch": 0.6988463313336409, "grad_norm": 0.37179625034332275, "learning_rate": 9.470945974768874e-06, "loss": 0.4655, "step": 1262 }, { "epoch": 0.6994000922934933, "grad_norm": 0.3791336417198181, "learning_rate": 9.469501933358928e-06, "loss": 0.4865, "step": 1263 }, { "epoch": 0.6999538532533457, "grad_norm": 0.3316580653190613, "learning_rate": 9.468056034289806e-06, "loss": 0.4848, "step": 1264 }, { "epoch": 0.700507614213198, "grad_norm": 0.40124401450157166, "learning_rate": 9.466608278162466e-06, "loss": 0.4925, "step": 1265 }, { "epoch": 0.7010613751730503, "grad_norm": 0.3902309536933899, "learning_rate": 9.46515866557864e-06, "loss": 0.4928, "step": 1266 }, { "epoch": 0.7016151361329026, "grad_norm": 0.3450373709201813, "learning_rate": 9.463707197140833e-06, "loss": 0.4738, "step": 1267 }, { "epoch": 0.702168897092755, "grad_norm": 0.41157403588294983, "learning_rate": 9.462253873452314e-06, "loss": 0.4859, "step": 1268 }, { "epoch": 0.7027226580526073, "grad_norm": 0.3505227267742157, "learning_rate": 9.460798695117131e-06, "loss": 0.4714, "step": 1269 }, { "epoch": 0.7032764190124596, "grad_norm": 0.37305912375450134, "learning_rate": 9.459341662740101e-06, "loss": 0.5064, "step": 1270 }, { "epoch": 0.7038301799723119, "grad_norm": 0.36001211404800415, "learning_rate": 9.457882776926807e-06, "loss": 0.4554, "step": 1271 }, { "epoch": 0.7043839409321643, "grad_norm": 0.32804083824157715, "learning_rate": 9.456422038283609e-06, "loss": 0.4586, "step": 1272 }, { "epoch": 0.7049377018920167, "grad_norm": 0.42367056012153625, "learning_rate": 9.45495944741763e-06, "loss": 0.4516, "step": 1273 }, { "epoch": 0.7054914628518689, "grad_norm": 0.41297489404678345, "learning_rate": 9.453495004936772e-06, "loss": 0.4771, "step": 1274 }, { "epoch": 0.7060452238117213, "grad_norm": 0.4082895815372467, "learning_rate": 9.452028711449697e-06, "loss": 0.4825, "step": 1275 }, { "epoch": 0.7065989847715736, "grad_norm": 0.37352949380874634, "learning_rate": 9.450560567565842e-06, "loss": 0.4726, "step": 1276 }, { "epoch": 0.707152745731426, "grad_norm": 0.4887038767337799, "learning_rate": 9.449090573895412e-06, "loss": 0.5019, "step": 1277 }, { "epoch": 0.7077065066912782, "grad_norm": 0.36055970191955566, "learning_rate": 9.44761873104938e-06, "loss": 0.4501, "step": 1278 }, { "epoch": 0.7082602676511306, "grad_norm": 0.44408202171325684, "learning_rate": 9.446145039639486e-06, "loss": 0.4645, "step": 1279 }, { "epoch": 0.708814028610983, "grad_norm": 0.4063699543476105, "learning_rate": 9.444669500278245e-06, "loss": 0.466, "step": 1280 }, { "epoch": 0.7093677895708352, "grad_norm": 0.42303571105003357, "learning_rate": 9.443192113578932e-06, "loss": 0.4924, "step": 1281 }, { "epoch": 0.7099215505306876, "grad_norm": 0.4620376527309418, "learning_rate": 9.441712880155594e-06, "loss": 0.4991, "step": 1282 }, { "epoch": 0.7104753114905399, "grad_norm": 0.40394553542137146, "learning_rate": 9.440231800623046e-06, "loss": 0.4744, "step": 1283 }, { "epoch": 0.7110290724503923, "grad_norm": 0.3658445179462433, "learning_rate": 9.438748875596866e-06, "loss": 0.4725, "step": 1284 }, { "epoch": 0.7115828334102445, "grad_norm": 0.38392001390457153, "learning_rate": 9.437264105693405e-06, "loss": 0.478, "step": 1285 }, { "epoch": 0.7121365943700969, "grad_norm": 0.39073216915130615, "learning_rate": 9.435777491529778e-06, "loss": 0.4814, "step": 1286 }, { "epoch": 0.7126903553299493, "grad_norm": 0.40565580129623413, "learning_rate": 9.434289033723865e-06, "loss": 0.4791, "step": 1287 }, { "epoch": 0.7132441162898016, "grad_norm": 0.4138217270374298, "learning_rate": 9.432798732894314e-06, "loss": 0.4767, "step": 1288 }, { "epoch": 0.7137978772496539, "grad_norm": 0.3574051260948181, "learning_rate": 9.431306589660543e-06, "loss": 0.4339, "step": 1289 }, { "epoch": 0.7143516382095062, "grad_norm": 0.38163650035858154, "learning_rate": 9.429812604642725e-06, "loss": 0.4661, "step": 1290 }, { "epoch": 0.7149053991693586, "grad_norm": 0.3905923366546631, "learning_rate": 9.428316778461806e-06, "loss": 0.467, "step": 1291 }, { "epoch": 0.7154591601292108, "grad_norm": 0.43945547938346863, "learning_rate": 9.4268191117395e-06, "loss": 0.467, "step": 1292 }, { "epoch": 0.7160129210890632, "grad_norm": 0.3613353967666626, "learning_rate": 9.42531960509828e-06, "loss": 0.4566, "step": 1293 }, { "epoch": 0.7165666820489156, "grad_norm": 0.32940205931663513, "learning_rate": 9.423818259161386e-06, "loss": 0.4556, "step": 1294 }, { "epoch": 0.7171204430087679, "grad_norm": 0.38750410079956055, "learning_rate": 9.422315074552823e-06, "loss": 0.4617, "step": 1295 }, { "epoch": 0.7176742039686203, "grad_norm": 0.34196677803993225, "learning_rate": 9.42081005189736e-06, "loss": 0.4476, "step": 1296 }, { "epoch": 0.7182279649284725, "grad_norm": 0.4086407423019409, "learning_rate": 9.419303191820528e-06, "loss": 0.4794, "step": 1297 }, { "epoch": 0.7187817258883249, "grad_norm": 0.35501039028167725, "learning_rate": 9.417794494948624e-06, "loss": 0.4624, "step": 1298 }, { "epoch": 0.7193354868481772, "grad_norm": 0.35777488350868225, "learning_rate": 9.41628396190871e-06, "loss": 0.4583, "step": 1299 }, { "epoch": 0.7198892478080295, "grad_norm": 0.3394460380077362, "learning_rate": 9.414771593328605e-06, "loss": 0.4664, "step": 1300 }, { "epoch": 0.7204430087678818, "grad_norm": 0.3766457736492157, "learning_rate": 9.413257389836896e-06, "loss": 0.4698, "step": 1301 }, { "epoch": 0.7209967697277342, "grad_norm": 0.31790390610694885, "learning_rate": 9.411741352062933e-06, "loss": 0.4535, "step": 1302 }, { "epoch": 0.7215505306875866, "grad_norm": 0.4119001030921936, "learning_rate": 9.410223480636825e-06, "loss": 0.4503, "step": 1303 }, { "epoch": 0.7221042916474388, "grad_norm": 0.3885115385055542, "learning_rate": 9.408703776189448e-06, "loss": 0.4744, "step": 1304 }, { "epoch": 0.7226580526072912, "grad_norm": 0.3605935275554657, "learning_rate": 9.407182239352432e-06, "loss": 0.4576, "step": 1305 }, { "epoch": 0.7232118135671435, "grad_norm": 0.36650732159614563, "learning_rate": 9.405658870758176e-06, "loss": 0.4346, "step": 1306 }, { "epoch": 0.7237655745269959, "grad_norm": 0.40742823481559753, "learning_rate": 9.404133671039839e-06, "loss": 0.4716, "step": 1307 }, { "epoch": 0.7243193354868481, "grad_norm": 0.34349143505096436, "learning_rate": 9.402606640831338e-06, "loss": 0.4592, "step": 1308 }, { "epoch": 0.7248730964467005, "grad_norm": 0.4713986814022064, "learning_rate": 9.401077780767353e-06, "loss": 0.4411, "step": 1309 }, { "epoch": 0.7254268574065529, "grad_norm": 0.37818944454193115, "learning_rate": 9.399547091483325e-06, "loss": 0.4774, "step": 1310 }, { "epoch": 0.7259806183664052, "grad_norm": 0.416020005941391, "learning_rate": 9.398014573615452e-06, "loss": 0.463, "step": 1311 }, { "epoch": 0.7265343793262575, "grad_norm": 0.43411391973495483, "learning_rate": 9.396480227800698e-06, "loss": 0.4834, "step": 1312 }, { "epoch": 0.7270881402861098, "grad_norm": 0.3461960554122925, "learning_rate": 9.394944054676779e-06, "loss": 0.4511, "step": 1313 }, { "epoch": 0.7276419012459622, "grad_norm": 0.39043861627578735, "learning_rate": 9.393406054882177e-06, "loss": 0.4404, "step": 1314 }, { "epoch": 0.7281956622058144, "grad_norm": 0.3983977138996124, "learning_rate": 9.391866229056133e-06, "loss": 0.484, "step": 1315 }, { "epoch": 0.7287494231656668, "grad_norm": 0.39407283067703247, "learning_rate": 9.39032457783864e-06, "loss": 0.4887, "step": 1316 }, { "epoch": 0.7293031841255192, "grad_norm": 0.38397401571273804, "learning_rate": 9.388781101870459e-06, "loss": 0.4872, "step": 1317 }, { "epoch": 0.7298569450853715, "grad_norm": 0.42111846804618835, "learning_rate": 9.387235801793102e-06, "loss": 0.4605, "step": 1318 }, { "epoch": 0.7304107060452238, "grad_norm": 0.3681069612503052, "learning_rate": 9.385688678248843e-06, "loss": 0.4664, "step": 1319 }, { "epoch": 0.7309644670050761, "grad_norm": 0.45675644278526306, "learning_rate": 9.384139731880714e-06, "loss": 0.4379, "step": 1320 }, { "epoch": 0.7315182279649285, "grad_norm": 0.41950011253356934, "learning_rate": 9.382588963332503e-06, "loss": 0.4544, "step": 1321 }, { "epoch": 0.7320719889247808, "grad_norm": 0.3855050206184387, "learning_rate": 9.381036373248755e-06, "loss": 0.4904, "step": 1322 }, { "epoch": 0.7326257498846331, "grad_norm": 0.4070005416870117, "learning_rate": 9.379481962274775e-06, "loss": 0.4897, "step": 1323 }, { "epoch": 0.7331795108444855, "grad_norm": 0.3815324902534485, "learning_rate": 9.377925731056619e-06, "loss": 0.4562, "step": 1324 }, { "epoch": 0.7337332718043378, "grad_norm": 0.3719714879989624, "learning_rate": 9.376367680241108e-06, "loss": 0.4579, "step": 1325 }, { "epoch": 0.7342870327641902, "grad_norm": 0.3712398111820221, "learning_rate": 9.374807810475813e-06, "loss": 0.4635, "step": 1326 }, { "epoch": 0.7348407937240424, "grad_norm": 0.36849772930145264, "learning_rate": 9.37324612240906e-06, "loss": 0.4599, "step": 1327 }, { "epoch": 0.7353945546838948, "grad_norm": 0.3901553452014923, "learning_rate": 9.371682616689938e-06, "loss": 0.454, "step": 1328 }, { "epoch": 0.7359483156437471, "grad_norm": 0.35794901847839355, "learning_rate": 9.370117293968285e-06, "loss": 0.4556, "step": 1329 }, { "epoch": 0.7365020766035995, "grad_norm": 0.3683810234069824, "learning_rate": 9.368550154894696e-06, "loss": 0.4931, "step": 1330 }, { "epoch": 0.7370558375634517, "grad_norm": 0.3765607178211212, "learning_rate": 9.366981200120519e-06, "loss": 0.4768, "step": 1331 }, { "epoch": 0.7376095985233041, "grad_norm": 0.3836864233016968, "learning_rate": 9.365410430297863e-06, "loss": 0.459, "step": 1332 }, { "epoch": 0.7381633594831565, "grad_norm": 0.3672686517238617, "learning_rate": 9.363837846079584e-06, "loss": 0.4414, "step": 1333 }, { "epoch": 0.7387171204430087, "grad_norm": 0.388498455286026, "learning_rate": 9.362263448119295e-06, "loss": 0.4669, "step": 1334 }, { "epoch": 0.7392708814028611, "grad_norm": 0.40160247683525085, "learning_rate": 9.360687237071364e-06, "loss": 0.4698, "step": 1335 }, { "epoch": 0.7398246423627134, "grad_norm": 0.3964986503124237, "learning_rate": 9.359109213590914e-06, "loss": 0.4812, "step": 1336 }, { "epoch": 0.7403784033225658, "grad_norm": 0.4164401888847351, "learning_rate": 9.357529378333814e-06, "loss": 0.4699, "step": 1337 }, { "epoch": 0.740932164282418, "grad_norm": 0.3487233519554138, "learning_rate": 9.355947731956694e-06, "loss": 0.4537, "step": 1338 }, { "epoch": 0.7414859252422704, "grad_norm": 0.5043642520904541, "learning_rate": 9.354364275116934e-06, "loss": 0.4937, "step": 1339 }, { "epoch": 0.7420396862021228, "grad_norm": 0.415173202753067, "learning_rate": 9.352779008472666e-06, "loss": 0.4502, "step": 1340 }, { "epoch": 0.7425934471619751, "grad_norm": 0.3562435209751129, "learning_rate": 9.351191932682772e-06, "loss": 0.4717, "step": 1341 }, { "epoch": 0.7431472081218274, "grad_norm": 0.4226479232311249, "learning_rate": 9.34960304840689e-06, "loss": 0.4681, "step": 1342 }, { "epoch": 0.7437009690816797, "grad_norm": 0.39772430062294006, "learning_rate": 9.348012356305411e-06, "loss": 0.486, "step": 1343 }, { "epoch": 0.7442547300415321, "grad_norm": 0.38570713996887207, "learning_rate": 9.346419857039469e-06, "loss": 0.493, "step": 1344 }, { "epoch": 0.7448084910013844, "grad_norm": 0.37852638959884644, "learning_rate": 9.344825551270957e-06, "loss": 0.4514, "step": 1345 }, { "epoch": 0.7453622519612367, "grad_norm": 0.39035695791244507, "learning_rate": 9.343229439662517e-06, "loss": 0.4679, "step": 1346 }, { "epoch": 0.7459160129210891, "grad_norm": 0.3665717542171478, "learning_rate": 9.341631522877542e-06, "loss": 0.4781, "step": 1347 }, { "epoch": 0.7464697738809414, "grad_norm": 0.37694108486175537, "learning_rate": 9.340031801580172e-06, "loss": 0.4712, "step": 1348 }, { "epoch": 0.7470235348407938, "grad_norm": 0.4102180004119873, "learning_rate": 9.338430276435298e-06, "loss": 0.4895, "step": 1349 }, { "epoch": 0.747577295800646, "grad_norm": 0.3439154624938965, "learning_rate": 9.336826948108563e-06, "loss": 0.4524, "step": 1350 }, { "epoch": 0.7481310567604984, "grad_norm": 0.3756393790245056, "learning_rate": 9.335221817266362e-06, "loss": 0.4471, "step": 1351 }, { "epoch": 0.7486848177203507, "grad_norm": 0.3907032907009125, "learning_rate": 9.333614884575831e-06, "loss": 0.4759, "step": 1352 }, { "epoch": 0.749238578680203, "grad_norm": 0.3799581825733185, "learning_rate": 9.33200615070486e-06, "loss": 0.4887, "step": 1353 }, { "epoch": 0.7497923396400554, "grad_norm": 0.3501971662044525, "learning_rate": 9.33039561632209e-06, "loss": 0.479, "step": 1354 }, { "epoch": 0.7503461005999077, "grad_norm": 0.44094935059547424, "learning_rate": 9.328783282096904e-06, "loss": 0.4726, "step": 1355 }, { "epoch": 0.7508998615597601, "grad_norm": 0.36814337968826294, "learning_rate": 9.32716914869944e-06, "loss": 0.4774, "step": 1356 }, { "epoch": 0.7514536225196123, "grad_norm": 0.3536168038845062, "learning_rate": 9.325553216800577e-06, "loss": 0.4794, "step": 1357 }, { "epoch": 0.7520073834794647, "grad_norm": 0.44729071855545044, "learning_rate": 9.323935487071947e-06, "loss": 0.4786, "step": 1358 }, { "epoch": 0.752561144439317, "grad_norm": 0.36258819699287415, "learning_rate": 9.322315960185925e-06, "loss": 0.4464, "step": 1359 }, { "epoch": 0.7531149053991694, "grad_norm": 0.4067261815071106, "learning_rate": 9.320694636815637e-06, "loss": 0.4795, "step": 1360 }, { "epoch": 0.7536686663590217, "grad_norm": 0.3946942090988159, "learning_rate": 9.319071517634952e-06, "loss": 0.4868, "step": 1361 }, { "epoch": 0.754222427318874, "grad_norm": 0.35645586252212524, "learning_rate": 9.31744660331849e-06, "loss": 0.476, "step": 1362 }, { "epoch": 0.7547761882787264, "grad_norm": 0.36006900668144226, "learning_rate": 9.315819894541609e-06, "loss": 0.4558, "step": 1363 }, { "epoch": 0.7553299492385787, "grad_norm": 0.4297291934490204, "learning_rate": 9.314191391980422e-06, "loss": 0.4581, "step": 1364 }, { "epoch": 0.755883710198431, "grad_norm": 0.3190176784992218, "learning_rate": 9.312561096311784e-06, "loss": 0.4507, "step": 1365 }, { "epoch": 0.7564374711582833, "grad_norm": 0.40553539991378784, "learning_rate": 9.310929008213292e-06, "loss": 0.4786, "step": 1366 }, { "epoch": 0.7569912321181357, "grad_norm": 0.38612914085388184, "learning_rate": 9.309295128363293e-06, "loss": 0.4523, "step": 1367 }, { "epoch": 0.757544993077988, "grad_norm": 0.3536340892314911, "learning_rate": 9.307659457440877e-06, "loss": 0.4763, "step": 1368 }, { "epoch": 0.7580987540378403, "grad_norm": 0.40945568680763245, "learning_rate": 9.306021996125875e-06, "loss": 0.4635, "step": 1369 }, { "epoch": 0.7586525149976927, "grad_norm": 0.351439505815506, "learning_rate": 9.304382745098867e-06, "loss": 0.4764, "step": 1370 }, { "epoch": 0.759206275957545, "grad_norm": 0.37082284688949585, "learning_rate": 9.302741705041176e-06, "loss": 0.4725, "step": 1371 }, { "epoch": 0.7597600369173974, "grad_norm": 0.3798982501029968, "learning_rate": 9.301098876634866e-06, "loss": 0.461, "step": 1372 }, { "epoch": 0.7603137978772496, "grad_norm": 0.33189356327056885, "learning_rate": 9.299454260562744e-06, "loss": 0.4811, "step": 1373 }, { "epoch": 0.760867558837102, "grad_norm": 0.38054898381233215, "learning_rate": 9.297807857508365e-06, "loss": 0.4697, "step": 1374 }, { "epoch": 0.7614213197969543, "grad_norm": 0.33878806233406067, "learning_rate": 9.296159668156023e-06, "loss": 0.4814, "step": 1375 }, { "epoch": 0.7619750807568066, "grad_norm": 0.32288363575935364, "learning_rate": 9.294509693190754e-06, "loss": 0.4822, "step": 1376 }, { "epoch": 0.762528841716659, "grad_norm": 0.3665960729122162, "learning_rate": 9.292857933298338e-06, "loss": 0.448, "step": 1377 }, { "epoch": 0.7630826026765113, "grad_norm": 0.388002872467041, "learning_rate": 9.291204389165294e-06, "loss": 0.4988, "step": 1378 }, { "epoch": 0.7636363636363637, "grad_norm": 0.3770802617073059, "learning_rate": 9.289549061478887e-06, "loss": 0.4739, "step": 1379 }, { "epoch": 0.7641901245962159, "grad_norm": 0.35102975368499756, "learning_rate": 9.28789195092712e-06, "loss": 0.4412, "step": 1380 }, { "epoch": 0.7647438855560683, "grad_norm": 0.36751672625541687, "learning_rate": 9.286233058198739e-06, "loss": 0.4685, "step": 1381 }, { "epoch": 0.7652976465159206, "grad_norm": 0.3512617349624634, "learning_rate": 9.284572383983228e-06, "loss": 0.4508, "step": 1382 }, { "epoch": 0.765851407475773, "grad_norm": 0.3874419927597046, "learning_rate": 9.282909928970812e-06, "loss": 0.4621, "step": 1383 }, { "epoch": 0.7664051684356253, "grad_norm": 0.4017471671104431, "learning_rate": 9.281245693852458e-06, "loss": 0.4749, "step": 1384 }, { "epoch": 0.7669589293954776, "grad_norm": 0.372367799282074, "learning_rate": 9.279579679319874e-06, "loss": 0.4594, "step": 1385 }, { "epoch": 0.76751269035533, "grad_norm": 0.40153011679649353, "learning_rate": 9.277911886065504e-06, "loss": 0.4698, "step": 1386 }, { "epoch": 0.7680664513151823, "grad_norm": 0.37599480152130127, "learning_rate": 9.276242314782535e-06, "loss": 0.4652, "step": 1387 }, { "epoch": 0.7686202122750346, "grad_norm": 0.381710410118103, "learning_rate": 9.27457096616489e-06, "loss": 0.4607, "step": 1388 }, { "epoch": 0.7691739732348869, "grad_norm": 0.3713345527648926, "learning_rate": 9.27289784090723e-06, "loss": 0.4606, "step": 1389 }, { "epoch": 0.7697277341947393, "grad_norm": 0.338177353143692, "learning_rate": 9.271222939704956e-06, "loss": 0.4591, "step": 1390 }, { "epoch": 0.7702814951545917, "grad_norm": 0.3899575173854828, "learning_rate": 9.269546263254207e-06, "loss": 0.4623, "step": 1391 }, { "epoch": 0.7708352561144439, "grad_norm": 0.3557356894016266, "learning_rate": 9.267867812251865e-06, "loss": 0.4695, "step": 1392 }, { "epoch": 0.7713890170742963, "grad_norm": 0.35258162021636963, "learning_rate": 9.266187587395538e-06, "loss": 0.4672, "step": 1393 }, { "epoch": 0.7719427780341486, "grad_norm": 0.3604828417301178, "learning_rate": 9.264505589383583e-06, "loss": 0.4833, "step": 1394 }, { "epoch": 0.772496538994001, "grad_norm": 0.36754661798477173, "learning_rate": 9.262821818915085e-06, "loss": 0.462, "step": 1395 }, { "epoch": 0.7730502999538532, "grad_norm": 0.3843841850757599, "learning_rate": 9.261136276689872e-06, "loss": 0.4508, "step": 1396 }, { "epoch": 0.7736040609137056, "grad_norm": 0.34207993745803833, "learning_rate": 9.259448963408504e-06, "loss": 0.4747, "step": 1397 }, { "epoch": 0.7741578218735579, "grad_norm": 0.41438835859298706, "learning_rate": 9.257759879772283e-06, "loss": 0.4858, "step": 1398 }, { "epoch": 0.7747115828334102, "grad_norm": 0.34543612599372864, "learning_rate": 9.256069026483239e-06, "loss": 0.4515, "step": 1399 }, { "epoch": 0.7752653437932626, "grad_norm": 0.37108275294303894, "learning_rate": 9.254376404244141e-06, "loss": 0.4466, "step": 1400 }, { "epoch": 0.7758191047531149, "grad_norm": 0.39394786953926086, "learning_rate": 9.252682013758498e-06, "loss": 0.4665, "step": 1401 }, { "epoch": 0.7763728657129673, "grad_norm": 0.37096449732780457, "learning_rate": 9.250985855730545e-06, "loss": 0.4631, "step": 1402 }, { "epoch": 0.7769266266728195, "grad_norm": 0.4016181230545044, "learning_rate": 9.249287930865257e-06, "loss": 0.464, "step": 1403 }, { "epoch": 0.7774803876326719, "grad_norm": 0.3582565188407898, "learning_rate": 9.247588239868345e-06, "loss": 0.4338, "step": 1404 }, { "epoch": 0.7780341485925242, "grad_norm": 0.34993135929107666, "learning_rate": 9.245886783446249e-06, "loss": 0.4557, "step": 1405 }, { "epoch": 0.7785879095523766, "grad_norm": 0.36565491557121277, "learning_rate": 9.244183562306146e-06, "loss": 0.4676, "step": 1406 }, { "epoch": 0.7791416705122289, "grad_norm": 0.3740096390247345, "learning_rate": 9.24247857715595e-06, "loss": 0.473, "step": 1407 }, { "epoch": 0.7796954314720812, "grad_norm": 0.3995714783668518, "learning_rate": 9.240771828704296e-06, "loss": 0.4591, "step": 1408 }, { "epoch": 0.7802491924319336, "grad_norm": 0.3650170564651489, "learning_rate": 9.239063317660565e-06, "loss": 0.4481, "step": 1409 }, { "epoch": 0.7808029533917858, "grad_norm": 0.3830241858959198, "learning_rate": 9.237353044734867e-06, "loss": 0.4373, "step": 1410 }, { "epoch": 0.7813567143516382, "grad_norm": 0.35495471954345703, "learning_rate": 9.235641010638036e-06, "loss": 0.4805, "step": 1411 }, { "epoch": 0.7819104753114905, "grad_norm": 0.3920479118824005, "learning_rate": 9.233927216081653e-06, "loss": 0.4451, "step": 1412 }, { "epoch": 0.7824642362713429, "grad_norm": 0.3284823000431061, "learning_rate": 9.232211661778019e-06, "loss": 0.4783, "step": 1413 }, { "epoch": 0.7830179972311953, "grad_norm": 0.3510037362575531, "learning_rate": 9.230494348440167e-06, "loss": 0.456, "step": 1414 }, { "epoch": 0.7835717581910475, "grad_norm": 0.3592149317264557, "learning_rate": 9.228775276781867e-06, "loss": 0.468, "step": 1415 }, { "epoch": 0.7841255191508999, "grad_norm": 0.36784833669662476, "learning_rate": 9.227054447517616e-06, "loss": 0.4894, "step": 1416 }, { "epoch": 0.7846792801107522, "grad_norm": 0.3617315888404846, "learning_rate": 9.225331861362643e-06, "loss": 0.4647, "step": 1417 }, { "epoch": 0.7852330410706045, "grad_norm": 0.3508741557598114, "learning_rate": 9.223607519032906e-06, "loss": 0.4918, "step": 1418 }, { "epoch": 0.7857868020304568, "grad_norm": 0.35364845395088196, "learning_rate": 9.221881421245096e-06, "loss": 0.4593, "step": 1419 }, { "epoch": 0.7863405629903092, "grad_norm": 0.34282052516937256, "learning_rate": 9.220153568716625e-06, "loss": 0.476, "step": 1420 }, { "epoch": 0.7868943239501616, "grad_norm": 0.3745896816253662, "learning_rate": 9.218423962165646e-06, "loss": 0.4919, "step": 1421 }, { "epoch": 0.7874480849100138, "grad_norm": 0.34158286452293396, "learning_rate": 9.216692602311033e-06, "loss": 0.4365, "step": 1422 }, { "epoch": 0.7880018458698662, "grad_norm": 0.4137333631515503, "learning_rate": 9.214959489872391e-06, "loss": 0.4526, "step": 1423 }, { "epoch": 0.7885556068297185, "grad_norm": 0.382204532623291, "learning_rate": 9.213224625570055e-06, "loss": 0.4686, "step": 1424 }, { "epoch": 0.7891093677895709, "grad_norm": 0.34799161553382874, "learning_rate": 9.211488010125083e-06, "loss": 0.4775, "step": 1425 }, { "epoch": 0.7896631287494231, "grad_norm": 0.38461318612098694, "learning_rate": 9.209749644259269e-06, "loss": 0.4748, "step": 1426 }, { "epoch": 0.7902168897092755, "grad_norm": 0.3519740104675293, "learning_rate": 9.20800952869513e-06, "loss": 0.4468, "step": 1427 }, { "epoch": 0.7907706506691278, "grad_norm": 0.3849634826183319, "learning_rate": 9.206267664155906e-06, "loss": 0.4807, "step": 1428 }, { "epoch": 0.7913244116289802, "grad_norm": 0.33345848321914673, "learning_rate": 9.204524051365572e-06, "loss": 0.4523, "step": 1429 }, { "epoch": 0.7918781725888325, "grad_norm": 0.3924989104270935, "learning_rate": 9.202778691048826e-06, "loss": 0.4828, "step": 1430 }, { "epoch": 0.7924319335486848, "grad_norm": 0.3989019989967346, "learning_rate": 9.201031583931089e-06, "loss": 0.4732, "step": 1431 }, { "epoch": 0.7929856945085372, "grad_norm": 0.3465370535850525, "learning_rate": 9.199282730738513e-06, "loss": 0.4629, "step": 1432 }, { "epoch": 0.7935394554683894, "grad_norm": 0.3478240370750427, "learning_rate": 9.197532132197972e-06, "loss": 0.4687, "step": 1433 }, { "epoch": 0.7940932164282418, "grad_norm": 0.38705331087112427, "learning_rate": 9.19577978903707e-06, "loss": 0.4554, "step": 1434 }, { "epoch": 0.7946469773880941, "grad_norm": 0.3935673236846924, "learning_rate": 9.194025701984134e-06, "loss": 0.4766, "step": 1435 }, { "epoch": 0.7952007383479465, "grad_norm": 0.39912083745002747, "learning_rate": 9.192269871768214e-06, "loss": 0.4633, "step": 1436 }, { "epoch": 0.7957544993077988, "grad_norm": 0.37831777334213257, "learning_rate": 9.190512299119083e-06, "loss": 0.4718, "step": 1437 }, { "epoch": 0.7963082602676511, "grad_norm": 0.3534630835056305, "learning_rate": 9.188752984767245e-06, "loss": 0.4339, "step": 1438 }, { "epoch": 0.7968620212275035, "grad_norm": 0.3402272164821625, "learning_rate": 9.186991929443922e-06, "loss": 0.4597, "step": 1439 }, { "epoch": 0.7974157821873558, "grad_norm": 0.3749474287033081, "learning_rate": 9.185229133881061e-06, "loss": 0.4504, "step": 1440 }, { "epoch": 0.7979695431472081, "grad_norm": 0.38800784945487976, "learning_rate": 9.183464598811336e-06, "loss": 0.4794, "step": 1441 }, { "epoch": 0.7985233041070604, "grad_norm": 0.4087468087673187, "learning_rate": 9.181698324968136e-06, "loss": 0.453, "step": 1442 }, { "epoch": 0.7990770650669128, "grad_norm": 0.3634721338748932, "learning_rate": 9.17993031308558e-06, "loss": 0.4653, "step": 1443 }, { "epoch": 0.7996308260267652, "grad_norm": 0.37608230113983154, "learning_rate": 9.178160563898505e-06, "loss": 0.4883, "step": 1444 }, { "epoch": 0.8001845869866174, "grad_norm": 0.36811330914497375, "learning_rate": 9.176389078142476e-06, "loss": 0.4932, "step": 1445 }, { "epoch": 0.8007383479464698, "grad_norm": 0.4220361113548279, "learning_rate": 9.174615856553772e-06, "loss": 0.4711, "step": 1446 }, { "epoch": 0.8012921089063221, "grad_norm": 0.3530777096748352, "learning_rate": 9.1728408998694e-06, "loss": 0.4344, "step": 1447 }, { "epoch": 0.8018458698661745, "grad_norm": 0.404594749212265, "learning_rate": 9.17106420882708e-06, "loss": 0.4692, "step": 1448 }, { "epoch": 0.8023996308260267, "grad_norm": 0.38501206040382385, "learning_rate": 9.169285784165263e-06, "loss": 0.5046, "step": 1449 }, { "epoch": 0.8029533917858791, "grad_norm": 0.42966821789741516, "learning_rate": 9.167505626623116e-06, "loss": 0.4622, "step": 1450 }, { "epoch": 0.8035071527457315, "grad_norm": 0.39656370878219604, "learning_rate": 9.165723736940522e-06, "loss": 0.4898, "step": 1451 }, { "epoch": 0.8040609137055837, "grad_norm": 0.41981378197669983, "learning_rate": 9.16394011585809e-06, "loss": 0.4656, "step": 1452 }, { "epoch": 0.8046146746654361, "grad_norm": 0.39834991097450256, "learning_rate": 9.162154764117147e-06, "loss": 0.471, "step": 1453 }, { "epoch": 0.8051684356252884, "grad_norm": 0.3818192481994629, "learning_rate": 9.160367682459739e-06, "loss": 0.4526, "step": 1454 }, { "epoch": 0.8057221965851408, "grad_norm": 0.3913419246673584, "learning_rate": 9.15857887162863e-06, "loss": 0.452, "step": 1455 }, { "epoch": 0.806275957544993, "grad_norm": 0.4396856129169464, "learning_rate": 9.156788332367301e-06, "loss": 0.4906, "step": 1456 }, { "epoch": 0.8068297185048454, "grad_norm": 0.39466971158981323, "learning_rate": 9.154996065419958e-06, "loss": 0.4813, "step": 1457 }, { "epoch": 0.8073834794646977, "grad_norm": 0.3451901972293854, "learning_rate": 9.15320207153152e-06, "loss": 0.4671, "step": 1458 }, { "epoch": 0.8079372404245501, "grad_norm": 0.38070186972618103, "learning_rate": 9.151406351447624e-06, "loss": 0.4646, "step": 1459 }, { "epoch": 0.8084910013844024, "grad_norm": 0.3545899987220764, "learning_rate": 9.149608905914624e-06, "loss": 0.4904, "step": 1460 }, { "epoch": 0.8090447623442547, "grad_norm": 0.3768751323223114, "learning_rate": 9.147809735679594e-06, "loss": 0.4622, "step": 1461 }, { "epoch": 0.8095985233041071, "grad_norm": 0.3862748444080353, "learning_rate": 9.146008841490323e-06, "loss": 0.4686, "step": 1462 }, { "epoch": 0.8101522842639594, "grad_norm": 0.41230452060699463, "learning_rate": 9.144206224095317e-06, "loss": 0.4688, "step": 1463 }, { "epoch": 0.8107060452238117, "grad_norm": 0.38638627529144287, "learning_rate": 9.142401884243796e-06, "loss": 0.4312, "step": 1464 }, { "epoch": 0.811259806183664, "grad_norm": 0.36742228269577026, "learning_rate": 9.1405958226857e-06, "loss": 0.4587, "step": 1465 }, { "epoch": 0.8118135671435164, "grad_norm": 0.4164966642856598, "learning_rate": 9.138788040171681e-06, "loss": 0.4544, "step": 1466 }, { "epoch": 0.8123673281033688, "grad_norm": 0.40297913551330566, "learning_rate": 9.136978537453109e-06, "loss": 0.4899, "step": 1467 }, { "epoch": 0.812921089063221, "grad_norm": 0.44442522525787354, "learning_rate": 9.135167315282065e-06, "loss": 0.476, "step": 1468 }, { "epoch": 0.8134748500230734, "grad_norm": 0.42446616291999817, "learning_rate": 9.133354374411352e-06, "loss": 0.4844, "step": 1469 }, { "epoch": 0.8140286109829257, "grad_norm": 0.430387943983078, "learning_rate": 9.131539715594478e-06, "loss": 0.4546, "step": 1470 }, { "epoch": 0.814582371942778, "grad_norm": 0.33788594603538513, "learning_rate": 9.129723339585671e-06, "loss": 0.4334, "step": 1471 }, { "epoch": 0.8151361329026303, "grad_norm": 0.40645214915275574, "learning_rate": 9.127905247139872e-06, "loss": 0.4693, "step": 1472 }, { "epoch": 0.8156898938624827, "grad_norm": 0.40331265330314636, "learning_rate": 9.126085439012736e-06, "loss": 0.4535, "step": 1473 }, { "epoch": 0.8162436548223351, "grad_norm": 0.33556824922561646, "learning_rate": 9.124263915960627e-06, "loss": 0.4765, "step": 1474 }, { "epoch": 0.8167974157821873, "grad_norm": 0.3537658452987671, "learning_rate": 9.122440678740627e-06, "loss": 0.4821, "step": 1475 }, { "epoch": 0.8173511767420397, "grad_norm": 0.3334074020385742, "learning_rate": 9.120615728110527e-06, "loss": 0.4564, "step": 1476 }, { "epoch": 0.817904937701892, "grad_norm": 0.3692815601825714, "learning_rate": 9.118789064828832e-06, "loss": 0.4547, "step": 1477 }, { "epoch": 0.8184586986617444, "grad_norm": 0.3413941562175751, "learning_rate": 9.116960689654757e-06, "loss": 0.466, "step": 1478 }, { "epoch": 0.8190124596215966, "grad_norm": 0.34186291694641113, "learning_rate": 9.11513060334823e-06, "loss": 0.48, "step": 1479 }, { "epoch": 0.819566220581449, "grad_norm": 0.41164880990982056, "learning_rate": 9.11329880666989e-06, "loss": 0.4771, "step": 1480 }, { "epoch": 0.8201199815413014, "grad_norm": 0.31142398715019226, "learning_rate": 9.111465300381088e-06, "loss": 0.4882, "step": 1481 }, { "epoch": 0.8206737425011537, "grad_norm": 0.4038199186325073, "learning_rate": 9.109630085243883e-06, "loss": 0.472, "step": 1482 }, { "epoch": 0.821227503461006, "grad_norm": 0.3928115963935852, "learning_rate": 9.107793162021045e-06, "loss": 0.4866, "step": 1483 }, { "epoch": 0.8217812644208583, "grad_norm": 0.380998432636261, "learning_rate": 9.105954531476055e-06, "loss": 0.457, "step": 1484 }, { "epoch": 0.8223350253807107, "grad_norm": 0.39220407605171204, "learning_rate": 9.104114194373104e-06, "loss": 0.4549, "step": 1485 }, { "epoch": 0.822888786340563, "grad_norm": 0.3596060872077942, "learning_rate": 9.10227215147709e-06, "loss": 0.4633, "step": 1486 }, { "epoch": 0.8234425473004153, "grad_norm": 0.38867634534835815, "learning_rate": 9.10042840355362e-06, "loss": 0.4477, "step": 1487 }, { "epoch": 0.8239963082602676, "grad_norm": 0.39226698875427246, "learning_rate": 9.098582951369014e-06, "loss": 0.4542, "step": 1488 }, { "epoch": 0.82455006922012, "grad_norm": 0.35249045491218567, "learning_rate": 9.096735795690295e-06, "loss": 0.4798, "step": 1489 }, { "epoch": 0.8251038301799724, "grad_norm": 0.36246833205223083, "learning_rate": 9.094886937285199e-06, "loss": 0.4609, "step": 1490 }, { "epoch": 0.8256575911398246, "grad_norm": 0.3556586503982544, "learning_rate": 9.093036376922165e-06, "loss": 0.4795, "step": 1491 }, { "epoch": 0.826211352099677, "grad_norm": 0.3885868191719055, "learning_rate": 9.091184115370341e-06, "loss": 0.4735, "step": 1492 }, { "epoch": 0.8267651130595293, "grad_norm": 0.3530988097190857, "learning_rate": 9.089330153399584e-06, "loss": 0.466, "step": 1493 }, { "epoch": 0.8273188740193816, "grad_norm": 0.36684784293174744, "learning_rate": 9.087474491780454e-06, "loss": 0.4745, "step": 1494 }, { "epoch": 0.8278726349792339, "grad_norm": 0.33402374386787415, "learning_rate": 9.085617131284225e-06, "loss": 0.4908, "step": 1495 }, { "epoch": 0.8284263959390863, "grad_norm": 0.37127596139907837, "learning_rate": 9.083758072682865e-06, "loss": 0.4755, "step": 1496 }, { "epoch": 0.8289801568989387, "grad_norm": 0.33537980914115906, "learning_rate": 9.081897316749059e-06, "loss": 0.4464, "step": 1497 }, { "epoch": 0.8295339178587909, "grad_norm": 0.35537514090538025, "learning_rate": 9.08003486425619e-06, "loss": 0.4838, "step": 1498 }, { "epoch": 0.8300876788186433, "grad_norm": 0.37226027250289917, "learning_rate": 9.078170715978353e-06, "loss": 0.4635, "step": 1499 }, { "epoch": 0.8306414397784956, "grad_norm": 0.3605194091796875, "learning_rate": 9.07630487269034e-06, "loss": 0.4662, "step": 1500 }, { "epoch": 0.831195200738348, "grad_norm": 0.3520413637161255, "learning_rate": 9.074437335167654e-06, "loss": 0.4734, "step": 1501 }, { "epoch": 0.8317489616982002, "grad_norm": 0.38746941089630127, "learning_rate": 9.072568104186499e-06, "loss": 0.4926, "step": 1502 }, { "epoch": 0.8323027226580526, "grad_norm": 0.3671266734600067, "learning_rate": 9.070697180523785e-06, "loss": 0.472, "step": 1503 }, { "epoch": 0.832856483617905, "grad_norm": 0.3500296473503113, "learning_rate": 9.06882456495712e-06, "loss": 0.4757, "step": 1504 }, { "epoch": 0.8334102445777573, "grad_norm": 0.36263778805732727, "learning_rate": 9.066950258264823e-06, "loss": 0.4766, "step": 1505 }, { "epoch": 0.8339640055376096, "grad_norm": 0.36976009607315063, "learning_rate": 9.06507426122591e-06, "loss": 0.4536, "step": 1506 }, { "epoch": 0.8345177664974619, "grad_norm": 0.3514987528324127, "learning_rate": 9.063196574620103e-06, "loss": 0.45, "step": 1507 }, { "epoch": 0.8350715274573143, "grad_norm": 0.4094353914260864, "learning_rate": 9.061317199227825e-06, "loss": 0.4527, "step": 1508 }, { "epoch": 0.8356252884171665, "grad_norm": 0.38279804587364197, "learning_rate": 9.0594361358302e-06, "loss": 0.456, "step": 1509 }, { "epoch": 0.8361790493770189, "grad_norm": 0.3501136600971222, "learning_rate": 9.057553385209055e-06, "loss": 0.4694, "step": 1510 }, { "epoch": 0.8367328103368713, "grad_norm": 0.3813440203666687, "learning_rate": 9.055668948146917e-06, "loss": 0.461, "step": 1511 }, { "epoch": 0.8372865712967236, "grad_norm": 0.3970472514629364, "learning_rate": 9.053782825427014e-06, "loss": 0.4444, "step": 1512 }, { "epoch": 0.837840332256576, "grad_norm": 0.366157203912735, "learning_rate": 9.051895017833278e-06, "loss": 0.4897, "step": 1513 }, { "epoch": 0.8383940932164282, "grad_norm": 0.41080349683761597, "learning_rate": 9.050005526150336e-06, "loss": 0.4747, "step": 1514 }, { "epoch": 0.8389478541762806, "grad_norm": 0.3930312693119049, "learning_rate": 9.048114351163518e-06, "loss": 0.4572, "step": 1515 }, { "epoch": 0.8395016151361329, "grad_norm": 0.4342571794986725, "learning_rate": 9.046221493658853e-06, "loss": 0.4635, "step": 1516 }, { "epoch": 0.8400553760959852, "grad_norm": 0.38695600628852844, "learning_rate": 9.044326954423071e-06, "loss": 0.4896, "step": 1517 }, { "epoch": 0.8406091370558376, "grad_norm": 0.3505764901638031, "learning_rate": 9.042430734243597e-06, "loss": 0.4637, "step": 1518 }, { "epoch": 0.8411628980156899, "grad_norm": 0.41094887256622314, "learning_rate": 9.040532833908558e-06, "loss": 0.4442, "step": 1519 }, { "epoch": 0.8417166589755423, "grad_norm": 0.39175504446029663, "learning_rate": 9.038633254206778e-06, "loss": 0.4743, "step": 1520 }, { "epoch": 0.8422704199353945, "grad_norm": 0.33409616351127625, "learning_rate": 9.036731995927781e-06, "loss": 0.4607, "step": 1521 }, { "epoch": 0.8428241808952469, "grad_norm": 0.33452293276786804, "learning_rate": 9.034829059861785e-06, "loss": 0.4361, "step": 1522 }, { "epoch": 0.8433779418550992, "grad_norm": 0.4310820698738098, "learning_rate": 9.032924446799709e-06, "loss": 0.4593, "step": 1523 }, { "epoch": 0.8439317028149516, "grad_norm": 0.3394586741924286, "learning_rate": 9.031018157533168e-06, "loss": 0.4337, "step": 1524 }, { "epoch": 0.8444854637748038, "grad_norm": 0.3966839015483856, "learning_rate": 9.02911019285447e-06, "loss": 0.4462, "step": 1525 }, { "epoch": 0.8450392247346562, "grad_norm": 0.41268298029899597, "learning_rate": 9.027200553556621e-06, "loss": 0.4846, "step": 1526 }, { "epoch": 0.8455929856945086, "grad_norm": 0.41113966703414917, "learning_rate": 9.02528924043333e-06, "loss": 0.4594, "step": 1527 }, { "epoch": 0.8461467466543608, "grad_norm": 0.4041735529899597, "learning_rate": 9.023376254278994e-06, "loss": 0.4683, "step": 1528 }, { "epoch": 0.8467005076142132, "grad_norm": 0.36677438020706177, "learning_rate": 9.021461595888708e-06, "loss": 0.4519, "step": 1529 }, { "epoch": 0.8472542685740655, "grad_norm": 0.34333494305610657, "learning_rate": 9.019545266058258e-06, "loss": 0.4692, "step": 1530 }, { "epoch": 0.8478080295339179, "grad_norm": 0.41503170132637024, "learning_rate": 9.017627265584132e-06, "loss": 0.4836, "step": 1531 }, { "epoch": 0.8483617904937701, "grad_norm": 0.4027130901813507, "learning_rate": 9.015707595263508e-06, "loss": 0.4649, "step": 1532 }, { "epoch": 0.8489155514536225, "grad_norm": 0.3800080120563507, "learning_rate": 9.013786255894257e-06, "loss": 0.4527, "step": 1533 }, { "epoch": 0.8494693124134749, "grad_norm": 0.4145483672618866, "learning_rate": 9.011863248274945e-06, "loss": 0.4715, "step": 1534 }, { "epoch": 0.8500230733733272, "grad_norm": 0.39300239086151123, "learning_rate": 9.009938573204835e-06, "loss": 0.462, "step": 1535 }, { "epoch": 0.8505768343331795, "grad_norm": 0.32456183433532715, "learning_rate": 9.008012231483878e-06, "loss": 0.4489, "step": 1536 }, { "epoch": 0.8511305952930318, "grad_norm": 0.36300671100616455, "learning_rate": 9.006084223912717e-06, "loss": 0.4661, "step": 1537 }, { "epoch": 0.8516843562528842, "grad_norm": 0.3559773862361908, "learning_rate": 9.004154551292693e-06, "loss": 0.4726, "step": 1538 }, { "epoch": 0.8522381172127365, "grad_norm": 0.3450942635536194, "learning_rate": 9.002223214425834e-06, "loss": 0.4658, "step": 1539 }, { "epoch": 0.8527918781725888, "grad_norm": 0.34342148900032043, "learning_rate": 9.000290214114862e-06, "loss": 0.461, "step": 1540 }, { "epoch": 0.8533456391324412, "grad_norm": 0.37261006236076355, "learning_rate": 8.998355551163192e-06, "loss": 0.4875, "step": 1541 }, { "epoch": 0.8538994000922935, "grad_norm": 0.3308097720146179, "learning_rate": 8.996419226374926e-06, "loss": 0.4559, "step": 1542 }, { "epoch": 0.8544531610521459, "grad_norm": 0.36439043283462524, "learning_rate": 8.994481240554858e-06, "loss": 0.4884, "step": 1543 }, { "epoch": 0.8550069220119981, "grad_norm": 0.3589314818382263, "learning_rate": 8.992541594508474e-06, "loss": 0.4607, "step": 1544 }, { "epoch": 0.8555606829718505, "grad_norm": 0.36512184143066406, "learning_rate": 8.990600289041951e-06, "loss": 0.4518, "step": 1545 }, { "epoch": 0.8561144439317028, "grad_norm": 0.4093114733695984, "learning_rate": 8.98865732496215e-06, "loss": 0.4676, "step": 1546 }, { "epoch": 0.8566682048915552, "grad_norm": 0.3636806011199951, "learning_rate": 8.98671270307663e-06, "loss": 0.4455, "step": 1547 }, { "epoch": 0.8572219658514075, "grad_norm": 0.36604684591293335, "learning_rate": 8.98476642419363e-06, "loss": 0.4467, "step": 1548 }, { "epoch": 0.8577757268112598, "grad_norm": 0.33249345421791077, "learning_rate": 8.982818489122083e-06, "loss": 0.4272, "step": 1549 }, { "epoch": 0.8583294877711122, "grad_norm": 0.39275580644607544, "learning_rate": 8.98086889867161e-06, "loss": 0.4584, "step": 1550 }, { "epoch": 0.8588832487309644, "grad_norm": 0.3977288603782654, "learning_rate": 8.978917653652518e-06, "loss": 0.4817, "step": 1551 }, { "epoch": 0.8594370096908168, "grad_norm": 0.36270612478256226, "learning_rate": 8.976964754875805e-06, "loss": 0.4617, "step": 1552 }, { "epoch": 0.8599907706506691, "grad_norm": 0.38640034198760986, "learning_rate": 8.975010203153153e-06, "loss": 0.4568, "step": 1553 }, { "epoch": 0.8605445316105215, "grad_norm": 0.3552030622959137, "learning_rate": 8.973053999296934e-06, "loss": 0.4521, "step": 1554 }, { "epoch": 0.8610982925703737, "grad_norm": 0.37227943539619446, "learning_rate": 8.971096144120203e-06, "loss": 0.4551, "step": 1555 }, { "epoch": 0.8616520535302261, "grad_norm": 0.3711647093296051, "learning_rate": 8.969136638436703e-06, "loss": 0.4389, "step": 1556 }, { "epoch": 0.8622058144900785, "grad_norm": 0.36304208636283875, "learning_rate": 8.967175483060867e-06, "loss": 0.4433, "step": 1557 }, { "epoch": 0.8627595754499308, "grad_norm": 0.4009297788143158, "learning_rate": 8.965212678807806e-06, "loss": 0.4817, "step": 1558 }, { "epoch": 0.8633133364097831, "grad_norm": 0.36153316497802734, "learning_rate": 8.963248226493322e-06, "loss": 0.4593, "step": 1559 }, { "epoch": 0.8638670973696354, "grad_norm": 0.3373531997203827, "learning_rate": 8.9612821269339e-06, "loss": 0.4526, "step": 1560 }, { "epoch": 0.8644208583294878, "grad_norm": 0.4063087999820709, "learning_rate": 8.959314380946711e-06, "loss": 0.4686, "step": 1561 }, { "epoch": 0.86497461928934, "grad_norm": 0.29449978470802307, "learning_rate": 8.957344989349609e-06, "loss": 0.4601, "step": 1562 }, { "epoch": 0.8655283802491924, "grad_norm": 0.390842080116272, "learning_rate": 8.955373952961131e-06, "loss": 0.4486, "step": 1563 }, { "epoch": 0.8660821412090448, "grad_norm": 0.35693633556365967, "learning_rate": 8.9534012726005e-06, "loss": 0.4398, "step": 1564 }, { "epoch": 0.8666359021688971, "grad_norm": 0.3625425100326538, "learning_rate": 8.951426949087622e-06, "loss": 0.4824, "step": 1565 }, { "epoch": 0.8671896631287495, "grad_norm": 0.421834260225296, "learning_rate": 8.949450983243083e-06, "loss": 0.4831, "step": 1566 }, { "epoch": 0.8677434240886017, "grad_norm": 0.365532249212265, "learning_rate": 8.947473375888156e-06, "loss": 0.4637, "step": 1567 }, { "epoch": 0.8682971850484541, "grad_norm": 0.3750193417072296, "learning_rate": 8.945494127844791e-06, "loss": 0.4877, "step": 1568 }, { "epoch": 0.8688509460083064, "grad_norm": 0.390395849943161, "learning_rate": 8.943513239935627e-06, "loss": 0.4474, "step": 1569 }, { "epoch": 0.8694047069681587, "grad_norm": 0.3442525565624237, "learning_rate": 8.941530712983977e-06, "loss": 0.4568, "step": 1570 }, { "epoch": 0.8699584679280111, "grad_norm": 0.3506120443344116, "learning_rate": 8.93954654781384e-06, "loss": 0.4741, "step": 1571 }, { "epoch": 0.8705122288878634, "grad_norm": 0.37168318033218384, "learning_rate": 8.937560745249895e-06, "loss": 0.4691, "step": 1572 }, { "epoch": 0.8710659898477158, "grad_norm": 0.351131796836853, "learning_rate": 8.9355733061175e-06, "loss": 0.4687, "step": 1573 }, { "epoch": 0.871619750807568, "grad_norm": 0.3417159616947174, "learning_rate": 8.933584231242697e-06, "loss": 0.4634, "step": 1574 }, { "epoch": 0.8721735117674204, "grad_norm": 0.35240086913108826, "learning_rate": 8.9315935214522e-06, "loss": 0.4618, "step": 1575 }, { "epoch": 0.8727272727272727, "grad_norm": 0.33129164576530457, "learning_rate": 8.929601177573413e-06, "loss": 0.4435, "step": 1576 }, { "epoch": 0.8732810336871251, "grad_norm": 0.3619755804538727, "learning_rate": 8.92760720043441e-06, "loss": 0.4854, "step": 1577 }, { "epoch": 0.8738347946469774, "grad_norm": 0.38372406363487244, "learning_rate": 8.925611590863951e-06, "loss": 0.4693, "step": 1578 }, { "epoch": 0.8743885556068297, "grad_norm": 0.39215630292892456, "learning_rate": 8.923614349691471e-06, "loss": 0.4487, "step": 1579 }, { "epoch": 0.8749423165666821, "grad_norm": 0.3764547109603882, "learning_rate": 8.921615477747081e-06, "loss": 0.4715, "step": 1580 }, { "epoch": 0.8754960775265344, "grad_norm": 0.39083927869796753, "learning_rate": 8.919614975861575e-06, "loss": 0.4816, "step": 1581 }, { "epoch": 0.8760498384863867, "grad_norm": 0.35065168142318726, "learning_rate": 8.917612844866419e-06, "loss": 0.4785, "step": 1582 }, { "epoch": 0.876603599446239, "grad_norm": 0.3438623547554016, "learning_rate": 8.91560908559376e-06, "loss": 0.4713, "step": 1583 }, { "epoch": 0.8771573604060914, "grad_norm": 0.36325982213020325, "learning_rate": 8.91360369887642e-06, "loss": 0.4343, "step": 1584 }, { "epoch": 0.8777111213659436, "grad_norm": 0.35419389605522156, "learning_rate": 8.911596685547898e-06, "loss": 0.48, "step": 1585 }, { "epoch": 0.878264882325796, "grad_norm": 0.34662508964538574, "learning_rate": 8.90958804644237e-06, "loss": 0.468, "step": 1586 }, { "epoch": 0.8788186432856484, "grad_norm": 0.34408241510391235, "learning_rate": 8.907577782394687e-06, "loss": 0.4503, "step": 1587 }, { "epoch": 0.8793724042455007, "grad_norm": 0.35852769017219543, "learning_rate": 8.905565894240373e-06, "loss": 0.4833, "step": 1588 }, { "epoch": 0.879926165205353, "grad_norm": 0.35121363401412964, "learning_rate": 8.90355238281563e-06, "loss": 0.4696, "step": 1589 }, { "epoch": 0.8804799261652053, "grad_norm": 0.3166794180870056, "learning_rate": 8.901537248957334e-06, "loss": 0.4558, "step": 1590 }, { "epoch": 0.8810336871250577, "grad_norm": 0.33067744970321655, "learning_rate": 8.899520493503037e-06, "loss": 0.4659, "step": 1591 }, { "epoch": 0.88158744808491, "grad_norm": 0.35910728573799133, "learning_rate": 8.897502117290959e-06, "loss": 0.4929, "step": 1592 }, { "epoch": 0.8821412090447623, "grad_norm": 0.33726921677589417, "learning_rate": 8.895482121160002e-06, "loss": 0.4742, "step": 1593 }, { "epoch": 0.8826949700046147, "grad_norm": 0.36461976170539856, "learning_rate": 8.893460505949733e-06, "loss": 0.4607, "step": 1594 }, { "epoch": 0.883248730964467, "grad_norm": 0.2989748418331146, "learning_rate": 8.8914372725004e-06, "loss": 0.4457, "step": 1595 }, { "epoch": 0.8838024919243194, "grad_norm": 0.34017232060432434, "learning_rate": 8.889412421652916e-06, "loss": 0.4488, "step": 1596 }, { "epoch": 0.8843562528841716, "grad_norm": 0.39685478806495667, "learning_rate": 8.887385954248871e-06, "loss": 0.4622, "step": 1597 }, { "epoch": 0.884910013844024, "grad_norm": 0.34582528471946716, "learning_rate": 8.885357871130528e-06, "loss": 0.4645, "step": 1598 }, { "epoch": 0.8854637748038763, "grad_norm": 0.39090496301651, "learning_rate": 8.883328173140816e-06, "loss": 0.4826, "step": 1599 }, { "epoch": 0.8860175357637287, "grad_norm": 0.3277999460697174, "learning_rate": 8.88129686112334e-06, "loss": 0.4332, "step": 1600 }, { "epoch": 0.886571296723581, "grad_norm": 0.3896397650241852, "learning_rate": 8.879263935922372e-06, "loss": 0.4734, "step": 1601 }, { "epoch": 0.8871250576834333, "grad_norm": 0.3521168828010559, "learning_rate": 8.877229398382861e-06, "loss": 0.4795, "step": 1602 }, { "epoch": 0.8876788186432857, "grad_norm": 0.39523786306381226, "learning_rate": 8.87519324935042e-06, "loss": 0.4687, "step": 1603 }, { "epoch": 0.888232579603138, "grad_norm": 0.41452541947364807, "learning_rate": 8.873155489671333e-06, "loss": 0.4651, "step": 1604 }, { "epoch": 0.8887863405629903, "grad_norm": 0.36427226662635803, "learning_rate": 8.871116120192553e-06, "loss": 0.485, "step": 1605 }, { "epoch": 0.8893401015228426, "grad_norm": 0.32624030113220215, "learning_rate": 8.869075141761705e-06, "loss": 0.479, "step": 1606 }, { "epoch": 0.889893862482695, "grad_norm": 0.35284295678138733, "learning_rate": 8.867032555227079e-06, "loss": 0.4656, "step": 1607 }, { "epoch": 0.8904476234425474, "grad_norm": 0.3542797565460205, "learning_rate": 8.86498836143764e-06, "loss": 0.4402, "step": 1608 }, { "epoch": 0.8910013844023996, "grad_norm": 0.36863261461257935, "learning_rate": 8.86294256124301e-06, "loss": 0.4299, "step": 1609 }, { "epoch": 0.891555145362252, "grad_norm": 0.36654528975486755, "learning_rate": 8.86089515549349e-06, "loss": 0.4622, "step": 1610 }, { "epoch": 0.8921089063221043, "grad_norm": 0.35045018792152405, "learning_rate": 8.85884614504004e-06, "loss": 0.4554, "step": 1611 }, { "epoch": 0.8926626672819566, "grad_norm": 0.4191456437110901, "learning_rate": 8.856795530734293e-06, "loss": 0.4669, "step": 1612 }, { "epoch": 0.8932164282418089, "grad_norm": 0.43865782022476196, "learning_rate": 8.854743313428543e-06, "loss": 0.471, "step": 1613 }, { "epoch": 0.8937701892016613, "grad_norm": 0.38679763674736023, "learning_rate": 8.852689493975753e-06, "loss": 0.5028, "step": 1614 }, { "epoch": 0.8943239501615136, "grad_norm": 0.39126724004745483, "learning_rate": 8.850634073229555e-06, "loss": 0.4685, "step": 1615 }, { "epoch": 0.8948777111213659, "grad_norm": 0.504376232624054, "learning_rate": 8.848577052044243e-06, "loss": 0.433, "step": 1616 }, { "epoch": 0.8954314720812183, "grad_norm": 0.34994566440582275, "learning_rate": 8.846518431274775e-06, "loss": 0.4553, "step": 1617 }, { "epoch": 0.8959852330410706, "grad_norm": 0.4481828212738037, "learning_rate": 8.844458211776777e-06, "loss": 0.4733, "step": 1618 }, { "epoch": 0.896538994000923, "grad_norm": 0.4027933180332184, "learning_rate": 8.842396394406536e-06, "loss": 0.4432, "step": 1619 }, { "epoch": 0.8970927549607752, "grad_norm": 0.3860743045806885, "learning_rate": 8.840332980021008e-06, "loss": 0.4534, "step": 1620 }, { "epoch": 0.8976465159206276, "grad_norm": 0.4605555534362793, "learning_rate": 8.838267969477809e-06, "loss": 0.4786, "step": 1621 }, { "epoch": 0.8982002768804799, "grad_norm": 0.3668578267097473, "learning_rate": 8.836201363635218e-06, "loss": 0.4657, "step": 1622 }, { "epoch": 0.8987540378403323, "grad_norm": 0.42376434803009033, "learning_rate": 8.834133163352178e-06, "loss": 0.4649, "step": 1623 }, { "epoch": 0.8993077988001846, "grad_norm": 0.36339622735977173, "learning_rate": 8.8320633694883e-06, "loss": 0.4501, "step": 1624 }, { "epoch": 0.8998615597600369, "grad_norm": 0.38097116351127625, "learning_rate": 8.829991982903848e-06, "loss": 0.445, "step": 1625 }, { "epoch": 0.9004153207198893, "grad_norm": 0.3671117424964905, "learning_rate": 8.827919004459751e-06, "loss": 0.4707, "step": 1626 }, { "epoch": 0.9009690816797415, "grad_norm": 0.37180352210998535, "learning_rate": 8.825844435017605e-06, "loss": 0.4463, "step": 1627 }, { "epoch": 0.9015228426395939, "grad_norm": 0.3737637996673584, "learning_rate": 8.823768275439664e-06, "loss": 0.443, "step": 1628 }, { "epoch": 0.9020766035994462, "grad_norm": 0.35167089104652405, "learning_rate": 8.821690526588838e-06, "loss": 0.4754, "step": 1629 }, { "epoch": 0.9026303645592986, "grad_norm": 0.3185480833053589, "learning_rate": 8.819611189328704e-06, "loss": 0.4325, "step": 1630 }, { "epoch": 0.903184125519151, "grad_norm": 0.3695691227912903, "learning_rate": 8.817530264523497e-06, "loss": 0.4393, "step": 1631 }, { "epoch": 0.9037378864790032, "grad_norm": 0.3681502938270569, "learning_rate": 8.815447753038112e-06, "loss": 0.4641, "step": 1632 }, { "epoch": 0.9042916474388556, "grad_norm": 0.35785946249961853, "learning_rate": 8.813363655738103e-06, "loss": 0.4532, "step": 1633 }, { "epoch": 0.9048454083987079, "grad_norm": 0.3440832495689392, "learning_rate": 8.811277973489684e-06, "loss": 0.4562, "step": 1634 }, { "epoch": 0.9053991693585602, "grad_norm": 0.3692430853843689, "learning_rate": 8.809190707159725e-06, "loss": 0.4853, "step": 1635 }, { "epoch": 0.9059529303184125, "grad_norm": 0.38560938835144043, "learning_rate": 8.807101857615758e-06, "loss": 0.4322, "step": 1636 }, { "epoch": 0.9065066912782649, "grad_norm": 0.379032164812088, "learning_rate": 8.805011425725974e-06, "loss": 0.4359, "step": 1637 }, { "epoch": 0.9070604522381173, "grad_norm": 0.3446410596370697, "learning_rate": 8.802919412359215e-06, "loss": 0.4566, "step": 1638 }, { "epoch": 0.9076142131979695, "grad_norm": 0.3590020537376404, "learning_rate": 8.800825818384987e-06, "loss": 0.4623, "step": 1639 }, { "epoch": 0.9081679741578219, "grad_norm": 0.31731587648391724, "learning_rate": 8.79873064467345e-06, "loss": 0.4474, "step": 1640 }, { "epoch": 0.9087217351176742, "grad_norm": 0.41113218665122986, "learning_rate": 8.796633892095422e-06, "loss": 0.4992, "step": 1641 }, { "epoch": 0.9092754960775266, "grad_norm": 0.37111788988113403, "learning_rate": 8.794535561522375e-06, "loss": 0.4631, "step": 1642 }, { "epoch": 0.9098292570373788, "grad_norm": 0.3607940971851349, "learning_rate": 8.792435653826438e-06, "loss": 0.4483, "step": 1643 }, { "epoch": 0.9103830179972312, "grad_norm": 0.37465864419937134, "learning_rate": 8.790334169880397e-06, "loss": 0.4322, "step": 1644 }, { "epoch": 0.9109367789570835, "grad_norm": 0.3804672360420227, "learning_rate": 8.78823111055769e-06, "loss": 0.4502, "step": 1645 }, { "epoch": 0.9114905399169358, "grad_norm": 0.3836743235588074, "learning_rate": 8.786126476732413e-06, "loss": 0.4389, "step": 1646 }, { "epoch": 0.9120443008767882, "grad_norm": 0.42369765043258667, "learning_rate": 8.784020269279315e-06, "loss": 0.4807, "step": 1647 }, { "epoch": 0.9125980618366405, "grad_norm": 0.34027761220932007, "learning_rate": 8.781912489073799e-06, "loss": 0.4718, "step": 1648 }, { "epoch": 0.9131518227964929, "grad_norm": 0.4450474679470062, "learning_rate": 8.77980313699192e-06, "loss": 0.4644, "step": 1649 }, { "epoch": 0.9137055837563451, "grad_norm": 0.3898361325263977, "learning_rate": 8.777692213910392e-06, "loss": 0.4451, "step": 1650 }, { "epoch": 0.9142593447161975, "grad_norm": 0.3675553798675537, "learning_rate": 8.775579720706572e-06, "loss": 0.4715, "step": 1651 }, { "epoch": 0.9148131056760498, "grad_norm": 0.4200625717639923, "learning_rate": 8.773465658258483e-06, "loss": 0.4567, "step": 1652 }, { "epoch": 0.9153668666359022, "grad_norm": 0.4013998508453369, "learning_rate": 8.771350027444786e-06, "loss": 0.4858, "step": 1653 }, { "epoch": 0.9159206275957545, "grad_norm": 0.349529892206192, "learning_rate": 8.769232829144806e-06, "loss": 0.4452, "step": 1654 }, { "epoch": 0.9164743885556068, "grad_norm": 0.3949442207813263, "learning_rate": 8.767114064238509e-06, "loss": 0.4291, "step": 1655 }, { "epoch": 0.9170281495154592, "grad_norm": 0.387249618768692, "learning_rate": 8.764993733606522e-06, "loss": 0.4529, "step": 1656 }, { "epoch": 0.9175819104753115, "grad_norm": 0.3376481831073761, "learning_rate": 8.762871838130114e-06, "loss": 0.4549, "step": 1657 }, { "epoch": 0.9181356714351638, "grad_norm": 0.37048256397247314, "learning_rate": 8.760748378691213e-06, "loss": 0.4509, "step": 1658 }, { "epoch": 0.9186894323950161, "grad_norm": 0.389064222574234, "learning_rate": 8.758623356172388e-06, "loss": 0.4518, "step": 1659 }, { "epoch": 0.9192431933548685, "grad_norm": 0.3659161329269409, "learning_rate": 8.756496771456865e-06, "loss": 0.4442, "step": 1660 }, { "epoch": 0.9197969543147209, "grad_norm": 0.3649126887321472, "learning_rate": 8.754368625428516e-06, "loss": 0.4569, "step": 1661 }, { "epoch": 0.9203507152745731, "grad_norm": 0.36831873655319214, "learning_rate": 8.752238918971862e-06, "loss": 0.4622, "step": 1662 }, { "epoch": 0.9209044762344255, "grad_norm": 0.43260011076927185, "learning_rate": 8.750107652972073e-06, "loss": 0.4497, "step": 1663 }, { "epoch": 0.9214582371942778, "grad_norm": 0.3511306345462799, "learning_rate": 8.747974828314966e-06, "loss": 0.4583, "step": 1664 }, { "epoch": 0.9220119981541302, "grad_norm": 0.3386932909488678, "learning_rate": 8.745840445887008e-06, "loss": 0.4562, "step": 1665 }, { "epoch": 0.9225657591139824, "grad_norm": 0.37471526861190796, "learning_rate": 8.743704506575315e-06, "loss": 0.4795, "step": 1666 }, { "epoch": 0.9231195200738348, "grad_norm": 0.31593385338783264, "learning_rate": 8.741567011267641e-06, "loss": 0.4522, "step": 1667 }, { "epoch": 0.9236732810336872, "grad_norm": 0.3265173137187958, "learning_rate": 8.7394279608524e-06, "loss": 0.4491, "step": 1668 }, { "epoch": 0.9242270419935394, "grad_norm": 0.37188243865966797, "learning_rate": 8.737287356218642e-06, "loss": 0.4409, "step": 1669 }, { "epoch": 0.9247808029533918, "grad_norm": 0.3541339635848999, "learning_rate": 8.73514519825607e-06, "loss": 0.4615, "step": 1670 }, { "epoch": 0.9253345639132441, "grad_norm": 0.3623642325401306, "learning_rate": 8.733001487855021e-06, "loss": 0.4355, "step": 1671 }, { "epoch": 0.9258883248730965, "grad_norm": 0.40988850593566895, "learning_rate": 8.730856225906495e-06, "loss": 0.4779, "step": 1672 }, { "epoch": 0.9264420858329487, "grad_norm": 0.34558460116386414, "learning_rate": 8.728709413302122e-06, "loss": 0.4617, "step": 1673 }, { "epoch": 0.9269958467928011, "grad_norm": 0.373626708984375, "learning_rate": 8.726561050934182e-06, "loss": 0.4544, "step": 1674 }, { "epoch": 0.9275496077526535, "grad_norm": 0.39358246326446533, "learning_rate": 8.724411139695602e-06, "loss": 0.4637, "step": 1675 }, { "epoch": 0.9281033687125058, "grad_norm": 0.3811245858669281, "learning_rate": 8.722259680479945e-06, "loss": 0.4615, "step": 1676 }, { "epoch": 0.9286571296723581, "grad_norm": 0.34624814987182617, "learning_rate": 8.720106674181423e-06, "loss": 0.4359, "step": 1677 }, { "epoch": 0.9292108906322104, "grad_norm": 0.43088698387145996, "learning_rate": 8.717952121694894e-06, "loss": 0.472, "step": 1678 }, { "epoch": 0.9297646515920628, "grad_norm": 0.34153446555137634, "learning_rate": 8.715796023915853e-06, "loss": 0.4574, "step": 1679 }, { "epoch": 0.930318412551915, "grad_norm": 0.3740317225456238, "learning_rate": 8.713638381740437e-06, "loss": 0.4492, "step": 1680 }, { "epoch": 0.9308721735117674, "grad_norm": 0.3311839699745178, "learning_rate": 8.711479196065427e-06, "loss": 0.4474, "step": 1681 }, { "epoch": 0.9314259344716197, "grad_norm": 0.42220956087112427, "learning_rate": 8.709318467788249e-06, "loss": 0.4879, "step": 1682 }, { "epoch": 0.9319796954314721, "grad_norm": 0.36266687512397766, "learning_rate": 8.70715619780696e-06, "loss": 0.4508, "step": 1683 }, { "epoch": 0.9325334563913245, "grad_norm": 0.35641127824783325, "learning_rate": 8.70499238702027e-06, "loss": 0.4722, "step": 1684 }, { "epoch": 0.9330872173511767, "grad_norm": 0.37649857997894287, "learning_rate": 8.702827036327523e-06, "loss": 0.4587, "step": 1685 }, { "epoch": 0.9336409783110291, "grad_norm": 0.3379674553871155, "learning_rate": 8.700660146628701e-06, "loss": 0.4294, "step": 1686 }, { "epoch": 0.9341947392708814, "grad_norm": 0.3739279508590698, "learning_rate": 8.698491718824433e-06, "loss": 0.4435, "step": 1687 }, { "epoch": 0.9347485002307337, "grad_norm": 0.36477312445640564, "learning_rate": 8.696321753815978e-06, "loss": 0.4643, "step": 1688 }, { "epoch": 0.935302261190586, "grad_norm": 0.35597923398017883, "learning_rate": 8.69415025250524e-06, "loss": 0.4474, "step": 1689 }, { "epoch": 0.9358560221504384, "grad_norm": 0.3719998896121979, "learning_rate": 8.69197721579476e-06, "loss": 0.4615, "step": 1690 }, { "epoch": 0.9364097831102908, "grad_norm": 0.3820621073246002, "learning_rate": 8.689802644587719e-06, "loss": 0.4521, "step": 1691 }, { "epoch": 0.936963544070143, "grad_norm": 0.34250885248184204, "learning_rate": 8.687626539787933e-06, "loss": 0.4478, "step": 1692 }, { "epoch": 0.9375173050299954, "grad_norm": 0.3319213390350342, "learning_rate": 8.685448902299856e-06, "loss": 0.459, "step": 1693 }, { "epoch": 0.9380710659898477, "grad_norm": 0.41935470700263977, "learning_rate": 8.68326973302858e-06, "loss": 0.4843, "step": 1694 }, { "epoch": 0.9386248269497001, "grad_norm": 0.34997254610061646, "learning_rate": 8.681089032879832e-06, "loss": 0.4766, "step": 1695 }, { "epoch": 0.9391785879095523, "grad_norm": 0.39504632353782654, "learning_rate": 8.678906802759979e-06, "loss": 0.4569, "step": 1696 }, { "epoch": 0.9397323488694047, "grad_norm": 0.4008169174194336, "learning_rate": 8.676723043576019e-06, "loss": 0.4778, "step": 1697 }, { "epoch": 0.9402861098292571, "grad_norm": 0.44354283809661865, "learning_rate": 8.674537756235589e-06, "loss": 0.4574, "step": 1698 }, { "epoch": 0.9408398707891094, "grad_norm": 0.363628625869751, "learning_rate": 8.67235094164696e-06, "loss": 0.479, "step": 1699 }, { "epoch": 0.9413936317489617, "grad_norm": 0.4405473470687866, "learning_rate": 8.670162600719037e-06, "loss": 0.4833, "step": 1700 }, { "epoch": 0.941947392708814, "grad_norm": 0.37836799025535583, "learning_rate": 8.66797273436136e-06, "loss": 0.454, "step": 1701 }, { "epoch": 0.9425011536686664, "grad_norm": 0.3787548840045929, "learning_rate": 8.665781343484105e-06, "loss": 0.4401, "step": 1702 }, { "epoch": 0.9430549146285186, "grad_norm": 0.4190833568572998, "learning_rate": 8.66358842899808e-06, "loss": 0.4651, "step": 1703 }, { "epoch": 0.943608675588371, "grad_norm": 0.3766055107116699, "learning_rate": 8.661393991814724e-06, "loss": 0.482, "step": 1704 }, { "epoch": 0.9441624365482234, "grad_norm": 0.40077564120292664, "learning_rate": 8.65919803284611e-06, "loss": 0.4523, "step": 1705 }, { "epoch": 0.9447161975080757, "grad_norm": 0.4095858037471771, "learning_rate": 8.657000553004947e-06, "loss": 0.456, "step": 1706 }, { "epoch": 0.945269958467928, "grad_norm": 0.3428434431552887, "learning_rate": 8.654801553204573e-06, "loss": 0.4536, "step": 1707 }, { "epoch": 0.9458237194277803, "grad_norm": 0.3488950729370117, "learning_rate": 8.65260103435896e-06, "loss": 0.4646, "step": 1708 }, { "epoch": 0.9463774803876327, "grad_norm": 0.34898507595062256, "learning_rate": 8.650398997382705e-06, "loss": 0.4792, "step": 1709 }, { "epoch": 0.946931241347485, "grad_norm": 0.3544807732105255, "learning_rate": 8.648195443191046e-06, "loss": 0.4455, "step": 1710 }, { "epoch": 0.9474850023073373, "grad_norm": 0.37732404470443726, "learning_rate": 8.645990372699842e-06, "loss": 0.4663, "step": 1711 }, { "epoch": 0.9480387632671896, "grad_norm": 0.41117697954177856, "learning_rate": 8.64378378682559e-06, "loss": 0.4657, "step": 1712 }, { "epoch": 0.948592524227042, "grad_norm": 0.3845609724521637, "learning_rate": 8.641575686485411e-06, "loss": 0.4549, "step": 1713 }, { "epoch": 0.9491462851868944, "grad_norm": 0.3861723244190216, "learning_rate": 8.639366072597057e-06, "loss": 0.4417, "step": 1714 }, { "epoch": 0.9497000461467466, "grad_norm": 0.36759719252586365, "learning_rate": 8.637154946078912e-06, "loss": 0.4598, "step": 1715 }, { "epoch": 0.950253807106599, "grad_norm": 0.35283493995666504, "learning_rate": 8.634942307849987e-06, "loss": 0.449, "step": 1716 }, { "epoch": 0.9508075680664513, "grad_norm": 0.3326910734176636, "learning_rate": 8.632728158829919e-06, "loss": 0.4603, "step": 1717 }, { "epoch": 0.9513613290263037, "grad_norm": 0.38345155119895935, "learning_rate": 8.630512499938974e-06, "loss": 0.4575, "step": 1718 }, { "epoch": 0.9519150899861559, "grad_norm": 0.3392128348350525, "learning_rate": 8.62829533209805e-06, "loss": 0.4493, "step": 1719 }, { "epoch": 0.9524688509460083, "grad_norm": 0.4199959933757782, "learning_rate": 8.626076656228665e-06, "loss": 0.473, "step": 1720 }, { "epoch": 0.9530226119058607, "grad_norm": 0.360125869512558, "learning_rate": 8.623856473252971e-06, "loss": 0.4696, "step": 1721 }, { "epoch": 0.953576372865713, "grad_norm": 0.3546886742115021, "learning_rate": 8.621634784093737e-06, "loss": 0.4846, "step": 1722 }, { "epoch": 0.9541301338255653, "grad_norm": 0.3849370777606964, "learning_rate": 8.61941158967437e-06, "loss": 0.4659, "step": 1723 }, { "epoch": 0.9546838947854176, "grad_norm": 0.29805630445480347, "learning_rate": 8.617186890918891e-06, "loss": 0.4646, "step": 1724 }, { "epoch": 0.95523765574527, "grad_norm": 0.33960479497909546, "learning_rate": 8.614960688751956e-06, "loss": 0.4655, "step": 1725 }, { "epoch": 0.9557914167051222, "grad_norm": 0.3426823019981384, "learning_rate": 8.61273298409884e-06, "loss": 0.4518, "step": 1726 }, { "epoch": 0.9563451776649746, "grad_norm": 0.36784687638282776, "learning_rate": 8.610503777885441e-06, "loss": 0.4739, "step": 1727 }, { "epoch": 0.956898938624827, "grad_norm": 0.3398304879665375, "learning_rate": 8.608273071038288e-06, "loss": 0.4471, "step": 1728 }, { "epoch": 0.9574526995846793, "grad_norm": 0.4191339910030365, "learning_rate": 8.606040864484528e-06, "loss": 0.4736, "step": 1729 }, { "epoch": 0.9580064605445316, "grad_norm": 0.3498739004135132, "learning_rate": 8.60380715915193e-06, "loss": 0.4572, "step": 1730 }, { "epoch": 0.9585602215043839, "grad_norm": 0.413266122341156, "learning_rate": 8.601571955968897e-06, "loss": 0.4698, "step": 1731 }, { "epoch": 0.9591139824642363, "grad_norm": 0.3671891689300537, "learning_rate": 8.59933525586444e-06, "loss": 0.4651, "step": 1732 }, { "epoch": 0.9596677434240886, "grad_norm": 0.3516353368759155, "learning_rate": 8.5970970597682e-06, "loss": 0.4636, "step": 1733 }, { "epoch": 0.9602215043839409, "grad_norm": 0.33125871419906616, "learning_rate": 8.594857368610438e-06, "loss": 0.4424, "step": 1734 }, { "epoch": 0.9607752653437933, "grad_norm": 0.33895403146743774, "learning_rate": 8.59261618332204e-06, "loss": 0.4526, "step": 1735 }, { "epoch": 0.9613290263036456, "grad_norm": 0.33561381697654724, "learning_rate": 8.590373504834506e-06, "loss": 0.4663, "step": 1736 }, { "epoch": 0.961882787263498, "grad_norm": 0.3625500500202179, "learning_rate": 8.588129334079961e-06, "loss": 0.431, "step": 1737 }, { "epoch": 0.9624365482233502, "grad_norm": 0.3737278878688812, "learning_rate": 8.585883671991155e-06, "loss": 0.4655, "step": 1738 }, { "epoch": 0.9629903091832026, "grad_norm": 0.338752418756485, "learning_rate": 8.583636519501446e-06, "loss": 0.4815, "step": 1739 }, { "epoch": 0.9635440701430549, "grad_norm": 0.33975672721862793, "learning_rate": 8.58138787754482e-06, "loss": 0.456, "step": 1740 }, { "epoch": 0.9640978311029073, "grad_norm": 0.35667684674263, "learning_rate": 8.579137747055882e-06, "loss": 0.4669, "step": 1741 }, { "epoch": 0.9646515920627595, "grad_norm": 0.3478187620639801, "learning_rate": 8.57688612896985e-06, "loss": 0.4416, "step": 1742 }, { "epoch": 0.9652053530226119, "grad_norm": 0.371099054813385, "learning_rate": 8.57463302422257e-06, "loss": 0.4564, "step": 1743 }, { "epoch": 0.9657591139824643, "grad_norm": 0.41678091883659363, "learning_rate": 8.572378433750494e-06, "loss": 0.4806, "step": 1744 }, { "epoch": 0.9663128749423165, "grad_norm": 0.3458438515663147, "learning_rate": 8.5701223584907e-06, "loss": 0.4729, "step": 1745 }, { "epoch": 0.9668666359021689, "grad_norm": 0.3644242584705353, "learning_rate": 8.567864799380882e-06, "loss": 0.4573, "step": 1746 }, { "epoch": 0.9674203968620212, "grad_norm": 0.35541006922721863, "learning_rate": 8.565605757359346e-06, "loss": 0.4237, "step": 1747 }, { "epoch": 0.9679741578218736, "grad_norm": 0.34315425157546997, "learning_rate": 8.563345233365022e-06, "loss": 0.4655, "step": 1748 }, { "epoch": 0.9685279187817258, "grad_norm": 0.3646874725818634, "learning_rate": 8.561083228337447e-06, "loss": 0.4527, "step": 1749 }, { "epoch": 0.9690816797415782, "grad_norm": 0.38729575276374817, "learning_rate": 8.558819743216781e-06, "loss": 0.4639, "step": 1750 }, { "epoch": 0.9696354407014306, "grad_norm": 0.35068055987358093, "learning_rate": 8.556554778943795e-06, "loss": 0.4634, "step": 1751 }, { "epoch": 0.9701892016612829, "grad_norm": 0.3680146336555481, "learning_rate": 8.554288336459878e-06, "loss": 0.4801, "step": 1752 }, { "epoch": 0.9707429626211352, "grad_norm": 0.3824026584625244, "learning_rate": 8.55202041670703e-06, "loss": 0.4719, "step": 1753 }, { "epoch": 0.9712967235809875, "grad_norm": 0.3869761526584625, "learning_rate": 8.549751020627868e-06, "loss": 0.438, "step": 1754 }, { "epoch": 0.9718504845408399, "grad_norm": 0.3353727459907532, "learning_rate": 8.547480149165618e-06, "loss": 0.4358, "step": 1755 }, { "epoch": 0.9724042455006922, "grad_norm": 0.41229328513145447, "learning_rate": 8.545207803264126e-06, "loss": 0.4554, "step": 1756 }, { "epoch": 0.9729580064605445, "grad_norm": 0.3605651557445526, "learning_rate": 8.542933983867845e-06, "loss": 0.4652, "step": 1757 }, { "epoch": 0.9735117674203969, "grad_norm": 0.3537110388278961, "learning_rate": 8.540658691921844e-06, "loss": 0.447, "step": 1758 }, { "epoch": 0.9740655283802492, "grad_norm": 0.33068034052848816, "learning_rate": 8.538381928371803e-06, "loss": 0.4323, "step": 1759 }, { "epoch": 0.9746192893401016, "grad_norm": 0.35125473141670227, "learning_rate": 8.536103694164008e-06, "loss": 0.4753, "step": 1760 }, { "epoch": 0.9751730502999538, "grad_norm": 0.36587145924568176, "learning_rate": 8.533823990245369e-06, "loss": 0.47, "step": 1761 }, { "epoch": 0.9757268112598062, "grad_norm": 0.36104097962379456, "learning_rate": 8.531542817563395e-06, "loss": 0.4704, "step": 1762 }, { "epoch": 0.9762805722196585, "grad_norm": 0.34547796845436096, "learning_rate": 8.52926017706621e-06, "loss": 0.4318, "step": 1763 }, { "epoch": 0.9768343331795108, "grad_norm": 0.32819390296936035, "learning_rate": 8.52697606970255e-06, "loss": 0.4416, "step": 1764 }, { "epoch": 0.9773880941393632, "grad_norm": 0.36982956528663635, "learning_rate": 8.524690496421757e-06, "loss": 0.4484, "step": 1765 }, { "epoch": 0.9779418550992155, "grad_norm": 0.3343288004398346, "learning_rate": 8.522403458173785e-06, "loss": 0.4628, "step": 1766 }, { "epoch": 0.9784956160590679, "grad_norm": 0.3630775511264801, "learning_rate": 8.520114955909193e-06, "loss": 0.4694, "step": 1767 }, { "epoch": 0.9790493770189201, "grad_norm": 0.36657655239105225, "learning_rate": 8.517824990579156e-06, "loss": 0.4803, "step": 1768 }, { "epoch": 0.9796031379787725, "grad_norm": 0.38215020298957825, "learning_rate": 8.515533563135447e-06, "loss": 0.4718, "step": 1769 }, { "epoch": 0.9801568989386248, "grad_norm": 0.34029969573020935, "learning_rate": 8.513240674530457e-06, "loss": 0.4522, "step": 1770 }, { "epoch": 0.9807106598984772, "grad_norm": 0.3234170973300934, "learning_rate": 8.510946325717175e-06, "loss": 0.457, "step": 1771 }, { "epoch": 0.9812644208583294, "grad_norm": 0.38812074065208435, "learning_rate": 8.508650517649204e-06, "loss": 0.4292, "step": 1772 }, { "epoch": 0.9818181818181818, "grad_norm": 0.3146967887878418, "learning_rate": 8.50635325128075e-06, "loss": 0.4377, "step": 1773 }, { "epoch": 0.9823719427780342, "grad_norm": 0.34443777799606323, "learning_rate": 8.504054527566626e-06, "loss": 0.454, "step": 1774 }, { "epoch": 0.9829257037378865, "grad_norm": 0.35345640778541565, "learning_rate": 8.50175434746225e-06, "loss": 0.4468, "step": 1775 }, { "epoch": 0.9834794646977388, "grad_norm": 0.32277268171310425, "learning_rate": 8.499452711923646e-06, "loss": 0.4721, "step": 1776 }, { "epoch": 0.9840332256575911, "grad_norm": 0.4012487828731537, "learning_rate": 8.497149621907444e-06, "loss": 0.4681, "step": 1777 }, { "epoch": 0.9845869866174435, "grad_norm": 0.36693471670150757, "learning_rate": 8.494845078370877e-06, "loss": 0.4489, "step": 1778 }, { "epoch": 0.9851407475772958, "grad_norm": 0.32927218079566956, "learning_rate": 8.49253908227178e-06, "loss": 0.4306, "step": 1779 }, { "epoch": 0.9856945085371481, "grad_norm": 0.347852885723114, "learning_rate": 8.490231634568597e-06, "loss": 0.4713, "step": 1780 }, { "epoch": 0.9862482694970005, "grad_norm": 0.42981240153312683, "learning_rate": 8.48792273622037e-06, "loss": 0.4807, "step": 1781 }, { "epoch": 0.9868020304568528, "grad_norm": 0.3782055675983429, "learning_rate": 8.48561238818675e-06, "loss": 0.4522, "step": 1782 }, { "epoch": 0.9873557914167052, "grad_norm": 0.3663841187953949, "learning_rate": 8.483300591427986e-06, "loss": 0.4503, "step": 1783 }, { "epoch": 0.9879095523765574, "grad_norm": 0.395039826631546, "learning_rate": 8.480987346904927e-06, "loss": 0.4551, "step": 1784 }, { "epoch": 0.9884633133364098, "grad_norm": 0.4314592480659485, "learning_rate": 8.478672655579033e-06, "loss": 0.4898, "step": 1785 }, { "epoch": 0.9890170742962621, "grad_norm": 0.38875046372413635, "learning_rate": 8.476356518412352e-06, "loss": 0.4714, "step": 1786 }, { "epoch": 0.9895708352561144, "grad_norm": 0.5190536379814148, "learning_rate": 8.474038936367543e-06, "loss": 0.4849, "step": 1787 }, { "epoch": 0.9901245962159668, "grad_norm": 0.43149667978286743, "learning_rate": 8.471719910407865e-06, "loss": 0.485, "step": 1788 }, { "epoch": 0.9906783571758191, "grad_norm": 0.43016260862350464, "learning_rate": 8.469399441497173e-06, "loss": 0.4763, "step": 1789 }, { "epoch": 0.9912321181356715, "grad_norm": 0.5015780329704285, "learning_rate": 8.467077530599921e-06, "loss": 0.4626, "step": 1790 }, { "epoch": 0.9917858790955237, "grad_norm": 0.3883245289325714, "learning_rate": 8.46475417868117e-06, "loss": 0.46, "step": 1791 }, { "epoch": 0.9923396400553761, "grad_norm": 0.40611183643341064, "learning_rate": 8.462429386706571e-06, "loss": 0.4754, "step": 1792 }, { "epoch": 0.9928934010152284, "grad_norm": 0.46700143814086914, "learning_rate": 8.460103155642379e-06, "loss": 0.4423, "step": 1793 }, { "epoch": 0.9934471619750808, "grad_norm": 0.3788582384586334, "learning_rate": 8.457775486455444e-06, "loss": 0.4749, "step": 1794 }, { "epoch": 0.9940009229349331, "grad_norm": 0.38485920429229736, "learning_rate": 8.455446380113217e-06, "loss": 0.486, "step": 1795 }, { "epoch": 0.9945546838947854, "grad_norm": 0.39980655908584595, "learning_rate": 8.453115837583744e-06, "loss": 0.4613, "step": 1796 }, { "epoch": 0.9951084448546378, "grad_norm": 0.41741248965263367, "learning_rate": 8.450783859835668e-06, "loss": 0.4823, "step": 1797 }, { "epoch": 0.99566220581449, "grad_norm": 0.4134982228279114, "learning_rate": 8.448450447838227e-06, "loss": 0.4647, "step": 1798 }, { "epoch": 0.9962159667743424, "grad_norm": 0.46317610144615173, "learning_rate": 8.446115602561263e-06, "loss": 0.4507, "step": 1799 }, { "epoch": 0.9967697277341947, "grad_norm": 0.396099716424942, "learning_rate": 8.443779324975201e-06, "loss": 0.4445, "step": 1800 }, { "epoch": 0.9973234886940471, "grad_norm": 0.4056612551212311, "learning_rate": 8.441441616051071e-06, "loss": 0.4443, "step": 1801 }, { "epoch": 0.9978772496538993, "grad_norm": 0.3922084867954254, "learning_rate": 8.439102476760496e-06, "loss": 0.4437, "step": 1802 }, { "epoch": 0.9984310106137517, "grad_norm": 0.38536158204078674, "learning_rate": 8.436761908075693e-06, "loss": 0.4492, "step": 1803 }, { "epoch": 0.9989847715736041, "grad_norm": 0.37921708822250366, "learning_rate": 8.43441991096947e-06, "loss": 0.4702, "step": 1804 }, { "epoch": 0.9995385325334564, "grad_norm": 0.36754560470581055, "learning_rate": 8.43207648641523e-06, "loss": 0.4514, "step": 1805 }, { "epoch": 1.0000922934933087, "grad_norm": 0.4756008982658386, "learning_rate": 8.429731635386976e-06, "loss": 0.5405, "step": 1806 }, { "epoch": 1.000646054453161, "grad_norm": 0.3840515911579132, "learning_rate": 8.427385358859293e-06, "loss": 0.4449, "step": 1807 }, { "epoch": 1.0011998154130133, "grad_norm": 0.4073272943496704, "learning_rate": 8.425037657807368e-06, "loss": 0.4428, "step": 1808 }, { "epoch": 1.0017535763728658, "grad_norm": 0.3505707383155823, "learning_rate": 8.422688533206975e-06, "loss": 0.403, "step": 1809 }, { "epoch": 1.002307337332718, "grad_norm": 0.44710224866867065, "learning_rate": 8.42033798603448e-06, "loss": 0.4606, "step": 1810 }, { "epoch": 1.0028610982925703, "grad_norm": 0.3514285981655121, "learning_rate": 8.417986017266841e-06, "loss": 0.4148, "step": 1811 }, { "epoch": 1.0034148592524228, "grad_norm": 0.39757734537124634, "learning_rate": 8.415632627881607e-06, "loss": 0.4299, "step": 1812 }, { "epoch": 1.003968620212275, "grad_norm": 0.3489486575126648, "learning_rate": 8.413277818856918e-06, "loss": 0.4009, "step": 1813 }, { "epoch": 1.0045223811721273, "grad_norm": 0.40171128511428833, "learning_rate": 8.4109215911715e-06, "loss": 0.4319, "step": 1814 }, { "epoch": 1.0050761421319796, "grad_norm": 0.39322730898857117, "learning_rate": 8.408563945804678e-06, "loss": 0.4306, "step": 1815 }, { "epoch": 1.005629903091832, "grad_norm": 0.3526424169540405, "learning_rate": 8.406204883736354e-06, "loss": 0.4306, "step": 1816 }, { "epoch": 1.0061836640516844, "grad_norm": 0.40253961086273193, "learning_rate": 8.403844405947028e-06, "loss": 0.4454, "step": 1817 }, { "epoch": 1.0067374250115366, "grad_norm": 0.43009936809539795, "learning_rate": 8.401482513417787e-06, "loss": 0.4323, "step": 1818 }, { "epoch": 1.0072911859713891, "grad_norm": 0.4509524405002594, "learning_rate": 8.399119207130302e-06, "loss": 0.4416, "step": 1819 }, { "epoch": 1.0078449469312414, "grad_norm": 0.40502113103866577, "learning_rate": 8.396754488066833e-06, "loss": 0.4255, "step": 1820 }, { "epoch": 1.0083987078910936, "grad_norm": 0.37317290902137756, "learning_rate": 8.394388357210232e-06, "loss": 0.4427, "step": 1821 }, { "epoch": 1.008952468850946, "grad_norm": 0.4729897677898407, "learning_rate": 8.392020815543932e-06, "loss": 0.4426, "step": 1822 }, { "epoch": 1.0095062298107984, "grad_norm": 0.34860461950302124, "learning_rate": 8.389651864051953e-06, "loss": 0.436, "step": 1823 }, { "epoch": 1.0100599907706507, "grad_norm": 0.39754560589790344, "learning_rate": 8.387281503718903e-06, "loss": 0.4617, "step": 1824 }, { "epoch": 1.010613751730503, "grad_norm": 0.4255768358707428, "learning_rate": 8.384909735529977e-06, "loss": 0.4422, "step": 1825 }, { "epoch": 1.0111675126903554, "grad_norm": 0.35930874943733215, "learning_rate": 8.38253656047095e-06, "loss": 0.4592, "step": 1826 }, { "epoch": 1.0117212736502077, "grad_norm": 0.35762837529182434, "learning_rate": 8.38016197952819e-06, "loss": 0.3928, "step": 1827 }, { "epoch": 1.01227503461006, "grad_norm": 0.392897367477417, "learning_rate": 8.377785993688637e-06, "loss": 0.4441, "step": 1828 }, { "epoch": 1.0128287955699122, "grad_norm": 0.33119478821754456, "learning_rate": 8.375408603939827e-06, "loss": 0.4196, "step": 1829 }, { "epoch": 1.0133825565297647, "grad_norm": 0.34950754046440125, "learning_rate": 8.373029811269873e-06, "loss": 0.4075, "step": 1830 }, { "epoch": 1.013936317489617, "grad_norm": 0.35339802503585815, "learning_rate": 8.370649616667472e-06, "loss": 0.4493, "step": 1831 }, { "epoch": 1.0144900784494693, "grad_norm": 0.3227692246437073, "learning_rate": 8.368268021121907e-06, "loss": 0.4226, "step": 1832 }, { "epoch": 1.0150438394093217, "grad_norm": 0.32485485076904297, "learning_rate": 8.365885025623038e-06, "loss": 0.4269, "step": 1833 }, { "epoch": 1.015597600369174, "grad_norm": 0.3390521705150604, "learning_rate": 8.363500631161309e-06, "loss": 0.4216, "step": 1834 }, { "epoch": 1.0161513613290263, "grad_norm": 0.385754257440567, "learning_rate": 8.361114838727749e-06, "loss": 0.4545, "step": 1835 }, { "epoch": 1.0167051222888785, "grad_norm": 0.29870209097862244, "learning_rate": 8.35872764931396e-06, "loss": 0.3843, "step": 1836 }, { "epoch": 1.017258883248731, "grad_norm": 0.3287520110607147, "learning_rate": 8.356339063912136e-06, "loss": 0.4354, "step": 1837 }, { "epoch": 1.0178126442085833, "grad_norm": 0.33757615089416504, "learning_rate": 8.353949083515038e-06, "loss": 0.4449, "step": 1838 }, { "epoch": 1.0183664051684356, "grad_norm": 0.34815558791160583, "learning_rate": 8.351557709116021e-06, "loss": 0.4544, "step": 1839 }, { "epoch": 1.018920166128288, "grad_norm": 0.36283236742019653, "learning_rate": 8.349164941709004e-06, "loss": 0.4824, "step": 1840 }, { "epoch": 1.0194739270881403, "grad_norm": 0.37099260091781616, "learning_rate": 8.3467707822885e-06, "loss": 0.4238, "step": 1841 }, { "epoch": 1.0200276880479926, "grad_norm": 0.365957647562027, "learning_rate": 8.344375231849588e-06, "loss": 0.4276, "step": 1842 }, { "epoch": 1.0205814490078449, "grad_norm": 0.3483434021472931, "learning_rate": 8.341978291387935e-06, "loss": 0.4075, "step": 1843 }, { "epoch": 1.0211352099676974, "grad_norm": 0.45256757736206055, "learning_rate": 8.33957996189978e-06, "loss": 0.4544, "step": 1844 }, { "epoch": 1.0216889709275496, "grad_norm": 0.40886983275413513, "learning_rate": 8.33718024438194e-06, "loss": 0.4678, "step": 1845 }, { "epoch": 1.022242731887402, "grad_norm": 0.3515442907810211, "learning_rate": 8.334779139831808e-06, "loss": 0.3881, "step": 1846 }, { "epoch": 1.0227964928472542, "grad_norm": 0.342275470495224, "learning_rate": 8.33237664924736e-06, "loss": 0.4636, "step": 1847 }, { "epoch": 1.0233502538071066, "grad_norm": 0.3425905108451843, "learning_rate": 8.329972773627139e-06, "loss": 0.4091, "step": 1848 }, { "epoch": 1.023904014766959, "grad_norm": 0.33216148614883423, "learning_rate": 8.32756751397027e-06, "loss": 0.414, "step": 1849 }, { "epoch": 1.0244577757268112, "grad_norm": 0.3421858847141266, "learning_rate": 8.325160871276448e-06, "loss": 0.456, "step": 1850 }, { "epoch": 1.0250115366866637, "grad_norm": 0.33745694160461426, "learning_rate": 8.322752846545949e-06, "loss": 0.4656, "step": 1851 }, { "epoch": 1.025565297646516, "grad_norm": 0.3195571303367615, "learning_rate": 8.320343440779622e-06, "loss": 0.4322, "step": 1852 }, { "epoch": 1.0261190586063682, "grad_norm": 0.381334125995636, "learning_rate": 8.317932654978882e-06, "loss": 0.4366, "step": 1853 }, { "epoch": 1.0266728195662205, "grad_norm": 0.34747380018234253, "learning_rate": 8.31552049014573e-06, "loss": 0.4214, "step": 1854 }, { "epoch": 1.027226580526073, "grad_norm": 0.3249076008796692, "learning_rate": 8.313106947282732e-06, "loss": 0.4047, "step": 1855 }, { "epoch": 1.0277803414859252, "grad_norm": 0.379791796207428, "learning_rate": 8.31069202739303e-06, "loss": 0.4578, "step": 1856 }, { "epoch": 1.0283341024457775, "grad_norm": 0.3677885830402374, "learning_rate": 8.308275731480334e-06, "loss": 0.4593, "step": 1857 }, { "epoch": 1.02888786340563, "grad_norm": 0.323329359292984, "learning_rate": 8.305858060548934e-06, "loss": 0.4058, "step": 1858 }, { "epoch": 1.0294416243654823, "grad_norm": 0.35361409187316895, "learning_rate": 8.303439015603683e-06, "loss": 0.4466, "step": 1859 }, { "epoch": 1.0299953853253345, "grad_norm": 0.3755671977996826, "learning_rate": 8.301018597650008e-06, "loss": 0.4416, "step": 1860 }, { "epoch": 1.0305491462851868, "grad_norm": 0.3538978099822998, "learning_rate": 8.298596807693908e-06, "loss": 0.4, "step": 1861 }, { "epoch": 1.0311029072450393, "grad_norm": 0.39158132672309875, "learning_rate": 8.296173646741954e-06, "loss": 0.4717, "step": 1862 }, { "epoch": 1.0316566682048915, "grad_norm": 0.386127769947052, "learning_rate": 8.293749115801283e-06, "loss": 0.4795, "step": 1863 }, { "epoch": 1.0322104291647438, "grad_norm": 0.3185324966907501, "learning_rate": 8.291323215879605e-06, "loss": 0.3803, "step": 1864 }, { "epoch": 1.0327641901245963, "grad_norm": 0.3433416187763214, "learning_rate": 8.288895947985191e-06, "loss": 0.4026, "step": 1865 }, { "epoch": 1.0333179510844486, "grad_norm": 0.370067298412323, "learning_rate": 8.286467313126892e-06, "loss": 0.459, "step": 1866 }, { "epoch": 1.0338717120443008, "grad_norm": 0.34138232469558716, "learning_rate": 8.28403731231412e-06, "loss": 0.4502, "step": 1867 }, { "epoch": 1.034425473004153, "grad_norm": 0.31911808252334595, "learning_rate": 8.281605946556855e-06, "loss": 0.3926, "step": 1868 }, { "epoch": 1.0349792339640056, "grad_norm": 0.3604952394962311, "learning_rate": 8.279173216865645e-06, "loss": 0.4384, "step": 1869 }, { "epoch": 1.0355329949238579, "grad_norm": 0.3345721662044525, "learning_rate": 8.276739124251609e-06, "loss": 0.3947, "step": 1870 }, { "epoch": 1.0360867558837101, "grad_norm": 0.3452201187610626, "learning_rate": 8.274303669726427e-06, "loss": 0.4574, "step": 1871 }, { "epoch": 1.0366405168435626, "grad_norm": 0.35944005846977234, "learning_rate": 8.271866854302345e-06, "loss": 0.4506, "step": 1872 }, { "epoch": 1.0371942778034149, "grad_norm": 0.2793308198451996, "learning_rate": 8.269428678992179e-06, "loss": 0.3571, "step": 1873 }, { "epoch": 1.0377480387632672, "grad_norm": 0.3304983973503113, "learning_rate": 8.266989144809309e-06, "loss": 0.4457, "step": 1874 }, { "epoch": 1.0383017997231194, "grad_norm": 0.39998921751976013, "learning_rate": 8.264548252767677e-06, "loss": 0.4289, "step": 1875 }, { "epoch": 1.038855560682972, "grad_norm": 0.3269573152065277, "learning_rate": 8.26210600388179e-06, "loss": 0.422, "step": 1876 }, { "epoch": 1.0394093216428242, "grad_norm": 0.3068474531173706, "learning_rate": 8.259662399166722e-06, "loss": 0.4233, "step": 1877 }, { "epoch": 1.0399630826026764, "grad_norm": 0.42217957973480225, "learning_rate": 8.257217439638105e-06, "loss": 0.4398, "step": 1878 }, { "epoch": 1.040516843562529, "grad_norm": 0.30273815989494324, "learning_rate": 8.254771126312142e-06, "loss": 0.4163, "step": 1879 }, { "epoch": 1.0410706045223812, "grad_norm": 0.3249501883983612, "learning_rate": 8.25232346020559e-06, "loss": 0.4483, "step": 1880 }, { "epoch": 1.0416243654822335, "grad_norm": 0.32815974950790405, "learning_rate": 8.24987444233578e-06, "loss": 0.4234, "step": 1881 }, { "epoch": 1.0421781264420857, "grad_norm": 0.3605539798736572, "learning_rate": 8.247424073720588e-06, "loss": 0.3923, "step": 1882 }, { "epoch": 1.0427318874019382, "grad_norm": 0.37549662590026855, "learning_rate": 8.244972355378467e-06, "loss": 0.4311, "step": 1883 }, { "epoch": 1.0432856483617905, "grad_norm": 0.386740505695343, "learning_rate": 8.242519288328421e-06, "loss": 0.4504, "step": 1884 }, { "epoch": 1.0438394093216428, "grad_norm": 0.34247806668281555, "learning_rate": 8.24006487359002e-06, "loss": 0.428, "step": 1885 }, { "epoch": 1.0443931702814953, "grad_norm": 0.3574327528476715, "learning_rate": 8.237609112183393e-06, "loss": 0.4166, "step": 1886 }, { "epoch": 1.0449469312413475, "grad_norm": 0.3579200804233551, "learning_rate": 8.235152005129227e-06, "loss": 0.4482, "step": 1887 }, { "epoch": 1.0455006922011998, "grad_norm": 0.3690986931324005, "learning_rate": 8.232693553448772e-06, "loss": 0.4329, "step": 1888 }, { "epoch": 1.046054453161052, "grad_norm": 0.4502304196357727, "learning_rate": 8.23023375816383e-06, "loss": 0.4559, "step": 1889 }, { "epoch": 1.0466082141209045, "grad_norm": 0.3553960621356964, "learning_rate": 8.22777262029677e-06, "loss": 0.4141, "step": 1890 }, { "epoch": 1.0471619750807568, "grad_norm": 0.4369748830795288, "learning_rate": 8.22531014087051e-06, "loss": 0.4499, "step": 1891 }, { "epoch": 1.047715736040609, "grad_norm": 0.3658662736415863, "learning_rate": 8.222846320908537e-06, "loss": 0.4478, "step": 1892 }, { "epoch": 1.0482694970004616, "grad_norm": 0.36315247416496277, "learning_rate": 8.220381161434883e-06, "loss": 0.4044, "step": 1893 }, { "epoch": 1.0488232579603138, "grad_norm": 0.44800904393196106, "learning_rate": 8.217914663474145e-06, "loss": 0.4299, "step": 1894 }, { "epoch": 1.049377018920166, "grad_norm": 0.33311089873313904, "learning_rate": 8.215446828051471e-06, "loss": 0.4401, "step": 1895 }, { "epoch": 1.0499307798800184, "grad_norm": 0.449404776096344, "learning_rate": 8.212977656192568e-06, "loss": 0.4333, "step": 1896 }, { "epoch": 1.0504845408398709, "grad_norm": 0.3549497127532959, "learning_rate": 8.210507148923703e-06, "loss": 0.4124, "step": 1897 }, { "epoch": 1.0510383017997231, "grad_norm": 0.3592413365840912, "learning_rate": 8.208035307271687e-06, "loss": 0.4529, "step": 1898 }, { "epoch": 1.0515920627595754, "grad_norm": 0.3461303114891052, "learning_rate": 8.205562132263894e-06, "loss": 0.4097, "step": 1899 }, { "epoch": 1.0521458237194279, "grad_norm": 0.3718779385089874, "learning_rate": 8.20308762492825e-06, "loss": 0.4157, "step": 1900 }, { "epoch": 1.0526995846792802, "grad_norm": 0.3743903934955597, "learning_rate": 8.200611786293234e-06, "loss": 0.4768, "step": 1901 }, { "epoch": 1.0532533456391324, "grad_norm": 0.3638157248497009, "learning_rate": 8.198134617387877e-06, "loss": 0.4496, "step": 1902 }, { "epoch": 1.0538071065989847, "grad_norm": 0.3971356153488159, "learning_rate": 8.195656119241768e-06, "loss": 0.4835, "step": 1903 }, { "epoch": 1.0543608675588372, "grad_norm": 0.30944007635116577, "learning_rate": 8.193176292885044e-06, "loss": 0.436, "step": 1904 }, { "epoch": 1.0549146285186894, "grad_norm": 0.3738367259502411, "learning_rate": 8.190695139348394e-06, "loss": 0.4316, "step": 1905 }, { "epoch": 1.0554683894785417, "grad_norm": 0.3406101167201996, "learning_rate": 8.188212659663063e-06, "loss": 0.4552, "step": 1906 }, { "epoch": 1.0560221504383942, "grad_norm": 0.34779271483421326, "learning_rate": 8.18572885486084e-06, "loss": 0.4148, "step": 1907 }, { "epoch": 1.0565759113982465, "grad_norm": 0.3216518461704254, "learning_rate": 8.183243725974073e-06, "loss": 0.4694, "step": 1908 }, { "epoch": 1.0571296723580987, "grad_norm": 0.32256942987442017, "learning_rate": 8.180757274035652e-06, "loss": 0.411, "step": 1909 }, { "epoch": 1.057683433317951, "grad_norm": 0.4045613408088684, "learning_rate": 8.178269500079024e-06, "loss": 0.4418, "step": 1910 }, { "epoch": 1.0582371942778035, "grad_norm": 0.3262558579444885, "learning_rate": 8.175780405138182e-06, "loss": 0.4244, "step": 1911 }, { "epoch": 1.0587909552376558, "grad_norm": 0.3707607090473175, "learning_rate": 8.173289990247669e-06, "loss": 0.4337, "step": 1912 }, { "epoch": 1.059344716197508, "grad_norm": 0.36692866683006287, "learning_rate": 8.170798256442573e-06, "loss": 0.4193, "step": 1913 }, { "epoch": 1.0598984771573603, "grad_norm": 0.4175090193748474, "learning_rate": 8.16830520475854e-06, "loss": 0.4516, "step": 1914 }, { "epoch": 1.0604522381172128, "grad_norm": 0.34216028451919556, "learning_rate": 8.165810836231749e-06, "loss": 0.4293, "step": 1915 }, { "epoch": 1.061005999077065, "grad_norm": 0.34519103169441223, "learning_rate": 8.16331515189894e-06, "loss": 0.4503, "step": 1916 }, { "epoch": 1.0615597600369173, "grad_norm": 0.36890822649002075, "learning_rate": 8.160818152797392e-06, "loss": 0.428, "step": 1917 }, { "epoch": 1.0621135209967698, "grad_norm": 0.3098214566707611, "learning_rate": 8.158319839964935e-06, "loss": 0.4146, "step": 1918 }, { "epoch": 1.062667281956622, "grad_norm": 0.3559643626213074, "learning_rate": 8.15582021443994e-06, "loss": 0.4803, "step": 1919 }, { "epoch": 1.0632210429164743, "grad_norm": 0.3470717668533325, "learning_rate": 8.153319277261328e-06, "loss": 0.4227, "step": 1920 }, { "epoch": 1.0637748038763268, "grad_norm": 0.33160847425460815, "learning_rate": 8.150817029468566e-06, "loss": 0.4286, "step": 1921 }, { "epoch": 1.064328564836179, "grad_norm": 0.3108460307121277, "learning_rate": 8.148313472101657e-06, "loss": 0.4273, "step": 1922 }, { "epoch": 1.0648823257960314, "grad_norm": 0.34788280725479126, "learning_rate": 8.14580860620116e-06, "loss": 0.4455, "step": 1923 }, { "epoch": 1.0654360867558836, "grad_norm": 0.2972123324871063, "learning_rate": 8.143302432808168e-06, "loss": 0.4224, "step": 1924 }, { "epoch": 1.0659898477157361, "grad_norm": 0.3282610774040222, "learning_rate": 8.140794952964328e-06, "loss": 0.4829, "step": 1925 }, { "epoch": 1.0665436086755884, "grad_norm": 0.3500787615776062, "learning_rate": 8.13828616771182e-06, "loss": 0.4378, "step": 1926 }, { "epoch": 1.0670973696354407, "grad_norm": 0.3067317605018616, "learning_rate": 8.13577607809337e-06, "loss": 0.4471, "step": 1927 }, { "epoch": 1.067651130595293, "grad_norm": 0.36614882946014404, "learning_rate": 8.133264685152246e-06, "loss": 0.4084, "step": 1928 }, { "epoch": 1.0682048915551454, "grad_norm": 0.3349100649356842, "learning_rate": 8.130751989932262e-06, "loss": 0.4205, "step": 1929 }, { "epoch": 1.0687586525149977, "grad_norm": 0.32286691665649414, "learning_rate": 8.128237993477767e-06, "loss": 0.4641, "step": 1930 }, { "epoch": 1.06931241347485, "grad_norm": 0.33246469497680664, "learning_rate": 8.125722696833653e-06, "loss": 0.4159, "step": 1931 }, { "epoch": 1.0698661744347024, "grad_norm": 0.3210042119026184, "learning_rate": 8.123206101045354e-06, "loss": 0.4394, "step": 1932 }, { "epoch": 1.0704199353945547, "grad_norm": 0.37608033418655396, "learning_rate": 8.120688207158842e-06, "loss": 0.455, "step": 1933 }, { "epoch": 1.070973696354407, "grad_norm": 0.3278134763240814, "learning_rate": 8.11816901622063e-06, "loss": 0.4168, "step": 1934 }, { "epoch": 1.0715274573142592, "grad_norm": 0.4348437786102295, "learning_rate": 8.11564852927777e-06, "loss": 0.4586, "step": 1935 }, { "epoch": 1.0720812182741117, "grad_norm": 0.33542290329933167, "learning_rate": 8.11312674737785e-06, "loss": 0.4112, "step": 1936 }, { "epoch": 1.072634979233964, "grad_norm": 0.3501347303390503, "learning_rate": 8.110603671569e-06, "loss": 0.4432, "step": 1937 }, { "epoch": 1.0731887401938163, "grad_norm": 0.4003233015537262, "learning_rate": 8.108079302899884e-06, "loss": 0.41, "step": 1938 }, { "epoch": 1.0737425011536688, "grad_norm": 0.38301515579223633, "learning_rate": 8.105553642419708e-06, "loss": 0.4656, "step": 1939 }, { "epoch": 1.074296262113521, "grad_norm": 0.3521134555339813, "learning_rate": 8.10302669117821e-06, "loss": 0.43, "step": 1940 }, { "epoch": 1.0748500230733733, "grad_norm": 0.3572501242160797, "learning_rate": 8.100498450225668e-06, "loss": 0.4302, "step": 1941 }, { "epoch": 1.0754037840332256, "grad_norm": 0.3145323097705841, "learning_rate": 8.097968920612895e-06, "loss": 0.418, "step": 1942 }, { "epoch": 1.075957544993078, "grad_norm": 0.34511932730674744, "learning_rate": 8.095438103391238e-06, "loss": 0.4216, "step": 1943 }, { "epoch": 1.0765113059529303, "grad_norm": 0.32421690225601196, "learning_rate": 8.092905999612584e-06, "loss": 0.4174, "step": 1944 }, { "epoch": 1.0770650669127826, "grad_norm": 0.36477866768836975, "learning_rate": 8.09037261032935e-06, "loss": 0.4124, "step": 1945 }, { "epoch": 1.077618827872635, "grad_norm": 0.3975301682949066, "learning_rate": 8.087837936594484e-06, "loss": 0.4534, "step": 1946 }, { "epoch": 1.0781725888324873, "grad_norm": 0.34789127111434937, "learning_rate": 8.085301979461478e-06, "loss": 0.4674, "step": 1947 }, { "epoch": 1.0787263497923396, "grad_norm": 0.41628509759902954, "learning_rate": 8.08276473998435e-06, "loss": 0.4296, "step": 1948 }, { "epoch": 1.0792801107521919, "grad_norm": 0.41215604543685913, "learning_rate": 8.080226219217653e-06, "loss": 0.4442, "step": 1949 }, { "epoch": 1.0798338717120444, "grad_norm": 0.3291819989681244, "learning_rate": 8.077686418216472e-06, "loss": 0.4615, "step": 1950 }, { "epoch": 1.0803876326718966, "grad_norm": 0.36714237928390503, "learning_rate": 8.075145338036426e-06, "loss": 0.4039, "step": 1951 }, { "epoch": 1.080941393631749, "grad_norm": 0.3742508292198181, "learning_rate": 8.072602979733664e-06, "loss": 0.4265, "step": 1952 }, { "epoch": 1.0814951545916014, "grad_norm": 0.3400529623031616, "learning_rate": 8.070059344364862e-06, "loss": 0.4482, "step": 1953 }, { "epoch": 1.0820489155514537, "grad_norm": 0.348096638917923, "learning_rate": 8.067514432987237e-06, "loss": 0.4038, "step": 1954 }, { "epoch": 1.082602676511306, "grad_norm": 0.37118756771087646, "learning_rate": 8.06496824665853e-06, "loss": 0.463, "step": 1955 }, { "epoch": 1.0831564374711582, "grad_norm": 0.3503284752368927, "learning_rate": 8.062420786437009e-06, "loss": 0.4508, "step": 1956 }, { "epoch": 1.0837101984310107, "grad_norm": 0.3905145525932312, "learning_rate": 8.059872053381477e-06, "loss": 0.4608, "step": 1957 }, { "epoch": 1.084263959390863, "grad_norm": 0.295412540435791, "learning_rate": 8.057322048551265e-06, "loss": 0.4133, "step": 1958 }, { "epoch": 1.0848177203507152, "grad_norm": 0.34062570333480835, "learning_rate": 8.05477077300623e-06, "loss": 0.4372, "step": 1959 }, { "epoch": 1.0853714813105677, "grad_norm": 0.37756478786468506, "learning_rate": 8.052218227806756e-06, "loss": 0.4243, "step": 1960 }, { "epoch": 1.08592524227042, "grad_norm": 0.33546799421310425, "learning_rate": 8.049664414013765e-06, "loss": 0.4252, "step": 1961 }, { "epoch": 1.0864790032302722, "grad_norm": 0.36535099148750305, "learning_rate": 8.047109332688691e-06, "loss": 0.4383, "step": 1962 }, { "epoch": 1.0870327641901245, "grad_norm": 0.34108638763427734, "learning_rate": 8.044552984893507e-06, "loss": 0.4445, "step": 1963 }, { "epoch": 1.087586525149977, "grad_norm": 0.32598790526390076, "learning_rate": 8.041995371690707e-06, "loss": 0.4514, "step": 1964 }, { "epoch": 1.0881402861098293, "grad_norm": 0.3786218762397766, "learning_rate": 8.039436494143309e-06, "loss": 0.4418, "step": 1965 }, { "epoch": 1.0886940470696815, "grad_norm": 0.4077594578266144, "learning_rate": 8.036876353314864e-06, "loss": 0.4705, "step": 1966 }, { "epoch": 1.0892478080295338, "grad_norm": 0.30568718910217285, "learning_rate": 8.03431495026944e-06, "loss": 0.4071, "step": 1967 }, { "epoch": 1.0898015689893863, "grad_norm": 0.39400753378868103, "learning_rate": 8.031752286071634e-06, "loss": 0.428, "step": 1968 }, { "epoch": 1.0903553299492386, "grad_norm": 0.33493733406066895, "learning_rate": 8.029188361786569e-06, "loss": 0.4648, "step": 1969 }, { "epoch": 1.0909090909090908, "grad_norm": 0.3407347798347473, "learning_rate": 8.026623178479883e-06, "loss": 0.4368, "step": 1970 }, { "epoch": 1.0914628518689433, "grad_norm": 0.36427170038223267, "learning_rate": 8.024056737217748e-06, "loss": 0.4182, "step": 1971 }, { "epoch": 1.0920166128287956, "grad_norm": 0.34447118639945984, "learning_rate": 8.02148903906685e-06, "loss": 0.4619, "step": 1972 }, { "epoch": 1.0925703737886479, "grad_norm": 0.37458592653274536, "learning_rate": 8.018920085094406e-06, "loss": 0.4419, "step": 1973 }, { "epoch": 1.0931241347485003, "grad_norm": 0.3468335270881653, "learning_rate": 8.016349876368147e-06, "loss": 0.4517, "step": 1974 }, { "epoch": 1.0936778957083526, "grad_norm": 0.3231825828552246, "learning_rate": 8.01377841395633e-06, "loss": 0.3887, "step": 1975 }, { "epoch": 1.0942316566682049, "grad_norm": 0.4897887408733368, "learning_rate": 8.011205698927733e-06, "loss": 0.5013, "step": 1976 }, { "epoch": 1.0947854176280571, "grad_norm": 0.33341577649116516, "learning_rate": 8.008631732351652e-06, "loss": 0.3941, "step": 1977 }, { "epoch": 1.0953391785879096, "grad_norm": 0.45207151770591736, "learning_rate": 8.006056515297905e-06, "loss": 0.4645, "step": 1978 }, { "epoch": 1.095892939547762, "grad_norm": 0.4404405653476715, "learning_rate": 8.00348004883683e-06, "loss": 0.4303, "step": 1979 }, { "epoch": 1.0964467005076142, "grad_norm": 0.35970860719680786, "learning_rate": 8.000902334039285e-06, "loss": 0.3822, "step": 1980 }, { "epoch": 1.0970004614674664, "grad_norm": 0.4297952651977539, "learning_rate": 7.998323371976644e-06, "loss": 0.4584, "step": 1981 }, { "epoch": 1.097554222427319, "grad_norm": 0.4420047998428345, "learning_rate": 7.9957431637208e-06, "loss": 0.4529, "step": 1982 }, { "epoch": 1.0981079833871712, "grad_norm": 0.3888760805130005, "learning_rate": 7.993161710344167e-06, "loss": 0.4317, "step": 1983 }, { "epoch": 1.0986617443470235, "grad_norm": 0.3761383891105652, "learning_rate": 7.990579012919675e-06, "loss": 0.4318, "step": 1984 }, { "epoch": 1.099215505306876, "grad_norm": 0.4182840883731842, "learning_rate": 7.98799507252077e-06, "loss": 0.4201, "step": 1985 }, { "epoch": 1.0997692662667282, "grad_norm": 0.41858765482902527, "learning_rate": 7.985409890221413e-06, "loss": 0.472, "step": 1986 }, { "epoch": 1.1003230272265805, "grad_norm": 0.32111427187919617, "learning_rate": 7.982823467096085e-06, "loss": 0.4227, "step": 1987 }, { "epoch": 1.1008767881864328, "grad_norm": 0.35791048407554626, "learning_rate": 7.980235804219785e-06, "loss": 0.3981, "step": 1988 }, { "epoch": 1.1014305491462852, "grad_norm": 0.38488584756851196, "learning_rate": 7.977646902668015e-06, "loss": 0.4522, "step": 1989 }, { "epoch": 1.1019843101061375, "grad_norm": 0.3447059392929077, "learning_rate": 7.975056763516807e-06, "loss": 0.4101, "step": 1990 }, { "epoch": 1.1025380710659898, "grad_norm": 0.3551190495491028, "learning_rate": 7.972465387842698e-06, "loss": 0.4335, "step": 1991 }, { "epoch": 1.1030918320258423, "grad_norm": 0.44077053666114807, "learning_rate": 7.969872776722743e-06, "loss": 0.4439, "step": 1992 }, { "epoch": 1.1036455929856945, "grad_norm": 0.37797147035598755, "learning_rate": 7.967278931234506e-06, "loss": 0.4298, "step": 1993 }, { "epoch": 1.1041993539455468, "grad_norm": 0.39369991421699524, "learning_rate": 7.96468385245607e-06, "loss": 0.4579, "step": 1994 }, { "epoch": 1.104753114905399, "grad_norm": 0.4840766191482544, "learning_rate": 7.962087541466026e-06, "loss": 0.4605, "step": 1995 }, { "epoch": 1.1053068758652516, "grad_norm": 0.39564841985702515, "learning_rate": 7.95948999934348e-06, "loss": 0.4214, "step": 1996 }, { "epoch": 1.1058606368251038, "grad_norm": 0.3874954283237457, "learning_rate": 7.956891227168047e-06, "loss": 0.4687, "step": 1997 }, { "epoch": 1.106414397784956, "grad_norm": 0.4164445400238037, "learning_rate": 7.954291226019857e-06, "loss": 0.3863, "step": 1998 }, { "epoch": 1.1069681587448086, "grad_norm": 0.4492274224758148, "learning_rate": 7.951689996979546e-06, "loss": 0.5025, "step": 1999 }, { "epoch": 1.1075219197046609, "grad_norm": 0.34496843814849854, "learning_rate": 7.949087541128265e-06, "loss": 0.4011, "step": 2000 }, { "epoch": 1.1080756806645131, "grad_norm": 0.41442376375198364, "learning_rate": 7.946483859547669e-06, "loss": 0.4181, "step": 2001 }, { "epoch": 1.1086294416243654, "grad_norm": 0.38776224851608276, "learning_rate": 7.943878953319932e-06, "loss": 0.4231, "step": 2002 }, { "epoch": 1.1091832025842179, "grad_norm": 0.3814254403114319, "learning_rate": 7.941272823527727e-06, "loss": 0.44, "step": 2003 }, { "epoch": 1.1097369635440701, "grad_norm": 0.3458368182182312, "learning_rate": 7.938665471254241e-06, "loss": 0.4084, "step": 2004 }, { "epoch": 1.1102907245039224, "grad_norm": 0.44027724862098694, "learning_rate": 7.936056897583169e-06, "loss": 0.4706, "step": 2005 }, { "epoch": 1.110844485463775, "grad_norm": 0.37522172927856445, "learning_rate": 7.93344710359871e-06, "loss": 0.4267, "step": 2006 }, { "epoch": 1.1113982464236272, "grad_norm": 0.3910789489746094, "learning_rate": 7.930836090385575e-06, "loss": 0.4462, "step": 2007 }, { "epoch": 1.1119520073834794, "grad_norm": 0.4454744756221771, "learning_rate": 7.928223859028978e-06, "loss": 0.4148, "step": 2008 }, { "epoch": 1.1125057683433317, "grad_norm": 0.31943216919898987, "learning_rate": 7.925610410614642e-06, "loss": 0.4251, "step": 2009 }, { "epoch": 1.1130595293031842, "grad_norm": 0.37735167145729065, "learning_rate": 7.922995746228794e-06, "loss": 0.44, "step": 2010 }, { "epoch": 1.1136132902630365, "grad_norm": 0.406328409910202, "learning_rate": 7.920379866958166e-06, "loss": 0.4381, "step": 2011 }, { "epoch": 1.1141670512228887, "grad_norm": 0.3975454568862915, "learning_rate": 7.917762773889999e-06, "loss": 0.4342, "step": 2012 }, { "epoch": 1.1147208121827412, "grad_norm": 0.3857262432575226, "learning_rate": 7.915144468112032e-06, "loss": 0.4369, "step": 2013 }, { "epoch": 1.1152745731425935, "grad_norm": 0.3757055103778839, "learning_rate": 7.912524950712512e-06, "loss": 0.4466, "step": 2014 }, { "epoch": 1.1158283341024458, "grad_norm": 0.3409002423286438, "learning_rate": 7.909904222780192e-06, "loss": 0.4211, "step": 2015 }, { "epoch": 1.116382095062298, "grad_norm": 0.34529370069503784, "learning_rate": 7.907282285404322e-06, "loss": 0.428, "step": 2016 }, { "epoch": 1.1169358560221505, "grad_norm": 0.35624265670776367, "learning_rate": 7.90465913967466e-06, "loss": 0.4493, "step": 2017 }, { "epoch": 1.1174896169820028, "grad_norm": 0.3632870614528656, "learning_rate": 7.902034786681461e-06, "loss": 0.5009, "step": 2018 }, { "epoch": 1.118043377941855, "grad_norm": 0.31238922476768494, "learning_rate": 7.899409227515488e-06, "loss": 0.405, "step": 2019 }, { "epoch": 1.1185971389017075, "grad_norm": 0.34969979524612427, "learning_rate": 7.896782463268002e-06, "loss": 0.4324, "step": 2020 }, { "epoch": 1.1191508998615598, "grad_norm": 0.3302128314971924, "learning_rate": 7.894154495030763e-06, "loss": 0.416, "step": 2021 }, { "epoch": 1.119704660821412, "grad_norm": 0.3362089693546295, "learning_rate": 7.891525323896037e-06, "loss": 0.4207, "step": 2022 }, { "epoch": 1.1202584217812643, "grad_norm": 0.36902087926864624, "learning_rate": 7.888894950956582e-06, "loss": 0.4528, "step": 2023 }, { "epoch": 1.1208121827411168, "grad_norm": 0.33030587434768677, "learning_rate": 7.886263377305663e-06, "loss": 0.4266, "step": 2024 }, { "epoch": 1.121365943700969, "grad_norm": 0.33379530906677246, "learning_rate": 7.88363060403704e-06, "loss": 0.4068, "step": 2025 }, { "epoch": 1.1219197046608214, "grad_norm": 0.3941119611263275, "learning_rate": 7.880996632244974e-06, "loss": 0.4749, "step": 2026 }, { "epoch": 1.1224734656206738, "grad_norm": 0.3333759605884552, "learning_rate": 7.87836146302422e-06, "loss": 0.4349, "step": 2027 }, { "epoch": 1.1230272265805261, "grad_norm": 0.34120598435401917, "learning_rate": 7.875725097470038e-06, "loss": 0.4571, "step": 2028 }, { "epoch": 1.1235809875403784, "grad_norm": 0.3208242654800415, "learning_rate": 7.873087536678173e-06, "loss": 0.4046, "step": 2029 }, { "epoch": 1.1241347485002307, "grad_norm": 0.35277265310287476, "learning_rate": 7.870448781744882e-06, "loss": 0.4286, "step": 2030 }, { "epoch": 1.1246885094600831, "grad_norm": 0.33431267738342285, "learning_rate": 7.867808833766908e-06, "loss": 0.4485, "step": 2031 }, { "epoch": 1.1252422704199354, "grad_norm": 0.30398762226104736, "learning_rate": 7.865167693841491e-06, "loss": 0.406, "step": 2032 }, { "epoch": 1.1257960313797877, "grad_norm": 0.4026983380317688, "learning_rate": 7.862525363066371e-06, "loss": 0.4569, "step": 2033 }, { "epoch": 1.12634979233964, "grad_norm": 0.3489692211151123, "learning_rate": 7.859881842539778e-06, "loss": 0.4299, "step": 2034 }, { "epoch": 1.1269035532994924, "grad_norm": 0.34804287552833557, "learning_rate": 7.85723713336044e-06, "loss": 0.4441, "step": 2035 }, { "epoch": 1.1274573142593447, "grad_norm": 0.3233163058757782, "learning_rate": 7.854591236627576e-06, "loss": 0.4299, "step": 2036 }, { "epoch": 1.128011075219197, "grad_norm": 0.33176279067993164, "learning_rate": 7.851944153440898e-06, "loss": 0.424, "step": 2037 }, { "epoch": 1.1285648361790495, "grad_norm": 0.3687259554862976, "learning_rate": 7.84929588490062e-06, "loss": 0.4481, "step": 2038 }, { "epoch": 1.1291185971389017, "grad_norm": 0.3333592116832733, "learning_rate": 7.846646432107434e-06, "loss": 0.4215, "step": 2039 }, { "epoch": 1.129672358098754, "grad_norm": 0.34881335496902466, "learning_rate": 7.843995796162538e-06, "loss": 0.4535, "step": 2040 }, { "epoch": 1.1302261190586065, "grad_norm": 0.29560914635658264, "learning_rate": 7.841343978167615e-06, "loss": 0.4041, "step": 2041 }, { "epoch": 1.1307798800184587, "grad_norm": 0.48314350843429565, "learning_rate": 7.838690979224837e-06, "loss": 0.4574, "step": 2042 }, { "epoch": 1.131333640978311, "grad_norm": 0.32405999302864075, "learning_rate": 7.836036800436873e-06, "loss": 0.4083, "step": 2043 }, { "epoch": 1.1318874019381633, "grad_norm": 0.3300076723098755, "learning_rate": 7.833381442906877e-06, "loss": 0.4224, "step": 2044 }, { "epoch": 1.1324411628980158, "grad_norm": 0.34380680322647095, "learning_rate": 7.830724907738498e-06, "loss": 0.4643, "step": 2045 }, { "epoch": 1.132994923857868, "grad_norm": 0.337096244096756, "learning_rate": 7.82806719603587e-06, "loss": 0.4393, "step": 2046 }, { "epoch": 1.1335486848177203, "grad_norm": 0.35114040970802307, "learning_rate": 7.825408308903619e-06, "loss": 0.4253, "step": 2047 }, { "epoch": 1.1341024457775726, "grad_norm": 0.31216201186180115, "learning_rate": 7.822748247446857e-06, "loss": 0.4207, "step": 2048 }, { "epoch": 1.134656206737425, "grad_norm": 0.34679582715034485, "learning_rate": 7.820087012771184e-06, "loss": 0.4411, "step": 2049 }, { "epoch": 1.1352099676972773, "grad_norm": 0.3721540868282318, "learning_rate": 7.817424605982693e-06, "loss": 0.4608, "step": 2050 }, { "epoch": 1.1357637286571296, "grad_norm": 0.35749202966690063, "learning_rate": 7.814761028187957e-06, "loss": 0.4403, "step": 2051 }, { "epoch": 1.136317489616982, "grad_norm": 0.31743842363357544, "learning_rate": 7.81209628049404e-06, "loss": 0.3859, "step": 2052 }, { "epoch": 1.1368712505768344, "grad_norm": 0.34678226709365845, "learning_rate": 7.809430364008493e-06, "loss": 0.4211, "step": 2053 }, { "epoch": 1.1374250115366866, "grad_norm": 0.36471882462501526, "learning_rate": 7.806763279839347e-06, "loss": 0.4542, "step": 2054 }, { "epoch": 1.1379787724965391, "grad_norm": 0.29828864336013794, "learning_rate": 7.804095029095124e-06, "loss": 0.4235, "step": 2055 }, { "epoch": 1.1385325334563914, "grad_norm": 0.35448595881462097, "learning_rate": 7.80142561288483e-06, "loss": 0.4423, "step": 2056 }, { "epoch": 1.1390862944162436, "grad_norm": 0.3463672697544098, "learning_rate": 7.798755032317955e-06, "loss": 0.4532, "step": 2057 }, { "epoch": 1.139640055376096, "grad_norm": 0.38741815090179443, "learning_rate": 7.79608328850447e-06, "loss": 0.4683, "step": 2058 }, { "epoch": 1.1401938163359484, "grad_norm": 0.3326360285282135, "learning_rate": 7.793410382554834e-06, "loss": 0.3794, "step": 2059 }, { "epoch": 1.1407475772958007, "grad_norm": 0.3057347238063812, "learning_rate": 7.790736315579988e-06, "loss": 0.4302, "step": 2060 }, { "epoch": 1.141301338255653, "grad_norm": 0.3327668607234955, "learning_rate": 7.78806108869135e-06, "loss": 0.4656, "step": 2061 }, { "epoch": 1.1418550992155052, "grad_norm": 0.3858741521835327, "learning_rate": 7.785384703000829e-06, "loss": 0.4411, "step": 2062 }, { "epoch": 1.1424088601753577, "grad_norm": 0.31904518604278564, "learning_rate": 7.782707159620808e-06, "loss": 0.4277, "step": 2063 }, { "epoch": 1.14296262113521, "grad_norm": 0.3733570873737335, "learning_rate": 7.780028459664157e-06, "loss": 0.4573, "step": 2064 }, { "epoch": 1.1435163820950622, "grad_norm": 0.3172999918460846, "learning_rate": 7.777348604244221e-06, "loss": 0.379, "step": 2065 }, { "epoch": 1.1440701430549147, "grad_norm": 0.3494924008846283, "learning_rate": 7.774667594474833e-06, "loss": 0.4229, "step": 2066 }, { "epoch": 1.144623904014767, "grad_norm": 0.34541773796081543, "learning_rate": 7.7719854314703e-06, "loss": 0.4675, "step": 2067 }, { "epoch": 1.1451776649746193, "grad_norm": 0.3327332139015198, "learning_rate": 7.769302116345404e-06, "loss": 0.4194, "step": 2068 }, { "epoch": 1.1457314259344715, "grad_norm": 0.3735981285572052, "learning_rate": 7.766617650215417e-06, "loss": 0.439, "step": 2069 }, { "epoch": 1.146285186894324, "grad_norm": 0.3438597023487091, "learning_rate": 7.763932034196082e-06, "loss": 0.4289, "step": 2070 }, { "epoch": 1.1468389478541763, "grad_norm": 0.33667826652526855, "learning_rate": 7.761245269403624e-06, "loss": 0.4117, "step": 2071 }, { "epoch": 1.1473927088140286, "grad_norm": 0.3921342194080353, "learning_rate": 7.758557356954739e-06, "loss": 0.4363, "step": 2072 }, { "epoch": 1.1479464697738808, "grad_norm": 0.3913732171058655, "learning_rate": 7.755868297966605e-06, "loss": 0.4644, "step": 2073 }, { "epoch": 1.1485002307337333, "grad_norm": 0.34243860840797424, "learning_rate": 7.753178093556878e-06, "loss": 0.4237, "step": 2074 }, { "epoch": 1.1490539916935856, "grad_norm": 0.4085008502006531, "learning_rate": 7.750486744843685e-06, "loss": 0.4531, "step": 2075 }, { "epoch": 1.1496077526534378, "grad_norm": 0.40413591265678406, "learning_rate": 7.747794252945635e-06, "loss": 0.4402, "step": 2076 }, { "epoch": 1.1501615136132903, "grad_norm": 0.3420397937297821, "learning_rate": 7.745100618981803e-06, "loss": 0.4483, "step": 2077 }, { "epoch": 1.1507152745731426, "grad_norm": 0.32371121644973755, "learning_rate": 7.74240584407175e-06, "loss": 0.436, "step": 2078 }, { "epoch": 1.1512690355329949, "grad_norm": 0.38274842500686646, "learning_rate": 7.739709929335501e-06, "loss": 0.4487, "step": 2079 }, { "epoch": 1.1518227964928474, "grad_norm": 0.36612433195114136, "learning_rate": 7.73701287589356e-06, "loss": 0.4599, "step": 2080 }, { "epoch": 1.1523765574526996, "grad_norm": 0.30352163314819336, "learning_rate": 7.734314684866905e-06, "loss": 0.3907, "step": 2081 }, { "epoch": 1.152930318412552, "grad_norm": 0.3651036024093628, "learning_rate": 7.731615357376985e-06, "loss": 0.4, "step": 2082 }, { "epoch": 1.1534840793724042, "grad_norm": 0.3507167100906372, "learning_rate": 7.72891489454572e-06, "loss": 0.4406, "step": 2083 }, { "epoch": 1.1540378403322566, "grad_norm": 0.30255967378616333, "learning_rate": 7.726213297495504e-06, "loss": 0.4215, "step": 2084 }, { "epoch": 1.154591601292109, "grad_norm": 0.35052698850631714, "learning_rate": 7.723510567349201e-06, "loss": 0.4204, "step": 2085 }, { "epoch": 1.1551453622519612, "grad_norm": 0.33122387528419495, "learning_rate": 7.72080670523015e-06, "loss": 0.3889, "step": 2086 }, { "epoch": 1.1556991232118135, "grad_norm": 0.3073391616344452, "learning_rate": 7.718101712262154e-06, "loss": 0.433, "step": 2087 }, { "epoch": 1.156252884171666, "grad_norm": 0.3350590765476227, "learning_rate": 7.71539558956949e-06, "loss": 0.4308, "step": 2088 }, { "epoch": 1.1568066451315182, "grad_norm": 0.3385765552520752, "learning_rate": 7.712688338276904e-06, "loss": 0.4352, "step": 2089 }, { "epoch": 1.1573604060913705, "grad_norm": 0.33302435278892517, "learning_rate": 7.709979959509612e-06, "loss": 0.4596, "step": 2090 }, { "epoch": 1.157914167051223, "grad_norm": 0.29584842920303345, "learning_rate": 7.707270454393297e-06, "loss": 0.3754, "step": 2091 }, { "epoch": 1.1584679280110752, "grad_norm": 0.33450037240982056, "learning_rate": 7.70455982405411e-06, "loss": 0.4413, "step": 2092 }, { "epoch": 1.1590216889709275, "grad_norm": 0.3602945804595947, "learning_rate": 7.701848069618669e-06, "loss": 0.4746, "step": 2093 }, { "epoch": 1.15957544993078, "grad_norm": 0.3707371950149536, "learning_rate": 7.699135192214062e-06, "loss": 0.4251, "step": 2094 }, { "epoch": 1.1601292108906323, "grad_norm": 0.31389689445495605, "learning_rate": 7.696421192967846e-06, "loss": 0.4329, "step": 2095 }, { "epoch": 1.1606829718504845, "grad_norm": 0.3546028733253479, "learning_rate": 7.693706073008033e-06, "loss": 0.4498, "step": 2096 }, { "epoch": 1.1612367328103368, "grad_norm": 0.32229235768318176, "learning_rate": 7.690989833463115e-06, "loss": 0.4262, "step": 2097 }, { "epoch": 1.1617904937701893, "grad_norm": 0.3666158616542816, "learning_rate": 7.68827247546204e-06, "loss": 0.4952, "step": 2098 }, { "epoch": 1.1623442547300415, "grad_norm": 0.3683710992336273, "learning_rate": 7.685554000134221e-06, "loss": 0.4209, "step": 2099 }, { "epoch": 1.1628980156898938, "grad_norm": 0.3815615177154541, "learning_rate": 7.682834408609543e-06, "loss": 0.4805, "step": 2100 }, { "epoch": 1.163451776649746, "grad_norm": 0.31006085872650146, "learning_rate": 7.680113702018345e-06, "loss": 0.402, "step": 2101 }, { "epoch": 1.1640055376095986, "grad_norm": 0.3419899046421051, "learning_rate": 7.67739188149144e-06, "loss": 0.4247, "step": 2102 }, { "epoch": 1.1645592985694508, "grad_norm": 0.3148891031742096, "learning_rate": 7.674668948160093e-06, "loss": 0.4552, "step": 2103 }, { "epoch": 1.165113059529303, "grad_norm": 0.31656646728515625, "learning_rate": 7.67194490315604e-06, "loss": 0.4191, "step": 2104 }, { "epoch": 1.1656668204891556, "grad_norm": 0.34166809916496277, "learning_rate": 7.669219747611474e-06, "loss": 0.4582, "step": 2105 }, { "epoch": 1.1662205814490079, "grad_norm": 0.3187251389026642, "learning_rate": 7.666493482659053e-06, "loss": 0.4213, "step": 2106 }, { "epoch": 1.1667743424088601, "grad_norm": 0.33141306042671204, "learning_rate": 7.663766109431893e-06, "loss": 0.4349, "step": 2107 }, { "epoch": 1.1673281033687126, "grad_norm": 0.3501977324485779, "learning_rate": 7.661037629063573e-06, "loss": 0.4082, "step": 2108 }, { "epoch": 1.167881864328565, "grad_norm": 0.30911269783973694, "learning_rate": 7.658308042688132e-06, "loss": 0.4533, "step": 2109 }, { "epoch": 1.1684356252884172, "grad_norm": 0.3163524270057678, "learning_rate": 7.655577351440067e-06, "loss": 0.4305, "step": 2110 }, { "epoch": 1.1689893862482694, "grad_norm": 0.3300507664680481, "learning_rate": 7.652845556454336e-06, "loss": 0.4328, "step": 2111 }, { "epoch": 1.169543147208122, "grad_norm": 0.3490801155567169, "learning_rate": 7.650112658866353e-06, "loss": 0.478, "step": 2112 }, { "epoch": 1.1700969081679742, "grad_norm": 0.32662585377693176, "learning_rate": 7.647378659811997e-06, "loss": 0.4082, "step": 2113 }, { "epoch": 1.1706506691278264, "grad_norm": 0.36368104815483093, "learning_rate": 7.644643560427593e-06, "loss": 0.4573, "step": 2114 }, { "epoch": 1.1712044300876787, "grad_norm": 0.34787797927856445, "learning_rate": 7.641907361849939e-06, "loss": 0.455, "step": 2115 }, { "epoch": 1.1717581910475312, "grad_norm": 0.31044140458106995, "learning_rate": 7.639170065216273e-06, "loss": 0.3841, "step": 2116 }, { "epoch": 1.1723119520073835, "grad_norm": 0.39388710260391235, "learning_rate": 7.636431671664303e-06, "loss": 0.4288, "step": 2117 }, { "epoch": 1.1728657129672357, "grad_norm": 0.3231658637523651, "learning_rate": 7.633692182332184e-06, "loss": 0.4339, "step": 2118 }, { "epoch": 1.1734194739270882, "grad_norm": 0.32435494661331177, "learning_rate": 7.630951598358534e-06, "loss": 0.4452, "step": 2119 }, { "epoch": 1.1739732348869405, "grad_norm": 0.37196940183639526, "learning_rate": 7.62820992088242e-06, "loss": 0.4109, "step": 2120 }, { "epoch": 1.1745269958467928, "grad_norm": 0.35919874906539917, "learning_rate": 7.625467151043364e-06, "loss": 0.4179, "step": 2121 }, { "epoch": 1.1750807568066453, "grad_norm": 0.326972633600235, "learning_rate": 7.622723289981348e-06, "loss": 0.4291, "step": 2122 }, { "epoch": 1.1756345177664975, "grad_norm": 0.3395402133464813, "learning_rate": 7.619978338836796e-06, "loss": 0.4691, "step": 2123 }, { "epoch": 1.1761882787263498, "grad_norm": 0.35187143087387085, "learning_rate": 7.617232298750599e-06, "loss": 0.4204, "step": 2124 }, { "epoch": 1.176742039686202, "grad_norm": 0.3472062647342682, "learning_rate": 7.614485170864088e-06, "loss": 0.4223, "step": 2125 }, { "epoch": 1.1772958006460545, "grad_norm": 0.3177010715007782, "learning_rate": 7.611736956319055e-06, "loss": 0.4356, "step": 2126 }, { "epoch": 1.1778495616059068, "grad_norm": 0.3215820789337158, "learning_rate": 7.60898765625774e-06, "loss": 0.4143, "step": 2127 }, { "epoch": 1.178403322565759, "grad_norm": 0.34983518719673157, "learning_rate": 7.606237271822835e-06, "loss": 0.4165, "step": 2128 }, { "epoch": 1.1789570835256113, "grad_norm": 0.3599419593811035, "learning_rate": 7.603485804157482e-06, "loss": 0.4314, "step": 2129 }, { "epoch": 1.1795108444854638, "grad_norm": 0.3707613945007324, "learning_rate": 7.600733254405273e-06, "loss": 0.4808, "step": 2130 }, { "epoch": 1.180064605445316, "grad_norm": 0.41522347927093506, "learning_rate": 7.597979623710252e-06, "loss": 0.4686, "step": 2131 }, { "epoch": 1.1806183664051684, "grad_norm": 0.3122965395450592, "learning_rate": 7.595224913216908e-06, "loss": 0.4205, "step": 2132 }, { "epoch": 1.1811721273650209, "grad_norm": 0.3689233958721161, "learning_rate": 7.592469124070183e-06, "loss": 0.4104, "step": 2133 }, { "epoch": 1.1817258883248731, "grad_norm": 0.3316616415977478, "learning_rate": 7.5897122574154665e-06, "loss": 0.4093, "step": 2134 }, { "epoch": 1.1822796492847254, "grad_norm": 0.35952791571617126, "learning_rate": 7.586954314398592e-06, "loss": 0.4727, "step": 2135 }, { "epoch": 1.1828334102445777, "grad_norm": 0.3557761609554291, "learning_rate": 7.584195296165847e-06, "loss": 0.4049, "step": 2136 }, { "epoch": 1.1833871712044302, "grad_norm": 0.3247920572757721, "learning_rate": 7.581435203863961e-06, "loss": 0.4114, "step": 2137 }, { "epoch": 1.1839409321642824, "grad_norm": 0.33477768301963806, "learning_rate": 7.57867403864011e-06, "loss": 0.4352, "step": 2138 }, { "epoch": 1.1844946931241347, "grad_norm": 0.3408827483654022, "learning_rate": 7.575911801641918e-06, "loss": 0.3974, "step": 2139 }, { "epoch": 1.185048454083987, "grad_norm": 0.3153260350227356, "learning_rate": 7.573148494017453e-06, "loss": 0.4344, "step": 2140 }, { "epoch": 1.1856022150438394, "grad_norm": 0.3221636116504669, "learning_rate": 7.570384116915229e-06, "loss": 0.4244, "step": 2141 }, { "epoch": 1.1861559760036917, "grad_norm": 0.3653341233730316, "learning_rate": 7.567618671484206e-06, "loss": 0.4568, "step": 2142 }, { "epoch": 1.186709736963544, "grad_norm": 0.3314267098903656, "learning_rate": 7.564852158873784e-06, "loss": 0.4261, "step": 2143 }, { "epoch": 1.1872634979233965, "grad_norm": 0.36950206756591797, "learning_rate": 7.5620845802338086e-06, "loss": 0.4277, "step": 2144 }, { "epoch": 1.1878172588832487, "grad_norm": 0.39516207575798035, "learning_rate": 7.559315936714569e-06, "loss": 0.4536, "step": 2145 }, { "epoch": 1.188371019843101, "grad_norm": 0.3901834487915039, "learning_rate": 7.556546229466797e-06, "loss": 0.4198, "step": 2146 }, { "epoch": 1.1889247808029535, "grad_norm": 0.33761465549468994, "learning_rate": 7.553775459641667e-06, "loss": 0.437, "step": 2147 }, { "epoch": 1.1894785417628058, "grad_norm": 0.4169747233390808, "learning_rate": 7.551003628390792e-06, "loss": 0.4457, "step": 2148 }, { "epoch": 1.190032302722658, "grad_norm": 0.3547607660293579, "learning_rate": 7.548230736866229e-06, "loss": 0.4174, "step": 2149 }, { "epoch": 1.1905860636825103, "grad_norm": 0.32884764671325684, "learning_rate": 7.545456786220476e-06, "loss": 0.4388, "step": 2150 }, { "epoch": 1.1911398246423628, "grad_norm": 0.3467073142528534, "learning_rate": 7.542681777606467e-06, "loss": 0.4073, "step": 2151 }, { "epoch": 1.191693585602215, "grad_norm": 0.3636122941970825, "learning_rate": 7.539905712177585e-06, "loss": 0.4321, "step": 2152 }, { "epoch": 1.1922473465620673, "grad_norm": 0.33901864290237427, "learning_rate": 7.537128591087639e-06, "loss": 0.4193, "step": 2153 }, { "epoch": 1.1928011075219196, "grad_norm": 0.35764122009277344, "learning_rate": 7.534350415490887e-06, "loss": 0.4176, "step": 2154 }, { "epoch": 1.193354868481772, "grad_norm": 0.3992668390274048, "learning_rate": 7.531571186542026e-06, "loss": 0.4837, "step": 2155 }, { "epoch": 1.1939086294416243, "grad_norm": 0.33795085549354553, "learning_rate": 7.5287909053961795e-06, "loss": 0.4261, "step": 2156 }, { "epoch": 1.1944623904014766, "grad_norm": 0.34177857637405396, "learning_rate": 7.526009573208922e-06, "loss": 0.4489, "step": 2157 }, { "epoch": 1.195016151361329, "grad_norm": 0.2885200083255768, "learning_rate": 7.523227191136256e-06, "loss": 0.3977, "step": 2158 }, { "epoch": 1.1955699123211814, "grad_norm": 0.3389120399951935, "learning_rate": 7.5204437603346224e-06, "loss": 0.3878, "step": 2159 }, { "epoch": 1.1961236732810336, "grad_norm": 0.3512684404850006, "learning_rate": 7.517659281960902e-06, "loss": 0.462, "step": 2160 }, { "epoch": 1.1966774342408861, "grad_norm": 0.3475791811943054, "learning_rate": 7.514873757172402e-06, "loss": 0.3878, "step": 2161 }, { "epoch": 1.1972311952007384, "grad_norm": 0.3224257528781891, "learning_rate": 7.512087187126875e-06, "loss": 0.4176, "step": 2162 }, { "epoch": 1.1977849561605907, "grad_norm": 0.35235318541526794, "learning_rate": 7.509299572982501e-06, "loss": 0.4581, "step": 2163 }, { "epoch": 1.198338717120443, "grad_norm": 0.34027788043022156, "learning_rate": 7.506510915897898e-06, "loss": 0.4139, "step": 2164 }, { "epoch": 1.1988924780802954, "grad_norm": 0.3461192846298218, "learning_rate": 7.503721217032112e-06, "loss": 0.4375, "step": 2165 }, { "epoch": 1.1994462390401477, "grad_norm": 0.313247948884964, "learning_rate": 7.500930477544628e-06, "loss": 0.412, "step": 2166 }, { "epoch": 1.2, "grad_norm": 0.3185102045536041, "learning_rate": 7.498138698595362e-06, "loss": 0.3997, "step": 2167 }, { "epoch": 1.2005537609598522, "grad_norm": 0.3287985920906067, "learning_rate": 7.495345881344658e-06, "loss": 0.4374, "step": 2168 }, { "epoch": 1.2011075219197047, "grad_norm": 0.3163144588470459, "learning_rate": 7.4925520269532995e-06, "loss": 0.4121, "step": 2169 }, { "epoch": 1.201661282879557, "grad_norm": 0.35124772787094116, "learning_rate": 7.48975713658249e-06, "loss": 0.4434, "step": 2170 }, { "epoch": 1.2022150438394092, "grad_norm": 0.31671279668807983, "learning_rate": 7.486961211393878e-06, "loss": 0.455, "step": 2171 }, { "epoch": 1.2027688047992617, "grad_norm": 0.2984941005706787, "learning_rate": 7.484164252549526e-06, "loss": 0.4193, "step": 2172 }, { "epoch": 1.203322565759114, "grad_norm": 0.3448556959629059, "learning_rate": 7.481366261211937e-06, "loss": 0.4466, "step": 2173 }, { "epoch": 1.2038763267189663, "grad_norm": 0.3039524555206299, "learning_rate": 7.4785672385440414e-06, "loss": 0.4053, "step": 2174 }, { "epoch": 1.2044300876788188, "grad_norm": 0.30778801441192627, "learning_rate": 7.4757671857091965e-06, "loss": 0.4223, "step": 2175 }, { "epoch": 1.204983848638671, "grad_norm": 0.338081032037735, "learning_rate": 7.472966103871187e-06, "loss": 0.4265, "step": 2176 }, { "epoch": 1.2055376095985233, "grad_norm": 0.32791537046432495, "learning_rate": 7.470163994194229e-06, "loss": 0.4795, "step": 2177 }, { "epoch": 1.2060913705583756, "grad_norm": 0.31503763794898987, "learning_rate": 7.46736085784296e-06, "loss": 0.4035, "step": 2178 }, { "epoch": 1.206645131518228, "grad_norm": 0.31130537390708923, "learning_rate": 7.4645566959824504e-06, "loss": 0.4339, "step": 2179 }, { "epoch": 1.2071988924780803, "grad_norm": 0.3737187087535858, "learning_rate": 7.461751509778193e-06, "loss": 0.4284, "step": 2180 }, { "epoch": 1.2077526534379326, "grad_norm": 0.4007020890712738, "learning_rate": 7.458945300396108e-06, "loss": 0.4414, "step": 2181 }, { "epoch": 1.2083064143977849, "grad_norm": 0.3745996654033661, "learning_rate": 7.45613806900254e-06, "loss": 0.4275, "step": 2182 }, { "epoch": 1.2088601753576373, "grad_norm": 0.33326661586761475, "learning_rate": 7.4533298167642575e-06, "loss": 0.4202, "step": 2183 }, { "epoch": 1.2094139363174896, "grad_norm": 0.37950772047042847, "learning_rate": 7.450520544848458e-06, "loss": 0.4542, "step": 2184 }, { "epoch": 1.2099676972773419, "grad_norm": 0.3354121744632721, "learning_rate": 7.447710254422753e-06, "loss": 0.4368, "step": 2185 }, { "epoch": 1.2105214582371944, "grad_norm": 0.3190821707248688, "learning_rate": 7.444898946655189e-06, "loss": 0.3861, "step": 2186 }, { "epoch": 1.2110752191970466, "grad_norm": 0.4178685247898102, "learning_rate": 7.442086622714229e-06, "loss": 0.49, "step": 2187 }, { "epoch": 1.211628980156899, "grad_norm": 0.3081350326538086, "learning_rate": 7.439273283768757e-06, "loss": 0.4415, "step": 2188 }, { "epoch": 1.2121827411167512, "grad_norm": 0.3984202444553375, "learning_rate": 7.436458930988084e-06, "loss": 0.4624, "step": 2189 }, { "epoch": 1.2127365020766037, "grad_norm": 0.3274775445461273, "learning_rate": 7.433643565541936e-06, "loss": 0.4214, "step": 2190 }, { "epoch": 1.213290263036456, "grad_norm": 0.33314576745033264, "learning_rate": 7.430827188600467e-06, "loss": 0.4554, "step": 2191 }, { "epoch": 1.2138440239963082, "grad_norm": 0.30854350328445435, "learning_rate": 7.428009801334245e-06, "loss": 0.4099, "step": 2192 }, { "epoch": 1.2143977849561607, "grad_norm": 0.3323761224746704, "learning_rate": 7.425191404914264e-06, "loss": 0.4041, "step": 2193 }, { "epoch": 1.214951545916013, "grad_norm": 0.35437044501304626, "learning_rate": 7.422372000511929e-06, "loss": 0.4487, "step": 2194 }, { "epoch": 1.2155053068758652, "grad_norm": 0.3147411644458771, "learning_rate": 7.419551589299074e-06, "loss": 0.3732, "step": 2195 }, { "epoch": 1.2160590678357175, "grad_norm": 0.3593740463256836, "learning_rate": 7.416730172447946e-06, "loss": 0.4757, "step": 2196 }, { "epoch": 1.21661282879557, "grad_norm": 0.3461996018886566, "learning_rate": 7.413907751131206e-06, "loss": 0.4504, "step": 2197 }, { "epoch": 1.2171665897554222, "grad_norm": 0.37337055802345276, "learning_rate": 7.411084326521944e-06, "loss": 0.4498, "step": 2198 }, { "epoch": 1.2177203507152745, "grad_norm": 0.3390166461467743, "learning_rate": 7.408259899793653e-06, "loss": 0.421, "step": 2199 }, { "epoch": 1.218274111675127, "grad_norm": 0.35852760076522827, "learning_rate": 7.405434472120257e-06, "loss": 0.4518, "step": 2200 }, { "epoch": 1.2188278726349793, "grad_norm": 0.29827579855918884, "learning_rate": 7.4026080446760806e-06, "loss": 0.4397, "step": 2201 }, { "epoch": 1.2193816335948315, "grad_norm": 0.3156038820743561, "learning_rate": 7.3997806186358774e-06, "loss": 0.382, "step": 2202 }, { "epoch": 1.2199353945546838, "grad_norm": 0.3381223678588867, "learning_rate": 7.396952195174809e-06, "loss": 0.4373, "step": 2203 }, { "epoch": 1.2204891555145363, "grad_norm": 0.28955116868019104, "learning_rate": 7.394122775468452e-06, "loss": 0.4145, "step": 2204 }, { "epoch": 1.2210429164743886, "grad_norm": 0.35669830441474915, "learning_rate": 7.3912923606928e-06, "loss": 0.4451, "step": 2205 }, { "epoch": 1.2215966774342408, "grad_norm": 0.33273300528526306, "learning_rate": 7.388460952024257e-06, "loss": 0.4451, "step": 2206 }, { "epoch": 1.222150438394093, "grad_norm": 0.34728381037712097, "learning_rate": 7.385628550639643e-06, "loss": 0.4623, "step": 2207 }, { "epoch": 1.2227041993539456, "grad_norm": 0.34993746876716614, "learning_rate": 7.3827951577161875e-06, "loss": 0.4273, "step": 2208 }, { "epoch": 1.2232579603137979, "grad_norm": 0.3349926471710205, "learning_rate": 7.379960774431533e-06, "loss": 0.4367, "step": 2209 }, { "epoch": 1.2238117212736501, "grad_norm": 0.3383914530277252, "learning_rate": 7.3771254019637365e-06, "loss": 0.4056, "step": 2210 }, { "epoch": 1.2243654822335026, "grad_norm": 0.3672018051147461, "learning_rate": 7.374289041491262e-06, "loss": 0.4441, "step": 2211 }, { "epoch": 1.2249192431933549, "grad_norm": 0.3542934060096741, "learning_rate": 7.371451694192988e-06, "loss": 0.4144, "step": 2212 }, { "epoch": 1.2254730041532071, "grad_norm": 0.3107501268386841, "learning_rate": 7.368613361248199e-06, "loss": 0.4102, "step": 2213 }, { "epoch": 1.2260267651130596, "grad_norm": 0.34838858246803284, "learning_rate": 7.365774043836593e-06, "loss": 0.4322, "step": 2214 }, { "epoch": 1.226580526072912, "grad_norm": 0.31833580136299133, "learning_rate": 7.362933743138275e-06, "loss": 0.4042, "step": 2215 }, { "epoch": 1.2271342870327642, "grad_norm": 0.3677130937576294, "learning_rate": 7.36009246033376e-06, "loss": 0.4409, "step": 2216 }, { "epoch": 1.2276880479926164, "grad_norm": 0.3621925115585327, "learning_rate": 7.357250196603969e-06, "loss": 0.4405, "step": 2217 }, { "epoch": 1.228241808952469, "grad_norm": 0.40282338857650757, "learning_rate": 7.354406953130233e-06, "loss": 0.4408, "step": 2218 }, { "epoch": 1.2287955699123212, "grad_norm": 0.3084102272987366, "learning_rate": 7.351562731094288e-06, "loss": 0.4279, "step": 2219 }, { "epoch": 1.2293493308721735, "grad_norm": 0.32792457938194275, "learning_rate": 7.348717531678282e-06, "loss": 0.4242, "step": 2220 }, { "epoch": 1.2299030918320257, "grad_norm": 0.3110801577568054, "learning_rate": 7.34587135606476e-06, "loss": 0.403, "step": 2221 }, { "epoch": 1.2304568527918782, "grad_norm": 0.34459441900253296, "learning_rate": 7.343024205436682e-06, "loss": 0.4215, "step": 2222 }, { "epoch": 1.2310106137517305, "grad_norm": 0.34599176049232483, "learning_rate": 7.340176080977406e-06, "loss": 0.4311, "step": 2223 }, { "epoch": 1.2315643747115828, "grad_norm": 0.3501961827278137, "learning_rate": 7.337326983870703e-06, "loss": 0.4268, "step": 2224 }, { "epoch": 1.2321181356714352, "grad_norm": 0.41438034176826477, "learning_rate": 7.3344769153007366e-06, "loss": 0.4424, "step": 2225 }, { "epoch": 1.2326718966312875, "grad_norm": 0.3494097888469696, "learning_rate": 7.3316258764520866e-06, "loss": 0.4371, "step": 2226 }, { "epoch": 1.2332256575911398, "grad_norm": 0.3827064633369446, "learning_rate": 7.328773868509728e-06, "loss": 0.4262, "step": 2227 }, { "epoch": 1.2337794185509923, "grad_norm": 0.35693180561065674, "learning_rate": 7.3259208926590395e-06, "loss": 0.4327, "step": 2228 }, { "epoch": 1.2343331795108445, "grad_norm": 0.3049055337905884, "learning_rate": 7.323066950085806e-06, "loss": 0.3785, "step": 2229 }, { "epoch": 1.2348869404706968, "grad_norm": 0.40723496675491333, "learning_rate": 7.32021204197621e-06, "loss": 0.4464, "step": 2230 }, { "epoch": 1.235440701430549, "grad_norm": 0.3568432033061981, "learning_rate": 7.317356169516839e-06, "loss": 0.4264, "step": 2231 }, { "epoch": 1.2359944623904016, "grad_norm": 0.32122284173965454, "learning_rate": 7.314499333894678e-06, "loss": 0.4384, "step": 2232 }, { "epoch": 1.2365482233502538, "grad_norm": 0.33375778794288635, "learning_rate": 7.311641536297114e-06, "loss": 0.4205, "step": 2233 }, { "epoch": 1.237101984310106, "grad_norm": 0.3546895682811737, "learning_rate": 7.308782777911932e-06, "loss": 0.4521, "step": 2234 }, { "epoch": 1.2376557452699584, "grad_norm": 0.31817081570625305, "learning_rate": 7.30592305992732e-06, "loss": 0.473, "step": 2235 }, { "epoch": 1.2382095062298109, "grad_norm": 0.29596227407455444, "learning_rate": 7.3030623835318605e-06, "loss": 0.4189, "step": 2236 }, { "epoch": 1.2387632671896631, "grad_norm": 0.35197606682777405, "learning_rate": 7.3002007499145365e-06, "loss": 0.4377, "step": 2237 }, { "epoch": 1.2393170281495154, "grad_norm": 0.337466299533844, "learning_rate": 7.297338160264733e-06, "loss": 0.4585, "step": 2238 }, { "epoch": 1.2398707891093679, "grad_norm": 0.2921857535839081, "learning_rate": 7.294474615772225e-06, "loss": 0.4159, "step": 2239 }, { "epoch": 1.2404245500692201, "grad_norm": 0.32307302951812744, "learning_rate": 7.2916101176271855e-06, "loss": 0.4205, "step": 2240 }, { "epoch": 1.2409783110290724, "grad_norm": 0.34135332703590393, "learning_rate": 7.2887446670201904e-06, "loss": 0.4381, "step": 2241 }, { "epoch": 1.241532071988925, "grad_norm": 0.3145383596420288, "learning_rate": 7.2858782651422035e-06, "loss": 0.4181, "step": 2242 }, { "epoch": 1.2420858329487772, "grad_norm": 0.2992677390575409, "learning_rate": 7.2830109131845885e-06, "loss": 0.4346, "step": 2243 }, { "epoch": 1.2426395939086294, "grad_norm": 0.322989284992218, "learning_rate": 7.280142612339103e-06, "loss": 0.4716, "step": 2244 }, { "epoch": 1.2431933548684817, "grad_norm": 0.3308155834674835, "learning_rate": 7.277273363797899e-06, "loss": 0.4314, "step": 2245 }, { "epoch": 1.2437471158283342, "grad_norm": 0.31402671337127686, "learning_rate": 7.274403168753523e-06, "loss": 0.423, "step": 2246 }, { "epoch": 1.2443008767881865, "grad_norm": 0.32319745421409607, "learning_rate": 7.271532028398913e-06, "loss": 0.4081, "step": 2247 }, { "epoch": 1.2448546377480387, "grad_norm": 0.36102983355522156, "learning_rate": 7.268659943927403e-06, "loss": 0.4522, "step": 2248 }, { "epoch": 1.245408398707891, "grad_norm": 0.28966906666755676, "learning_rate": 7.2657869165327155e-06, "loss": 0.4233, "step": 2249 }, { "epoch": 1.2459621596677435, "grad_norm": 0.32769912481307983, "learning_rate": 7.262912947408968e-06, "loss": 0.4036, "step": 2250 }, { "epoch": 1.2465159206275958, "grad_norm": 0.3434944748878479, "learning_rate": 7.260038037750668e-06, "loss": 0.4365, "step": 2251 }, { "epoch": 1.247069681587448, "grad_norm": 0.34374260902404785, "learning_rate": 7.2571621887527135e-06, "loss": 0.4241, "step": 2252 }, { "epoch": 1.2476234425473005, "grad_norm": 0.39021849632263184, "learning_rate": 7.254285401610395e-06, "loss": 0.4717, "step": 2253 }, { "epoch": 1.2481772035071528, "grad_norm": 0.307012140750885, "learning_rate": 7.251407677519391e-06, "loss": 0.42, "step": 2254 }, { "epoch": 1.248730964467005, "grad_norm": 0.32597586512565613, "learning_rate": 7.24852901767577e-06, "loss": 0.4459, "step": 2255 }, { "epoch": 1.2492847254268573, "grad_norm": 0.3611048758029938, "learning_rate": 7.24564942327599e-06, "loss": 0.4244, "step": 2256 }, { "epoch": 1.2498384863867098, "grad_norm": 0.3715953528881073, "learning_rate": 7.242768895516897e-06, "loss": 0.4737, "step": 2257 }, { "epoch": 1.250392247346562, "grad_norm": 0.312743604183197, "learning_rate": 7.2398874355957235e-06, "loss": 0.4152, "step": 2258 }, { "epoch": 1.2509460083064143, "grad_norm": 0.3188340663909912, "learning_rate": 7.23700504471009e-06, "loss": 0.4226, "step": 2259 }, { "epoch": 1.2514997692662666, "grad_norm": 0.3569521903991699, "learning_rate": 7.234121724058009e-06, "loss": 0.4079, "step": 2260 }, { "epoch": 1.252053530226119, "grad_norm": 0.32865968346595764, "learning_rate": 7.23123747483787e-06, "loss": 0.449, "step": 2261 }, { "epoch": 1.2526072911859714, "grad_norm": 0.3248818516731262, "learning_rate": 7.228352298248458e-06, "loss": 0.4321, "step": 2262 }, { "epoch": 1.2531610521458236, "grad_norm": 0.3525533378124237, "learning_rate": 7.2254661954889345e-06, "loss": 0.464, "step": 2263 }, { "epoch": 1.2537148131056761, "grad_norm": 0.30017971992492676, "learning_rate": 7.222579167758854e-06, "loss": 0.4189, "step": 2264 }, { "epoch": 1.2542685740655284, "grad_norm": 0.3185668885707855, "learning_rate": 7.219691216258151e-06, "loss": 0.4177, "step": 2265 }, { "epoch": 1.2548223350253807, "grad_norm": 0.3931023180484772, "learning_rate": 7.216802342187142e-06, "loss": 0.4091, "step": 2266 }, { "epoch": 1.2553760959852331, "grad_norm": 0.3283444046974182, "learning_rate": 7.213912546746535e-06, "loss": 0.4015, "step": 2267 }, { "epoch": 1.2559298569450854, "grad_norm": 0.31872570514678955, "learning_rate": 7.211021831137413e-06, "loss": 0.4495, "step": 2268 }, { "epoch": 1.2564836179049377, "grad_norm": 0.3432103991508484, "learning_rate": 7.2081301965612435e-06, "loss": 0.3923, "step": 2269 }, { "epoch": 1.2570373788647902, "grad_norm": 0.34547683596611023, "learning_rate": 7.205237644219878e-06, "loss": 0.441, "step": 2270 }, { "epoch": 1.2575911398246424, "grad_norm": 0.34949061274528503, "learning_rate": 7.202344175315546e-06, "loss": 0.4493, "step": 2271 }, { "epoch": 1.2581449007844947, "grad_norm": 0.30543598532676697, "learning_rate": 7.199449791050862e-06, "loss": 0.3688, "step": 2272 }, { "epoch": 1.258698661744347, "grad_norm": 0.40346550941467285, "learning_rate": 7.196554492628819e-06, "loss": 0.4563, "step": 2273 }, { "epoch": 1.2592524227041992, "grad_norm": 0.3192417323589325, "learning_rate": 7.1936582812527885e-06, "loss": 0.4216, "step": 2274 }, { "epoch": 1.2598061836640517, "grad_norm": 0.32792577147483826, "learning_rate": 7.1907611581265244e-06, "loss": 0.4157, "step": 2275 }, { "epoch": 1.260359944623904, "grad_norm": 0.32613348960876465, "learning_rate": 7.187863124454158e-06, "loss": 0.428, "step": 2276 }, { "epoch": 1.2609137055837563, "grad_norm": 0.3287932872772217, "learning_rate": 7.184964181440198e-06, "loss": 0.4419, "step": 2277 }, { "epoch": 1.2614674665436088, "grad_norm": 0.33425065875053406, "learning_rate": 7.182064330289533e-06, "loss": 0.4103, "step": 2278 }, { "epoch": 1.262021227503461, "grad_norm": 0.3241965174674988, "learning_rate": 7.179163572207427e-06, "loss": 0.4693, "step": 2279 }, { "epoch": 1.2625749884633133, "grad_norm": 0.3588312864303589, "learning_rate": 7.176261908399523e-06, "loss": 0.441, "step": 2280 }, { "epoch": 1.2631287494231658, "grad_norm": 0.34174007177352905, "learning_rate": 7.17335934007184e-06, "loss": 0.4084, "step": 2281 }, { "epoch": 1.263682510383018, "grad_norm": 0.3905993700027466, "learning_rate": 7.170455868430772e-06, "loss": 0.4832, "step": 2282 }, { "epoch": 1.2642362713428703, "grad_norm": 0.3522692322731018, "learning_rate": 7.167551494683091e-06, "loss": 0.4183, "step": 2283 }, { "epoch": 1.2647900323027226, "grad_norm": 0.3202967643737793, "learning_rate": 7.164646220035938e-06, "loss": 0.4126, "step": 2284 }, { "epoch": 1.265343793262575, "grad_norm": 0.3194389045238495, "learning_rate": 7.161740045696834e-06, "loss": 0.4239, "step": 2285 }, { "epoch": 1.2658975542224273, "grad_norm": 0.3293907046318054, "learning_rate": 7.158832972873673e-06, "loss": 0.4226, "step": 2286 }, { "epoch": 1.2664513151822796, "grad_norm": 0.3251712918281555, "learning_rate": 7.155925002774722e-06, "loss": 0.4205, "step": 2287 }, { "epoch": 1.2670050761421319, "grad_norm": 0.3135354518890381, "learning_rate": 7.1530161366086195e-06, "loss": 0.3931, "step": 2288 }, { "epoch": 1.2675588371019844, "grad_norm": 0.3520801365375519, "learning_rate": 7.15010637558438e-06, "loss": 0.4403, "step": 2289 }, { "epoch": 1.2681125980618366, "grad_norm": 0.37010860443115234, "learning_rate": 7.1471957209113826e-06, "loss": 0.4119, "step": 2290 }, { "epoch": 1.268666359021689, "grad_norm": 0.34664782881736755, "learning_rate": 7.144284173799389e-06, "loss": 0.4458, "step": 2291 }, { "epoch": 1.2692201199815414, "grad_norm": 0.37426844239234924, "learning_rate": 7.14137173545852e-06, "loss": 0.3915, "step": 2292 }, { "epoch": 1.2697738809413937, "grad_norm": 0.3590279221534729, "learning_rate": 7.138458407099275e-06, "loss": 0.4278, "step": 2293 }, { "epoch": 1.270327641901246, "grad_norm": 0.4048329293727875, "learning_rate": 7.135544189932522e-06, "loss": 0.4793, "step": 2294 }, { "epoch": 1.2708814028610984, "grad_norm": 0.3510269522666931, "learning_rate": 7.1326290851694935e-06, "loss": 0.4001, "step": 2295 }, { "epoch": 1.2714351638209507, "grad_norm": 0.354041188955307, "learning_rate": 7.129713094021799e-06, "loss": 0.4259, "step": 2296 }, { "epoch": 1.271988924780803, "grad_norm": 0.3745872378349304, "learning_rate": 7.126796217701408e-06, "loss": 0.4571, "step": 2297 }, { "epoch": 1.2725426857406552, "grad_norm": 0.34840741753578186, "learning_rate": 7.123878457420665e-06, "loss": 0.4445, "step": 2298 }, { "epoch": 1.2730964467005075, "grad_norm": 0.3546462953090668, "learning_rate": 7.120959814392276e-06, "loss": 0.4164, "step": 2299 }, { "epoch": 1.27365020766036, "grad_norm": 0.33851414918899536, "learning_rate": 7.11804028982932e-06, "loss": 0.4434, "step": 2300 }, { "epoch": 1.2742039686202122, "grad_norm": 0.37884873151779175, "learning_rate": 7.115119884945237e-06, "loss": 0.4896, "step": 2301 }, { "epoch": 1.2747577295800645, "grad_norm": 0.33800816535949707, "learning_rate": 7.112198600953832e-06, "loss": 0.4151, "step": 2302 }, { "epoch": 1.275311490539917, "grad_norm": 0.3313348591327667, "learning_rate": 7.1092764390692835e-06, "loss": 0.4287, "step": 2303 }, { "epoch": 1.2758652514997693, "grad_norm": 0.3483647406101227, "learning_rate": 7.106353400506127e-06, "loss": 0.4392, "step": 2304 }, { "epoch": 1.2764190124596215, "grad_norm": 0.33032727241516113, "learning_rate": 7.1034294864792666e-06, "loss": 0.4353, "step": 2305 }, { "epoch": 1.276972773419474, "grad_norm": 0.3803156316280365, "learning_rate": 7.100504698203967e-06, "loss": 0.4111, "step": 2306 }, { "epoch": 1.2775265343793263, "grad_norm": 0.31852173805236816, "learning_rate": 7.097579036895859e-06, "loss": 0.4373, "step": 2307 }, { "epoch": 1.2780802953391786, "grad_norm": 0.38203373551368713, "learning_rate": 7.094652503770933e-06, "loss": 0.4209, "step": 2308 }, { "epoch": 1.278634056299031, "grad_norm": 0.31812843680381775, "learning_rate": 7.091725100045548e-06, "loss": 0.441, "step": 2309 }, { "epoch": 1.2791878172588833, "grad_norm": 0.3016834855079651, "learning_rate": 7.0887968269364186e-06, "loss": 0.4246, "step": 2310 }, { "epoch": 1.2797415782187356, "grad_norm": 0.33418864011764526, "learning_rate": 7.085867685660624e-06, "loss": 0.4293, "step": 2311 }, { "epoch": 1.2802953391785878, "grad_norm": 0.34438514709472656, "learning_rate": 7.082937677435601e-06, "loss": 0.4465, "step": 2312 }, { "epoch": 1.28084910013844, "grad_norm": 0.315830260515213, "learning_rate": 7.0800068034791515e-06, "loss": 0.4845, "step": 2313 }, { "epoch": 1.2814028610982926, "grad_norm": 0.31214484572410583, "learning_rate": 7.0770750650094335e-06, "loss": 0.4338, "step": 2314 }, { "epoch": 1.2819566220581449, "grad_norm": 0.3828431963920593, "learning_rate": 7.0741424632449655e-06, "loss": 0.4843, "step": 2315 }, { "epoch": 1.2825103830179971, "grad_norm": 0.3196432292461395, "learning_rate": 7.071208999404624e-06, "loss": 0.4373, "step": 2316 }, { "epoch": 1.2830641439778496, "grad_norm": 0.3185766339302063, "learning_rate": 7.068274674707647e-06, "loss": 0.4331, "step": 2317 }, { "epoch": 1.283617904937702, "grad_norm": 0.29190677404403687, "learning_rate": 7.065339490373629e-06, "loss": 0.3868, "step": 2318 }, { "epoch": 1.2841716658975542, "grad_norm": 0.35260701179504395, "learning_rate": 7.062403447622515e-06, "loss": 0.4464, "step": 2319 }, { "epoch": 1.2847254268574066, "grad_norm": 0.328008234500885, "learning_rate": 7.059466547674616e-06, "loss": 0.432, "step": 2320 }, { "epoch": 1.285279187817259, "grad_norm": 0.3314267694950104, "learning_rate": 7.056528791750597e-06, "loss": 0.4605, "step": 2321 }, { "epoch": 1.2858329487771112, "grad_norm": 0.32184603810310364, "learning_rate": 7.053590181071475e-06, "loss": 0.3843, "step": 2322 }, { "epoch": 1.2863867097369637, "grad_norm": 0.32258665561676025, "learning_rate": 7.050650716858626e-06, "loss": 0.4499, "step": 2323 }, { "epoch": 1.286940470696816, "grad_norm": 0.3327908515930176, "learning_rate": 7.04771040033378e-06, "loss": 0.4206, "step": 2324 }, { "epoch": 1.2874942316566682, "grad_norm": 0.30946025252342224, "learning_rate": 7.044769232719022e-06, "loss": 0.3963, "step": 2325 }, { "epoch": 1.2880479926165205, "grad_norm": 0.3332348167896271, "learning_rate": 7.041827215236785e-06, "loss": 0.4547, "step": 2326 }, { "epoch": 1.2886017535763727, "grad_norm": 0.3017779588699341, "learning_rate": 7.038884349109865e-06, "loss": 0.4365, "step": 2327 }, { "epoch": 1.2891555145362252, "grad_norm": 0.31614258885383606, "learning_rate": 7.0359406355614015e-06, "loss": 0.4283, "step": 2328 }, { "epoch": 1.2897092754960775, "grad_norm": 0.33555659651756287, "learning_rate": 7.032996075814894e-06, "loss": 0.4593, "step": 2329 }, { "epoch": 1.2902630364559298, "grad_norm": 0.3083667457103729, "learning_rate": 7.030050671094186e-06, "loss": 0.428, "step": 2330 }, { "epoch": 1.2908167974157823, "grad_norm": 0.34141889214515686, "learning_rate": 7.02710442262348e-06, "loss": 0.4443, "step": 2331 }, { "epoch": 1.2913705583756345, "grad_norm": 0.3201657831668854, "learning_rate": 7.024157331627322e-06, "loss": 0.4059, "step": 2332 }, { "epoch": 1.2919243193354868, "grad_norm": 0.308566689491272, "learning_rate": 7.021209399330611e-06, "loss": 0.4208, "step": 2333 }, { "epoch": 1.2924780802953393, "grad_norm": 0.3336751163005829, "learning_rate": 7.018260626958601e-06, "loss": 0.4267, "step": 2334 }, { "epoch": 1.2930318412551915, "grad_norm": 0.3387017548084259, "learning_rate": 7.015311015736885e-06, "loss": 0.4546, "step": 2335 }, { "epoch": 1.2935856022150438, "grad_norm": 0.3102567493915558, "learning_rate": 7.012360566891414e-06, "loss": 0.4178, "step": 2336 }, { "epoch": 1.2941393631748963, "grad_norm": 0.35240495204925537, "learning_rate": 7.00940928164848e-06, "loss": 0.4191, "step": 2337 }, { "epoch": 1.2946931241347486, "grad_norm": 0.3475927412509918, "learning_rate": 7.006457161234729e-06, "loss": 0.4835, "step": 2338 }, { "epoch": 1.2952468850946008, "grad_norm": 0.3695668876171112, "learning_rate": 7.003504206877148e-06, "loss": 0.4633, "step": 2339 }, { "epoch": 1.295800646054453, "grad_norm": 0.3143689036369324, "learning_rate": 7.000550419803073e-06, "loss": 0.3654, "step": 2340 }, { "epoch": 1.2963544070143054, "grad_norm": 0.3383753299713135, "learning_rate": 6.997595801240189e-06, "loss": 0.4717, "step": 2341 }, { "epoch": 1.2969081679741579, "grad_norm": 0.33234888315200806, "learning_rate": 6.994640352416524e-06, "loss": 0.4239, "step": 2342 }, { "epoch": 1.2974619289340101, "grad_norm": 0.3519286513328552, "learning_rate": 6.9916840745604495e-06, "loss": 0.4335, "step": 2343 }, { "epoch": 1.2980156898938624, "grad_norm": 0.32328149676322937, "learning_rate": 6.988726968900684e-06, "loss": 0.3986, "step": 2344 }, { "epoch": 1.298569450853715, "grad_norm": 0.36551809310913086, "learning_rate": 6.98576903666629e-06, "loss": 0.4779, "step": 2345 }, { "epoch": 1.2991232118135672, "grad_norm": 0.30520305037498474, "learning_rate": 6.982810279086673e-06, "loss": 0.4389, "step": 2346 }, { "epoch": 1.2996769727734194, "grad_norm": 0.3585580289363861, "learning_rate": 6.97985069739158e-06, "loss": 0.4736, "step": 2347 }, { "epoch": 1.300230733733272, "grad_norm": 0.3234131336212158, "learning_rate": 6.976890292811105e-06, "loss": 0.4288, "step": 2348 }, { "epoch": 1.3007844946931242, "grad_norm": 0.3140997886657715, "learning_rate": 6.973929066575678e-06, "loss": 0.397, "step": 2349 }, { "epoch": 1.3013382556529764, "grad_norm": 0.34123122692108154, "learning_rate": 6.970967019916074e-06, "loss": 0.4276, "step": 2350 }, { "epoch": 1.3018920166128287, "grad_norm": 0.32014861702919006, "learning_rate": 6.968004154063409e-06, "loss": 0.4265, "step": 2351 }, { "epoch": 1.3024457775726812, "grad_norm": 0.3344269096851349, "learning_rate": 6.96504047024914e-06, "loss": 0.4309, "step": 2352 }, { "epoch": 1.3029995385325335, "grad_norm": 0.3356058895587921, "learning_rate": 6.962075969705061e-06, "loss": 0.4158, "step": 2353 }, { "epoch": 1.3035532994923857, "grad_norm": 0.35437485575675964, "learning_rate": 6.959110653663309e-06, "loss": 0.4149, "step": 2354 }, { "epoch": 1.304107060452238, "grad_norm": 0.33421510457992554, "learning_rate": 6.956144523356359e-06, "loss": 0.4649, "step": 2355 }, { "epoch": 1.3046608214120905, "grad_norm": 0.3053072690963745, "learning_rate": 6.953177580017022e-06, "loss": 0.3896, "step": 2356 }, { "epoch": 1.3052145823719428, "grad_norm": 0.3525082468986511, "learning_rate": 6.950209824878448e-06, "loss": 0.4453, "step": 2357 }, { "epoch": 1.305768343331795, "grad_norm": 0.3669533133506775, "learning_rate": 6.947241259174128e-06, "loss": 0.4379, "step": 2358 }, { "epoch": 1.3063221042916475, "grad_norm": 0.33350473642349243, "learning_rate": 6.944271884137883e-06, "loss": 0.3938, "step": 2359 }, { "epoch": 1.3068758652514998, "grad_norm": 0.345032662153244, "learning_rate": 6.9413017010038795e-06, "loss": 0.3935, "step": 2360 }, { "epoch": 1.307429626211352, "grad_norm": 0.38768327236175537, "learning_rate": 6.938330711006612e-06, "loss": 0.4721, "step": 2361 }, { "epoch": 1.3079833871712045, "grad_norm": 0.351590096950531, "learning_rate": 6.935358915380912e-06, "loss": 0.4303, "step": 2362 }, { "epoch": 1.3085371481310568, "grad_norm": 0.3631264865398407, "learning_rate": 6.932386315361949e-06, "loss": 0.4296, "step": 2363 }, { "epoch": 1.309090909090909, "grad_norm": 0.3774767518043518, "learning_rate": 6.929412912185224e-06, "loss": 0.4504, "step": 2364 }, { "epoch": 1.3096446700507614, "grad_norm": 0.3191295862197876, "learning_rate": 6.926438707086574e-06, "loss": 0.4256, "step": 2365 }, { "epoch": 1.3101984310106136, "grad_norm": 0.4066324532032013, "learning_rate": 6.923463701302163e-06, "loss": 0.461, "step": 2366 }, { "epoch": 1.310752191970466, "grad_norm": 0.3235243260860443, "learning_rate": 6.9204878960685e-06, "loss": 0.4092, "step": 2367 }, { "epoch": 1.3113059529303184, "grad_norm": 0.36312201619148254, "learning_rate": 6.917511292622414e-06, "loss": 0.4441, "step": 2368 }, { "epoch": 1.3118597138901706, "grad_norm": 0.3658106327056885, "learning_rate": 6.914533892201072e-06, "loss": 0.4553, "step": 2369 }, { "epoch": 1.3124134748500231, "grad_norm": 0.3973175585269928, "learning_rate": 6.911555696041971e-06, "loss": 0.3911, "step": 2370 }, { "epoch": 1.3129672358098754, "grad_norm": 0.35568028688430786, "learning_rate": 6.908576705382939e-06, "loss": 0.4531, "step": 2371 }, { "epoch": 1.3135209967697277, "grad_norm": 0.34518539905548096, "learning_rate": 6.905596921462133e-06, "loss": 0.4487, "step": 2372 }, { "epoch": 1.3140747577295802, "grad_norm": 0.3639824390411377, "learning_rate": 6.90261634551804e-06, "loss": 0.4115, "step": 2373 }, { "epoch": 1.3146285186894324, "grad_norm": 0.32046496868133545, "learning_rate": 6.899634978789483e-06, "loss": 0.4203, "step": 2374 }, { "epoch": 1.3151822796492847, "grad_norm": 0.4158479869365692, "learning_rate": 6.8966528225156e-06, "loss": 0.4878, "step": 2375 }, { "epoch": 1.3157360406091372, "grad_norm": 0.3189457654953003, "learning_rate": 6.893669877935867e-06, "loss": 0.4134, "step": 2376 }, { "epoch": 1.3162898015689894, "grad_norm": 0.35912755131721497, "learning_rate": 6.890686146290085e-06, "loss": 0.4777, "step": 2377 }, { "epoch": 1.3168435625288417, "grad_norm": 0.3289629817008972, "learning_rate": 6.887701628818384e-06, "loss": 0.4459, "step": 2378 }, { "epoch": 1.317397323488694, "grad_norm": 0.3270723521709442, "learning_rate": 6.884716326761218e-06, "loss": 0.4527, "step": 2379 }, { "epoch": 1.3179510844485463, "grad_norm": 0.3573155403137207, "learning_rate": 6.88173024135937e-06, "loss": 0.4609, "step": 2380 }, { "epoch": 1.3185048454083987, "grad_norm": 0.3167283833026886, "learning_rate": 6.8787433738539435e-06, "loss": 0.4134, "step": 2381 }, { "epoch": 1.319058606368251, "grad_norm": 0.3381032645702362, "learning_rate": 6.875755725486372e-06, "loss": 0.4178, "step": 2382 }, { "epoch": 1.3196123673281033, "grad_norm": 0.3348497450351715, "learning_rate": 6.872767297498412e-06, "loss": 0.4141, "step": 2383 }, { "epoch": 1.3201661282879558, "grad_norm": 0.32760539650917053, "learning_rate": 6.869778091132143e-06, "loss": 0.442, "step": 2384 }, { "epoch": 1.320719889247808, "grad_norm": 0.33837011456489563, "learning_rate": 6.86678810762997e-06, "loss": 0.4412, "step": 2385 }, { "epoch": 1.3212736502076603, "grad_norm": 0.29025137424468994, "learning_rate": 6.86379734823462e-06, "loss": 0.3965, "step": 2386 }, { "epoch": 1.3218274111675128, "grad_norm": 0.31470558047294617, "learning_rate": 6.860805814189142e-06, "loss": 0.4503, "step": 2387 }, { "epoch": 1.322381172127365, "grad_norm": 0.3304067850112915, "learning_rate": 6.857813506736905e-06, "loss": 0.4244, "step": 2388 }, { "epoch": 1.3229349330872173, "grad_norm": 0.3494798243045807, "learning_rate": 6.8548204271216065e-06, "loss": 0.4109, "step": 2389 }, { "epoch": 1.3234886940470698, "grad_norm": 0.3391423225402832, "learning_rate": 6.8518265765872565e-06, "loss": 0.4413, "step": 2390 }, { "epoch": 1.324042455006922, "grad_norm": 0.3066764771938324, "learning_rate": 6.84883195637819e-06, "loss": 0.4441, "step": 2391 }, { "epoch": 1.3245962159667743, "grad_norm": 0.35414811968803406, "learning_rate": 6.845836567739063e-06, "loss": 0.426, "step": 2392 }, { "epoch": 1.3251499769266266, "grad_norm": 0.3006488084793091, "learning_rate": 6.8428404119148465e-06, "loss": 0.3957, "step": 2393 }, { "epoch": 1.3257037378864789, "grad_norm": 0.30402109026908875, "learning_rate": 6.839843490150834e-06, "loss": 0.4079, "step": 2394 }, { "epoch": 1.3262574988463314, "grad_norm": 0.31300175189971924, "learning_rate": 6.836845803692635e-06, "loss": 0.4402, "step": 2395 }, { "epoch": 1.3268112598061836, "grad_norm": 0.3690386414527893, "learning_rate": 6.833847353786179e-06, "loss": 0.4583, "step": 2396 }, { "epoch": 1.327365020766036, "grad_norm": 0.3488906919956207, "learning_rate": 6.830848141677711e-06, "loss": 0.4165, "step": 2397 }, { "epoch": 1.3279187817258884, "grad_norm": 0.3144093453884125, "learning_rate": 6.827848168613795e-06, "loss": 0.447, "step": 2398 }, { "epoch": 1.3284725426857407, "grad_norm": 0.3477556109428406, "learning_rate": 6.82484743584131e-06, "loss": 0.4499, "step": 2399 }, { "epoch": 1.329026303645593, "grad_norm": 0.42044952511787415, "learning_rate": 6.821845944607448e-06, "loss": 0.4067, "step": 2400 }, { "epoch": 1.3295800646054454, "grad_norm": 0.3515290915966034, "learning_rate": 6.8188436961597216e-06, "loss": 0.4545, "step": 2401 }, { "epoch": 1.3301338255652977, "grad_norm": 0.35704395174980164, "learning_rate": 6.815840691745953e-06, "loss": 0.4372, "step": 2402 }, { "epoch": 1.33068758652515, "grad_norm": 0.39177390933036804, "learning_rate": 6.812836932614284e-06, "loss": 0.4308, "step": 2403 }, { "epoch": 1.3312413474850022, "grad_norm": 0.34464582800865173, "learning_rate": 6.809832420013165e-06, "loss": 0.4408, "step": 2404 }, { "epoch": 1.3317951084448547, "grad_norm": 0.34975963830947876, "learning_rate": 6.80682715519136e-06, "loss": 0.4321, "step": 2405 }, { "epoch": 1.332348869404707, "grad_norm": 0.4052976071834564, "learning_rate": 6.803821139397951e-06, "loss": 0.4215, "step": 2406 }, { "epoch": 1.3329026303645592, "grad_norm": 0.33932822942733765, "learning_rate": 6.800814373882328e-06, "loss": 0.4366, "step": 2407 }, { "epoch": 1.3334563913244115, "grad_norm": 0.3383556604385376, "learning_rate": 6.797806859894189e-06, "loss": 0.3935, "step": 2408 }, { "epoch": 1.334010152284264, "grad_norm": 0.33893272280693054, "learning_rate": 6.7947985986835504e-06, "loss": 0.4173, "step": 2409 }, { "epoch": 1.3345639132441163, "grad_norm": 0.31190621852874756, "learning_rate": 6.791789591500736e-06, "loss": 0.4426, "step": 2410 }, { "epoch": 1.3351176742039685, "grad_norm": 0.35466665029525757, "learning_rate": 6.788779839596378e-06, "loss": 0.426, "step": 2411 }, { "epoch": 1.335671435163821, "grad_norm": 0.3667890429496765, "learning_rate": 6.785769344221421e-06, "loss": 0.4892, "step": 2412 }, { "epoch": 1.3362251961236733, "grad_norm": 0.29107028245925903, "learning_rate": 6.782758106627116e-06, "loss": 0.3758, "step": 2413 }, { "epoch": 1.3367789570835256, "grad_norm": 0.38572749495506287, "learning_rate": 6.779746128065026e-06, "loss": 0.4379, "step": 2414 }, { "epoch": 1.337332718043378, "grad_norm": 0.30933624505996704, "learning_rate": 6.776733409787017e-06, "loss": 0.445, "step": 2415 }, { "epoch": 1.3378864790032303, "grad_norm": 0.36941906809806824, "learning_rate": 6.773719953045265e-06, "loss": 0.4787, "step": 2416 }, { "epoch": 1.3384402399630826, "grad_norm": 0.32275471091270447, "learning_rate": 6.770705759092255e-06, "loss": 0.428, "step": 2417 }, { "epoch": 1.3389940009229349, "grad_norm": 0.3607829213142395, "learning_rate": 6.767690829180777e-06, "loss": 0.459, "step": 2418 }, { "epoch": 1.3395477618827873, "grad_norm": 0.32859131693840027, "learning_rate": 6.764675164563926e-06, "loss": 0.4096, "step": 2419 }, { "epoch": 1.3401015228426396, "grad_norm": 0.3483637869358063, "learning_rate": 6.7616587664951e-06, "loss": 0.4478, "step": 2420 }, { "epoch": 1.3406552838024919, "grad_norm": 0.37723615765571594, "learning_rate": 6.758641636228006e-06, "loss": 0.4612, "step": 2421 }, { "epoch": 1.3412090447623441, "grad_norm": 0.3199814260005951, "learning_rate": 6.755623775016656e-06, "loss": 0.4356, "step": 2422 }, { "epoch": 1.3417628057221966, "grad_norm": 0.3861233592033386, "learning_rate": 6.752605184115361e-06, "loss": 0.3827, "step": 2423 }, { "epoch": 1.342316566682049, "grad_norm": 0.4381290078163147, "learning_rate": 6.74958586477874e-06, "loss": 0.4709, "step": 2424 }, { "epoch": 1.3428703276419012, "grad_norm": 0.31224122643470764, "learning_rate": 6.746565818261712e-06, "loss": 0.4117, "step": 2425 }, { "epoch": 1.3434240886017537, "grad_norm": 0.36918047070503235, "learning_rate": 6.743545045819498e-06, "loss": 0.4195, "step": 2426 }, { "epoch": 1.343977849561606, "grad_norm": 0.3938567042350769, "learning_rate": 6.740523548707624e-06, "loss": 0.4401, "step": 2427 }, { "epoch": 1.3445316105214582, "grad_norm": 0.3282095193862915, "learning_rate": 6.737501328181912e-06, "loss": 0.4134, "step": 2428 }, { "epoch": 1.3450853714813107, "grad_norm": 0.33034488558769226, "learning_rate": 6.734478385498491e-06, "loss": 0.4432, "step": 2429 }, { "epoch": 1.345639132441163, "grad_norm": 0.3354782164096832, "learning_rate": 6.731454721913784e-06, "loss": 0.4355, "step": 2430 }, { "epoch": 1.3461928934010152, "grad_norm": 0.3533826172351837, "learning_rate": 6.728430338684515e-06, "loss": 0.4194, "step": 2431 }, { "epoch": 1.3467466543608675, "grad_norm": 0.337444543838501, "learning_rate": 6.725405237067714e-06, "loss": 0.4054, "step": 2432 }, { "epoch": 1.3473004153207198, "grad_norm": 0.32867327332496643, "learning_rate": 6.722379418320698e-06, "loss": 0.4213, "step": 2433 }, { "epoch": 1.3478541762805722, "grad_norm": 0.3617905080318451, "learning_rate": 6.719352883701092e-06, "loss": 0.4745, "step": 2434 }, { "epoch": 1.3484079372404245, "grad_norm": 0.31930628418922424, "learning_rate": 6.716325634466813e-06, "loss": 0.4357, "step": 2435 }, { "epoch": 1.3489616982002768, "grad_norm": 0.34171539545059204, "learning_rate": 6.713297671876078e-06, "loss": 0.3975, "step": 2436 }, { "epoch": 1.3495154591601293, "grad_norm": 0.32884159684181213, "learning_rate": 6.710268997187398e-06, "loss": 0.4447, "step": 2437 }, { "epoch": 1.3500692201199815, "grad_norm": 0.3368884027004242, "learning_rate": 6.707239611659581e-06, "loss": 0.4663, "step": 2438 }, { "epoch": 1.3506229810798338, "grad_norm": 0.3154871165752411, "learning_rate": 6.7042095165517305e-06, "loss": 0.4015, "step": 2439 }, { "epoch": 1.3511767420396863, "grad_norm": 0.3374076783657074, "learning_rate": 6.701178713123246e-06, "loss": 0.4452, "step": 2440 }, { "epoch": 1.3517305029995386, "grad_norm": 0.3319837749004364, "learning_rate": 6.69814720263382e-06, "loss": 0.4126, "step": 2441 }, { "epoch": 1.3522842639593908, "grad_norm": 0.32674962282180786, "learning_rate": 6.695114986343438e-06, "loss": 0.4489, "step": 2442 }, { "epoch": 1.3528380249192433, "grad_norm": 0.31147482991218567, "learning_rate": 6.6920820655123816e-06, "loss": 0.4597, "step": 2443 }, { "epoch": 1.3533917858790956, "grad_norm": 0.315082311630249, "learning_rate": 6.689048441401225e-06, "loss": 0.4382, "step": 2444 }, { "epoch": 1.3539455468389479, "grad_norm": 0.28360092639923096, "learning_rate": 6.686014115270829e-06, "loss": 0.3982, "step": 2445 }, { "epoch": 1.3544993077988001, "grad_norm": 0.31845080852508545, "learning_rate": 6.6829790883823555e-06, "loss": 0.4331, "step": 2446 }, { "epoch": 1.3550530687586524, "grad_norm": 0.3309492766857147, "learning_rate": 6.679943361997249e-06, "loss": 0.4521, "step": 2447 }, { "epoch": 1.3556068297185049, "grad_norm": 0.32162758708000183, "learning_rate": 6.676906937377252e-06, "loss": 0.4724, "step": 2448 }, { "epoch": 1.3561605906783571, "grad_norm": 0.2934490740299225, "learning_rate": 6.673869815784391e-06, "loss": 0.4076, "step": 2449 }, { "epoch": 1.3567143516382094, "grad_norm": 0.3367052674293518, "learning_rate": 6.670831998480985e-06, "loss": 0.4154, "step": 2450 }, { "epoch": 1.357268112598062, "grad_norm": 0.34910961985588074, "learning_rate": 6.6677934867296446e-06, "loss": 0.4595, "step": 2451 }, { "epoch": 1.3578218735579142, "grad_norm": 0.34362223744392395, "learning_rate": 6.664754281793264e-06, "loss": 0.4298, "step": 2452 }, { "epoch": 1.3583756345177664, "grad_norm": 0.2733013331890106, "learning_rate": 6.661714384935028e-06, "loss": 0.3666, "step": 2453 }, { "epoch": 1.358929395477619, "grad_norm": 0.3344862759113312, "learning_rate": 6.658673797418412e-06, "loss": 0.4534, "step": 2454 }, { "epoch": 1.3594831564374712, "grad_norm": 0.3306773900985718, "learning_rate": 6.655632520507172e-06, "loss": 0.4394, "step": 2455 }, { "epoch": 1.3600369173973235, "grad_norm": 0.3102458119392395, "learning_rate": 6.652590555465355e-06, "loss": 0.4383, "step": 2456 }, { "epoch": 1.360590678357176, "grad_norm": 0.34778374433517456, "learning_rate": 6.649547903557292e-06, "loss": 0.3806, "step": 2457 }, { "epoch": 1.3611444393170282, "grad_norm": 0.3464162349700928, "learning_rate": 6.646504566047602e-06, "loss": 0.4549, "step": 2458 }, { "epoch": 1.3616982002768805, "grad_norm": 0.3449877202510834, "learning_rate": 6.643460544201189e-06, "loss": 0.3926, "step": 2459 }, { "epoch": 1.3622519612367328, "grad_norm": 0.3758089542388916, "learning_rate": 6.640415839283236e-06, "loss": 0.4247, "step": 2460 }, { "epoch": 1.362805722196585, "grad_norm": 0.36839208006858826, "learning_rate": 6.637370452559219e-06, "loss": 0.4579, "step": 2461 }, { "epoch": 1.3633594831564375, "grad_norm": 0.33592161536216736, "learning_rate": 6.634324385294886e-06, "loss": 0.451, "step": 2462 }, { "epoch": 1.3639132441162898, "grad_norm": 0.3945862352848053, "learning_rate": 6.6312776387562775e-06, "loss": 0.4476, "step": 2463 }, { "epoch": 1.364467005076142, "grad_norm": 0.3545833230018616, "learning_rate": 6.628230214209711e-06, "loss": 0.4374, "step": 2464 }, { "epoch": 1.3650207660359945, "grad_norm": 0.318996787071228, "learning_rate": 6.62518211292179e-06, "loss": 0.3895, "step": 2465 }, { "epoch": 1.3655745269958468, "grad_norm": 0.34137725830078125, "learning_rate": 6.6221333361593975e-06, "loss": 0.4303, "step": 2466 }, { "epoch": 1.366128287955699, "grad_norm": 0.35655343532562256, "learning_rate": 6.619083885189692e-06, "loss": 0.4639, "step": 2467 }, { "epoch": 1.3666820489155516, "grad_norm": 0.36334553360939026, "learning_rate": 6.616033761280122e-06, "loss": 0.3933, "step": 2468 }, { "epoch": 1.3672358098754038, "grad_norm": 0.33474114537239075, "learning_rate": 6.612982965698407e-06, "loss": 0.4354, "step": 2469 }, { "epoch": 1.367789570835256, "grad_norm": 0.3609922528266907, "learning_rate": 6.6099314997125496e-06, "loss": 0.4266, "step": 2470 }, { "epoch": 1.3683433317951084, "grad_norm": 0.3736201822757721, "learning_rate": 6.606879364590832e-06, "loss": 0.4412, "step": 2471 }, { "epoch": 1.3688970927549609, "grad_norm": 0.35394227504730225, "learning_rate": 6.603826561601815e-06, "loss": 0.4527, "step": 2472 }, { "epoch": 1.3694508537148131, "grad_norm": 0.38320082426071167, "learning_rate": 6.600773092014331e-06, "loss": 0.3877, "step": 2473 }, { "epoch": 1.3700046146746654, "grad_norm": 0.3721438944339752, "learning_rate": 6.597718957097496e-06, "loss": 0.4502, "step": 2474 }, { "epoch": 1.3705583756345177, "grad_norm": 0.31690487265586853, "learning_rate": 6.594664158120699e-06, "loss": 0.4166, "step": 2475 }, { "epoch": 1.3711121365943701, "grad_norm": 0.36020374298095703, "learning_rate": 6.591608696353607e-06, "loss": 0.434, "step": 2476 }, { "epoch": 1.3716658975542224, "grad_norm": 0.33441877365112305, "learning_rate": 6.588552573066162e-06, "loss": 0.409, "step": 2477 }, { "epoch": 1.3722196585140747, "grad_norm": 0.32183679938316345, "learning_rate": 6.585495789528581e-06, "loss": 0.4153, "step": 2478 }, { "epoch": 1.3727734194739272, "grad_norm": 0.36899349093437195, "learning_rate": 6.582438347011353e-06, "loss": 0.4272, "step": 2479 }, { "epoch": 1.3733271804337794, "grad_norm": 0.337520033121109, "learning_rate": 6.579380246785244e-06, "loss": 0.4728, "step": 2480 }, { "epoch": 1.3738809413936317, "grad_norm": 0.3679571747779846, "learning_rate": 6.5763214901212924e-06, "loss": 0.4623, "step": 2481 }, { "epoch": 1.3744347023534842, "grad_norm": 0.31460532546043396, "learning_rate": 6.5732620782908085e-06, "loss": 0.404, "step": 2482 }, { "epoch": 1.3749884633133365, "grad_norm": 0.334552139043808, "learning_rate": 6.570202012565378e-06, "loss": 0.4417, "step": 2483 }, { "epoch": 1.3755422242731887, "grad_norm": 0.3649669587612152, "learning_rate": 6.5671412942168526e-06, "loss": 0.4647, "step": 2484 }, { "epoch": 1.376095985233041, "grad_norm": 0.28191328048706055, "learning_rate": 6.564079924517363e-06, "loss": 0.3857, "step": 2485 }, { "epoch": 1.3766497461928933, "grad_norm": 0.3456518054008484, "learning_rate": 6.5610179047393e-06, "loss": 0.4256, "step": 2486 }, { "epoch": 1.3772035071527458, "grad_norm": 0.3526061475276947, "learning_rate": 6.557955236155338e-06, "loss": 0.4244, "step": 2487 }, { "epoch": 1.377757268112598, "grad_norm": 0.2859128415584564, "learning_rate": 6.554891920038409e-06, "loss": 0.392, "step": 2488 }, { "epoch": 1.3783110290724503, "grad_norm": 0.3359667658805847, "learning_rate": 6.551827957661722e-06, "loss": 0.4341, "step": 2489 }, { "epoch": 1.3788647900323028, "grad_norm": 0.296140193939209, "learning_rate": 6.548763350298751e-06, "loss": 0.4145, "step": 2490 }, { "epoch": 1.379418550992155, "grad_norm": 0.3286019563674927, "learning_rate": 6.545698099223236e-06, "loss": 0.4203, "step": 2491 }, { "epoch": 1.3799723119520073, "grad_norm": 0.3517322838306427, "learning_rate": 6.542632205709194e-06, "loss": 0.4385, "step": 2492 }, { "epoch": 1.3805260729118598, "grad_norm": 0.32218578457832336, "learning_rate": 6.539565671030894e-06, "loss": 0.4551, "step": 2493 }, { "epoch": 1.381079833871712, "grad_norm": 0.3575578033924103, "learning_rate": 6.536498496462889e-06, "loss": 0.421, "step": 2494 }, { "epoch": 1.3816335948315643, "grad_norm": 0.35625895857810974, "learning_rate": 6.533430683279979e-06, "loss": 0.4314, "step": 2495 }, { "epoch": 1.3821873557914168, "grad_norm": 0.3474896550178528, "learning_rate": 6.530362232757248e-06, "loss": 0.4292, "step": 2496 }, { "epoch": 1.382741116751269, "grad_norm": 0.3289237916469574, "learning_rate": 6.5272931461700305e-06, "loss": 0.4459, "step": 2497 }, { "epoch": 1.3832948777111214, "grad_norm": 0.33103692531585693, "learning_rate": 6.524223424793932e-06, "loss": 0.4378, "step": 2498 }, { "epoch": 1.3838486386709736, "grad_norm": 0.3253140151500702, "learning_rate": 6.5211530699048246e-06, "loss": 0.4205, "step": 2499 }, { "epoch": 1.384402399630826, "grad_norm": 0.3666572868824005, "learning_rate": 6.518082082778834e-06, "loss": 0.4618, "step": 2500 }, { "epoch": 1.3849561605906784, "grad_norm": 0.31874534487724304, "learning_rate": 6.51501046469236e-06, "loss": 0.4236, "step": 2501 }, { "epoch": 1.3855099215505307, "grad_norm": 0.3129189610481262, "learning_rate": 6.511938216922055e-06, "loss": 0.4409, "step": 2502 }, { "epoch": 1.386063682510383, "grad_norm": 0.32889115810394287, "learning_rate": 6.508865340744841e-06, "loss": 0.4387, "step": 2503 }, { "epoch": 1.3866174434702354, "grad_norm": 0.359159380197525, "learning_rate": 6.505791837437896e-06, "loss": 0.4119, "step": 2504 }, { "epoch": 1.3871712044300877, "grad_norm": 0.30356934666633606, "learning_rate": 6.502717708278659e-06, "loss": 0.4164, "step": 2505 }, { "epoch": 1.38772496538994, "grad_norm": 0.31647035479545593, "learning_rate": 6.499642954544833e-06, "loss": 0.4088, "step": 2506 }, { "epoch": 1.3882787263497924, "grad_norm": 0.33557572960853577, "learning_rate": 6.496567577514375e-06, "loss": 0.4378, "step": 2507 }, { "epoch": 1.3888324873096447, "grad_norm": 0.34858283400535583, "learning_rate": 6.493491578465508e-06, "loss": 0.4205, "step": 2508 }, { "epoch": 1.389386248269497, "grad_norm": 0.3005806505680084, "learning_rate": 6.490414958676705e-06, "loss": 0.4009, "step": 2509 }, { "epoch": 1.3899400092293495, "grad_norm": 0.3244060277938843, "learning_rate": 6.487337719426704e-06, "loss": 0.4531, "step": 2510 }, { "epoch": 1.3904937701892017, "grad_norm": 0.3260844349861145, "learning_rate": 6.484259861994497e-06, "loss": 0.4466, "step": 2511 }, { "epoch": 1.391047531149054, "grad_norm": 0.3843885362148285, "learning_rate": 6.481181387659337e-06, "loss": 0.4393, "step": 2512 }, { "epoch": 1.3916012921089063, "grad_norm": 0.29992932081222534, "learning_rate": 6.478102297700726e-06, "loss": 0.4069, "step": 2513 }, { "epoch": 1.3921550530687585, "grad_norm": 0.3998725414276123, "learning_rate": 6.475022593398429e-06, "loss": 0.4939, "step": 2514 }, { "epoch": 1.392708814028611, "grad_norm": 0.32673683762550354, "learning_rate": 6.471942276032462e-06, "loss": 0.4035, "step": 2515 }, { "epoch": 1.3932625749884633, "grad_norm": 0.3447602391242981, "learning_rate": 6.4688613468831e-06, "loss": 0.437, "step": 2516 }, { "epoch": 1.3938163359483156, "grad_norm": 0.36924198269844055, "learning_rate": 6.465779807230865e-06, "loss": 0.4286, "step": 2517 }, { "epoch": 1.394370096908168, "grad_norm": 0.3466911017894745, "learning_rate": 6.462697658356543e-06, "loss": 0.4324, "step": 2518 }, { "epoch": 1.3949238578680203, "grad_norm": 0.32316070795059204, "learning_rate": 6.459614901541162e-06, "loss": 0.4157, "step": 2519 }, { "epoch": 1.3954776188278726, "grad_norm": 0.32182133197784424, "learning_rate": 6.456531538066013e-06, "loss": 0.3989, "step": 2520 }, { "epoch": 1.396031379787725, "grad_norm": 0.3637009561061859, "learning_rate": 6.4534475692126315e-06, "loss": 0.4552, "step": 2521 }, { "epoch": 1.3965851407475773, "grad_norm": 0.3309849798679352, "learning_rate": 6.450362996262809e-06, "loss": 0.4122, "step": 2522 }, { "epoch": 1.3971389017074296, "grad_norm": 0.3493950664997101, "learning_rate": 6.447277820498586e-06, "loss": 0.4188, "step": 2523 }, { "epoch": 1.397692662667282, "grad_norm": 0.3293689787387848, "learning_rate": 6.444192043202253e-06, "loss": 0.4436, "step": 2524 }, { "epoch": 1.3982464236271344, "grad_norm": 0.30544590950012207, "learning_rate": 6.441105665656355e-06, "loss": 0.4244, "step": 2525 }, { "epoch": 1.3988001845869866, "grad_norm": 0.33151954412460327, "learning_rate": 6.438018689143679e-06, "loss": 0.4644, "step": 2526 }, { "epoch": 1.399353945546839, "grad_norm": 0.3273170292377472, "learning_rate": 6.434931114947268e-06, "loss": 0.421, "step": 2527 }, { "epoch": 1.3999077065066912, "grad_norm": 0.3378547132015228, "learning_rate": 6.4318429443504115e-06, "loss": 0.4625, "step": 2528 }, { "epoch": 1.4004614674665437, "grad_norm": 0.3092031478881836, "learning_rate": 6.428754178636642e-06, "loss": 0.3958, "step": 2529 }, { "epoch": 1.401015228426396, "grad_norm": 0.32259392738342285, "learning_rate": 6.425664819089746e-06, "loss": 0.4426, "step": 2530 }, { "epoch": 1.4015689893862482, "grad_norm": 0.3200598359107971, "learning_rate": 6.422574866993753e-06, "loss": 0.4168, "step": 2531 }, { "epoch": 1.4021227503461007, "grad_norm": 0.31189650297164917, "learning_rate": 6.419484323632942e-06, "loss": 0.4359, "step": 2532 }, { "epoch": 1.402676511305953, "grad_norm": 0.3360437750816345, "learning_rate": 6.416393190291832e-06, "loss": 0.4423, "step": 2533 }, { "epoch": 1.4032302722658052, "grad_norm": 0.28385844826698303, "learning_rate": 6.413301468255193e-06, "loss": 0.4128, "step": 2534 }, { "epoch": 1.4037840332256577, "grad_norm": 0.31652164459228516, "learning_rate": 6.4102091588080385e-06, "loss": 0.424, "step": 2535 }, { "epoch": 1.40433779418551, "grad_norm": 0.3261098861694336, "learning_rate": 6.407116263235622e-06, "loss": 0.4674, "step": 2536 }, { "epoch": 1.4048915551453622, "grad_norm": 0.31049200892448425, "learning_rate": 6.404022782823447e-06, "loss": 0.4019, "step": 2537 }, { "epoch": 1.4054453161052145, "grad_norm": 0.3274073898792267, "learning_rate": 6.4009287188572534e-06, "loss": 0.4258, "step": 2538 }, { "epoch": 1.405999077065067, "grad_norm": 0.3241378366947174, "learning_rate": 6.397834072623032e-06, "loss": 0.4606, "step": 2539 }, { "epoch": 1.4065528380249193, "grad_norm": 0.34082674980163574, "learning_rate": 6.394738845407006e-06, "loss": 0.4429, "step": 2540 }, { "epoch": 1.4071065989847715, "grad_norm": 0.32892662286758423, "learning_rate": 6.391643038495646e-06, "loss": 0.4174, "step": 2541 }, { "epoch": 1.4076603599446238, "grad_norm": 0.3684942126274109, "learning_rate": 6.388546653175663e-06, "loss": 0.4738, "step": 2542 }, { "epoch": 1.4082141209044763, "grad_norm": 0.3162696063518524, "learning_rate": 6.3854496907340095e-06, "loss": 0.4067, "step": 2543 }, { "epoch": 1.4087678818643286, "grad_norm": 0.37218889594078064, "learning_rate": 6.3823521524578714e-06, "loss": 0.4703, "step": 2544 }, { "epoch": 1.4093216428241808, "grad_norm": 0.33042797446250916, "learning_rate": 6.3792540396346825e-06, "loss": 0.4104, "step": 2545 }, { "epoch": 1.4098754037840333, "grad_norm": 0.31874728202819824, "learning_rate": 6.376155353552111e-06, "loss": 0.457, "step": 2546 }, { "epoch": 1.4104291647438856, "grad_norm": 0.3244047462940216, "learning_rate": 6.373056095498061e-06, "loss": 0.4615, "step": 2547 }, { "epoch": 1.4109829257037378, "grad_norm": 0.30273687839508057, "learning_rate": 6.369956266760681e-06, "loss": 0.3963, "step": 2548 }, { "epoch": 1.4115366866635903, "grad_norm": 0.30199751257896423, "learning_rate": 6.36685586862835e-06, "loss": 0.4546, "step": 2549 }, { "epoch": 1.4120904476234426, "grad_norm": 0.28718239068984985, "learning_rate": 6.3637549023896886e-06, "loss": 0.4233, "step": 2550 }, { "epoch": 1.4126442085832949, "grad_norm": 0.3684097230434418, "learning_rate": 6.360653369333549e-06, "loss": 0.4635, "step": 2551 }, { "epoch": 1.4131979695431471, "grad_norm": 0.29726868867874146, "learning_rate": 6.357551270749025e-06, "loss": 0.4138, "step": 2552 }, { "epoch": 1.4137517305029994, "grad_norm": 0.3271128535270691, "learning_rate": 6.354448607925439e-06, "loss": 0.44, "step": 2553 }, { "epoch": 1.414305491462852, "grad_norm": 0.31306126713752747, "learning_rate": 6.351345382152352e-06, "loss": 0.4298, "step": 2554 }, { "epoch": 1.4148592524227042, "grad_norm": 0.28567296266555786, "learning_rate": 6.3482415947195566e-06, "loss": 0.3955, "step": 2555 }, { "epoch": 1.4154130133825564, "grad_norm": 0.3271925747394562, "learning_rate": 6.345137246917081e-06, "loss": 0.4707, "step": 2556 }, { "epoch": 1.415966774342409, "grad_norm": 0.2949606478214264, "learning_rate": 6.342032340035185e-06, "loss": 0.3935, "step": 2557 }, { "epoch": 1.4165205353022612, "grad_norm": 0.32326123118400574, "learning_rate": 6.338926875364362e-06, "loss": 0.4629, "step": 2558 }, { "epoch": 1.4170742962621135, "grad_norm": 0.3351379930973053, "learning_rate": 6.335820854195337e-06, "loss": 0.4158, "step": 2559 }, { "epoch": 1.417628057221966, "grad_norm": 0.313466876745224, "learning_rate": 6.332714277819062e-06, "loss": 0.3884, "step": 2560 }, { "epoch": 1.4181818181818182, "grad_norm": 0.43750908970832825, "learning_rate": 6.3296071475267276e-06, "loss": 0.4601, "step": 2561 }, { "epoch": 1.4187355791416705, "grad_norm": 0.30761653184890747, "learning_rate": 6.326499464609747e-06, "loss": 0.4012, "step": 2562 }, { "epoch": 1.419289340101523, "grad_norm": 0.37385696172714233, "learning_rate": 6.32339123035977e-06, "loss": 0.453, "step": 2563 }, { "epoch": 1.4198431010613752, "grad_norm": 0.38181769847869873, "learning_rate": 6.3202824460686685e-06, "loss": 0.3907, "step": 2564 }, { "epoch": 1.4203968620212275, "grad_norm": 0.35578569769859314, "learning_rate": 6.317173113028549e-06, "loss": 0.4134, "step": 2565 }, { "epoch": 1.4209506229810798, "grad_norm": 0.33540859818458557, "learning_rate": 6.314063232531742e-06, "loss": 0.4418, "step": 2566 }, { "epoch": 1.421504383940932, "grad_norm": 0.3419015109539032, "learning_rate": 6.310952805870807e-06, "loss": 0.4237, "step": 2567 }, { "epoch": 1.4220581449007845, "grad_norm": 0.37837088108062744, "learning_rate": 6.3078418343385336e-06, "loss": 0.4631, "step": 2568 }, { "epoch": 1.4226119058606368, "grad_norm": 0.3365512490272522, "learning_rate": 6.304730319227932e-06, "loss": 0.4216, "step": 2569 }, { "epoch": 1.423165666820489, "grad_norm": 0.3822040855884552, "learning_rate": 6.301618261832244e-06, "loss": 0.4521, "step": 2570 }, { "epoch": 1.4237194277803416, "grad_norm": 0.35111767053604126, "learning_rate": 6.298505663444932e-06, "loss": 0.4141, "step": 2571 }, { "epoch": 1.4242731887401938, "grad_norm": 0.3181440532207489, "learning_rate": 6.295392525359685e-06, "loss": 0.4384, "step": 2572 }, { "epoch": 1.424826949700046, "grad_norm": 0.3754483759403229, "learning_rate": 6.292278848870418e-06, "loss": 0.4187, "step": 2573 }, { "epoch": 1.4253807106598986, "grad_norm": 0.386976957321167, "learning_rate": 6.289164635271268e-06, "loss": 0.4279, "step": 2574 }, { "epoch": 1.4259344716197508, "grad_norm": 0.3839198350906372, "learning_rate": 6.286049885856594e-06, "loss": 0.4928, "step": 2575 }, { "epoch": 1.426488232579603, "grad_norm": 0.29228079319000244, "learning_rate": 6.282934601920982e-06, "loss": 0.4272, "step": 2576 }, { "epoch": 1.4270419935394556, "grad_norm": 0.35197916626930237, "learning_rate": 6.279818784759236e-06, "loss": 0.4259, "step": 2577 }, { "epoch": 1.4275957544993079, "grad_norm": 0.35543590784072876, "learning_rate": 6.2767024356663846e-06, "loss": 0.4355, "step": 2578 }, { "epoch": 1.4281495154591601, "grad_norm": 0.319234162569046, "learning_rate": 6.273585555937674e-06, "loss": 0.4211, "step": 2579 }, { "epoch": 1.4287032764190124, "grad_norm": 0.3364875018596649, "learning_rate": 6.270468146868574e-06, "loss": 0.4377, "step": 2580 }, { "epoch": 1.4292570373788647, "grad_norm": 0.3575862646102905, "learning_rate": 6.267350209754774e-06, "loss": 0.4271, "step": 2581 }, { "epoch": 1.4298107983387172, "grad_norm": 0.3749852478504181, "learning_rate": 6.264231745892181e-06, "loss": 0.437, "step": 2582 }, { "epoch": 1.4303645592985694, "grad_norm": 0.34788331389427185, "learning_rate": 6.261112756576923e-06, "loss": 0.4356, "step": 2583 }, { "epoch": 1.4309183202584217, "grad_norm": 0.2944987416267395, "learning_rate": 6.257993243105345e-06, "loss": 0.3979, "step": 2584 }, { "epoch": 1.4314720812182742, "grad_norm": 0.4024405777454376, "learning_rate": 6.2548732067740115e-06, "loss": 0.4969, "step": 2585 }, { "epoch": 1.4320258421781265, "grad_norm": 0.287917822599411, "learning_rate": 6.251752648879702e-06, "loss": 0.3838, "step": 2586 }, { "epoch": 1.4325796031379787, "grad_norm": 0.35818958282470703, "learning_rate": 6.2486315707194145e-06, "loss": 0.4352, "step": 2587 }, { "epoch": 1.4331333640978312, "grad_norm": 0.3300982415676117, "learning_rate": 6.2455099735903636e-06, "loss": 0.4436, "step": 2588 }, { "epoch": 1.4336871250576835, "grad_norm": 0.3586180508136749, "learning_rate": 6.242387858789978e-06, "loss": 0.4393, "step": 2589 }, { "epoch": 1.4342408860175357, "grad_norm": 0.297576367855072, "learning_rate": 6.239265227615903e-06, "loss": 0.4071, "step": 2590 }, { "epoch": 1.4347946469773882, "grad_norm": 0.3493654131889343, "learning_rate": 6.236142081365995e-06, "loss": 0.4069, "step": 2591 }, { "epoch": 1.4353484079372405, "grad_norm": 0.3192264139652252, "learning_rate": 6.233018421338333e-06, "loss": 0.4285, "step": 2592 }, { "epoch": 1.4359021688970928, "grad_norm": 0.34763967990875244, "learning_rate": 6.229894248831197e-06, "loss": 0.4535, "step": 2593 }, { "epoch": 1.436455929856945, "grad_norm": 0.32830289006233215, "learning_rate": 6.226769565143093e-06, "loss": 0.3955, "step": 2594 }, { "epoch": 1.4370096908167973, "grad_norm": 0.34520068764686584, "learning_rate": 6.22364437157273e-06, "loss": 0.4753, "step": 2595 }, { "epoch": 1.4375634517766498, "grad_norm": 0.30303728580474854, "learning_rate": 6.220518669419032e-06, "loss": 0.4349, "step": 2596 }, { "epoch": 1.438117212736502, "grad_norm": 0.33733823895454407, "learning_rate": 6.2173924599811376e-06, "loss": 0.4264, "step": 2597 }, { "epoch": 1.4386709736963543, "grad_norm": 0.32009097933769226, "learning_rate": 6.214265744558389e-06, "loss": 0.4209, "step": 2598 }, { "epoch": 1.4392247346562068, "grad_norm": 0.32253915071487427, "learning_rate": 6.211138524450347e-06, "loss": 0.4361, "step": 2599 }, { "epoch": 1.439778495616059, "grad_norm": 0.31976261734962463, "learning_rate": 6.208010800956775e-06, "loss": 0.4347, "step": 2600 }, { "epoch": 1.4403322565759114, "grad_norm": 0.2875135838985443, "learning_rate": 6.204882575377652e-06, "loss": 0.4077, "step": 2601 }, { "epoch": 1.4408860175357638, "grad_norm": 0.3368063271045685, "learning_rate": 6.20175384901316e-06, "loss": 0.4483, "step": 2602 }, { "epoch": 1.441439778495616, "grad_norm": 0.2969508469104767, "learning_rate": 6.198624623163693e-06, "loss": 0.3813, "step": 2603 }, { "epoch": 1.4419935394554684, "grad_norm": 0.31974077224731445, "learning_rate": 6.195494899129849e-06, "loss": 0.4573, "step": 2604 }, { "epoch": 1.4425473004153206, "grad_norm": 0.29332393407821655, "learning_rate": 6.192364678212437e-06, "loss": 0.4198, "step": 2605 }, { "epoch": 1.4431010613751731, "grad_norm": 0.32631054520606995, "learning_rate": 6.1892339617124706e-06, "loss": 0.4368, "step": 2606 }, { "epoch": 1.4436548223350254, "grad_norm": 0.31280317902565, "learning_rate": 6.186102750931171e-06, "loss": 0.4409, "step": 2607 }, { "epoch": 1.4442085832948777, "grad_norm": 0.3267487585544586, "learning_rate": 6.18297104716996e-06, "loss": 0.4576, "step": 2608 }, { "epoch": 1.44476234425473, "grad_norm": 0.2891599237918854, "learning_rate": 6.179838851730471e-06, "loss": 0.4176, "step": 2609 }, { "epoch": 1.4453161052145824, "grad_norm": 0.347728967666626, "learning_rate": 6.176706165914536e-06, "loss": 0.4543, "step": 2610 }, { "epoch": 1.4458698661744347, "grad_norm": 0.39356422424316406, "learning_rate": 6.173572991024195e-06, "loss": 0.4749, "step": 2611 }, { "epoch": 1.446423627134287, "grad_norm": 0.32681775093078613, "learning_rate": 6.170439328361688e-06, "loss": 0.4361, "step": 2612 }, { "epoch": 1.4469773880941394, "grad_norm": 0.3007923364639282, "learning_rate": 6.167305179229462e-06, "loss": 0.4131, "step": 2613 }, { "epoch": 1.4475311490539917, "grad_norm": 0.33451879024505615, "learning_rate": 6.164170544930159e-06, "loss": 0.4307, "step": 2614 }, { "epoch": 1.448084910013844, "grad_norm": 0.30693843960762024, "learning_rate": 6.161035426766632e-06, "loss": 0.4129, "step": 2615 }, { "epoch": 1.4486386709736965, "grad_norm": 0.3137039542198181, "learning_rate": 6.157899826041926e-06, "loss": 0.4135, "step": 2616 }, { "epoch": 1.4491924319335487, "grad_norm": 0.30646172165870667, "learning_rate": 6.1547637440592945e-06, "loss": 0.4317, "step": 2617 }, { "epoch": 1.449746192893401, "grad_norm": 0.38222214579582214, "learning_rate": 6.151627182122184e-06, "loss": 0.4519, "step": 2618 }, { "epoch": 1.4502999538532533, "grad_norm": 0.284368634223938, "learning_rate": 6.148490141534247e-06, "loss": 0.3816, "step": 2619 }, { "epoch": 1.4508537148131055, "grad_norm": 0.29967162013053894, "learning_rate": 6.145352623599329e-06, "loss": 0.403, "step": 2620 }, { "epoch": 1.451407475772958, "grad_norm": 0.35811084508895874, "learning_rate": 6.142214629621479e-06, "loss": 0.4439, "step": 2621 }, { "epoch": 1.4519612367328103, "grad_norm": 0.3156915605068207, "learning_rate": 6.139076160904938e-06, "loss": 0.4702, "step": 2622 }, { "epoch": 1.4525149976926626, "grad_norm": 0.31403544545173645, "learning_rate": 6.135937218754151e-06, "loss": 0.4029, "step": 2623 }, { "epoch": 1.453068758652515, "grad_norm": 0.3351687788963318, "learning_rate": 6.132797804473756e-06, "loss": 0.423, "step": 2624 }, { "epoch": 1.4536225196123673, "grad_norm": 0.29761064052581787, "learning_rate": 6.129657919368587e-06, "loss": 0.4615, "step": 2625 }, { "epoch": 1.4541762805722196, "grad_norm": 0.32901668548583984, "learning_rate": 6.126517564743676e-06, "loss": 0.4211, "step": 2626 }, { "epoch": 1.454730041532072, "grad_norm": 0.38678643107414246, "learning_rate": 6.123376741904246e-06, "loss": 0.4282, "step": 2627 }, { "epoch": 1.4552838024919243, "grad_norm": 0.34847506880760193, "learning_rate": 6.120235452155721e-06, "loss": 0.4876, "step": 2628 }, { "epoch": 1.4558375634517766, "grad_norm": 0.3076949119567871, "learning_rate": 6.117093696803711e-06, "loss": 0.3987, "step": 2629 }, { "epoch": 1.456391324411629, "grad_norm": 0.33392348885536194, "learning_rate": 6.113951477154027e-06, "loss": 0.3982, "step": 2630 }, { "epoch": 1.4569450853714814, "grad_norm": 0.40024158358573914, "learning_rate": 6.110808794512668e-06, "loss": 0.4462, "step": 2631 }, { "epoch": 1.4574988463313336, "grad_norm": 0.39043939113616943, "learning_rate": 6.10766565018583e-06, "loss": 0.4378, "step": 2632 }, { "epoch": 1.458052607291186, "grad_norm": 0.3149799406528473, "learning_rate": 6.104522045479895e-06, "loss": 0.4249, "step": 2633 }, { "epoch": 1.4586063682510382, "grad_norm": 0.42111220955848694, "learning_rate": 6.101377981701439e-06, "loss": 0.4384, "step": 2634 }, { "epoch": 1.4591601292108907, "grad_norm": 0.3694205582141876, "learning_rate": 6.098233460157233e-06, "loss": 0.4357, "step": 2635 }, { "epoch": 1.459713890170743, "grad_norm": 0.3308403789997101, "learning_rate": 6.095088482154232e-06, "loss": 0.4363, "step": 2636 }, { "epoch": 1.4602676511305952, "grad_norm": 0.3247449994087219, "learning_rate": 6.091943048999585e-06, "loss": 0.4395, "step": 2637 }, { "epoch": 1.4608214120904477, "grad_norm": 0.33515721559524536, "learning_rate": 6.088797162000626e-06, "loss": 0.4436, "step": 2638 }, { "epoch": 1.4613751730503, "grad_norm": 0.32418736815452576, "learning_rate": 6.085650822464881e-06, "loss": 0.4242, "step": 2639 }, { "epoch": 1.4619289340101522, "grad_norm": 0.316387414932251, "learning_rate": 6.082504031700065e-06, "loss": 0.4587, "step": 2640 }, { "epoch": 1.4624826949700047, "grad_norm": 0.27871066331863403, "learning_rate": 6.079356791014076e-06, "loss": 0.4114, "step": 2641 }, { "epoch": 1.463036455929857, "grad_norm": 0.30785104632377625, "learning_rate": 6.076209101715004e-06, "loss": 0.4376, "step": 2642 }, { "epoch": 1.4635902168897093, "grad_norm": 0.2993660271167755, "learning_rate": 6.073060965111121e-06, "loss": 0.4034, "step": 2643 }, { "epoch": 1.4641439778495617, "grad_norm": 0.3093186914920807, "learning_rate": 6.069912382510887e-06, "loss": 0.3931, "step": 2644 }, { "epoch": 1.464697738809414, "grad_norm": 0.32682642340660095, "learning_rate": 6.066763355222951e-06, "loss": 0.4412, "step": 2645 }, { "epoch": 1.4652514997692663, "grad_norm": 0.3133857548236847, "learning_rate": 6.06361388455614e-06, "loss": 0.4225, "step": 2646 }, { "epoch": 1.4658052607291185, "grad_norm": 0.30962657928466797, "learning_rate": 6.060463971819469e-06, "loss": 0.4561, "step": 2647 }, { "epoch": 1.4663590216889708, "grad_norm": 0.30626288056373596, "learning_rate": 6.057313618322137e-06, "loss": 0.4589, "step": 2648 }, { "epoch": 1.4669127826488233, "grad_norm": 0.276400089263916, "learning_rate": 6.054162825373525e-06, "loss": 0.4044, "step": 2649 }, { "epoch": 1.4674665436086756, "grad_norm": 0.30720433592796326, "learning_rate": 6.051011594283199e-06, "loss": 0.4512, "step": 2650 }, { "epoch": 1.4680203045685278, "grad_norm": 0.31797224283218384, "learning_rate": 6.0478599263609e-06, "loss": 0.402, "step": 2651 }, { "epoch": 1.4685740655283803, "grad_norm": 0.30381834506988525, "learning_rate": 6.044707822916562e-06, "loss": 0.4263, "step": 2652 }, { "epoch": 1.4691278264882326, "grad_norm": 0.3469447195529938, "learning_rate": 6.041555285260291e-06, "loss": 0.4892, "step": 2653 }, { "epoch": 1.4696815874480849, "grad_norm": 0.3148252069950104, "learning_rate": 6.038402314702376e-06, "loss": 0.4139, "step": 2654 }, { "epoch": 1.4702353484079373, "grad_norm": 0.3047473430633545, "learning_rate": 6.035248912553288e-06, "loss": 0.3963, "step": 2655 }, { "epoch": 1.4707891093677896, "grad_norm": 0.3407709002494812, "learning_rate": 6.032095080123672e-06, "loss": 0.4739, "step": 2656 }, { "epoch": 1.4713428703276419, "grad_norm": 0.33790117502212524, "learning_rate": 6.02894081872436e-06, "loss": 0.4221, "step": 2657 }, { "epoch": 1.4718966312874942, "grad_norm": 0.3257625699043274, "learning_rate": 6.025786129666353e-06, "loss": 0.4283, "step": 2658 }, { "epoch": 1.4724503922473466, "grad_norm": 0.33915627002716064, "learning_rate": 6.02263101426084e-06, "loss": 0.4516, "step": 2659 }, { "epoch": 1.473004153207199, "grad_norm": 0.30072516202926636, "learning_rate": 6.0194754738191766e-06, "loss": 0.4547, "step": 2660 }, { "epoch": 1.4735579141670512, "grad_norm": 0.33295944333076477, "learning_rate": 6.016319509652903e-06, "loss": 0.4063, "step": 2661 }, { "epoch": 1.4741116751269034, "grad_norm": 0.3823614716529846, "learning_rate": 6.01316312307373e-06, "loss": 0.4629, "step": 2662 }, { "epoch": 1.474665436086756, "grad_norm": 0.3311207890510559, "learning_rate": 6.0100063153935495e-06, "loss": 0.4371, "step": 2663 }, { "epoch": 1.4752191970466082, "grad_norm": 0.34634414315223694, "learning_rate": 6.006849087924426e-06, "loss": 0.4252, "step": 2664 }, { "epoch": 1.4757729580064605, "grad_norm": 0.3334685266017914, "learning_rate": 6.003691441978593e-06, "loss": 0.4202, "step": 2665 }, { "epoch": 1.476326718966313, "grad_norm": 0.29860877990722656, "learning_rate": 6.00053337886847e-06, "loss": 0.4146, "step": 2666 }, { "epoch": 1.4768804799261652, "grad_norm": 0.3422168493270874, "learning_rate": 5.997374899906636e-06, "loss": 0.4318, "step": 2667 }, { "epoch": 1.4774342408860175, "grad_norm": 0.3233844041824341, "learning_rate": 5.994216006405857e-06, "loss": 0.4196, "step": 2668 }, { "epoch": 1.47798800184587, "grad_norm": 0.29278749227523804, "learning_rate": 5.991056699679057e-06, "loss": 0.46, "step": 2669 }, { "epoch": 1.4785417628057222, "grad_norm": 0.31550881266593933, "learning_rate": 5.987896981039342e-06, "loss": 0.4228, "step": 2670 }, { "epoch": 1.4790955237655745, "grad_norm": 0.32655683159828186, "learning_rate": 5.984736851799985e-06, "loss": 0.4456, "step": 2671 }, { "epoch": 1.4796492847254268, "grad_norm": 0.3324959874153137, "learning_rate": 5.981576313274431e-06, "loss": 0.4383, "step": 2672 }, { "epoch": 1.480203045685279, "grad_norm": 0.36922261118888855, "learning_rate": 5.978415366776296e-06, "loss": 0.4609, "step": 2673 }, { "epoch": 1.4807568066451315, "grad_norm": 0.3314974308013916, "learning_rate": 5.9752540136193625e-06, "loss": 0.435, "step": 2674 }, { "epoch": 1.4813105676049838, "grad_norm": 0.2833001911640167, "learning_rate": 5.972092255117584e-06, "loss": 0.4378, "step": 2675 }, { "epoch": 1.481864328564836, "grad_norm": 0.3682185113430023, "learning_rate": 5.968930092585082e-06, "loss": 0.3941, "step": 2676 }, { "epoch": 1.4824180895246886, "grad_norm": 0.3708493411540985, "learning_rate": 5.965767527336149e-06, "loss": 0.4598, "step": 2677 }, { "epoch": 1.4829718504845408, "grad_norm": 0.3070976436138153, "learning_rate": 5.962604560685238e-06, "loss": 0.3896, "step": 2678 }, { "epoch": 1.483525611444393, "grad_norm": 0.36592036485671997, "learning_rate": 5.959441193946974e-06, "loss": 0.4045, "step": 2679 }, { "epoch": 1.4840793724042456, "grad_norm": 0.36291345953941345, "learning_rate": 5.956277428436149e-06, "loss": 0.4628, "step": 2680 }, { "epoch": 1.4846331333640979, "grad_norm": 0.3147815763950348, "learning_rate": 5.953113265467718e-06, "loss": 0.4052, "step": 2681 }, { "epoch": 1.4851868943239501, "grad_norm": 0.3820616900920868, "learning_rate": 5.949948706356801e-06, "loss": 0.3937, "step": 2682 }, { "epoch": 1.4857406552838026, "grad_norm": 0.3405337333679199, "learning_rate": 5.946783752418686e-06, "loss": 0.4449, "step": 2683 }, { "epoch": 1.4862944162436549, "grad_norm": 0.3361853063106537, "learning_rate": 5.943618404968821e-06, "loss": 0.3966, "step": 2684 }, { "epoch": 1.4868481772035071, "grad_norm": 0.3627251088619232, "learning_rate": 5.940452665322819e-06, "loss": 0.4376, "step": 2685 }, { "epoch": 1.4874019381633594, "grad_norm": 0.35615628957748413, "learning_rate": 5.937286534796458e-06, "loss": 0.4189, "step": 2686 }, { "epoch": 1.4879556991232117, "grad_norm": 0.3978285789489746, "learning_rate": 5.9341200147056755e-06, "loss": 0.4858, "step": 2687 }, { "epoch": 1.4885094600830642, "grad_norm": 0.2944282591342926, "learning_rate": 5.930953106366575e-06, "loss": 0.4416, "step": 2688 }, { "epoch": 1.4890632210429164, "grad_norm": 0.3555610179901123, "learning_rate": 5.9277858110954146e-06, "loss": 0.4434, "step": 2689 }, { "epoch": 1.4896169820027687, "grad_norm": 0.3370499312877655, "learning_rate": 5.924618130208621e-06, "loss": 0.4169, "step": 2690 }, { "epoch": 1.4901707429626212, "grad_norm": 0.3279837369918823, "learning_rate": 5.921450065022775e-06, "loss": 0.4311, "step": 2691 }, { "epoch": 1.4907245039224735, "grad_norm": 0.28797027468681335, "learning_rate": 5.918281616854621e-06, "loss": 0.3973, "step": 2692 }, { "epoch": 1.4912782648823257, "grad_norm": 0.36229437589645386, "learning_rate": 5.915112787021061e-06, "loss": 0.4448, "step": 2693 }, { "epoch": 1.4918320258421782, "grad_norm": 0.29176804423332214, "learning_rate": 5.9119435768391545e-06, "loss": 0.3981, "step": 2694 }, { "epoch": 1.4923857868020305, "grad_norm": 0.29988425970077515, "learning_rate": 5.908773987626123e-06, "loss": 0.4384, "step": 2695 }, { "epoch": 1.4929395477618828, "grad_norm": 0.3486144542694092, "learning_rate": 5.905604020699338e-06, "loss": 0.418, "step": 2696 }, { "epoch": 1.4934933087217352, "grad_norm": 0.32017311453819275, "learning_rate": 5.90243367737634e-06, "loss": 0.4178, "step": 2697 }, { "epoch": 1.4940470696815875, "grad_norm": 0.3613751232624054, "learning_rate": 5.899262958974813e-06, "loss": 0.4245, "step": 2698 }, { "epoch": 1.4946008306414398, "grad_norm": 0.3524060845375061, "learning_rate": 5.896091866812605e-06, "loss": 0.4489, "step": 2699 }, { "epoch": 1.495154591601292, "grad_norm": 0.32281258702278137, "learning_rate": 5.892920402207718e-06, "loss": 0.4181, "step": 2700 }, { "epoch": 1.4957083525611443, "grad_norm": 0.3196570575237274, "learning_rate": 5.889748566478306e-06, "loss": 0.4042, "step": 2701 }, { "epoch": 1.4962621135209968, "grad_norm": 0.32822033762931824, "learning_rate": 5.886576360942679e-06, "loss": 0.4353, "step": 2702 }, { "epoch": 1.496815874480849, "grad_norm": 0.3138622045516968, "learning_rate": 5.883403786919303e-06, "loss": 0.431, "step": 2703 }, { "epoch": 1.4973696354407013, "grad_norm": 0.2988635301589966, "learning_rate": 5.880230845726794e-06, "loss": 0.4308, "step": 2704 }, { "epoch": 1.4979233964005538, "grad_norm": 0.2773298919200897, "learning_rate": 5.8770575386839214e-06, "loss": 0.3917, "step": 2705 }, { "epoch": 1.498477157360406, "grad_norm": 0.30790722370147705, "learning_rate": 5.873883867109607e-06, "loss": 0.4482, "step": 2706 }, { "epoch": 1.4990309183202584, "grad_norm": 0.3097562789916992, "learning_rate": 5.8707098323229225e-06, "loss": 0.415, "step": 2707 }, { "epoch": 1.4995846792801109, "grad_norm": 0.30279192328453064, "learning_rate": 5.8675354356430946e-06, "loss": 0.422, "step": 2708 }, { "epoch": 1.5001384402399631, "grad_norm": 0.2950608730316162, "learning_rate": 5.864360678389497e-06, "loss": 0.4436, "step": 2709 }, { "epoch": 1.5006922011998154, "grad_norm": 0.34209680557250977, "learning_rate": 5.861185561881654e-06, "loss": 0.4041, "step": 2710 }, { "epoch": 1.5012459621596679, "grad_norm": 0.31726282835006714, "learning_rate": 5.858010087439238e-06, "loss": 0.4139, "step": 2711 }, { "epoch": 1.50179972311952, "grad_norm": 0.33411160111427307, "learning_rate": 5.854834256382073e-06, "loss": 0.4388, "step": 2712 }, { "epoch": 1.5023534840793724, "grad_norm": 0.2921265959739685, "learning_rate": 5.851658070030128e-06, "loss": 0.4234, "step": 2713 }, { "epoch": 1.5029072450392247, "grad_norm": 0.3161417245864868, "learning_rate": 5.848481529703525e-06, "loss": 0.4102, "step": 2714 }, { "epoch": 1.503461005999077, "grad_norm": 0.3377859890460968, "learning_rate": 5.845304636722526e-06, "loss": 0.4185, "step": 2715 }, { "epoch": 1.5040147669589294, "grad_norm": 0.3003102242946625, "learning_rate": 5.842127392407545e-06, "loss": 0.4247, "step": 2716 }, { "epoch": 1.5045685279187817, "grad_norm": 0.32223373651504517, "learning_rate": 5.838949798079139e-06, "loss": 0.4123, "step": 2717 }, { "epoch": 1.505122288878634, "grad_norm": 0.35497307777404785, "learning_rate": 5.835771855058013e-06, "loss": 0.4542, "step": 2718 }, { "epoch": 1.5056760498384865, "grad_norm": 0.29814469814300537, "learning_rate": 5.832593564665015e-06, "loss": 0.4304, "step": 2719 }, { "epoch": 1.5062298107983387, "grad_norm": 0.3499521017074585, "learning_rate": 5.8294149282211344e-06, "loss": 0.4517, "step": 2720 }, { "epoch": 1.506783571758191, "grad_norm": 0.3246844410896301, "learning_rate": 5.826235947047514e-06, "loss": 0.4081, "step": 2721 }, { "epoch": 1.5073373327180435, "grad_norm": 0.3206155300140381, "learning_rate": 5.823056622465429e-06, "loss": 0.424, "step": 2722 }, { "epoch": 1.5078910936778958, "grad_norm": 0.31514543294906616, "learning_rate": 5.8198769557963045e-06, "loss": 0.441, "step": 2723 }, { "epoch": 1.508444854637748, "grad_norm": 0.3127477467060089, "learning_rate": 5.816696948361706e-06, "loss": 0.469, "step": 2724 }, { "epoch": 1.5089986155976005, "grad_norm": 0.30691930651664734, "learning_rate": 5.813516601483338e-06, "loss": 0.435, "step": 2725 }, { "epoch": 1.5095523765574526, "grad_norm": 0.342327356338501, "learning_rate": 5.81033591648305e-06, "loss": 0.4433, "step": 2726 }, { "epoch": 1.510106137517305, "grad_norm": 0.37519097328186035, "learning_rate": 5.807154894682827e-06, "loss": 0.4392, "step": 2727 }, { "epoch": 1.5106598984771573, "grad_norm": 0.31369689106941223, "learning_rate": 5.803973537404801e-06, "loss": 0.3964, "step": 2728 }, { "epoch": 1.5112136594370096, "grad_norm": 0.3682628870010376, "learning_rate": 5.800791845971236e-06, "loss": 0.4125, "step": 2729 }, { "epoch": 1.511767420396862, "grad_norm": 0.35376808047294617, "learning_rate": 5.797609821704544e-06, "loss": 0.4275, "step": 2730 }, { "epoch": 1.5123211813567143, "grad_norm": 0.34718337655067444, "learning_rate": 5.794427465927265e-06, "loss": 0.449, "step": 2731 }, { "epoch": 1.5128749423165666, "grad_norm": 0.29538220167160034, "learning_rate": 5.791244779962081e-06, "loss": 0.3979, "step": 2732 }, { "epoch": 1.513428703276419, "grad_norm": 0.3229120075702667, "learning_rate": 5.788061765131816e-06, "loss": 0.4308, "step": 2733 }, { "epoch": 1.5139824642362714, "grad_norm": 0.3237987756729126, "learning_rate": 5.784878422759421e-06, "loss": 0.4511, "step": 2734 }, { "epoch": 1.5145362251961236, "grad_norm": 0.313384473323822, "learning_rate": 5.781694754167996e-06, "loss": 0.4101, "step": 2735 }, { "epoch": 1.5150899861559761, "grad_norm": 0.3331104516983032, "learning_rate": 5.778510760680762e-06, "loss": 0.4367, "step": 2736 }, { "epoch": 1.5156437471158284, "grad_norm": 0.32301726937294006, "learning_rate": 5.775326443621088e-06, "loss": 0.3974, "step": 2737 }, { "epoch": 1.5161975080756807, "grad_norm": 0.3152177631855011, "learning_rate": 5.772141804312467e-06, "loss": 0.419, "step": 2738 }, { "epoch": 1.5167512690355331, "grad_norm": 0.31538647413253784, "learning_rate": 5.7689568440785326e-06, "loss": 0.4079, "step": 2739 }, { "epoch": 1.5173050299953852, "grad_norm": 0.3364804685115814, "learning_rate": 5.7657715642430516e-06, "loss": 0.4376, "step": 2740 }, { "epoch": 1.5178587909552377, "grad_norm": 0.3125019073486328, "learning_rate": 5.7625859661299186e-06, "loss": 0.433, "step": 2741 }, { "epoch": 1.51841255191509, "grad_norm": 0.3081597089767456, "learning_rate": 5.759400051063167e-06, "loss": 0.4208, "step": 2742 }, { "epoch": 1.5189663128749422, "grad_norm": 0.31530138850212097, "learning_rate": 5.756213820366957e-06, "loss": 0.4336, "step": 2743 }, { "epoch": 1.5195200738347947, "grad_norm": 0.28626275062561035, "learning_rate": 5.7530272753655826e-06, "loss": 0.3896, "step": 2744 }, { "epoch": 1.520073834794647, "grad_norm": 0.3092755377292633, "learning_rate": 5.749840417383466e-06, "loss": 0.4494, "step": 2745 }, { "epoch": 1.5206275957544992, "grad_norm": 0.33104971051216125, "learning_rate": 5.746653247745163e-06, "loss": 0.4315, "step": 2746 }, { "epoch": 1.5211813567143517, "grad_norm": 0.3360465466976166, "learning_rate": 5.7434657677753555e-06, "loss": 0.4458, "step": 2747 }, { "epoch": 1.521735117674204, "grad_norm": 0.3324883282184601, "learning_rate": 5.740277978798857e-06, "loss": 0.437, "step": 2748 }, { "epoch": 1.5222888786340563, "grad_norm": 0.316558837890625, "learning_rate": 5.7370898821406075e-06, "loss": 0.4074, "step": 2749 }, { "epoch": 1.5228426395939088, "grad_norm": 0.3624095320701599, "learning_rate": 5.733901479125676e-06, "loss": 0.4228, "step": 2750 }, { "epoch": 1.5233964005537608, "grad_norm": 0.3356234133243561, "learning_rate": 5.7307127710792584e-06, "loss": 0.4312, "step": 2751 }, { "epoch": 1.5239501615136133, "grad_norm": 0.3251594007015228, "learning_rate": 5.727523759326678e-06, "loss": 0.4347, "step": 2752 }, { "epoch": 1.5245039224734658, "grad_norm": 0.36349010467529297, "learning_rate": 5.724334445193383e-06, "loss": 0.4663, "step": 2753 }, { "epoch": 1.5250576834333178, "grad_norm": 0.3309013843536377, "learning_rate": 5.721144830004948e-06, "loss": 0.4172, "step": 2754 }, { "epoch": 1.5256114443931703, "grad_norm": 0.3465234935283661, "learning_rate": 5.717954915087074e-06, "loss": 0.46, "step": 2755 }, { "epoch": 1.5261652053530226, "grad_norm": 0.29549169540405273, "learning_rate": 5.714764701765583e-06, "loss": 0.4381, "step": 2756 }, { "epoch": 1.5267189663128748, "grad_norm": 0.3003445565700531, "learning_rate": 5.711574191366427e-06, "loss": 0.4229, "step": 2757 }, { "epoch": 1.5272727272727273, "grad_norm": 0.33258992433547974, "learning_rate": 5.7083833852156745e-06, "loss": 0.4131, "step": 2758 }, { "epoch": 1.5278264882325796, "grad_norm": 0.309030681848526, "learning_rate": 5.705192284639524e-06, "loss": 0.4338, "step": 2759 }, { "epoch": 1.5283802491924319, "grad_norm": 0.3111814558506012, "learning_rate": 5.702000890964286e-06, "loss": 0.4151, "step": 2760 }, { "epoch": 1.5289340101522844, "grad_norm": 0.3126777410507202, "learning_rate": 5.698809205516408e-06, "loss": 0.4375, "step": 2761 }, { "epoch": 1.5294877711121366, "grad_norm": 0.3037567138671875, "learning_rate": 5.695617229622445e-06, "loss": 0.4217, "step": 2762 }, { "epoch": 1.530041532071989, "grad_norm": 0.33300259709358215, "learning_rate": 5.692424964609079e-06, "loss": 0.4449, "step": 2763 }, { "epoch": 1.5305952930318414, "grad_norm": 0.3299334943294525, "learning_rate": 5.689232411803114e-06, "loss": 0.4308, "step": 2764 }, { "epoch": 1.5311490539916934, "grad_norm": 0.3158760964870453, "learning_rate": 5.686039572531464e-06, "loss": 0.4562, "step": 2765 }, { "epoch": 1.531702814951546, "grad_norm": 0.31375616788864136, "learning_rate": 5.6828464481211765e-06, "loss": 0.4222, "step": 2766 }, { "epoch": 1.5322565759113984, "grad_norm": 0.3386520445346832, "learning_rate": 5.6796530398994054e-06, "loss": 0.426, "step": 2767 }, { "epoch": 1.5328103368712505, "grad_norm": 0.3171537518501282, "learning_rate": 5.676459349193428e-06, "loss": 0.4432, "step": 2768 }, { "epoch": 1.533364097831103, "grad_norm": 0.2964668869972229, "learning_rate": 5.673265377330638e-06, "loss": 0.3981, "step": 2769 }, { "epoch": 1.5339178587909552, "grad_norm": 0.32675236463546753, "learning_rate": 5.6700711256385454e-06, "loss": 0.4941, "step": 2770 }, { "epoch": 1.5344716197508075, "grad_norm": 0.2639056444168091, "learning_rate": 5.666876595444779e-06, "loss": 0.3862, "step": 2771 }, { "epoch": 1.53502538071066, "grad_norm": 0.3315480351448059, "learning_rate": 5.663681788077079e-06, "loss": 0.4411, "step": 2772 }, { "epoch": 1.5355791416705122, "grad_norm": 0.31560197472572327, "learning_rate": 5.6604867048633065e-06, "loss": 0.4038, "step": 2773 }, { "epoch": 1.5361329026303645, "grad_norm": 0.31583961844444275, "learning_rate": 5.657291347131431e-06, "loss": 0.4385, "step": 2774 }, { "epoch": 1.536686663590217, "grad_norm": 0.31411445140838623, "learning_rate": 5.654095716209539e-06, "loss": 0.4033, "step": 2775 }, { "epoch": 1.5372404245500693, "grad_norm": 0.3310469388961792, "learning_rate": 5.650899813425832e-06, "loss": 0.4462, "step": 2776 }, { "epoch": 1.5377941855099215, "grad_norm": 0.294601172208786, "learning_rate": 5.647703640108624e-06, "loss": 0.4355, "step": 2777 }, { "epoch": 1.538347946469774, "grad_norm": 0.31340616941452026, "learning_rate": 5.644507197586339e-06, "loss": 0.4123, "step": 2778 }, { "epoch": 1.538901707429626, "grad_norm": 0.32719507813453674, "learning_rate": 5.641310487187515e-06, "loss": 0.4294, "step": 2779 }, { "epoch": 1.5394554683894786, "grad_norm": 0.31491100788116455, "learning_rate": 5.638113510240799e-06, "loss": 0.421, "step": 2780 }, { "epoch": 1.5400092293493308, "grad_norm": 0.2978060245513916, "learning_rate": 5.634916268074953e-06, "loss": 0.4082, "step": 2781 }, { "epoch": 1.540562990309183, "grad_norm": 0.33377280831336975, "learning_rate": 5.631718762018847e-06, "loss": 0.4301, "step": 2782 }, { "epoch": 1.5411167512690356, "grad_norm": 0.32655465602874756, "learning_rate": 5.62852099340146e-06, "loss": 0.4227, "step": 2783 }, { "epoch": 1.5416705122288878, "grad_norm": 0.3496456444263458, "learning_rate": 5.625322963551881e-06, "loss": 0.4596, "step": 2784 }, { "epoch": 1.5422242731887401, "grad_norm": 0.30011263489723206, "learning_rate": 5.622124673799304e-06, "loss": 0.4016, "step": 2785 }, { "epoch": 1.5427780341485926, "grad_norm": 0.34192612767219543, "learning_rate": 5.618926125473039e-06, "loss": 0.4374, "step": 2786 }, { "epoch": 1.5433317951084449, "grad_norm": 0.3180321156978607, "learning_rate": 5.615727319902494e-06, "loss": 0.4288, "step": 2787 }, { "epoch": 1.5438855560682971, "grad_norm": 0.3274809420108795, "learning_rate": 5.612528258417192e-06, "loss": 0.4451, "step": 2788 }, { "epoch": 1.5444393170281496, "grad_norm": 0.33374881744384766, "learning_rate": 5.609328942346759e-06, "loss": 0.4117, "step": 2789 }, { "epoch": 1.544993077988002, "grad_norm": 0.32345449924468994, "learning_rate": 5.606129373020924e-06, "loss": 0.4457, "step": 2790 }, { "epoch": 1.5455468389478542, "grad_norm": 0.3096688389778137, "learning_rate": 5.602929551769527e-06, "loss": 0.4093, "step": 2791 }, { "epoch": 1.5461005999077067, "grad_norm": 0.3487413823604584, "learning_rate": 5.5997294799225085e-06, "loss": 0.4545, "step": 2792 }, { "epoch": 1.5466543608675587, "grad_norm": 0.3440273106098175, "learning_rate": 5.596529158809917e-06, "loss": 0.4463, "step": 2793 }, { "epoch": 1.5472081218274112, "grad_norm": 0.3340478837490082, "learning_rate": 5.593328589761895e-06, "loss": 0.4621, "step": 2794 }, { "epoch": 1.5477618827872635, "grad_norm": 0.3218305706977844, "learning_rate": 5.590127774108703e-06, "loss": 0.426, "step": 2795 }, { "epoch": 1.5483156437471157, "grad_norm": 0.37199708819389343, "learning_rate": 5.586926713180691e-06, "loss": 0.4398, "step": 2796 }, { "epoch": 1.5488694047069682, "grad_norm": 0.3308813273906708, "learning_rate": 5.583725408308319e-06, "loss": 0.4294, "step": 2797 }, { "epoch": 1.5494231656668205, "grad_norm": 0.33532485365867615, "learning_rate": 5.5805238608221445e-06, "loss": 0.4185, "step": 2798 }, { "epoch": 1.5499769266266727, "grad_norm": 0.3216826915740967, "learning_rate": 5.577322072052826e-06, "loss": 0.4449, "step": 2799 }, { "epoch": 1.5505306875865252, "grad_norm": 0.35075145959854126, "learning_rate": 5.574120043331122e-06, "loss": 0.4499, "step": 2800 }, { "epoch": 1.5510844485463775, "grad_norm": 0.31588101387023926, "learning_rate": 5.570917775987893e-06, "loss": 0.3948, "step": 2801 }, { "epoch": 1.5516382095062298, "grad_norm": 0.3229123055934906, "learning_rate": 5.5677152713541e-06, "loss": 0.4312, "step": 2802 }, { "epoch": 1.5521919704660823, "grad_norm": 0.3578728437423706, "learning_rate": 5.564512530760795e-06, "loss": 0.427, "step": 2803 }, { "epoch": 1.5527457314259343, "grad_norm": 0.29605674743652344, "learning_rate": 5.561309555539139e-06, "loss": 0.4479, "step": 2804 }, { "epoch": 1.5532994923857868, "grad_norm": 0.28422442078590393, "learning_rate": 5.55810634702038e-06, "loss": 0.3977, "step": 2805 }, { "epoch": 1.5538532533456393, "grad_norm": 0.3924718201160431, "learning_rate": 5.554902906535869e-06, "loss": 0.4372, "step": 2806 }, { "epoch": 1.5544070143054913, "grad_norm": 0.32693642377853394, "learning_rate": 5.5516992354170515e-06, "loss": 0.4603, "step": 2807 }, { "epoch": 1.5549607752653438, "grad_norm": 0.32337966561317444, "learning_rate": 5.548495334995471e-06, "loss": 0.4549, "step": 2808 }, { "epoch": 1.555514536225196, "grad_norm": 0.32084667682647705, "learning_rate": 5.545291206602763e-06, "loss": 0.4076, "step": 2809 }, { "epoch": 1.5560682971850484, "grad_norm": 0.345134437084198, "learning_rate": 5.54208685157066e-06, "loss": 0.4334, "step": 2810 }, { "epoch": 1.5566220581449008, "grad_norm": 0.330228328704834, "learning_rate": 5.538882271230989e-06, "loss": 0.4142, "step": 2811 }, { "epoch": 1.5571758191047531, "grad_norm": 0.3554970324039459, "learning_rate": 5.5356774669156684e-06, "loss": 0.4406, "step": 2812 }, { "epoch": 1.5577295800646054, "grad_norm": 0.3283653259277344, "learning_rate": 5.532472439956713e-06, "loss": 0.406, "step": 2813 }, { "epoch": 1.5582833410244579, "grad_norm": 0.3483888804912567, "learning_rate": 5.529267191686226e-06, "loss": 0.4192, "step": 2814 }, { "epoch": 1.5588371019843101, "grad_norm": 0.32905253767967224, "learning_rate": 5.5260617234364054e-06, "loss": 0.4523, "step": 2815 }, { "epoch": 1.5593908629441624, "grad_norm": 0.3168792128562927, "learning_rate": 5.522856036539541e-06, "loss": 0.3927, "step": 2816 }, { "epoch": 1.559944623904015, "grad_norm": 0.2992810606956482, "learning_rate": 5.5196501323280125e-06, "loss": 0.3918, "step": 2817 }, { "epoch": 1.560498384863867, "grad_norm": 0.37109747529029846, "learning_rate": 5.516444012134289e-06, "loss": 0.4241, "step": 2818 }, { "epoch": 1.5610521458237194, "grad_norm": 0.3459615409374237, "learning_rate": 5.51323767729093e-06, "loss": 0.461, "step": 2819 }, { "epoch": 1.561605906783572, "grad_norm": 0.3277900218963623, "learning_rate": 5.5100311291305866e-06, "loss": 0.4343, "step": 2820 }, { "epoch": 1.562159667743424, "grad_norm": 0.3143216371536255, "learning_rate": 5.506824368985994e-06, "loss": 0.3893, "step": 2821 }, { "epoch": 1.5627134287032765, "grad_norm": 0.3696420192718506, "learning_rate": 5.50361739818998e-06, "loss": 0.4514, "step": 2822 }, { "epoch": 1.5632671896631287, "grad_norm": 0.32334423065185547, "learning_rate": 5.500410218075455e-06, "loss": 0.4174, "step": 2823 }, { "epoch": 1.563820950622981, "grad_norm": 0.34091439843177795, "learning_rate": 5.497202829975423e-06, "loss": 0.4518, "step": 2824 }, { "epoch": 1.5643747115828335, "grad_norm": 0.3007235527038574, "learning_rate": 5.493995235222969e-06, "loss": 0.3992, "step": 2825 }, { "epoch": 1.5649284725426857, "grad_norm": 0.34311848878860474, "learning_rate": 5.490787435151266e-06, "loss": 0.466, "step": 2826 }, { "epoch": 1.565482233502538, "grad_norm": 0.2790631651878357, "learning_rate": 5.487579431093569e-06, "loss": 0.388, "step": 2827 }, { "epoch": 1.5660359944623905, "grad_norm": 0.3287922441959381, "learning_rate": 5.484371224383226e-06, "loss": 0.4101, "step": 2828 }, { "epoch": 1.5665897554222428, "grad_norm": 0.31544893980026245, "learning_rate": 5.4811628163536624e-06, "loss": 0.421, "step": 2829 }, { "epoch": 1.567143516382095, "grad_norm": 0.33273860812187195, "learning_rate": 5.477954208338386e-06, "loss": 0.417, "step": 2830 }, { "epoch": 1.5676972773419475, "grad_norm": 0.3165159225463867, "learning_rate": 5.474745401670994e-06, "loss": 0.4216, "step": 2831 }, { "epoch": 1.5682510383017996, "grad_norm": 0.3508507311344147, "learning_rate": 5.47153639768516e-06, "loss": 0.454, "step": 2832 }, { "epoch": 1.568804799261652, "grad_norm": 0.3404361605644226, "learning_rate": 5.468327197714646e-06, "loss": 0.4266, "step": 2833 }, { "epoch": 1.5693585602215046, "grad_norm": 0.3223764896392822, "learning_rate": 5.465117803093287e-06, "loss": 0.427, "step": 2834 }, { "epoch": 1.5699123211813566, "grad_norm": 0.30647414922714233, "learning_rate": 5.46190821515501e-06, "loss": 0.372, "step": 2835 }, { "epoch": 1.570466082141209, "grad_norm": 0.3443816006183624, "learning_rate": 5.45869843523381e-06, "loss": 0.464, "step": 2836 }, { "epoch": 1.5710198431010614, "grad_norm": 0.3437236547470093, "learning_rate": 5.455488464663771e-06, "loss": 0.4362, "step": 2837 }, { "epoch": 1.5715736040609136, "grad_norm": 0.28998246788978577, "learning_rate": 5.452278304779053e-06, "loss": 0.3965, "step": 2838 }, { "epoch": 1.572127365020766, "grad_norm": 0.3405493199825287, "learning_rate": 5.449067956913893e-06, "loss": 0.4111, "step": 2839 }, { "epoch": 1.5726811259806184, "grad_norm": 0.3615589439868927, "learning_rate": 5.445857422402611e-06, "loss": 0.4509, "step": 2840 }, { "epoch": 1.5732348869404706, "grad_norm": 0.30704212188720703, "learning_rate": 5.442646702579598e-06, "loss": 0.4148, "step": 2841 }, { "epoch": 1.5737886479003231, "grad_norm": 0.33663642406463623, "learning_rate": 5.439435798779329e-06, "loss": 0.4556, "step": 2842 }, { "epoch": 1.5743424088601754, "grad_norm": 0.33699002861976624, "learning_rate": 5.436224712336349e-06, "loss": 0.4422, "step": 2843 }, { "epoch": 1.5748961698200277, "grad_norm": 0.321536123752594, "learning_rate": 5.433013444585284e-06, "loss": 0.429, "step": 2844 }, { "epoch": 1.5754499307798802, "grad_norm": 0.3277962803840637, "learning_rate": 5.429801996860833e-06, "loss": 0.4185, "step": 2845 }, { "epoch": 1.5760036917397322, "grad_norm": 0.3080247640609741, "learning_rate": 5.426590370497769e-06, "loss": 0.4557, "step": 2846 }, { "epoch": 1.5765574526995847, "grad_norm": 0.3417413532733917, "learning_rate": 5.423378566830941e-06, "loss": 0.4102, "step": 2847 }, { "epoch": 1.577111213659437, "grad_norm": 0.3682325780391693, "learning_rate": 5.420166587195271e-06, "loss": 0.4344, "step": 2848 }, { "epoch": 1.5776649746192892, "grad_norm": 0.3238619267940521, "learning_rate": 5.416954432925755e-06, "loss": 0.4461, "step": 2849 }, { "epoch": 1.5782187355791417, "grad_norm": 0.3457857370376587, "learning_rate": 5.413742105357459e-06, "loss": 0.4179, "step": 2850 }, { "epoch": 1.578772496538994, "grad_norm": 0.3167168200016022, "learning_rate": 5.410529605825524e-06, "loss": 0.4138, "step": 2851 }, { "epoch": 1.5793262574988463, "grad_norm": 0.34858042001724243, "learning_rate": 5.4073169356651614e-06, "loss": 0.4731, "step": 2852 }, { "epoch": 1.5798800184586987, "grad_norm": 0.3097596764564514, "learning_rate": 5.404104096211653e-06, "loss": 0.4207, "step": 2853 }, { "epoch": 1.580433779418551, "grad_norm": 0.3143722712993622, "learning_rate": 5.40089108880035e-06, "loss": 0.4048, "step": 2854 }, { "epoch": 1.5809875403784033, "grad_norm": 0.33094578981399536, "learning_rate": 5.397677914766678e-06, "loss": 0.4773, "step": 2855 }, { "epoch": 1.5815413013382558, "grad_norm": 0.2871837913990021, "learning_rate": 5.3944645754461235e-06, "loss": 0.3961, "step": 2856 }, { "epoch": 1.582095062298108, "grad_norm": 0.3103751838207245, "learning_rate": 5.3912510721742515e-06, "loss": 0.4408, "step": 2857 }, { "epoch": 1.5826488232579603, "grad_norm": 0.34093454480171204, "learning_rate": 5.388037406286689e-06, "loss": 0.4423, "step": 2858 }, { "epoch": 1.5832025842178128, "grad_norm": 0.31510454416275024, "learning_rate": 5.3848235791191316e-06, "loss": 0.4115, "step": 2859 }, { "epoch": 1.5837563451776648, "grad_norm": 0.2959226369857788, "learning_rate": 5.381609592007343e-06, "loss": 0.4455, "step": 2860 }, { "epoch": 1.5843101061375173, "grad_norm": 0.3389517664909363, "learning_rate": 5.378395446287152e-06, "loss": 0.4466, "step": 2861 }, { "epoch": 1.5848638670973696, "grad_norm": 0.3261348605155945, "learning_rate": 5.375181143294454e-06, "loss": 0.4523, "step": 2862 }, { "epoch": 1.5854176280572219, "grad_norm": 0.31867414712905884, "learning_rate": 5.371966684365209e-06, "loss": 0.4341, "step": 2863 }, { "epoch": 1.5859713890170744, "grad_norm": 0.33531278371810913, "learning_rate": 5.368752070835446e-06, "loss": 0.4181, "step": 2864 }, { "epoch": 1.5865251499769266, "grad_norm": 0.3137817978858948, "learning_rate": 5.365537304041251e-06, "loss": 0.4389, "step": 2865 }, { "epoch": 1.5870789109367789, "grad_norm": 0.2863517105579376, "learning_rate": 5.362322385318779e-06, "loss": 0.4177, "step": 2866 }, { "epoch": 1.5876326718966314, "grad_norm": 0.3385557234287262, "learning_rate": 5.359107316004247e-06, "loss": 0.445, "step": 2867 }, { "epoch": 1.5881864328564836, "grad_norm": 0.3570921719074249, "learning_rate": 5.355892097433934e-06, "loss": 0.4701, "step": 2868 }, { "epoch": 1.588740193816336, "grad_norm": 0.336752325296402, "learning_rate": 5.352676730944184e-06, "loss": 0.445, "step": 2869 }, { "epoch": 1.5892939547761884, "grad_norm": 0.311056524515152, "learning_rate": 5.349461217871395e-06, "loss": 0.4117, "step": 2870 }, { "epoch": 1.5898477157360404, "grad_norm": 0.32523682713508606, "learning_rate": 5.346245559552035e-06, "loss": 0.4516, "step": 2871 }, { "epoch": 1.590401476695893, "grad_norm": 0.31168603897094727, "learning_rate": 5.3430297573226244e-06, "loss": 0.3965, "step": 2872 }, { "epoch": 1.5909552376557454, "grad_norm": 0.3291795551776886, "learning_rate": 5.339813812519749e-06, "loss": 0.4529, "step": 2873 }, { "epoch": 1.5915089986155975, "grad_norm": 0.30072855949401855, "learning_rate": 5.336597726480054e-06, "loss": 0.413, "step": 2874 }, { "epoch": 1.59206275957545, "grad_norm": 0.32348862290382385, "learning_rate": 5.333381500540239e-06, "loss": 0.4127, "step": 2875 }, { "epoch": 1.5926165205353022, "grad_norm": 0.3337811827659607, "learning_rate": 5.330165136037064e-06, "loss": 0.4294, "step": 2876 }, { "epoch": 1.5931702814951545, "grad_norm": 0.3153518736362457, "learning_rate": 5.3269486343073465e-06, "loss": 0.4282, "step": 2877 }, { "epoch": 1.593724042455007, "grad_norm": 0.3291095793247223, "learning_rate": 5.323731996687962e-06, "loss": 0.4273, "step": 2878 }, { "epoch": 1.5942778034148593, "grad_norm": 0.3588603734970093, "learning_rate": 5.320515224515841e-06, "loss": 0.4596, "step": 2879 }, { "epoch": 1.5948315643747115, "grad_norm": 0.3568255305290222, "learning_rate": 5.317298319127972e-06, "loss": 0.4146, "step": 2880 }, { "epoch": 1.595385325334564, "grad_norm": 0.32378950715065, "learning_rate": 5.314081281861396e-06, "loss": 0.4281, "step": 2881 }, { "epoch": 1.5959390862944163, "grad_norm": 0.3757023513317108, "learning_rate": 5.31086411405321e-06, "loss": 0.4334, "step": 2882 }, { "epoch": 1.5964928472542685, "grad_norm": 0.3389672636985779, "learning_rate": 5.307646817040567e-06, "loss": 0.4122, "step": 2883 }, { "epoch": 1.597046608214121, "grad_norm": 0.3487643599510193, "learning_rate": 5.304429392160672e-06, "loss": 0.4058, "step": 2884 }, { "epoch": 1.597600369173973, "grad_norm": 0.36072176694869995, "learning_rate": 5.301211840750782e-06, "loss": 0.4274, "step": 2885 }, { "epoch": 1.5981541301338256, "grad_norm": 0.32092148065567017, "learning_rate": 5.297994164148209e-06, "loss": 0.4172, "step": 2886 }, { "epoch": 1.598707891093678, "grad_norm": 0.34349027276039124, "learning_rate": 5.2947763636903145e-06, "loss": 0.4211, "step": 2887 }, { "epoch": 1.59926165205353, "grad_norm": 0.31608065962791443, "learning_rate": 5.291558440714516e-06, "loss": 0.4123, "step": 2888 }, { "epoch": 1.5998154130133826, "grad_norm": 0.3583356440067291, "learning_rate": 5.288340396558277e-06, "loss": 0.4477, "step": 2889 }, { "epoch": 1.6003691739732349, "grad_norm": 0.3236846327781677, "learning_rate": 5.285122232559113e-06, "loss": 0.388, "step": 2890 }, { "epoch": 1.6009229349330871, "grad_norm": 0.3129679262638092, "learning_rate": 5.28190395005459e-06, "loss": 0.4303, "step": 2891 }, { "epoch": 1.6014766958929396, "grad_norm": 0.34654340147972107, "learning_rate": 5.278685550382322e-06, "loss": 0.451, "step": 2892 }, { "epoch": 1.6020304568527919, "grad_norm": 0.3134615421295166, "learning_rate": 5.275467034879974e-06, "loss": 0.4313, "step": 2893 }, { "epoch": 1.6025842178126442, "grad_norm": 0.3497122526168823, "learning_rate": 5.272248404885256e-06, "loss": 0.4584, "step": 2894 }, { "epoch": 1.6031379787724966, "grad_norm": 0.3869912326335907, "learning_rate": 5.269029661735929e-06, "loss": 0.4579, "step": 2895 }, { "epoch": 1.603691739732349, "grad_norm": 0.32981398701667786, "learning_rate": 5.265810806769799e-06, "loss": 0.4349, "step": 2896 }, { "epoch": 1.6042455006922012, "grad_norm": 0.32621532678604126, "learning_rate": 5.262591841324717e-06, "loss": 0.4381, "step": 2897 }, { "epoch": 1.6047992616520537, "grad_norm": 0.33191531896591187, "learning_rate": 5.2593727667385844e-06, "loss": 0.4208, "step": 2898 }, { "epoch": 1.6053530226119057, "grad_norm": 0.3329445719718933, "learning_rate": 5.256153584349341e-06, "loss": 0.4561, "step": 2899 }, { "epoch": 1.6059067835717582, "grad_norm": 0.2851967513561249, "learning_rate": 5.252934295494981e-06, "loss": 0.3854, "step": 2900 }, { "epoch": 1.6064605445316105, "grad_norm": 0.3705032169818878, "learning_rate": 5.249714901513533e-06, "loss": 0.4473, "step": 2901 }, { "epoch": 1.6070143054914627, "grad_norm": 0.3220651149749756, "learning_rate": 5.2464954037430775e-06, "loss": 0.3972, "step": 2902 }, { "epoch": 1.6075680664513152, "grad_norm": 0.32666781544685364, "learning_rate": 5.2432758035217315e-06, "loss": 0.4174, "step": 2903 }, { "epoch": 1.6081218274111675, "grad_norm": 0.32356202602386475, "learning_rate": 5.240056102187659e-06, "loss": 0.4618, "step": 2904 }, { "epoch": 1.6086755883710198, "grad_norm": 0.2842966914176941, "learning_rate": 5.236836301079063e-06, "loss": 0.4047, "step": 2905 }, { "epoch": 1.6092293493308722, "grad_norm": 0.3118673264980316, "learning_rate": 5.2336164015341905e-06, "loss": 0.4028, "step": 2906 }, { "epoch": 1.6097831102907245, "grad_norm": 0.32202568650245667, "learning_rate": 5.230396404891329e-06, "loss": 0.4247, "step": 2907 }, { "epoch": 1.6103368712505768, "grad_norm": 0.3210746645927429, "learning_rate": 5.227176312488807e-06, "loss": 0.4681, "step": 2908 }, { "epoch": 1.6108906322104293, "grad_norm": 0.3128736615180969, "learning_rate": 5.223956125664987e-06, "loss": 0.4, "step": 2909 }, { "epoch": 1.6114443931702815, "grad_norm": 0.3394741714000702, "learning_rate": 5.220735845758279e-06, "loss": 0.4373, "step": 2910 }, { "epoch": 1.6119981541301338, "grad_norm": 0.3793395161628723, "learning_rate": 5.217515474107127e-06, "loss": 0.436, "step": 2911 }, { "epoch": 1.6125519150899863, "grad_norm": 0.29625728726387024, "learning_rate": 5.214295012050016e-06, "loss": 0.3924, "step": 2912 }, { "epoch": 1.6131056760498383, "grad_norm": 0.3659203052520752, "learning_rate": 5.211074460925462e-06, "loss": 0.4462, "step": 2913 }, { "epoch": 1.6136594370096908, "grad_norm": 0.294954776763916, "learning_rate": 5.2078538220720284e-06, "loss": 0.4375, "step": 2914 }, { "epoch": 1.614213197969543, "grad_norm": 0.36864158511161804, "learning_rate": 5.204633096828306e-06, "loss": 0.4296, "step": 2915 }, { "epoch": 1.6147669589293954, "grad_norm": 0.2969891130924225, "learning_rate": 5.201412286532924e-06, "loss": 0.4022, "step": 2916 }, { "epoch": 1.6153207198892479, "grad_norm": 0.3115274906158447, "learning_rate": 5.198191392524551e-06, "loss": 0.4088, "step": 2917 }, { "epoch": 1.6158744808491001, "grad_norm": 0.31438660621643066, "learning_rate": 5.194970416141885e-06, "loss": 0.4275, "step": 2918 }, { "epoch": 1.6164282418089524, "grad_norm": 0.3100181221961975, "learning_rate": 5.191749358723662e-06, "loss": 0.361, "step": 2919 }, { "epoch": 1.6169820027688049, "grad_norm": 0.3196339011192322, "learning_rate": 5.188528221608648e-06, "loss": 0.4267, "step": 2920 }, { "epoch": 1.6175357637286571, "grad_norm": 0.3179873526096344, "learning_rate": 5.185307006135647e-06, "loss": 0.4829, "step": 2921 }, { "epoch": 1.6180895246885094, "grad_norm": 0.32171690464019775, "learning_rate": 5.182085713643492e-06, "loss": 0.4078, "step": 2922 }, { "epoch": 1.618643285648362, "grad_norm": 0.3017343580722809, "learning_rate": 5.1788643454710454e-06, "loss": 0.435, "step": 2923 }, { "epoch": 1.6191970466082142, "grad_norm": 0.3427005410194397, "learning_rate": 5.17564290295721e-06, "loss": 0.4459, "step": 2924 }, { "epoch": 1.6197508075680664, "grad_norm": 0.33953481912612915, "learning_rate": 5.172421387440909e-06, "loss": 0.4565, "step": 2925 }, { "epoch": 1.620304568527919, "grad_norm": 0.3151327073574066, "learning_rate": 5.169199800261105e-06, "loss": 0.3952, "step": 2926 }, { "epoch": 1.620858329487771, "grad_norm": 0.357323557138443, "learning_rate": 5.165978142756786e-06, "loss": 0.4106, "step": 2927 }, { "epoch": 1.6214120904476235, "grad_norm": 0.3326731324195862, "learning_rate": 5.1627564162669655e-06, "loss": 0.4351, "step": 2928 }, { "epoch": 1.6219658514074757, "grad_norm": 0.3249122202396393, "learning_rate": 5.159534622130695e-06, "loss": 0.4382, "step": 2929 }, { "epoch": 1.622519612367328, "grad_norm": 0.3539556562900543, "learning_rate": 5.156312761687045e-06, "loss": 0.4349, "step": 2930 }, { "epoch": 1.6230733733271805, "grad_norm": 0.3380756080150604, "learning_rate": 5.15309083627512e-06, "loss": 0.4075, "step": 2931 }, { "epoch": 1.6236271342870328, "grad_norm": 0.3687676787376404, "learning_rate": 5.149868847234045e-06, "loss": 0.477, "step": 2932 }, { "epoch": 1.624180895246885, "grad_norm": 0.3454224765300751, "learning_rate": 5.146646795902981e-06, "loss": 0.4493, "step": 2933 }, { "epoch": 1.6247346562067375, "grad_norm": 0.3270970284938812, "learning_rate": 5.143424683621105e-06, "loss": 0.4376, "step": 2934 }, { "epoch": 1.6252884171665898, "grad_norm": 0.3296034038066864, "learning_rate": 5.140202511727626e-06, "loss": 0.4411, "step": 2935 }, { "epoch": 1.625842178126442, "grad_norm": 0.29066380858421326, "learning_rate": 5.1369802815617735e-06, "loss": 0.4064, "step": 2936 }, { "epoch": 1.6263959390862945, "grad_norm": 0.32447314262390137, "learning_rate": 5.1337579944628025e-06, "loss": 0.4471, "step": 2937 }, { "epoch": 1.6269497000461466, "grad_norm": 0.34752127528190613, "learning_rate": 5.130535651769995e-06, "loss": 0.4262, "step": 2938 }, { "epoch": 1.627503461005999, "grad_norm": 0.3519541323184967, "learning_rate": 5.127313254822652e-06, "loss": 0.4532, "step": 2939 }, { "epoch": 1.6280572219658516, "grad_norm": 0.2922991216182709, "learning_rate": 5.124090804960099e-06, "loss": 0.4137, "step": 2940 }, { "epoch": 1.6286109829257036, "grad_norm": 0.32855847477912903, "learning_rate": 5.120868303521682e-06, "loss": 0.4247, "step": 2941 }, { "epoch": 1.629164743885556, "grad_norm": 0.33713605999946594, "learning_rate": 5.117645751846769e-06, "loss": 0.4578, "step": 2942 }, { "epoch": 1.6297185048454084, "grad_norm": 0.26824063062667847, "learning_rate": 5.114423151274751e-06, "loss": 0.3819, "step": 2943 }, { "epoch": 1.6302722658052606, "grad_norm": 0.3003247380256653, "learning_rate": 5.111200503145035e-06, "loss": 0.4169, "step": 2944 }, { "epoch": 1.6308260267651131, "grad_norm": 0.3259737193584442, "learning_rate": 5.107977808797053e-06, "loss": 0.4476, "step": 2945 }, { "epoch": 1.6313797877249654, "grad_norm": 0.31371229887008667, "learning_rate": 5.1047550695702516e-06, "loss": 0.3889, "step": 2946 }, { "epoch": 1.6319335486848177, "grad_norm": 0.30334925651550293, "learning_rate": 5.101532286804098e-06, "loss": 0.3998, "step": 2947 }, { "epoch": 1.6324873096446701, "grad_norm": 0.3059585392475128, "learning_rate": 5.098309461838079e-06, "loss": 0.4333, "step": 2948 }, { "epoch": 1.6330410706045224, "grad_norm": 0.3181636333465576, "learning_rate": 5.095086596011696e-06, "loss": 0.4405, "step": 2949 }, { "epoch": 1.6335948315643747, "grad_norm": 0.2977037727832794, "learning_rate": 5.091863690664469e-06, "loss": 0.4345, "step": 2950 }, { "epoch": 1.6341485925242272, "grad_norm": 0.30499687790870667, "learning_rate": 5.0886407471359365e-06, "loss": 0.4422, "step": 2951 }, { "epoch": 1.6347023534840792, "grad_norm": 0.3074968159198761, "learning_rate": 5.085417766765646e-06, "loss": 0.4176, "step": 2952 }, { "epoch": 1.6352561144439317, "grad_norm": 0.32092493772506714, "learning_rate": 5.082194750893168e-06, "loss": 0.4423, "step": 2953 }, { "epoch": 1.6358098754037842, "grad_norm": 0.3017289340496063, "learning_rate": 5.0789717008580844e-06, "loss": 0.4123, "step": 2954 }, { "epoch": 1.6363636363636362, "grad_norm": 0.30167704820632935, "learning_rate": 5.075748617999992e-06, "loss": 0.4183, "step": 2955 }, { "epoch": 1.6369173973234887, "grad_norm": 0.290499210357666, "learning_rate": 5.072525503658499e-06, "loss": 0.4131, "step": 2956 }, { "epoch": 1.637471158283341, "grad_norm": 0.31038373708724976, "learning_rate": 5.069302359173229e-06, "loss": 0.4308, "step": 2957 }, { "epoch": 1.6380249192431933, "grad_norm": 0.2956450879573822, "learning_rate": 5.066079185883821e-06, "loss": 0.4003, "step": 2958 }, { "epoch": 1.6385786802030458, "grad_norm": 0.29393553733825684, "learning_rate": 5.062855985129916e-06, "loss": 0.4194, "step": 2959 }, { "epoch": 1.639132441162898, "grad_norm": 0.31262868642807007, "learning_rate": 5.059632758251179e-06, "loss": 0.4304, "step": 2960 }, { "epoch": 1.6396862021227503, "grad_norm": 0.30097222328186035, "learning_rate": 5.056409506587277e-06, "loss": 0.4222, "step": 2961 }, { "epoch": 1.6402399630826028, "grad_norm": 0.2926746606826782, "learning_rate": 5.05318623147789e-06, "loss": 0.4078, "step": 2962 }, { "epoch": 1.640793724042455, "grad_norm": 0.3013404309749603, "learning_rate": 5.0499629342627085e-06, "loss": 0.4288, "step": 2963 }, { "epoch": 1.6413474850023073, "grad_norm": 0.2896387577056885, "learning_rate": 5.046739616281433e-06, "loss": 0.4021, "step": 2964 }, { "epoch": 1.6419012459621598, "grad_norm": 0.33616769313812256, "learning_rate": 5.043516278873769e-06, "loss": 0.4615, "step": 2965 }, { "epoch": 1.6424550069220119, "grad_norm": 0.3219744861125946, "learning_rate": 5.0402929233794315e-06, "loss": 0.4368, "step": 2966 }, { "epoch": 1.6430087678818643, "grad_norm": 0.3126920163631439, "learning_rate": 5.037069551138149e-06, "loss": 0.4236, "step": 2967 }, { "epoch": 1.6435625288417166, "grad_norm": 0.31991222500801086, "learning_rate": 5.033846163489645e-06, "loss": 0.4223, "step": 2968 }, { "epoch": 1.6441162898015689, "grad_norm": 0.2892738878726959, "learning_rate": 5.0306227617736626e-06, "loss": 0.4494, "step": 2969 }, { "epoch": 1.6446700507614214, "grad_norm": 0.2747286260128021, "learning_rate": 5.027399347329939e-06, "loss": 0.3786, "step": 2970 }, { "epoch": 1.6452238117212736, "grad_norm": 0.35796359181404114, "learning_rate": 5.024175921498224e-06, "loss": 0.4957, "step": 2971 }, { "epoch": 1.645777572681126, "grad_norm": 0.29848724603652954, "learning_rate": 5.0209524856182716e-06, "loss": 0.4348, "step": 2972 }, { "epoch": 1.6463313336409784, "grad_norm": 0.31298789381980896, "learning_rate": 5.017729041029836e-06, "loss": 0.4599, "step": 2973 }, { "epoch": 1.6468850946008307, "grad_norm": 0.3322941064834595, "learning_rate": 5.014505589072679e-06, "loss": 0.4439, "step": 2974 }, { "epoch": 1.647438855560683, "grad_norm": 0.3016454577445984, "learning_rate": 5.011282131086564e-06, "loss": 0.4036, "step": 2975 }, { "epoch": 1.6479926165205354, "grad_norm": 0.2941789925098419, "learning_rate": 5.0080586684112565e-06, "loss": 0.404, "step": 2976 }, { "epoch": 1.6485463774803877, "grad_norm": 0.36343279480934143, "learning_rate": 5.004835202386524e-06, "loss": 0.4648, "step": 2977 }, { "epoch": 1.64910013844024, "grad_norm": 0.33416521549224854, "learning_rate": 5.001611734352136e-06, "loss": 0.4188, "step": 2978 }, { "epoch": 1.6496538994000924, "grad_norm": 0.35815998911857605, "learning_rate": 4.9983882656478646e-06, "loss": 0.4514, "step": 2979 }, { "epoch": 1.6502076603599445, "grad_norm": 0.3053007125854492, "learning_rate": 4.995164797613478e-06, "loss": 0.4055, "step": 2980 }, { "epoch": 1.650761421319797, "grad_norm": 0.33746102452278137, "learning_rate": 4.9919413315887435e-06, "loss": 0.4446, "step": 2981 }, { "epoch": 1.6513151822796492, "grad_norm": 0.3213005065917969, "learning_rate": 4.988717868913437e-06, "loss": 0.428, "step": 2982 }, { "epoch": 1.6518689432395015, "grad_norm": 0.3219180405139923, "learning_rate": 4.985494410927322e-06, "loss": 0.3889, "step": 2983 }, { "epoch": 1.652422704199354, "grad_norm": 0.30810239911079407, "learning_rate": 4.982270958970166e-06, "loss": 0.4515, "step": 2984 }, { "epoch": 1.6529764651592063, "grad_norm": 0.3425951302051544, "learning_rate": 4.979047514381731e-06, "loss": 0.4212, "step": 2985 }, { "epoch": 1.6535302261190585, "grad_norm": 0.33013638854026794, "learning_rate": 4.9758240785017766e-06, "loss": 0.4194, "step": 2986 }, { "epoch": 1.654083987078911, "grad_norm": 0.30082637071609497, "learning_rate": 4.972600652670062e-06, "loss": 0.4278, "step": 2987 }, { "epoch": 1.6546377480387633, "grad_norm": 0.2941136956214905, "learning_rate": 4.969377238226339e-06, "loss": 0.4285, "step": 2988 }, { "epoch": 1.6551915089986156, "grad_norm": 0.34502366185188293, "learning_rate": 4.966153836510356e-06, "loss": 0.4375, "step": 2989 }, { "epoch": 1.655745269958468, "grad_norm": 0.3526110053062439, "learning_rate": 4.962930448861853e-06, "loss": 0.4563, "step": 2990 }, { "epoch": 1.65629903091832, "grad_norm": 0.32701823115348816, "learning_rate": 4.959707076620568e-06, "loss": 0.434, "step": 2991 }, { "epoch": 1.6568527918781726, "grad_norm": 0.2994314134120941, "learning_rate": 4.956483721126232e-06, "loss": 0.4026, "step": 2992 }, { "epoch": 1.657406552838025, "grad_norm": 0.3282289505004883, "learning_rate": 4.953260383718568e-06, "loss": 0.4548, "step": 2993 }, { "epoch": 1.6579603137978771, "grad_norm": 0.34469905495643616, "learning_rate": 4.950037065737293e-06, "loss": 0.4555, "step": 2994 }, { "epoch": 1.6585140747577296, "grad_norm": 0.29145893454551697, "learning_rate": 4.946813768522112e-06, "loss": 0.412, "step": 2995 }, { "epoch": 1.6590678357175819, "grad_norm": 0.30090123414993286, "learning_rate": 4.943590493412724e-06, "loss": 0.4715, "step": 2996 }, { "epoch": 1.6596215966774341, "grad_norm": 0.3308440148830414, "learning_rate": 4.940367241748822e-06, "loss": 0.4466, "step": 2997 }, { "epoch": 1.6601753576372866, "grad_norm": 0.3056281507015228, "learning_rate": 4.937144014870085e-06, "loss": 0.4273, "step": 2998 }, { "epoch": 1.660729118597139, "grad_norm": 0.29526886343955994, "learning_rate": 4.933920814116182e-06, "loss": 0.3995, "step": 2999 }, { "epoch": 1.6612828795569912, "grad_norm": 0.3556796908378601, "learning_rate": 4.930697640826771e-06, "loss": 0.4296, "step": 3000 }, { "epoch": 1.6618366405168437, "grad_norm": 0.3034296929836273, "learning_rate": 4.927474496341501e-06, "loss": 0.4225, "step": 3001 }, { "epoch": 1.662390401476696, "grad_norm": 0.32159721851348877, "learning_rate": 4.92425138200001e-06, "loss": 0.4092, "step": 3002 }, { "epoch": 1.6629441624365482, "grad_norm": 0.2919296622276306, "learning_rate": 4.921028299141916e-06, "loss": 0.4136, "step": 3003 }, { "epoch": 1.6634979233964007, "grad_norm": 0.2976877689361572, "learning_rate": 4.917805249106833e-06, "loss": 0.4238, "step": 3004 }, { "epoch": 1.6640516843562527, "grad_norm": 0.3588540554046631, "learning_rate": 4.914582233234354e-06, "loss": 0.4453, "step": 3005 }, { "epoch": 1.6646054453161052, "grad_norm": 0.33000949025154114, "learning_rate": 4.911359252864064e-06, "loss": 0.4111, "step": 3006 }, { "epoch": 1.6651592062759577, "grad_norm": 0.2931382358074188, "learning_rate": 4.908136309335532e-06, "loss": 0.408, "step": 3007 }, { "epoch": 1.6657129672358097, "grad_norm": 0.3089747130870819, "learning_rate": 4.904913403988305e-06, "loss": 0.4586, "step": 3008 }, { "epoch": 1.6662667281956622, "grad_norm": 0.3102089464664459, "learning_rate": 4.901690538161923e-06, "loss": 0.4217, "step": 3009 }, { "epoch": 1.6668204891555145, "grad_norm": 0.31701767444610596, "learning_rate": 4.898467713195902e-06, "loss": 0.428, "step": 3010 }, { "epoch": 1.6673742501153668, "grad_norm": 0.27651476860046387, "learning_rate": 4.89524493042975e-06, "loss": 0.3933, "step": 3011 }, { "epoch": 1.6679280110752193, "grad_norm": 0.303730845451355, "learning_rate": 4.892022191202949e-06, "loss": 0.43, "step": 3012 }, { "epoch": 1.6684817720350715, "grad_norm": 0.3038012385368347, "learning_rate": 4.888799496854967e-06, "loss": 0.4496, "step": 3013 }, { "epoch": 1.6690355329949238, "grad_norm": 0.2926311194896698, "learning_rate": 4.885576848725252e-06, "loss": 0.4312, "step": 3014 }, { "epoch": 1.6695892939547763, "grad_norm": 0.27392518520355225, "learning_rate": 4.882354248153232e-06, "loss": 0.3947, "step": 3015 }, { "epoch": 1.6701430549146286, "grad_norm": 0.3392791450023651, "learning_rate": 4.87913169647832e-06, "loss": 0.4459, "step": 3016 }, { "epoch": 1.6706968158744808, "grad_norm": 0.30777373909950256, "learning_rate": 4.875909195039903e-06, "loss": 0.4258, "step": 3017 }, { "epoch": 1.6712505768343333, "grad_norm": 0.28206750750541687, "learning_rate": 4.87268674517735e-06, "loss": 0.449, "step": 3018 }, { "epoch": 1.6718043377941854, "grad_norm": 0.32461783289909363, "learning_rate": 4.869464348230007e-06, "loss": 0.447, "step": 3019 }, { "epoch": 1.6723580987540378, "grad_norm": 0.30024924874305725, "learning_rate": 4.866242005537198e-06, "loss": 0.4175, "step": 3020 }, { "epoch": 1.6729118597138903, "grad_norm": 0.2923471927642822, "learning_rate": 4.863019718438229e-06, "loss": 0.4526, "step": 3021 }, { "epoch": 1.6734656206737424, "grad_norm": 0.28654471039772034, "learning_rate": 4.859797488272377e-06, "loss": 0.4123, "step": 3022 }, { "epoch": 1.6740193816335949, "grad_norm": 0.2785278260707855, "learning_rate": 4.856575316378897e-06, "loss": 0.3944, "step": 3023 }, { "epoch": 1.6745731425934471, "grad_norm": 0.28786712884902954, "learning_rate": 4.853353204097021e-06, "loss": 0.4242, "step": 3024 }, { "epoch": 1.6751269035532994, "grad_norm": 0.29366353154182434, "learning_rate": 4.850131152765956e-06, "loss": 0.4144, "step": 3025 }, { "epoch": 1.675680664513152, "grad_norm": 0.3185460567474365, "learning_rate": 4.846909163724882e-06, "loss": 0.4546, "step": 3026 }, { "epoch": 1.6762344254730042, "grad_norm": 0.2936978340148926, "learning_rate": 4.843687238312957e-06, "loss": 0.4236, "step": 3027 }, { "epoch": 1.6767881864328564, "grad_norm": 0.3418363034725189, "learning_rate": 4.840465377869308e-06, "loss": 0.4428, "step": 3028 }, { "epoch": 1.677341947392709, "grad_norm": 0.30220508575439453, "learning_rate": 4.8372435837330344e-06, "loss": 0.3873, "step": 3029 }, { "epoch": 1.6778957083525612, "grad_norm": 0.3074381351470947, "learning_rate": 4.834021857243216e-06, "loss": 0.4469, "step": 3030 }, { "epoch": 1.6784494693124135, "grad_norm": 0.3226567506790161, "learning_rate": 4.830800199738896e-06, "loss": 0.4375, "step": 3031 }, { "epoch": 1.679003230272266, "grad_norm": 0.30922365188598633, "learning_rate": 4.827578612559092e-06, "loss": 0.4538, "step": 3032 }, { "epoch": 1.679556991232118, "grad_norm": 0.2679731249809265, "learning_rate": 4.8243570970427926e-06, "loss": 0.4039, "step": 3033 }, { "epoch": 1.6801107521919705, "grad_norm": 0.31835755705833435, "learning_rate": 4.821135654528955e-06, "loss": 0.4529, "step": 3034 }, { "epoch": 1.6806645131518227, "grad_norm": 0.303681880235672, "learning_rate": 4.81791428635651e-06, "loss": 0.4412, "step": 3035 }, { "epoch": 1.681218274111675, "grad_norm": 0.3196357488632202, "learning_rate": 4.814692993864354e-06, "loss": 0.4147, "step": 3036 }, { "epoch": 1.6817720350715275, "grad_norm": 0.30660587549209595, "learning_rate": 4.8114717783913524e-06, "loss": 0.4362, "step": 3037 }, { "epoch": 1.6823257960313798, "grad_norm": 0.33592262864112854, "learning_rate": 4.80825064127634e-06, "loss": 0.451, "step": 3038 }, { "epoch": 1.682879556991232, "grad_norm": 0.3122929036617279, "learning_rate": 4.805029583858115e-06, "loss": 0.4282, "step": 3039 }, { "epoch": 1.6834333179510845, "grad_norm": 0.27728426456451416, "learning_rate": 4.80180860747545e-06, "loss": 0.3917, "step": 3040 }, { "epoch": 1.6839870789109368, "grad_norm": 0.3123800456523895, "learning_rate": 4.798587713467077e-06, "loss": 0.4181, "step": 3041 }, { "epoch": 1.684540839870789, "grad_norm": 0.33237263560295105, "learning_rate": 4.795366903171696e-06, "loss": 0.4355, "step": 3042 }, { "epoch": 1.6850946008306416, "grad_norm": 0.3016481399536133, "learning_rate": 4.792146177927972e-06, "loss": 0.4262, "step": 3043 }, { "epoch": 1.6856483617904938, "grad_norm": 0.286548912525177, "learning_rate": 4.7889255390745385e-06, "loss": 0.3988, "step": 3044 }, { "epoch": 1.686202122750346, "grad_norm": 0.29183125495910645, "learning_rate": 4.785704987949987e-06, "loss": 0.4107, "step": 3045 }, { "epoch": 1.6867558837101986, "grad_norm": 0.31141898036003113, "learning_rate": 4.7824845258928736e-06, "loss": 0.4538, "step": 3046 }, { "epoch": 1.6873096446700506, "grad_norm": 0.29336437582969666, "learning_rate": 4.779264154241723e-06, "loss": 0.3744, "step": 3047 }, { "epoch": 1.6878634056299031, "grad_norm": 0.33579427003860474, "learning_rate": 4.776043874335014e-06, "loss": 0.4554, "step": 3048 }, { "epoch": 1.6884171665897554, "grad_norm": 0.30351945757865906, "learning_rate": 4.772823687511196e-06, "loss": 0.4275, "step": 3049 }, { "epoch": 1.6889709275496076, "grad_norm": 0.3030593693256378, "learning_rate": 4.769603595108671e-06, "loss": 0.4338, "step": 3050 }, { "epoch": 1.6895246885094601, "grad_norm": 0.31019043922424316, "learning_rate": 4.76638359846581e-06, "loss": 0.4381, "step": 3051 }, { "epoch": 1.6900784494693124, "grad_norm": 0.2760007083415985, "learning_rate": 4.76316369892094e-06, "loss": 0.422, "step": 3052 }, { "epoch": 1.6906322104291647, "grad_norm": 0.3021181523799896, "learning_rate": 4.759943897812343e-06, "loss": 0.4047, "step": 3053 }, { "epoch": 1.6911859713890172, "grad_norm": 0.28427979350090027, "learning_rate": 4.75672419647827e-06, "loss": 0.3885, "step": 3054 }, { "epoch": 1.6917397323488694, "grad_norm": 0.3228673040866852, "learning_rate": 4.753504596256924e-06, "loss": 0.4668, "step": 3055 }, { "epoch": 1.6922934933087217, "grad_norm": 0.2971746027469635, "learning_rate": 4.750285098486469e-06, "loss": 0.409, "step": 3056 }, { "epoch": 1.6928472542685742, "grad_norm": 0.31200605630874634, "learning_rate": 4.747065704505021e-06, "loss": 0.4225, "step": 3057 }, { "epoch": 1.6934010152284262, "grad_norm": 0.3055035173892975, "learning_rate": 4.743846415650659e-06, "loss": 0.4544, "step": 3058 }, { "epoch": 1.6939547761882787, "grad_norm": 0.32493314146995544, "learning_rate": 4.740627233261417e-06, "loss": 0.4273, "step": 3059 }, { "epoch": 1.6945085371481312, "grad_norm": 0.30957406759262085, "learning_rate": 4.737408158675285e-06, "loss": 0.4509, "step": 3060 }, { "epoch": 1.6950622981079833, "grad_norm": 0.3346579372882843, "learning_rate": 4.734189193230203e-06, "loss": 0.4491, "step": 3061 }, { "epoch": 1.6956160590678357, "grad_norm": 0.3174078166484833, "learning_rate": 4.7309703382640726e-06, "loss": 0.4116, "step": 3062 }, { "epoch": 1.696169820027688, "grad_norm": 0.2942684590816498, "learning_rate": 4.727751595114745e-06, "loss": 0.4179, "step": 3063 }, { "epoch": 1.6967235809875403, "grad_norm": 0.31039953231811523, "learning_rate": 4.724532965120027e-06, "loss": 0.4167, "step": 3064 }, { "epoch": 1.6972773419473928, "grad_norm": 0.30213502049446106, "learning_rate": 4.7213144496176795e-06, "loss": 0.4029, "step": 3065 }, { "epoch": 1.697831102907245, "grad_norm": 0.2649918496608734, "learning_rate": 4.718096049945412e-06, "loss": 0.3919, "step": 3066 }, { "epoch": 1.6983848638670973, "grad_norm": 0.3156733512878418, "learning_rate": 4.714877767440888e-06, "loss": 0.4472, "step": 3067 }, { "epoch": 1.6989386248269498, "grad_norm": 0.31926682591438293, "learning_rate": 4.711659603441724e-06, "loss": 0.4263, "step": 3068 }, { "epoch": 1.699492385786802, "grad_norm": 0.3013575077056885, "learning_rate": 4.708441559285485e-06, "loss": 0.4121, "step": 3069 }, { "epoch": 1.7000461467466543, "grad_norm": 0.2884569466114044, "learning_rate": 4.705223636309686e-06, "loss": 0.401, "step": 3070 }, { "epoch": 1.7005999077065068, "grad_norm": 0.30552950501441956, "learning_rate": 4.7020058358517926e-06, "loss": 0.4466, "step": 3071 }, { "epoch": 1.7011536686663589, "grad_norm": 0.2739506959915161, "learning_rate": 4.698788159249219e-06, "loss": 0.4213, "step": 3072 }, { "epoch": 1.7017074296262114, "grad_norm": 0.30315983295440674, "learning_rate": 4.6955706078393285e-06, "loss": 0.4112, "step": 3073 }, { "epoch": 1.7022611905860638, "grad_norm": 0.3231998682022095, "learning_rate": 4.692353182959434e-06, "loss": 0.4463, "step": 3074 }, { "epoch": 1.702814951545916, "grad_norm": 0.3311412036418915, "learning_rate": 4.689135885946791e-06, "loss": 0.448, "step": 3075 }, { "epoch": 1.7033687125057684, "grad_norm": 0.32041704654693604, "learning_rate": 4.685918718138607e-06, "loss": 0.4292, "step": 3076 }, { "epoch": 1.7039224734656206, "grad_norm": 0.3210969865322113, "learning_rate": 4.682701680872028e-06, "loss": 0.4063, "step": 3077 }, { "epoch": 1.704476234425473, "grad_norm": 0.30148518085479736, "learning_rate": 4.67948477548416e-06, "loss": 0.4353, "step": 3078 }, { "epoch": 1.7050299953853254, "grad_norm": 0.3023066520690918, "learning_rate": 4.67626800331204e-06, "loss": 0.4243, "step": 3079 }, { "epoch": 1.7055837563451777, "grad_norm": 0.3675469160079956, "learning_rate": 4.673051365692655e-06, "loss": 0.4409, "step": 3080 }, { "epoch": 1.70613751730503, "grad_norm": 0.31131166219711304, "learning_rate": 4.669834863962939e-06, "loss": 0.4446, "step": 3081 }, { "epoch": 1.7066912782648824, "grad_norm": 0.2865957021713257, "learning_rate": 4.666618499459763e-06, "loss": 0.4393, "step": 3082 }, { "epoch": 1.7072450392247347, "grad_norm": 0.2989899814128876, "learning_rate": 4.663402273519947e-06, "loss": 0.3939, "step": 3083 }, { "epoch": 1.707798800184587, "grad_norm": 0.3320668339729309, "learning_rate": 4.6601861874802514e-06, "loss": 0.4509, "step": 3084 }, { "epoch": 1.7083525611444395, "grad_norm": 0.3029744029045105, "learning_rate": 4.656970242677378e-06, "loss": 0.4108, "step": 3085 }, { "epoch": 1.7089063221042915, "grad_norm": 0.339834600687027, "learning_rate": 4.653754440447966e-06, "loss": 0.4648, "step": 3086 }, { "epoch": 1.709460083064144, "grad_norm": 0.309882789850235, "learning_rate": 4.650538782128606e-06, "loss": 0.4094, "step": 3087 }, { "epoch": 1.7100138440239963, "grad_norm": 0.3373565375804901, "learning_rate": 4.647323269055818e-06, "loss": 0.4238, "step": 3088 }, { "epoch": 1.7105676049838485, "grad_norm": 0.31652429699897766, "learning_rate": 4.644107902566067e-06, "loss": 0.4236, "step": 3089 }, { "epoch": 1.711121365943701, "grad_norm": 0.3415795564651489, "learning_rate": 4.640892683995755e-06, "loss": 0.4368, "step": 3090 }, { "epoch": 1.7116751269035533, "grad_norm": 0.3429851233959198, "learning_rate": 4.637677614681222e-06, "loss": 0.4092, "step": 3091 }, { "epoch": 1.7122288878634055, "grad_norm": 0.325604110956192, "learning_rate": 4.634462695958751e-06, "loss": 0.4399, "step": 3092 }, { "epoch": 1.712782648823258, "grad_norm": 0.3412575423717499, "learning_rate": 4.631247929164556e-06, "loss": 0.4466, "step": 3093 }, { "epoch": 1.7133364097831103, "grad_norm": 0.42156505584716797, "learning_rate": 4.628033315634793e-06, "loss": 0.4158, "step": 3094 }, { "epoch": 1.7138901707429626, "grad_norm": 0.3549681603908539, "learning_rate": 4.624818856705549e-06, "loss": 0.4376, "step": 3095 }, { "epoch": 1.714443931702815, "grad_norm": 0.32077574729919434, "learning_rate": 4.621604553712849e-06, "loss": 0.4494, "step": 3096 }, { "epoch": 1.7149976926626673, "grad_norm": 0.34168094396591187, "learning_rate": 4.618390407992658e-06, "loss": 0.4419, "step": 3097 }, { "epoch": 1.7155514536225196, "grad_norm": 0.3126146197319031, "learning_rate": 4.61517642088087e-06, "loss": 0.4047, "step": 3098 }, { "epoch": 1.716105214582372, "grad_norm": 0.36544400453567505, "learning_rate": 4.611962593713312e-06, "loss": 0.469, "step": 3099 }, { "epoch": 1.7166589755422241, "grad_norm": 0.3138729929924011, "learning_rate": 4.608748927825749e-06, "loss": 0.406, "step": 3100 }, { "epoch": 1.7172127365020766, "grad_norm": 0.3222994804382324, "learning_rate": 4.6055354245538765e-06, "loss": 0.4531, "step": 3101 }, { "epoch": 1.7177664974619289, "grad_norm": 0.3504128158092499, "learning_rate": 4.602322085233324e-06, "loss": 0.4259, "step": 3102 }, { "epoch": 1.7183202584217812, "grad_norm": 0.32837238907814026, "learning_rate": 4.599108911199651e-06, "loss": 0.4476, "step": 3103 }, { "epoch": 1.7188740193816336, "grad_norm": 0.3056107461452484, "learning_rate": 4.595895903788349e-06, "loss": 0.4216, "step": 3104 }, { "epoch": 1.719427780341486, "grad_norm": 0.30549395084381104, "learning_rate": 4.592683064334841e-06, "loss": 0.4224, "step": 3105 }, { "epoch": 1.7199815413013382, "grad_norm": 0.3306173086166382, "learning_rate": 4.589470394174476e-06, "loss": 0.4302, "step": 3106 }, { "epoch": 1.7205353022611907, "grad_norm": 0.30858567357063293, "learning_rate": 4.586257894642542e-06, "loss": 0.4155, "step": 3107 }, { "epoch": 1.721089063221043, "grad_norm": 0.3070850372314453, "learning_rate": 4.5830455670742465e-06, "loss": 0.4019, "step": 3108 }, { "epoch": 1.7216428241808952, "grad_norm": 0.34536656737327576, "learning_rate": 4.57983341280473e-06, "loss": 0.4361, "step": 3109 }, { "epoch": 1.7221965851407477, "grad_norm": 0.32152679562568665, "learning_rate": 4.576621433169059e-06, "loss": 0.4284, "step": 3110 }, { "epoch": 1.7227503461006, "grad_norm": 0.31217896938323975, "learning_rate": 4.573409629502232e-06, "loss": 0.449, "step": 3111 }, { "epoch": 1.7233041070604522, "grad_norm": 0.3316410481929779, "learning_rate": 4.570198003139169e-06, "loss": 0.4253, "step": 3112 }, { "epoch": 1.7238578680203047, "grad_norm": 0.3200621008872986, "learning_rate": 4.566986555414718e-06, "loss": 0.4398, "step": 3113 }, { "epoch": 1.7244116289801568, "grad_norm": 0.33757784962654114, "learning_rate": 4.563775287663653e-06, "loss": 0.4315, "step": 3114 }, { "epoch": 1.7249653899400093, "grad_norm": 0.3030734360218048, "learning_rate": 4.560564201220672e-06, "loss": 0.3807, "step": 3115 }, { "epoch": 1.7255191508998615, "grad_norm": 0.31801554560661316, "learning_rate": 4.557353297420403e-06, "loss": 0.4596, "step": 3116 }, { "epoch": 1.7260729118597138, "grad_norm": 0.3350652754306793, "learning_rate": 4.554142577597391e-06, "loss": 0.4433, "step": 3117 }, { "epoch": 1.7266266728195663, "grad_norm": 0.3168596923351288, "learning_rate": 4.550932043086108e-06, "loss": 0.4102, "step": 3118 }, { "epoch": 1.7271804337794185, "grad_norm": 0.3127128481864929, "learning_rate": 4.54772169522095e-06, "loss": 0.3732, "step": 3119 }, { "epoch": 1.7277341947392708, "grad_norm": 0.3138090670108795, "learning_rate": 4.544511535336231e-06, "loss": 0.4332, "step": 3120 }, { "epoch": 1.7282879556991233, "grad_norm": 0.30883097648620605, "learning_rate": 4.5413015647661915e-06, "loss": 0.4424, "step": 3121 }, { "epoch": 1.7288417166589756, "grad_norm": 0.3229777216911316, "learning_rate": 4.538091784844992e-06, "loss": 0.4426, "step": 3122 }, { "epoch": 1.7293954776188278, "grad_norm": 0.34419146180152893, "learning_rate": 4.534882196906714e-06, "loss": 0.422, "step": 3123 }, { "epoch": 1.7299492385786803, "grad_norm": 0.311109334230423, "learning_rate": 4.5316728022853565e-06, "loss": 0.4191, "step": 3124 }, { "epoch": 1.7305029995385324, "grad_norm": 0.2904793918132782, "learning_rate": 4.528463602314839e-06, "loss": 0.3974, "step": 3125 }, { "epoch": 1.7310567604983849, "grad_norm": 0.3009130656719208, "learning_rate": 4.525254598329007e-06, "loss": 0.394, "step": 3126 }, { "epoch": 1.7316105214582374, "grad_norm": 0.34047234058380127, "learning_rate": 4.522045791661616e-06, "loss": 0.4602, "step": 3127 }, { "epoch": 1.7321642824180894, "grad_norm": 0.3086244761943817, "learning_rate": 4.51883718364634e-06, "loss": 0.4212, "step": 3128 }, { "epoch": 1.7327180433779419, "grad_norm": 0.2954614460468292, "learning_rate": 4.515628775616774e-06, "loss": 0.4836, "step": 3129 }, { "epoch": 1.7332718043377942, "grad_norm": 0.272122859954834, "learning_rate": 4.512420568906431e-06, "loss": 0.4102, "step": 3130 }, { "epoch": 1.7338255652976464, "grad_norm": 0.2849988043308258, "learning_rate": 4.5092125648487365e-06, "loss": 0.4367, "step": 3131 }, { "epoch": 1.734379326257499, "grad_norm": 0.3198147714138031, "learning_rate": 4.506004764777033e-06, "loss": 0.4083, "step": 3132 }, { "epoch": 1.7349330872173512, "grad_norm": 0.2743470370769501, "learning_rate": 4.502797170024578e-06, "loss": 0.417, "step": 3133 }, { "epoch": 1.7354868481772034, "grad_norm": 0.3452639579772949, "learning_rate": 4.499589781924545e-06, "loss": 0.4742, "step": 3134 }, { "epoch": 1.736040609137056, "grad_norm": 0.3156910836696625, "learning_rate": 4.4963826018100216e-06, "loss": 0.4182, "step": 3135 }, { "epoch": 1.7365943700969082, "grad_norm": 0.2986757457256317, "learning_rate": 4.493175631014008e-06, "loss": 0.4691, "step": 3136 }, { "epoch": 1.7371481310567605, "grad_norm": 0.2834433913230896, "learning_rate": 4.489968870869416e-06, "loss": 0.4211, "step": 3137 }, { "epoch": 1.737701892016613, "grad_norm": 0.31177422404289246, "learning_rate": 4.486762322709071e-06, "loss": 0.4105, "step": 3138 }, { "epoch": 1.738255652976465, "grad_norm": 0.30344703793525696, "learning_rate": 4.483555987865711e-06, "loss": 0.4468, "step": 3139 }, { "epoch": 1.7388094139363175, "grad_norm": 0.2925468981266022, "learning_rate": 4.480349867671989e-06, "loss": 0.4154, "step": 3140 }, { "epoch": 1.73936317489617, "grad_norm": 0.2891515791416168, "learning_rate": 4.47714396346046e-06, "loss": 0.4427, "step": 3141 }, { "epoch": 1.739916935856022, "grad_norm": 0.2927708327770233, "learning_rate": 4.473938276563595e-06, "loss": 0.4307, "step": 3142 }, { "epoch": 1.7404706968158745, "grad_norm": 0.29625415802001953, "learning_rate": 4.470732808313777e-06, "loss": 0.4235, "step": 3143 }, { "epoch": 1.7410244577757268, "grad_norm": 0.3005526661872864, "learning_rate": 4.467527560043288e-06, "loss": 0.45, "step": 3144 }, { "epoch": 1.741578218735579, "grad_norm": 0.3388163149356842, "learning_rate": 4.464322533084332e-06, "loss": 0.4463, "step": 3145 }, { "epoch": 1.7421319796954315, "grad_norm": 0.2909385561943054, "learning_rate": 4.461117728769013e-06, "loss": 0.4144, "step": 3146 }, { "epoch": 1.7426857406552838, "grad_norm": 0.34408095479011536, "learning_rate": 4.4579131484293415e-06, "loss": 0.4591, "step": 3147 }, { "epoch": 1.743239501615136, "grad_norm": 0.2988058924674988, "learning_rate": 4.45470879339724e-06, "loss": 0.4303, "step": 3148 }, { "epoch": 1.7437932625749886, "grad_norm": 0.3028274476528168, "learning_rate": 4.4515046650045316e-06, "loss": 0.3891, "step": 3149 }, { "epoch": 1.7443470235348408, "grad_norm": 0.349435031414032, "learning_rate": 4.44830076458295e-06, "loss": 0.4426, "step": 3150 }, { "epoch": 1.744900784494693, "grad_norm": 0.2941223978996277, "learning_rate": 4.445097093464133e-06, "loss": 0.4357, "step": 3151 }, { "epoch": 1.7454545454545456, "grad_norm": 0.31436046957969666, "learning_rate": 4.441893652979623e-06, "loss": 0.3867, "step": 3152 }, { "epoch": 1.7460083064143976, "grad_norm": 0.28375059366226196, "learning_rate": 4.438690444460861e-06, "loss": 0.4123, "step": 3153 }, { "epoch": 1.7465620673742501, "grad_norm": 0.3170319199562073, "learning_rate": 4.435487469239205e-06, "loss": 0.4567, "step": 3154 }, { "epoch": 1.7471158283341024, "grad_norm": 0.27762943506240845, "learning_rate": 4.432284728645901e-06, "loss": 0.3854, "step": 3155 }, { "epoch": 1.7476695892939547, "grad_norm": 0.27726078033447266, "learning_rate": 4.429082224012109e-06, "loss": 0.3717, "step": 3156 }, { "epoch": 1.7482233502538072, "grad_norm": 0.2992873191833496, "learning_rate": 4.425879956668881e-06, "loss": 0.4374, "step": 3157 }, { "epoch": 1.7487771112136594, "grad_norm": 0.2993599772453308, "learning_rate": 4.422677927947177e-06, "loss": 0.408, "step": 3158 }, { "epoch": 1.7493308721735117, "grad_norm": 0.2951045632362366, "learning_rate": 4.419476139177857e-06, "loss": 0.4297, "step": 3159 }, { "epoch": 1.7498846331333642, "grad_norm": 0.2887507975101471, "learning_rate": 4.416274591691681e-06, "loss": 0.4368, "step": 3160 }, { "epoch": 1.7504383940932164, "grad_norm": 0.31316298246383667, "learning_rate": 4.4130732868193104e-06, "loss": 0.4563, "step": 3161 }, { "epoch": 1.7509921550530687, "grad_norm": 0.2813262641429901, "learning_rate": 4.409872225891299e-06, "loss": 0.3921, "step": 3162 }, { "epoch": 1.7515459160129212, "grad_norm": 0.32862237095832825, "learning_rate": 4.406671410238105e-06, "loss": 0.428, "step": 3163 }, { "epoch": 1.7520996769727735, "grad_norm": 0.32919079065322876, "learning_rate": 4.403470841190085e-06, "loss": 0.439, "step": 3164 }, { "epoch": 1.7526534379326257, "grad_norm": 0.3306533694267273, "learning_rate": 4.400270520077492e-06, "loss": 0.4316, "step": 3165 }, { "epoch": 1.7532071988924782, "grad_norm": 0.2684718370437622, "learning_rate": 4.397070448230474e-06, "loss": 0.3896, "step": 3166 }, { "epoch": 1.7537609598523303, "grad_norm": 0.3214348256587982, "learning_rate": 4.393870626979077e-06, "loss": 0.4624, "step": 3167 }, { "epoch": 1.7543147208121828, "grad_norm": 0.3101145327091217, "learning_rate": 4.390671057653242e-06, "loss": 0.4213, "step": 3168 }, { "epoch": 1.754868481772035, "grad_norm": 0.2989545166492462, "learning_rate": 4.387471741582809e-06, "loss": 0.4108, "step": 3169 }, { "epoch": 1.7554222427318873, "grad_norm": 0.306134968996048, "learning_rate": 4.3842726800975076e-06, "loss": 0.4445, "step": 3170 }, { "epoch": 1.7559760036917398, "grad_norm": 0.3281269073486328, "learning_rate": 4.381073874526964e-06, "loss": 0.468, "step": 3171 }, { "epoch": 1.756529764651592, "grad_norm": 0.29547300934791565, "learning_rate": 4.3778753262006965e-06, "loss": 0.4297, "step": 3172 }, { "epoch": 1.7570835256114443, "grad_norm": 0.3187280297279358, "learning_rate": 4.37467703644812e-06, "loss": 0.4628, "step": 3173 }, { "epoch": 1.7576372865712968, "grad_norm": 0.2796596884727478, "learning_rate": 4.371479006598541e-06, "loss": 0.3822, "step": 3174 }, { "epoch": 1.758191047531149, "grad_norm": 0.30091729760169983, "learning_rate": 4.368281237981154e-06, "loss": 0.4397, "step": 3175 }, { "epoch": 1.7587448084910013, "grad_norm": 0.3095286786556244, "learning_rate": 4.3650837319250475e-06, "loss": 0.4137, "step": 3176 }, { "epoch": 1.7592985694508538, "grad_norm": 0.29491445422172546, "learning_rate": 4.361886489759201e-06, "loss": 0.4237, "step": 3177 }, { "epoch": 1.759852330410706, "grad_norm": 0.30001387000083923, "learning_rate": 4.358689512812487e-06, "loss": 0.4045, "step": 3178 }, { "epoch": 1.7604060913705584, "grad_norm": 0.3576946556568146, "learning_rate": 4.355492802413663e-06, "loss": 0.4249, "step": 3179 }, { "epoch": 1.7609598523304109, "grad_norm": 0.3145162761211395, "learning_rate": 4.352296359891378e-06, "loss": 0.4597, "step": 3180 }, { "epoch": 1.761513613290263, "grad_norm": 0.3096579611301422, "learning_rate": 4.34910018657417e-06, "loss": 0.417, "step": 3181 }, { "epoch": 1.7620673742501154, "grad_norm": 0.3209769129753113, "learning_rate": 4.345904283790461e-06, "loss": 0.4289, "step": 3182 }, { "epoch": 1.7626211352099677, "grad_norm": 0.30739104747772217, "learning_rate": 4.342708652868572e-06, "loss": 0.396, "step": 3183 }, { "epoch": 1.76317489616982, "grad_norm": 0.3153156638145447, "learning_rate": 4.339513295136695e-06, "loss": 0.4461, "step": 3184 }, { "epoch": 1.7637286571296724, "grad_norm": 0.30041399598121643, "learning_rate": 4.336318211922923e-06, "loss": 0.4247, "step": 3185 }, { "epoch": 1.7642824180895247, "grad_norm": 0.2896289825439453, "learning_rate": 4.333123404555223e-06, "loss": 0.4407, "step": 3186 }, { "epoch": 1.764836179049377, "grad_norm": 0.29623156785964966, "learning_rate": 4.329928874361455e-06, "loss": 0.4397, "step": 3187 }, { "epoch": 1.7653899400092294, "grad_norm": 0.30080074071884155, "learning_rate": 4.326734622669364e-06, "loss": 0.4379, "step": 3188 }, { "epoch": 1.7659437009690817, "grad_norm": 0.28613394498825073, "learning_rate": 4.323540650806574e-06, "loss": 0.4222, "step": 3189 }, { "epoch": 1.766497461928934, "grad_norm": 0.2912857234477997, "learning_rate": 4.320346960100597e-06, "loss": 0.4312, "step": 3190 }, { "epoch": 1.7670512228887865, "grad_norm": 0.30576545000076294, "learning_rate": 4.317153551878825e-06, "loss": 0.4355, "step": 3191 }, { "epoch": 1.7676049838486385, "grad_norm": 0.29748204350471497, "learning_rate": 4.313960427468536e-06, "loss": 0.4109, "step": 3192 }, { "epoch": 1.768158744808491, "grad_norm": 0.33463940024375916, "learning_rate": 4.310767588196888e-06, "loss": 0.4523, "step": 3193 }, { "epoch": 1.7687125057683435, "grad_norm": 0.2982579469680786, "learning_rate": 4.307575035390922e-06, "loss": 0.4192, "step": 3194 }, { "epoch": 1.7692662667281955, "grad_norm": 0.26928678154945374, "learning_rate": 4.304382770377556e-06, "loss": 0.4059, "step": 3195 }, { "epoch": 1.769820027688048, "grad_norm": 0.3426336348056793, "learning_rate": 4.301190794483593e-06, "loss": 0.4564, "step": 3196 }, { "epoch": 1.7703737886479003, "grad_norm": 0.31572285294532776, "learning_rate": 4.297999109035713e-06, "loss": 0.3956, "step": 3197 }, { "epoch": 1.7709275496077526, "grad_norm": 0.34202340245246887, "learning_rate": 4.294807715360479e-06, "loss": 0.4404, "step": 3198 }, { "epoch": 1.771481310567605, "grad_norm": 0.33026161789894104, "learning_rate": 4.291616614784327e-06, "loss": 0.409, "step": 3199 }, { "epoch": 1.7720350715274573, "grad_norm": 0.30646657943725586, "learning_rate": 4.2884258086335755e-06, "loss": 0.4266, "step": 3200 }, { "epoch": 1.7725888324873096, "grad_norm": 0.3460366427898407, "learning_rate": 4.285235298234417e-06, "loss": 0.4385, "step": 3201 }, { "epoch": 1.773142593447162, "grad_norm": 0.2988540828227997, "learning_rate": 4.282045084912927e-06, "loss": 0.4426, "step": 3202 }, { "epoch": 1.7736963544070143, "grad_norm": 0.309998482465744, "learning_rate": 4.278855169995055e-06, "loss": 0.4142, "step": 3203 }, { "epoch": 1.7742501153668666, "grad_norm": 0.3098202347755432, "learning_rate": 4.275665554806619e-06, "loss": 0.402, "step": 3204 }, { "epoch": 1.774803876326719, "grad_norm": 0.2932251989841461, "learning_rate": 4.272476240673324e-06, "loss": 0.4341, "step": 3205 }, { "epoch": 1.7753576372865711, "grad_norm": 0.29061537981033325, "learning_rate": 4.269287228920742e-06, "loss": 0.4057, "step": 3206 }, { "epoch": 1.7759113982464236, "grad_norm": 0.33174240589141846, "learning_rate": 4.2660985208743255e-06, "loss": 0.4419, "step": 3207 }, { "epoch": 1.7764651592062761, "grad_norm": 0.30789878964424133, "learning_rate": 4.262910117859393e-06, "loss": 0.4336, "step": 3208 }, { "epoch": 1.7770189201661282, "grad_norm": 0.316359281539917, "learning_rate": 4.259722021201144e-06, "loss": 0.4258, "step": 3209 }, { "epoch": 1.7775726811259807, "grad_norm": 0.2800091505050659, "learning_rate": 4.256534232224646e-06, "loss": 0.3565, "step": 3210 }, { "epoch": 1.778126442085833, "grad_norm": 0.31576377153396606, "learning_rate": 4.253346752254837e-06, "loss": 0.4457, "step": 3211 }, { "epoch": 1.7786802030456852, "grad_norm": 0.3069250285625458, "learning_rate": 4.250159582616535e-06, "loss": 0.449, "step": 3212 }, { "epoch": 1.7792339640055377, "grad_norm": 0.28639549016952515, "learning_rate": 4.246972724634419e-06, "loss": 0.4105, "step": 3213 }, { "epoch": 1.77978772496539, "grad_norm": 0.316984623670578, "learning_rate": 4.243786179633045e-06, "loss": 0.454, "step": 3214 }, { "epoch": 1.7803414859252422, "grad_norm": 0.34374621510505676, "learning_rate": 4.240599948936834e-06, "loss": 0.4362, "step": 3215 }, { "epoch": 1.7808952468850947, "grad_norm": 0.28485366702079773, "learning_rate": 4.237414033870082e-06, "loss": 0.4096, "step": 3216 }, { "epoch": 1.781449007844947, "grad_norm": 0.3291647136211395, "learning_rate": 4.23422843575695e-06, "loss": 0.4165, "step": 3217 }, { "epoch": 1.7820027688047992, "grad_norm": 0.3472009301185608, "learning_rate": 4.231043155921468e-06, "loss": 0.4168, "step": 3218 }, { "epoch": 1.7825565297646517, "grad_norm": 0.3003288507461548, "learning_rate": 4.227858195687536e-06, "loss": 0.4295, "step": 3219 }, { "epoch": 1.7831102907245038, "grad_norm": 0.2703339755535126, "learning_rate": 4.224673556378914e-06, "loss": 0.4075, "step": 3220 }, { "epoch": 1.7836640516843563, "grad_norm": 0.315410315990448, "learning_rate": 4.221489239319239e-06, "loss": 0.4217, "step": 3221 }, { "epoch": 1.7842178126442085, "grad_norm": 0.3310101330280304, "learning_rate": 4.218305245832006e-06, "loss": 0.4689, "step": 3222 }, { "epoch": 1.7847715736040608, "grad_norm": 0.3088635802268982, "learning_rate": 4.21512157724058e-06, "loss": 0.4356, "step": 3223 }, { "epoch": 1.7853253345639133, "grad_norm": 0.3337719142436981, "learning_rate": 4.211938234868187e-06, "loss": 0.4635, "step": 3224 }, { "epoch": 1.7858790955237656, "grad_norm": 0.3393329083919525, "learning_rate": 4.208755220037919e-06, "loss": 0.4247, "step": 3225 }, { "epoch": 1.7864328564836178, "grad_norm": 0.3158133625984192, "learning_rate": 4.205572534072737e-06, "loss": 0.4461, "step": 3226 }, { "epoch": 1.7869866174434703, "grad_norm": 0.2904072105884552, "learning_rate": 4.2023901782954565e-06, "loss": 0.4033, "step": 3227 }, { "epoch": 1.7875403784033226, "grad_norm": 0.3231183588504791, "learning_rate": 4.1992081540287645e-06, "loss": 0.4618, "step": 3228 }, { "epoch": 1.7880941393631749, "grad_norm": 0.30805879831314087, "learning_rate": 4.1960264625952005e-06, "loss": 0.3996, "step": 3229 }, { "epoch": 1.7886479003230273, "grad_norm": 0.34352123737335205, "learning_rate": 4.1928451053171735e-06, "loss": 0.4376, "step": 3230 }, { "epoch": 1.7892016612828796, "grad_norm": 0.2834811210632324, "learning_rate": 4.189664083516952e-06, "loss": 0.4541, "step": 3231 }, { "epoch": 1.7897554222427319, "grad_norm": 0.2754083275794983, "learning_rate": 4.1864833985166645e-06, "loss": 0.4147, "step": 3232 }, { "epoch": 1.7903091832025844, "grad_norm": 0.3336610794067383, "learning_rate": 4.1833030516382965e-06, "loss": 0.453, "step": 3233 }, { "epoch": 1.7908629441624364, "grad_norm": 0.2834082543849945, "learning_rate": 4.180123044203696e-06, "loss": 0.4006, "step": 3234 }, { "epoch": 1.791416705122289, "grad_norm": 0.27375954389572144, "learning_rate": 4.176943377534572e-06, "loss": 0.4068, "step": 3235 }, { "epoch": 1.7919704660821412, "grad_norm": 0.32246819138526917, "learning_rate": 4.173764052952488e-06, "loss": 0.4505, "step": 3236 }, { "epoch": 1.7925242270419934, "grad_norm": 0.31806349754333496, "learning_rate": 4.170585071778866e-06, "loss": 0.4165, "step": 3237 }, { "epoch": 1.793077988001846, "grad_norm": 0.28582319617271423, "learning_rate": 4.167406435334988e-06, "loss": 0.4783, "step": 3238 }, { "epoch": 1.7936317489616982, "grad_norm": 0.2943061590194702, "learning_rate": 4.164228144941987e-06, "loss": 0.4142, "step": 3239 }, { "epoch": 1.7941855099215505, "grad_norm": 0.3173786401748657, "learning_rate": 4.161050201920861e-06, "loss": 0.4369, "step": 3240 }, { "epoch": 1.794739270881403, "grad_norm": 0.3226882815361023, "learning_rate": 4.157872607592456e-06, "loss": 0.4111, "step": 3241 }, { "epoch": 1.7952930318412552, "grad_norm": 0.28212830424308777, "learning_rate": 4.154695363277475e-06, "loss": 0.4132, "step": 3242 }, { "epoch": 1.7958467928011075, "grad_norm": 0.3208777606487274, "learning_rate": 4.151518470296476e-06, "loss": 0.44, "step": 3243 }, { "epoch": 1.79640055376096, "grad_norm": 0.31172168254852295, "learning_rate": 4.148341929969871e-06, "loss": 0.4263, "step": 3244 }, { "epoch": 1.796954314720812, "grad_norm": 0.30285489559173584, "learning_rate": 4.145165743617929e-06, "loss": 0.4832, "step": 3245 }, { "epoch": 1.7975080756806645, "grad_norm": 0.3138018548488617, "learning_rate": 4.141989912560764e-06, "loss": 0.4276, "step": 3246 }, { "epoch": 1.798061836640517, "grad_norm": 0.3127243220806122, "learning_rate": 4.1388144381183484e-06, "loss": 0.4565, "step": 3247 }, { "epoch": 1.798615597600369, "grad_norm": 0.299184113740921, "learning_rate": 4.135639321610505e-06, "loss": 0.4261, "step": 3248 }, { "epoch": 1.7991693585602215, "grad_norm": 0.30018219351768494, "learning_rate": 4.132464564356906e-06, "loss": 0.4193, "step": 3249 }, { "epoch": 1.7997231195200738, "grad_norm": 0.30533191561698914, "learning_rate": 4.129290167677078e-06, "loss": 0.4227, "step": 3250 }, { "epoch": 1.800276880479926, "grad_norm": 0.3007320165634155, "learning_rate": 4.126116132890394e-06, "loss": 0.4568, "step": 3251 }, { "epoch": 1.8008306414397786, "grad_norm": 0.332731693983078, "learning_rate": 4.122942461316081e-06, "loss": 0.4188, "step": 3252 }, { "epoch": 1.8013844023996308, "grad_norm": 0.2968289256095886, "learning_rate": 4.1197691542732076e-06, "loss": 0.423, "step": 3253 }, { "epoch": 1.801938163359483, "grad_norm": 0.2715300917625427, "learning_rate": 4.116596213080697e-06, "loss": 0.3947, "step": 3254 }, { "epoch": 1.8024919243193356, "grad_norm": 0.3036361634731293, "learning_rate": 4.1134236390573214e-06, "loss": 0.4048, "step": 3255 }, { "epoch": 1.8030456852791878, "grad_norm": 0.33239293098449707, "learning_rate": 4.110251433521695e-06, "loss": 0.4396, "step": 3256 }, { "epoch": 1.8035994462390401, "grad_norm": 0.2836073040962219, "learning_rate": 4.107079597792285e-06, "loss": 0.3818, "step": 3257 }, { "epoch": 1.8041532071988926, "grad_norm": 0.3265187740325928, "learning_rate": 4.103908133187395e-06, "loss": 0.4467, "step": 3258 }, { "epoch": 1.8047069681587447, "grad_norm": 0.2883315980434418, "learning_rate": 4.100737041025188e-06, "loss": 0.4196, "step": 3259 }, { "epoch": 1.8052607291185971, "grad_norm": 0.29624640941619873, "learning_rate": 4.097566322623661e-06, "loss": 0.4627, "step": 3260 }, { "epoch": 1.8058144900784496, "grad_norm": 0.2798279821872711, "learning_rate": 4.094395979300663e-06, "loss": 0.4181, "step": 3261 }, { "epoch": 1.8063682510383017, "grad_norm": 0.3093916177749634, "learning_rate": 4.09122601237388e-06, "loss": 0.3992, "step": 3262 }, { "epoch": 1.8069220119981542, "grad_norm": 0.28666451573371887, "learning_rate": 4.088056423160846e-06, "loss": 0.3981, "step": 3263 }, { "epoch": 1.8074757729580064, "grad_norm": 0.28431978821754456, "learning_rate": 4.08488721297894e-06, "loss": 0.4315, "step": 3264 }, { "epoch": 1.8080295339178587, "grad_norm": 0.2824808955192566, "learning_rate": 4.081718383145381e-06, "loss": 0.4031, "step": 3265 }, { "epoch": 1.8085832948777112, "grad_norm": 0.3519880771636963, "learning_rate": 4.078549934977227e-06, "loss": 0.4167, "step": 3266 }, { "epoch": 1.8091370558375635, "grad_norm": 0.3189856708049774, "learning_rate": 4.075381869791381e-06, "loss": 0.4223, "step": 3267 }, { "epoch": 1.8096908167974157, "grad_norm": 0.31234806776046753, "learning_rate": 4.072214188904585e-06, "loss": 0.4087, "step": 3268 }, { "epoch": 1.8102445777572682, "grad_norm": 0.3271569013595581, "learning_rate": 4.069046893633426e-06, "loss": 0.4644, "step": 3269 }, { "epoch": 1.8107983387171205, "grad_norm": 0.3105159103870392, "learning_rate": 4.065879985294325e-06, "loss": 0.3635, "step": 3270 }, { "epoch": 1.8113520996769727, "grad_norm": 0.3598727583885193, "learning_rate": 4.062713465203544e-06, "loss": 0.4641, "step": 3271 }, { "epoch": 1.8119058606368252, "grad_norm": 0.33211931586265564, "learning_rate": 4.059547334677183e-06, "loss": 0.4406, "step": 3272 }, { "epoch": 1.8124596215966773, "grad_norm": 0.34353864192962646, "learning_rate": 4.05638159503118e-06, "loss": 0.4257, "step": 3273 }, { "epoch": 1.8130133825565298, "grad_norm": 0.2960684597492218, "learning_rate": 4.053216247581317e-06, "loss": 0.4393, "step": 3274 }, { "epoch": 1.813567143516382, "grad_norm": 0.31106698513031006, "learning_rate": 4.0500512936432e-06, "loss": 0.4262, "step": 3275 }, { "epoch": 1.8141209044762343, "grad_norm": 0.32311102747917175, "learning_rate": 4.046886734532284e-06, "loss": 0.4262, "step": 3276 }, { "epoch": 1.8146746654360868, "grad_norm": 0.2775634825229645, "learning_rate": 4.0437225715638535e-06, "loss": 0.4174, "step": 3277 }, { "epoch": 1.815228426395939, "grad_norm": 0.30161407589912415, "learning_rate": 4.040558806053026e-06, "loss": 0.4233, "step": 3278 }, { "epoch": 1.8157821873557913, "grad_norm": 0.345079243183136, "learning_rate": 4.037395439314764e-06, "loss": 0.4307, "step": 3279 }, { "epoch": 1.8163359483156438, "grad_norm": 0.2986268699169159, "learning_rate": 4.034232472663853e-06, "loss": 0.399, "step": 3280 }, { "epoch": 1.816889709275496, "grad_norm": 0.2912280261516571, "learning_rate": 4.0310699074149195e-06, "loss": 0.4244, "step": 3281 }, { "epoch": 1.8174434702353484, "grad_norm": 0.30122503638267517, "learning_rate": 4.027907744882417e-06, "loss": 0.4074, "step": 3282 }, { "epoch": 1.8179972311952008, "grad_norm": 0.31441208720207214, "learning_rate": 4.024745986380639e-06, "loss": 0.4386, "step": 3283 }, { "epoch": 1.8185509921550531, "grad_norm": 0.3086138963699341, "learning_rate": 4.021584633223706e-06, "loss": 0.4326, "step": 3284 }, { "epoch": 1.8191047531149054, "grad_norm": 0.30359163880348206, "learning_rate": 4.01842368672557e-06, "loss": 0.4139, "step": 3285 }, { "epoch": 1.8196585140747579, "grad_norm": 0.26910117268562317, "learning_rate": 4.0152631482000176e-06, "loss": 0.4151, "step": 3286 }, { "epoch": 1.82021227503461, "grad_norm": 0.30445459485054016, "learning_rate": 4.0121030189606605e-06, "loss": 0.4164, "step": 3287 }, { "epoch": 1.8207660359944624, "grad_norm": 0.3002162277698517, "learning_rate": 4.008943300320945e-06, "loss": 0.442, "step": 3288 }, { "epoch": 1.8213197969543147, "grad_norm": 0.2875029444694519, "learning_rate": 4.005783993594146e-06, "loss": 0.388, "step": 3289 }, { "epoch": 1.821873557914167, "grad_norm": 0.32958370447158813, "learning_rate": 4.0026251000933655e-06, "loss": 0.4732, "step": 3290 }, { "epoch": 1.8224273188740194, "grad_norm": 0.2747132480144501, "learning_rate": 3.999466621131533e-06, "loss": 0.4135, "step": 3291 }, { "epoch": 1.8229810798338717, "grad_norm": 0.28566375374794006, "learning_rate": 3.996308558021406e-06, "loss": 0.4055, "step": 3292 }, { "epoch": 1.823534840793724, "grad_norm": 0.3097899258136749, "learning_rate": 3.9931509120755765e-06, "loss": 0.4453, "step": 3293 }, { "epoch": 1.8240886017535765, "grad_norm": 0.3028116524219513, "learning_rate": 3.989993684606452e-06, "loss": 0.4475, "step": 3294 }, { "epoch": 1.8246423627134287, "grad_norm": 0.32450881600379944, "learning_rate": 3.986836876926271e-06, "loss": 0.4458, "step": 3295 }, { "epoch": 1.825196123673281, "grad_norm": 0.31317538022994995, "learning_rate": 3.9836804903470995e-06, "loss": 0.4157, "step": 3296 }, { "epoch": 1.8257498846331335, "grad_norm": 0.29174304008483887, "learning_rate": 3.980524526180824e-06, "loss": 0.4338, "step": 3297 }, { "epoch": 1.8263036455929857, "grad_norm": 0.3137667775154114, "learning_rate": 3.9773689857391615e-06, "loss": 0.4212, "step": 3298 }, { "epoch": 1.826857406552838, "grad_norm": 0.27514877915382385, "learning_rate": 3.974213870333648e-06, "loss": 0.4058, "step": 3299 }, { "epoch": 1.8274111675126905, "grad_norm": 0.30560067296028137, "learning_rate": 3.971059181275642e-06, "loss": 0.4494, "step": 3300 }, { "epoch": 1.8279649284725425, "grad_norm": 0.3173049986362457, "learning_rate": 3.967904919876328e-06, "loss": 0.4132, "step": 3301 }, { "epoch": 1.828518689432395, "grad_norm": 0.3006884455680847, "learning_rate": 3.964751087446713e-06, "loss": 0.4499, "step": 3302 }, { "epoch": 1.8290724503922473, "grad_norm": 0.2728024423122406, "learning_rate": 3.961597685297625e-06, "loss": 0.403, "step": 3303 }, { "epoch": 1.8296262113520996, "grad_norm": 0.28599584102630615, "learning_rate": 3.958444714739711e-06, "loss": 0.3946, "step": 3304 }, { "epoch": 1.830179972311952, "grad_norm": 0.3240286707878113, "learning_rate": 3.955292177083439e-06, "loss": 0.4355, "step": 3305 }, { "epoch": 1.8307337332718043, "grad_norm": 0.27809569239616394, "learning_rate": 3.952140073639099e-06, "loss": 0.3837, "step": 3306 }, { "epoch": 1.8312874942316566, "grad_norm": 0.2697128355503082, "learning_rate": 3.9489884057168025e-06, "loss": 0.4186, "step": 3307 }, { "epoch": 1.831841255191509, "grad_norm": 0.31446364521980286, "learning_rate": 3.9458371746264765e-06, "loss": 0.4558, "step": 3308 }, { "epoch": 1.8323950161513614, "grad_norm": 0.27213671803474426, "learning_rate": 3.942686381677864e-06, "loss": 0.4231, "step": 3309 }, { "epoch": 1.8329487771112136, "grad_norm": 0.27902257442474365, "learning_rate": 3.939536028180533e-06, "loss": 0.434, "step": 3310 }, { "epoch": 1.8335025380710661, "grad_norm": 0.3206426203250885, "learning_rate": 3.93638611544386e-06, "loss": 0.4524, "step": 3311 }, { "epoch": 1.8340562990309182, "grad_norm": 0.2730531096458435, "learning_rate": 3.9332366447770505e-06, "loss": 0.4292, "step": 3312 }, { "epoch": 1.8346100599907706, "grad_norm": 0.2919239401817322, "learning_rate": 3.930087617489113e-06, "loss": 0.4062, "step": 3313 }, { "epoch": 1.8351638209506231, "grad_norm": 0.29984021186828613, "learning_rate": 3.926939034888881e-06, "loss": 0.4313, "step": 3314 }, { "epoch": 1.8357175819104752, "grad_norm": 0.30963653326034546, "learning_rate": 3.923790898284999e-06, "loss": 0.4067, "step": 3315 }, { "epoch": 1.8362713428703277, "grad_norm": 0.2988680899143219, "learning_rate": 3.920643208985925e-06, "loss": 0.4296, "step": 3316 }, { "epoch": 1.83682510383018, "grad_norm": 0.3138475716114044, "learning_rate": 3.917495968299936e-06, "loss": 0.4209, "step": 3317 }, { "epoch": 1.8373788647900322, "grad_norm": 0.31080400943756104, "learning_rate": 3.91434917753512e-06, "loss": 0.4289, "step": 3318 }, { "epoch": 1.8379326257498847, "grad_norm": 0.3169233202934265, "learning_rate": 3.911202837999376e-06, "loss": 0.4209, "step": 3319 }, { "epoch": 1.838486386709737, "grad_norm": 0.3572251498699188, "learning_rate": 3.908056951000418e-06, "loss": 0.4368, "step": 3320 }, { "epoch": 1.8390401476695892, "grad_norm": 0.2696188986301422, "learning_rate": 3.904911517845769e-06, "loss": 0.4101, "step": 3321 }, { "epoch": 1.8395939086294417, "grad_norm": 0.29039767384529114, "learning_rate": 3.901766539842768e-06, "loss": 0.4549, "step": 3322 }, { "epoch": 1.840147669589294, "grad_norm": 0.3376428782939911, "learning_rate": 3.898622018298562e-06, "loss": 0.4355, "step": 3323 }, { "epoch": 1.8407014305491463, "grad_norm": 0.31436020135879517, "learning_rate": 3.895477954520108e-06, "loss": 0.4363, "step": 3324 }, { "epoch": 1.8412551915089987, "grad_norm": 0.3235335648059845, "learning_rate": 3.892334349814172e-06, "loss": 0.4879, "step": 3325 }, { "epoch": 1.8418089524688508, "grad_norm": 0.30004674196243286, "learning_rate": 3.889191205487333e-06, "loss": 0.4123, "step": 3326 }, { "epoch": 1.8423627134287033, "grad_norm": 0.3231438100337982, "learning_rate": 3.886048522845974e-06, "loss": 0.4313, "step": 3327 }, { "epoch": 1.8429164743885558, "grad_norm": 0.29877007007598877, "learning_rate": 3.882906303196291e-06, "loss": 0.4047, "step": 3328 }, { "epoch": 1.8434702353484078, "grad_norm": 0.3383420705795288, "learning_rate": 3.879764547844282e-06, "loss": 0.4483, "step": 3329 }, { "epoch": 1.8440239963082603, "grad_norm": 0.2910620868206024, "learning_rate": 3.876623258095754e-06, "loss": 0.3899, "step": 3330 }, { "epoch": 1.8445777572681126, "grad_norm": 0.31341439485549927, "learning_rate": 3.873482435256325e-06, "loss": 0.4367, "step": 3331 }, { "epoch": 1.8451315182279648, "grad_norm": 0.3015069365501404, "learning_rate": 3.870342080631415e-06, "loss": 0.4163, "step": 3332 }, { "epoch": 1.8456852791878173, "grad_norm": 0.29551467299461365, "learning_rate": 3.867202195526246e-06, "loss": 0.4235, "step": 3333 }, { "epoch": 1.8462390401476696, "grad_norm": 0.30370935797691345, "learning_rate": 3.86406278124585e-06, "loss": 0.432, "step": 3334 }, { "epoch": 1.8467928011075219, "grad_norm": 0.28706204891204834, "learning_rate": 3.860923839095063e-06, "loss": 0.4167, "step": 3335 }, { "epoch": 1.8473465620673744, "grad_norm": 0.2904368042945862, "learning_rate": 3.857785370378523e-06, "loss": 0.4257, "step": 3336 }, { "epoch": 1.8479003230272266, "grad_norm": 0.2914562523365021, "learning_rate": 3.854647376400672e-06, "loss": 0.4177, "step": 3337 }, { "epoch": 1.8484540839870789, "grad_norm": 0.2945109009742737, "learning_rate": 3.851509858465755e-06, "loss": 0.3916, "step": 3338 }, { "epoch": 1.8490078449469314, "grad_norm": 0.29474860429763794, "learning_rate": 3.848372817877818e-06, "loss": 0.4662, "step": 3339 }, { "epoch": 1.8495616059067834, "grad_norm": 0.302403062582016, "learning_rate": 3.8452362559407054e-06, "loss": 0.4397, "step": 3340 }, { "epoch": 1.850115366866636, "grad_norm": 0.28388693928718567, "learning_rate": 3.842100173958075e-06, "loss": 0.4219, "step": 3341 }, { "epoch": 1.8506691278264882, "grad_norm": 0.3037259578704834, "learning_rate": 3.8389645732333696e-06, "loss": 0.4247, "step": 3342 }, { "epoch": 1.8512228887863404, "grad_norm": 0.3110923171043396, "learning_rate": 3.835829455069842e-06, "loss": 0.4083, "step": 3343 }, { "epoch": 1.851776649746193, "grad_norm": 0.3161934018135071, "learning_rate": 3.832694820770539e-06, "loss": 0.4337, "step": 3344 }, { "epoch": 1.8523304107060452, "grad_norm": 0.30323484539985657, "learning_rate": 3.829560671638313e-06, "loss": 0.4123, "step": 3345 }, { "epoch": 1.8528841716658975, "grad_norm": 0.3290657699108124, "learning_rate": 3.8264270089758066e-06, "loss": 0.4144, "step": 3346 }, { "epoch": 1.85343793262575, "grad_norm": 0.2789836823940277, "learning_rate": 3.823293834085465e-06, "loss": 0.4132, "step": 3347 }, { "epoch": 1.8539916935856022, "grad_norm": 0.3068215250968933, "learning_rate": 3.820161148269531e-06, "loss": 0.4188, "step": 3348 }, { "epoch": 1.8545454545454545, "grad_norm": 0.32076290249824524, "learning_rate": 3.81702895283004e-06, "loss": 0.4334, "step": 3349 }, { "epoch": 1.855099215505307, "grad_norm": 0.28980743885040283, "learning_rate": 3.813897249068831e-06, "loss": 0.3985, "step": 3350 }, { "epoch": 1.8556529764651593, "grad_norm": 0.3087499737739563, "learning_rate": 3.81076603828753e-06, "loss": 0.4439, "step": 3351 }, { "epoch": 1.8562067374250115, "grad_norm": 0.33149003982543945, "learning_rate": 3.807635321787564e-06, "loss": 0.4572, "step": 3352 }, { "epoch": 1.856760498384864, "grad_norm": 0.2988987863063812, "learning_rate": 3.804505100870153e-06, "loss": 0.4168, "step": 3353 }, { "epoch": 1.857314259344716, "grad_norm": 0.3277234733104706, "learning_rate": 3.8013753768363093e-06, "loss": 0.4015, "step": 3354 }, { "epoch": 1.8578680203045685, "grad_norm": 0.30677640438079834, "learning_rate": 3.798246150986841e-06, "loss": 0.4394, "step": 3355 }, { "epoch": 1.8584217812644208, "grad_norm": 0.299007385969162, "learning_rate": 3.795117424622349e-06, "loss": 0.4409, "step": 3356 }, { "epoch": 1.858975542224273, "grad_norm": 0.3122585713863373, "learning_rate": 3.791989199043226e-06, "loss": 0.4407, "step": 3357 }, { "epoch": 1.8595293031841256, "grad_norm": 0.2711135447025299, "learning_rate": 3.788861475549655e-06, "loss": 0.4105, "step": 3358 }, { "epoch": 1.8600830641439778, "grad_norm": 0.3266054093837738, "learning_rate": 3.7857342554416115e-06, "loss": 0.4347, "step": 3359 }, { "epoch": 1.86063682510383, "grad_norm": 0.31578701734542847, "learning_rate": 3.7826075400188637e-06, "loss": 0.4157, "step": 3360 }, { "epoch": 1.8611905860636826, "grad_norm": 0.3033410310745239, "learning_rate": 3.7794813305809695e-06, "loss": 0.4268, "step": 3361 }, { "epoch": 1.8617443470235349, "grad_norm": 0.2968435287475586, "learning_rate": 3.7763556284272717e-06, "loss": 0.4169, "step": 3362 }, { "epoch": 1.8622981079833871, "grad_norm": 0.2988579571247101, "learning_rate": 3.7732304348569087e-06, "loss": 0.427, "step": 3363 }, { "epoch": 1.8628518689432396, "grad_norm": 0.3080503046512604, "learning_rate": 3.7701057511688026e-06, "loss": 0.4141, "step": 3364 }, { "epoch": 1.8634056299030919, "grad_norm": 0.31789642572402954, "learning_rate": 3.7669815786616682e-06, "loss": 0.4594, "step": 3365 }, { "epoch": 1.8639593908629442, "grad_norm": 0.2762637436389923, "learning_rate": 3.7638579186340055e-06, "loss": 0.3815, "step": 3366 }, { "epoch": 1.8645131518227966, "grad_norm": 0.31104525923728943, "learning_rate": 3.760734772384099e-06, "loss": 0.4535, "step": 3367 }, { "epoch": 1.8650669127826487, "grad_norm": 0.2915252149105072, "learning_rate": 3.7576121412100224e-06, "loss": 0.4288, "step": 3368 }, { "epoch": 1.8656206737425012, "grad_norm": 0.30895400047302246, "learning_rate": 3.754490026409637e-06, "loss": 0.3865, "step": 3369 }, { "epoch": 1.8661744347023534, "grad_norm": 0.30798861384391785, "learning_rate": 3.7513684292805864e-06, "loss": 0.4107, "step": 3370 }, { "epoch": 1.8667281956622057, "grad_norm": 0.308783620595932, "learning_rate": 3.7482473511202993e-06, "loss": 0.4833, "step": 3371 }, { "epoch": 1.8672819566220582, "grad_norm": 0.29411837458610535, "learning_rate": 3.7451267932259905e-06, "loss": 0.3799, "step": 3372 }, { "epoch": 1.8678357175819105, "grad_norm": 0.3150050938129425, "learning_rate": 3.742006756894655e-06, "loss": 0.419, "step": 3373 }, { "epoch": 1.8683894785417627, "grad_norm": 0.2740013301372528, "learning_rate": 3.7388872434230784e-06, "loss": 0.4136, "step": 3374 }, { "epoch": 1.8689432395016152, "grad_norm": 0.27956798672676086, "learning_rate": 3.7357682541078206e-06, "loss": 0.4202, "step": 3375 }, { "epoch": 1.8694970004614675, "grad_norm": 0.32993167638778687, "learning_rate": 3.7326497902452275e-06, "loss": 0.4459, "step": 3376 }, { "epoch": 1.8700507614213198, "grad_norm": 0.2917972803115845, "learning_rate": 3.7295318531314285e-06, "loss": 0.461, "step": 3377 }, { "epoch": 1.8706045223811723, "grad_norm": 0.25127628445625305, "learning_rate": 3.7264144440623263e-06, "loss": 0.3327, "step": 3378 }, { "epoch": 1.8711582833410243, "grad_norm": 0.32228830456733704, "learning_rate": 3.7232975643336167e-06, "loss": 0.4245, "step": 3379 }, { "epoch": 1.8717120443008768, "grad_norm": 0.31149160861968994, "learning_rate": 3.7201812152407646e-06, "loss": 0.4559, "step": 3380 }, { "epoch": 1.8722658052607293, "grad_norm": 0.25578799843788147, "learning_rate": 3.7170653980790193e-06, "loss": 0.3951, "step": 3381 }, { "epoch": 1.8728195662205813, "grad_norm": 0.29075032472610474, "learning_rate": 3.713950114143408e-06, "loss": 0.4167, "step": 3382 }, { "epoch": 1.8733733271804338, "grad_norm": 0.31224215030670166, "learning_rate": 3.7108353647287343e-06, "loss": 0.441, "step": 3383 }, { "epoch": 1.873927088140286, "grad_norm": 0.2759512662887573, "learning_rate": 3.707721151129584e-06, "loss": 0.407, "step": 3384 }, { "epoch": 1.8744808491001383, "grad_norm": 0.2906224727630615, "learning_rate": 3.704607474640316e-06, "loss": 0.4134, "step": 3385 }, { "epoch": 1.8750346100599908, "grad_norm": 0.3337690234184265, "learning_rate": 3.7014943365550703e-06, "loss": 0.4724, "step": 3386 }, { "epoch": 1.875588371019843, "grad_norm": 0.3011837601661682, "learning_rate": 3.698381738167756e-06, "loss": 0.4758, "step": 3387 }, { "epoch": 1.8761421319796954, "grad_norm": 0.28206560015678406, "learning_rate": 3.6952696807720677e-06, "loss": 0.4144, "step": 3388 }, { "epoch": 1.8766958929395479, "grad_norm": 0.31197240948677063, "learning_rate": 3.692158165661467e-06, "loss": 0.4399, "step": 3389 }, { "epoch": 1.8772496538994001, "grad_norm": 0.2950364947319031, "learning_rate": 3.6890471941291946e-06, "loss": 0.4404, "step": 3390 }, { "epoch": 1.8778034148592524, "grad_norm": 0.2701135575771332, "learning_rate": 3.685936767468261e-06, "loss": 0.3627, "step": 3391 }, { "epoch": 1.8783571758191049, "grad_norm": 0.323590487241745, "learning_rate": 3.6828268869714532e-06, "loss": 0.4709, "step": 3392 }, { "epoch": 1.878910936778957, "grad_norm": 0.2836271822452545, "learning_rate": 3.6797175539313336e-06, "loss": 0.423, "step": 3393 }, { "epoch": 1.8794646977388094, "grad_norm": 0.2746359705924988, "learning_rate": 3.676608769640232e-06, "loss": 0.3998, "step": 3394 }, { "epoch": 1.880018458698662, "grad_norm": 0.30527374148368835, "learning_rate": 3.6735005353902548e-06, "loss": 0.4429, "step": 3395 }, { "epoch": 1.880572219658514, "grad_norm": 0.28421032428741455, "learning_rate": 3.6703928524732745e-06, "loss": 0.4294, "step": 3396 }, { "epoch": 1.8811259806183664, "grad_norm": 0.31643304228782654, "learning_rate": 3.667285722180938e-06, "loss": 0.4533, "step": 3397 }, { "epoch": 1.8816797415782187, "grad_norm": 0.3011121153831482, "learning_rate": 3.6641791458046644e-06, "loss": 0.393, "step": 3398 }, { "epoch": 1.882233502538071, "grad_norm": 0.2879936397075653, "learning_rate": 3.661073124635639e-06, "loss": 0.4037, "step": 3399 }, { "epoch": 1.8827872634979235, "grad_norm": 0.2859354615211487, "learning_rate": 3.6579676599648163e-06, "loss": 0.4223, "step": 3400 }, { "epoch": 1.8833410244577757, "grad_norm": 0.2944367229938507, "learning_rate": 3.6548627530829205e-06, "loss": 0.4134, "step": 3401 }, { "epoch": 1.883894785417628, "grad_norm": 0.3698466122150421, "learning_rate": 3.6517584052804443e-06, "loss": 0.4034, "step": 3402 }, { "epoch": 1.8844485463774805, "grad_norm": 0.3266870081424713, "learning_rate": 3.648654617847649e-06, "loss": 0.4415, "step": 3403 }, { "epoch": 1.8850023073373328, "grad_norm": 0.28361597657203674, "learning_rate": 3.645551392074563e-06, "loss": 0.4239, "step": 3404 }, { "epoch": 1.885556068297185, "grad_norm": 0.3206707537174225, "learning_rate": 3.642448729250977e-06, "loss": 0.4067, "step": 3405 }, { "epoch": 1.8861098292570375, "grad_norm": 0.3732202351093292, "learning_rate": 3.6393466306664527e-06, "loss": 0.4146, "step": 3406 }, { "epoch": 1.8866635902168896, "grad_norm": 0.3049001395702362, "learning_rate": 3.6362450976103127e-06, "loss": 0.4033, "step": 3407 }, { "epoch": 1.887217351176742, "grad_norm": 0.3150828778743744, "learning_rate": 3.6331441313716515e-06, "loss": 0.4637, "step": 3408 }, { "epoch": 1.8877711121365943, "grad_norm": 0.33154699206352234, "learning_rate": 3.6300437332393212e-06, "loss": 0.4442, "step": 3409 }, { "epoch": 1.8883248730964466, "grad_norm": 0.35509392619132996, "learning_rate": 3.6269439045019407e-06, "loss": 0.4222, "step": 3410 }, { "epoch": 1.888878634056299, "grad_norm": 0.30652251839637756, "learning_rate": 3.6238446464478906e-06, "loss": 0.4381, "step": 3411 }, { "epoch": 1.8894323950161513, "grad_norm": 0.27212393283843994, "learning_rate": 3.6207459603653187e-06, "loss": 0.406, "step": 3412 }, { "epoch": 1.8899861559760036, "grad_norm": 0.3122164011001587, "learning_rate": 3.6176478475421294e-06, "loss": 0.4241, "step": 3413 }, { "epoch": 1.890539916935856, "grad_norm": 0.33945024013519287, "learning_rate": 3.6145503092659926e-06, "loss": 0.4241, "step": 3414 }, { "epoch": 1.8910936778957084, "grad_norm": 0.3068271577358246, "learning_rate": 3.6114533468243384e-06, "loss": 0.4443, "step": 3415 }, { "epoch": 1.8916474388555606, "grad_norm": 0.27258795499801636, "learning_rate": 3.608356961504354e-06, "loss": 0.375, "step": 3416 }, { "epoch": 1.8922011998154131, "grad_norm": 0.3218095898628235, "learning_rate": 3.6052611545929963e-06, "loss": 0.4242, "step": 3417 }, { "epoch": 1.8927549607752654, "grad_norm": 0.27056634426116943, "learning_rate": 3.60216592737697e-06, "loss": 0.4036, "step": 3418 }, { "epoch": 1.8933087217351177, "grad_norm": 0.30672675371170044, "learning_rate": 3.5990712811427482e-06, "loss": 0.4586, "step": 3419 }, { "epoch": 1.8938624826949702, "grad_norm": 0.2952890396118164, "learning_rate": 3.595977217176556e-06, "loss": 0.4307, "step": 3420 }, { "epoch": 1.8944162436548222, "grad_norm": 0.2868746519088745, "learning_rate": 3.5928837367643793e-06, "loss": 0.4262, "step": 3421 }, { "epoch": 1.8949700046146747, "grad_norm": 0.2950320541858673, "learning_rate": 3.5897908411919635e-06, "loss": 0.4277, "step": 3422 }, { "epoch": 1.895523765574527, "grad_norm": 0.32183149456977844, "learning_rate": 3.5866985317448077e-06, "loss": 0.4363, "step": 3423 }, { "epoch": 1.8960775265343792, "grad_norm": 0.28905507922172546, "learning_rate": 3.58360680970817e-06, "loss": 0.4266, "step": 3424 }, { "epoch": 1.8966312874942317, "grad_norm": 0.3042190968990326, "learning_rate": 3.5805156763670613e-06, "loss": 0.4289, "step": 3425 }, { "epoch": 1.897185048454084, "grad_norm": 0.27945476770401, "learning_rate": 3.5774251330062476e-06, "loss": 0.4068, "step": 3426 }, { "epoch": 1.8977388094139362, "grad_norm": 0.3017016053199768, "learning_rate": 3.574335180910255e-06, "loss": 0.4458, "step": 3427 }, { "epoch": 1.8982925703737887, "grad_norm": 0.31489473581314087, "learning_rate": 3.57124582136336e-06, "loss": 0.4721, "step": 3428 }, { "epoch": 1.898846331333641, "grad_norm": 0.26782113313674927, "learning_rate": 3.568157055649591e-06, "loss": 0.385, "step": 3429 }, { "epoch": 1.8994000922934933, "grad_norm": 0.2936322093009949, "learning_rate": 3.5650688850527317e-06, "loss": 0.4112, "step": 3430 }, { "epoch": 1.8999538532533458, "grad_norm": 0.33944016695022583, "learning_rate": 3.561981310856321e-06, "loss": 0.4453, "step": 3431 }, { "epoch": 1.9005076142131978, "grad_norm": 0.298913836479187, "learning_rate": 3.5588943343436463e-06, "loss": 0.4283, "step": 3432 }, { "epoch": 1.9010613751730503, "grad_norm": 0.299864798784256, "learning_rate": 3.555807956797748e-06, "loss": 0.4197, "step": 3433 }, { "epoch": 1.9016151361329028, "grad_norm": 0.2833140194416046, "learning_rate": 3.552722179501416e-06, "loss": 0.4061, "step": 3434 }, { "epoch": 1.9021688970927548, "grad_norm": 0.30785033106803894, "learning_rate": 3.5496370037371918e-06, "loss": 0.4019, "step": 3435 }, { "epoch": 1.9027226580526073, "grad_norm": 0.3078601658344269, "learning_rate": 3.546552430787369e-06, "loss": 0.4324, "step": 3436 }, { "epoch": 1.9032764190124596, "grad_norm": 0.3068637251853943, "learning_rate": 3.543468461933989e-06, "loss": 0.4403, "step": 3437 }, { "epoch": 1.9038301799723119, "grad_norm": 0.3201625645160675, "learning_rate": 3.540385098458839e-06, "loss": 0.453, "step": 3438 }, { "epoch": 1.9043839409321643, "grad_norm": 0.2788097858428955, "learning_rate": 3.5373023416434595e-06, "loss": 0.3811, "step": 3439 }, { "epoch": 1.9049377018920166, "grad_norm": 0.32006365060806274, "learning_rate": 3.5342201927691342e-06, "loss": 0.4265, "step": 3440 }, { "epoch": 1.9054914628518689, "grad_norm": 0.28935351967811584, "learning_rate": 3.5311386531169023e-06, "loss": 0.4352, "step": 3441 }, { "epoch": 1.9060452238117214, "grad_norm": 0.3084854781627655, "learning_rate": 3.528057723967539e-06, "loss": 0.4333, "step": 3442 }, { "epoch": 1.9065989847715736, "grad_norm": 0.28457972407341003, "learning_rate": 3.5249774066015722e-06, "loss": 0.4113, "step": 3443 }, { "epoch": 1.907152745731426, "grad_norm": 0.31899407505989075, "learning_rate": 3.5218977022992763e-06, "loss": 0.4292, "step": 3444 }, { "epoch": 1.9077065066912784, "grad_norm": 0.2765054702758789, "learning_rate": 3.5188186123406638e-06, "loss": 0.4228, "step": 3445 }, { "epoch": 1.9082602676511304, "grad_norm": 0.27864888310432434, "learning_rate": 3.5157401380055034e-06, "loss": 0.4168, "step": 3446 }, { "epoch": 1.908814028610983, "grad_norm": 0.2851966917514801, "learning_rate": 3.5126622805732975e-06, "loss": 0.418, "step": 3447 }, { "epoch": 1.9093677895708354, "grad_norm": 0.26850947737693787, "learning_rate": 3.509585041323297e-06, "loss": 0.3956, "step": 3448 }, { "epoch": 1.9099215505306875, "grad_norm": 0.29039400815963745, "learning_rate": 3.5065084215344953e-06, "loss": 0.4336, "step": 3449 }, { "epoch": 1.91047531149054, "grad_norm": 0.30111342668533325, "learning_rate": 3.503432422485626e-06, "loss": 0.4015, "step": 3450 }, { "epoch": 1.9110290724503922, "grad_norm": 0.3146003186702728, "learning_rate": 3.5003570454551687e-06, "loss": 0.4229, "step": 3451 }, { "epoch": 1.9115828334102445, "grad_norm": 0.29623687267303467, "learning_rate": 3.4972822917213423e-06, "loss": 0.4164, "step": 3452 }, { "epoch": 1.912136594370097, "grad_norm": 0.27985477447509766, "learning_rate": 3.4942081625621073e-06, "loss": 0.4377, "step": 3453 }, { "epoch": 1.9126903553299492, "grad_norm": 0.31314849853515625, "learning_rate": 3.49113465925516e-06, "loss": 0.425, "step": 3454 }, { "epoch": 1.9132441162898015, "grad_norm": 0.30255961418151855, "learning_rate": 3.488061783077946e-06, "loss": 0.3804, "step": 3455 }, { "epoch": 1.913797877249654, "grad_norm": 0.29979127645492554, "learning_rate": 3.4849895353076423e-06, "loss": 0.4494, "step": 3456 }, { "epoch": 1.9143516382095063, "grad_norm": 0.2810087203979492, "learning_rate": 3.481917917221168e-06, "loss": 0.4314, "step": 3457 }, { "epoch": 1.9149053991693585, "grad_norm": 0.2650015950202942, "learning_rate": 3.478846930095179e-06, "loss": 0.3882, "step": 3458 }, { "epoch": 1.915459160129211, "grad_norm": 0.3217345178127289, "learning_rate": 3.4757765752060692e-06, "loss": 0.4497, "step": 3459 }, { "epoch": 1.916012921089063, "grad_norm": 0.27983787655830383, "learning_rate": 3.472706853829971e-06, "loss": 0.4389, "step": 3460 }, { "epoch": 1.9165666820489156, "grad_norm": 0.30085331201553345, "learning_rate": 3.469637767242754e-06, "loss": 0.4462, "step": 3461 }, { "epoch": 1.9171204430087678, "grad_norm": 0.29509371519088745, "learning_rate": 3.466569316720022e-06, "loss": 0.4438, "step": 3462 }, { "epoch": 1.91767420396862, "grad_norm": 0.28278830647468567, "learning_rate": 3.4635015035371146e-06, "loss": 0.4248, "step": 3463 }, { "epoch": 1.9182279649284726, "grad_norm": 0.7157668471336365, "learning_rate": 3.4604343289691056e-06, "loss": 0.424, "step": 3464 }, { "epoch": 1.9187817258883249, "grad_norm": 0.2908251881599426, "learning_rate": 3.457367794290808e-06, "loss": 0.4133, "step": 3465 }, { "epoch": 1.9193354868481771, "grad_norm": 0.29650387167930603, "learning_rate": 3.454301900776764e-06, "loss": 0.4189, "step": 3466 }, { "epoch": 1.9198892478080296, "grad_norm": 0.3069692850112915, "learning_rate": 3.4512366497012517e-06, "loss": 0.4093, "step": 3467 }, { "epoch": 1.9204430087678819, "grad_norm": 0.2947542071342468, "learning_rate": 3.44817204233828e-06, "loss": 0.4297, "step": 3468 }, { "epoch": 1.9209967697277341, "grad_norm": 0.29407942295074463, "learning_rate": 3.4451080799615916e-06, "loss": 0.4583, "step": 3469 }, { "epoch": 1.9215505306875866, "grad_norm": 0.30211174488067627, "learning_rate": 3.4420447638446643e-06, "loss": 0.4186, "step": 3470 }, { "epoch": 1.922104291647439, "grad_norm": 0.30548736453056335, "learning_rate": 3.438982095260701e-06, "loss": 0.4271, "step": 3471 }, { "epoch": 1.9226580526072912, "grad_norm": 0.32161983847618103, "learning_rate": 3.4359200754826403e-06, "loss": 0.4308, "step": 3472 }, { "epoch": 1.9232118135671437, "grad_norm": 0.2777984142303467, "learning_rate": 3.4328587057831474e-06, "loss": 0.4058, "step": 3473 }, { "epoch": 1.9237655745269957, "grad_norm": 0.2806267738342285, "learning_rate": 3.4297979874346234e-06, "loss": 0.4295, "step": 3474 }, { "epoch": 1.9243193354868482, "grad_norm": 0.2933661639690399, "learning_rate": 3.4267379217091923e-06, "loss": 0.4253, "step": 3475 }, { "epoch": 1.9248730964467005, "grad_norm": 0.30830687284469604, "learning_rate": 3.4236785098787097e-06, "loss": 0.4897, "step": 3476 }, { "epoch": 1.9254268574065527, "grad_norm": 0.2805060148239136, "learning_rate": 3.420619753214758e-06, "loss": 0.3931, "step": 3477 }, { "epoch": 1.9259806183664052, "grad_norm": 0.2920530140399933, "learning_rate": 3.417561652988648e-06, "loss": 0.4352, "step": 3478 }, { "epoch": 1.9265343793262575, "grad_norm": 0.28925591707229614, "learning_rate": 3.414504210471421e-06, "loss": 0.4317, "step": 3479 }, { "epoch": 1.9270881402861098, "grad_norm": 0.27578040957450867, "learning_rate": 3.411447426933839e-06, "loss": 0.4232, "step": 3480 }, { "epoch": 1.9276419012459622, "grad_norm": 0.2946160137653351, "learning_rate": 3.4083913036463935e-06, "loss": 0.4119, "step": 3481 }, { "epoch": 1.9281956622058145, "grad_norm": 0.3097228705883026, "learning_rate": 3.405335841879303e-06, "loss": 0.4367, "step": 3482 }, { "epoch": 1.9287494231656668, "grad_norm": 0.305948942899704, "learning_rate": 3.4022810429025044e-06, "loss": 0.4435, "step": 3483 }, { "epoch": 1.9293031841255193, "grad_norm": 0.31220707297325134, "learning_rate": 3.3992269079856705e-06, "loss": 0.413, "step": 3484 }, { "epoch": 1.9298569450853715, "grad_norm": 0.29419994354248047, "learning_rate": 3.396173438398187e-06, "loss": 0.443, "step": 3485 }, { "epoch": 1.9304107060452238, "grad_norm": 0.29246559739112854, "learning_rate": 3.3931206354091696e-06, "loss": 0.4327, "step": 3486 }, { "epoch": 1.9309644670050763, "grad_norm": 0.27442631125450134, "learning_rate": 3.390068500287452e-06, "loss": 0.4223, "step": 3487 }, { "epoch": 1.9315182279649283, "grad_norm": 0.299121230840683, "learning_rate": 3.387017034301595e-06, "loss": 0.414, "step": 3488 }, { "epoch": 1.9320719889247808, "grad_norm": 0.2906515598297119, "learning_rate": 3.38396623871988e-06, "loss": 0.4115, "step": 3489 }, { "epoch": 1.932625749884633, "grad_norm": 0.2934666872024536, "learning_rate": 3.380916114810309e-06, "loss": 0.4091, "step": 3490 }, { "epoch": 1.9331795108444854, "grad_norm": 0.29018449783325195, "learning_rate": 3.3778666638406054e-06, "loss": 0.4426, "step": 3491 }, { "epoch": 1.9337332718043378, "grad_norm": 0.2830198109149933, "learning_rate": 3.3748178870782113e-06, "loss": 0.4223, "step": 3492 }, { "epoch": 1.9342870327641901, "grad_norm": 0.27757421135902405, "learning_rate": 3.3717697857902886e-06, "loss": 0.3892, "step": 3493 }, { "epoch": 1.9348407937240424, "grad_norm": 0.30862948298454285, "learning_rate": 3.3687223612437237e-06, "loss": 0.4391, "step": 3494 }, { "epoch": 1.9353945546838949, "grad_norm": 0.3015842139720917, "learning_rate": 3.365675614705116e-06, "loss": 0.4365, "step": 3495 }, { "epoch": 1.9359483156437471, "grad_norm": 0.29976606369018555, "learning_rate": 3.362629547440784e-06, "loss": 0.4008, "step": 3496 }, { "epoch": 1.9365020766035994, "grad_norm": 0.2821332514286041, "learning_rate": 3.3595841607167632e-06, "loss": 0.4516, "step": 3497 }, { "epoch": 1.937055837563452, "grad_norm": 0.28689467906951904, "learning_rate": 3.356539455798812e-06, "loss": 0.4067, "step": 3498 }, { "epoch": 1.937609598523304, "grad_norm": 0.29594480991363525, "learning_rate": 3.3534954339523983e-06, "loss": 0.4053, "step": 3499 }, { "epoch": 1.9381633594831564, "grad_norm": 0.2954941689968109, "learning_rate": 3.3504520964427088e-06, "loss": 0.4133, "step": 3500 }, { "epoch": 1.938717120443009, "grad_norm": 0.3371991515159607, "learning_rate": 3.347409444534647e-06, "loss": 0.4204, "step": 3501 }, { "epoch": 1.939270881402861, "grad_norm": 0.3236100673675537, "learning_rate": 3.344367479492829e-06, "loss": 0.4192, "step": 3502 }, { "epoch": 1.9398246423627135, "grad_norm": 0.31139710545539856, "learning_rate": 3.3413262025815894e-06, "loss": 0.4326, "step": 3503 }, { "epoch": 1.9403784033225657, "grad_norm": 0.3160155117511749, "learning_rate": 3.338285615064973e-06, "loss": 0.4247, "step": 3504 }, { "epoch": 1.940932164282418, "grad_norm": 0.29419901967048645, "learning_rate": 3.3352457182067378e-06, "loss": 0.4164, "step": 3505 }, { "epoch": 1.9414859252422705, "grad_norm": 0.33856475353240967, "learning_rate": 3.3322065132703575e-06, "loss": 0.4345, "step": 3506 }, { "epoch": 1.9420396862021228, "grad_norm": 0.3141358494758606, "learning_rate": 3.329168001519015e-06, "loss": 0.3958, "step": 3507 }, { "epoch": 1.942593447161975, "grad_norm": 0.3366664946079254, "learning_rate": 3.3261301842156106e-06, "loss": 0.4525, "step": 3508 }, { "epoch": 1.9431472081218275, "grad_norm": 0.3189690411090851, "learning_rate": 3.3230930626227497e-06, "loss": 0.4098, "step": 3509 }, { "epoch": 1.9437009690816798, "grad_norm": 0.29476091265678406, "learning_rate": 3.3200566380027522e-06, "loss": 0.3846, "step": 3510 }, { "epoch": 1.944254730041532, "grad_norm": 0.315979540348053, "learning_rate": 3.317020911617647e-06, "loss": 0.435, "step": 3511 }, { "epoch": 1.9448084910013845, "grad_norm": 0.3421128988265991, "learning_rate": 3.3139858847291704e-06, "loss": 0.4318, "step": 3512 }, { "epoch": 1.9453622519612366, "grad_norm": 0.3226802349090576, "learning_rate": 3.3109515585987773e-06, "loss": 0.4251, "step": 3513 }, { "epoch": 1.945916012921089, "grad_norm": 0.32171908020973206, "learning_rate": 3.307917934487619e-06, "loss": 0.4034, "step": 3514 }, { "epoch": 1.9464697738809416, "grad_norm": 0.30785509943962097, "learning_rate": 3.304885013656564e-06, "loss": 0.397, "step": 3515 }, { "epoch": 1.9470235348407936, "grad_norm": 0.31972795724868774, "learning_rate": 3.3018527973661807e-06, "loss": 0.4483, "step": 3516 }, { "epoch": 1.947577295800646, "grad_norm": 0.29962459206581116, "learning_rate": 3.2988212868767553e-06, "loss": 0.4342, "step": 3517 }, { "epoch": 1.9481310567604984, "grad_norm": 0.2874276041984558, "learning_rate": 3.2957904834482708e-06, "loss": 0.3943, "step": 3518 }, { "epoch": 1.9486848177203506, "grad_norm": 0.3093646168708801, "learning_rate": 3.2927603883404204e-06, "loss": 0.4075, "step": 3519 }, { "epoch": 1.9492385786802031, "grad_norm": 0.30476781725883484, "learning_rate": 3.2897310028126045e-06, "loss": 0.4599, "step": 3520 }, { "epoch": 1.9497923396400554, "grad_norm": 0.28528374433517456, "learning_rate": 3.286702328123923e-06, "loss": 0.3877, "step": 3521 }, { "epoch": 1.9503461005999077, "grad_norm": 0.30252256989479065, "learning_rate": 3.283674365533187e-06, "loss": 0.4414, "step": 3522 }, { "epoch": 1.9508998615597601, "grad_norm": 0.3405768573284149, "learning_rate": 3.2806471162989084e-06, "loss": 0.438, "step": 3523 }, { "epoch": 1.9514536225196124, "grad_norm": 0.3042523264884949, "learning_rate": 3.2776205816793037e-06, "loss": 0.3956, "step": 3524 }, { "epoch": 1.9520073834794647, "grad_norm": 0.2614644765853882, "learning_rate": 3.2745947629322887e-06, "loss": 0.4162, "step": 3525 }, { "epoch": 1.9525611444393172, "grad_norm": 0.32723528146743774, "learning_rate": 3.271569661315485e-06, "loss": 0.4488, "step": 3526 }, { "epoch": 1.9531149053991692, "grad_norm": 0.2944730818271637, "learning_rate": 3.268545278086218e-06, "loss": 0.3878, "step": 3527 }, { "epoch": 1.9536686663590217, "grad_norm": 0.3313755989074707, "learning_rate": 3.2655216145015106e-06, "loss": 0.4613, "step": 3528 }, { "epoch": 1.954222427318874, "grad_norm": 0.3043105900287628, "learning_rate": 3.2624986718180895e-06, "loss": 0.4603, "step": 3529 }, { "epoch": 1.9547761882787262, "grad_norm": 0.30113890767097473, "learning_rate": 3.259476451292378e-06, "loss": 0.4114, "step": 3530 }, { "epoch": 1.9553299492385787, "grad_norm": 0.3168541491031647, "learning_rate": 3.2564549541805022e-06, "loss": 0.4446, "step": 3531 }, { "epoch": 1.955883710198431, "grad_norm": 0.2883016765117645, "learning_rate": 3.2534341817382886e-06, "loss": 0.4286, "step": 3532 }, { "epoch": 1.9564374711582833, "grad_norm": 0.3087553083896637, "learning_rate": 3.2504141352212615e-06, "loss": 0.4045, "step": 3533 }, { "epoch": 1.9569912321181357, "grad_norm": 0.33026495575904846, "learning_rate": 3.2473948158846403e-06, "loss": 0.4481, "step": 3534 }, { "epoch": 1.957544993077988, "grad_norm": 0.30902525782585144, "learning_rate": 3.2443762249833455e-06, "loss": 0.4608, "step": 3535 }, { "epoch": 1.9580987540378403, "grad_norm": 0.28757068514823914, "learning_rate": 3.241358363771994e-06, "loss": 0.4416, "step": 3536 }, { "epoch": 1.9586525149976928, "grad_norm": 0.2674272954463959, "learning_rate": 3.2383412335049023e-06, "loss": 0.4042, "step": 3537 }, { "epoch": 1.959206275957545, "grad_norm": 0.33981457352638245, "learning_rate": 3.235324835436077e-06, "loss": 0.4382, "step": 3538 }, { "epoch": 1.9597600369173973, "grad_norm": 0.337677538394928, "learning_rate": 3.2323091708192244e-06, "loss": 0.414, "step": 3539 }, { "epoch": 1.9603137978772498, "grad_norm": 0.30294328927993774, "learning_rate": 3.2292942409077445e-06, "loss": 0.4125, "step": 3540 }, { "epoch": 1.9608675588371018, "grad_norm": 0.2942797541618347, "learning_rate": 3.226280046954735e-06, "loss": 0.4129, "step": 3541 }, { "epoch": 1.9614213197969543, "grad_norm": 0.29490169882774353, "learning_rate": 3.2232665902129854e-06, "loss": 0.4249, "step": 3542 }, { "epoch": 1.9619750807568066, "grad_norm": 0.3174092769622803, "learning_rate": 3.220253871934976e-06, "loss": 0.41, "step": 3543 }, { "epoch": 1.9625288417166589, "grad_norm": 0.3152987062931061, "learning_rate": 3.217241893372886e-06, "loss": 0.3902, "step": 3544 }, { "epoch": 1.9630826026765114, "grad_norm": 0.31210243701934814, "learning_rate": 3.2142306557785796e-06, "loss": 0.4417, "step": 3545 }, { "epoch": 1.9636363636363636, "grad_norm": 0.29048311710357666, "learning_rate": 3.211220160403623e-06, "loss": 0.4431, "step": 3546 }, { "epoch": 1.964190124596216, "grad_norm": 0.3044293224811554, "learning_rate": 3.2082104084992655e-06, "loss": 0.4399, "step": 3547 }, { "epoch": 1.9647438855560684, "grad_norm": 0.3165484368801117, "learning_rate": 3.2052014013164512e-06, "loss": 0.4226, "step": 3548 }, { "epoch": 1.9652976465159206, "grad_norm": 0.3197067677974701, "learning_rate": 3.2021931401058136e-06, "loss": 0.4455, "step": 3549 }, { "epoch": 1.965851407475773, "grad_norm": 0.34667232632637024, "learning_rate": 3.1991856261176744e-06, "loss": 0.4126, "step": 3550 }, { "epoch": 1.9664051684356254, "grad_norm": 0.33034488558769226, "learning_rate": 3.19617886060205e-06, "loss": 0.4635, "step": 3551 }, { "epoch": 1.9669589293954777, "grad_norm": 0.3181135356426239, "learning_rate": 3.1931728448086407e-06, "loss": 0.4234, "step": 3552 }, { "epoch": 1.96751269035533, "grad_norm": 0.3233545124530792, "learning_rate": 3.190167579986838e-06, "loss": 0.4546, "step": 3553 }, { "epoch": 1.9680664513151824, "grad_norm": 0.3003171384334564, "learning_rate": 3.187163067385719e-06, "loss": 0.3783, "step": 3554 }, { "epoch": 1.9686202122750345, "grad_norm": 0.2966054379940033, "learning_rate": 3.1841593082540478e-06, "loss": 0.464, "step": 3555 }, { "epoch": 1.969173973234887, "grad_norm": 0.3044261038303375, "learning_rate": 3.1811563038402797e-06, "loss": 0.4054, "step": 3556 }, { "epoch": 1.9697277341947392, "grad_norm": 0.3077813982963562, "learning_rate": 3.1781540553925526e-06, "loss": 0.4254, "step": 3557 }, { "epoch": 1.9702814951545915, "grad_norm": 0.30152979493141174, "learning_rate": 3.175152564158693e-06, "loss": 0.4738, "step": 3558 }, { "epoch": 1.970835256114444, "grad_norm": 0.29054245352745056, "learning_rate": 3.1721518313862053e-06, "loss": 0.4169, "step": 3559 }, { "epoch": 1.9713890170742963, "grad_norm": 0.3013758361339569, "learning_rate": 3.169151858322289e-06, "loss": 0.4248, "step": 3560 }, { "epoch": 1.9719427780341485, "grad_norm": 0.2842105031013489, "learning_rate": 3.166152646213822e-06, "loss": 0.4256, "step": 3561 }, { "epoch": 1.972496538994001, "grad_norm": 0.28116005659103394, "learning_rate": 3.1631541963073674e-06, "loss": 0.4114, "step": 3562 }, { "epoch": 1.9730502999538533, "grad_norm": 0.2973566949367523, "learning_rate": 3.1601565098491682e-06, "loss": 0.4137, "step": 3563 }, { "epoch": 1.9736040609137055, "grad_norm": 0.3046891391277313, "learning_rate": 3.1571595880851547e-06, "loss": 0.4345, "step": 3564 }, { "epoch": 1.974157821873558, "grad_norm": 0.28899919986724854, "learning_rate": 3.1541634322609384e-06, "loss": 0.4109, "step": 3565 }, { "epoch": 1.97471158283341, "grad_norm": 0.2911128103733063, "learning_rate": 3.1511680436218105e-06, "loss": 0.3902, "step": 3566 }, { "epoch": 1.9752653437932626, "grad_norm": 0.3096137046813965, "learning_rate": 3.148173423412745e-06, "loss": 0.4889, "step": 3567 }, { "epoch": 1.975819104753115, "grad_norm": 0.30558115243911743, "learning_rate": 3.145179572878395e-06, "loss": 0.4013, "step": 3568 }, { "epoch": 1.976372865712967, "grad_norm": 0.28164494037628174, "learning_rate": 3.1421864932630946e-06, "loss": 0.409, "step": 3569 }, { "epoch": 1.9769266266728196, "grad_norm": 0.3337146043777466, "learning_rate": 3.1391941858108586e-06, "loss": 0.4495, "step": 3570 }, { "epoch": 1.9774803876326719, "grad_norm": 0.29436755180358887, "learning_rate": 3.1362026517653816e-06, "loss": 0.4243, "step": 3571 }, { "epoch": 1.9780341485925241, "grad_norm": 0.26728105545043945, "learning_rate": 3.1332118923700304e-06, "loss": 0.4157, "step": 3572 }, { "epoch": 1.9785879095523766, "grad_norm": 0.2908436059951782, "learning_rate": 3.130221908867859e-06, "loss": 0.4096, "step": 3573 }, { "epoch": 1.979141670512229, "grad_norm": 0.28761598467826843, "learning_rate": 3.1272327025015885e-06, "loss": 0.4175, "step": 3574 }, { "epoch": 1.9796954314720812, "grad_norm": 0.2963384985923767, "learning_rate": 3.1242442745136293e-06, "loss": 0.445, "step": 3575 }, { "epoch": 1.9802491924319336, "grad_norm": 0.2798655331134796, "learning_rate": 3.121256626146058e-06, "loss": 0.3884, "step": 3576 }, { "epoch": 1.980802953391786, "grad_norm": 0.2995126247406006, "learning_rate": 3.118269758640632e-06, "loss": 0.4266, "step": 3577 }, { "epoch": 1.9813567143516382, "grad_norm": 0.3104132413864136, "learning_rate": 3.1152836732387837e-06, "loss": 0.4124, "step": 3578 }, { "epoch": 1.9819104753114907, "grad_norm": 0.31565549969673157, "learning_rate": 3.112298371181617e-06, "loss": 0.428, "step": 3579 }, { "epoch": 1.9824642362713427, "grad_norm": 0.3060174584388733, "learning_rate": 3.109313853709916e-06, "loss": 0.389, "step": 3580 }, { "epoch": 1.9830179972311952, "grad_norm": 0.28016197681427, "learning_rate": 3.1063301220641352e-06, "loss": 0.4176, "step": 3581 }, { "epoch": 1.9835717581910477, "grad_norm": 0.3025878667831421, "learning_rate": 3.1033471774844037e-06, "loss": 0.4542, "step": 3582 }, { "epoch": 1.9841255191508997, "grad_norm": 0.30200865864753723, "learning_rate": 3.1003650212105187e-06, "loss": 0.4376, "step": 3583 }, { "epoch": 1.9846792801107522, "grad_norm": 0.28180795907974243, "learning_rate": 3.097383654481959e-06, "loss": 0.3601, "step": 3584 }, { "epoch": 1.9852330410706045, "grad_norm": 0.3176789879798889, "learning_rate": 3.094403078537868e-06, "loss": 0.4832, "step": 3585 }, { "epoch": 1.9857868020304568, "grad_norm": 0.27177560329437256, "learning_rate": 3.091423294617063e-06, "loss": 0.4021, "step": 3586 }, { "epoch": 1.9863405629903093, "grad_norm": 0.2873704135417938, "learning_rate": 3.0884443039580314e-06, "loss": 0.4321, "step": 3587 }, { "epoch": 1.9868943239501615, "grad_norm": 0.3105296492576599, "learning_rate": 3.0854661077989294e-06, "loss": 0.4712, "step": 3588 }, { "epoch": 1.9874480849100138, "grad_norm": 0.25738874077796936, "learning_rate": 3.0824887073775877e-06, "loss": 0.3946, "step": 3589 }, { "epoch": 1.9880018458698663, "grad_norm": 0.2861161231994629, "learning_rate": 3.079512103931501e-06, "loss": 0.4451, "step": 3590 }, { "epoch": 1.9885556068297185, "grad_norm": 0.3207224905490875, "learning_rate": 3.076536298697838e-06, "loss": 0.4374, "step": 3591 }, { "epoch": 1.9891093677895708, "grad_norm": 0.3133947551250458, "learning_rate": 3.0735612929134296e-06, "loss": 0.4414, "step": 3592 }, { "epoch": 1.9896631287494233, "grad_norm": 0.2730363607406616, "learning_rate": 3.070587087814777e-06, "loss": 0.4181, "step": 3593 }, { "epoch": 1.9902168897092753, "grad_norm": 0.28587111830711365, "learning_rate": 3.0676136846380523e-06, "loss": 0.4171, "step": 3594 }, { "epoch": 1.9907706506691278, "grad_norm": 0.280687153339386, "learning_rate": 3.06464108461909e-06, "loss": 0.3873, "step": 3595 }, { "epoch": 1.99132441162898, "grad_norm": 0.3475039005279541, "learning_rate": 3.0616692889933907e-06, "loss": 0.4699, "step": 3596 }, { "epoch": 1.9918781725888324, "grad_norm": 0.30603885650634766, "learning_rate": 3.058698298996122e-06, "loss": 0.4149, "step": 3597 }, { "epoch": 1.9924319335486849, "grad_norm": 0.27211371064186096, "learning_rate": 3.0557281158621167e-06, "loss": 0.3827, "step": 3598 }, { "epoch": 1.9929856945085371, "grad_norm": 0.2650807499885559, "learning_rate": 3.0527587408258737e-06, "loss": 0.4163, "step": 3599 }, { "epoch": 1.9935394554683894, "grad_norm": 0.331822007894516, "learning_rate": 3.0497901751215544e-06, "loss": 0.435, "step": 3600 }, { "epoch": 1.9940932164282419, "grad_norm": 0.3270885944366455, "learning_rate": 3.046822419982981e-06, "loss": 0.4595, "step": 3601 }, { "epoch": 1.9946469773880942, "grad_norm": 0.27623826265335083, "learning_rate": 3.043855476643642e-06, "loss": 0.421, "step": 3602 }, { "epoch": 1.9952007383479464, "grad_norm": 0.2713167071342468, "learning_rate": 3.040889346336691e-06, "loss": 0.4086, "step": 3603 }, { "epoch": 1.995754499307799, "grad_norm": 0.2947195768356323, "learning_rate": 3.0379240302949397e-06, "loss": 0.4412, "step": 3604 }, { "epoch": 1.9963082602676512, "grad_norm": 0.28442129492759705, "learning_rate": 3.0349595297508615e-06, "loss": 0.4222, "step": 3605 }, { "epoch": 1.9968620212275034, "grad_norm": 0.319937139749527, "learning_rate": 3.0319958459365923e-06, "loss": 0.4563, "step": 3606 }, { "epoch": 1.997415782187356, "grad_norm": 0.29300034046173096, "learning_rate": 3.029032980083927e-06, "loss": 0.3794, "step": 3607 }, { "epoch": 1.997969543147208, "grad_norm": 0.3214569687843323, "learning_rate": 3.0260709334243233e-06, "loss": 0.45, "step": 3608 }, { "epoch": 1.9985233041070605, "grad_norm": 0.31760433316230774, "learning_rate": 3.023109707188897e-06, "loss": 0.4543, "step": 3609 }, { "epoch": 1.9990770650669127, "grad_norm": 0.2794058918952942, "learning_rate": 3.0201493026084204e-06, "loss": 0.4115, "step": 3610 }, { "epoch": 1.999630826026765, "grad_norm": 0.31297415494918823, "learning_rate": 3.0171897209133294e-06, "loss": 0.4184, "step": 3611 }, { "epoch": 2.0001845869866175, "grad_norm": 0.33381083607673645, "learning_rate": 3.0142309633337103e-06, "loss": 0.4491, "step": 3612 }, { "epoch": 2.00073834794647, "grad_norm": 0.30622902512550354, "learning_rate": 3.011273031099317e-06, "loss": 0.4, "step": 3613 }, { "epoch": 2.001292108906322, "grad_norm": 0.28307023644447327, "learning_rate": 3.008315925439552e-06, "loss": 0.3953, "step": 3614 }, { "epoch": 2.0018458698661745, "grad_norm": 0.30677974224090576, "learning_rate": 3.0053596475834777e-06, "loss": 0.4118, "step": 3615 }, { "epoch": 2.0023996308260266, "grad_norm": 0.2919459044933319, "learning_rate": 3.0024041987598127e-06, "loss": 0.423, "step": 3616 }, { "epoch": 2.002953391785879, "grad_norm": 0.29270392656326294, "learning_rate": 2.9994495801969283e-06, "loss": 0.3899, "step": 3617 }, { "epoch": 2.0035071527457315, "grad_norm": 0.286924809217453, "learning_rate": 2.9964957931228544e-06, "loss": 0.3816, "step": 3618 }, { "epoch": 2.0040609137055836, "grad_norm": 0.3057766556739807, "learning_rate": 2.993542838765273e-06, "loss": 0.4018, "step": 3619 }, { "epoch": 2.004614674665436, "grad_norm": 0.2942327857017517, "learning_rate": 2.9905907183515214e-06, "loss": 0.4117, "step": 3620 }, { "epoch": 2.0051684356252886, "grad_norm": 0.29506585001945496, "learning_rate": 2.9876394331085886e-06, "loss": 0.4085, "step": 3621 }, { "epoch": 2.0057221965851406, "grad_norm": 0.2834751009941101, "learning_rate": 2.9846889842631154e-06, "loss": 0.3842, "step": 3622 }, { "epoch": 2.006275957544993, "grad_norm": 0.2962374687194824, "learning_rate": 2.9817393730414e-06, "loss": 0.4069, "step": 3623 }, { "epoch": 2.0068297185048456, "grad_norm": 0.27545493841171265, "learning_rate": 2.97879060066939e-06, "loss": 0.3562, "step": 3624 }, { "epoch": 2.0073834794646976, "grad_norm": 0.30450382828712463, "learning_rate": 2.975842668372681e-06, "loss": 0.4154, "step": 3625 }, { "epoch": 2.00793724042455, "grad_norm": 0.2941696345806122, "learning_rate": 2.972895577376522e-06, "loss": 0.4018, "step": 3626 }, { "epoch": 2.0084910013844026, "grad_norm": 0.30516061186790466, "learning_rate": 2.9699493289058144e-06, "loss": 0.3927, "step": 3627 }, { "epoch": 2.0090447623442547, "grad_norm": 0.3145807385444641, "learning_rate": 2.967003924185107e-06, "loss": 0.4415, "step": 3628 }, { "epoch": 2.009598523304107, "grad_norm": 0.2772063612937927, "learning_rate": 2.9640593644385997e-06, "loss": 0.3783, "step": 3629 }, { "epoch": 2.010152284263959, "grad_norm": 0.311174601316452, "learning_rate": 2.9611156508901373e-06, "loss": 0.4267, "step": 3630 }, { "epoch": 2.0107060452238117, "grad_norm": 0.2954865097999573, "learning_rate": 2.958172784763215e-06, "loss": 0.3794, "step": 3631 }, { "epoch": 2.011259806183664, "grad_norm": 0.2965220808982849, "learning_rate": 2.95523076728098e-06, "loss": 0.4018, "step": 3632 }, { "epoch": 2.0118135671435162, "grad_norm": 0.2780364453792572, "learning_rate": 2.9522895996662216e-06, "loss": 0.3868, "step": 3633 }, { "epoch": 2.0123673281033687, "grad_norm": 0.2936638295650482, "learning_rate": 2.949349283141375e-06, "loss": 0.3933, "step": 3634 }, { "epoch": 2.012921089063221, "grad_norm": 0.3034653663635254, "learning_rate": 2.9464098189285274e-06, "loss": 0.4159, "step": 3635 }, { "epoch": 2.0134748500230732, "grad_norm": 0.30195000767707825, "learning_rate": 2.9434712082494042e-06, "loss": 0.4143, "step": 3636 }, { "epoch": 2.0140286109829257, "grad_norm": 0.3107575476169586, "learning_rate": 2.9405334523253846e-06, "loss": 0.3721, "step": 3637 }, { "epoch": 2.0145823719427782, "grad_norm": 0.3050781786441803, "learning_rate": 2.9375965523774873e-06, "loss": 0.3952, "step": 3638 }, { "epoch": 2.0151361329026303, "grad_norm": 0.2845546305179596, "learning_rate": 2.9346605096263748e-06, "loss": 0.3877, "step": 3639 }, { "epoch": 2.0156898938624828, "grad_norm": 0.2980063259601593, "learning_rate": 2.931725325292355e-06, "loss": 0.3984, "step": 3640 }, { "epoch": 2.016243654822335, "grad_norm": 0.2662931978702545, "learning_rate": 2.928791000595376e-06, "loss": 0.3561, "step": 3641 }, { "epoch": 2.0167974157821873, "grad_norm": 0.2847183048725128, "learning_rate": 2.9258575367550366e-06, "loss": 0.4127, "step": 3642 }, { "epoch": 2.01735117674204, "grad_norm": 0.3080267906188965, "learning_rate": 2.9229249349905686e-06, "loss": 0.4183, "step": 3643 }, { "epoch": 2.017904937701892, "grad_norm": 0.2828117311000824, "learning_rate": 2.9199931965208506e-06, "loss": 0.4218, "step": 3644 }, { "epoch": 2.0184586986617443, "grad_norm": 0.29823037981987, "learning_rate": 2.9170623225643995e-06, "loss": 0.4146, "step": 3645 }, { "epoch": 2.019012459621597, "grad_norm": 0.31444039940834045, "learning_rate": 2.914132314339377e-06, "loss": 0.4345, "step": 3646 }, { "epoch": 2.019566220581449, "grad_norm": 0.3089647591114044, "learning_rate": 2.9112031730635814e-06, "loss": 0.3742, "step": 3647 }, { "epoch": 2.0201199815413013, "grad_norm": 0.2773577868938446, "learning_rate": 2.908274899954454e-06, "loss": 0.3738, "step": 3648 }, { "epoch": 2.020673742501154, "grad_norm": 0.2887389659881592, "learning_rate": 2.9053474962290685e-06, "loss": 0.3868, "step": 3649 }, { "epoch": 2.021227503461006, "grad_norm": 0.28660520911216736, "learning_rate": 2.902420963104142e-06, "loss": 0.4338, "step": 3650 }, { "epoch": 2.0217812644208584, "grad_norm": 0.2875312268733978, "learning_rate": 2.8994953017960337e-06, "loss": 0.391, "step": 3651 }, { "epoch": 2.022335025380711, "grad_norm": 0.25162473320961, "learning_rate": 2.896570513520736e-06, "loss": 0.368, "step": 3652 }, { "epoch": 2.022888786340563, "grad_norm": 0.28234627842903137, "learning_rate": 2.8936465994938747e-06, "loss": 0.4258, "step": 3653 }, { "epoch": 2.0234425473004154, "grad_norm": 0.2905428409576416, "learning_rate": 2.890723560930718e-06, "loss": 0.4145, "step": 3654 }, { "epoch": 2.0239963082602674, "grad_norm": 0.310023695230484, "learning_rate": 2.8878013990461673e-06, "loss": 0.4237, "step": 3655 }, { "epoch": 2.02455006922012, "grad_norm": 0.2912820279598236, "learning_rate": 2.8848801150547663e-06, "loss": 0.3966, "step": 3656 }, { "epoch": 2.0251038301799724, "grad_norm": 0.3003734052181244, "learning_rate": 2.881959710170682e-06, "loss": 0.3789, "step": 3657 }, { "epoch": 2.0256575911398245, "grad_norm": 0.3143835961818695, "learning_rate": 2.8790401856077244e-06, "loss": 0.4112, "step": 3658 }, { "epoch": 2.026211352099677, "grad_norm": 0.2740318775177002, "learning_rate": 2.8761215425793365e-06, "loss": 0.3573, "step": 3659 }, { "epoch": 2.0267651130595294, "grad_norm": 0.29030299186706543, "learning_rate": 2.873203782298591e-06, "loss": 0.4043, "step": 3660 }, { "epoch": 2.0273188740193815, "grad_norm": 0.29025599360466003, "learning_rate": 2.8702869059782025e-06, "loss": 0.418, "step": 3661 }, { "epoch": 2.027872634979234, "grad_norm": 0.2813250422477722, "learning_rate": 2.8673709148305073e-06, "loss": 0.4066, "step": 3662 }, { "epoch": 2.0284263959390865, "grad_norm": 0.28221118450164795, "learning_rate": 2.86445581006748e-06, "loss": 0.3894, "step": 3663 }, { "epoch": 2.0289801568989385, "grad_norm": 0.2792515158653259, "learning_rate": 2.861541592900726e-06, "loss": 0.3918, "step": 3664 }, { "epoch": 2.029533917858791, "grad_norm": 0.27872416377067566, "learning_rate": 2.858628264541482e-06, "loss": 0.4045, "step": 3665 }, { "epoch": 2.0300876788186435, "grad_norm": 0.2900249660015106, "learning_rate": 2.8557158262006135e-06, "loss": 0.4137, "step": 3666 }, { "epoch": 2.0306414397784955, "grad_norm": 0.2820768356323242, "learning_rate": 2.852804279088619e-06, "loss": 0.3876, "step": 3667 }, { "epoch": 2.031195200738348, "grad_norm": 0.2596098780632019, "learning_rate": 2.8498936244156227e-06, "loss": 0.3671, "step": 3668 }, { "epoch": 2.0317489616982, "grad_norm": 0.32204148173332214, "learning_rate": 2.8469838633913792e-06, "loss": 0.4441, "step": 3669 }, { "epoch": 2.0323027226580526, "grad_norm": 0.2776470482349396, "learning_rate": 2.844074997225279e-06, "loss": 0.3776, "step": 3670 }, { "epoch": 2.032856483617905, "grad_norm": 0.3047775626182556, "learning_rate": 2.841167027126328e-06, "loss": 0.4133, "step": 3671 }, { "epoch": 2.033410244577757, "grad_norm": 0.3094249963760376, "learning_rate": 2.8382599543031673e-06, "loss": 0.3798, "step": 3672 }, { "epoch": 2.0339640055376096, "grad_norm": 0.30947771668434143, "learning_rate": 2.835353779964064e-06, "loss": 0.3875, "step": 3673 }, { "epoch": 2.034517766497462, "grad_norm": 0.2881522476673126, "learning_rate": 2.8324485053169113e-06, "loss": 0.3619, "step": 3674 }, { "epoch": 2.035071527457314, "grad_norm": 0.29367634654045105, "learning_rate": 2.8295441315692284e-06, "loss": 0.4008, "step": 3675 }, { "epoch": 2.0356252884171666, "grad_norm": 0.28881412744522095, "learning_rate": 2.8266406599281605e-06, "loss": 0.4305, "step": 3676 }, { "epoch": 2.036179049377019, "grad_norm": 0.25382715463638306, "learning_rate": 2.8237380916004777e-06, "loss": 0.3366, "step": 3677 }, { "epoch": 2.036732810336871, "grad_norm": 0.28394097089767456, "learning_rate": 2.820836427792574e-06, "loss": 0.3823, "step": 3678 }, { "epoch": 2.0372865712967236, "grad_norm": 0.2691429555416107, "learning_rate": 2.8179356697104686e-06, "loss": 0.3839, "step": 3679 }, { "epoch": 2.037840332256576, "grad_norm": 0.28813236951828003, "learning_rate": 2.8150358185598035e-06, "loss": 0.4561, "step": 3680 }, { "epoch": 2.038394093216428, "grad_norm": 0.2942282557487488, "learning_rate": 2.8121368755458435e-06, "loss": 0.3962, "step": 3681 }, { "epoch": 2.0389478541762807, "grad_norm": 0.2798100709915161, "learning_rate": 2.8092388418734764e-06, "loss": 0.3969, "step": 3682 }, { "epoch": 2.0395016151361327, "grad_norm": 0.29696518182754517, "learning_rate": 2.806341718747212e-06, "loss": 0.4058, "step": 3683 }, { "epoch": 2.040055376095985, "grad_norm": 0.31304579973220825, "learning_rate": 2.803445507371182e-06, "loss": 0.3696, "step": 3684 }, { "epoch": 2.0406091370558377, "grad_norm": 0.2922705411911011, "learning_rate": 2.8005502089491387e-06, "loss": 0.4231, "step": 3685 }, { "epoch": 2.0411628980156897, "grad_norm": 0.26448413729667664, "learning_rate": 2.7976558246844553e-06, "loss": 0.3605, "step": 3686 }, { "epoch": 2.041716658975542, "grad_norm": 0.29372894763946533, "learning_rate": 2.794762355780124e-06, "loss": 0.4401, "step": 3687 }, { "epoch": 2.0422704199353947, "grad_norm": 0.33206045627593994, "learning_rate": 2.791869803438758e-06, "loss": 0.4205, "step": 3688 }, { "epoch": 2.0428241808952468, "grad_norm": 0.3113385736942291, "learning_rate": 2.788978168862588e-06, "loss": 0.3982, "step": 3689 }, { "epoch": 2.0433779418550992, "grad_norm": 0.26107579469680786, "learning_rate": 2.7860874532534653e-06, "loss": 0.3706, "step": 3690 }, { "epoch": 2.0439317028149517, "grad_norm": 0.2729372978210449, "learning_rate": 2.783197657812858e-06, "loss": 0.386, "step": 3691 }, { "epoch": 2.044485463774804, "grad_norm": 0.2940216362476349, "learning_rate": 2.7803087837418506e-06, "loss": 0.3995, "step": 3692 }, { "epoch": 2.0450392247346563, "grad_norm": 0.3003492057323456, "learning_rate": 2.7774208322411474e-06, "loss": 0.4011, "step": 3693 }, { "epoch": 2.0455929856945083, "grad_norm": 0.3122671842575073, "learning_rate": 2.7745338045110663e-06, "loss": 0.4428, "step": 3694 }, { "epoch": 2.046146746654361, "grad_norm": 0.2818413972854614, "learning_rate": 2.7716477017515444e-06, "loss": 0.402, "step": 3695 }, { "epoch": 2.0467005076142133, "grad_norm": 0.3161904215812683, "learning_rate": 2.768762525162131e-06, "loss": 0.3995, "step": 3696 }, { "epoch": 2.0472542685740653, "grad_norm": 0.28692692518234253, "learning_rate": 2.7658782759419944e-06, "loss": 0.3524, "step": 3697 }, { "epoch": 2.047808029533918, "grad_norm": 0.3644881844520569, "learning_rate": 2.76299495528991e-06, "loss": 0.4178, "step": 3698 }, { "epoch": 2.0483617904937703, "grad_norm": 0.31846383213996887, "learning_rate": 2.7601125644042777e-06, "loss": 0.4137, "step": 3699 }, { "epoch": 2.0489155514536224, "grad_norm": 0.2794274687767029, "learning_rate": 2.757231104483104e-06, "loss": 0.3686, "step": 3700 }, { "epoch": 2.049469312413475, "grad_norm": 0.3271770179271698, "learning_rate": 2.754350576724012e-06, "loss": 0.3939, "step": 3701 }, { "epoch": 2.0500230733733273, "grad_norm": 0.366217702627182, "learning_rate": 2.7514709823242317e-06, "loss": 0.3985, "step": 3702 }, { "epoch": 2.0505768343331794, "grad_norm": 0.35056981444358826, "learning_rate": 2.748592322480609e-06, "loss": 0.4009, "step": 3703 }, { "epoch": 2.051130595293032, "grad_norm": 0.2726879417896271, "learning_rate": 2.745714598389605e-06, "loss": 0.3623, "step": 3704 }, { "epoch": 2.0516843562528844, "grad_norm": 0.31392666697502136, "learning_rate": 2.742837811247287e-06, "loss": 0.4443, "step": 3705 }, { "epoch": 2.0522381172127364, "grad_norm": 0.31276780366897583, "learning_rate": 2.739961962249335e-06, "loss": 0.4039, "step": 3706 }, { "epoch": 2.052791878172589, "grad_norm": 0.3119528889656067, "learning_rate": 2.7370870525910347e-06, "loss": 0.3972, "step": 3707 }, { "epoch": 2.053345639132441, "grad_norm": 0.299368292093277, "learning_rate": 2.7342130834672853e-06, "loss": 0.4253, "step": 3708 }, { "epoch": 2.0538994000922934, "grad_norm": 0.2888703942298889, "learning_rate": 2.731340056072598e-06, "loss": 0.409, "step": 3709 }, { "epoch": 2.054453161052146, "grad_norm": 0.30038759112358093, "learning_rate": 2.7284679716010886e-06, "loss": 0.3944, "step": 3710 }, { "epoch": 2.055006922011998, "grad_norm": 0.2910996377468109, "learning_rate": 2.7255968312464792e-06, "loss": 0.4062, "step": 3711 }, { "epoch": 2.0555606829718505, "grad_norm": 0.2722136676311493, "learning_rate": 2.722726636202101e-06, "loss": 0.4223, "step": 3712 }, { "epoch": 2.056114443931703, "grad_norm": 0.2732338011264801, "learning_rate": 2.7198573876608976e-06, "loss": 0.3676, "step": 3713 }, { "epoch": 2.056668204891555, "grad_norm": 0.2930958867073059, "learning_rate": 2.7169890868154124e-06, "loss": 0.4235, "step": 3714 }, { "epoch": 2.0572219658514075, "grad_norm": 0.29875579476356506, "learning_rate": 2.7141217348578e-06, "loss": 0.4139, "step": 3715 }, { "epoch": 2.05777572681126, "grad_norm": 0.27339401841163635, "learning_rate": 2.711255332979813e-06, "loss": 0.415, "step": 3716 }, { "epoch": 2.058329487771112, "grad_norm": 0.2756052017211914, "learning_rate": 2.7083898823728145e-06, "loss": 0.4238, "step": 3717 }, { "epoch": 2.0588832487309645, "grad_norm": 0.27331265807151794, "learning_rate": 2.7055253842277764e-06, "loss": 0.4006, "step": 3718 }, { "epoch": 2.059437009690817, "grad_norm": 0.2738235890865326, "learning_rate": 2.7026618397352688e-06, "loss": 0.3827, "step": 3719 }, { "epoch": 2.059990770650669, "grad_norm": 0.28176453709602356, "learning_rate": 2.6997992500854644e-06, "loss": 0.3936, "step": 3720 }, { "epoch": 2.0605445316105215, "grad_norm": 0.2816528379917145, "learning_rate": 2.6969376164681416e-06, "loss": 0.4035, "step": 3721 }, { "epoch": 2.0610982925703736, "grad_norm": 0.32517483830451965, "learning_rate": 2.694076940072681e-06, "loss": 0.4484, "step": 3722 }, { "epoch": 2.061652053530226, "grad_norm": 0.2778967320919037, "learning_rate": 2.6912172220880704e-06, "loss": 0.4007, "step": 3723 }, { "epoch": 2.0622058144900786, "grad_norm": 0.28312990069389343, "learning_rate": 2.6883584637028893e-06, "loss": 0.4028, "step": 3724 }, { "epoch": 2.0627595754499306, "grad_norm": 0.28735044598579407, "learning_rate": 2.685500666105324e-06, "loss": 0.4045, "step": 3725 }, { "epoch": 2.063313336409783, "grad_norm": 0.3035443425178528, "learning_rate": 2.6826438304831627e-06, "loss": 0.3471, "step": 3726 }, { "epoch": 2.0638670973696356, "grad_norm": 0.31861135363578796, "learning_rate": 2.6797879580237897e-06, "loss": 0.43, "step": 3727 }, { "epoch": 2.0644208583294876, "grad_norm": 0.27022549510002136, "learning_rate": 2.6769330499141955e-06, "loss": 0.3607, "step": 3728 }, { "epoch": 2.06497461928934, "grad_norm": 0.29104694724082947, "learning_rate": 2.674079107340962e-06, "loss": 0.386, "step": 3729 }, { "epoch": 2.0655283802491926, "grad_norm": 0.33319151401519775, "learning_rate": 2.6712261314902745e-06, "loss": 0.408, "step": 3730 }, { "epoch": 2.0660821412090447, "grad_norm": 0.28962379693984985, "learning_rate": 2.6683741235479134e-06, "loss": 0.4304, "step": 3731 }, { "epoch": 2.066635902168897, "grad_norm": 0.5693771243095398, "learning_rate": 2.665523084699265e-06, "loss": 0.4075, "step": 3732 }, { "epoch": 2.0671896631287496, "grad_norm": 0.2971894443035126, "learning_rate": 2.6626730161293003e-06, "loss": 0.4298, "step": 3733 }, { "epoch": 2.0677434240886017, "grad_norm": 0.2961648404598236, "learning_rate": 2.659823919022595e-06, "loss": 0.4081, "step": 3734 }, { "epoch": 2.068297185048454, "grad_norm": 0.27772256731987, "learning_rate": 2.65697579456332e-06, "loss": 0.3952, "step": 3735 }, { "epoch": 2.068850946008306, "grad_norm": 0.26119667291641235, "learning_rate": 2.6541286439352416e-06, "loss": 0.3784, "step": 3736 }, { "epoch": 2.0694047069681587, "grad_norm": 0.29653218388557434, "learning_rate": 2.6512824683217203e-06, "loss": 0.401, "step": 3737 }, { "epoch": 2.069958467928011, "grad_norm": 0.2839754521846771, "learning_rate": 2.6484372689057124e-06, "loss": 0.3922, "step": 3738 }, { "epoch": 2.0705122288878632, "grad_norm": 0.27544739842414856, "learning_rate": 2.6455930468697687e-06, "loss": 0.3995, "step": 3739 }, { "epoch": 2.0710659898477157, "grad_norm": 0.30643734335899353, "learning_rate": 2.6427498033960324e-06, "loss": 0.4132, "step": 3740 }, { "epoch": 2.071619750807568, "grad_norm": 0.2657535970211029, "learning_rate": 2.6399075396662417e-06, "loss": 0.3639, "step": 3741 }, { "epoch": 2.0721735117674203, "grad_norm": 0.2664085328578949, "learning_rate": 2.6370662568617257e-06, "loss": 0.4086, "step": 3742 }, { "epoch": 2.0727272727272728, "grad_norm": 0.2826140820980072, "learning_rate": 2.6342259561634077e-06, "loss": 0.4041, "step": 3743 }, { "epoch": 2.0732810336871252, "grad_norm": 0.28652048110961914, "learning_rate": 2.631386638751802e-06, "loss": 0.3865, "step": 3744 }, { "epoch": 2.0738347946469773, "grad_norm": 0.2801413834095001, "learning_rate": 2.6285483058070137e-06, "loss": 0.3957, "step": 3745 }, { "epoch": 2.0743885556068298, "grad_norm": 0.2838718295097351, "learning_rate": 2.6257109585087393e-06, "loss": 0.385, "step": 3746 }, { "epoch": 2.0749423165666823, "grad_norm": 0.31632545590400696, "learning_rate": 2.622874598036265e-06, "loss": 0.4401, "step": 3747 }, { "epoch": 2.0754960775265343, "grad_norm": 0.27622953057289124, "learning_rate": 2.620039225568468e-06, "loss": 0.4212, "step": 3748 }, { "epoch": 2.076049838486387, "grad_norm": 0.2716497778892517, "learning_rate": 2.6172048422838146e-06, "loss": 0.4076, "step": 3749 }, { "epoch": 2.076603599446239, "grad_norm": 0.26099124550819397, "learning_rate": 2.6143714493603587e-06, "loss": 0.3918, "step": 3750 }, { "epoch": 2.0771573604060913, "grad_norm": 0.28657370805740356, "learning_rate": 2.611539047975744e-06, "loss": 0.41, "step": 3751 }, { "epoch": 2.077711121365944, "grad_norm": 0.30911746621131897, "learning_rate": 2.6087076393072015e-06, "loss": 0.3755, "step": 3752 }, { "epoch": 2.078264882325796, "grad_norm": 0.293660968542099, "learning_rate": 2.605877224531549e-06, "loss": 0.4224, "step": 3753 }, { "epoch": 2.0788186432856484, "grad_norm": 0.2599320113658905, "learning_rate": 2.6030478048251927e-06, "loss": 0.3899, "step": 3754 }, { "epoch": 2.079372404245501, "grad_norm": 0.2956102192401886, "learning_rate": 2.6002193813641242e-06, "loss": 0.4039, "step": 3755 }, { "epoch": 2.079926165205353, "grad_norm": 0.36120152473449707, "learning_rate": 2.5973919553239203e-06, "loss": 0.4488, "step": 3756 }, { "epoch": 2.0804799261652054, "grad_norm": 0.3161151111125946, "learning_rate": 2.5945655278797455e-06, "loss": 0.3888, "step": 3757 }, { "epoch": 2.081033687125058, "grad_norm": 0.2819730043411255, "learning_rate": 2.591740100206347e-06, "loss": 0.4201, "step": 3758 }, { "epoch": 2.08158744808491, "grad_norm": 0.28260669112205505, "learning_rate": 2.5889156734780572e-06, "loss": 0.4043, "step": 3759 }, { "epoch": 2.0821412090447624, "grad_norm": 0.3008151948451996, "learning_rate": 2.586092248868794e-06, "loss": 0.3905, "step": 3760 }, { "epoch": 2.0826949700046145, "grad_norm": 0.34845277667045593, "learning_rate": 2.5832698275520557e-06, "loss": 0.4166, "step": 3761 }, { "epoch": 2.083248730964467, "grad_norm": 0.3104749321937561, "learning_rate": 2.5804484107009265e-06, "loss": 0.3929, "step": 3762 }, { "epoch": 2.0838024919243194, "grad_norm": 0.27857109904289246, "learning_rate": 2.5776279994880715e-06, "loss": 0.4202, "step": 3763 }, { "epoch": 2.0843562528841715, "grad_norm": 0.310429185628891, "learning_rate": 2.57480859508574e-06, "loss": 0.4193, "step": 3764 }, { "epoch": 2.084910013844024, "grad_norm": 0.2823527455329895, "learning_rate": 2.571990198665756e-06, "loss": 0.4041, "step": 3765 }, { "epoch": 2.0854637748038765, "grad_norm": 0.26771241426467896, "learning_rate": 2.569172811399534e-06, "loss": 0.3555, "step": 3766 }, { "epoch": 2.0860175357637285, "grad_norm": 0.26783987879753113, "learning_rate": 2.566356434458065e-06, "loss": 0.3882, "step": 3767 }, { "epoch": 2.086571296723581, "grad_norm": 0.2736116051673889, "learning_rate": 2.5635410690119193e-06, "loss": 0.3959, "step": 3768 }, { "epoch": 2.0871250576834335, "grad_norm": 0.28292298316955566, "learning_rate": 2.5607267162312453e-06, "loss": 0.436, "step": 3769 }, { "epoch": 2.0876788186432855, "grad_norm": 0.27733075618743896, "learning_rate": 2.5579133772857722e-06, "loss": 0.3966, "step": 3770 }, { "epoch": 2.088232579603138, "grad_norm": 0.29938697814941406, "learning_rate": 2.5551010533448112e-06, "loss": 0.4279, "step": 3771 }, { "epoch": 2.0887863405629905, "grad_norm": 0.2993182837963104, "learning_rate": 2.552289745577247e-06, "loss": 0.3978, "step": 3772 }, { "epoch": 2.0893401015228426, "grad_norm": 0.28126341104507446, "learning_rate": 2.5494794551515456e-06, "loss": 0.4006, "step": 3773 }, { "epoch": 2.089893862482695, "grad_norm": 0.3164224624633789, "learning_rate": 2.546670183235743e-06, "loss": 0.4568, "step": 3774 }, { "epoch": 2.090447623442547, "grad_norm": 0.27199873328208923, "learning_rate": 2.543861930997461e-06, "loss": 0.3689, "step": 3775 }, { "epoch": 2.0910013844023996, "grad_norm": 0.3157602846622467, "learning_rate": 2.541054699603892e-06, "loss": 0.4328, "step": 3776 }, { "epoch": 2.091555145362252, "grad_norm": 0.2680946886539459, "learning_rate": 2.538248490221809e-06, "loss": 0.3965, "step": 3777 }, { "epoch": 2.092108906322104, "grad_norm": 0.27443650364875793, "learning_rate": 2.535443304017552e-06, "loss": 0.4359, "step": 3778 }, { "epoch": 2.0926626672819566, "grad_norm": 0.2685670852661133, "learning_rate": 2.5326391421570408e-06, "loss": 0.379, "step": 3779 }, { "epoch": 2.093216428241809, "grad_norm": 0.30289655923843384, "learning_rate": 2.5298360058057727e-06, "loss": 0.4282, "step": 3780 }, { "epoch": 2.093770189201661, "grad_norm": 0.26567935943603516, "learning_rate": 2.5270338961288153e-06, "loss": 0.3868, "step": 3781 }, { "epoch": 2.0943239501615136, "grad_norm": 0.2651604115962982, "learning_rate": 2.5242328142908056e-06, "loss": 0.3664, "step": 3782 }, { "epoch": 2.094877711121366, "grad_norm": 0.3138391673564911, "learning_rate": 2.5214327614559606e-06, "loss": 0.4484, "step": 3783 }, { "epoch": 2.095431472081218, "grad_norm": 0.29014983773231506, "learning_rate": 2.5186337387880635e-06, "loss": 0.4158, "step": 3784 }, { "epoch": 2.0959852330410707, "grad_norm": 0.27504751086235046, "learning_rate": 2.5158357474504744e-06, "loss": 0.3978, "step": 3785 }, { "epoch": 2.096538994000923, "grad_norm": 0.3337762653827667, "learning_rate": 2.5130387886061253e-06, "loss": 0.4197, "step": 3786 }, { "epoch": 2.097092754960775, "grad_norm": 0.3011614978313446, "learning_rate": 2.51024286341751e-06, "loss": 0.4096, "step": 3787 }, { "epoch": 2.0976465159206277, "grad_norm": 0.2766771614551544, "learning_rate": 2.5074479730467026e-06, "loss": 0.3991, "step": 3788 }, { "epoch": 2.0982002768804797, "grad_norm": 0.3221772611141205, "learning_rate": 2.5046541186553406e-06, "loss": 0.4531, "step": 3789 }, { "epoch": 2.098754037840332, "grad_norm": 0.2934660315513611, "learning_rate": 2.50186130140464e-06, "loss": 0.3756, "step": 3790 }, { "epoch": 2.0993077988001847, "grad_norm": 0.27576345205307007, "learning_rate": 2.499069522455374e-06, "loss": 0.3657, "step": 3791 }, { "epoch": 2.0998615597600367, "grad_norm": 0.30402782559394836, "learning_rate": 2.49627878296789e-06, "loss": 0.4452, "step": 3792 }, { "epoch": 2.1004153207198892, "grad_norm": 0.2839505672454834, "learning_rate": 2.4934890841021053e-06, "loss": 0.3912, "step": 3793 }, { "epoch": 2.1009690816797417, "grad_norm": 0.3002185523509979, "learning_rate": 2.490700427017499e-06, "loss": 0.4062, "step": 3794 }, { "epoch": 2.1015228426395938, "grad_norm": 0.29272040724754333, "learning_rate": 2.487912812873127e-06, "loss": 0.4534, "step": 3795 }, { "epoch": 2.1020766035994463, "grad_norm": 0.26748403906822205, "learning_rate": 2.4851262428275997e-06, "loss": 0.3935, "step": 3796 }, { "epoch": 2.1026303645592987, "grad_norm": 0.27699509263038635, "learning_rate": 2.4823407180391008e-06, "loss": 0.3954, "step": 3797 }, { "epoch": 2.103184125519151, "grad_norm": 0.2818959057331085, "learning_rate": 2.4795562396653767e-06, "loss": 0.4137, "step": 3798 }, { "epoch": 2.1037378864790033, "grad_norm": 0.3039186894893646, "learning_rate": 2.4767728088637455e-06, "loss": 0.3883, "step": 3799 }, { "epoch": 2.1042916474388558, "grad_norm": 0.29870668053627014, "learning_rate": 2.473990426791079e-06, "loss": 0.399, "step": 3800 }, { "epoch": 2.104845408398708, "grad_norm": 0.2844809591770172, "learning_rate": 2.4712090946038213e-06, "loss": 0.4093, "step": 3801 }, { "epoch": 2.1053991693585603, "grad_norm": 0.2878606617450714, "learning_rate": 2.4684288134579765e-06, "loss": 0.4034, "step": 3802 }, { "epoch": 2.1059529303184124, "grad_norm": 0.279180109500885, "learning_rate": 2.4656495845091134e-06, "loss": 0.3945, "step": 3803 }, { "epoch": 2.106506691278265, "grad_norm": 0.3070521652698517, "learning_rate": 2.4628714089123623e-06, "loss": 0.4232, "step": 3804 }, { "epoch": 2.1070604522381173, "grad_norm": 0.2857449948787689, "learning_rate": 2.460094287822418e-06, "loss": 0.3893, "step": 3805 }, { "epoch": 2.1076142131979694, "grad_norm": 0.28847870230674744, "learning_rate": 2.457318222393533e-06, "loss": 0.3968, "step": 3806 }, { "epoch": 2.108167974157822, "grad_norm": 0.2849317789077759, "learning_rate": 2.454543213779526e-06, "loss": 0.4241, "step": 3807 }, { "epoch": 2.1087217351176744, "grad_norm": 0.2723308503627777, "learning_rate": 2.4517692631337723e-06, "loss": 0.3675, "step": 3808 }, { "epoch": 2.1092754960775264, "grad_norm": 0.2994322180747986, "learning_rate": 2.4489963716092096e-06, "loss": 0.4201, "step": 3809 }, { "epoch": 2.109829257037379, "grad_norm": 0.2867002487182617, "learning_rate": 2.4462245403583344e-06, "loss": 0.4059, "step": 3810 }, { "epoch": 2.1103830179972314, "grad_norm": 0.2949376702308655, "learning_rate": 2.443453770533204e-06, "loss": 0.4196, "step": 3811 }, { "epoch": 2.1109367789570834, "grad_norm": 0.291920006275177, "learning_rate": 2.4406840632854316e-06, "loss": 0.3835, "step": 3812 }, { "epoch": 2.111490539916936, "grad_norm": 0.24504922330379486, "learning_rate": 2.437915419766193e-06, "loss": 0.3648, "step": 3813 }, { "epoch": 2.1120443008767884, "grad_norm": 0.27524930238723755, "learning_rate": 2.435147841126218e-06, "loss": 0.4038, "step": 3814 }, { "epoch": 2.1125980618366405, "grad_norm": 0.27393636107444763, "learning_rate": 2.4323813285157954e-06, "loss": 0.3765, "step": 3815 }, { "epoch": 2.113151822796493, "grad_norm": 0.2827381491661072, "learning_rate": 2.429615883084772e-06, "loss": 0.3859, "step": 3816 }, { "epoch": 2.113705583756345, "grad_norm": 0.28573843836784363, "learning_rate": 2.4268515059825486e-06, "loss": 0.4175, "step": 3817 }, { "epoch": 2.1142593447161975, "grad_norm": 0.27809077501296997, "learning_rate": 2.4240881983580844e-06, "loss": 0.3835, "step": 3818 }, { "epoch": 2.11481310567605, "grad_norm": 0.2724258005619049, "learning_rate": 2.421325961359892e-06, "loss": 0.4177, "step": 3819 }, { "epoch": 2.115366866635902, "grad_norm": 0.2860298156738281, "learning_rate": 2.4185647961360413e-06, "loss": 0.4188, "step": 3820 }, { "epoch": 2.1159206275957545, "grad_norm": 0.2689015865325928, "learning_rate": 2.4158047038341543e-06, "loss": 0.4359, "step": 3821 }, { "epoch": 2.116474388555607, "grad_norm": 0.26045888662338257, "learning_rate": 2.413045685601409e-06, "loss": 0.4014, "step": 3822 }, { "epoch": 2.117028149515459, "grad_norm": 0.27137961983680725, "learning_rate": 2.410287742584535e-06, "loss": 0.3912, "step": 3823 }, { "epoch": 2.1175819104753115, "grad_norm": 0.2803013026714325, "learning_rate": 2.407530875929818e-06, "loss": 0.3889, "step": 3824 }, { "epoch": 2.118135671435164, "grad_norm": 0.26989254355430603, "learning_rate": 2.404775086783093e-06, "loss": 0.3931, "step": 3825 }, { "epoch": 2.118689432395016, "grad_norm": 0.2577977478504181, "learning_rate": 2.4020203762897508e-06, "loss": 0.3816, "step": 3826 }, { "epoch": 2.1192431933548685, "grad_norm": 0.271791011095047, "learning_rate": 2.3992667455947273e-06, "loss": 0.4176, "step": 3827 }, { "epoch": 2.1197969543147206, "grad_norm": 0.283843070268631, "learning_rate": 2.3965141958425185e-06, "loss": 0.3963, "step": 3828 }, { "epoch": 2.120350715274573, "grad_norm": 0.31291142106056213, "learning_rate": 2.393762728177165e-06, "loss": 0.4464, "step": 3829 }, { "epoch": 2.1209044762344256, "grad_norm": 0.2689676880836487, "learning_rate": 2.3910123437422605e-06, "loss": 0.3917, "step": 3830 }, { "epoch": 2.1214582371942776, "grad_norm": 0.28036364912986755, "learning_rate": 2.388263043680948e-06, "loss": 0.4117, "step": 3831 }, { "epoch": 2.12201199815413, "grad_norm": 0.3066314160823822, "learning_rate": 2.385514829135913e-06, "loss": 0.3947, "step": 3832 }, { "epoch": 2.1225657591139826, "grad_norm": 0.2649688124656677, "learning_rate": 2.3827677012494033e-06, "loss": 0.3755, "step": 3833 }, { "epoch": 2.1231195200738346, "grad_norm": 0.29979291558265686, "learning_rate": 2.3800216611632044e-06, "loss": 0.4168, "step": 3834 }, { "epoch": 2.123673281033687, "grad_norm": 0.3086956739425659, "learning_rate": 2.377276710018656e-06, "loss": 0.4314, "step": 3835 }, { "epoch": 2.1242270419935396, "grad_norm": 0.2775607705116272, "learning_rate": 2.3745328489566373e-06, "loss": 0.3873, "step": 3836 }, { "epoch": 2.1247808029533917, "grad_norm": 0.2612302601337433, "learning_rate": 2.37179007911758e-06, "loss": 0.3717, "step": 3837 }, { "epoch": 2.125334563913244, "grad_norm": 0.2906816601753235, "learning_rate": 2.3690484016414655e-06, "loss": 0.4336, "step": 3838 }, { "epoch": 2.1258883248730966, "grad_norm": 0.29077500104904175, "learning_rate": 2.366307817667815e-06, "loss": 0.3912, "step": 3839 }, { "epoch": 2.1264420858329487, "grad_norm": 0.2996022701263428, "learning_rate": 2.3635683283357e-06, "loss": 0.4346, "step": 3840 }, { "epoch": 2.126995846792801, "grad_norm": 0.279100239276886, "learning_rate": 2.3608299347837277e-06, "loss": 0.3886, "step": 3841 }, { "epoch": 2.1275496077526537, "grad_norm": 0.28437671065330505, "learning_rate": 2.358092638150063e-06, "loss": 0.4127, "step": 3842 }, { "epoch": 2.1281033687125057, "grad_norm": 0.3163800537586212, "learning_rate": 2.3553564395724066e-06, "loss": 0.4022, "step": 3843 }, { "epoch": 2.128657129672358, "grad_norm": 0.2944936752319336, "learning_rate": 2.3526213401880066e-06, "loss": 0.4025, "step": 3844 }, { "epoch": 2.1292108906322103, "grad_norm": 0.2850501835346222, "learning_rate": 2.3498873411336485e-06, "loss": 0.3978, "step": 3845 }, { "epoch": 2.1297646515920627, "grad_norm": 0.30216071009635925, "learning_rate": 2.347154443545665e-06, "loss": 0.394, "step": 3846 }, { "epoch": 2.1303184125519152, "grad_norm": 0.28608980774879456, "learning_rate": 2.344422648559934e-06, "loss": 0.4088, "step": 3847 }, { "epoch": 2.1308721735117673, "grad_norm": 0.3017576038837433, "learning_rate": 2.3416919573118707e-06, "loss": 0.3917, "step": 3848 }, { "epoch": 2.1314259344716198, "grad_norm": 0.27331677079200745, "learning_rate": 2.3389623709364297e-06, "loss": 0.407, "step": 3849 }, { "epoch": 2.1319796954314723, "grad_norm": 0.3048727512359619, "learning_rate": 2.3362338905681097e-06, "loss": 0.4286, "step": 3850 }, { "epoch": 2.1325334563913243, "grad_norm": 0.2756733298301697, "learning_rate": 2.333506517340948e-06, "loss": 0.3825, "step": 3851 }, { "epoch": 2.133087217351177, "grad_norm": 0.27366364002227783, "learning_rate": 2.3307802523885265e-06, "loss": 0.3793, "step": 3852 }, { "epoch": 2.1336409783110293, "grad_norm": 0.30011799931526184, "learning_rate": 2.3280550968439624e-06, "loss": 0.4233, "step": 3853 }, { "epoch": 2.1341947392708813, "grad_norm": 0.31364160776138306, "learning_rate": 2.325331051839909e-06, "loss": 0.4246, "step": 3854 }, { "epoch": 2.134748500230734, "grad_norm": 0.28782933950424194, "learning_rate": 2.3226081185085624e-06, "loss": 0.396, "step": 3855 }, { "epoch": 2.135302261190586, "grad_norm": 0.3116982877254486, "learning_rate": 2.3198862979816538e-06, "loss": 0.4245, "step": 3856 }, { "epoch": 2.1358560221504383, "grad_norm": 0.25993838906288147, "learning_rate": 2.3171655913904596e-06, "loss": 0.3517, "step": 3857 }, { "epoch": 2.136409783110291, "grad_norm": 0.29636189341545105, "learning_rate": 2.314445999865781e-06, "loss": 0.3935, "step": 3858 }, { "epoch": 2.136963544070143, "grad_norm": 0.2868352234363556, "learning_rate": 2.3117275245379635e-06, "loss": 0.4146, "step": 3859 }, { "epoch": 2.1375173050299954, "grad_norm": 0.2873097062110901, "learning_rate": 2.309010166536885e-06, "loss": 0.4386, "step": 3860 }, { "epoch": 2.138071065989848, "grad_norm": 0.2858348786830902, "learning_rate": 2.3062939269919684e-06, "loss": 0.4004, "step": 3861 }, { "epoch": 2.1386248269497, "grad_norm": 0.2635882496833801, "learning_rate": 2.303578807032157e-06, "loss": 0.3627, "step": 3862 }, { "epoch": 2.1391785879095524, "grad_norm": 0.28865930438041687, "learning_rate": 2.3008648077859388e-06, "loss": 0.4172, "step": 3863 }, { "epoch": 2.139732348869405, "grad_norm": 0.27644819021224976, "learning_rate": 2.2981519303813327e-06, "loss": 0.3905, "step": 3864 }, { "epoch": 2.140286109829257, "grad_norm": 0.26638978719711304, "learning_rate": 2.2954401759458904e-06, "loss": 0.3852, "step": 3865 }, { "epoch": 2.1408398707891094, "grad_norm": 0.30779963731765747, "learning_rate": 2.292729545606705e-06, "loss": 0.4091, "step": 3866 }, { "epoch": 2.1413936317489615, "grad_norm": 0.29210713505744934, "learning_rate": 2.2900200404903892e-06, "loss": 0.4079, "step": 3867 }, { "epoch": 2.141947392708814, "grad_norm": 0.27390968799591064, "learning_rate": 2.2873116617230967e-06, "loss": 0.3784, "step": 3868 }, { "epoch": 2.1425011536686664, "grad_norm": 0.3007439970970154, "learning_rate": 2.2846044104305115e-06, "loss": 0.4389, "step": 3869 }, { "epoch": 2.1430549146285185, "grad_norm": 0.27235597372055054, "learning_rate": 2.281898287737848e-06, "loss": 0.3915, "step": 3870 }, { "epoch": 2.143608675588371, "grad_norm": 0.2721829414367676, "learning_rate": 2.2791932947698515e-06, "loss": 0.3599, "step": 3871 }, { "epoch": 2.1441624365482235, "grad_norm": 0.29906633496284485, "learning_rate": 2.2764894326508e-06, "loss": 0.4354, "step": 3872 }, { "epoch": 2.1447161975080755, "grad_norm": 0.2825183868408203, "learning_rate": 2.2737867025044975e-06, "loss": 0.41, "step": 3873 }, { "epoch": 2.145269958467928, "grad_norm": 0.2702684700489044, "learning_rate": 2.2710851054542814e-06, "loss": 0.371, "step": 3874 }, { "epoch": 2.1458237194277805, "grad_norm": 0.30350565910339355, "learning_rate": 2.2683846426230166e-06, "loss": 0.4268, "step": 3875 }, { "epoch": 2.1463774803876325, "grad_norm": 0.33377349376678467, "learning_rate": 2.265685315133096e-06, "loss": 0.4338, "step": 3876 }, { "epoch": 2.146931241347485, "grad_norm": 0.2618556320667267, "learning_rate": 2.262987124106441e-06, "loss": 0.3847, "step": 3877 }, { "epoch": 2.1474850023073375, "grad_norm": 0.2969992160797119, "learning_rate": 2.2602900706645007e-06, "loss": 0.404, "step": 3878 }, { "epoch": 2.1480387632671896, "grad_norm": 0.2739669680595398, "learning_rate": 2.2575941559282523e-06, "loss": 0.4017, "step": 3879 }, { "epoch": 2.148592524227042, "grad_norm": 0.2721763551235199, "learning_rate": 2.254899381018198e-06, "loss": 0.4012, "step": 3880 }, { "epoch": 2.1491462851868945, "grad_norm": 0.33546876907348633, "learning_rate": 2.2522057470543672e-06, "loss": 0.4261, "step": 3881 }, { "epoch": 2.1497000461467466, "grad_norm": 0.2831169664859772, "learning_rate": 2.249513255156316e-06, "loss": 0.4161, "step": 3882 }, { "epoch": 2.150253807106599, "grad_norm": 0.2820163369178772, "learning_rate": 2.2468219064431235e-06, "loss": 0.3892, "step": 3883 }, { "epoch": 2.150807568066451, "grad_norm": 0.2977032959461212, "learning_rate": 2.2441317020333962e-06, "loss": 0.4219, "step": 3884 }, { "epoch": 2.1513613290263036, "grad_norm": 0.2954150438308716, "learning_rate": 2.241442643045263e-06, "loss": 0.388, "step": 3885 }, { "epoch": 2.151915089986156, "grad_norm": 0.2830072045326233, "learning_rate": 2.238754730596378e-06, "loss": 0.3963, "step": 3886 }, { "epoch": 2.152468850946008, "grad_norm": 0.2863837778568268, "learning_rate": 2.236067965803918e-06, "loss": 0.4156, "step": 3887 }, { "epoch": 2.1530226119058606, "grad_norm": 0.2852438986301422, "learning_rate": 2.2333823497845836e-06, "loss": 0.4515, "step": 3888 }, { "epoch": 2.153576372865713, "grad_norm": 0.26495620608329773, "learning_rate": 2.2306978836545974e-06, "loss": 0.3823, "step": 3889 }, { "epoch": 2.154130133825565, "grad_norm": 0.29320308566093445, "learning_rate": 2.2280145685297026e-06, "loss": 0.4017, "step": 3890 }, { "epoch": 2.1546838947854177, "grad_norm": 0.28081968426704407, "learning_rate": 2.225332405525168e-06, "loss": 0.4049, "step": 3891 }, { "epoch": 2.15523765574527, "grad_norm": 0.2758653461933136, "learning_rate": 2.222651395755779e-06, "loss": 0.388, "step": 3892 }, { "epoch": 2.155791416705122, "grad_norm": 0.3054896891117096, "learning_rate": 2.219971540335847e-06, "loss": 0.4417, "step": 3893 }, { "epoch": 2.1563451776649747, "grad_norm": 0.27716824412345886, "learning_rate": 2.217292840379194e-06, "loss": 0.3762, "step": 3894 }, { "epoch": 2.1568989386248267, "grad_norm": 0.2844136357307434, "learning_rate": 2.214615296999173e-06, "loss": 0.409, "step": 3895 }, { "epoch": 2.1574526995846792, "grad_norm": 0.2801808714866638, "learning_rate": 2.2119389113086514e-06, "loss": 0.3961, "step": 3896 }, { "epoch": 2.1580064605445317, "grad_norm": 0.28930234909057617, "learning_rate": 2.2092636844200137e-06, "loss": 0.4192, "step": 3897 }, { "epoch": 2.1585602215043838, "grad_norm": 0.29025033116340637, "learning_rate": 2.206589617445168e-06, "loss": 0.4264, "step": 3898 }, { "epoch": 2.1591139824642362, "grad_norm": 0.2943166494369507, "learning_rate": 2.2039167114955303e-06, "loss": 0.3942, "step": 3899 }, { "epoch": 2.1596677434240887, "grad_norm": 0.27367693185806274, "learning_rate": 2.2012449676820453e-06, "loss": 0.37, "step": 3900 }, { "epoch": 2.160221504383941, "grad_norm": 0.30235719680786133, "learning_rate": 2.1985743871151693e-06, "loss": 0.4487, "step": 3901 }, { "epoch": 2.1607752653437933, "grad_norm": 0.30284562706947327, "learning_rate": 2.1959049709048773e-06, "loss": 0.3934, "step": 3902 }, { "epoch": 2.1613290263036458, "grad_norm": 0.28702184557914734, "learning_rate": 2.1932367201606536e-06, "loss": 0.3955, "step": 3903 }, { "epoch": 2.161882787263498, "grad_norm": 0.27050599455833435, "learning_rate": 2.190569635991508e-06, "loss": 0.3953, "step": 3904 }, { "epoch": 2.1624365482233503, "grad_norm": 0.28901225328445435, "learning_rate": 2.1879037195059598e-06, "loss": 0.4042, "step": 3905 }, { "epoch": 2.162990309183203, "grad_norm": 0.28063756227493286, "learning_rate": 2.185238971812045e-06, "loss": 0.4051, "step": 3906 }, { "epoch": 2.163544070143055, "grad_norm": 0.2722231149673462, "learning_rate": 2.1825753940173095e-06, "loss": 0.4047, "step": 3907 }, { "epoch": 2.1640978311029073, "grad_norm": 0.29284146428108215, "learning_rate": 2.179912987228816e-06, "loss": 0.4574, "step": 3908 }, { "epoch": 2.16465159206276, "grad_norm": 0.2809986472129822, "learning_rate": 2.1772517525531445e-06, "loss": 0.3817, "step": 3909 }, { "epoch": 2.165205353022612, "grad_norm": 0.263867050409317, "learning_rate": 2.174591691096382e-06, "loss": 0.3692, "step": 3910 }, { "epoch": 2.1657591139824643, "grad_norm": 0.296572744846344, "learning_rate": 2.171932803964132e-06, "loss": 0.4501, "step": 3911 }, { "epoch": 2.1663128749423164, "grad_norm": 0.2918112576007843, "learning_rate": 2.169275092261504e-06, "loss": 0.3947, "step": 3912 }, { "epoch": 2.166866635902169, "grad_norm": 0.2911160886287689, "learning_rate": 2.1666185570931224e-06, "loss": 0.4281, "step": 3913 }, { "epoch": 2.1674203968620214, "grad_norm": 0.27015653252601624, "learning_rate": 2.163963199563128e-06, "loss": 0.3908, "step": 3914 }, { "epoch": 2.1679741578218734, "grad_norm": 0.29925763607025146, "learning_rate": 2.161309020775165e-06, "loss": 0.4029, "step": 3915 }, { "epoch": 2.168527918781726, "grad_norm": 0.2796288728713989, "learning_rate": 2.1586560218323875e-06, "loss": 0.3822, "step": 3916 }, { "epoch": 2.1690816797415784, "grad_norm": 0.3009617328643799, "learning_rate": 2.156004203837463e-06, "loss": 0.4132, "step": 3917 }, { "epoch": 2.1696354407014304, "grad_norm": 0.2680744230747223, "learning_rate": 2.153353567892565e-06, "loss": 0.4122, "step": 3918 }, { "epoch": 2.170189201661283, "grad_norm": 0.2932836413383484, "learning_rate": 2.1507041150993813e-06, "loss": 0.3749, "step": 3919 }, { "epoch": 2.1707429626211354, "grad_norm": 0.3060685694217682, "learning_rate": 2.1480558465591027e-06, "loss": 0.4107, "step": 3920 }, { "epoch": 2.1712967235809875, "grad_norm": 0.27954375743865967, "learning_rate": 2.1454087633724276e-06, "loss": 0.3972, "step": 3921 }, { "epoch": 2.17185048454084, "grad_norm": 0.2742178738117218, "learning_rate": 2.1427628666395626e-06, "loss": 0.3736, "step": 3922 }, { "epoch": 2.172404245500692, "grad_norm": 0.31983381509780884, "learning_rate": 2.140118157460222e-06, "loss": 0.4096, "step": 3923 }, { "epoch": 2.1729580064605445, "grad_norm": 0.29321348667144775, "learning_rate": 2.137474636933631e-06, "loss": 0.3964, "step": 3924 }, { "epoch": 2.173511767420397, "grad_norm": 0.28752779960632324, "learning_rate": 2.1348323061585103e-06, "loss": 0.4271, "step": 3925 }, { "epoch": 2.174065528380249, "grad_norm": 0.29354408383369446, "learning_rate": 2.132191166233094e-06, "loss": 0.4377, "step": 3926 }, { "epoch": 2.1746192893401015, "grad_norm": 0.27496904134750366, "learning_rate": 2.1295512182551176e-06, "loss": 0.3757, "step": 3927 }, { "epoch": 2.175173050299954, "grad_norm": 0.2672199606895447, "learning_rate": 2.126912463321828e-06, "loss": 0.3908, "step": 3928 }, { "epoch": 2.175726811259806, "grad_norm": 0.2915489077568054, "learning_rate": 2.124274902529965e-06, "loss": 0.4329, "step": 3929 }, { "epoch": 2.1762805722196585, "grad_norm": 0.29409006237983704, "learning_rate": 2.121638536975781e-06, "loss": 0.3917, "step": 3930 }, { "epoch": 2.176834333179511, "grad_norm": 0.28870850801467896, "learning_rate": 2.1190033677550274e-06, "loss": 0.3977, "step": 3931 }, { "epoch": 2.177388094139363, "grad_norm": 0.2747563421726227, "learning_rate": 2.116369395962959e-06, "loss": 0.3773, "step": 3932 }, { "epoch": 2.1779418550992156, "grad_norm": 0.2752552926540375, "learning_rate": 2.1137366226943377e-06, "loss": 0.4203, "step": 3933 }, { "epoch": 2.1784956160590676, "grad_norm": 0.27889126539230347, "learning_rate": 2.1111050490434193e-06, "loss": 0.4035, "step": 3934 }, { "epoch": 2.17904937701892, "grad_norm": 0.28218209743499756, "learning_rate": 2.1084746761039655e-06, "loss": 0.3677, "step": 3935 }, { "epoch": 2.1796031379787726, "grad_norm": 0.2994769513607025, "learning_rate": 2.105845504969238e-06, "loss": 0.4209, "step": 3936 }, { "epoch": 2.1801568989386246, "grad_norm": 0.2889765202999115, "learning_rate": 2.1032175367319996e-06, "loss": 0.3697, "step": 3937 }, { "epoch": 2.180710659898477, "grad_norm": 0.33418574929237366, "learning_rate": 2.1005907724845133e-06, "loss": 0.4279, "step": 3938 }, { "epoch": 2.1812644208583296, "grad_norm": 0.2713718116283417, "learning_rate": 2.0979652133185403e-06, "loss": 0.3785, "step": 3939 }, { "epoch": 2.1818181818181817, "grad_norm": 0.301504909992218, "learning_rate": 2.095340860325343e-06, "loss": 0.4142, "step": 3940 }, { "epoch": 2.182371942778034, "grad_norm": 0.2825715243816376, "learning_rate": 2.0927177145956797e-06, "loss": 0.4019, "step": 3941 }, { "epoch": 2.1829257037378866, "grad_norm": 0.31448909640312195, "learning_rate": 2.09009577721981e-06, "loss": 0.4445, "step": 3942 }, { "epoch": 2.1834794646977387, "grad_norm": 0.2751956582069397, "learning_rate": 2.087475049287489e-06, "loss": 0.3889, "step": 3943 }, { "epoch": 2.184033225657591, "grad_norm": 0.29384711384773254, "learning_rate": 2.08485553188797e-06, "loss": 0.4089, "step": 3944 }, { "epoch": 2.1845869866174437, "grad_norm": 0.2744613587856293, "learning_rate": 2.0822372261100028e-06, "loss": 0.3709, "step": 3945 }, { "epoch": 2.1851407475772957, "grad_norm": 0.30690744519233704, "learning_rate": 2.0796201330418346e-06, "loss": 0.4168, "step": 3946 }, { "epoch": 2.185694508537148, "grad_norm": 0.2752636671066284, "learning_rate": 2.0770042537712072e-06, "loss": 0.3938, "step": 3947 }, { "epoch": 2.1862482694970007, "grad_norm": 0.2747426927089691, "learning_rate": 2.0743895893853587e-06, "loss": 0.4292, "step": 3948 }, { "epoch": 2.1868020304568527, "grad_norm": 0.2826738953590393, "learning_rate": 2.071776140971023e-06, "loss": 0.3903, "step": 3949 }, { "epoch": 2.187355791416705, "grad_norm": 0.2816164493560791, "learning_rate": 2.069163909614426e-06, "loss": 0.3768, "step": 3950 }, { "epoch": 2.1879095523765573, "grad_norm": 0.3086695969104767, "learning_rate": 2.066552896401291e-06, "loss": 0.4065, "step": 3951 }, { "epoch": 2.1884633133364098, "grad_norm": 0.2901241183280945, "learning_rate": 2.0639431024168322e-06, "loss": 0.4, "step": 3952 }, { "epoch": 2.1890170742962622, "grad_norm": 0.2615766227245331, "learning_rate": 2.06133452874576e-06, "loss": 0.3888, "step": 3953 }, { "epoch": 2.1895708352561143, "grad_norm": 0.28737515211105347, "learning_rate": 2.058727176472274e-06, "loss": 0.4114, "step": 3954 }, { "epoch": 2.190124596215967, "grad_norm": 0.2945122718811035, "learning_rate": 2.056121046680069e-06, "loss": 0.396, "step": 3955 }, { "epoch": 2.1906783571758193, "grad_norm": 0.30384016036987305, "learning_rate": 2.0535161404523313e-06, "loss": 0.4179, "step": 3956 }, { "epoch": 2.1912321181356713, "grad_norm": 0.2628325819969177, "learning_rate": 2.0509124588717373e-06, "loss": 0.3694, "step": 3957 }, { "epoch": 2.191785879095524, "grad_norm": 0.2837620973587036, "learning_rate": 2.0483100030204558e-06, "loss": 0.407, "step": 3958 }, { "epoch": 2.1923396400553763, "grad_norm": 0.2820863425731659, "learning_rate": 2.0457087739801447e-06, "loss": 0.3929, "step": 3959 }, { "epoch": 2.1928934010152283, "grad_norm": 0.29204273223876953, "learning_rate": 2.043108772831956e-06, "loss": 0.3918, "step": 3960 }, { "epoch": 2.193447161975081, "grad_norm": 0.289264976978302, "learning_rate": 2.0405100006565213e-06, "loss": 0.3954, "step": 3961 }, { "epoch": 2.194000922934933, "grad_norm": 0.2712600529193878, "learning_rate": 2.0379124585339747e-06, "loss": 0.3758, "step": 3962 }, { "epoch": 2.1945546838947854, "grad_norm": 0.3011544942855835, "learning_rate": 2.0353161475439316e-06, "loss": 0.4383, "step": 3963 }, { "epoch": 2.195108444854638, "grad_norm": 0.26710397005081177, "learning_rate": 2.0327210687654952e-06, "loss": 0.3834, "step": 3964 }, { "epoch": 2.19566220581449, "grad_norm": 0.28315311670303345, "learning_rate": 2.0301272232772606e-06, "loss": 0.4342, "step": 3965 }, { "epoch": 2.1962159667743424, "grad_norm": 0.2591688930988312, "learning_rate": 2.0275346121573023e-06, "loss": 0.3634, "step": 3966 }, { "epoch": 2.196769727734195, "grad_norm": 0.280154287815094, "learning_rate": 2.0249432364831933e-06, "loss": 0.3805, "step": 3967 }, { "epoch": 2.197323488694047, "grad_norm": 0.33367201685905457, "learning_rate": 2.0223530973319847e-06, "loss": 0.4497, "step": 3968 }, { "epoch": 2.1978772496538994, "grad_norm": 0.32844844460487366, "learning_rate": 2.0197641957802185e-06, "loss": 0.4554, "step": 3969 }, { "epoch": 2.198431010613752, "grad_norm": 0.2869871258735657, "learning_rate": 2.017176532903914e-06, "loss": 0.369, "step": 3970 }, { "epoch": 2.198984771573604, "grad_norm": 0.27098172903060913, "learning_rate": 2.014590109778587e-06, "loss": 0.3739, "step": 3971 }, { "epoch": 2.1995385325334564, "grad_norm": 0.2697124481201172, "learning_rate": 2.0120049274792306e-06, "loss": 0.4176, "step": 3972 }, { "epoch": 2.200092293493309, "grad_norm": 0.30418282747268677, "learning_rate": 2.0094209870803264e-06, "loss": 0.4284, "step": 3973 }, { "epoch": 2.200646054453161, "grad_norm": 0.32443106174468994, "learning_rate": 2.006838289655834e-06, "loss": 0.3971, "step": 3974 }, { "epoch": 2.2011998154130135, "grad_norm": 0.3036612868309021, "learning_rate": 2.0042568362791996e-06, "loss": 0.4355, "step": 3975 }, { "epoch": 2.2017535763728655, "grad_norm": 0.293973445892334, "learning_rate": 2.0016766280233567e-06, "loss": 0.4196, "step": 3976 }, { "epoch": 2.202307337332718, "grad_norm": 0.2889305651187897, "learning_rate": 1.9990976659607153e-06, "loss": 0.3885, "step": 3977 }, { "epoch": 2.2028610982925705, "grad_norm": 0.29011714458465576, "learning_rate": 1.9965199511631715e-06, "loss": 0.3826, "step": 3978 }, { "epoch": 2.2034148592524225, "grad_norm": 0.2958281636238098, "learning_rate": 1.993943484702097e-06, "loss": 0.4067, "step": 3979 }, { "epoch": 2.203968620212275, "grad_norm": 0.298227995634079, "learning_rate": 1.9913682676483485e-06, "loss": 0.4338, "step": 3980 }, { "epoch": 2.2045223811721275, "grad_norm": 0.2889571487903595, "learning_rate": 1.9887943010722675e-06, "loss": 0.4003, "step": 3981 }, { "epoch": 2.2050761421319796, "grad_norm": 0.27795636653900146, "learning_rate": 1.9862215860436725e-06, "loss": 0.3911, "step": 3982 }, { "epoch": 2.205629903091832, "grad_norm": 0.30463460087776184, "learning_rate": 1.9836501236318554e-06, "loss": 0.4382, "step": 3983 }, { "epoch": 2.2061836640516845, "grad_norm": 0.2826821208000183, "learning_rate": 1.981079914905597e-06, "loss": 0.3822, "step": 3984 }, { "epoch": 2.2067374250115366, "grad_norm": 0.28529471158981323, "learning_rate": 1.9785109609331505e-06, "loss": 0.3823, "step": 3985 }, { "epoch": 2.207291185971389, "grad_norm": 0.27012813091278076, "learning_rate": 1.9759432627822557e-06, "loss": 0.3714, "step": 3986 }, { "epoch": 2.2078449469312416, "grad_norm": 0.29270315170288086, "learning_rate": 1.97337682152012e-06, "loss": 0.418, "step": 3987 }, { "epoch": 2.2083987078910936, "grad_norm": 0.27827340364456177, "learning_rate": 1.9708116382134344e-06, "loss": 0.3895, "step": 3988 }, { "epoch": 2.208952468850946, "grad_norm": 0.3090246915817261, "learning_rate": 1.9682477139283653e-06, "loss": 0.4273, "step": 3989 }, { "epoch": 2.209506229810798, "grad_norm": 0.26536160707473755, "learning_rate": 1.9656850497305595e-06, "loss": 0.3311, "step": 3990 }, { "epoch": 2.2100599907706506, "grad_norm": 0.27588266134262085, "learning_rate": 1.9631236466851377e-06, "loss": 0.4277, "step": 3991 }, { "epoch": 2.210613751730503, "grad_norm": 0.26009246706962585, "learning_rate": 1.960563505856692e-06, "loss": 0.3537, "step": 3992 }, { "epoch": 2.211167512690355, "grad_norm": 0.31340491771698, "learning_rate": 1.9580046283092958e-06, "loss": 0.4256, "step": 3993 }, { "epoch": 2.2117212736502077, "grad_norm": 0.27814242243766785, "learning_rate": 1.955447015106493e-06, "loss": 0.4011, "step": 3994 }, { "epoch": 2.21227503461006, "grad_norm": 0.2789451777935028, "learning_rate": 1.9528906673113105e-06, "loss": 0.4132, "step": 3995 }, { "epoch": 2.212828795569912, "grad_norm": 0.2661590874195099, "learning_rate": 1.9503355859862377e-06, "loss": 0.4011, "step": 3996 }, { "epoch": 2.2133825565297647, "grad_norm": 0.2902107238769531, "learning_rate": 1.947781772193245e-06, "loss": 0.4, "step": 3997 }, { "epoch": 2.213936317489617, "grad_norm": 0.2813757061958313, "learning_rate": 1.945229226993773e-06, "loss": 0.4003, "step": 3998 }, { "epoch": 2.214490078449469, "grad_norm": 0.2871949076652527, "learning_rate": 1.9426779514487354e-06, "loss": 0.4272, "step": 3999 }, { "epoch": 2.2150438394093217, "grad_norm": 0.2738092839717865, "learning_rate": 1.940127946618524e-06, "loss": 0.3934, "step": 4000 }, { "epoch": 2.2155976003691737, "grad_norm": 0.2616533935070038, "learning_rate": 1.937579213562993e-06, "loss": 0.3702, "step": 4001 }, { "epoch": 2.2161513613290262, "grad_norm": 0.27374374866485596, "learning_rate": 1.935031753341472e-06, "loss": 0.3902, "step": 4002 }, { "epoch": 2.2167051222888787, "grad_norm": 0.31663769483566284, "learning_rate": 1.9324855670127635e-06, "loss": 0.431, "step": 4003 }, { "epoch": 2.2172588832487308, "grad_norm": 0.27422580122947693, "learning_rate": 1.9299406556351385e-06, "loss": 0.3837, "step": 4004 }, { "epoch": 2.2178126442085833, "grad_norm": 0.29084450006484985, "learning_rate": 1.927397020266339e-06, "loss": 0.4193, "step": 4005 }, { "epoch": 2.2183664051684358, "grad_norm": 0.2688285708427429, "learning_rate": 1.9248546619635755e-06, "loss": 0.4084, "step": 4006 }, { "epoch": 2.218920166128288, "grad_norm": 0.2907765507698059, "learning_rate": 1.9223135817835294e-06, "loss": 0.4467, "step": 4007 }, { "epoch": 2.2194739270881403, "grad_norm": 0.27350884675979614, "learning_rate": 1.9197737807823484e-06, "loss": 0.3821, "step": 4008 }, { "epoch": 2.2200276880479928, "grad_norm": 0.27825528383255005, "learning_rate": 1.917235260015651e-06, "loss": 0.4331, "step": 4009 }, { "epoch": 2.220581449007845, "grad_norm": 0.24365858733654022, "learning_rate": 1.9146980205385233e-06, "loss": 0.3575, "step": 4010 }, { "epoch": 2.2211352099676973, "grad_norm": 0.27645373344421387, "learning_rate": 1.9121620634055172e-06, "loss": 0.4366, "step": 4011 }, { "epoch": 2.22168897092755, "grad_norm": 0.283847838640213, "learning_rate": 1.9096273896706528e-06, "loss": 0.3766, "step": 4012 }, { "epoch": 2.222242731887402, "grad_norm": 0.2624025046825409, "learning_rate": 1.9070940003874172e-06, "loss": 0.3839, "step": 4013 }, { "epoch": 2.2227964928472543, "grad_norm": 0.28043797612190247, "learning_rate": 1.904561896608762e-06, "loss": 0.3849, "step": 4014 }, { "epoch": 2.223350253807107, "grad_norm": 0.27365073561668396, "learning_rate": 1.902031079387106e-06, "loss": 0.4057, "step": 4015 }, { "epoch": 2.223904014766959, "grad_norm": 0.2874981462955475, "learning_rate": 1.899501549774333e-06, "loss": 0.4234, "step": 4016 }, { "epoch": 2.2244577757268114, "grad_norm": 0.2783260941505432, "learning_rate": 1.8969733088217911e-06, "loss": 0.4028, "step": 4017 }, { "epoch": 2.2250115366866634, "grad_norm": 0.2574338912963867, "learning_rate": 1.894446357580294e-06, "loss": 0.3864, "step": 4018 }, { "epoch": 2.225565297646516, "grad_norm": 0.2874571979045868, "learning_rate": 1.8919206971001174e-06, "loss": 0.4129, "step": 4019 }, { "epoch": 2.2261190586063684, "grad_norm": 0.31935375928878784, "learning_rate": 1.8893963284310018e-06, "loss": 0.4424, "step": 4020 }, { "epoch": 2.2266728195662204, "grad_norm": 0.2589271664619446, "learning_rate": 1.8868732526221513e-06, "loss": 0.3758, "step": 4021 }, { "epoch": 2.227226580526073, "grad_norm": 0.26348015666007996, "learning_rate": 1.8843514707222316e-06, "loss": 0.3963, "step": 4022 }, { "epoch": 2.2277803414859254, "grad_norm": 0.26158079504966736, "learning_rate": 1.8818309837793702e-06, "loss": 0.4019, "step": 4023 }, { "epoch": 2.2283341024457775, "grad_norm": 0.29862380027770996, "learning_rate": 1.8793117928411585e-06, "loss": 0.4287, "step": 4024 }, { "epoch": 2.22888786340563, "grad_norm": 0.2935699224472046, "learning_rate": 1.8767938989546469e-06, "loss": 0.4213, "step": 4025 }, { "epoch": 2.2294416243654824, "grad_norm": 0.2727924883365631, "learning_rate": 1.874277303166348e-06, "loss": 0.4001, "step": 4026 }, { "epoch": 2.2299953853253345, "grad_norm": 0.26203271746635437, "learning_rate": 1.8717620065222363e-06, "loss": 0.3693, "step": 4027 }, { "epoch": 2.230549146285187, "grad_norm": 0.3036115765571594, "learning_rate": 1.869248010067739e-06, "loss": 0.417, "step": 4028 }, { "epoch": 2.231102907245039, "grad_norm": 0.278999388217926, "learning_rate": 1.8667353148477547e-06, "loss": 0.3984, "step": 4029 }, { "epoch": 2.2316566682048915, "grad_norm": 0.2643652856349945, "learning_rate": 1.8642239219066322e-06, "loss": 0.3824, "step": 4030 }, { "epoch": 2.232210429164744, "grad_norm": 0.2891468107700348, "learning_rate": 1.861713832288184e-06, "loss": 0.4144, "step": 4031 }, { "epoch": 2.232764190124596, "grad_norm": 0.2581600248813629, "learning_rate": 1.8592050470356731e-06, "loss": 0.3638, "step": 4032 }, { "epoch": 2.2333179510844485, "grad_norm": 0.2799096405506134, "learning_rate": 1.856697567191832e-06, "loss": 0.4066, "step": 4033 }, { "epoch": 2.233871712044301, "grad_norm": 0.33308953046798706, "learning_rate": 1.854191393798842e-06, "loss": 0.4549, "step": 4034 }, { "epoch": 2.234425473004153, "grad_norm": 0.2882808744907379, "learning_rate": 1.8516865278983442e-06, "loss": 0.4027, "step": 4035 }, { "epoch": 2.2349792339640056, "grad_norm": 0.331625372171402, "learning_rate": 1.8491829705314379e-06, "loss": 0.4037, "step": 4036 }, { "epoch": 2.235532994923858, "grad_norm": 0.25290244817733765, "learning_rate": 1.8466807227386718e-06, "loss": 0.3582, "step": 4037 }, { "epoch": 2.23608675588371, "grad_norm": 0.26376286149024963, "learning_rate": 1.8441797855600602e-06, "loss": 0.4101, "step": 4038 }, { "epoch": 2.2366405168435626, "grad_norm": 0.2872953414916992, "learning_rate": 1.8416801600350658e-06, "loss": 0.4046, "step": 4039 }, { "epoch": 2.237194277803415, "grad_norm": 0.34605610370635986, "learning_rate": 1.8391818472026097e-06, "loss": 0.4265, "step": 4040 }, { "epoch": 2.237748038763267, "grad_norm": 0.28938964009284973, "learning_rate": 1.8366848481010625e-06, "loss": 0.4116, "step": 4041 }, { "epoch": 2.2383017997231196, "grad_norm": 0.2682281732559204, "learning_rate": 1.8341891637682514e-06, "loss": 0.4047, "step": 4042 }, { "epoch": 2.2388555606829716, "grad_norm": 0.24214604496955872, "learning_rate": 1.831694795241462e-06, "loss": 0.3786, "step": 4043 }, { "epoch": 2.239409321642824, "grad_norm": 0.2907559275627136, "learning_rate": 1.8292017435574267e-06, "loss": 0.4045, "step": 4044 }, { "epoch": 2.2399630826026766, "grad_norm": 0.27655360102653503, "learning_rate": 1.8267100097523338e-06, "loss": 0.3805, "step": 4045 }, { "epoch": 2.2405168435625287, "grad_norm": 0.26702654361724854, "learning_rate": 1.8242195948618202e-06, "loss": 0.3982, "step": 4046 }, { "epoch": 2.241070604522381, "grad_norm": 0.27244019508361816, "learning_rate": 1.8217304999209762e-06, "loss": 0.367, "step": 4047 }, { "epoch": 2.2416243654822336, "grad_norm": 0.28130412101745605, "learning_rate": 1.819242725964348e-06, "loss": 0.4533, "step": 4048 }, { "epoch": 2.2421781264420857, "grad_norm": 0.28974756598472595, "learning_rate": 1.8167562740259297e-06, "loss": 0.3995, "step": 4049 }, { "epoch": 2.242731887401938, "grad_norm": 0.27935948967933655, "learning_rate": 1.814271145139162e-06, "loss": 0.394, "step": 4050 }, { "epoch": 2.2432856483617907, "grad_norm": 0.25275489687919617, "learning_rate": 1.8117873403369395e-06, "loss": 0.3699, "step": 4051 }, { "epoch": 2.2438394093216427, "grad_norm": 0.27345308661460876, "learning_rate": 1.8093048606516062e-06, "loss": 0.3753, "step": 4052 }, { "epoch": 2.244393170281495, "grad_norm": 0.2831556499004364, "learning_rate": 1.8068237071149586e-06, "loss": 0.4012, "step": 4053 }, { "epoch": 2.2449469312413477, "grad_norm": 0.2720467448234558, "learning_rate": 1.8043438807582342e-06, "loss": 0.4135, "step": 4054 }, { "epoch": 2.2455006922011997, "grad_norm": 0.2705231010913849, "learning_rate": 1.8018653826121251e-06, "loss": 0.3985, "step": 4055 }, { "epoch": 2.2460544531610522, "grad_norm": 0.29561057686805725, "learning_rate": 1.7993882137067675e-06, "loss": 0.4431, "step": 4056 }, { "epoch": 2.2466082141209043, "grad_norm": 0.290768027305603, "learning_rate": 1.7969123750717509e-06, "loss": 0.4234, "step": 4057 }, { "epoch": 2.2471619750807568, "grad_norm": 0.25770384073257446, "learning_rate": 1.794437867736108e-06, "loss": 0.4065, "step": 4058 }, { "epoch": 2.2477157360406093, "grad_norm": 0.2494744062423706, "learning_rate": 1.7919646927283147e-06, "loss": 0.3572, "step": 4059 }, { "epoch": 2.2482694970004613, "grad_norm": 0.31323882937431335, "learning_rate": 1.7894928510762988e-06, "loss": 0.4342, "step": 4060 }, { "epoch": 2.248823257960314, "grad_norm": 0.2877180576324463, "learning_rate": 1.7870223438074302e-06, "loss": 0.4112, "step": 4061 }, { "epoch": 2.2493770189201663, "grad_norm": 0.27569398283958435, "learning_rate": 1.7845531719485309e-06, "loss": 0.3641, "step": 4062 }, { "epoch": 2.2499307798800183, "grad_norm": 0.27296432852745056, "learning_rate": 1.7820853365258578e-06, "loss": 0.3902, "step": 4063 }, { "epoch": 2.250484540839871, "grad_norm": 0.26025134325027466, "learning_rate": 1.7796188385651192e-06, "loss": 0.393, "step": 4064 }, { "epoch": 2.2510383017997233, "grad_norm": 0.2862880527973175, "learning_rate": 1.777153679091465e-06, "loss": 0.4028, "step": 4065 }, { "epoch": 2.2515920627595754, "grad_norm": 0.27636298537254333, "learning_rate": 1.7746898591294903e-06, "loss": 0.3665, "step": 4066 }, { "epoch": 2.252145823719428, "grad_norm": 0.31285199522972107, "learning_rate": 1.7722273797032324e-06, "loss": 0.4347, "step": 4067 }, { "epoch": 2.25269958467928, "grad_norm": 0.28987327218055725, "learning_rate": 1.769766241836171e-06, "loss": 0.4066, "step": 4068 }, { "epoch": 2.2532533456391324, "grad_norm": 0.3105824291706085, "learning_rate": 1.76730644655123e-06, "loss": 0.4153, "step": 4069 }, { "epoch": 2.253807106598985, "grad_norm": 0.2989872097969055, "learning_rate": 1.7648479948707736e-06, "loss": 0.4318, "step": 4070 }, { "epoch": 2.254360867558837, "grad_norm": 0.28232720494270325, "learning_rate": 1.7623908878166085e-06, "loss": 0.4136, "step": 4071 }, { "epoch": 2.2549146285186894, "grad_norm": 0.2735951244831085, "learning_rate": 1.7599351264099811e-06, "loss": 0.3803, "step": 4072 }, { "epoch": 2.255468389478542, "grad_norm": 0.29088136553764343, "learning_rate": 1.7574807116715808e-06, "loss": 0.3915, "step": 4073 }, { "epoch": 2.256022150438394, "grad_norm": 0.278920441865921, "learning_rate": 1.7550276446215352e-06, "loss": 0.4121, "step": 4074 }, { "epoch": 2.2565759113982464, "grad_norm": 0.2732516825199127, "learning_rate": 1.7525759262794134e-06, "loss": 0.4288, "step": 4075 }, { "epoch": 2.257129672358099, "grad_norm": 0.312664657831192, "learning_rate": 1.7501255576642223e-06, "loss": 0.453, "step": 4076 }, { "epoch": 2.257683433317951, "grad_norm": 0.274894118309021, "learning_rate": 1.7476765397944095e-06, "loss": 0.3759, "step": 4077 }, { "epoch": 2.2582371942778035, "grad_norm": 0.2698676288127899, "learning_rate": 1.7452288736878592e-06, "loss": 0.4072, "step": 4078 }, { "epoch": 2.2587909552376555, "grad_norm": 0.27208688855171204, "learning_rate": 1.7427825603618958e-06, "loss": 0.4074, "step": 4079 }, { "epoch": 2.259344716197508, "grad_norm": 0.2813667953014374, "learning_rate": 1.7403376008332806e-06, "loss": 0.4226, "step": 4080 }, { "epoch": 2.2598984771573605, "grad_norm": 0.27170974016189575, "learning_rate": 1.7378939961182116e-06, "loss": 0.37, "step": 4081 }, { "epoch": 2.260452238117213, "grad_norm": 0.28442901372909546, "learning_rate": 1.7354517472323252e-06, "loss": 0.4081, "step": 4082 }, { "epoch": 2.261005999077065, "grad_norm": 0.28130775690078735, "learning_rate": 1.7330108551906922e-06, "loss": 0.3974, "step": 4083 }, { "epoch": 2.2615597600369175, "grad_norm": 0.2964949905872345, "learning_rate": 1.7305713210078211e-06, "loss": 0.4253, "step": 4084 }, { "epoch": 2.2621135209967695, "grad_norm": 0.26036471128463745, "learning_rate": 1.7281331456976558e-06, "loss": 0.4093, "step": 4085 }, { "epoch": 2.262667281956622, "grad_norm": 0.26171430945396423, "learning_rate": 1.7256963302735752e-06, "loss": 0.3798, "step": 4086 }, { "epoch": 2.2632210429164745, "grad_norm": 0.31113797426223755, "learning_rate": 1.723260875748392e-06, "loss": 0.4467, "step": 4087 }, { "epoch": 2.2637748038763266, "grad_norm": 0.2823711037635803, "learning_rate": 1.7208267831343555e-06, "loss": 0.4089, "step": 4088 }, { "epoch": 2.264328564836179, "grad_norm": 0.2656492292881012, "learning_rate": 1.718394053443147e-06, "loss": 0.3864, "step": 4089 }, { "epoch": 2.2648823257960315, "grad_norm": 0.2678931653499603, "learning_rate": 1.7159626876858816e-06, "loss": 0.373, "step": 4090 }, { "epoch": 2.2654360867558836, "grad_norm": 0.2815852761268616, "learning_rate": 1.7135326868731088e-06, "loss": 0.4173, "step": 4091 }, { "epoch": 2.265989847715736, "grad_norm": 0.294308602809906, "learning_rate": 1.7111040520148092e-06, "loss": 0.3901, "step": 4092 }, { "epoch": 2.2665436086755886, "grad_norm": 0.26045238971710205, "learning_rate": 1.7086767841203965e-06, "loss": 0.3641, "step": 4093 }, { "epoch": 2.2670973696354406, "grad_norm": 0.2761538326740265, "learning_rate": 1.706250884198718e-06, "loss": 0.3861, "step": 4094 }, { "epoch": 2.267651130595293, "grad_norm": 0.2935260236263275, "learning_rate": 1.7038263532580457e-06, "loss": 0.4, "step": 4095 }, { "epoch": 2.268204891555145, "grad_norm": 0.29620876908302307, "learning_rate": 1.701403192306092e-06, "loss": 0.4276, "step": 4096 }, { "epoch": 2.2687586525149976, "grad_norm": 0.2648007571697235, "learning_rate": 1.6989814023499934e-06, "loss": 0.3787, "step": 4097 }, { "epoch": 2.26931241347485, "grad_norm": 0.27609017491340637, "learning_rate": 1.696560984396321e-06, "loss": 0.3902, "step": 4098 }, { "epoch": 2.269866174434702, "grad_norm": 0.31262874603271484, "learning_rate": 1.6941419394510673e-06, "loss": 0.4237, "step": 4099 }, { "epoch": 2.2704199353945547, "grad_norm": 0.26640188694000244, "learning_rate": 1.6917242685196655e-06, "loss": 0.3826, "step": 4100 }, { "epoch": 2.270973696354407, "grad_norm": 0.28939497470855713, "learning_rate": 1.6893079726069704e-06, "loss": 0.3922, "step": 4101 }, { "epoch": 2.271527457314259, "grad_norm": 0.28485003113746643, "learning_rate": 1.6868930527172678e-06, "loss": 0.4233, "step": 4102 }, { "epoch": 2.2720812182741117, "grad_norm": 0.2513393461704254, "learning_rate": 1.6844795098542715e-06, "loss": 0.3756, "step": 4103 }, { "epoch": 2.272634979233964, "grad_norm": 0.27429017424583435, "learning_rate": 1.6820673450211178e-06, "loss": 0.3992, "step": 4104 }, { "epoch": 2.2731887401938162, "grad_norm": 0.27585381269454956, "learning_rate": 1.67965655922038e-06, "loss": 0.4124, "step": 4105 }, { "epoch": 2.2737425011536687, "grad_norm": 0.2840491831302643, "learning_rate": 1.6772471534540512e-06, "loss": 0.3925, "step": 4106 }, { "epoch": 2.2742962621135208, "grad_norm": 0.2808879613876343, "learning_rate": 1.6748391287235543e-06, "loss": 0.4204, "step": 4107 }, { "epoch": 2.2748500230733733, "grad_norm": 0.287153959274292, "learning_rate": 1.6724324860297336e-06, "loss": 0.428, "step": 4108 }, { "epoch": 2.2754037840332257, "grad_norm": 0.27363869547843933, "learning_rate": 1.6700272263728624e-06, "loss": 0.3662, "step": 4109 }, { "epoch": 2.2759575449930782, "grad_norm": 0.2913447320461273, "learning_rate": 1.6676233507526412e-06, "loss": 0.3998, "step": 4110 }, { "epoch": 2.2765113059529303, "grad_norm": 0.29301100969314575, "learning_rate": 1.6652208601681935e-06, "loss": 0.3992, "step": 4111 }, { "epoch": 2.2770650669127828, "grad_norm": 0.2989904582500458, "learning_rate": 1.6628197556180625e-06, "loss": 0.3943, "step": 4112 }, { "epoch": 2.277618827872635, "grad_norm": 0.28236857056617737, "learning_rate": 1.6604200381002223e-06, "loss": 0.3934, "step": 4113 }, { "epoch": 2.2781725888324873, "grad_norm": 0.28987300395965576, "learning_rate": 1.6580217086120648e-06, "loss": 0.4111, "step": 4114 }, { "epoch": 2.27872634979234, "grad_norm": 0.2847365736961365, "learning_rate": 1.6556247681504112e-06, "loss": 0.4286, "step": 4115 }, { "epoch": 2.279280110752192, "grad_norm": 0.2889038920402527, "learning_rate": 1.6532292177115023e-06, "loss": 0.3738, "step": 4116 }, { "epoch": 2.2798338717120443, "grad_norm": 0.2972509562969208, "learning_rate": 1.6508350582909972e-06, "loss": 0.4498, "step": 4117 }, { "epoch": 2.280387632671897, "grad_norm": 0.2683562636375427, "learning_rate": 1.6484422908839808e-06, "loss": 0.3671, "step": 4118 }, { "epoch": 2.280941393631749, "grad_norm": 0.28620386123657227, "learning_rate": 1.6460509164849613e-06, "loss": 0.3972, "step": 4119 }, { "epoch": 2.2814951545916013, "grad_norm": 0.2920527458190918, "learning_rate": 1.643660936087867e-06, "loss": 0.4421, "step": 4120 }, { "epoch": 2.282048915551454, "grad_norm": 0.2643652856349945, "learning_rate": 1.6412723506860418e-06, "loss": 0.3809, "step": 4121 }, { "epoch": 2.282602676511306, "grad_norm": 0.26862263679504395, "learning_rate": 1.6388851612722546e-06, "loss": 0.4185, "step": 4122 }, { "epoch": 2.2831564374711584, "grad_norm": 0.2662033438682556, "learning_rate": 1.6364993688386915e-06, "loss": 0.3763, "step": 4123 }, { "epoch": 2.2837101984310104, "grad_norm": 0.2692206799983978, "learning_rate": 1.634114974376963e-06, "loss": 0.3997, "step": 4124 }, { "epoch": 2.284263959390863, "grad_norm": 0.2715854048728943, "learning_rate": 1.6317319788780955e-06, "loss": 0.3805, "step": 4125 }, { "epoch": 2.2848177203507154, "grad_norm": 0.2679227888584137, "learning_rate": 1.6293503833325292e-06, "loss": 0.4082, "step": 4126 }, { "epoch": 2.2853714813105674, "grad_norm": 0.2878059148788452, "learning_rate": 1.6269701887301288e-06, "loss": 0.4072, "step": 4127 }, { "epoch": 2.28592524227042, "grad_norm": 0.2718622386455536, "learning_rate": 1.6245913960601728e-06, "loss": 0.38, "step": 4128 }, { "epoch": 2.2864790032302724, "grad_norm": 0.3106292188167572, "learning_rate": 1.6222140063113645e-06, "loss": 0.4587, "step": 4129 }, { "epoch": 2.2870327641901245, "grad_norm": 0.2654130160808563, "learning_rate": 1.6198380204718128e-06, "loss": 0.3862, "step": 4130 }, { "epoch": 2.287586525149977, "grad_norm": 0.26725825667381287, "learning_rate": 1.61746343952905e-06, "loss": 0.3747, "step": 4131 }, { "epoch": 2.2881402861098294, "grad_norm": 0.2608865201473236, "learning_rate": 1.6150902644700246e-06, "loss": 0.3951, "step": 4132 }, { "epoch": 2.2886940470696815, "grad_norm": 0.2747619152069092, "learning_rate": 1.6127184962810983e-06, "loss": 0.4087, "step": 4133 }, { "epoch": 2.289247808029534, "grad_norm": 0.2668324410915375, "learning_rate": 1.6103481359480493e-06, "loss": 0.3886, "step": 4134 }, { "epoch": 2.289801568989386, "grad_norm": 0.2711455821990967, "learning_rate": 1.6079791844560705e-06, "loss": 0.3779, "step": 4135 }, { "epoch": 2.2903553299492385, "grad_norm": 0.28389307856559753, "learning_rate": 1.6056116427897695e-06, "loss": 0.426, "step": 4136 }, { "epoch": 2.290909090909091, "grad_norm": 0.27535396814346313, "learning_rate": 1.603245511933168e-06, "loss": 0.4135, "step": 4137 }, { "epoch": 2.291462851868943, "grad_norm": 0.2744169235229492, "learning_rate": 1.6008807928697002e-06, "loss": 0.4183, "step": 4138 }, { "epoch": 2.2920166128287955, "grad_norm": 0.2666880190372467, "learning_rate": 1.5985174865822146e-06, "loss": 0.3841, "step": 4139 }, { "epoch": 2.292570373788648, "grad_norm": 0.29043665528297424, "learning_rate": 1.5961555940529727e-06, "loss": 0.4403, "step": 4140 }, { "epoch": 2.2931241347485, "grad_norm": 0.2661484181880951, "learning_rate": 1.5937951162636472e-06, "loss": 0.3781, "step": 4141 }, { "epoch": 2.2936778957083526, "grad_norm": 0.2771502733230591, "learning_rate": 1.5914360541953244e-06, "loss": 0.4115, "step": 4142 }, { "epoch": 2.294231656668205, "grad_norm": 0.24025622010231018, "learning_rate": 1.5890784088285005e-06, "loss": 0.3796, "step": 4143 }, { "epoch": 2.294785417628057, "grad_norm": 0.2984811067581177, "learning_rate": 1.5867221811430845e-06, "loss": 0.4339, "step": 4144 }, { "epoch": 2.2953391785879096, "grad_norm": 0.2741227447986603, "learning_rate": 1.5843673721183945e-06, "loss": 0.4148, "step": 4145 }, { "epoch": 2.2958929395477616, "grad_norm": 0.28668704628944397, "learning_rate": 1.5820139827331603e-06, "loss": 0.4464, "step": 4146 }, { "epoch": 2.296446700507614, "grad_norm": 0.2689066529273987, "learning_rate": 1.579662013965521e-06, "loss": 0.3837, "step": 4147 }, { "epoch": 2.2970004614674666, "grad_norm": 0.28059738874435425, "learning_rate": 1.5773114667930257e-06, "loss": 0.4199, "step": 4148 }, { "epoch": 2.297554222427319, "grad_norm": 0.2776411473751068, "learning_rate": 1.5749623421926324e-06, "loss": 0.4065, "step": 4149 }, { "epoch": 2.298107983387171, "grad_norm": 0.25290417671203613, "learning_rate": 1.5726146411407073e-06, "loss": 0.3469, "step": 4150 }, { "epoch": 2.2986617443470236, "grad_norm": 0.2831743359565735, "learning_rate": 1.5702683646130257e-06, "loss": 0.4123, "step": 4151 }, { "epoch": 2.2992155053068757, "grad_norm": 0.2603956162929535, "learning_rate": 1.5679235135847703e-06, "loss": 0.3951, "step": 4152 }, { "epoch": 2.299769266266728, "grad_norm": 0.2737022936344147, "learning_rate": 1.5655800890305323e-06, "loss": 0.3858, "step": 4153 }, { "epoch": 2.3003230272265807, "grad_norm": 0.28095561265945435, "learning_rate": 1.563238091924309e-06, "loss": 0.4357, "step": 4154 }, { "epoch": 2.3008767881864327, "grad_norm": 0.2873704433441162, "learning_rate": 1.560897523239504e-06, "loss": 0.4225, "step": 4155 }, { "epoch": 2.301430549146285, "grad_norm": 0.2663287818431854, "learning_rate": 1.5585583839489305e-06, "loss": 0.3924, "step": 4156 }, { "epoch": 2.3019843101061377, "grad_norm": 0.2652375400066376, "learning_rate": 1.5562206750247998e-06, "loss": 0.3974, "step": 4157 }, { "epoch": 2.3025380710659897, "grad_norm": 0.2705978453159332, "learning_rate": 1.5538843974387386e-06, "loss": 0.394, "step": 4158 }, { "epoch": 2.3030918320258422, "grad_norm": 0.27819544076919556, "learning_rate": 1.5515495521617723e-06, "loss": 0.4019, "step": 4159 }, { "epoch": 2.3036455929856947, "grad_norm": 0.2973634600639343, "learning_rate": 1.5492161401643329e-06, "loss": 0.3993, "step": 4160 }, { "epoch": 2.3041993539455468, "grad_norm": 0.3075932562351227, "learning_rate": 1.5468841624162567e-06, "loss": 0.4068, "step": 4161 }, { "epoch": 2.3047531149053992, "grad_norm": 0.27645421028137207, "learning_rate": 1.5445536198867834e-06, "loss": 0.4089, "step": 4162 }, { "epoch": 2.3053068758652513, "grad_norm": 0.2628445625305176, "learning_rate": 1.5422245135445563e-06, "loss": 0.3819, "step": 4163 }, { "epoch": 2.305860636825104, "grad_norm": 0.28630319237709045, "learning_rate": 1.5398968443576218e-06, "loss": 0.4104, "step": 4164 }, { "epoch": 2.3064143977849563, "grad_norm": 0.268375039100647, "learning_rate": 1.537570613293431e-06, "loss": 0.3934, "step": 4165 }, { "epoch": 2.3069681587448083, "grad_norm": 0.26405245065689087, "learning_rate": 1.5352458213188308e-06, "loss": 0.4066, "step": 4166 }, { "epoch": 2.307521919704661, "grad_norm": 0.2825782895088196, "learning_rate": 1.532922469400079e-06, "loss": 0.4194, "step": 4167 }, { "epoch": 2.3080756806645133, "grad_norm": 0.2806036174297333, "learning_rate": 1.530600558502829e-06, "loss": 0.4301, "step": 4168 }, { "epoch": 2.3086294416243653, "grad_norm": 0.25940659642219543, "learning_rate": 1.5282800895921357e-06, "loss": 0.3989, "step": 4169 }, { "epoch": 2.309183202584218, "grad_norm": 0.25690287351608276, "learning_rate": 1.525961063632459e-06, "loss": 0.3963, "step": 4170 }, { "epoch": 2.3097369635440703, "grad_norm": 0.2730851471424103, "learning_rate": 1.5236434815876494e-06, "loss": 0.3931, "step": 4171 }, { "epoch": 2.3102907245039224, "grad_norm": 0.2750282883644104, "learning_rate": 1.5213273444209693e-06, "loss": 0.4232, "step": 4172 }, { "epoch": 2.310844485463775, "grad_norm": 0.2611590623855591, "learning_rate": 1.5190126530950727e-06, "loss": 0.3971, "step": 4173 }, { "epoch": 2.311398246423627, "grad_norm": 0.26669877767562866, "learning_rate": 1.516699408572016e-06, "loss": 0.4134, "step": 4174 }, { "epoch": 2.3119520073834794, "grad_norm": 0.2598952353000641, "learning_rate": 1.514387611813251e-06, "loss": 0.3782, "step": 4175 }, { "epoch": 2.312505768343332, "grad_norm": 0.2833543121814728, "learning_rate": 1.5120772637796293e-06, "loss": 0.427, "step": 4176 }, { "epoch": 2.3130595293031844, "grad_norm": 0.27779310941696167, "learning_rate": 1.5097683654314043e-06, "loss": 0.4172, "step": 4177 }, { "epoch": 2.3136132902630364, "grad_norm": 0.2496919482946396, "learning_rate": 1.507460917728223e-06, "loss": 0.3847, "step": 4178 }, { "epoch": 2.314167051222889, "grad_norm": 0.2763901352882385, "learning_rate": 1.5051549216291266e-06, "loss": 0.3813, "step": 4179 }, { "epoch": 2.314720812182741, "grad_norm": 0.2703729271888733, "learning_rate": 1.5028503780925585e-06, "loss": 0.4189, "step": 4180 }, { "epoch": 2.3152745731425934, "grad_norm": 0.298546701669693, "learning_rate": 1.5005472880763544e-06, "loss": 0.4346, "step": 4181 }, { "epoch": 2.315828334102446, "grad_norm": 0.2819088101387024, "learning_rate": 1.4982456525377508e-06, "loss": 0.3557, "step": 4182 }, { "epoch": 2.316382095062298, "grad_norm": 0.2934330999851227, "learning_rate": 1.495945472433376e-06, "loss": 0.4207, "step": 4183 }, { "epoch": 2.3169358560221505, "grad_norm": 0.28358322381973267, "learning_rate": 1.4936467487192518e-06, "loss": 0.3992, "step": 4184 }, { "epoch": 2.317489616982003, "grad_norm": 0.2491660714149475, "learning_rate": 1.4913494823507963e-06, "loss": 0.3787, "step": 4185 }, { "epoch": 2.318043377941855, "grad_norm": 0.2746664583683014, "learning_rate": 1.4890536742828255e-06, "loss": 0.4471, "step": 4186 }, { "epoch": 2.3185971389017075, "grad_norm": 0.2773613929748535, "learning_rate": 1.4867593254695456e-06, "loss": 0.3711, "step": 4187 }, { "epoch": 2.31915089986156, "grad_norm": 0.3096785843372345, "learning_rate": 1.4844664368645544e-06, "loss": 0.4253, "step": 4188 }, { "epoch": 2.319704660821412, "grad_norm": 0.2770707309246063, "learning_rate": 1.4821750094208465e-06, "loss": 0.3892, "step": 4189 }, { "epoch": 2.3202584217812645, "grad_norm": 0.32324305176734924, "learning_rate": 1.4798850440908063e-06, "loss": 0.3962, "step": 4190 }, { "epoch": 2.3208121827411166, "grad_norm": 0.2981623411178589, "learning_rate": 1.4775965418262172e-06, "loss": 0.3697, "step": 4191 }, { "epoch": 2.321365943700969, "grad_norm": 0.2994433641433716, "learning_rate": 1.4753095035782443e-06, "loss": 0.4788, "step": 4192 }, { "epoch": 2.3219197046608215, "grad_norm": 0.27998098731040955, "learning_rate": 1.473023930297452e-06, "loss": 0.3852, "step": 4193 }, { "epoch": 2.3224734656206736, "grad_norm": 0.27316832542419434, "learning_rate": 1.4707398229337916e-06, "loss": 0.4039, "step": 4194 }, { "epoch": 2.323027226580526, "grad_norm": 0.2624499797821045, "learning_rate": 1.4684571824366056e-06, "loss": 0.3921, "step": 4195 }, { "epoch": 2.3235809875403786, "grad_norm": 0.27824142575263977, "learning_rate": 1.4661760097546335e-06, "loss": 0.4014, "step": 4196 }, { "epoch": 2.3241347485002306, "grad_norm": 0.273160845041275, "learning_rate": 1.4638963058359935e-06, "loss": 0.3898, "step": 4197 }, { "epoch": 2.324688509460083, "grad_norm": 0.31807941198349, "learning_rate": 1.4616180716282008e-06, "loss": 0.4493, "step": 4198 }, { "epoch": 2.3252422704199356, "grad_norm": 0.27562329173088074, "learning_rate": 1.4593413080781577e-06, "loss": 0.3852, "step": 4199 }, { "epoch": 2.3257960313797876, "grad_norm": 0.285534143447876, "learning_rate": 1.4570660161321566e-06, "loss": 0.4172, "step": 4200 }, { "epoch": 2.32634979233964, "grad_norm": 0.27362653613090515, "learning_rate": 1.4547921967358752e-06, "loss": 0.3568, "step": 4201 }, { "epoch": 2.326903553299492, "grad_norm": 0.28212592005729675, "learning_rate": 1.452519850834383e-06, "loss": 0.4116, "step": 4202 }, { "epoch": 2.3274573142593447, "grad_norm": 0.2930621802806854, "learning_rate": 1.4502489793721342e-06, "loss": 0.4118, "step": 4203 }, { "epoch": 2.328011075219197, "grad_norm": 0.2887679934501648, "learning_rate": 1.4479795832929693e-06, "loss": 0.4029, "step": 4204 }, { "epoch": 2.328564836179049, "grad_norm": 0.28872352838516235, "learning_rate": 1.445711663540123e-06, "loss": 0.414, "step": 4205 }, { "epoch": 2.3291185971389017, "grad_norm": 0.2563149929046631, "learning_rate": 1.4434452210562062e-06, "loss": 0.4122, "step": 4206 }, { "epoch": 2.329672358098754, "grad_norm": 0.27872008085250854, "learning_rate": 1.4411802567832207e-06, "loss": 0.4025, "step": 4207 }, { "epoch": 2.330226119058606, "grad_norm": 0.25740623474121094, "learning_rate": 1.4389167716625545e-06, "loss": 0.3561, "step": 4208 }, { "epoch": 2.3307798800184587, "grad_norm": 0.28752100467681885, "learning_rate": 1.4366547666349801e-06, "loss": 0.4072, "step": 4209 }, { "epoch": 2.331333640978311, "grad_norm": 0.2843523323535919, "learning_rate": 1.434394242640655e-06, "loss": 0.4347, "step": 4210 }, { "epoch": 2.3318874019381632, "grad_norm": 0.2864055037498474, "learning_rate": 1.4321352006191197e-06, "loss": 0.4184, "step": 4211 }, { "epoch": 2.3324411628980157, "grad_norm": 0.2698751389980316, "learning_rate": 1.4298776415093007e-06, "loss": 0.3807, "step": 4212 }, { "epoch": 2.3329949238578678, "grad_norm": 0.27001091837882996, "learning_rate": 1.4276215662495075e-06, "loss": 0.3797, "step": 4213 }, { "epoch": 2.3335486848177203, "grad_norm": 0.2734541594982147, "learning_rate": 1.425366975777432e-06, "loss": 0.3982, "step": 4214 }, { "epoch": 2.3341024457775728, "grad_norm": 0.27734020352363586, "learning_rate": 1.4231138710301501e-06, "loss": 0.3911, "step": 4215 }, { "epoch": 2.3346562067374252, "grad_norm": 0.282973051071167, "learning_rate": 1.4208622529441195e-06, "loss": 0.3949, "step": 4216 }, { "epoch": 2.3352099676972773, "grad_norm": 0.28493499755859375, "learning_rate": 1.4186121224551807e-06, "loss": 0.4344, "step": 4217 }, { "epoch": 2.33576372865713, "grad_norm": 0.27878424525260925, "learning_rate": 1.4163634804985555e-06, "loss": 0.4433, "step": 4218 }, { "epoch": 2.336317489616982, "grad_norm": 0.25360623002052307, "learning_rate": 1.4141163280088465e-06, "loss": 0.3653, "step": 4219 }, { "epoch": 2.3368712505768343, "grad_norm": 0.2908909022808075, "learning_rate": 1.4118706659200383e-06, "loss": 0.4248, "step": 4220 }, { "epoch": 2.337425011536687, "grad_norm": 0.2749338448047638, "learning_rate": 1.4096264951654947e-06, "loss": 0.3906, "step": 4221 }, { "epoch": 2.337978772496539, "grad_norm": 0.27490195631980896, "learning_rate": 1.4073838166779614e-06, "loss": 0.4011, "step": 4222 }, { "epoch": 2.3385325334563913, "grad_norm": 0.2672739624977112, "learning_rate": 1.4051426313895639e-06, "loss": 0.3875, "step": 4223 }, { "epoch": 2.339086294416244, "grad_norm": 0.2690718173980713, "learning_rate": 1.4029029402318012e-06, "loss": 0.4031, "step": 4224 }, { "epoch": 2.339640055376096, "grad_norm": 0.2696898877620697, "learning_rate": 1.4006647441355615e-06, "loss": 0.3824, "step": 4225 }, { "epoch": 2.3401938163359484, "grad_norm": 0.2817174196243286, "learning_rate": 1.3984280440311038e-06, "loss": 0.4284, "step": 4226 }, { "epoch": 2.340747577295801, "grad_norm": 0.2687709331512451, "learning_rate": 1.396192840848069e-06, "loss": 0.4172, "step": 4227 }, { "epoch": 2.341301338255653, "grad_norm": 0.2652980387210846, "learning_rate": 1.3939591355154736e-06, "loss": 0.3796, "step": 4228 }, { "epoch": 2.3418550992155054, "grad_norm": 0.2823992371559143, "learning_rate": 1.3917269289617136e-06, "loss": 0.4221, "step": 4229 }, { "epoch": 2.3424088601753574, "grad_norm": 0.27419060468673706, "learning_rate": 1.3894962221145598e-06, "loss": 0.3855, "step": 4230 }, { "epoch": 2.34296262113521, "grad_norm": 0.2704528868198395, "learning_rate": 1.3872670159011619e-06, "loss": 0.4012, "step": 4231 }, { "epoch": 2.3435163820950624, "grad_norm": 0.28245702385902405, "learning_rate": 1.3850393112480458e-06, "loss": 0.4334, "step": 4232 }, { "epoch": 2.3440701430549145, "grad_norm": 0.2932415008544922, "learning_rate": 1.3828131090811086e-06, "loss": 0.395, "step": 4233 }, { "epoch": 2.344623904014767, "grad_norm": 0.27638208866119385, "learning_rate": 1.380588410325631e-06, "loss": 0.3916, "step": 4234 }, { "epoch": 2.3451776649746194, "grad_norm": 0.28110799193382263, "learning_rate": 1.3783652159062627e-06, "loss": 0.4124, "step": 4235 }, { "epoch": 2.3457314259344715, "grad_norm": 0.2593018412590027, "learning_rate": 1.376143526747032e-06, "loss": 0.3769, "step": 4236 }, { "epoch": 2.346285186894324, "grad_norm": 0.2815247178077698, "learning_rate": 1.3739233437713361e-06, "loss": 0.4057, "step": 4237 }, { "epoch": 2.3468389478541765, "grad_norm": 0.2635657489299774, "learning_rate": 1.3717046679019501e-06, "loss": 0.3946, "step": 4238 }, { "epoch": 2.3473927088140285, "grad_norm": 0.27062880992889404, "learning_rate": 1.369487500061026e-06, "loss": 0.4216, "step": 4239 }, { "epoch": 2.347946469773881, "grad_norm": 0.2666405439376831, "learning_rate": 1.367271841170082e-06, "loss": 0.3774, "step": 4240 }, { "epoch": 2.348500230733733, "grad_norm": 0.2794666886329651, "learning_rate": 1.3650576921500158e-06, "loss": 0.38, "step": 4241 }, { "epoch": 2.3490539916935855, "grad_norm": 0.2989083230495453, "learning_rate": 1.3628450539210896e-06, "loss": 0.41, "step": 4242 }, { "epoch": 2.349607752653438, "grad_norm": 0.27386245131492615, "learning_rate": 1.3606339274029434e-06, "loss": 0.3762, "step": 4243 }, { "epoch": 2.3501615136132905, "grad_norm": 0.28093913197517395, "learning_rate": 1.3584243135145907e-06, "loss": 0.4056, "step": 4244 }, { "epoch": 2.3507152745731426, "grad_norm": 0.2630765438079834, "learning_rate": 1.3562162131744128e-06, "loss": 0.4282, "step": 4245 }, { "epoch": 2.351269035532995, "grad_norm": 0.26849842071533203, "learning_rate": 1.3540096273001596e-06, "loss": 0.3713, "step": 4246 }, { "epoch": 2.351822796492847, "grad_norm": 0.273348867893219, "learning_rate": 1.351804556808955e-06, "loss": 0.396, "step": 4247 }, { "epoch": 2.3523765574526996, "grad_norm": 0.2789197564125061, "learning_rate": 1.3496010026172952e-06, "loss": 0.419, "step": 4248 }, { "epoch": 2.352930318412552, "grad_norm": 0.27798226475715637, "learning_rate": 1.3473989656410413e-06, "loss": 0.4103, "step": 4249 }, { "epoch": 2.353484079372404, "grad_norm": 0.27295342087745667, "learning_rate": 1.3451984467954282e-06, "loss": 0.3737, "step": 4250 }, { "epoch": 2.3540378403322566, "grad_norm": 0.2841170132160187, "learning_rate": 1.342999446995054e-06, "loss": 0.3897, "step": 4251 }, { "epoch": 2.354591601292109, "grad_norm": 0.24644559621810913, "learning_rate": 1.3408019671538902e-06, "loss": 0.3512, "step": 4252 }, { "epoch": 2.355145362251961, "grad_norm": 0.27851444482803345, "learning_rate": 1.3386060081852776e-06, "loss": 0.4362, "step": 4253 }, { "epoch": 2.3556991232118136, "grad_norm": 0.2741681635379791, "learning_rate": 1.336411571001922e-06, "loss": 0.3824, "step": 4254 }, { "epoch": 2.356252884171666, "grad_norm": 0.278521329164505, "learning_rate": 1.334218656515896e-06, "loss": 0.3948, "step": 4255 }, { "epoch": 2.356806645131518, "grad_norm": 0.26988425850868225, "learning_rate": 1.3320272656386406e-06, "loss": 0.4067, "step": 4256 }, { "epoch": 2.3573604060913707, "grad_norm": 0.2741510570049286, "learning_rate": 1.3298373992809631e-06, "loss": 0.4051, "step": 4257 }, { "epoch": 2.3579141670512227, "grad_norm": 0.27337029576301575, "learning_rate": 1.3276490583530415e-06, "loss": 0.3844, "step": 4258 }, { "epoch": 2.358467928011075, "grad_norm": 0.26399192214012146, "learning_rate": 1.325462243764412e-06, "loss": 0.448, "step": 4259 }, { "epoch": 2.3590216889709277, "grad_norm": 0.2573324143886566, "learning_rate": 1.3232769564239823e-06, "loss": 0.4096, "step": 4260 }, { "epoch": 2.3595754499307797, "grad_norm": 0.25833189487457275, "learning_rate": 1.3210931972400226e-06, "loss": 0.3726, "step": 4261 }, { "epoch": 2.360129210890632, "grad_norm": 0.2991647720336914, "learning_rate": 1.3189109671201671e-06, "loss": 0.4149, "step": 4262 }, { "epoch": 2.3606829718504847, "grad_norm": 0.2750507593154907, "learning_rate": 1.3167302669714216e-06, "loss": 0.3922, "step": 4263 }, { "epoch": 2.3612367328103367, "grad_norm": 0.2709118723869324, "learning_rate": 1.3145510977001452e-06, "loss": 0.3938, "step": 4264 }, { "epoch": 2.3617904937701892, "grad_norm": 0.2769683003425598, "learning_rate": 1.3123734602120687e-06, "loss": 0.4203, "step": 4265 }, { "epoch": 2.3623442547300417, "grad_norm": 0.24843011796474457, "learning_rate": 1.310197355412282e-06, "loss": 0.3623, "step": 4266 }, { "epoch": 2.3628980156898938, "grad_norm": 0.2811950743198395, "learning_rate": 1.3080227842052406e-06, "loss": 0.4209, "step": 4267 }, { "epoch": 2.3634517766497463, "grad_norm": 0.27756330370903015, "learning_rate": 1.3058497474947613e-06, "loss": 0.3838, "step": 4268 }, { "epoch": 2.3640055376095983, "grad_norm": 0.2734494209289551, "learning_rate": 1.3036782461840236e-06, "loss": 0.4387, "step": 4269 }, { "epoch": 2.364559298569451, "grad_norm": 0.2763257622718811, "learning_rate": 1.3015082811755686e-06, "loss": 0.3848, "step": 4270 }, { "epoch": 2.3651130595293033, "grad_norm": 0.2732127010822296, "learning_rate": 1.2993398533712986e-06, "loss": 0.4192, "step": 4271 }, { "epoch": 2.3656668204891553, "grad_norm": 0.2744021415710449, "learning_rate": 1.2971729636724778e-06, "loss": 0.4329, "step": 4272 }, { "epoch": 2.366220581449008, "grad_norm": 0.2716732621192932, "learning_rate": 1.2950076129797302e-06, "loss": 0.4221, "step": 4273 }, { "epoch": 2.3667743424088603, "grad_norm": 0.28418487310409546, "learning_rate": 1.2928438021930407e-06, "loss": 0.4057, "step": 4274 }, { "epoch": 2.3673281033687124, "grad_norm": 0.262446790933609, "learning_rate": 1.290681532211754e-06, "loss": 0.3634, "step": 4275 }, { "epoch": 2.367881864328565, "grad_norm": 0.31106510758399963, "learning_rate": 1.2885208039345742e-06, "loss": 0.4407, "step": 4276 }, { "epoch": 2.3684356252884173, "grad_norm": 0.28196826577186584, "learning_rate": 1.286361618259565e-06, "loss": 0.3958, "step": 4277 }, { "epoch": 2.3689893862482694, "grad_norm": 0.2843796908855438, "learning_rate": 1.284203976084149e-06, "loss": 0.4025, "step": 4278 }, { "epoch": 2.369543147208122, "grad_norm": 0.260067343711853, "learning_rate": 1.2820478783051066e-06, "loss": 0.4056, "step": 4279 }, { "epoch": 2.370096908167974, "grad_norm": 0.2747773826122284, "learning_rate": 1.279893325818577e-06, "loss": 0.3901, "step": 4280 }, { "epoch": 2.3706506691278264, "grad_norm": 0.27593058347702026, "learning_rate": 1.2777403195200572e-06, "loss": 0.437, "step": 4281 }, { "epoch": 2.371204430087679, "grad_norm": 0.2627639174461365, "learning_rate": 1.275588860304401e-06, "loss": 0.3986, "step": 4282 }, { "epoch": 2.3717581910475314, "grad_norm": 0.28758201003074646, "learning_rate": 1.2734389490658194e-06, "loss": 0.395, "step": 4283 }, { "epoch": 2.3723119520073834, "grad_norm": 0.25965607166290283, "learning_rate": 1.2712905866978797e-06, "loss": 0.3904, "step": 4284 }, { "epoch": 2.372865712967236, "grad_norm": 0.27482789754867554, "learning_rate": 1.2691437740935063e-06, "loss": 0.4062, "step": 4285 }, { "epoch": 2.373419473927088, "grad_norm": 0.2672329545021057, "learning_rate": 1.266998512144979e-06, "loss": 0.3901, "step": 4286 }, { "epoch": 2.3739732348869405, "grad_norm": 0.2526915371417999, "learning_rate": 1.2648548017439327e-06, "loss": 0.4072, "step": 4287 }, { "epoch": 2.374526995846793, "grad_norm": 0.25956326723098755, "learning_rate": 1.262712643781358e-06, "loss": 0.3999, "step": 4288 }, { "epoch": 2.375080756806645, "grad_norm": 0.26591455936431885, "learning_rate": 1.2605720391476e-06, "loss": 0.3691, "step": 4289 }, { "epoch": 2.3756345177664975, "grad_norm": 0.2969902455806732, "learning_rate": 1.2584329887323588e-06, "loss": 0.4473, "step": 4290 }, { "epoch": 2.37618827872635, "grad_norm": 0.25050419569015503, "learning_rate": 1.2562954934246869e-06, "loss": 0.38, "step": 4291 }, { "epoch": 2.376742039686202, "grad_norm": 0.25868141651153564, "learning_rate": 1.2541595541129919e-06, "loss": 0.4316, "step": 4292 }, { "epoch": 2.3772958006460545, "grad_norm": 0.2847694456577301, "learning_rate": 1.2520251716850351e-06, "loss": 0.4004, "step": 4293 }, { "epoch": 2.377849561605907, "grad_norm": 0.3014201521873474, "learning_rate": 1.249892347027929e-06, "loss": 0.4224, "step": 4294 }, { "epoch": 2.378403322565759, "grad_norm": 0.2557206153869629, "learning_rate": 1.2477610810281394e-06, "loss": 0.4032, "step": 4295 }, { "epoch": 2.3789570835256115, "grad_norm": 0.2531353533267975, "learning_rate": 1.245631374571485e-06, "loss": 0.3747, "step": 4296 }, { "epoch": 2.3795108444854636, "grad_norm": 0.27862218022346497, "learning_rate": 1.2435032285431358e-06, "loss": 0.4349, "step": 4297 }, { "epoch": 2.380064605445316, "grad_norm": 0.2813938558101654, "learning_rate": 1.2413766438276132e-06, "loss": 0.3909, "step": 4298 }, { "epoch": 2.3806183664051686, "grad_norm": 0.26779305934906006, "learning_rate": 1.2392516213087902e-06, "loss": 0.4222, "step": 4299 }, { "epoch": 2.3811721273650206, "grad_norm": 0.2833978831768036, "learning_rate": 1.2371281618698867e-06, "loss": 0.3956, "step": 4300 }, { "epoch": 2.381725888324873, "grad_norm": 0.2787715494632721, "learning_rate": 1.2350062663934804e-06, "loss": 0.4042, "step": 4301 }, { "epoch": 2.3822796492847256, "grad_norm": 0.2763793468475342, "learning_rate": 1.2328859357614926e-06, "loss": 0.4163, "step": 4302 }, { "epoch": 2.3828334102445776, "grad_norm": 0.2659527659416199, "learning_rate": 1.2307671708551978e-06, "loss": 0.3815, "step": 4303 }, { "epoch": 2.38338717120443, "grad_norm": 0.27160629630088806, "learning_rate": 1.2286499725552165e-06, "loss": 0.4148, "step": 4304 }, { "epoch": 2.3839409321642826, "grad_norm": 0.26805445551872253, "learning_rate": 1.2265343417415187e-06, "loss": 0.394, "step": 4305 }, { "epoch": 2.3844946931241346, "grad_norm": 0.26376932859420776, "learning_rate": 1.224420279293428e-06, "loss": 0.3814, "step": 4306 }, { "epoch": 2.385048454083987, "grad_norm": 0.2791670560836792, "learning_rate": 1.22230778608961e-06, "loss": 0.4491, "step": 4307 }, { "epoch": 2.385602215043839, "grad_norm": 0.25988033413887024, "learning_rate": 1.220196863008082e-06, "loss": 0.3514, "step": 4308 }, { "epoch": 2.3861559760036917, "grad_norm": 0.28300073742866516, "learning_rate": 1.2180875109262037e-06, "loss": 0.4449, "step": 4309 }, { "epoch": 2.386709736963544, "grad_norm": 0.2550990879535675, "learning_rate": 1.2159797307206855e-06, "loss": 0.3573, "step": 4310 }, { "epoch": 2.3872634979233966, "grad_norm": 0.26842981576919556, "learning_rate": 1.2138735232675874e-06, "loss": 0.4007, "step": 4311 }, { "epoch": 2.3878172588832487, "grad_norm": 0.26750996708869934, "learning_rate": 1.2117688894423124e-06, "loss": 0.4002, "step": 4312 }, { "epoch": 2.388371019843101, "grad_norm": 0.27889305353164673, "learning_rate": 1.2096658301196057e-06, "loss": 0.4133, "step": 4313 }, { "epoch": 2.3889247808029532, "grad_norm": 0.2647861838340759, "learning_rate": 1.2075643461735625e-06, "loss": 0.361, "step": 4314 }, { "epoch": 2.3894785417628057, "grad_norm": 0.2604064643383026, "learning_rate": 1.2054644384776259e-06, "loss": 0.3984, "step": 4315 }, { "epoch": 2.390032302722658, "grad_norm": 0.2545636296272278, "learning_rate": 1.20336610790458e-06, "loss": 0.4052, "step": 4316 }, { "epoch": 2.3905860636825103, "grad_norm": 0.27108034491539, "learning_rate": 1.2012693553265508e-06, "loss": 0.4152, "step": 4317 }, { "epoch": 2.3911398246423627, "grad_norm": 0.2748431861400604, "learning_rate": 1.1991741816150142e-06, "loss": 0.4073, "step": 4318 }, { "epoch": 2.3916935856022152, "grad_norm": 0.2751467227935791, "learning_rate": 1.1970805876407848e-06, "loss": 0.4184, "step": 4319 }, { "epoch": 2.3922473465620673, "grad_norm": 0.2856447100639343, "learning_rate": 1.1949885742740264e-06, "loss": 0.4252, "step": 4320 }, { "epoch": 2.3928011075219198, "grad_norm": 0.2597223222255707, "learning_rate": 1.1928981423842428e-06, "loss": 0.3475, "step": 4321 }, { "epoch": 2.3933548684817723, "grad_norm": 0.2803245484828949, "learning_rate": 1.1908092928402765e-06, "loss": 0.4377, "step": 4322 }, { "epoch": 2.3939086294416243, "grad_norm": 0.26782578229904175, "learning_rate": 1.1887220265103183e-06, "loss": 0.3856, "step": 4323 }, { "epoch": 2.394462390401477, "grad_norm": 0.2613855004310608, "learning_rate": 1.1866363442618972e-06, "loss": 0.3652, "step": 4324 }, { "epoch": 2.395016151361329, "grad_norm": 0.2838379442691803, "learning_rate": 1.1845522469618898e-06, "loss": 0.4286, "step": 4325 }, { "epoch": 2.3955699123211813, "grad_norm": 0.2861630916595459, "learning_rate": 1.1824697354765046e-06, "loss": 0.4183, "step": 4326 }, { "epoch": 2.396123673281034, "grad_norm": 0.26394516229629517, "learning_rate": 1.180388810671298e-06, "loss": 0.3983, "step": 4327 }, { "epoch": 2.396677434240886, "grad_norm": 0.2730492651462555, "learning_rate": 1.178309473411164e-06, "loss": 0.3489, "step": 4328 }, { "epoch": 2.3972311952007384, "grad_norm": 0.2737986445426941, "learning_rate": 1.1762317245603366e-06, "loss": 0.4238, "step": 4329 }, { "epoch": 2.397784956160591, "grad_norm": 0.28468412160873413, "learning_rate": 1.1741555649823955e-06, "loss": 0.4044, "step": 4330 }, { "epoch": 2.398338717120443, "grad_norm": 0.2662509083747864, "learning_rate": 1.1720809955402496e-06, "loss": 0.3934, "step": 4331 }, { "epoch": 2.3988924780802954, "grad_norm": 0.27842143177986145, "learning_rate": 1.1700080170961547e-06, "loss": 0.4062, "step": 4332 }, { "epoch": 2.399446239040148, "grad_norm": 0.26144781708717346, "learning_rate": 1.1679366305117002e-06, "loss": 0.3609, "step": 4333 }, { "epoch": 2.4, "grad_norm": 0.3036467134952545, "learning_rate": 1.1658668366478221e-06, "loss": 0.3681, "step": 4334 }, { "epoch": 2.4005537609598524, "grad_norm": 0.2887554466724396, "learning_rate": 1.1637986363647835e-06, "loss": 0.464, "step": 4335 }, { "epoch": 2.4011075219197044, "grad_norm": 0.24939768016338348, "learning_rate": 1.161732030522193e-06, "loss": 0.3491, "step": 4336 }, { "epoch": 2.401661282879557, "grad_norm": 0.27684131264686584, "learning_rate": 1.1596670199789933e-06, "loss": 0.4249, "step": 4337 }, { "epoch": 2.4022150438394094, "grad_norm": 0.2849019169807434, "learning_rate": 1.1576036055934647e-06, "loss": 0.4193, "step": 4338 }, { "epoch": 2.4027688047992615, "grad_norm": 0.2793867290019989, "learning_rate": 1.1555417882232251e-06, "loss": 0.3793, "step": 4339 }, { "epoch": 2.403322565759114, "grad_norm": 0.266747385263443, "learning_rate": 1.153481568725226e-06, "loss": 0.424, "step": 4340 }, { "epoch": 2.4038763267189664, "grad_norm": 0.27895668148994446, "learning_rate": 1.1514229479557581e-06, "loss": 0.3648, "step": 4341 }, { "epoch": 2.4044300876788185, "grad_norm": 0.28793859481811523, "learning_rate": 1.1493659267704455e-06, "loss": 0.4185, "step": 4342 }, { "epoch": 2.404983848638671, "grad_norm": 0.26966556906700134, "learning_rate": 1.1473105060242478e-06, "loss": 0.3839, "step": 4343 }, { "epoch": 2.4055376095985235, "grad_norm": 0.27788054943084717, "learning_rate": 1.1452566865714593e-06, "loss": 0.4148, "step": 4344 }, { "epoch": 2.4060913705583755, "grad_norm": 0.29069337248802185, "learning_rate": 1.1432044692657095e-06, "loss": 0.4255, "step": 4345 }, { "epoch": 2.406645131518228, "grad_norm": 0.2638160288333893, "learning_rate": 1.141153854959961e-06, "loss": 0.4104, "step": 4346 }, { "epoch": 2.40719889247808, "grad_norm": 0.26099538803100586, "learning_rate": 1.1391048445065118e-06, "loss": 0.3793, "step": 4347 }, { "epoch": 2.4077526534379325, "grad_norm": 0.28196951746940613, "learning_rate": 1.1370574387569904e-06, "loss": 0.4065, "step": 4348 }, { "epoch": 2.408306414397785, "grad_norm": 0.28393086791038513, "learning_rate": 1.1350116385623617e-06, "loss": 0.4007, "step": 4349 }, { "epoch": 2.4088601753576375, "grad_norm": 0.2974472641944885, "learning_rate": 1.1329674447729206e-06, "loss": 0.4399, "step": 4350 }, { "epoch": 2.4094139363174896, "grad_norm": 0.27174991369247437, "learning_rate": 1.1309248582382964e-06, "loss": 0.3806, "step": 4351 }, { "epoch": 2.409967697277342, "grad_norm": 0.2569180428981781, "learning_rate": 1.128883879807448e-06, "loss": 0.3848, "step": 4352 }, { "epoch": 2.410521458237194, "grad_norm": 0.27496206760406494, "learning_rate": 1.126844510328669e-06, "loss": 0.4265, "step": 4353 }, { "epoch": 2.4110752191970466, "grad_norm": 0.26897794008255005, "learning_rate": 1.1248067506495808e-06, "loss": 0.399, "step": 4354 }, { "epoch": 2.411628980156899, "grad_norm": 0.28752341866493225, "learning_rate": 1.1227706016171392e-06, "loss": 0.4108, "step": 4355 }, { "epoch": 2.412182741116751, "grad_norm": 0.2680257558822632, "learning_rate": 1.1207360640776277e-06, "loss": 0.3666, "step": 4356 }, { "epoch": 2.4127365020766036, "grad_norm": 0.2642374634742737, "learning_rate": 1.1187031388766612e-06, "loss": 0.3995, "step": 4357 }, { "epoch": 2.413290263036456, "grad_norm": 0.281202107667923, "learning_rate": 1.1166718268591852e-06, "loss": 0.4554, "step": 4358 }, { "epoch": 2.413844023996308, "grad_norm": 0.24558034539222717, "learning_rate": 1.114642128869473e-06, "loss": 0.3732, "step": 4359 }, { "epoch": 2.4143977849561606, "grad_norm": 0.2566201686859131, "learning_rate": 1.112614045751129e-06, "loss": 0.4179, "step": 4360 }, { "epoch": 2.414951545916013, "grad_norm": 0.2832145690917969, "learning_rate": 1.1105875783470866e-06, "loss": 0.4216, "step": 4361 }, { "epoch": 2.415505306875865, "grad_norm": 0.2688491642475128, "learning_rate": 1.1085627274996013e-06, "loss": 0.3854, "step": 4362 }, { "epoch": 2.4160590678357177, "grad_norm": 0.27154284715652466, "learning_rate": 1.1065394940502678e-06, "loss": 0.3717, "step": 4363 }, { "epoch": 2.4166128287955697, "grad_norm": 0.30437585711479187, "learning_rate": 1.1045178788399996e-06, "loss": 0.4113, "step": 4364 }, { "epoch": 2.417166589755422, "grad_norm": 0.26176750659942627, "learning_rate": 1.102497882709041e-06, "loss": 0.3895, "step": 4365 }, { "epoch": 2.4177203507152747, "grad_norm": 0.25176915526390076, "learning_rate": 1.1004795064969653e-06, "loss": 0.3685, "step": 4366 }, { "epoch": 2.4182741116751267, "grad_norm": 0.28461289405822754, "learning_rate": 1.098462751042666e-06, "loss": 0.4083, "step": 4367 }, { "epoch": 2.4188278726349792, "grad_norm": 0.2670905888080597, "learning_rate": 1.09644761718437e-06, "loss": 0.3931, "step": 4368 }, { "epoch": 2.4193816335948317, "grad_norm": 0.28363680839538574, "learning_rate": 1.094434105759628e-06, "loss": 0.4387, "step": 4369 }, { "epoch": 2.4199353945546838, "grad_norm": 0.24310392141342163, "learning_rate": 1.0924222176053157e-06, "loss": 0.3607, "step": 4370 }, { "epoch": 2.4204891555145363, "grad_norm": 0.2620546221733093, "learning_rate": 1.0904119535576312e-06, "loss": 0.4284, "step": 4371 }, { "epoch": 2.4210429164743887, "grad_norm": 0.2520657181739807, "learning_rate": 1.0884033144521023e-06, "loss": 0.3983, "step": 4372 }, { "epoch": 2.421596677434241, "grad_norm": 0.2735734283924103, "learning_rate": 1.0863963011235812e-06, "loss": 0.4109, "step": 4373 }, { "epoch": 2.4221504383940933, "grad_norm": 0.25938528776168823, "learning_rate": 1.0843909144062415e-06, "loss": 0.3724, "step": 4374 }, { "epoch": 2.4227041993539453, "grad_norm": 0.2710282802581787, "learning_rate": 1.082387155133584e-06, "loss": 0.3708, "step": 4375 }, { "epoch": 2.423257960313798, "grad_norm": 0.28002673387527466, "learning_rate": 1.0803850241384267e-06, "loss": 0.4113, "step": 4376 }, { "epoch": 2.4238117212736503, "grad_norm": 0.2800930142402649, "learning_rate": 1.0783845222529193e-06, "loss": 0.4318, "step": 4377 }, { "epoch": 2.4243654822335023, "grad_norm": 0.2818918228149414, "learning_rate": 1.07638565030853e-06, "loss": 0.3847, "step": 4378 }, { "epoch": 2.424919243193355, "grad_norm": 0.26583951711654663, "learning_rate": 1.0743884091360507e-06, "loss": 0.3928, "step": 4379 }, { "epoch": 2.4254730041532073, "grad_norm": 0.2966756224632263, "learning_rate": 1.0723927995655913e-06, "loss": 0.4345, "step": 4380 }, { "epoch": 2.4260267651130594, "grad_norm": 0.2848028242588043, "learning_rate": 1.0703988224265888e-06, "loss": 0.4183, "step": 4381 }, { "epoch": 2.426580526072912, "grad_norm": 0.26810184121131897, "learning_rate": 1.068406478547801e-06, "loss": 0.3954, "step": 4382 }, { "epoch": 2.4271342870327643, "grad_norm": 0.25437963008880615, "learning_rate": 1.0664157687573068e-06, "loss": 0.4048, "step": 4383 }, { "epoch": 2.4276880479926164, "grad_norm": 0.2606421113014221, "learning_rate": 1.0644266938825021e-06, "loss": 0.4162, "step": 4384 }, { "epoch": 2.428241808952469, "grad_norm": 0.25913408398628235, "learning_rate": 1.0624392547501073e-06, "loss": 0.3851, "step": 4385 }, { "epoch": 2.4287955699123214, "grad_norm": 0.25716283917427063, "learning_rate": 1.0604534521861604e-06, "loss": 0.3843, "step": 4386 }, { "epoch": 2.4293493308721734, "grad_norm": 0.27220121026039124, "learning_rate": 1.0584692870160235e-06, "loss": 0.4097, "step": 4387 }, { "epoch": 2.429903091832026, "grad_norm": 0.26714998483657837, "learning_rate": 1.0564867600643747e-06, "loss": 0.4209, "step": 4388 }, { "epoch": 2.4304568527918784, "grad_norm": 0.27289071679115295, "learning_rate": 1.0545058721552092e-06, "loss": 0.3983, "step": 4389 }, { "epoch": 2.4310106137517304, "grad_norm": 0.2983061671257019, "learning_rate": 1.0525266241118453e-06, "loss": 0.4583, "step": 4390 }, { "epoch": 2.431564374711583, "grad_norm": 0.2619453966617584, "learning_rate": 1.0505490167569166e-06, "loss": 0.4135, "step": 4391 }, { "epoch": 2.432118135671435, "grad_norm": 0.25369033217430115, "learning_rate": 1.0485730509123793e-06, "loss": 0.3642, "step": 4392 }, { "epoch": 2.4326718966312875, "grad_norm": 0.28247183561325073, "learning_rate": 1.046598727399501e-06, "loss": 0.4108, "step": 4393 }, { "epoch": 2.43322565759114, "grad_norm": 0.29498621821403503, "learning_rate": 1.0446260470388702e-06, "loss": 0.4273, "step": 4394 }, { "epoch": 2.433779418550992, "grad_norm": 0.264722615480423, "learning_rate": 1.0426550106503924e-06, "loss": 0.3639, "step": 4395 }, { "epoch": 2.4343331795108445, "grad_norm": 0.2904975414276123, "learning_rate": 1.0406856190532905e-06, "loss": 0.4313, "step": 4396 }, { "epoch": 2.434886940470697, "grad_norm": 0.2651444375514984, "learning_rate": 1.0387178730661012e-06, "loss": 0.3968, "step": 4397 }, { "epoch": 2.435440701430549, "grad_norm": 0.27891629934310913, "learning_rate": 1.0367517735066795e-06, "loss": 0.3673, "step": 4398 }, { "epoch": 2.4359944623904015, "grad_norm": 0.26334142684936523, "learning_rate": 1.034787321192196e-06, "loss": 0.4226, "step": 4399 }, { "epoch": 2.436548223350254, "grad_norm": 0.26523295044898987, "learning_rate": 1.0328245169391337e-06, "loss": 0.4, "step": 4400 }, { "epoch": 2.437101984310106, "grad_norm": 0.2865385115146637, "learning_rate": 1.030863361563298e-06, "loss": 0.4176, "step": 4401 }, { "epoch": 2.4376557452699585, "grad_norm": 0.26273414492607117, "learning_rate": 1.0289038558797993e-06, "loss": 0.3834, "step": 4402 }, { "epoch": 2.4382095062298106, "grad_norm": 0.2985856831073761, "learning_rate": 1.026946000703068e-06, "loss": 0.4471, "step": 4403 }, { "epoch": 2.438763267189663, "grad_norm": 0.2575697898864746, "learning_rate": 1.024989796846848e-06, "loss": 0.3658, "step": 4404 }, { "epoch": 2.4393170281495156, "grad_norm": 0.3000069260597229, "learning_rate": 1.0230352451241965e-06, "loss": 0.4041, "step": 4405 }, { "epoch": 2.4398707891093676, "grad_norm": 0.27640220522880554, "learning_rate": 1.0210823463474834e-06, "loss": 0.4321, "step": 4406 }, { "epoch": 2.44042455006922, "grad_norm": 0.27708643674850464, "learning_rate": 1.0191311013283923e-06, "loss": 0.3963, "step": 4407 }, { "epoch": 2.4409783110290726, "grad_norm": 0.25382453203201294, "learning_rate": 1.0171815108779188e-06, "loss": 0.3802, "step": 4408 }, { "epoch": 2.4415320719889246, "grad_norm": 0.2661491930484772, "learning_rate": 1.015233575806372e-06, "loss": 0.4301, "step": 4409 }, { "epoch": 2.442085832948777, "grad_norm": 0.2674829363822937, "learning_rate": 1.0132872969233716e-06, "loss": 0.3932, "step": 4410 }, { "epoch": 2.4426395939086296, "grad_norm": 0.26790663599967957, "learning_rate": 1.0113426750378497e-06, "loss": 0.4019, "step": 4411 }, { "epoch": 2.4431933548684817, "grad_norm": 0.26379865407943726, "learning_rate": 1.00939971095805e-06, "loss": 0.3946, "step": 4412 }, { "epoch": 2.443747115828334, "grad_norm": 0.25451868772506714, "learning_rate": 1.007458405491526e-06, "loss": 0.3809, "step": 4413 }, { "epoch": 2.444300876788186, "grad_norm": 0.24942266941070557, "learning_rate": 1.0055187594451426e-06, "loss": 0.3947, "step": 4414 }, { "epoch": 2.4448546377480387, "grad_norm": 0.24906621873378754, "learning_rate": 1.0035807736250746e-06, "loss": 0.4002, "step": 4415 }, { "epoch": 2.445408398707891, "grad_norm": 0.24378572404384613, "learning_rate": 1.0016444488368082e-06, "loss": 0.3714, "step": 4416 }, { "epoch": 2.4459621596677437, "grad_norm": 0.2804885506629944, "learning_rate": 9.99709785885138e-07, "loss": 0.4037, "step": 4417 }, { "epoch": 2.4465159206275957, "grad_norm": 0.2650180160999298, "learning_rate": 9.977767855741665e-07, "loss": 0.3674, "step": 4418 }, { "epoch": 2.447069681587448, "grad_norm": 0.28002896904945374, "learning_rate": 9.958454487073083e-07, "loss": 0.418, "step": 4419 }, { "epoch": 2.4476234425473002, "grad_norm": 0.2663615047931671, "learning_rate": 9.939157760872835e-07, "loss": 0.3959, "step": 4420 }, { "epoch": 2.4481772035071527, "grad_norm": 0.26619887351989746, "learning_rate": 9.91987768516124e-07, "loss": 0.3927, "step": 4421 }, { "epoch": 2.4487309644670052, "grad_norm": 0.281735897064209, "learning_rate": 9.900614267951652e-07, "loss": 0.4409, "step": 4422 }, { "epoch": 2.4492847254268573, "grad_norm": 0.2704601585865021, "learning_rate": 9.881367517250546e-07, "loss": 0.3956, "step": 4423 }, { "epoch": 2.4498384863867098, "grad_norm": 0.26885271072387695, "learning_rate": 9.86213744105744e-07, "loss": 0.4126, "step": 4424 }, { "epoch": 2.4503922473465622, "grad_norm": 0.26491379737854004, "learning_rate": 9.842924047364926e-07, "loss": 0.3639, "step": 4425 }, { "epoch": 2.4509460083064143, "grad_norm": 0.2654478847980499, "learning_rate": 9.823727344158679e-07, "loss": 0.4093, "step": 4426 }, { "epoch": 2.451499769266267, "grad_norm": 0.2647843360900879, "learning_rate": 9.80454733941742e-07, "loss": 0.3898, "step": 4427 }, { "epoch": 2.4520535302261193, "grad_norm": 0.26904183626174927, "learning_rate": 9.785384041112944e-07, "loss": 0.4086, "step": 4428 }, { "epoch": 2.4526072911859713, "grad_norm": 0.26513415575027466, "learning_rate": 9.76623745721006e-07, "loss": 0.3885, "step": 4429 }, { "epoch": 2.453161052145824, "grad_norm": 0.27709612250328064, "learning_rate": 9.747107595666694e-07, "loss": 0.4064, "step": 4430 }, { "epoch": 2.453714813105676, "grad_norm": 0.2569100558757782, "learning_rate": 9.727994464433787e-07, "loss": 0.3916, "step": 4431 }, { "epoch": 2.4542685740655283, "grad_norm": 0.27623486518859863, "learning_rate": 9.708898071455324e-07, "loss": 0.4093, "step": 4432 }, { "epoch": 2.454822335025381, "grad_norm": 0.2598889172077179, "learning_rate": 9.689818424668356e-07, "loss": 0.3715, "step": 4433 }, { "epoch": 2.455376095985233, "grad_norm": 0.2744204103946686, "learning_rate": 9.670755532002913e-07, "loss": 0.4038, "step": 4434 }, { "epoch": 2.4559298569450854, "grad_norm": 0.2870950698852539, "learning_rate": 9.651709401382147e-07, "loss": 0.4033, "step": 4435 }, { "epoch": 2.456483617904938, "grad_norm": 0.24959112703800201, "learning_rate": 9.632680040722192e-07, "loss": 0.4021, "step": 4436 }, { "epoch": 2.45703737886479, "grad_norm": 0.2875157296657562, "learning_rate": 9.613667457932235e-07, "loss": 0.4272, "step": 4437 }, { "epoch": 2.4575911398246424, "grad_norm": 0.2669791877269745, "learning_rate": 9.59467166091444e-07, "loss": 0.4249, "step": 4438 }, { "epoch": 2.458144900784495, "grad_norm": 0.25309357047080994, "learning_rate": 9.57569265756404e-07, "loss": 0.3535, "step": 4439 }, { "epoch": 2.458698661744347, "grad_norm": 0.28949275612831116, "learning_rate": 9.556730455769304e-07, "loss": 0.4245, "step": 4440 }, { "epoch": 2.4592524227041994, "grad_norm": 0.28393006324768066, "learning_rate": 9.537785063411487e-07, "loss": 0.4214, "step": 4441 }, { "epoch": 2.4598061836640515, "grad_norm": 0.2620568871498108, "learning_rate": 9.518856488364842e-07, "loss": 0.374, "step": 4442 }, { "epoch": 2.460359944623904, "grad_norm": 0.28689953684806824, "learning_rate": 9.499944738496653e-07, "loss": 0.4203, "step": 4443 }, { "epoch": 2.4609137055837564, "grad_norm": 0.2686920464038849, "learning_rate": 9.481049821667232e-07, "loss": 0.3679, "step": 4444 }, { "epoch": 2.4614674665436085, "grad_norm": 0.2873205542564392, "learning_rate": 9.462171745729864e-07, "loss": 0.3768, "step": 4445 }, { "epoch": 2.462021227503461, "grad_norm": 0.27623823285102844, "learning_rate": 9.443310518530857e-07, "loss": 0.4338, "step": 4446 }, { "epoch": 2.4625749884633135, "grad_norm": 0.2842974364757538, "learning_rate": 9.424466147909473e-07, "loss": 0.4437, "step": 4447 }, { "epoch": 2.4631287494231655, "grad_norm": 0.2761882245540619, "learning_rate": 9.405638641698006e-07, "loss": 0.4027, "step": 4448 }, { "epoch": 2.463682510383018, "grad_norm": 0.2989676892757416, "learning_rate": 9.386828007721754e-07, "loss": 0.41, "step": 4449 }, { "epoch": 2.4642362713428705, "grad_norm": 0.27078521251678467, "learning_rate": 9.368034253798985e-07, "loss": 0.3871, "step": 4450 }, { "epoch": 2.4647900323027225, "grad_norm": 0.2800975739955902, "learning_rate": 9.349257387740918e-07, "loss": 0.4045, "step": 4451 }, { "epoch": 2.465343793262575, "grad_norm": 0.2814874053001404, "learning_rate": 9.330497417351792e-07, "loss": 0.4206, "step": 4452 }, { "epoch": 2.4658975542224275, "grad_norm": 0.2636072039604187, "learning_rate": 9.311754350428803e-07, "loss": 0.3904, "step": 4453 }, { "epoch": 2.4664513151822796, "grad_norm": 0.303575724363327, "learning_rate": 9.293028194762166e-07, "loss": 0.4078, "step": 4454 }, { "epoch": 2.467005076142132, "grad_norm": 0.2639390230178833, "learning_rate": 9.27431895813502e-07, "loss": 0.3958, "step": 4455 }, { "epoch": 2.4675588371019845, "grad_norm": 0.271467387676239, "learning_rate": 9.255626648323474e-07, "loss": 0.3947, "step": 4456 }, { "epoch": 2.4681125980618366, "grad_norm": 0.26796942949295044, "learning_rate": 9.236951273096611e-07, "loss": 0.4094, "step": 4457 }, { "epoch": 2.468666359021689, "grad_norm": 0.26645031571388245, "learning_rate": 9.218292840216476e-07, "loss": 0.4133, "step": 4458 }, { "epoch": 2.469220119981541, "grad_norm": 0.26613306999206543, "learning_rate": 9.199651357438111e-07, "loss": 0.3818, "step": 4459 }, { "epoch": 2.4697738809413936, "grad_norm": 0.29730895161628723, "learning_rate": 9.181026832509432e-07, "loss": 0.4244, "step": 4460 }, { "epoch": 2.470327641901246, "grad_norm": 0.26325714588165283, "learning_rate": 9.162419273171364e-07, "loss": 0.3884, "step": 4461 }, { "epoch": 2.470881402861098, "grad_norm": 0.2867657542228699, "learning_rate": 9.143828687157758e-07, "loss": 0.4047, "step": 4462 }, { "epoch": 2.4714351638209506, "grad_norm": 0.29645806550979614, "learning_rate": 9.125255082195461e-07, "loss": 0.4154, "step": 4463 }, { "epoch": 2.471988924780803, "grad_norm": 0.28083929419517517, "learning_rate": 9.106698466004177e-07, "loss": 0.411, "step": 4464 }, { "epoch": 2.472542685740655, "grad_norm": 0.2599436640739441, "learning_rate": 9.088158846296602e-07, "loss": 0.3771, "step": 4465 }, { "epoch": 2.4730964467005077, "grad_norm": 0.27060168981552124, "learning_rate": 9.069636230778367e-07, "loss": 0.3932, "step": 4466 }, { "epoch": 2.47365020766036, "grad_norm": 0.2581622898578644, "learning_rate": 9.051130627148013e-07, "loss": 0.3859, "step": 4467 }, { "epoch": 2.474203968620212, "grad_norm": 0.28090280294418335, "learning_rate": 9.032642043097056e-07, "loss": 0.4179, "step": 4468 }, { "epoch": 2.4747577295800647, "grad_norm": 0.2543794512748718, "learning_rate": 9.014170486309875e-07, "loss": 0.3909, "step": 4469 }, { "epoch": 2.4753114905399167, "grad_norm": 0.28129255771636963, "learning_rate": 8.995715964463814e-07, "loss": 0.4483, "step": 4470 }, { "epoch": 2.475865251499769, "grad_norm": 0.25866416096687317, "learning_rate": 8.977278485229124e-07, "loss": 0.4081, "step": 4471 }, { "epoch": 2.4764190124596217, "grad_norm": 0.26224297285079956, "learning_rate": 8.958858056268982e-07, "loss": 0.3906, "step": 4472 }, { "epoch": 2.4769727734194738, "grad_norm": 0.2856189012527466, "learning_rate": 8.940454685239463e-07, "loss": 0.4242, "step": 4473 }, { "epoch": 2.4775265343793262, "grad_norm": 0.2425912320613861, "learning_rate": 8.922068379789561e-07, "loss": 0.3626, "step": 4474 }, { "epoch": 2.4780802953391787, "grad_norm": 0.2723233103752136, "learning_rate": 8.903699147561185e-07, "loss": 0.4311, "step": 4475 }, { "epoch": 2.4786340562990308, "grad_norm": 0.269638329744339, "learning_rate": 8.885346996189131e-07, "loss": 0.4137, "step": 4476 }, { "epoch": 2.4791878172588833, "grad_norm": 0.2754197120666504, "learning_rate": 8.867011933301107e-07, "loss": 0.4226, "step": 4477 }, { "epoch": 2.4797415782187358, "grad_norm": 0.2687733769416809, "learning_rate": 8.848693966517712e-07, "loss": 0.3993, "step": 4478 }, { "epoch": 2.480295339178588, "grad_norm": 0.2498006522655487, "learning_rate": 8.830393103452445e-07, "loss": 0.3741, "step": 4479 }, { "epoch": 2.4808491001384403, "grad_norm": 0.2595152258872986, "learning_rate": 8.812109351711701e-07, "loss": 0.4108, "step": 4480 }, { "epoch": 2.4814028610982923, "grad_norm": 0.26860561966896057, "learning_rate": 8.793842718894746e-07, "loss": 0.4214, "step": 4481 }, { "epoch": 2.481956622058145, "grad_norm": 0.2574399709701538, "learning_rate": 8.775593212593747e-07, "loss": 0.3793, "step": 4482 }, { "epoch": 2.4825103830179973, "grad_norm": 0.27738428115844727, "learning_rate": 8.757360840393747e-07, "loss": 0.4232, "step": 4483 }, { "epoch": 2.48306414397785, "grad_norm": 0.25269538164138794, "learning_rate": 8.739145609872662e-07, "loss": 0.3846, "step": 4484 }, { "epoch": 2.483617904937702, "grad_norm": 0.2719399034976959, "learning_rate": 8.720947528601292e-07, "loss": 0.4108, "step": 4485 }, { "epoch": 2.4841716658975543, "grad_norm": 0.261881947517395, "learning_rate": 8.702766604143304e-07, "loss": 0.4097, "step": 4486 }, { "epoch": 2.4847254268574064, "grad_norm": 0.2849792540073395, "learning_rate": 8.68460284405524e-07, "loss": 0.414, "step": 4487 }, { "epoch": 2.485279187817259, "grad_norm": 0.26150670647621155, "learning_rate": 8.666456255886502e-07, "loss": 0.3896, "step": 4488 }, { "epoch": 2.4858329487771114, "grad_norm": 0.2557274103164673, "learning_rate": 8.64832684717935e-07, "loss": 0.4011, "step": 4489 }, { "epoch": 2.4863867097369634, "grad_norm": 0.28404685854911804, "learning_rate": 8.630214625468925e-07, "loss": 0.4268, "step": 4490 }, { "epoch": 2.486940470696816, "grad_norm": 0.2818399667739868, "learning_rate": 8.612119598283197e-07, "loss": 0.4259, "step": 4491 }, { "epoch": 2.4874942316566684, "grad_norm": 0.2584396302700043, "learning_rate": 8.594041773143008e-07, "loss": 0.3815, "step": 4492 }, { "epoch": 2.4880479926165204, "grad_norm": 0.25961732864379883, "learning_rate": 8.575981157562046e-07, "loss": 0.4138, "step": 4493 }, { "epoch": 2.488601753576373, "grad_norm": 0.27324992418289185, "learning_rate": 8.557937759046841e-07, "loss": 0.4088, "step": 4494 }, { "epoch": 2.4891555145362254, "grad_norm": 0.27866506576538086, "learning_rate": 8.539911585096789e-07, "loss": 0.4071, "step": 4495 }, { "epoch": 2.4897092754960775, "grad_norm": 0.2789616882801056, "learning_rate": 8.521902643204061e-07, "loss": 0.4009, "step": 4496 }, { "epoch": 2.49026303645593, "grad_norm": 0.2620631456375122, "learning_rate": 8.503910940853766e-07, "loss": 0.3871, "step": 4497 }, { "epoch": 2.490816797415782, "grad_norm": 0.2639429271221161, "learning_rate": 8.485936485523771e-07, "loss": 0.3754, "step": 4498 }, { "epoch": 2.4913705583756345, "grad_norm": 0.2680778205394745, "learning_rate": 8.467979284684808e-07, "loss": 0.3929, "step": 4499 }, { "epoch": 2.491924319335487, "grad_norm": 0.2710658013820648, "learning_rate": 8.450039345800431e-07, "loss": 0.4205, "step": 4500 }, { "epoch": 2.492478080295339, "grad_norm": 0.2584711015224457, "learning_rate": 8.43211667632699e-07, "loss": 0.3961, "step": 4501 }, { "epoch": 2.4930318412551915, "grad_norm": 0.266398549079895, "learning_rate": 8.41421128371372e-07, "loss": 0.4253, "step": 4502 }, { "epoch": 2.493585602215044, "grad_norm": 0.27515941858291626, "learning_rate": 8.396323175402621e-07, "loss": 0.3563, "step": 4503 }, { "epoch": 2.494139363174896, "grad_norm": 0.302212119102478, "learning_rate": 8.378452358828548e-07, "loss": 0.4313, "step": 4504 }, { "epoch": 2.4946931241347485, "grad_norm": 0.2626042068004608, "learning_rate": 8.360598841419104e-07, "loss": 0.4101, "step": 4505 }, { "epoch": 2.495246885094601, "grad_norm": 0.2516461908817291, "learning_rate": 8.342762630594792e-07, "loss": 0.3824, "step": 4506 }, { "epoch": 2.495800646054453, "grad_norm": 0.2754150331020355, "learning_rate": 8.324943733768859e-07, "loss": 0.394, "step": 4507 }, { "epoch": 2.4963544070143056, "grad_norm": 0.2744555175304413, "learning_rate": 8.307142158347381e-07, "loss": 0.4268, "step": 4508 }, { "epoch": 2.4969081679741576, "grad_norm": 0.2552061975002289, "learning_rate": 8.289357911729212e-07, "loss": 0.3637, "step": 4509 }, { "epoch": 2.49746192893401, "grad_norm": 0.27311182022094727, "learning_rate": 8.271591001306018e-07, "loss": 0.3839, "step": 4510 }, { "epoch": 2.4980156898938626, "grad_norm": 0.2570371627807617, "learning_rate": 8.253841434462279e-07, "loss": 0.3899, "step": 4511 }, { "epoch": 2.4985694508537146, "grad_norm": 0.2576935589313507, "learning_rate": 8.236109218575239e-07, "loss": 0.3753, "step": 4512 }, { "epoch": 2.499123211813567, "grad_norm": 0.2704581320285797, "learning_rate": 8.218394361014953e-07, "loss": 0.4241, "step": 4513 }, { "epoch": 2.4996769727734196, "grad_norm": 0.28029900789260864, "learning_rate": 8.200696869144214e-07, "loss": 0.4, "step": 4514 }, { "epoch": 2.5002307337332716, "grad_norm": 0.29517000913619995, "learning_rate": 8.183016750318645e-07, "loss": 0.4401, "step": 4515 }, { "epoch": 2.500784494693124, "grad_norm": 0.24071955680847168, "learning_rate": 8.165354011886651e-07, "loss": 0.3616, "step": 4516 }, { "epoch": 2.5013382556529766, "grad_norm": 0.25136280059814453, "learning_rate": 8.147708661189396e-07, "loss": 0.3827, "step": 4517 }, { "epoch": 2.5018920166128287, "grad_norm": 0.2619665563106537, "learning_rate": 8.130080705560794e-07, "loss": 0.3631, "step": 4518 }, { "epoch": 2.502445777572681, "grad_norm": 0.2757844030857086, "learning_rate": 8.112470152327562e-07, "loss": 0.4296, "step": 4519 }, { "epoch": 2.502999538532533, "grad_norm": 0.24477659165859222, "learning_rate": 8.094877008809171e-07, "loss": 0.3745, "step": 4520 }, { "epoch": 2.5035532994923857, "grad_norm": 0.27765464782714844, "learning_rate": 8.077301282317884e-07, "loss": 0.4161, "step": 4521 }, { "epoch": 2.504107060452238, "grad_norm": 0.26674386858940125, "learning_rate": 8.059742980158675e-07, "loss": 0.4027, "step": 4522 }, { "epoch": 2.5046608214120907, "grad_norm": 0.26226741075515747, "learning_rate": 8.042202109629305e-07, "loss": 0.4136, "step": 4523 }, { "epoch": 2.5052145823719427, "grad_norm": 0.2574157416820526, "learning_rate": 8.024678678020292e-07, "loss": 0.4129, "step": 4524 }, { "epoch": 2.505768343331795, "grad_norm": 0.24432441592216492, "learning_rate": 8.007172692614884e-07, "loss": 0.3729, "step": 4525 }, { "epoch": 2.5063221042916473, "grad_norm": 0.2766527235507965, "learning_rate": 7.989684160689137e-07, "loss": 0.4144, "step": 4526 }, { "epoch": 2.5068758652514997, "grad_norm": 0.2724073529243469, "learning_rate": 7.972213089511766e-07, "loss": 0.4057, "step": 4527 }, { "epoch": 2.5074296262113522, "grad_norm": 0.26971861720085144, "learning_rate": 7.954759486344288e-07, "loss": 0.409, "step": 4528 }, { "epoch": 2.5079833871712043, "grad_norm": 0.25494176149368286, "learning_rate": 7.937323358440935e-07, "loss": 0.3745, "step": 4529 }, { "epoch": 2.5085371481310568, "grad_norm": 0.2977398633956909, "learning_rate": 7.919904713048721e-07, "loss": 0.408, "step": 4530 }, { "epoch": 2.509090909090909, "grad_norm": 0.26780858635902405, "learning_rate": 7.902503557407315e-07, "loss": 0.4023, "step": 4531 }, { "epoch": 2.5096446700507613, "grad_norm": 0.28510865569114685, "learning_rate": 7.885119898749177e-07, "loss": 0.3976, "step": 4532 }, { "epoch": 2.510198431010614, "grad_norm": 0.2624487280845642, "learning_rate": 7.867753744299472e-07, "loss": 0.3373, "step": 4533 }, { "epoch": 2.5107521919704663, "grad_norm": 0.27503809332847595, "learning_rate": 7.850405101276093e-07, "loss": 0.4193, "step": 4534 }, { "epoch": 2.5113059529303183, "grad_norm": 0.2734338045120239, "learning_rate": 7.833073976889688e-07, "loss": 0.4001, "step": 4535 }, { "epoch": 2.511859713890171, "grad_norm": 0.28795769810676575, "learning_rate": 7.815760378343557e-07, "loss": 0.4422, "step": 4536 }, { "epoch": 2.512413474850023, "grad_norm": 0.2728448212146759, "learning_rate": 7.798464312833759e-07, "loss": 0.3913, "step": 4537 }, { "epoch": 2.5129672358098754, "grad_norm": 0.25175347924232483, "learning_rate": 7.781185787549061e-07, "loss": 0.3812, "step": 4538 }, { "epoch": 2.513520996769728, "grad_norm": 0.2711610496044159, "learning_rate": 7.763924809670942e-07, "loss": 0.3864, "step": 4539 }, { "epoch": 2.5140747577295803, "grad_norm": 0.2785853147506714, "learning_rate": 7.746681386373573e-07, "loss": 0.3923, "step": 4540 }, { "epoch": 2.5146285186894324, "grad_norm": 0.26172834634780884, "learning_rate": 7.729455524823847e-07, "loss": 0.4057, "step": 4541 }, { "epoch": 2.515182279649285, "grad_norm": 0.25957584381103516, "learning_rate": 7.712247232181341e-07, "loss": 0.4018, "step": 4542 }, { "epoch": 2.515736040609137, "grad_norm": 0.2691158950328827, "learning_rate": 7.695056515598348e-07, "loss": 0.4108, "step": 4543 }, { "epoch": 2.5162898015689894, "grad_norm": 0.2975912392139435, "learning_rate": 7.677883382219836e-07, "loss": 0.4042, "step": 4544 }, { "epoch": 2.516843562528842, "grad_norm": 0.2661561071872711, "learning_rate": 7.66072783918348e-07, "loss": 0.3911, "step": 4545 }, { "epoch": 2.517397323488694, "grad_norm": 0.2804882824420929, "learning_rate": 7.643589893619641e-07, "loss": 0.4276, "step": 4546 }, { "epoch": 2.5179510844485464, "grad_norm": 0.25852763652801514, "learning_rate": 7.626469552651356e-07, "loss": 0.3634, "step": 4547 }, { "epoch": 2.5185048454083985, "grad_norm": 0.28282660245895386, "learning_rate": 7.609366823394354e-07, "loss": 0.4064, "step": 4548 }, { "epoch": 2.519058606368251, "grad_norm": 0.30842217803001404, "learning_rate": 7.59228171295705e-07, "loss": 0.4368, "step": 4549 }, { "epoch": 2.5196123673281035, "grad_norm": 0.2581627666950226, "learning_rate": 7.575214228440525e-07, "loss": 0.3625, "step": 4550 }, { "epoch": 2.520166128287956, "grad_norm": 0.26633593440055847, "learning_rate": 7.558164376938538e-07, "loss": 0.3906, "step": 4551 }, { "epoch": 2.520719889247808, "grad_norm": 0.27780881524086, "learning_rate": 7.541132165537518e-07, "loss": 0.4073, "step": 4552 }, { "epoch": 2.5212736502076605, "grad_norm": 0.25776195526123047, "learning_rate": 7.524117601316561e-07, "loss": 0.4166, "step": 4553 }, { "epoch": 2.5218274111675125, "grad_norm": 0.2646631598472595, "learning_rate": 7.507120691347436e-07, "loss": 0.3972, "step": 4554 }, { "epoch": 2.522381172127365, "grad_norm": 0.2567483186721802, "learning_rate": 7.490141442694565e-07, "loss": 0.3963, "step": 4555 }, { "epoch": 2.5229349330872175, "grad_norm": 0.2665424048900604, "learning_rate": 7.473179862415036e-07, "loss": 0.4395, "step": 4556 }, { "epoch": 2.5234886940470695, "grad_norm": 0.2812087833881378, "learning_rate": 7.456235957558594e-07, "loss": 0.3911, "step": 4557 }, { "epoch": 2.524042455006922, "grad_norm": 0.2887595593929291, "learning_rate": 7.439309735167627e-07, "loss": 0.4011, "step": 4558 }, { "epoch": 2.524596215966774, "grad_norm": 0.258523166179657, "learning_rate": 7.422401202277179e-07, "loss": 0.3949, "step": 4559 }, { "epoch": 2.5251499769266266, "grad_norm": 0.262901246547699, "learning_rate": 7.405510365914958e-07, "loss": 0.3902, "step": 4560 }, { "epoch": 2.525703737886479, "grad_norm": 0.25048109889030457, "learning_rate": 7.388637233101292e-07, "loss": 0.3819, "step": 4561 }, { "epoch": 2.5262574988463316, "grad_norm": 0.27576759457588196, "learning_rate": 7.371781810849171e-07, "loss": 0.4219, "step": 4562 }, { "epoch": 2.5268112598061836, "grad_norm": 0.2740483283996582, "learning_rate": 7.354944106164185e-07, "loss": 0.3905, "step": 4563 }, { "epoch": 2.527365020766036, "grad_norm": 0.278437077999115, "learning_rate": 7.338124126044627e-07, "loss": 0.4212, "step": 4564 }, { "epoch": 2.527918781725888, "grad_norm": 0.2629513740539551, "learning_rate": 7.321321877481369e-07, "loss": 0.4078, "step": 4565 }, { "epoch": 2.5284725426857406, "grad_norm": 0.2611263394355774, "learning_rate": 7.304537367457942e-07, "loss": 0.3781, "step": 4566 }, { "epoch": 2.529026303645593, "grad_norm": 0.2947920262813568, "learning_rate": 7.287770602950467e-07, "loss": 0.4062, "step": 4567 }, { "epoch": 2.529580064605445, "grad_norm": 0.2742443084716797, "learning_rate": 7.271021590927723e-07, "loss": 0.3818, "step": 4568 }, { "epoch": 2.5301338255652976, "grad_norm": 0.2795540392398834, "learning_rate": 7.254290338351116e-07, "loss": 0.4172, "step": 4569 }, { "epoch": 2.53068758652515, "grad_norm": 0.2812526524066925, "learning_rate": 7.237576852174649e-07, "loss": 0.3896, "step": 4570 }, { "epoch": 2.531241347485002, "grad_norm": 0.2568764090538025, "learning_rate": 7.22088113934496e-07, "loss": 0.3952, "step": 4571 }, { "epoch": 2.5317951084448547, "grad_norm": 0.2906055152416229, "learning_rate": 7.204203206801258e-07, "loss": 0.4131, "step": 4572 }, { "epoch": 2.532348869404707, "grad_norm": 0.2609862983226776, "learning_rate": 7.187543061475422e-07, "loss": 0.3746, "step": 4573 }, { "epoch": 2.532902630364559, "grad_norm": 0.27241504192352295, "learning_rate": 7.170900710291895e-07, "loss": 0.4008, "step": 4574 }, { "epoch": 2.5334563913244117, "grad_norm": 0.28357669711112976, "learning_rate": 7.15427616016775e-07, "loss": 0.403, "step": 4575 }, { "epoch": 2.5340101522842637, "grad_norm": 0.2645058333873749, "learning_rate": 7.137669418012633e-07, "loss": 0.3979, "step": 4576 }, { "epoch": 2.5345639132441162, "grad_norm": 0.282266229391098, "learning_rate": 7.121080490728799e-07, "loss": 0.3898, "step": 4577 }, { "epoch": 2.5351176742039687, "grad_norm": 0.26073741912841797, "learning_rate": 7.104509385211128e-07, "loss": 0.3933, "step": 4578 }, { "epoch": 2.535671435163821, "grad_norm": 0.25776997208595276, "learning_rate": 7.08795610834706e-07, "loss": 0.447, "step": 4579 }, { "epoch": 2.5362251961236733, "grad_norm": 0.2571149170398712, "learning_rate": 7.071420667016643e-07, "loss": 0.3899, "step": 4580 }, { "epoch": 2.5367789570835257, "grad_norm": 0.2572614848613739, "learning_rate": 7.054903068092478e-07, "loss": 0.3858, "step": 4581 }, { "epoch": 2.537332718043378, "grad_norm": 0.25401854515075684, "learning_rate": 7.038403318439774e-07, "loss": 0.3879, "step": 4582 }, { "epoch": 2.5378864790032303, "grad_norm": 0.2748258709907532, "learning_rate": 7.021921424916351e-07, "loss": 0.4252, "step": 4583 }, { "epoch": 2.5384402399630828, "grad_norm": 0.26103606820106506, "learning_rate": 7.005457394372573e-07, "loss": 0.4041, "step": 4584 }, { "epoch": 2.538994000922935, "grad_norm": 0.2840994894504547, "learning_rate": 6.989011233651366e-07, "loss": 0.4254, "step": 4585 }, { "epoch": 2.5395477618827873, "grad_norm": 0.2594205439090729, "learning_rate": 6.972582949588258e-07, "loss": 0.3856, "step": 4586 }, { "epoch": 2.5401015228426393, "grad_norm": 0.2571709156036377, "learning_rate": 6.956172549011325e-07, "loss": 0.4025, "step": 4587 }, { "epoch": 2.540655283802492, "grad_norm": 0.2726648449897766, "learning_rate": 6.939780038741261e-07, "loss": 0.4179, "step": 4588 }, { "epoch": 2.5412090447623443, "grad_norm": 0.32112640142440796, "learning_rate": 6.923405425591246e-07, "loss": 0.463, "step": 4589 }, { "epoch": 2.541762805722197, "grad_norm": 0.26135993003845215, "learning_rate": 6.907048716367077e-07, "loss": 0.3504, "step": 4590 }, { "epoch": 2.542316566682049, "grad_norm": 0.29566267132759094, "learning_rate": 6.890709917867073e-07, "loss": 0.4367, "step": 4591 }, { "epoch": 2.5428703276419014, "grad_norm": 0.2646361291408539, "learning_rate": 6.874389036882162e-07, "loss": 0.3651, "step": 4592 }, { "epoch": 2.5434240886017534, "grad_norm": 0.2612144947052002, "learning_rate": 6.858086080195786e-07, "loss": 0.4228, "step": 4593 }, { "epoch": 2.543977849561606, "grad_norm": 0.27061864733695984, "learning_rate": 6.841801054583924e-07, "loss": 0.3666, "step": 4594 }, { "epoch": 2.5445316105214584, "grad_norm": 0.2552361786365509, "learning_rate": 6.825533966815129e-07, "loss": 0.3729, "step": 4595 }, { "epoch": 2.5450853714813104, "grad_norm": 0.2565443217754364, "learning_rate": 6.80928482365048e-07, "loss": 0.4162, "step": 4596 }, { "epoch": 2.545639132441163, "grad_norm": 0.29232534766197205, "learning_rate": 6.79305363184365e-07, "loss": 0.4142, "step": 4597 }, { "epoch": 2.546192893401015, "grad_norm": 0.2917829155921936, "learning_rate": 6.776840398140766e-07, "loss": 0.3951, "step": 4598 }, { "epoch": 2.5467466543608674, "grad_norm": 0.256096214056015, "learning_rate": 6.760645129280552e-07, "loss": 0.3708, "step": 4599 }, { "epoch": 2.54730041532072, "grad_norm": 0.28080782294273376, "learning_rate": 6.744467831994245e-07, "loss": 0.4246, "step": 4600 }, { "epoch": 2.5478541762805724, "grad_norm": 0.27050942182540894, "learning_rate": 6.728308513005616e-07, "loss": 0.4031, "step": 4601 }, { "epoch": 2.5484079372404245, "grad_norm": 0.259002685546875, "learning_rate": 6.712167179030965e-07, "loss": 0.3893, "step": 4602 }, { "epoch": 2.548961698200277, "grad_norm": 0.2563716769218445, "learning_rate": 6.696043836779109e-07, "loss": 0.4141, "step": 4603 }, { "epoch": 2.549515459160129, "grad_norm": 0.250912606716156, "learning_rate": 6.679938492951404e-07, "loss": 0.3745, "step": 4604 }, { "epoch": 2.5500692201199815, "grad_norm": 0.2733975648880005, "learning_rate": 6.663851154241702e-07, "loss": 0.4061, "step": 4605 }, { "epoch": 2.550622981079834, "grad_norm": 0.3017726242542267, "learning_rate": 6.647781827336392e-07, "loss": 0.434, "step": 4606 }, { "epoch": 2.5511767420396865, "grad_norm": 0.26858317852020264, "learning_rate": 6.63173051891437e-07, "loss": 0.3895, "step": 4607 }, { "epoch": 2.5517305029995385, "grad_norm": 0.2604641914367676, "learning_rate": 6.615697235647034e-07, "loss": 0.3712, "step": 4608 }, { "epoch": 2.552284263959391, "grad_norm": 0.26486650109291077, "learning_rate": 6.599681984198303e-07, "loss": 0.4417, "step": 4609 }, { "epoch": 2.552838024919243, "grad_norm": 0.26176440715789795, "learning_rate": 6.583684771224591e-07, "loss": 0.3632, "step": 4610 }, { "epoch": 2.5533917858790955, "grad_norm": 0.26462823152542114, "learning_rate": 6.56770560337483e-07, "loss": 0.3762, "step": 4611 }, { "epoch": 2.553945546838948, "grad_norm": 0.2787936329841614, "learning_rate": 6.551744487290429e-07, "loss": 0.4214, "step": 4612 }, { "epoch": 2.5544993077988, "grad_norm": 0.2572127878665924, "learning_rate": 6.53580142960532e-07, "loss": 0.3821, "step": 4613 }, { "epoch": 2.5550530687586526, "grad_norm": 0.27807122468948364, "learning_rate": 6.51987643694591e-07, "loss": 0.4202, "step": 4614 }, { "epoch": 2.5556068297185046, "grad_norm": 0.26179176568984985, "learning_rate": 6.503969515931103e-07, "loss": 0.3792, "step": 4615 }, { "epoch": 2.556160590678357, "grad_norm": 0.26012080907821655, "learning_rate": 6.488080673172293e-07, "loss": 0.4152, "step": 4616 }, { "epoch": 2.5567143516382096, "grad_norm": 0.27008089423179626, "learning_rate": 6.472209915273359e-07, "loss": 0.4195, "step": 4617 }, { "epoch": 2.557268112598062, "grad_norm": 0.2609677016735077, "learning_rate": 6.456357248830669e-07, "loss": 0.3928, "step": 4618 }, { "epoch": 2.557821873557914, "grad_norm": 0.2669029235839844, "learning_rate": 6.440522680433064e-07, "loss": 0.4204, "step": 4619 }, { "epoch": 2.5583756345177666, "grad_norm": 0.2778733968734741, "learning_rate": 6.424706216661869e-07, "loss": 0.437, "step": 4620 }, { "epoch": 2.5589293954776187, "grad_norm": 0.2551177740097046, "learning_rate": 6.40890786409088e-07, "loss": 0.3852, "step": 4621 }, { "epoch": 2.559483156437471, "grad_norm": 0.2675500512123108, "learning_rate": 6.393127629286361e-07, "loss": 0.39, "step": 4622 }, { "epoch": 2.5600369173973236, "grad_norm": 0.28506308794021606, "learning_rate": 6.377365518807061e-07, "loss": 0.4063, "step": 4623 }, { "epoch": 2.5605906783571757, "grad_norm": 0.27348417043685913, "learning_rate": 6.361621539204177e-07, "loss": 0.4036, "step": 4624 }, { "epoch": 2.561144439317028, "grad_norm": 0.2732211649417877, "learning_rate": 6.345895697021381e-07, "loss": 0.4276, "step": 4625 }, { "epoch": 2.56169820027688, "grad_norm": 0.2626565396785736, "learning_rate": 6.330187998794812e-07, "loss": 0.3801, "step": 4626 }, { "epoch": 2.5622519612367327, "grad_norm": 0.280337929725647, "learning_rate": 6.314498451053052e-07, "loss": 0.4461, "step": 4627 }, { "epoch": 2.562805722196585, "grad_norm": 0.2468683421611786, "learning_rate": 6.298827060317158e-07, "loss": 0.3786, "step": 4628 }, { "epoch": 2.5633594831564377, "grad_norm": 0.26183128356933594, "learning_rate": 6.283173833100631e-07, "loss": 0.3987, "step": 4629 }, { "epoch": 2.5639132441162897, "grad_norm": 0.2693942189216614, "learning_rate": 6.267538775909399e-07, "loss": 0.4109, "step": 4630 }, { "epoch": 2.5644670050761422, "grad_norm": 0.27322691679000854, "learning_rate": 6.251921895241886e-07, "loss": 0.4269, "step": 4631 }, { "epoch": 2.5650207660359943, "grad_norm": 0.24507249891757965, "learning_rate": 6.236323197588928e-07, "loss": 0.3674, "step": 4632 }, { "epoch": 2.5655745269958468, "grad_norm": 0.26169073581695557, "learning_rate": 6.220742689433823e-07, "loss": 0.3613, "step": 4633 }, { "epoch": 2.5661282879556992, "grad_norm": 0.27921366691589355, "learning_rate": 6.205180377252268e-07, "loss": 0.419, "step": 4634 }, { "epoch": 2.5666820489155513, "grad_norm": 0.2665126919746399, "learning_rate": 6.189636267512461e-07, "loss": 0.3829, "step": 4635 }, { "epoch": 2.567235809875404, "grad_norm": 0.2588074505329132, "learning_rate": 6.174110366674979e-07, "loss": 0.3959, "step": 4636 }, { "epoch": 2.5677895708352563, "grad_norm": 0.25938090682029724, "learning_rate": 6.158602681192866e-07, "loss": 0.4156, "step": 4637 }, { "epoch": 2.5683433317951083, "grad_norm": 0.2456352859735489, "learning_rate": 6.143113217511581e-07, "loss": 0.3379, "step": 4638 }, { "epoch": 2.568897092754961, "grad_norm": 0.27476921677589417, "learning_rate": 6.127641982068988e-07, "loss": 0.4251, "step": 4639 }, { "epoch": 2.5694508537148133, "grad_norm": 0.2596856951713562, "learning_rate": 6.11218898129542e-07, "loss": 0.4185, "step": 4640 }, { "epoch": 2.5700046146746653, "grad_norm": 0.2552991509437561, "learning_rate": 6.096754221613605e-07, "loss": 0.3726, "step": 4641 }, { "epoch": 2.570558375634518, "grad_norm": 0.2727959454059601, "learning_rate": 6.081337709438695e-07, "loss": 0.3937, "step": 4642 }, { "epoch": 2.57111213659437, "grad_norm": 0.2715253233909607, "learning_rate": 6.065939451178243e-07, "loss": 0.4119, "step": 4643 }, { "epoch": 2.5716658975542224, "grad_norm": 0.27433887124061584, "learning_rate": 6.050559453232218e-07, "loss": 0.4019, "step": 4644 }, { "epoch": 2.572219658514075, "grad_norm": 0.26932841539382935, "learning_rate": 6.035197721993035e-07, "loss": 0.3776, "step": 4645 }, { "epoch": 2.5727734194739273, "grad_norm": 0.27677202224731445, "learning_rate": 6.019854263845499e-07, "loss": 0.3793, "step": 4646 }, { "epoch": 2.5733271804337794, "grad_norm": 0.2418980449438095, "learning_rate": 6.004529085166771e-07, "loss": 0.3739, "step": 4647 }, { "epoch": 2.573880941393632, "grad_norm": 0.28254327178001404, "learning_rate": 5.989222192326482e-07, "loss": 0.4297, "step": 4648 }, { "epoch": 2.574434702353484, "grad_norm": 0.26133307814598083, "learning_rate": 5.973933591686626e-07, "loss": 0.3668, "step": 4649 }, { "epoch": 2.5749884633133364, "grad_norm": 0.2567996382713318, "learning_rate": 5.958663289601613e-07, "loss": 0.4243, "step": 4650 }, { "epoch": 2.575542224273189, "grad_norm": 0.26300132274627686, "learning_rate": 5.943411292418249e-07, "loss": 0.3721, "step": 4651 }, { "epoch": 2.576095985233041, "grad_norm": 0.282741516828537, "learning_rate": 5.928177606475699e-07, "loss": 0.4258, "step": 4652 }, { "epoch": 2.5766497461928934, "grad_norm": 0.26685380935668945, "learning_rate": 5.912962238105535e-07, "loss": 0.4129, "step": 4653 }, { "epoch": 2.5772035071527455, "grad_norm": 0.26430973410606384, "learning_rate": 5.897765193631749e-07, "loss": 0.4232, "step": 4654 }, { "epoch": 2.577757268112598, "grad_norm": 0.26398277282714844, "learning_rate": 5.882586479370689e-07, "loss": 0.3631, "step": 4655 }, { "epoch": 2.5783110290724505, "grad_norm": 0.2680082321166992, "learning_rate": 5.867426101631057e-07, "loss": 0.413, "step": 4656 }, { "epoch": 2.578864790032303, "grad_norm": 0.2543841004371643, "learning_rate": 5.85228406671397e-07, "loss": 0.4101, "step": 4657 }, { "epoch": 2.579418550992155, "grad_norm": 0.26363909244537354, "learning_rate": 5.837160380912915e-07, "loss": 0.4202, "step": 4658 }, { "epoch": 2.5799723119520075, "grad_norm": 0.2549119293689728, "learning_rate": 5.822055050513758e-07, "loss": 0.3915, "step": 4659 }, { "epoch": 2.5805260729118595, "grad_norm": 0.2749481499195099, "learning_rate": 5.806968081794733e-07, "loss": 0.4359, "step": 4660 }, { "epoch": 2.581079833871712, "grad_norm": 0.28374335169792175, "learning_rate": 5.791899481026414e-07, "loss": 0.3984, "step": 4661 }, { "epoch": 2.5816335948315645, "grad_norm": 0.27765920758247375, "learning_rate": 5.776849254471778e-07, "loss": 0.3992, "step": 4662 }, { "epoch": 2.5821873557914166, "grad_norm": 0.2596611976623535, "learning_rate": 5.761817408386139e-07, "loss": 0.3725, "step": 4663 }, { "epoch": 2.582741116751269, "grad_norm": 0.26520413160324097, "learning_rate": 5.74680394901721e-07, "loss": 0.4324, "step": 4664 }, { "epoch": 2.583294877711121, "grad_norm": 0.278999388217926, "learning_rate": 5.731808882605011e-07, "loss": 0.4132, "step": 4665 }, { "epoch": 2.5838486386709736, "grad_norm": 0.27048736810684204, "learning_rate": 5.716832215381945e-07, "loss": 0.3754, "step": 4666 }, { "epoch": 2.584402399630826, "grad_norm": 0.2622511684894562, "learning_rate": 5.701873953572773e-07, "loss": 0.4089, "step": 4667 }, { "epoch": 2.5849561605906786, "grad_norm": 0.27257558703422546, "learning_rate": 5.686934103394592e-07, "loss": 0.4152, "step": 4668 }, { "epoch": 2.5855099215505306, "grad_norm": 0.25897789001464844, "learning_rate": 5.672012671056853e-07, "loss": 0.4125, "step": 4669 }, { "epoch": 2.586063682510383, "grad_norm": 0.27802297472953796, "learning_rate": 5.657109662761356e-07, "loss": 0.3993, "step": 4670 }, { "epoch": 2.586617443470235, "grad_norm": 0.2964835464954376, "learning_rate": 5.64222508470223e-07, "loss": 0.4106, "step": 4671 }, { "epoch": 2.5871712044300876, "grad_norm": 0.25568827986717224, "learning_rate": 5.627358943065959e-07, "loss": 0.4003, "step": 4672 }, { "epoch": 2.58772496538994, "grad_norm": 0.24669942259788513, "learning_rate": 5.612511244031355e-07, "loss": 0.3787, "step": 4673 }, { "epoch": 2.5882787263497926, "grad_norm": 0.2610163986682892, "learning_rate": 5.597681993769566e-07, "loss": 0.4047, "step": 4674 }, { "epoch": 2.5888324873096447, "grad_norm": 0.26623037457466125, "learning_rate": 5.582871198444079e-07, "loss": 0.3898, "step": 4675 }, { "epoch": 2.589386248269497, "grad_norm": 0.23957575857639313, "learning_rate": 5.568078864210702e-07, "loss": 0.3611, "step": 4676 }, { "epoch": 2.589940009229349, "grad_norm": 0.28069889545440674, "learning_rate": 5.553304997217568e-07, "loss": 0.4614, "step": 4677 }, { "epoch": 2.5904937701892017, "grad_norm": 0.2602003514766693, "learning_rate": 5.538549603605148e-07, "loss": 0.3971, "step": 4678 }, { "epoch": 2.591047531149054, "grad_norm": 0.2531012296676636, "learning_rate": 5.523812689506225e-07, "loss": 0.3984, "step": 4679 }, { "epoch": 2.591601292108906, "grad_norm": 0.2569826543331146, "learning_rate": 5.509094261045894e-07, "loss": 0.3814, "step": 4680 }, { "epoch": 2.5921550530687587, "grad_norm": 0.249299556016922, "learning_rate": 5.49439432434159e-07, "loss": 0.4068, "step": 4681 }, { "epoch": 2.5927088140286108, "grad_norm": 0.2664724290370941, "learning_rate": 5.479712885503036e-07, "loss": 0.4024, "step": 4682 }, { "epoch": 2.5932625749884632, "grad_norm": 0.2816915810108185, "learning_rate": 5.46504995063229e-07, "loss": 0.4226, "step": 4683 }, { "epoch": 2.5938163359483157, "grad_norm": 0.2765473425388336, "learning_rate": 5.450405525823694e-07, "loss": 0.4115, "step": 4684 }, { "epoch": 2.594370096908168, "grad_norm": 0.26871708035469055, "learning_rate": 5.435779617163922e-07, "loss": 0.4141, "step": 4685 }, { "epoch": 2.5949238578680203, "grad_norm": 0.2574812173843384, "learning_rate": 5.421172230731936e-07, "loss": 0.3954, "step": 4686 }, { "epoch": 2.5954776188278728, "grad_norm": 0.2586316764354706, "learning_rate": 5.406583372599005e-07, "loss": 0.404, "step": 4687 }, { "epoch": 2.596031379787725, "grad_norm": 0.2752704918384552, "learning_rate": 5.392013048828692e-07, "loss": 0.3985, "step": 4688 }, { "epoch": 2.5965851407475773, "grad_norm": 0.27729618549346924, "learning_rate": 5.377461265476868e-07, "loss": 0.4, "step": 4689 }, { "epoch": 2.59713890170743, "grad_norm": 0.2714489996433258, "learning_rate": 5.362928028591691e-07, "loss": 0.3858, "step": 4690 }, { "epoch": 2.597692662667282, "grad_norm": 0.2760624289512634, "learning_rate": 5.348413344213616e-07, "loss": 0.3819, "step": 4691 }, { "epoch": 2.5982464236271343, "grad_norm": 0.25962239503860474, "learning_rate": 5.333917218375356e-07, "loss": 0.3758, "step": 4692 }, { "epoch": 2.5988001845869864, "grad_norm": 0.2525016963481903, "learning_rate": 5.31943965710196e-07, "loss": 0.397, "step": 4693 }, { "epoch": 2.599353945546839, "grad_norm": 0.27021950483322144, "learning_rate": 5.304980666410731e-07, "loss": 0.4142, "step": 4694 }, { "epoch": 2.5999077065066913, "grad_norm": 0.27316129207611084, "learning_rate": 5.290540252311255e-07, "loss": 0.4174, "step": 4695 }, { "epoch": 2.600461467466544, "grad_norm": 0.2610187232494354, "learning_rate": 5.27611842080541e-07, "loss": 0.3769, "step": 4696 }, { "epoch": 2.601015228426396, "grad_norm": 0.28499820828437805, "learning_rate": 5.261715177887338e-07, "loss": 0.44, "step": 4697 }, { "epoch": 2.6015689893862484, "grad_norm": 0.2555350363254547, "learning_rate": 5.24733052954346e-07, "loss": 0.3651, "step": 4698 }, { "epoch": 2.6021227503461004, "grad_norm": 0.26324567198753357, "learning_rate": 5.232964481752462e-07, "loss": 0.4069, "step": 4699 }, { "epoch": 2.602676511305953, "grad_norm": 0.2789902985095978, "learning_rate": 5.218617040485325e-07, "loss": 0.4011, "step": 4700 }, { "epoch": 2.6032302722658054, "grad_norm": 0.2637130916118622, "learning_rate": 5.204288211705238e-07, "loss": 0.404, "step": 4701 }, { "epoch": 2.6037840332256574, "grad_norm": 0.2607278525829315, "learning_rate": 5.189978001367724e-07, "loss": 0.4127, "step": 4702 }, { "epoch": 2.60433779418551, "grad_norm": 0.2610120475292206, "learning_rate": 5.175686415420528e-07, "loss": 0.4115, "step": 4703 }, { "epoch": 2.6048915551453624, "grad_norm": 0.26737338304519653, "learning_rate": 5.161413459803661e-07, "loss": 0.4046, "step": 4704 }, { "epoch": 2.6054453161052145, "grad_norm": 0.26707711815834045, "learning_rate": 5.147159140449398e-07, "loss": 0.3982, "step": 4705 }, { "epoch": 2.605999077065067, "grad_norm": 0.2503456175327301, "learning_rate": 5.132923463282241e-07, "loss": 0.3811, "step": 4706 }, { "epoch": 2.6065528380249194, "grad_norm": 0.2668859660625458, "learning_rate": 5.11870643421899e-07, "loss": 0.4118, "step": 4707 }, { "epoch": 2.6071065989847715, "grad_norm": 0.26472535729408264, "learning_rate": 5.104508059168656e-07, "loss": 0.3744, "step": 4708 }, { "epoch": 2.607660359944624, "grad_norm": 0.2869988977909088, "learning_rate": 5.090328344032525e-07, "loss": 0.4057, "step": 4709 }, { "epoch": 2.608214120904476, "grad_norm": 0.26442578434944153, "learning_rate": 5.076167294704088e-07, "loss": 0.3829, "step": 4710 }, { "epoch": 2.6087678818643285, "grad_norm": 0.2489709109067917, "learning_rate": 5.062024917069102e-07, "loss": 0.3744, "step": 4711 }, { "epoch": 2.609321642824181, "grad_norm": 0.26304852962493896, "learning_rate": 5.047901217005591e-07, "loss": 0.371, "step": 4712 }, { "epoch": 2.6098754037840335, "grad_norm": 0.3238840103149414, "learning_rate": 5.033796200383778e-07, "loss": 0.4178, "step": 4713 }, { "epoch": 2.6104291647438855, "grad_norm": 0.28173646330833435, "learning_rate": 5.019709873066125e-07, "loss": 0.4091, "step": 4714 }, { "epoch": 2.610982925703738, "grad_norm": 0.29242193698883057, "learning_rate": 5.005642240907326e-07, "loss": 0.4229, "step": 4715 }, { "epoch": 2.61153668666359, "grad_norm": 0.26628369092941284, "learning_rate": 4.991593309754317e-07, "loss": 0.3877, "step": 4716 }, { "epoch": 2.6120904476234426, "grad_norm": 0.2709799110889435, "learning_rate": 4.977563085446263e-07, "loss": 0.4204, "step": 4717 }, { "epoch": 2.612644208583295, "grad_norm": 0.24893687665462494, "learning_rate": 4.963551573814551e-07, "loss": 0.3365, "step": 4718 }, { "epoch": 2.613197969543147, "grad_norm": 0.29093775153160095, "learning_rate": 4.949558780682773e-07, "loss": 0.4559, "step": 4719 }, { "epoch": 2.6137517305029996, "grad_norm": 0.27325016260147095, "learning_rate": 4.935584711866742e-07, "loss": 0.4408, "step": 4720 }, { "epoch": 2.6143054914628516, "grad_norm": 0.24542458355426788, "learning_rate": 4.921629373174524e-07, "loss": 0.3649, "step": 4721 }, { "epoch": 2.614859252422704, "grad_norm": 0.28902050852775574, "learning_rate": 4.907692770406381e-07, "loss": 0.3766, "step": 4722 }, { "epoch": 2.6154130133825566, "grad_norm": 0.268311470746994, "learning_rate": 4.893774909354765e-07, "loss": 0.3983, "step": 4723 }, { "epoch": 2.615966774342409, "grad_norm": 0.2712451219558716, "learning_rate": 4.879875795804357e-07, "loss": 0.401, "step": 4724 }, { "epoch": 2.616520535302261, "grad_norm": 0.27701258659362793, "learning_rate": 4.865995435532045e-07, "loss": 0.3855, "step": 4725 }, { "epoch": 2.6170742962621136, "grad_norm": 0.2805980443954468, "learning_rate": 4.85213383430695e-07, "loss": 0.4288, "step": 4726 }, { "epoch": 2.6176280572219657, "grad_norm": 0.2871350646018982, "learning_rate": 4.838290997890338e-07, "loss": 0.4118, "step": 4727 }, { "epoch": 2.618181818181818, "grad_norm": 0.26219722628593445, "learning_rate": 4.824466932035721e-07, "loss": 0.4057, "step": 4728 }, { "epoch": 2.6187355791416707, "grad_norm": 0.2607058882713318, "learning_rate": 4.810661642488789e-07, "loss": 0.3888, "step": 4729 }, { "epoch": 2.6192893401015227, "grad_norm": 0.2531439960002899, "learning_rate": 4.796875134987433e-07, "loss": 0.4022, "step": 4730 }, { "epoch": 2.619843101061375, "grad_norm": 0.269115686416626, "learning_rate": 4.783107415261767e-07, "loss": 0.4226, "step": 4731 }, { "epoch": 2.6203968620212272, "grad_norm": 0.27558085322380066, "learning_rate": 4.769358489034037e-07, "loss": 0.4, "step": 4732 }, { "epoch": 2.6209506229810797, "grad_norm": 0.2722338140010834, "learning_rate": 4.7556283620187217e-07, "loss": 0.4333, "step": 4733 }, { "epoch": 2.621504383940932, "grad_norm": 0.26523348689079285, "learning_rate": 4.7419170399224666e-07, "loss": 0.4079, "step": 4734 }, { "epoch": 2.6220581449007847, "grad_norm": 0.2451266050338745, "learning_rate": 4.72822452844412e-07, "loss": 0.386, "step": 4735 }, { "epoch": 2.6226119058606367, "grad_norm": 0.25770601630210876, "learning_rate": 4.7145508332746947e-07, "loss": 0.4043, "step": 4736 }, { "epoch": 2.6231656668204892, "grad_norm": 0.26418983936309814, "learning_rate": 4.7008959600973846e-07, "loss": 0.3761, "step": 4737 }, { "epoch": 2.6237194277803413, "grad_norm": 0.2711509168148041, "learning_rate": 4.6872599145875776e-07, "loss": 0.3805, "step": 4738 }, { "epoch": 2.6242731887401938, "grad_norm": 0.26156190037727356, "learning_rate": 4.6736427024128004e-07, "loss": 0.3875, "step": 4739 }, { "epoch": 2.6248269497000463, "grad_norm": 0.26979759335517883, "learning_rate": 4.66004432923281e-07, "loss": 0.4257, "step": 4740 }, { "epoch": 2.6253807106598988, "grad_norm": 0.2655320167541504, "learning_rate": 4.646464800699474e-07, "loss": 0.4191, "step": 4741 }, { "epoch": 2.625934471619751, "grad_norm": 0.237704798579216, "learning_rate": 4.632904122456855e-07, "loss": 0.357, "step": 4742 }, { "epoch": 2.6264882325796033, "grad_norm": 0.2677523195743561, "learning_rate": 4.619362300141178e-07, "loss": 0.406, "step": 4743 }, { "epoch": 2.6270419935394553, "grad_norm": 0.2549428343772888, "learning_rate": 4.605839339380835e-07, "loss": 0.3849, "step": 4744 }, { "epoch": 2.627595754499308, "grad_norm": 0.28042688965797424, "learning_rate": 4.5923352457963697e-07, "loss": 0.4276, "step": 4745 }, { "epoch": 2.6281495154591603, "grad_norm": 0.2596152126789093, "learning_rate": 4.5788500250004977e-07, "loss": 0.4131, "step": 4746 }, { "epoch": 2.6287032764190124, "grad_norm": 0.2580232620239258, "learning_rate": 4.565383682598068e-07, "loss": 0.3784, "step": 4747 }, { "epoch": 2.629257037378865, "grad_norm": 0.2775174379348755, "learning_rate": 4.55193622418611e-07, "loss": 0.4061, "step": 4748 }, { "epoch": 2.629810798338717, "grad_norm": 0.28618699312210083, "learning_rate": 4.53850765535378e-07, "loss": 0.426, "step": 4749 }, { "epoch": 2.6303645592985694, "grad_norm": 0.2772199809551239, "learning_rate": 4.5250979816824025e-07, "loss": 0.4152, "step": 4750 }, { "epoch": 2.630918320258422, "grad_norm": 0.2669052183628082, "learning_rate": 4.511707208745436e-07, "loss": 0.4171, "step": 4751 }, { "epoch": 2.6314720812182744, "grad_norm": 0.242843359708786, "learning_rate": 4.4983353421084895e-07, "loss": 0.3831, "step": 4752 }, { "epoch": 2.6320258421781264, "grad_norm": 0.270956426858902, "learning_rate": 4.484982387329312e-07, "loss": 0.4138, "step": 4753 }, { "epoch": 2.632579603137979, "grad_norm": 0.28168317675590515, "learning_rate": 4.4716483499577966e-07, "loss": 0.4241, "step": 4754 }, { "epoch": 2.633133364097831, "grad_norm": 0.26640376448631287, "learning_rate": 4.458333235535961e-07, "loss": 0.4081, "step": 4755 }, { "epoch": 2.6336871250576834, "grad_norm": 0.26315563917160034, "learning_rate": 4.445037049597972e-07, "loss": 0.3662, "step": 4756 }, { "epoch": 2.634240886017536, "grad_norm": 0.290656179189682, "learning_rate": 4.431759797670121e-07, "loss": 0.4362, "step": 4757 }, { "epoch": 2.634794646977388, "grad_norm": 0.2685661017894745, "learning_rate": 4.418501485270843e-07, "loss": 0.4308, "step": 4758 }, { "epoch": 2.6353484079372405, "grad_norm": 0.2697654962539673, "learning_rate": 4.4052621179106636e-07, "loss": 0.4153, "step": 4759 }, { "epoch": 2.6359021688970925, "grad_norm": 0.2613182067871094, "learning_rate": 4.3920417010922866e-07, "loss": 0.3733, "step": 4760 }, { "epoch": 2.636455929856945, "grad_norm": 0.28470635414123535, "learning_rate": 4.378840240310511e-07, "loss": 0.4025, "step": 4761 }, { "epoch": 2.6370096908167975, "grad_norm": 0.26558738946914673, "learning_rate": 4.365657741052248e-07, "loss": 0.4214, "step": 4762 }, { "epoch": 2.63756345177665, "grad_norm": 0.24280427396297455, "learning_rate": 4.3524942087965536e-07, "loss": 0.3699, "step": 4763 }, { "epoch": 2.638117212736502, "grad_norm": 0.2768287658691406, "learning_rate": 4.339349649014585e-07, "loss": 0.4135, "step": 4764 }, { "epoch": 2.6386709736963545, "grad_norm": 0.2406657189130783, "learning_rate": 4.326224067169604e-07, "loss": 0.3576, "step": 4765 }, { "epoch": 2.6392247346562066, "grad_norm": 0.25831112265586853, "learning_rate": 4.31311746871701e-07, "loss": 0.4007, "step": 4766 }, { "epoch": 2.639778495616059, "grad_norm": 0.26578593254089355, "learning_rate": 4.3000298591042986e-07, "loss": 0.3779, "step": 4767 }, { "epoch": 2.6403322565759115, "grad_norm": 0.2782944142818451, "learning_rate": 4.2869612437710483e-07, "loss": 0.3981, "step": 4768 }, { "epoch": 2.6408860175357636, "grad_norm": 0.273144006729126, "learning_rate": 4.273911628149002e-07, "loss": 0.4013, "step": 4769 }, { "epoch": 2.641439778495616, "grad_norm": 0.27771106362342834, "learning_rate": 4.2608810176619553e-07, "loss": 0.4132, "step": 4770 }, { "epoch": 2.6419935394554686, "grad_norm": 0.25817129015922546, "learning_rate": 4.2478694177258304e-07, "loss": 0.3952, "step": 4771 }, { "epoch": 2.6425473004153206, "grad_norm": 0.24699194729328156, "learning_rate": 4.234876833748625e-07, "loss": 0.3893, "step": 4772 }, { "epoch": 2.643101061375173, "grad_norm": 0.2755123972892761, "learning_rate": 4.22190327113044e-07, "loss": 0.4594, "step": 4773 }, { "epoch": 2.6436548223350256, "grad_norm": 0.2467888444662094, "learning_rate": 4.208948735263507e-07, "loss": 0.3946, "step": 4774 }, { "epoch": 2.6442085832948776, "grad_norm": 0.2471018135547638, "learning_rate": 4.196013231532098e-07, "loss": 0.3976, "step": 4775 }, { "epoch": 2.64476234425473, "grad_norm": 0.26361483335494995, "learning_rate": 4.183096765312611e-07, "loss": 0.3969, "step": 4776 }, { "epoch": 2.645316105214582, "grad_norm": 0.2681559920310974, "learning_rate": 4.170199341973502e-07, "loss": 0.4154, "step": 4777 }, { "epoch": 2.6458698661744346, "grad_norm": 0.25632596015930176, "learning_rate": 4.157320966875328e-07, "loss": 0.3946, "step": 4778 }, { "epoch": 2.646423627134287, "grad_norm": 0.2702345550060272, "learning_rate": 4.1444616453707396e-07, "loss": 0.4331, "step": 4779 }, { "epoch": 2.6469773880941396, "grad_norm": 0.27374300360679626, "learning_rate": 4.1316213828044626e-07, "loss": 0.373, "step": 4780 }, { "epoch": 2.6475311490539917, "grad_norm": 0.28278687596321106, "learning_rate": 4.118800184513272e-07, "loss": 0.435, "step": 4781 }, { "epoch": 2.648084910013844, "grad_norm": 0.25240468978881836, "learning_rate": 4.1059980558260495e-07, "loss": 0.3621, "step": 4782 }, { "epoch": 2.648638670973696, "grad_norm": 0.2666909098625183, "learning_rate": 4.093215002063761e-07, "loss": 0.416, "step": 4783 }, { "epoch": 2.6491924319335487, "grad_norm": 0.24443115293979645, "learning_rate": 4.0804510285394116e-07, "loss": 0.3672, "step": 4784 }, { "epoch": 2.649746192893401, "grad_norm": 0.28888219594955444, "learning_rate": 4.067706140558109e-07, "loss": 0.4377, "step": 4785 }, { "epoch": 2.6502999538532532, "grad_norm": 0.2518102526664734, "learning_rate": 4.0549803434169864e-07, "loss": 0.4041, "step": 4786 }, { "epoch": 2.6508537148131057, "grad_norm": 0.26810377836227417, "learning_rate": 4.04227364240527e-07, "loss": 0.4136, "step": 4787 }, { "epoch": 2.6514074757729578, "grad_norm": 0.26165780425071716, "learning_rate": 4.0295860428042674e-07, "loss": 0.3849, "step": 4788 }, { "epoch": 2.6519612367328103, "grad_norm": 0.274017333984375, "learning_rate": 4.0169175498873224e-07, "loss": 0.4429, "step": 4789 }, { "epoch": 2.6525149976926627, "grad_norm": 0.26986294984817505, "learning_rate": 4.0042681689198227e-07, "loss": 0.3705, "step": 4790 }, { "epoch": 2.6530687586525152, "grad_norm": 0.26319724321365356, "learning_rate": 3.9916379051592456e-07, "loss": 0.4047, "step": 4791 }, { "epoch": 2.6536225196123673, "grad_norm": 0.256365567445755, "learning_rate": 3.9790267638550873e-07, "loss": 0.3903, "step": 4792 }, { "epoch": 2.6541762805722198, "grad_norm": 0.2711634635925293, "learning_rate": 3.966434750248954e-07, "loss": 0.376, "step": 4793 }, { "epoch": 2.654730041532072, "grad_norm": 0.2738797962665558, "learning_rate": 3.9538618695744325e-07, "loss": 0.4577, "step": 4794 }, { "epoch": 2.6552838024919243, "grad_norm": 0.24215391278266907, "learning_rate": 3.941308127057203e-07, "loss": 0.3743, "step": 4795 }, { "epoch": 2.655837563451777, "grad_norm": 0.25377658009529114, "learning_rate": 3.9287735279149695e-07, "loss": 0.4082, "step": 4796 }, { "epoch": 2.656391324411629, "grad_norm": 0.2451423704624176, "learning_rate": 3.9162580773574876e-07, "loss": 0.3805, "step": 4797 }, { "epoch": 2.6569450853714813, "grad_norm": 0.27233660221099854, "learning_rate": 3.9037617805865733e-07, "loss": 0.3985, "step": 4798 }, { "epoch": 2.6574988463313334, "grad_norm": 0.27237388491630554, "learning_rate": 3.891284642796045e-07, "loss": 0.4236, "step": 4799 }, { "epoch": 2.658052607291186, "grad_norm": 0.26785093545913696, "learning_rate": 3.878826669171776e-07, "loss": 0.4079, "step": 4800 }, { "epoch": 2.6586063682510384, "grad_norm": 0.24969100952148438, "learning_rate": 3.8663878648916795e-07, "loss": 0.3994, "step": 4801 }, { "epoch": 2.659160129210891, "grad_norm": 0.2661703824996948, "learning_rate": 3.8539682351256935e-07, "loss": 0.4241, "step": 4802 }, { "epoch": 2.659713890170743, "grad_norm": 0.24546760320663452, "learning_rate": 3.8415677850357933e-07, "loss": 0.3847, "step": 4803 }, { "epoch": 2.6602676511305954, "grad_norm": 0.25496405363082886, "learning_rate": 3.829186519775968e-07, "loss": 0.4112, "step": 4804 }, { "epoch": 2.6608214120904474, "grad_norm": 0.27650994062423706, "learning_rate": 3.8168244444922566e-07, "loss": 0.4102, "step": 4805 }, { "epoch": 2.6613751730503, "grad_norm": 0.26931822299957275, "learning_rate": 3.8044815643226985e-07, "loss": 0.4083, "step": 4806 }, { "epoch": 2.6619289340101524, "grad_norm": 0.24877621233463287, "learning_rate": 3.7921578843973683e-07, "loss": 0.3933, "step": 4807 }, { "epoch": 2.6624826949700044, "grad_norm": 0.2574176788330078, "learning_rate": 3.7798534098383575e-07, "loss": 0.3951, "step": 4808 }, { "epoch": 2.663036455929857, "grad_norm": 0.2814246416091919, "learning_rate": 3.7675681457597756e-07, "loss": 0.4406, "step": 4809 }, { "epoch": 2.6635902168897094, "grad_norm": 0.26297441124916077, "learning_rate": 3.755302097267743e-07, "loss": 0.3595, "step": 4810 }, { "epoch": 2.6641439778495615, "grad_norm": 0.2362803965806961, "learning_rate": 3.743055269460405e-07, "loss": 0.3503, "step": 4811 }, { "epoch": 2.664697738809414, "grad_norm": 0.26778095960617065, "learning_rate": 3.7308276674279054e-07, "loss": 0.4186, "step": 4812 }, { "epoch": 2.6652514997692665, "grad_norm": 0.25879788398742676, "learning_rate": 3.718619296252407e-07, "loss": 0.3956, "step": 4813 }, { "epoch": 2.6658052607291185, "grad_norm": 0.2850698232650757, "learning_rate": 3.706430161008073e-07, "loss": 0.433, "step": 4814 }, { "epoch": 2.666359021688971, "grad_norm": 0.2686587870121002, "learning_rate": 3.694260266761074e-07, "loss": 0.413, "step": 4815 }, { "epoch": 2.666912782648823, "grad_norm": 0.24413630366325378, "learning_rate": 3.6821096185695793e-07, "loss": 0.39, "step": 4816 }, { "epoch": 2.6674665436086755, "grad_norm": 0.25990384817123413, "learning_rate": 3.6699782214837723e-07, "loss": 0.4426, "step": 4817 }, { "epoch": 2.668020304568528, "grad_norm": 0.2672097086906433, "learning_rate": 3.6578660805458135e-07, "loss": 0.4116, "step": 4818 }, { "epoch": 2.6685740655283805, "grad_norm": 0.25866076350212097, "learning_rate": 3.6457732007898817e-07, "loss": 0.3995, "step": 4819 }, { "epoch": 2.6691278264882325, "grad_norm": 0.2810901999473572, "learning_rate": 3.63369958724214e-07, "loss": 0.4014, "step": 4820 }, { "epoch": 2.669681587448085, "grad_norm": 0.2528679072856903, "learning_rate": 3.6216452449207464e-07, "loss": 0.3987, "step": 4821 }, { "epoch": 2.670235348407937, "grad_norm": 0.26429370045661926, "learning_rate": 3.6096101788358375e-07, "loss": 0.4305, "step": 4822 }, { "epoch": 2.6707891093677896, "grad_norm": 0.25132548809051514, "learning_rate": 3.5975943939895566e-07, "loss": 0.3871, "step": 4823 }, { "epoch": 2.671342870327642, "grad_norm": 0.2623085379600525, "learning_rate": 3.585597895376025e-07, "loss": 0.4105, "step": 4824 }, { "epoch": 2.671896631287494, "grad_norm": 0.26075315475463867, "learning_rate": 3.5736206879813384e-07, "loss": 0.3946, "step": 4825 }, { "epoch": 2.6724503922473466, "grad_norm": 0.2559032142162323, "learning_rate": 3.5616627767835977e-07, "loss": 0.3948, "step": 4826 }, { "epoch": 2.6730041532071986, "grad_norm": 0.2615891695022583, "learning_rate": 3.5497241667528604e-07, "loss": 0.3917, "step": 4827 }, { "epoch": 2.673557914167051, "grad_norm": 0.2861909568309784, "learning_rate": 3.5378048628511685e-07, "loss": 0.4156, "step": 4828 }, { "epoch": 2.6741116751269036, "grad_norm": 0.2673929035663605, "learning_rate": 3.525904870032554e-07, "loss": 0.3846, "step": 4829 }, { "epoch": 2.674665436086756, "grad_norm": 0.26123037934303284, "learning_rate": 3.514024193242999e-07, "loss": 0.407, "step": 4830 }, { "epoch": 2.675219197046608, "grad_norm": 0.2478809803724289, "learning_rate": 3.5021628374204807e-07, "loss": 0.3832, "step": 4831 }, { "epoch": 2.6757729580064606, "grad_norm": 0.25510475039482117, "learning_rate": 3.4903208074949236e-07, "loss": 0.4012, "step": 4832 }, { "epoch": 2.6763267189663127, "grad_norm": 0.2681092619895935, "learning_rate": 3.4784981083882396e-07, "loss": 0.4204, "step": 4833 }, { "epoch": 2.676880479926165, "grad_norm": 0.2753102481365204, "learning_rate": 3.4666947450142986e-07, "loss": 0.453, "step": 4834 }, { "epoch": 2.6774342408860177, "grad_norm": 0.27392399311065674, "learning_rate": 3.454910722278915e-07, "loss": 0.3869, "step": 4835 }, { "epoch": 2.6779880018458697, "grad_norm": 0.26311853528022766, "learning_rate": 3.4431460450799034e-07, "loss": 0.4201, "step": 4836 }, { "epoch": 2.678541762805722, "grad_norm": 0.25172510743141174, "learning_rate": 3.4314007183070044e-07, "loss": 0.3558, "step": 4837 }, { "epoch": 2.6790955237655747, "grad_norm": 0.28253158926963806, "learning_rate": 3.4196747468419444e-07, "loss": 0.4435, "step": 4838 }, { "epoch": 2.6796492847254267, "grad_norm": 0.2576224207878113, "learning_rate": 3.4079681355583706e-07, "loss": 0.371, "step": 4839 }, { "epoch": 2.6802030456852792, "grad_norm": 0.2612955868244171, "learning_rate": 3.396280889321901e-07, "loss": 0.4211, "step": 4840 }, { "epoch": 2.6807568066451317, "grad_norm": 0.27642881870269775, "learning_rate": 3.3846130129901177e-07, "loss": 0.4178, "step": 4841 }, { "epoch": 2.6813105676049838, "grad_norm": 0.254348486661911, "learning_rate": 3.372964511412541e-07, "loss": 0.4357, "step": 4842 }, { "epoch": 2.6818643285648363, "grad_norm": 0.2662368714809418, "learning_rate": 3.361335389430642e-07, "loss": 0.4055, "step": 4843 }, { "epoch": 2.6824180895246883, "grad_norm": 0.26418983936309814, "learning_rate": 3.349725651877811e-07, "loss": 0.4202, "step": 4844 }, { "epoch": 2.682971850484541, "grad_norm": 0.2422872930765152, "learning_rate": 3.338135303579415e-07, "loss": 0.3831, "step": 4845 }, { "epoch": 2.6835256114443933, "grad_norm": 0.25364258885383606, "learning_rate": 3.3265643493527567e-07, "loss": 0.4064, "step": 4846 }, { "epoch": 2.6840793724042458, "grad_norm": 0.2527864873409271, "learning_rate": 3.3150127940070773e-07, "loss": 0.4044, "step": 4847 }, { "epoch": 2.684633133364098, "grad_norm": 0.27381494641304016, "learning_rate": 3.3034806423435317e-07, "loss": 0.4358, "step": 4848 }, { "epoch": 2.6851868943239503, "grad_norm": 0.25519025325775146, "learning_rate": 3.2919678991552293e-07, "loss": 0.3883, "step": 4849 }, { "epoch": 2.6857406552838023, "grad_norm": 0.25930055975914, "learning_rate": 3.2804745692272266e-07, "loss": 0.4109, "step": 4850 }, { "epoch": 2.686294416243655, "grad_norm": 0.25119784474372864, "learning_rate": 3.269000657336502e-07, "loss": 0.3744, "step": 4851 }, { "epoch": 2.6868481772035073, "grad_norm": 0.2654167115688324, "learning_rate": 3.257546168251935e-07, "loss": 0.4109, "step": 4852 }, { "epoch": 2.6874019381633594, "grad_norm": 0.25954270362854004, "learning_rate": 3.246111106734362e-07, "loss": 0.4079, "step": 4853 }, { "epoch": 2.687955699123212, "grad_norm": 0.2584855258464813, "learning_rate": 3.234695477536537e-07, "loss": 0.3914, "step": 4854 }, { "epoch": 2.688509460083064, "grad_norm": 0.2663382291793823, "learning_rate": 3.223299285403153e-07, "loss": 0.3986, "step": 4855 }, { "epoch": 2.6890632210429164, "grad_norm": 0.27401983737945557, "learning_rate": 3.211922535070799e-07, "loss": 0.393, "step": 4856 }, { "epoch": 2.689616982002769, "grad_norm": 0.2688019573688507, "learning_rate": 3.2005652312679936e-07, "loss": 0.4196, "step": 4857 }, { "epoch": 2.6901707429626214, "grad_norm": 0.2602902948856354, "learning_rate": 3.1892273787151705e-07, "loss": 0.394, "step": 4858 }, { "epoch": 2.6907245039224734, "grad_norm": 0.2642815411090851, "learning_rate": 3.177908982124678e-07, "loss": 0.3873, "step": 4859 }, { "epoch": 2.691278264882326, "grad_norm": 0.24302786588668823, "learning_rate": 3.166610046200802e-07, "loss": 0.3615, "step": 4860 }, { "epoch": 2.691832025842178, "grad_norm": 0.2801657021045685, "learning_rate": 3.155330575639698e-07, "loss": 0.4188, "step": 4861 }, { "epoch": 2.6923857868020304, "grad_norm": 0.2681110203266144, "learning_rate": 3.14407057512946e-07, "loss": 0.4008, "step": 4862 }, { "epoch": 2.692939547761883, "grad_norm": 0.2796190083026886, "learning_rate": 3.132830049350083e-07, "loss": 0.4313, "step": 4863 }, { "epoch": 2.693493308721735, "grad_norm": 0.24434994161128998, "learning_rate": 3.1216090029734583e-07, "loss": 0.3779, "step": 4864 }, { "epoch": 2.6940470696815875, "grad_norm": 0.26303592324256897, "learning_rate": 3.11040744066341e-07, "loss": 0.4349, "step": 4865 }, { "epoch": 2.6946008306414395, "grad_norm": 0.2718876898288727, "learning_rate": 3.099225367075631e-07, "loss": 0.4029, "step": 4866 }, { "epoch": 2.695154591601292, "grad_norm": 0.26653948426246643, "learning_rate": 3.088062786857721e-07, "loss": 0.4277, "step": 4867 }, { "epoch": 2.6957083525611445, "grad_norm": 0.2592476010322571, "learning_rate": 3.076919704649184e-07, "loss": 0.3932, "step": 4868 }, { "epoch": 2.696262113520997, "grad_norm": 0.25565820932388306, "learning_rate": 3.065796125081438e-07, "loss": 0.3885, "step": 4869 }, { "epoch": 2.696815874480849, "grad_norm": 0.3059822618961334, "learning_rate": 3.054692052777763e-07, "loss": 0.419, "step": 4870 }, { "epoch": 2.6973696354407015, "grad_norm": 0.2583981454372406, "learning_rate": 3.04360749235334e-07, "loss": 0.3766, "step": 4871 }, { "epoch": 2.6979233964005536, "grad_norm": 0.2748742997646332, "learning_rate": 3.0325424484152486e-07, "loss": 0.4534, "step": 4872 }, { "epoch": 2.698477157360406, "grad_norm": 0.24691152572631836, "learning_rate": 3.0214969255624615e-07, "loss": 0.3661, "step": 4873 }, { "epoch": 2.6990309183202585, "grad_norm": 0.2913316488265991, "learning_rate": 3.010470928385817e-07, "loss": 0.4178, "step": 4874 }, { "epoch": 2.6995846792801106, "grad_norm": 0.2529304325580597, "learning_rate": 2.99946446146806e-07, "loss": 0.4005, "step": 4875 }, { "epoch": 2.700138440239963, "grad_norm": 0.2776043117046356, "learning_rate": 2.988477529383804e-07, "loss": 0.4227, "step": 4876 }, { "epoch": 2.7006922011998156, "grad_norm": 0.2701217830181122, "learning_rate": 2.977510136699546e-07, "loss": 0.3828, "step": 4877 }, { "epoch": 2.7012459621596676, "grad_norm": 0.27316993474960327, "learning_rate": 2.966562287973673e-07, "loss": 0.3988, "step": 4878 }, { "epoch": 2.70179972311952, "grad_norm": 0.2708851993083954, "learning_rate": 2.9556339877564287e-07, "loss": 0.4323, "step": 4879 }, { "epoch": 2.7023534840793726, "grad_norm": 0.22947414219379425, "learning_rate": 2.9447252405899517e-07, "loss": 0.3517, "step": 4880 }, { "epoch": 2.7029072450392246, "grad_norm": 0.2761908173561096, "learning_rate": 2.9338360510082376e-07, "loss": 0.4052, "step": 4881 }, { "epoch": 2.703461005999077, "grad_norm": 0.30318358540534973, "learning_rate": 2.92296642353716e-07, "loss": 0.4226, "step": 4882 }, { "epoch": 2.704014766958929, "grad_norm": 0.2677801549434662, "learning_rate": 2.9121163626944725e-07, "loss": 0.3941, "step": 4883 }, { "epoch": 2.7045685279187817, "grad_norm": 0.26820480823516846, "learning_rate": 2.901285872989773e-07, "loss": 0.4192, "step": 4884 }, { "epoch": 2.705122288878634, "grad_norm": 0.25819462537765503, "learning_rate": 2.8904749589245496e-07, "loss": 0.3793, "step": 4885 }, { "epoch": 2.7056760498384866, "grad_norm": 0.2909316122531891, "learning_rate": 2.87968362499213e-07, "loss": 0.4167, "step": 4886 }, { "epoch": 2.7062298107983387, "grad_norm": 0.27248331904411316, "learning_rate": 2.8689118756777324e-07, "loss": 0.3968, "step": 4887 }, { "epoch": 2.706783571758191, "grad_norm": 0.2547625005245209, "learning_rate": 2.8581597154584084e-07, "loss": 0.4131, "step": 4888 }, { "epoch": 2.707337332718043, "grad_norm": 0.2546736001968384, "learning_rate": 2.847427148803078e-07, "loss": 0.3853, "step": 4889 }, { "epoch": 2.7078910936778957, "grad_norm": 0.27089208364486694, "learning_rate": 2.8367141801725227e-07, "loss": 0.4261, "step": 4890 }, { "epoch": 2.708444854637748, "grad_norm": 0.25876012444496155, "learning_rate": 2.82602081401937e-07, "loss": 0.4118, "step": 4891 }, { "epoch": 2.7089986155976002, "grad_norm": 0.2305353730916977, "learning_rate": 2.815347054788109e-07, "loss": 0.3695, "step": 4892 }, { "epoch": 2.7095523765574527, "grad_norm": 0.2537437081336975, "learning_rate": 2.804692906915074e-07, "loss": 0.3925, "step": 4893 }, { "epoch": 2.710106137517305, "grad_norm": 0.27386030554771423, "learning_rate": 2.7940583748284454e-07, "loss": 0.3687, "step": 4894 }, { "epoch": 2.7106598984771573, "grad_norm": 0.28300991654396057, "learning_rate": 2.783443462948249e-07, "loss": 0.4344, "step": 4895 }, { "epoch": 2.7112136594370098, "grad_norm": 0.24423737823963165, "learning_rate": 2.772848175686382e-07, "loss": 0.335, "step": 4896 }, { "epoch": 2.7117674203968622, "grad_norm": 0.30074435472488403, "learning_rate": 2.7622725174465293e-07, "loss": 0.4185, "step": 4897 }, { "epoch": 2.7123211813567143, "grad_norm": 0.2654198408126831, "learning_rate": 2.751716492624279e-07, "loss": 0.362, "step": 4898 }, { "epoch": 2.712874942316567, "grad_norm": 0.27697786688804626, "learning_rate": 2.741180105607022e-07, "loss": 0.4121, "step": 4899 }, { "epoch": 2.713428703276419, "grad_norm": 0.2666405737400055, "learning_rate": 2.7306633607739996e-07, "loss": 0.4212, "step": 4900 }, { "epoch": 2.7139824642362713, "grad_norm": 0.2598150372505188, "learning_rate": 2.720166262496299e-07, "loss": 0.4077, "step": 4901 }, { "epoch": 2.714536225196124, "grad_norm": 0.2416316568851471, "learning_rate": 2.709688815136796e-07, "loss": 0.3653, "step": 4902 }, { "epoch": 2.715089986155976, "grad_norm": 0.2769257128238678, "learning_rate": 2.699231023050264e-07, "loss": 0.4464, "step": 4903 }, { "epoch": 2.7156437471158283, "grad_norm": 0.24378454685211182, "learning_rate": 2.688792890583258e-07, "loss": 0.3843, "step": 4904 }, { "epoch": 2.7161975080756804, "grad_norm": 0.25132298469543457, "learning_rate": 2.678374422074198e-07, "loss": 0.4109, "step": 4905 }, { "epoch": 2.716751269035533, "grad_norm": 0.2660961449146271, "learning_rate": 2.6679756218532915e-07, "loss": 0.3829, "step": 4906 }, { "epoch": 2.7173050299953854, "grad_norm": 0.2851080596446991, "learning_rate": 2.6575964942425923e-07, "loss": 0.413, "step": 4907 }, { "epoch": 2.717858790955238, "grad_norm": 0.28070104122161865, "learning_rate": 2.6472370435559946e-07, "loss": 0.3859, "step": 4908 }, { "epoch": 2.71841255191509, "grad_norm": 0.28203079104423523, "learning_rate": 2.636897274099187e-07, "loss": 0.3955, "step": 4909 }, { "epoch": 2.7189663128749424, "grad_norm": 0.252451092004776, "learning_rate": 2.6265771901696933e-07, "loss": 0.4242, "step": 4910 }, { "epoch": 2.7195200738347944, "grad_norm": 0.2562096118927002, "learning_rate": 2.6162767960568324e-07, "loss": 0.3725, "step": 4911 }, { "epoch": 2.720073834794647, "grad_norm": 0.26443126797676086, "learning_rate": 2.6059960960417796e-07, "loss": 0.4027, "step": 4912 }, { "epoch": 2.7206275957544994, "grad_norm": 0.2626219689846039, "learning_rate": 2.595735094397489e-07, "loss": 0.41, "step": 4913 }, { "epoch": 2.721181356714352, "grad_norm": 0.253595769405365, "learning_rate": 2.585493795388755e-07, "loss": 0.4028, "step": 4914 }, { "epoch": 2.721735117674204, "grad_norm": 0.2569968104362488, "learning_rate": 2.5752722032721553e-07, "loss": 0.3874, "step": 4915 }, { "epoch": 2.7222888786340564, "grad_norm": 0.2835262715816498, "learning_rate": 2.565070322296082e-07, "loss": 0.4411, "step": 4916 }, { "epoch": 2.7228426395939085, "grad_norm": 0.25788435339927673, "learning_rate": 2.5548881567007687e-07, "loss": 0.4157, "step": 4917 }, { "epoch": 2.723396400553761, "grad_norm": 0.2456507533788681, "learning_rate": 2.5447257107182155e-07, "loss": 0.3583, "step": 4918 }, { "epoch": 2.7239501615136135, "grad_norm": 0.26200056076049805, "learning_rate": 2.534582988572237e-07, "loss": 0.4197, "step": 4919 }, { "epoch": 2.7245039224734655, "grad_norm": 0.2701203227043152, "learning_rate": 2.5244599944784565e-07, "loss": 0.4078, "step": 4920 }, { "epoch": 2.725057683433318, "grad_norm": 0.26453036069869995, "learning_rate": 2.514356732644291e-07, "loss": 0.4213, "step": 4921 }, { "epoch": 2.72561144439317, "grad_norm": 0.2533872723579407, "learning_rate": 2.504273207268965e-07, "loss": 0.3909, "step": 4922 }, { "epoch": 2.7261652053530225, "grad_norm": 0.2563759982585907, "learning_rate": 2.4942094225434975e-07, "loss": 0.4, "step": 4923 }, { "epoch": 2.726718966312875, "grad_norm": 0.26903772354125977, "learning_rate": 2.4841653826506883e-07, "loss": 0.4026, "step": 4924 }, { "epoch": 2.7272727272727275, "grad_norm": 0.2790171802043915, "learning_rate": 2.474141091765148e-07, "loss": 0.4234, "step": 4925 }, { "epoch": 2.7278264882325796, "grad_norm": 0.2992580831050873, "learning_rate": 2.4641365540532704e-07, "loss": 0.4509, "step": 4926 }, { "epoch": 2.728380249192432, "grad_norm": 0.24914619326591492, "learning_rate": 2.454151773673252e-07, "loss": 0.4016, "step": 4927 }, { "epoch": 2.728934010152284, "grad_norm": 0.24967677891254425, "learning_rate": 2.444186754775052e-07, "loss": 0.3635, "step": 4928 }, { "epoch": 2.7294877711121366, "grad_norm": 0.253267377614975, "learning_rate": 2.4342415015004396e-07, "loss": 0.4093, "step": 4929 }, { "epoch": 2.730041532071989, "grad_norm": 0.2514583468437195, "learning_rate": 2.4243160179829585e-07, "loss": 0.3977, "step": 4930 }, { "epoch": 2.730595293031841, "grad_norm": 0.2769954800605774, "learning_rate": 2.4144103083479364e-07, "loss": 0.4097, "step": 4931 }, { "epoch": 2.7311490539916936, "grad_norm": 0.2616341710090637, "learning_rate": 2.4045243767124916e-07, "loss": 0.4103, "step": 4932 }, { "epoch": 2.7317028149515457, "grad_norm": 0.24560078978538513, "learning_rate": 2.3946582271855033e-07, "loss": 0.3686, "step": 4933 }, { "epoch": 2.732256575911398, "grad_norm": 0.24541142582893372, "learning_rate": 2.384811863867642e-07, "loss": 0.3654, "step": 4934 }, { "epoch": 2.7328103368712506, "grad_norm": 0.2797073721885681, "learning_rate": 2.3749852908513503e-07, "loss": 0.4327, "step": 4935 }, { "epoch": 2.733364097831103, "grad_norm": 0.23697729408740997, "learning_rate": 2.365178512220867e-07, "loss": 0.3434, "step": 4936 }, { "epoch": 2.733917858790955, "grad_norm": 0.2798789441585541, "learning_rate": 2.3553915320521591e-07, "loss": 0.4338, "step": 4937 }, { "epoch": 2.7344716197508077, "grad_norm": 0.25640588998794556, "learning_rate": 2.3456243544129953e-07, "loss": 0.3682, "step": 4938 }, { "epoch": 2.7350253807106597, "grad_norm": 0.26423606276512146, "learning_rate": 2.3358769833629114e-07, "loss": 0.3977, "step": 4939 }, { "epoch": 2.735579141670512, "grad_norm": 0.24796538054943085, "learning_rate": 2.3261494229532055e-07, "loss": 0.3404, "step": 4940 }, { "epoch": 2.7361329026303647, "grad_norm": 0.2676283121109009, "learning_rate": 2.3164416772269495e-07, "loss": 0.4358, "step": 4941 }, { "epoch": 2.7366866635902167, "grad_norm": 0.26006394624710083, "learning_rate": 2.3067537502189596e-07, "loss": 0.3978, "step": 4942 }, { "epoch": 2.737240424550069, "grad_norm": 0.26339539885520935, "learning_rate": 2.2970856459558488e-07, "loss": 0.3703, "step": 4943 }, { "epoch": 2.7377941855099217, "grad_norm": 0.2587943375110626, "learning_rate": 2.2874373684559526e-07, "loss": 0.4176, "step": 4944 }, { "epoch": 2.7383479464697738, "grad_norm": 0.26445919275283813, "learning_rate": 2.2778089217293964e-07, "loss": 0.3979, "step": 4945 }, { "epoch": 2.7389017074296262, "grad_norm": 0.2589643597602844, "learning_rate": 2.2682003097780514e-07, "loss": 0.4258, "step": 4946 }, { "epoch": 2.7394554683894787, "grad_norm": 0.23986683785915375, "learning_rate": 2.2586115365955397e-07, "loss": 0.3472, "step": 4947 }, { "epoch": 2.7400092293493308, "grad_norm": 0.26756060123443604, "learning_rate": 2.2490426061672453e-07, "loss": 0.4448, "step": 4948 }, { "epoch": 2.7405629903091833, "grad_norm": 0.2665136456489563, "learning_rate": 2.239493522470304e-07, "loss": 0.398, "step": 4949 }, { "epoch": 2.7411167512690353, "grad_norm": 0.252077579498291, "learning_rate": 2.2299642894735962e-07, "loss": 0.3771, "step": 4950 }, { "epoch": 2.741670512228888, "grad_norm": 0.2504142224788666, "learning_rate": 2.2204549111377705e-07, "loss": 0.4067, "step": 4951 }, { "epoch": 2.7422242731887403, "grad_norm": 0.2706136703491211, "learning_rate": 2.2109653914151986e-07, "loss": 0.3993, "step": 4952 }, { "epoch": 2.742778034148593, "grad_norm": 0.25663214921951294, "learning_rate": 2.2014957342500088e-07, "loss": 0.3922, "step": 4953 }, { "epoch": 2.743331795108445, "grad_norm": 0.24605193734169006, "learning_rate": 2.1920459435780804e-07, "loss": 0.3679, "step": 4954 }, { "epoch": 2.7438855560682973, "grad_norm": 0.26981961727142334, "learning_rate": 2.1826160233270333e-07, "loss": 0.4368, "step": 4955 }, { "epoch": 2.7444393170281494, "grad_norm": 0.24405398964881897, "learning_rate": 2.1732059774162206e-07, "loss": 0.3653, "step": 4956 }, { "epoch": 2.744993077988002, "grad_norm": 0.25069135427474976, "learning_rate": 2.1638158097567364e-07, "loss": 0.3986, "step": 4957 }, { "epoch": 2.7455468389478543, "grad_norm": 0.28167620301246643, "learning_rate": 2.1544455242514195e-07, "loss": 0.4437, "step": 4958 }, { "epoch": 2.7461005999077064, "grad_norm": 0.26140618324279785, "learning_rate": 2.1450951247948437e-07, "loss": 0.3954, "step": 4959 }, { "epoch": 2.746654360867559, "grad_norm": 0.260381281375885, "learning_rate": 2.1357646152733115e-07, "loss": 0.383, "step": 4960 }, { "epoch": 2.747208121827411, "grad_norm": 0.27780744433403015, "learning_rate": 2.1264539995648702e-07, "loss": 0.3916, "step": 4961 }, { "epoch": 2.7477618827872634, "grad_norm": 0.27338293194770813, "learning_rate": 2.1171632815392805e-07, "loss": 0.422, "step": 4962 }, { "epoch": 2.748315643747116, "grad_norm": 0.25061702728271484, "learning_rate": 2.1078924650580645e-07, "loss": 0.38, "step": 4963 }, { "epoch": 2.7488694047069684, "grad_norm": 0.2640968859195709, "learning_rate": 2.0986415539744177e-07, "loss": 0.4005, "step": 4964 }, { "epoch": 2.7494231656668204, "grad_norm": 0.27090147137641907, "learning_rate": 2.0894105521333253e-07, "loss": 0.4101, "step": 4965 }, { "epoch": 2.749976926626673, "grad_norm": 0.2647792398929596, "learning_rate": 2.0801994633714574e-07, "loss": 0.4106, "step": 4966 }, { "epoch": 2.750530687586525, "grad_norm": 0.25026053190231323, "learning_rate": 2.071008291517218e-07, "loss": 0.3823, "step": 4967 }, { "epoch": 2.7510844485463775, "grad_norm": 0.2533360719680786, "learning_rate": 2.061837040390746e-07, "loss": 0.4237, "step": 4968 }, { "epoch": 2.75163820950623, "grad_norm": 0.24911342561244965, "learning_rate": 2.0526857138038703e-07, "loss": 0.3624, "step": 4969 }, { "epoch": 2.752191970466082, "grad_norm": 0.25712066888809204, "learning_rate": 2.04355431556017e-07, "loss": 0.4475, "step": 4970 }, { "epoch": 2.7527457314259345, "grad_norm": 0.24064067006111145, "learning_rate": 2.0344428494549206e-07, "loss": 0.3998, "step": 4971 }, { "epoch": 2.7532994923857865, "grad_norm": 0.2702878713607788, "learning_rate": 2.0253513192751374e-07, "loss": 0.4272, "step": 4972 }, { "epoch": 2.753853253345639, "grad_norm": 0.2444780170917511, "learning_rate": 2.0162797287995083e-07, "loss": 0.3834, "step": 4973 }, { "epoch": 2.7544070143054915, "grad_norm": 0.25765061378479004, "learning_rate": 2.007228081798468e-07, "loss": 0.3996, "step": 4974 }, { "epoch": 2.754960775265344, "grad_norm": 0.2614576518535614, "learning_rate": 1.9981963820341622e-07, "loss": 0.3809, "step": 4975 }, { "epoch": 2.755514536225196, "grad_norm": 0.285338431596756, "learning_rate": 1.989184633260438e-07, "loss": 0.4394, "step": 4976 }, { "epoch": 2.7560682971850485, "grad_norm": 0.25181227922439575, "learning_rate": 1.9801928392228332e-07, "loss": 0.3541, "step": 4977 }, { "epoch": 2.7566220581449006, "grad_norm": 0.2761199176311493, "learning_rate": 1.9712210036586077e-07, "loss": 0.4252, "step": 4978 }, { "epoch": 2.757175819104753, "grad_norm": 0.2568990886211395, "learning_rate": 1.96226913029674e-07, "loss": 0.4347, "step": 4979 }, { "epoch": 2.7577295800646056, "grad_norm": 0.25365403294563293, "learning_rate": 1.9533372228578872e-07, "loss": 0.3726, "step": 4980 }, { "epoch": 2.758283341024458, "grad_norm": 0.2833214998245239, "learning_rate": 1.9444252850544298e-07, "loss": 0.4286, "step": 4981 }, { "epoch": 2.75883710198431, "grad_norm": 0.25328555703163147, "learning_rate": 1.935533320590427e-07, "loss": 0.385, "step": 4982 }, { "epoch": 2.7593908629441626, "grad_norm": 0.24779801070690155, "learning_rate": 1.9266613331616335e-07, "loss": 0.3632, "step": 4983 }, { "epoch": 2.7599446239040146, "grad_norm": 0.2635747194290161, "learning_rate": 1.9178093264555386e-07, "loss": 0.3932, "step": 4984 }, { "epoch": 2.760498384863867, "grad_norm": 0.25637286901474, "learning_rate": 1.9089773041512938e-07, "loss": 0.4023, "step": 4985 }, { "epoch": 2.7610521458237196, "grad_norm": 0.26658767461776733, "learning_rate": 1.9001652699197458e-07, "loss": 0.4289, "step": 4986 }, { "epoch": 2.7616059067835717, "grad_norm": 0.2880879044532776, "learning_rate": 1.8913732274234432e-07, "loss": 0.4165, "step": 4987 }, { "epoch": 2.762159667743424, "grad_norm": 0.2563074827194214, "learning_rate": 1.8826011803166177e-07, "loss": 0.3704, "step": 4988 }, { "epoch": 2.762713428703276, "grad_norm": 0.2881113588809967, "learning_rate": 1.8738491322452036e-07, "loss": 0.4135, "step": 4989 }, { "epoch": 2.7632671896631287, "grad_norm": 0.27584874629974365, "learning_rate": 1.8651170868468238e-07, "loss": 0.4249, "step": 4990 }, { "epoch": 2.763820950622981, "grad_norm": 0.25749605894088745, "learning_rate": 1.8564050477507645e-07, "loss": 0.3685, "step": 4991 }, { "epoch": 2.7643747115828337, "grad_norm": 0.2746692895889282, "learning_rate": 1.8477130185780068e-07, "loss": 0.4151, "step": 4992 }, { "epoch": 2.7649284725426857, "grad_norm": 0.24172277748584747, "learning_rate": 1.8390410029412276e-07, "loss": 0.3719, "step": 4993 }, { "epoch": 2.765482233502538, "grad_norm": 0.2786256670951843, "learning_rate": 1.8303890044447825e-07, "loss": 0.4006, "step": 4994 }, { "epoch": 2.7660359944623902, "grad_norm": 0.25809693336486816, "learning_rate": 1.8217570266846896e-07, "loss": 0.3829, "step": 4995 }, { "epoch": 2.7665897554222427, "grad_norm": 0.2778603732585907, "learning_rate": 1.8131450732486622e-07, "loss": 0.4429, "step": 4996 }, { "epoch": 2.767143516382095, "grad_norm": 0.2504754662513733, "learning_rate": 1.8045531477160817e-07, "loss": 0.3913, "step": 4997 }, { "epoch": 2.7676972773419473, "grad_norm": 0.24554158747196198, "learning_rate": 1.79598125365803e-07, "loss": 0.3807, "step": 4998 }, { "epoch": 2.7682510383017997, "grad_norm": 0.2792815864086151, "learning_rate": 1.7874293946372245e-07, "loss": 0.4407, "step": 4999 }, { "epoch": 2.768804799261652, "grad_norm": 0.2738465964794159, "learning_rate": 1.7788975742080772e-07, "loss": 0.4071, "step": 5000 }, { "epoch": 2.7693585602215043, "grad_norm": 0.24764381349086761, "learning_rate": 1.7703857959166682e-07, "loss": 0.3737, "step": 5001 }, { "epoch": 2.7699123211813568, "grad_norm": 0.26550039649009705, "learning_rate": 1.761894063300751e-07, "loss": 0.4102, "step": 5002 }, { "epoch": 2.7704660821412093, "grad_norm": 0.2570869028568268, "learning_rate": 1.753422379889752e-07, "loss": 0.389, "step": 5003 }, { "epoch": 2.7710198431010613, "grad_norm": 0.2571440637111664, "learning_rate": 1.7449707492047497e-07, "loss": 0.3864, "step": 5004 }, { "epoch": 2.771573604060914, "grad_norm": 0.27153924107551575, "learning_rate": 1.736539174758489e-07, "loss": 0.4593, "step": 5005 }, { "epoch": 2.772127365020766, "grad_norm": 0.25568434596061707, "learning_rate": 1.728127660055401e-07, "loss": 0.4048, "step": 5006 }, { "epoch": 2.7726811259806183, "grad_norm": 0.24015021324157715, "learning_rate": 1.7197362085915492e-07, "loss": 0.3864, "step": 5007 }, { "epoch": 2.773234886940471, "grad_norm": 0.27968665957450867, "learning_rate": 1.7113648238546776e-07, "loss": 0.4058, "step": 5008 }, { "epoch": 2.773788647900323, "grad_norm": 0.26498982310295105, "learning_rate": 1.7030135093241862e-07, "loss": 0.3639, "step": 5009 }, { "epoch": 2.7743424088601754, "grad_norm": 0.2590166926383972, "learning_rate": 1.6946822684711317e-07, "loss": 0.384, "step": 5010 }, { "epoch": 2.774896169820028, "grad_norm": 0.268111914396286, "learning_rate": 1.6863711047582277e-07, "loss": 0.4414, "step": 5011 }, { "epoch": 2.77544993077988, "grad_norm": 0.2647212743759155, "learning_rate": 1.678080021639844e-07, "loss": 0.4124, "step": 5012 }, { "epoch": 2.7760036917397324, "grad_norm": 0.2521960437297821, "learning_rate": 1.6698090225620022e-07, "loss": 0.3496, "step": 5013 }, { "epoch": 2.776557452699585, "grad_norm": 0.26635226607322693, "learning_rate": 1.6615581109623803e-07, "loss": 0.3989, "step": 5014 }, { "epoch": 2.777111213659437, "grad_norm": 0.26789361238479614, "learning_rate": 1.6533272902703013e-07, "loss": 0.3852, "step": 5015 }, { "epoch": 2.7776649746192894, "grad_norm": 0.2540515065193176, "learning_rate": 1.6451165639067458e-07, "loss": 0.3662, "step": 5016 }, { "epoch": 2.7782187355791415, "grad_norm": 0.25540080666542053, "learning_rate": 1.6369259352843335e-07, "loss": 0.41, "step": 5017 }, { "epoch": 2.778772496538994, "grad_norm": 0.25324657559394836, "learning_rate": 1.6287554078073365e-07, "loss": 0.4093, "step": 5018 }, { "epoch": 2.7793262574988464, "grad_norm": 0.2650192975997925, "learning_rate": 1.6206049848716765e-07, "loss": 0.3809, "step": 5019 }, { "epoch": 2.779880018458699, "grad_norm": 0.2710305452346802, "learning_rate": 1.6124746698649053e-07, "loss": 0.4183, "step": 5020 }, { "epoch": 2.780433779418551, "grad_norm": 0.25854557752609253, "learning_rate": 1.6043644661662304e-07, "loss": 0.4013, "step": 5021 }, { "epoch": 2.7809875403784035, "grad_norm": 0.26860398054122925, "learning_rate": 1.5962743771465006e-07, "loss": 0.4104, "step": 5022 }, { "epoch": 2.7815413013382555, "grad_norm": 0.25848543643951416, "learning_rate": 1.5882044061681924e-07, "loss": 0.377, "step": 5023 }, { "epoch": 2.782095062298108, "grad_norm": 0.26225030422210693, "learning_rate": 1.5801545565854338e-07, "loss": 0.391, "step": 5024 }, { "epoch": 2.7826488232579605, "grad_norm": 0.27720701694488525, "learning_rate": 1.5721248317439762e-07, "loss": 0.4118, "step": 5025 }, { "epoch": 2.7832025842178125, "grad_norm": 0.2651831805706024, "learning_rate": 1.5641152349812226e-07, "loss": 0.4007, "step": 5026 }, { "epoch": 2.783756345177665, "grad_norm": 0.2583303451538086, "learning_rate": 1.5561257696261933e-07, "loss": 0.3721, "step": 5027 }, { "epoch": 2.784310106137517, "grad_norm": 0.2766134440898895, "learning_rate": 1.5481564389995597e-07, "loss": 0.4198, "step": 5028 }, { "epoch": 2.7848638670973695, "grad_norm": 0.24750743806362152, "learning_rate": 1.540207246413611e-07, "loss": 0.4025, "step": 5029 }, { "epoch": 2.785417628057222, "grad_norm": 0.2683151662349701, "learning_rate": 1.532278195172271e-07, "loss": 0.4121, "step": 5030 }, { "epoch": 2.7859713890170745, "grad_norm": 0.2749336063861847, "learning_rate": 1.524369288571076e-07, "loss": 0.3948, "step": 5031 }, { "epoch": 2.7865251499769266, "grad_norm": 0.2742990255355835, "learning_rate": 1.5164805298972297e-07, "loss": 0.4032, "step": 5032 }, { "epoch": 2.787078910936779, "grad_norm": 0.25155535340309143, "learning_rate": 1.5086119224295258e-07, "loss": 0.3928, "step": 5033 }, { "epoch": 2.787632671896631, "grad_norm": 0.2552608549594879, "learning_rate": 1.5007634694383932e-07, "loss": 0.3743, "step": 5034 }, { "epoch": 2.7881864328564836, "grad_norm": 0.23969599604606628, "learning_rate": 1.492935174185889e-07, "loss": 0.3992, "step": 5035 }, { "epoch": 2.788740193816336, "grad_norm": 0.25388872623443604, "learning_rate": 1.4851270399256767e-07, "loss": 0.3554, "step": 5036 }, { "epoch": 2.789293954776188, "grad_norm": 0.26303255558013916, "learning_rate": 1.477339069903061e-07, "loss": 0.4319, "step": 5037 }, { "epoch": 2.7898477157360406, "grad_norm": 0.2572634220123291, "learning_rate": 1.4695712673549522e-07, "loss": 0.4383, "step": 5038 }, { "epoch": 2.7904014766958927, "grad_norm": 0.26042795181274414, "learning_rate": 1.4618236355098958e-07, "loss": 0.3804, "step": 5039 }, { "epoch": 2.790955237655745, "grad_norm": 0.2588919699192047, "learning_rate": 1.4540961775880104e-07, "loss": 0.393, "step": 5040 }, { "epoch": 2.7915089986155976, "grad_norm": 0.259524941444397, "learning_rate": 1.4463888968010874e-07, "loss": 0.3942, "step": 5041 }, { "epoch": 2.79206275957545, "grad_norm": 0.263653963804245, "learning_rate": 1.438701796352493e-07, "loss": 0.4141, "step": 5042 }, { "epoch": 2.792616520535302, "grad_norm": 0.2593684494495392, "learning_rate": 1.4310348794372208e-07, "loss": 0.4066, "step": 5043 }, { "epoch": 2.7931702814951547, "grad_norm": 0.2636902630329132, "learning_rate": 1.423388149241861e-07, "loss": 0.4334, "step": 5044 }, { "epoch": 2.7937240424550067, "grad_norm": 0.25285935401916504, "learning_rate": 1.4157616089446324e-07, "loss": 0.3562, "step": 5045 }, { "epoch": 2.794277803414859, "grad_norm": 0.2652846872806549, "learning_rate": 1.4081552617153548e-07, "loss": 0.3864, "step": 5046 }, { "epoch": 2.7948315643747117, "grad_norm": 0.26638075709342957, "learning_rate": 1.400569110715455e-07, "loss": 0.3929, "step": 5047 }, { "epoch": 2.795385325334564, "grad_norm": 0.2512982487678528, "learning_rate": 1.393003159097972e-07, "loss": 0.4036, "step": 5048 }, { "epoch": 2.7959390862944162, "grad_norm": 0.2685050070285797, "learning_rate": 1.3854574100075237e-07, "loss": 0.3929, "step": 5049 }, { "epoch": 2.7964928472542687, "grad_norm": 0.2802902162075043, "learning_rate": 1.3779318665803566e-07, "loss": 0.4059, "step": 5050 }, { "epoch": 2.7970466082141208, "grad_norm": 0.2518865466117859, "learning_rate": 1.370426531944319e-07, "loss": 0.3945, "step": 5051 }, { "epoch": 2.7976003691739733, "grad_norm": 0.25441259145736694, "learning_rate": 1.3629414092188543e-07, "loss": 0.435, "step": 5052 }, { "epoch": 2.7981541301338257, "grad_norm": 0.2523966431617737, "learning_rate": 1.3554765015149962e-07, "loss": 0.407, "step": 5053 }, { "epoch": 2.798707891093678, "grad_norm": 0.25751811265945435, "learning_rate": 1.3480318119353908e-07, "loss": 0.4288, "step": 5054 }, { "epoch": 2.7992616520535303, "grad_norm": 0.2435324341058731, "learning_rate": 1.340607343574263e-07, "loss": 0.3609, "step": 5055 }, { "epoch": 2.7998154130133823, "grad_norm": 0.2765052318572998, "learning_rate": 1.3332030995174662e-07, "loss": 0.4655, "step": 5056 }, { "epoch": 2.800369173973235, "grad_norm": 0.25011101365089417, "learning_rate": 1.3258190828424112e-07, "loss": 0.3602, "step": 5057 }, { "epoch": 2.8009229349330873, "grad_norm": 0.26073122024536133, "learning_rate": 1.3184552966181153e-07, "loss": 0.4066, "step": 5058 }, { "epoch": 2.80147669589294, "grad_norm": 0.2590304911136627, "learning_rate": 1.3111117439051968e-07, "loss": 0.3955, "step": 5059 }, { "epoch": 2.802030456852792, "grad_norm": 0.26639360189437866, "learning_rate": 1.3037884277558478e-07, "loss": 0.4247, "step": 5060 }, { "epoch": 2.8025842178126443, "grad_norm": 0.25083160400390625, "learning_rate": 1.2964853512138719e-07, "loss": 0.3585, "step": 5061 }, { "epoch": 2.8031379787724964, "grad_norm": 0.27158114314079285, "learning_rate": 1.2892025173146362e-07, "loss": 0.4171, "step": 5062 }, { "epoch": 2.803691739732349, "grad_norm": 0.2440040558576584, "learning_rate": 1.2819399290851074e-07, "loss": 0.3799, "step": 5063 }, { "epoch": 2.8042455006922014, "grad_norm": 0.26363804936408997, "learning_rate": 1.2746975895438274e-07, "loss": 0.409, "step": 5064 }, { "epoch": 2.8047992616520534, "grad_norm": 0.2799742817878723, "learning_rate": 1.2674755017009544e-07, "loss": 0.4222, "step": 5065 }, { "epoch": 2.805353022611906, "grad_norm": 0.2626654803752899, "learning_rate": 1.2602736685581819e-07, "loss": 0.3971, "step": 5066 }, { "epoch": 2.805906783571758, "grad_norm": 0.27311626076698303, "learning_rate": 1.25309209310881e-07, "loss": 0.4271, "step": 5067 }, { "epoch": 2.8064605445316104, "grad_norm": 0.2692154049873352, "learning_rate": 1.2459307783377283e-07, "loss": 0.4067, "step": 5068 }, { "epoch": 2.807014305491463, "grad_norm": 0.24925391376018524, "learning_rate": 1.2387897272213788e-07, "loss": 0.3981, "step": 5069 }, { "epoch": 2.8075680664513154, "grad_norm": 0.2687492370605469, "learning_rate": 1.231668942727815e-07, "loss": 0.416, "step": 5070 }, { "epoch": 2.8081218274111674, "grad_norm": 0.25597208738327026, "learning_rate": 1.2245684278166358e-07, "loss": 0.4069, "step": 5071 }, { "epoch": 2.80867558837102, "grad_norm": 0.2504473626613617, "learning_rate": 1.2174881854390364e-07, "loss": 0.3972, "step": 5072 }, { "epoch": 2.809229349330872, "grad_norm": 0.26425105333328247, "learning_rate": 1.210428218537768e-07, "loss": 0.4286, "step": 5073 }, { "epoch": 2.8097831102907245, "grad_norm": 0.26164761185646057, "learning_rate": 1.2033885300471726e-07, "loss": 0.396, "step": 5074 }, { "epoch": 2.810336871250577, "grad_norm": 0.2624509632587433, "learning_rate": 1.196369122893154e-07, "loss": 0.3899, "step": 5075 }, { "epoch": 2.810890632210429, "grad_norm": 0.2759982943534851, "learning_rate": 1.189369999993184e-07, "loss": 0.3979, "step": 5076 }, { "epoch": 2.8114443931702815, "grad_norm": 0.2618606984615326, "learning_rate": 1.182391164256319e-07, "loss": 0.4095, "step": 5077 }, { "epoch": 2.811998154130134, "grad_norm": 0.25739288330078125, "learning_rate": 1.175432618583161e-07, "loss": 0.3787, "step": 5078 }, { "epoch": 2.812551915089986, "grad_norm": 0.28251633048057556, "learning_rate": 1.1684943658658965e-07, "loss": 0.4192, "step": 5079 }, { "epoch": 2.8131056760498385, "grad_norm": 0.28444036841392517, "learning_rate": 1.1615764089882742e-07, "loss": 0.4032, "step": 5080 }, { "epoch": 2.813659437009691, "grad_norm": 0.28151336312294006, "learning_rate": 1.1546787508255997e-07, "loss": 0.4038, "step": 5081 }, { "epoch": 2.814213197969543, "grad_norm": 0.2699057459831238, "learning_rate": 1.1478013942447464e-07, "loss": 0.4243, "step": 5082 }, { "epoch": 2.8147669589293955, "grad_norm": 0.2344837784767151, "learning_rate": 1.1409443421041555e-07, "loss": 0.3496, "step": 5083 }, { "epoch": 2.8153207198892476, "grad_norm": 0.2516562044620514, "learning_rate": 1.1341075972538196e-07, "loss": 0.4136, "step": 5084 }, { "epoch": 2.8158744808491, "grad_norm": 0.25078192353248596, "learning_rate": 1.1272911625352934e-07, "loss": 0.4053, "step": 5085 }, { "epoch": 2.8164282418089526, "grad_norm": 0.2577981650829315, "learning_rate": 1.1204950407816939e-07, "loss": 0.4051, "step": 5086 }, { "epoch": 2.816982002768805, "grad_norm": 0.2765219807624817, "learning_rate": 1.1137192348176951e-07, "loss": 0.4101, "step": 5087 }, { "epoch": 2.817535763728657, "grad_norm": 0.2640921473503113, "learning_rate": 1.1069637474595219e-07, "loss": 0.4117, "step": 5088 }, { "epoch": 2.8180895246885096, "grad_norm": 0.25555065274238586, "learning_rate": 1.1002285815149616e-07, "loss": 0.395, "step": 5089 }, { "epoch": 2.8186432856483616, "grad_norm": 0.25520530343055725, "learning_rate": 1.0935137397833528e-07, "loss": 0.3673, "step": 5090 }, { "epoch": 2.819197046608214, "grad_norm": 0.2626339793205261, "learning_rate": 1.0868192250555798e-07, "loss": 0.4389, "step": 5091 }, { "epoch": 2.8197508075680666, "grad_norm": 0.2452274113893509, "learning_rate": 1.0801450401140833e-07, "loss": 0.381, "step": 5092 }, { "epoch": 2.8203045685279187, "grad_norm": 0.24455946683883667, "learning_rate": 1.0734911877328558e-07, "loss": 0.3785, "step": 5093 }, { "epoch": 2.820858329487771, "grad_norm": 0.26370903849601746, "learning_rate": 1.0668576706774458e-07, "loss": 0.4459, "step": 5094 }, { "epoch": 2.821412090447623, "grad_norm": 0.23782028257846832, "learning_rate": 1.0602444917049315e-07, "loss": 0.3761, "step": 5095 }, { "epoch": 2.8219658514074757, "grad_norm": 0.27731800079345703, "learning_rate": 1.0536516535639529e-07, "loss": 0.4508, "step": 5096 }, { "epoch": 2.822519612367328, "grad_norm": 0.24080510437488556, "learning_rate": 1.0470791589947016e-07, "loss": 0.369, "step": 5097 }, { "epoch": 2.8230733733271807, "grad_norm": 0.27247360348701477, "learning_rate": 1.0405270107288812e-07, "loss": 0.4502, "step": 5098 }, { "epoch": 2.8236271342870327, "grad_norm": 0.25533631443977356, "learning_rate": 1.03399521148978e-07, "loss": 0.3729, "step": 5099 }, { "epoch": 2.824180895246885, "grad_norm": 0.27161645889282227, "learning_rate": 1.0274837639921986e-07, "loss": 0.4025, "step": 5100 }, { "epoch": 2.8247346562067372, "grad_norm": 0.2673325836658478, "learning_rate": 1.0209926709425056e-07, "loss": 0.3943, "step": 5101 }, { "epoch": 2.8252884171665897, "grad_norm": 0.26048240065574646, "learning_rate": 1.014521935038576e-07, "loss": 0.4057, "step": 5102 }, { "epoch": 2.8258421781264422, "grad_norm": 0.2514619827270508, "learning_rate": 1.0080715589698476e-07, "loss": 0.4074, "step": 5103 }, { "epoch": 2.8263959390862943, "grad_norm": 0.24946245551109314, "learning_rate": 1.0016415454172978e-07, "loss": 0.3648, "step": 5104 }, { "epoch": 2.8269497000461468, "grad_norm": 0.2671642005443573, "learning_rate": 9.952318970534225e-08, "loss": 0.4387, "step": 5105 }, { "epoch": 2.827503461005999, "grad_norm": 0.26887041330337524, "learning_rate": 9.888426165422793e-08, "loss": 0.436, "step": 5106 }, { "epoch": 2.8280572219658513, "grad_norm": 0.25460541248321533, "learning_rate": 9.824737065394219e-08, "loss": 0.3926, "step": 5107 }, { "epoch": 2.828610982925704, "grad_norm": 0.2578769028186798, "learning_rate": 9.761251696919827e-08, "loss": 0.3982, "step": 5108 }, { "epoch": 2.8291647438855563, "grad_norm": 0.2698090672492981, "learning_rate": 9.697970086385899e-08, "loss": 0.4113, "step": 5109 }, { "epoch": 2.8297185048454083, "grad_norm": 0.24238666892051697, "learning_rate": 9.634892260094286e-08, "loss": 0.3863, "step": 5110 }, { "epoch": 2.830272265805261, "grad_norm": 0.27395471930503845, "learning_rate": 9.572018244261905e-08, "loss": 0.4428, "step": 5111 }, { "epoch": 2.830826026765113, "grad_norm": 0.26052239537239075, "learning_rate": 9.509348065021129e-08, "loss": 0.3912, "step": 5112 }, { "epoch": 2.8313797877249653, "grad_norm": 0.24959491193294525, "learning_rate": 9.446881748419623e-08, "loss": 0.4078, "step": 5113 }, { "epoch": 2.831933548684818, "grad_norm": 0.25951647758483887, "learning_rate": 9.38461932042023e-08, "loss": 0.4223, "step": 5114 }, { "epoch": 2.8324873096446703, "grad_norm": 0.2664281129837036, "learning_rate": 9.322560806901082e-08, "loss": 0.3912, "step": 5115 }, { "epoch": 2.8330410706045224, "grad_norm": 0.2620655298233032, "learning_rate": 9.260706233655493e-08, "loss": 0.4261, "step": 5116 }, { "epoch": 2.833594831564375, "grad_norm": 0.2635103762149811, "learning_rate": 9.199055626392173e-08, "loss": 0.394, "step": 5117 }, { "epoch": 2.834148592524227, "grad_norm": 0.2829095125198364, "learning_rate": 9.137609010734904e-08, "loss": 0.4511, "step": 5118 }, { "epoch": 2.8347023534840794, "grad_norm": 0.24980193376541138, "learning_rate": 9.07636641222287e-08, "loss": 0.3775, "step": 5119 }, { "epoch": 2.835256114443932, "grad_norm": 0.24034661054611206, "learning_rate": 9.015327856310152e-08, "loss": 0.3454, "step": 5120 }, { "epoch": 2.835809875403784, "grad_norm": 0.2746758759021759, "learning_rate": 8.95449336836629e-08, "loss": 0.4876, "step": 5121 }, { "epoch": 2.8363636363636364, "grad_norm": 0.23238864541053772, "learning_rate": 8.893862973675894e-08, "loss": 0.3447, "step": 5122 }, { "epoch": 2.8369173973234885, "grad_norm": 0.2708374857902527, "learning_rate": 8.833436697438858e-08, "loss": 0.3882, "step": 5123 }, { "epoch": 2.837471158283341, "grad_norm": 0.25207003951072693, "learning_rate": 8.773214564770094e-08, "loss": 0.3687, "step": 5124 }, { "epoch": 2.8380249192431934, "grad_norm": 0.2549060583114624, "learning_rate": 8.713196600699747e-08, "loss": 0.399, "step": 5125 }, { "epoch": 2.838578680203046, "grad_norm": 0.25930365920066833, "learning_rate": 8.65338283017303e-08, "loss": 0.4003, "step": 5126 }, { "epoch": 2.839132441162898, "grad_norm": 0.27247679233551025, "learning_rate": 8.593773278050443e-08, "loss": 0.4453, "step": 5127 }, { "epoch": 2.8396862021227505, "grad_norm": 0.2391788512468338, "learning_rate": 8.534367969107449e-08, "loss": 0.4059, "step": 5128 }, { "epoch": 2.8402399630826025, "grad_norm": 0.24185200035572052, "learning_rate": 8.475166928034684e-08, "loss": 0.3761, "step": 5129 }, { "epoch": 2.840793724042455, "grad_norm": 0.2535075843334198, "learning_rate": 8.416170179437799e-08, "loss": 0.3992, "step": 5130 }, { "epoch": 2.8413474850023075, "grad_norm": 0.25260624289512634, "learning_rate": 8.357377747837736e-08, "loss": 0.3933, "step": 5131 }, { "epoch": 2.8419012459621595, "grad_norm": 0.261059433221817, "learning_rate": 8.298789657670337e-08, "loss": 0.4242, "step": 5132 }, { "epoch": 2.842455006922012, "grad_norm": 0.27565792202949524, "learning_rate": 8.240405933286622e-08, "loss": 0.4125, "step": 5133 }, { "epoch": 2.843008767881864, "grad_norm": 0.25285467505455017, "learning_rate": 8.182226598952514e-08, "loss": 0.3811, "step": 5134 }, { "epoch": 2.8435625288417166, "grad_norm": 0.25831013917922974, "learning_rate": 8.124251678849171e-08, "loss": 0.4213, "step": 5135 }, { "epoch": 2.844116289801569, "grad_norm": 0.2665303647518158, "learning_rate": 8.066481197072707e-08, "loss": 0.4444, "step": 5136 }, { "epoch": 2.8446700507614215, "grad_norm": 0.2505907416343689, "learning_rate": 8.008915177634246e-08, "loss": 0.3734, "step": 5137 }, { "epoch": 2.8452238117212736, "grad_norm": 0.27377352118492126, "learning_rate": 7.951553644459986e-08, "loss": 0.4011, "step": 5138 }, { "epoch": 2.845777572681126, "grad_norm": 0.2553969621658325, "learning_rate": 7.894396621391021e-08, "loss": 0.3818, "step": 5139 }, { "epoch": 2.846331333640978, "grad_norm": 0.2448127716779709, "learning_rate": 7.837444132183625e-08, "loss": 0.3822, "step": 5140 }, { "epoch": 2.8468850946008306, "grad_norm": 0.2735103368759155, "learning_rate": 7.78069620050892e-08, "loss": 0.3997, "step": 5141 }, { "epoch": 2.847438855560683, "grad_norm": 0.2648429274559021, "learning_rate": 7.724152849953037e-08, "loss": 0.3966, "step": 5142 }, { "epoch": 2.847992616520535, "grad_norm": 0.25853464007377625, "learning_rate": 7.667814104017124e-08, "loss": 0.4261, "step": 5143 }, { "epoch": 2.8485463774803876, "grad_norm": 0.2364911586046219, "learning_rate": 7.611679986117226e-08, "loss": 0.3665, "step": 5144 }, { "epoch": 2.84910013844024, "grad_norm": 0.2542654871940613, "learning_rate": 7.555750519584348e-08, "loss": 0.3889, "step": 5145 }, { "epoch": 2.849653899400092, "grad_norm": 0.26308175921440125, "learning_rate": 7.500025727664507e-08, "loss": 0.4177, "step": 5146 }, { "epoch": 2.8502076603599447, "grad_norm": 0.2687308192253113, "learning_rate": 7.444505633518562e-08, "loss": 0.4054, "step": 5147 }, { "epoch": 2.850761421319797, "grad_norm": 0.2624754309654236, "learning_rate": 7.389190260222334e-08, "loss": 0.4188, "step": 5148 }, { "epoch": 2.851315182279649, "grad_norm": 0.2590891420841217, "learning_rate": 7.3340796307666e-08, "loss": 0.3887, "step": 5149 }, { "epoch": 2.8518689432395017, "grad_norm": 0.2550787627696991, "learning_rate": 7.279173768056923e-08, "loss": 0.3967, "step": 5150 }, { "epoch": 2.8524227041993537, "grad_norm": 0.26570722460746765, "learning_rate": 7.224472694913886e-08, "loss": 0.4229, "step": 5151 }, { "epoch": 2.852976465159206, "grad_norm": 0.2544189989566803, "learning_rate": 7.169976434072856e-08, "loss": 0.4286, "step": 5152 }, { "epoch": 2.8535302261190587, "grad_norm": 0.25667718052864075, "learning_rate": 7.115685008184159e-08, "loss": 0.4301, "step": 5153 }, { "epoch": 2.854083987078911, "grad_norm": 0.26151126623153687, "learning_rate": 7.061598439812855e-08, "loss": 0.4092, "step": 5154 }, { "epoch": 2.8546377480387632, "grad_norm": 0.26739734411239624, "learning_rate": 7.00771675143902e-08, "loss": 0.3925, "step": 5155 }, { "epoch": 2.8551915089986157, "grad_norm": 0.2895129919052124, "learning_rate": 6.954039965457515e-08, "loss": 0.4452, "step": 5156 }, { "epoch": 2.855745269958468, "grad_norm": 0.2617914080619812, "learning_rate": 6.900568104177941e-08, "loss": 0.4083, "step": 5157 }, { "epoch": 2.8562990309183203, "grad_norm": 0.25639915466308594, "learning_rate": 6.847301189824907e-08, "loss": 0.3871, "step": 5158 }, { "epoch": 2.8568527918781728, "grad_norm": 0.27162668108940125, "learning_rate": 6.794239244537648e-08, "loss": 0.3963, "step": 5159 }, { "epoch": 2.857406552838025, "grad_norm": 0.26489514112472534, "learning_rate": 6.741382290370412e-08, "loss": 0.4113, "step": 5160 }, { "epoch": 2.8579603137978773, "grad_norm": 0.2554105520248413, "learning_rate": 6.688730349292017e-08, "loss": 0.3815, "step": 5161 }, { "epoch": 2.8585140747577293, "grad_norm": 0.25633418560028076, "learning_rate": 6.636283443186286e-08, "loss": 0.3904, "step": 5162 }, { "epoch": 2.859067835717582, "grad_norm": 0.25812798738479614, "learning_rate": 6.584041593851675e-08, "loss": 0.4278, "step": 5163 }, { "epoch": 2.8596215966774343, "grad_norm": 0.26276102662086487, "learning_rate": 6.532004823001481e-08, "loss": 0.3885, "step": 5164 }, { "epoch": 2.860175357637287, "grad_norm": 0.25043603777885437, "learning_rate": 6.48017315226368e-08, "loss": 0.4117, "step": 5165 }, { "epoch": 2.860729118597139, "grad_norm": 0.26660531759262085, "learning_rate": 6.428546603181208e-08, "loss": 0.4077, "step": 5166 }, { "epoch": 2.8612828795569913, "grad_norm": 0.2366938441991806, "learning_rate": 6.377125197211509e-08, "loss": 0.3484, "step": 5167 }, { "epoch": 2.8618366405168434, "grad_norm": 0.2870185375213623, "learning_rate": 6.325908955726933e-08, "loss": 0.4518, "step": 5168 }, { "epoch": 2.862390401476696, "grad_norm": 0.26398715376853943, "learning_rate": 6.274897900014343e-08, "loss": 0.3993, "step": 5169 }, { "epoch": 2.8629441624365484, "grad_norm": 0.25369778275489807, "learning_rate": 6.22409205127561e-08, "loss": 0.3844, "step": 5170 }, { "epoch": 2.8634979233964004, "grad_norm": 0.25578773021698, "learning_rate": 6.173491430627121e-08, "loss": 0.3787, "step": 5171 }, { "epoch": 2.864051684356253, "grad_norm": 0.2469591498374939, "learning_rate": 6.123096059100054e-08, "loss": 0.3687, "step": 5172 }, { "epoch": 2.864605445316105, "grad_norm": 0.2500249743461609, "learning_rate": 6.072905957640207e-08, "loss": 0.3959, "step": 5173 }, { "epoch": 2.8651592062759574, "grad_norm": 0.2614101767539978, "learning_rate": 6.022921147108007e-08, "loss": 0.4033, "step": 5174 }, { "epoch": 2.86571296723581, "grad_norm": 0.2488444596529007, "learning_rate": 5.97314164827878e-08, "loss": 0.3784, "step": 5175 }, { "epoch": 2.8662667281956624, "grad_norm": 0.2442483901977539, "learning_rate": 5.923567481842307e-08, "loss": 0.3799, "step": 5176 }, { "epoch": 2.8668204891555145, "grad_norm": 0.2795432507991791, "learning_rate": 5.874198668403164e-08, "loss": 0.4351, "step": 5177 }, { "epoch": 2.867374250115367, "grad_norm": 0.2567443251609802, "learning_rate": 5.8250352284804355e-08, "loss": 0.4112, "step": 5178 }, { "epoch": 2.867928011075219, "grad_norm": 0.2849137783050537, "learning_rate": 5.7760771825078886e-08, "loss": 0.416, "step": 5179 }, { "epoch": 2.8684817720350715, "grad_norm": 0.2575938403606415, "learning_rate": 5.727324550834079e-08, "loss": 0.3807, "step": 5180 }, { "epoch": 2.869035532994924, "grad_norm": 0.275816947221756, "learning_rate": 5.67877735372202e-08, "loss": 0.4094, "step": 5181 }, { "epoch": 2.8695892939547765, "grad_norm": 0.28357112407684326, "learning_rate": 5.6304356113493495e-08, "loss": 0.3832, "step": 5182 }, { "epoch": 2.8701430549146285, "grad_norm": 0.25089922547340393, "learning_rate": 5.582299343808329e-08, "loss": 0.3895, "step": 5183 }, { "epoch": 2.870696815874481, "grad_norm": 0.25614845752716064, "learning_rate": 5.5343685711058415e-08, "loss": 0.4019, "step": 5184 }, { "epoch": 2.871250576834333, "grad_norm": 0.2588282525539398, "learning_rate": 5.486643313163453e-08, "loss": 0.4188, "step": 5185 }, { "epoch": 2.8718043377941855, "grad_norm": 0.2623474895954132, "learning_rate": 5.439123589817186e-08, "loss": 0.3897, "step": 5186 }, { "epoch": 2.872358098754038, "grad_norm": 0.27259397506713867, "learning_rate": 5.391809420817518e-08, "loss": 0.411, "step": 5187 }, { "epoch": 2.87291185971389, "grad_norm": 0.24883954226970673, "learning_rate": 5.3447008258298295e-08, "loss": 0.386, "step": 5188 }, { "epoch": 2.8734656206737426, "grad_norm": 0.2520677149295807, "learning_rate": 5.297797824433682e-08, "loss": 0.3671, "step": 5189 }, { "epoch": 2.8740193816335946, "grad_norm": 0.2656787633895874, "learning_rate": 5.251100436123591e-08, "loss": 0.4246, "step": 5190 }, { "epoch": 2.874573142593447, "grad_norm": 0.25748056173324585, "learning_rate": 5.2046086803081985e-08, "loss": 0.4052, "step": 5191 }, { "epoch": 2.8751269035532996, "grad_norm": 0.24802373349666595, "learning_rate": 5.1583225763109924e-08, "loss": 0.3779, "step": 5192 }, { "epoch": 2.875680664513152, "grad_norm": 0.27041852474212646, "learning_rate": 5.11224214336975e-08, "loss": 0.4353, "step": 5193 }, { "epoch": 2.876234425473004, "grad_norm": 0.23863740265369415, "learning_rate": 5.0663674006369845e-08, "loss": 0.3869, "step": 5194 }, { "epoch": 2.8767881864328566, "grad_norm": 0.25671276450157166, "learning_rate": 5.02069836717961e-08, "loss": 0.4137, "step": 5195 }, { "epoch": 2.8773419473927087, "grad_norm": 0.2397291213274002, "learning_rate": 4.975235061979056e-08, "loss": 0.3587, "step": 5196 }, { "epoch": 2.877895708352561, "grad_norm": 0.26165831089019775, "learning_rate": 4.929977503931149e-08, "loss": 0.396, "step": 5197 }, { "epoch": 2.8784494693124136, "grad_norm": 0.2646643817424774, "learning_rate": 4.884925711846289e-08, "loss": 0.4138, "step": 5198 }, { "epoch": 2.8790032302722657, "grad_norm": 0.26198089122772217, "learning_rate": 4.840079704449552e-08, "loss": 0.4232, "step": 5199 }, { "epoch": 2.879556991232118, "grad_norm": 0.2483299970626831, "learning_rate": 4.7954395003800283e-08, "loss": 0.3704, "step": 5200 }, { "epoch": 2.88011075219197, "grad_norm": 0.28235477209091187, "learning_rate": 4.7510051181917094e-08, "loss": 0.4299, "step": 5201 }, { "epoch": 2.8806645131518227, "grad_norm": 0.2462083399295807, "learning_rate": 4.7067765763527115e-08, "loss": 0.3969, "step": 5202 }, { "epoch": 2.881218274111675, "grad_norm": 0.2547208368778229, "learning_rate": 4.662753893245886e-08, "loss": 0.4204, "step": 5203 }, { "epoch": 2.8817720350715277, "grad_norm": 0.25654327869415283, "learning_rate": 4.6189370871683184e-08, "loss": 0.4428, "step": 5204 }, { "epoch": 2.8823257960313797, "grad_norm": 0.2465514838695526, "learning_rate": 4.575326176331551e-08, "loss": 0.3898, "step": 5205 }, { "epoch": 2.882879556991232, "grad_norm": 0.234212264418602, "learning_rate": 4.531921178861642e-08, "loss": 0.3599, "step": 5206 }, { "epoch": 2.8834333179510843, "grad_norm": 0.2639654874801636, "learning_rate": 4.488722112798993e-08, "loss": 0.4291, "step": 5207 }, { "epoch": 2.8839870789109368, "grad_norm": 0.2557573616504669, "learning_rate": 4.4457289960984086e-08, "loss": 0.4182, "step": 5208 }, { "epoch": 2.8845408398707892, "grad_norm": 0.26723024249076843, "learning_rate": 4.402941846629205e-08, "loss": 0.4049, "step": 5209 }, { "epoch": 2.8850946008306413, "grad_norm": 0.24873842298984528, "learning_rate": 4.36036068217488e-08, "loss": 0.3778, "step": 5210 }, { "epoch": 2.8856483617904938, "grad_norm": 0.2922867238521576, "learning_rate": 4.317985520433554e-08, "loss": 0.4192, "step": 5211 }, { "epoch": 2.8862021227503463, "grad_norm": 0.25089702010154724, "learning_rate": 4.275816379017528e-08, "loss": 0.4139, "step": 5212 }, { "epoch": 2.8867558837101983, "grad_norm": 0.2640869915485382, "learning_rate": 4.23385327545367e-08, "loss": 0.4168, "step": 5213 }, { "epoch": 2.887309644670051, "grad_norm": 0.28546884655952454, "learning_rate": 4.192096227182974e-08, "loss": 0.3962, "step": 5214 }, { "epoch": 2.8878634056299033, "grad_norm": 0.2674746513366699, "learning_rate": 4.1505452515610004e-08, "loss": 0.4182, "step": 5215 }, { "epoch": 2.8884171665897553, "grad_norm": 0.23029905557632446, "learning_rate": 4.109200365857602e-08, "loss": 0.3582, "step": 5216 }, { "epoch": 2.888970927549608, "grad_norm": 0.27221837639808655, "learning_rate": 4.0680615872569216e-08, "loss": 0.4587, "step": 5217 }, { "epoch": 2.88952468850946, "grad_norm": 0.26951178908348083, "learning_rate": 4.027128932857449e-08, "loss": 0.4297, "step": 5218 }, { "epoch": 2.8900784494693124, "grad_norm": 0.2510021924972534, "learning_rate": 3.9864024196720755e-08, "loss": 0.3863, "step": 5219 }, { "epoch": 2.890632210429165, "grad_norm": 0.25796976685523987, "learning_rate": 3.9458820646278706e-08, "loss": 0.4338, "step": 5220 }, { "epoch": 2.8911859713890173, "grad_norm": 0.25225332379341125, "learning_rate": 3.9055678845664194e-08, "loss": 0.3743, "step": 5221 }, { "epoch": 2.8917397323488694, "grad_norm": 0.246939018368721, "learning_rate": 3.865459896243484e-08, "loss": 0.3556, "step": 5222 }, { "epoch": 2.892293493308722, "grad_norm": 0.2724308669567108, "learning_rate": 3.82555811632912e-08, "loss": 0.422, "step": 5223 }, { "epoch": 2.892847254268574, "grad_norm": 0.25678688287734985, "learning_rate": 3.7858625614076716e-08, "loss": 0.4095, "step": 5224 }, { "epoch": 2.8934010152284264, "grad_norm": 0.2557617127895355, "learning_rate": 3.746373247977886e-08, "loss": 0.4114, "step": 5225 }, { "epoch": 2.893954776188279, "grad_norm": 0.27286678552627563, "learning_rate": 3.707090192452634e-08, "loss": 0.4062, "step": 5226 }, { "epoch": 2.894508537148131, "grad_norm": 0.2748638093471527, "learning_rate": 3.6680134111591884e-08, "loss": 0.4034, "step": 5227 }, { "epoch": 2.8950622981079834, "grad_norm": 0.2622748017311096, "learning_rate": 3.6291429203389464e-08, "loss": 0.3801, "step": 5228 }, { "epoch": 2.8956160590678355, "grad_norm": 0.2506093680858612, "learning_rate": 3.590478736147762e-08, "loss": 0.4157, "step": 5229 }, { "epoch": 2.896169820027688, "grad_norm": 0.2506883442401886, "learning_rate": 3.552020874655559e-08, "loss": 0.4097, "step": 5230 }, { "epoch": 2.8967235809875405, "grad_norm": 0.24471871554851532, "learning_rate": 3.513769351846663e-08, "loss": 0.4141, "step": 5231 }, { "epoch": 2.897277341947393, "grad_norm": 0.25170689821243286, "learning_rate": 3.4757241836194114e-08, "loss": 0.4148, "step": 5232 }, { "epoch": 2.897831102907245, "grad_norm": 0.2577512562274933, "learning_rate": 3.437885385786599e-08, "loss": 0.3982, "step": 5233 }, { "epoch": 2.8983848638670975, "grad_norm": 0.31100741028785706, "learning_rate": 3.400252974075202e-08, "loss": 0.4037, "step": 5234 }, { "epoch": 2.8989386248269495, "grad_norm": 0.2774052321910858, "learning_rate": 3.36282696412632e-08, "loss": 0.4389, "step": 5235 }, { "epoch": 2.899492385786802, "grad_norm": 0.2402508705854416, "learning_rate": 3.325607371495343e-08, "loss": 0.3568, "step": 5236 }, { "epoch": 2.9000461467466545, "grad_norm": 0.3026539087295532, "learning_rate": 3.2885942116518966e-08, "loss": 0.4281, "step": 5237 }, { "epoch": 2.9005999077065066, "grad_norm": 0.2522125542163849, "learning_rate": 3.251787499979675e-08, "loss": 0.3876, "step": 5238 }, { "epoch": 2.901153668666359, "grad_norm": 0.2787318825721741, "learning_rate": 3.2151872517767194e-08, "loss": 0.4317, "step": 5239 }, { "epoch": 2.901707429626211, "grad_norm": 0.2786957323551178, "learning_rate": 3.178793482255249e-08, "loss": 0.417, "step": 5240 }, { "epoch": 2.9022611905860636, "grad_norm": 0.26168808341026306, "learning_rate": 3.142606206541388e-08, "loss": 0.3889, "step": 5241 }, { "epoch": 2.902814951545916, "grad_norm": 0.2642349600791931, "learning_rate": 3.106625439675881e-08, "loss": 0.4044, "step": 5242 }, { "epoch": 2.9033687125057686, "grad_norm": 0.24640585482120514, "learning_rate": 3.0708511966133224e-08, "loss": 0.3939, "step": 5243 }, { "epoch": 2.9039224734656206, "grad_norm": 0.26123398542404175, "learning_rate": 3.035283492222596e-08, "loss": 0.4212, "step": 5244 }, { "epoch": 2.904476234425473, "grad_norm": 0.2665010392665863, "learning_rate": 2.999922341286654e-08, "loss": 0.3876, "step": 5245 }, { "epoch": 2.905029995385325, "grad_norm": 0.2513507902622223, "learning_rate": 2.9647677585026292e-08, "loss": 0.4024, "step": 5246 }, { "epoch": 2.9055837563451776, "grad_norm": 0.24409541487693787, "learning_rate": 2.9298197584819445e-08, "loss": 0.4013, "step": 5247 }, { "epoch": 2.90613751730503, "grad_norm": 0.24384009838104248, "learning_rate": 2.895078355749925e-08, "loss": 0.3883, "step": 5248 }, { "epoch": 2.906691278264882, "grad_norm": 0.2562786936759949, "learning_rate": 2.8605435647461877e-08, "loss": 0.3813, "step": 5249 }, { "epoch": 2.9072450392247347, "grad_norm": 0.26798921823501587, "learning_rate": 2.8262153998244724e-08, "loss": 0.3898, "step": 5250 }, { "epoch": 2.907798800184587, "grad_norm": 0.2487436681985855, "learning_rate": 2.7920938752524772e-08, "loss": 0.3883, "step": 5251 }, { "epoch": 2.908352561144439, "grad_norm": 0.25723421573638916, "learning_rate": 2.7581790052121915e-08, "loss": 0.4337, "step": 5252 }, { "epoch": 2.9089063221042917, "grad_norm": 0.2374645620584488, "learning_rate": 2.724470803799728e-08, "loss": 0.3703, "step": 5253 }, { "epoch": 2.909460083064144, "grad_norm": 0.24825982749462128, "learning_rate": 2.6909692850251022e-08, "loss": 0.4116, "step": 5254 }, { "epoch": 2.910013844023996, "grad_norm": 0.23829872906208038, "learning_rate": 2.6576744628126204e-08, "loss": 0.3726, "step": 5255 }, { "epoch": 2.9105676049838487, "grad_norm": 0.2660926580429077, "learning_rate": 2.624586351000602e-08, "loss": 0.423, "step": 5256 }, { "epoch": 2.9111213659437007, "grad_norm": 0.24532924592494965, "learning_rate": 2.5917049633414903e-08, "loss": 0.3917, "step": 5257 }, { "epoch": 2.9116751269035532, "grad_norm": 0.25807514786720276, "learning_rate": 2.559030313501687e-08, "loss": 0.4085, "step": 5258 }, { "epoch": 2.9122288878634057, "grad_norm": 0.23748187720775604, "learning_rate": 2.5265624150618286e-08, "loss": 0.3632, "step": 5259 }, { "epoch": 2.912782648823258, "grad_norm": 0.26079368591308594, "learning_rate": 2.494301281516509e-08, "loss": 0.434, "step": 5260 }, { "epoch": 2.9133364097831103, "grad_norm": 0.25391435623168945, "learning_rate": 2.462246926274503e-08, "loss": 0.3942, "step": 5261 }, { "epoch": 2.9138901707429627, "grad_norm": 0.23999054729938507, "learning_rate": 2.4303993626584865e-08, "loss": 0.376, "step": 5262 }, { "epoch": 2.914443931702815, "grad_norm": 0.2782882750034332, "learning_rate": 2.3987586039053155e-08, "loss": 0.433, "step": 5263 }, { "epoch": 2.9149976926626673, "grad_norm": 0.2809552550315857, "learning_rate": 2.3673246631657488e-08, "loss": 0.4242, "step": 5264 }, { "epoch": 2.9155514536225198, "grad_norm": 0.2664337754249573, "learning_rate": 2.3360975535047236e-08, "loss": 0.4055, "step": 5265 }, { "epoch": 2.916105214582372, "grad_norm": 0.2549518048763275, "learning_rate": 2.3050772879012474e-08, "loss": 0.3948, "step": 5266 }, { "epoch": 2.9166589755422243, "grad_norm": 0.2505522668361664, "learning_rate": 2.274263879248173e-08, "loss": 0.3813, "step": 5267 }, { "epoch": 2.9172127365020764, "grad_norm": 0.2554278075695038, "learning_rate": 2.2436573403524786e-08, "loss": 0.3828, "step": 5268 }, { "epoch": 2.917766497461929, "grad_norm": 0.24863380193710327, "learning_rate": 2.2132576839352103e-08, "loss": 0.3669, "step": 5269 }, { "epoch": 2.9183202584217813, "grad_norm": 0.2271716594696045, "learning_rate": 2.1830649226313726e-08, "loss": 0.3474, "step": 5270 }, { "epoch": 2.918874019381634, "grad_norm": 0.25852465629577637, "learning_rate": 2.153079068989927e-08, "loss": 0.4076, "step": 5271 }, { "epoch": 2.919427780341486, "grad_norm": 0.25852763652801514, "learning_rate": 2.12330013547396e-08, "loss": 0.4069, "step": 5272 }, { "epoch": 2.9199815413013384, "grad_norm": 0.25206854939460754, "learning_rate": 2.09372813446046e-08, "loss": 0.4182, "step": 5273 }, { "epoch": 2.9205353022611904, "grad_norm": 0.24979069828987122, "learning_rate": 2.0643630782404834e-08, "loss": 0.4115, "step": 5274 }, { "epoch": 2.921089063221043, "grad_norm": 0.24938635528087616, "learning_rate": 2.0352049790189342e-08, "loss": 0.3831, "step": 5275 }, { "epoch": 2.9216428241808954, "grad_norm": 0.2612561881542206, "learning_rate": 2.006253848914841e-08, "loss": 0.4581, "step": 5276 }, { "epoch": 2.9221965851407474, "grad_norm": 0.2573609948158264, "learning_rate": 1.9775096999611888e-08, "loss": 0.4118, "step": 5277 }, { "epoch": 2.9227503461006, "grad_norm": 0.24426063895225525, "learning_rate": 1.948972544104921e-08, "loss": 0.369, "step": 5278 }, { "epoch": 2.923304107060452, "grad_norm": 0.29145756363868713, "learning_rate": 1.920642393206884e-08, "loss": 0.4349, "step": 5279 }, { "epoch": 2.9238578680203045, "grad_norm": 0.2625810503959656, "learning_rate": 1.892519259041936e-08, "loss": 0.3961, "step": 5280 }, { "epoch": 2.924411628980157, "grad_norm": 0.2545835077762604, "learning_rate": 1.8646031532989494e-08, "loss": 0.4133, "step": 5281 }, { "epoch": 2.9249653899400094, "grad_norm": 0.2522362470626831, "learning_rate": 1.836894087580643e-08, "loss": 0.3817, "step": 5282 }, { "epoch": 2.9255191508998615, "grad_norm": 0.25102564692497253, "learning_rate": 1.8093920734037486e-08, "loss": 0.3848, "step": 5283 }, { "epoch": 2.926072911859714, "grad_norm": 0.2784979045391083, "learning_rate": 1.7820971221990112e-08, "loss": 0.4365, "step": 5284 }, { "epoch": 2.926626672819566, "grad_norm": 0.25569894909858704, "learning_rate": 1.7550092453109126e-08, "loss": 0.3952, "step": 5285 }, { "epoch": 2.9271804337794185, "grad_norm": 0.25126272439956665, "learning_rate": 1.7281284539980015e-08, "loss": 0.4193, "step": 5286 }, { "epoch": 2.927734194739271, "grad_norm": 0.27386486530303955, "learning_rate": 1.701454759432841e-08, "loss": 0.4137, "step": 5287 }, { "epoch": 2.9282879556991235, "grad_norm": 0.2495892196893692, "learning_rate": 1.674988172701675e-08, "loss": 0.3819, "step": 5288 }, { "epoch": 2.9288417166589755, "grad_norm": 0.27480074763298035, "learning_rate": 1.6487287048049806e-08, "loss": 0.4537, "step": 5289 }, { "epoch": 2.929395477618828, "grad_norm": 0.25020715594291687, "learning_rate": 1.6226763666568056e-08, "loss": 0.418, "step": 5290 }, { "epoch": 2.92994923857868, "grad_norm": 0.23909002542495728, "learning_rate": 1.5968311690854333e-08, "loss": 0.3549, "step": 5291 }, { "epoch": 2.9305029995385325, "grad_norm": 0.2508569359779358, "learning_rate": 1.571193122832826e-08, "loss": 0.3878, "step": 5292 }, { "epoch": 2.931056760498385, "grad_norm": 0.2620028853416443, "learning_rate": 1.54576223855496e-08, "loss": 0.426, "step": 5293 }, { "epoch": 2.931610521458237, "grad_norm": 0.27040743827819824, "learning_rate": 1.5205385268216023e-08, "loss": 0.4191, "step": 5294 }, { "epoch": 2.9321642824180896, "grad_norm": 0.2656794488430023, "learning_rate": 1.4955219981165337e-08, "loss": 0.3812, "step": 5295 }, { "epoch": 2.9327180433779416, "grad_norm": 0.2695595622062683, "learning_rate": 1.470712662837437e-08, "loss": 0.4023, "step": 5296 }, { "epoch": 2.933271804337794, "grad_norm": 0.2568214535713196, "learning_rate": 1.4461105312956747e-08, "loss": 0.3799, "step": 5297 }, { "epoch": 2.9338255652976466, "grad_norm": 0.24736398458480835, "learning_rate": 1.4217156137167898e-08, "loss": 0.3947, "step": 5298 }, { "epoch": 2.934379326257499, "grad_norm": 0.2606477737426758, "learning_rate": 1.3975279202399495e-08, "loss": 0.4079, "step": 5299 }, { "epoch": 2.934933087217351, "grad_norm": 0.26333245635032654, "learning_rate": 1.3735474609182786e-08, "loss": 0.4209, "step": 5300 }, { "epoch": 2.9354868481772036, "grad_norm": 0.25401538610458374, "learning_rate": 1.3497742457188045e-08, "loss": 0.3896, "step": 5301 }, { "epoch": 2.9360406091370557, "grad_norm": 0.24499261379241943, "learning_rate": 1.3262082845224012e-08, "loss": 0.3921, "step": 5302 }, { "epoch": 2.936594370096908, "grad_norm": 0.26429083943367004, "learning_rate": 1.3028495871237334e-08, "loss": 0.4214, "step": 5303 }, { "epoch": 2.9371481310567606, "grad_norm": 0.25045663118362427, "learning_rate": 1.2796981632314242e-08, "loss": 0.4161, "step": 5304 }, { "epoch": 2.9377018920166127, "grad_norm": 0.2409869134426117, "learning_rate": 1.2567540224678875e-08, "loss": 0.3934, "step": 5305 }, { "epoch": 2.938255652976465, "grad_norm": 0.26891952753067017, "learning_rate": 1.2340171743694397e-08, "loss": 0.3895, "step": 5306 }, { "epoch": 2.9388094139363172, "grad_norm": 0.2595224380493164, "learning_rate": 1.211487628386132e-08, "loss": 0.3828, "step": 5307 }, { "epoch": 2.9393631748961697, "grad_norm": 0.26910585165023804, "learning_rate": 1.1891653938819193e-08, "loss": 0.4448, "step": 5308 }, { "epoch": 2.939916935856022, "grad_norm": 0.2591690421104431, "learning_rate": 1.1670504801346572e-08, "loss": 0.4226, "step": 5309 }, { "epoch": 2.9404706968158747, "grad_norm": 0.25839123129844666, "learning_rate": 1.1451428963359379e-08, "loss": 0.4042, "step": 5310 }, { "epoch": 2.9410244577757267, "grad_norm": 0.2469741851091385, "learning_rate": 1.1234426515911445e-08, "loss": 0.3802, "step": 5311 }, { "epoch": 2.9415782187355792, "grad_norm": 0.2546882927417755, "learning_rate": 1.1019497549196733e-08, "loss": 0.4406, "step": 5312 }, { "epoch": 2.9421319796954313, "grad_norm": 0.25522133708000183, "learning_rate": 1.08066421525449e-08, "loss": 0.3866, "step": 5313 }, { "epoch": 2.9426857406552838, "grad_norm": 0.26993927359580994, "learning_rate": 1.0595860414426284e-08, "loss": 0.4282, "step": 5314 }, { "epoch": 2.9432395016151363, "grad_norm": 0.2443794161081314, "learning_rate": 1.0387152422447478e-08, "loss": 0.3817, "step": 5315 }, { "epoch": 2.9437932625749883, "grad_norm": 0.2559572458267212, "learning_rate": 1.0180518263353534e-08, "loss": 0.3739, "step": 5316 }, { "epoch": 2.944347023534841, "grad_norm": 0.26703205704689026, "learning_rate": 9.975958023027976e-09, "loss": 0.4191, "step": 5317 }, { "epoch": 2.9449007844946933, "grad_norm": 0.26426491141319275, "learning_rate": 9.773471786492239e-09, "loss": 0.4035, "step": 5318 }, { "epoch": 2.9454545454545453, "grad_norm": 0.24750109016895294, "learning_rate": 9.573059637905669e-09, "loss": 0.4181, "step": 5319 }, { "epoch": 2.946008306414398, "grad_norm": 0.2453097701072693, "learning_rate": 9.374721660566077e-09, "loss": 0.4179, "step": 5320 }, { "epoch": 2.9465620673742503, "grad_norm": 0.2415550798177719, "learning_rate": 9.178457936908081e-09, "loss": 0.3919, "step": 5321 }, { "epoch": 2.9471158283341023, "grad_norm": 0.24861766397953033, "learning_rate": 8.98426854850476e-09, "loss": 0.4174, "step": 5322 }, { "epoch": 2.947669589293955, "grad_norm": 0.25409892201423645, "learning_rate": 8.792153576067108e-09, "loss": 0.3695, "step": 5323 }, { "epoch": 2.948223350253807, "grad_norm": 0.2810618579387665, "learning_rate": 8.602113099444032e-09, "loss": 0.4297, "step": 5324 }, { "epoch": 2.9487771112136594, "grad_norm": 0.23809756338596344, "learning_rate": 8.414147197622346e-09, "loss": 0.4047, "step": 5325 }, { "epoch": 2.949330872173512, "grad_norm": 0.25585660338401794, "learning_rate": 8.228255948725673e-09, "loss": 0.3902, "step": 5326 }, { "epoch": 2.9498846331333644, "grad_norm": 0.2758561372756958, "learning_rate": 8.044439430016093e-09, "loss": 0.4509, "step": 5327 }, { "epoch": 2.9504383940932164, "grad_norm": 0.2639373540878296, "learning_rate": 7.862697717894164e-09, "loss": 0.3786, "step": 5328 }, { "epoch": 2.950992155053069, "grad_norm": 0.24230211973190308, "learning_rate": 7.68303088789557e-09, "loss": 0.388, "step": 5329 }, { "epoch": 2.951545916012921, "grad_norm": 0.2598593831062317, "learning_rate": 7.505439014696691e-09, "loss": 0.4207, "step": 5330 }, { "epoch": 2.9520996769727734, "grad_norm": 0.26465433835983276, "learning_rate": 7.329922172109594e-09, "loss": 0.4254, "step": 5331 }, { "epoch": 2.952653437932626, "grad_norm": 0.27902722358703613, "learning_rate": 7.156480433084256e-09, "loss": 0.419, "step": 5332 }, { "epoch": 2.953207198892478, "grad_norm": 0.258953332901001, "learning_rate": 6.985113869708571e-09, "loss": 0.3857, "step": 5333 }, { "epoch": 2.9537609598523304, "grad_norm": 0.2538006901741028, "learning_rate": 6.81582255320723e-09, "loss": 0.408, "step": 5334 }, { "epoch": 2.9543147208121825, "grad_norm": 0.25087809562683105, "learning_rate": 6.648606553942283e-09, "loss": 0.3754, "step": 5335 }, { "epoch": 2.954868481772035, "grad_norm": 0.2542981207370758, "learning_rate": 6.483465941414801e-09, "loss": 0.4202, "step": 5336 }, { "epoch": 2.9554222427318875, "grad_norm": 0.23547303676605225, "learning_rate": 6.320400784262104e-09, "loss": 0.3563, "step": 5337 }, { "epoch": 2.95597600369174, "grad_norm": 0.2617853879928589, "learning_rate": 6.15941115025831e-09, "loss": 0.4167, "step": 5338 }, { "epoch": 2.956529764651592, "grad_norm": 0.255849689245224, "learning_rate": 6.000497106315451e-09, "loss": 0.417, "step": 5339 }, { "epoch": 2.9570835256114445, "grad_norm": 0.23559615015983582, "learning_rate": 5.8436587184834696e-09, "loss": 0.3632, "step": 5340 }, { "epoch": 2.9576372865712965, "grad_norm": 0.2625911831855774, "learning_rate": 5.688896051949666e-09, "loss": 0.4209, "step": 5341 }, { "epoch": 2.958191047531149, "grad_norm": 0.2605162560939789, "learning_rate": 5.536209171036477e-09, "loss": 0.3823, "step": 5342 }, { "epoch": 2.9587448084910015, "grad_norm": 0.2429397851228714, "learning_rate": 5.385598139206471e-09, "loss": 0.3992, "step": 5343 }, { "epoch": 2.9592985694508536, "grad_norm": 0.2833608388900757, "learning_rate": 5.237063019057908e-09, "loss": 0.4247, "step": 5344 }, { "epoch": 2.959852330410706, "grad_norm": 0.2576238214969635, "learning_rate": 5.09060387232585e-09, "loss": 0.4034, "step": 5345 }, { "epoch": 2.960406091370558, "grad_norm": 0.2561042308807373, "learning_rate": 4.946220759883824e-09, "loss": 0.3741, "step": 5346 }, { "epoch": 2.9609598523304106, "grad_norm": 0.2617475688457489, "learning_rate": 4.803913741741051e-09, "loss": 0.4336, "step": 5347 }, { "epoch": 2.961513613290263, "grad_norm": 0.24742434918880463, "learning_rate": 4.663682877045217e-09, "loss": 0.3993, "step": 5348 }, { "epoch": 2.9620673742501156, "grad_norm": 0.2688279151916504, "learning_rate": 4.5255282240802554e-09, "loss": 0.397, "step": 5349 }, { "epoch": 2.9626211352099676, "grad_norm": 0.27693548798561096, "learning_rate": 4.3894498402674565e-09, "loss": 0.4122, "step": 5350 }, { "epoch": 2.96317489616982, "grad_norm": 0.24560411274433136, "learning_rate": 4.255447782164912e-09, "loss": 0.3733, "step": 5351 }, { "epoch": 2.963728657129672, "grad_norm": 0.26295360922813416, "learning_rate": 4.123522105468069e-09, "loss": 0.4194, "step": 5352 }, { "epoch": 2.9642824180895246, "grad_norm": 0.268032044172287, "learning_rate": 3.993672865008624e-09, "loss": 0.4103, "step": 5353 }, { "epoch": 2.964836179049377, "grad_norm": 0.24890756607055664, "learning_rate": 3.865900114757293e-09, "loss": 0.3762, "step": 5354 }, { "epoch": 2.9653899400092296, "grad_norm": 0.24624677002429962, "learning_rate": 3.740203907818818e-09, "loss": 0.4043, "step": 5355 }, { "epoch": 2.9659437009690817, "grad_norm": 0.29380878806114197, "learning_rate": 3.6165842964364096e-09, "loss": 0.4325, "step": 5356 }, { "epoch": 2.966497461928934, "grad_norm": 0.24760690331459045, "learning_rate": 3.495041331991189e-09, "loss": 0.3937, "step": 5357 }, { "epoch": 2.967051222888786, "grad_norm": 0.25444021821022034, "learning_rate": 3.3755750649988596e-09, "loss": 0.4131, "step": 5358 }, { "epoch": 2.9676049838486387, "grad_norm": 0.2602452039718628, "learning_rate": 3.2581855451141453e-09, "loss": 0.4047, "step": 5359 }, { "epoch": 2.968158744808491, "grad_norm": 0.2507275342941284, "learning_rate": 3.142872821126908e-09, "loss": 0.3703, "step": 5360 }, { "epoch": 2.9687125057683432, "grad_norm": 0.26460060477256775, "learning_rate": 3.0296369409649197e-09, "loss": 0.4302, "step": 5361 }, { "epoch": 2.9692662667281957, "grad_norm": 0.2673996090888977, "learning_rate": 2.9184779516922e-09, "loss": 0.3996, "step": 5362 }, { "epoch": 2.9698200276880478, "grad_norm": 0.25631603598594666, "learning_rate": 2.8093958995101256e-09, "loss": 0.4225, "step": 5363 }, { "epoch": 2.9703737886479002, "grad_norm": 0.2548345625400543, "learning_rate": 2.7023908297557633e-09, "loss": 0.392, "step": 5364 }, { "epoch": 2.9709275496077527, "grad_norm": 0.27704688906669617, "learning_rate": 2.5974627869046476e-09, "loss": 0.3671, "step": 5365 }, { "epoch": 2.9714813105676052, "grad_norm": 0.2609788179397583, "learning_rate": 2.4946118145674493e-09, "loss": 0.4054, "step": 5366 }, { "epoch": 2.9720350715274573, "grad_norm": 0.2746116816997528, "learning_rate": 2.3938379554921954e-09, "loss": 0.4186, "step": 5367 }, { "epoch": 2.9725888324873098, "grad_norm": 0.26100313663482666, "learning_rate": 2.29514125156316e-09, "loss": 0.4009, "step": 5368 }, { "epoch": 2.973142593447162, "grad_norm": 0.2601189911365509, "learning_rate": 2.1985217438025285e-09, "loss": 0.4464, "step": 5369 }, { "epoch": 2.9736963544070143, "grad_norm": 0.2685212790966034, "learning_rate": 2.1039794723676233e-09, "loss": 0.426, "step": 5370 }, { "epoch": 2.974250115366867, "grad_norm": 0.25529786944389343, "learning_rate": 2.0115144765531226e-09, "loss": 0.3555, "step": 5371 }, { "epoch": 2.974803876326719, "grad_norm": 0.2507651746273041, "learning_rate": 1.9211267947899514e-09, "loss": 0.3798, "step": 5372 }, { "epoch": 2.9753576372865713, "grad_norm": 0.2557556927204132, "learning_rate": 1.832816464646947e-09, "loss": 0.4208, "step": 5373 }, { "epoch": 2.9759113982464234, "grad_norm": 0.28133928775787354, "learning_rate": 1.7465835228275274e-09, "loss": 0.4183, "step": 5374 }, { "epoch": 2.976465159206276, "grad_norm": 0.26675301790237427, "learning_rate": 1.6624280051730223e-09, "loss": 0.3976, "step": 5375 }, { "epoch": 2.9770189201661283, "grad_norm": 0.2688071131706238, "learning_rate": 1.5803499466610083e-09, "loss": 0.4157, "step": 5376 }, { "epoch": 2.977572681125981, "grad_norm": 0.24979889392852783, "learning_rate": 1.5003493814053083e-09, "loss": 0.3663, "step": 5377 }, { "epoch": 2.978126442085833, "grad_norm": 0.24887768924236298, "learning_rate": 1.4224263426576567e-09, "loss": 0.4146, "step": 5378 }, { "epoch": 2.9786802030456854, "grad_norm": 0.24592632055282593, "learning_rate": 1.3465808628038145e-09, "loss": 0.3807, "step": 5379 }, { "epoch": 2.9792339640055374, "grad_norm": 0.2648659348487854, "learning_rate": 1.2728129733680094e-09, "loss": 0.3992, "step": 5380 }, { "epoch": 2.97978772496539, "grad_norm": 0.2665444016456604, "learning_rate": 1.2011227050107155e-09, "loss": 0.4191, "step": 5381 }, { "epoch": 2.9803414859252424, "grad_norm": 0.2757125496864319, "learning_rate": 1.1315100875280983e-09, "loss": 0.4026, "step": 5382 }, { "epoch": 2.9808952468850944, "grad_norm": 0.26828813552856445, "learning_rate": 1.0639751498531248e-09, "loss": 0.4077, "step": 5383 }, { "epoch": 2.981449007844947, "grad_norm": 0.24338103830814362, "learning_rate": 9.98517920056119e-10, "loss": 0.3808, "step": 5384 }, { "epoch": 2.9820027688047994, "grad_norm": 0.24670632183551788, "learning_rate": 9.351384253419859e-10, "loss": 0.3895, "step": 5385 }, { "epoch": 2.9825565297646515, "grad_norm": 0.26214584708213806, "learning_rate": 8.738366920540975e-10, "loss": 0.4181, "step": 5386 }, { "epoch": 2.983110290724504, "grad_norm": 0.25461694598197937, "learning_rate": 8.146127456709618e-10, "loss": 0.4202, "step": 5387 }, { "epoch": 2.9836640516843564, "grad_norm": 0.26047879457473755, "learning_rate": 7.574666108078888e-10, "loss": 0.387, "step": 5388 }, { "epoch": 2.9842178126442085, "grad_norm": 0.2956352233886719, "learning_rate": 7.023983112164345e-10, "loss": 0.4335, "step": 5389 }, { "epoch": 2.984771573604061, "grad_norm": 0.24423180520534515, "learning_rate": 6.494078697844019e-10, "loss": 0.3744, "step": 5390 }, { "epoch": 2.985325334563913, "grad_norm": 0.26536402106285095, "learning_rate": 5.984953085363954e-10, "loss": 0.4274, "step": 5391 }, { "epoch": 2.9858790955237655, "grad_norm": 0.2585996389389038, "learning_rate": 5.496606486332656e-10, "loss": 0.3775, "step": 5392 }, { "epoch": 2.986432856483618, "grad_norm": 0.24085862934589386, "learning_rate": 5.029039103726652e-10, "loss": 0.3634, "step": 5393 }, { "epoch": 2.9869866174434705, "grad_norm": 0.2627558410167694, "learning_rate": 4.582251131873827e-10, "loss": 0.4125, "step": 5394 }, { "epoch": 2.9875403784033225, "grad_norm": 0.27955466508865356, "learning_rate": 4.156242756470086e-10, "loss": 0.4606, "step": 5395 }, { "epoch": 2.988094139363175, "grad_norm": 0.24420253932476044, "learning_rate": 3.7510141545848975e-10, "loss": 0.3893, "step": 5396 }, { "epoch": 2.988647900323027, "grad_norm": 0.23726971447467804, "learning_rate": 3.3665654946390956e-10, "loss": 0.3612, "step": 5397 }, { "epoch": 2.9892016612828796, "grad_norm": 0.2613838016986847, "learning_rate": 3.00289693642708e-10, "loss": 0.3891, "step": 5398 }, { "epoch": 2.989755422242732, "grad_norm": 0.2642286717891693, "learning_rate": 2.660008631094613e-10, "loss": 0.4349, "step": 5399 }, { "epoch": 2.990309183202584, "grad_norm": 0.25935670733451843, "learning_rate": 2.3379007211554727e-10, "loss": 0.4036, "step": 5400 }, { "epoch": 2.9908629441624366, "grad_norm": 0.2450847029685974, "learning_rate": 2.0365733404859034e-10, "loss": 0.3948, "step": 5401 }, { "epoch": 2.9914167051222886, "grad_norm": 0.24641531705856323, "learning_rate": 1.7560266143357152e-10, "loss": 0.408, "step": 5402 }, { "epoch": 2.991970466082141, "grad_norm": 0.26754680275917053, "learning_rate": 1.4962606593005303e-10, "loss": 0.4467, "step": 5403 }, { "epoch": 2.9925242270419936, "grad_norm": 0.24715517461299896, "learning_rate": 1.2572755833495375e-10, "loss": 0.3461, "step": 5404 }, { "epoch": 2.993077988001846, "grad_norm": 0.28043854236602783, "learning_rate": 1.0390714858143913e-10, "loss": 0.4484, "step": 5405 }, { "epoch": 2.993631748961698, "grad_norm": 0.2463710606098175, "learning_rate": 8.416484573836592e-11, "loss": 0.3711, "step": 5406 }, { "epoch": 2.9941855099215506, "grad_norm": 0.2660749554634094, "learning_rate": 6.650065801083738e-11, "loss": 0.404, "step": 5407 }, { "epoch": 2.9947392708814027, "grad_norm": 0.2701558470726013, "learning_rate": 5.0914592741868606e-11, "loss": 0.4498, "step": 5408 }, { "epoch": 2.995293031841255, "grad_norm": 0.2538999915122986, "learning_rate": 3.740665640850072e-11, "loss": 0.3432, "step": 5409 }, { "epoch": 2.9958467928011077, "grad_norm": 0.25628992915153503, "learning_rate": 2.5976854625131554e-11, "loss": 0.3909, "step": 5410 }, { "epoch": 2.9964005537609597, "grad_norm": 0.26208439469337463, "learning_rate": 1.6625192142405432e-11, "loss": 0.4205, "step": 5411 }, { "epoch": 2.996954314720812, "grad_norm": 0.25840750336647034, "learning_rate": 9.351672847213167e-12, "loss": 0.3831, "step": 5412 }, { "epoch": 2.9975080756806642, "grad_norm": 0.26530954241752625, "learning_rate": 4.1562997632471624e-12, "loss": 0.3937, "step": 5413 }, { "epoch": 2.9980618366405167, "grad_norm": 0.2573135197162628, "learning_rate": 1.0390750487809798e-12, "loss": 0.4123, "step": 5414 }, { "epoch": 2.998615597600369, "grad_norm": 0.25032615661621094, "learning_rate": 0.0, "loss": 0.3833, "step": 5415 }, { "epoch": 2.998615597600369, "step": 5415, "total_flos": 8180294453035008.0, "train_loss": 0.44426089589065243, "train_runtime": 204753.4998, "train_samples_per_second": 2.54, "train_steps_per_second": 0.026 } ], "logging_steps": 1.0, "max_steps": 5415, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8180294453035008.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }