{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 32628, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.0648522741203875e-05, "grad_norm": 89.08560970136007, "learning_rate": 1.0214504596527069e-08, "loss": 2.0546, "step": 1 }, { "epoch": 6.129704548240775e-05, "grad_norm": 63.6346443415281, "learning_rate": 2.0429009193054138e-08, "loss": 1.9353, "step": 2 }, { "epoch": 9.194556822361162e-05, "grad_norm": 7.008418468016836, "learning_rate": 3.0643513789581204e-08, "loss": 0.7687, "step": 3 }, { "epoch": 0.0001225940909648155, "grad_norm": 78.63474792396211, "learning_rate": 4.0858018386108276e-08, "loss": 2.0782, "step": 4 }, { "epoch": 0.00015324261370601937, "grad_norm": 77.75173234271104, "learning_rate": 5.107252298263535e-08, "loss": 2.0403, "step": 5 }, { "epoch": 0.00018389113644722325, "grad_norm": 80.9344025842625, "learning_rate": 6.128702757916241e-08, "loss": 2.0655, "step": 6 }, { "epoch": 0.00021453965918842712, "grad_norm": 77.39658974042007, "learning_rate": 7.150153217568949e-08, "loss": 1.9496, "step": 7 }, { "epoch": 0.000245188181929631, "grad_norm": 68.55102766587733, "learning_rate": 8.171603677221655e-08, "loss": 1.9041, "step": 8 }, { "epoch": 0.0002758367046708349, "grad_norm": 76.97069918578106, "learning_rate": 9.193054136874362e-08, "loss": 2.0512, "step": 9 }, { "epoch": 0.00030648522741203875, "grad_norm": 77.26026967769032, "learning_rate": 1.021450459652707e-07, "loss": 2.1065, "step": 10 }, { "epoch": 0.0003371337501532426, "grad_norm": 71.98054745241204, "learning_rate": 1.1235955056179776e-07, "loss": 1.963, "step": 11 }, { "epoch": 0.0003677822728944465, "grad_norm": 59.54901532655414, "learning_rate": 1.2257405515832481e-07, "loss": 1.8054, "step": 12 }, { "epoch": 0.0003984307956356504, "grad_norm": 92.31521671616618, "learning_rate": 1.327885597548519e-07, "loss": 2.5807, "step": 13 }, { "epoch": 0.00042907931837685425, "grad_norm": 6.900672339908472, "learning_rate": 1.4300306435137899e-07, "loss": 0.7186, "step": 14 }, { "epoch": 0.0004597278411180581, "grad_norm": 70.58978240813653, "learning_rate": 1.5321756894790606e-07, "loss": 2.0012, "step": 15 }, { "epoch": 0.000490376363859262, "grad_norm": 7.124069940215887, "learning_rate": 1.634320735444331e-07, "loss": 0.7339, "step": 16 }, { "epoch": 0.0005210248866004658, "grad_norm": 7.244685119281876, "learning_rate": 1.7364657814096015e-07, "loss": 0.7621, "step": 17 }, { "epoch": 0.0005516734093416697, "grad_norm": 7.005677202133421, "learning_rate": 1.8386108273748725e-07, "loss": 0.7716, "step": 18 }, { "epoch": 0.0005823219320828736, "grad_norm": 63.72202525143337, "learning_rate": 1.9407558733401432e-07, "loss": 1.9179, "step": 19 }, { "epoch": 0.0006129704548240775, "grad_norm": 59.88329132065079, "learning_rate": 2.042900919305414e-07, "loss": 1.7654, "step": 20 }, { "epoch": 0.0006436189775652813, "grad_norm": 58.72977527997002, "learning_rate": 2.1450459652706847e-07, "loss": 1.8379, "step": 21 }, { "epoch": 0.0006742675003064852, "grad_norm": 60.31981818331502, "learning_rate": 2.247191011235955e-07, "loss": 1.9848, "step": 22 }, { "epoch": 0.0007049160230476891, "grad_norm": 51.278640317574606, "learning_rate": 2.349336057201226e-07, "loss": 1.6186, "step": 23 }, { "epoch": 0.000735564545788893, "grad_norm": 46.151723122145654, "learning_rate": 2.4514811031664963e-07, "loss": 1.7823, "step": 24 }, { "epoch": 0.0007662130685300968, "grad_norm": 41.04689363866026, "learning_rate": 2.5536261491317673e-07, "loss": 1.6774, "step": 25 }, { "epoch": 0.0007968615912713007, "grad_norm": 6.550244340599658, "learning_rate": 2.655771195097038e-07, "loss": 0.7199, "step": 26 }, { "epoch": 0.0008275101140125046, "grad_norm": 40.55751608188138, "learning_rate": 2.7579162410623087e-07, "loss": 1.6197, "step": 27 }, { "epoch": 0.0008581586367537085, "grad_norm": 41.62926943478123, "learning_rate": 2.8600612870275797e-07, "loss": 1.7437, "step": 28 }, { "epoch": 0.0008888071594949123, "grad_norm": 46.30795670862777, "learning_rate": 2.96220633299285e-07, "loss": 1.6424, "step": 29 }, { "epoch": 0.0009194556822361162, "grad_norm": 45.21336117318005, "learning_rate": 3.064351378958121e-07, "loss": 1.6123, "step": 30 }, { "epoch": 0.0009501042049773201, "grad_norm": 38.24930164273746, "learning_rate": 3.1664964249233916e-07, "loss": 1.6661, "step": 31 }, { "epoch": 0.000980752727718524, "grad_norm": 25.653989230191055, "learning_rate": 3.268641470888662e-07, "loss": 1.4691, "step": 32 }, { "epoch": 0.001011401250459728, "grad_norm": 6.648868830006903, "learning_rate": 3.3707865168539325e-07, "loss": 0.7461, "step": 33 }, { "epoch": 0.0010420497732009316, "grad_norm": 6.425606260315896, "learning_rate": 3.472931562819203e-07, "loss": 0.745, "step": 34 }, { "epoch": 0.0010726982959421356, "grad_norm": 44.71723681485436, "learning_rate": 3.5750766087844745e-07, "loss": 1.3927, "step": 35 }, { "epoch": 0.0011033468186833395, "grad_norm": 22.519576527379883, "learning_rate": 3.677221654749745e-07, "loss": 1.3519, "step": 36 }, { "epoch": 0.0011339953414245434, "grad_norm": 25.31883674917661, "learning_rate": 3.7793667007150154e-07, "loss": 1.4207, "step": 37 }, { "epoch": 0.0011646438641657471, "grad_norm": 6.611587362543892, "learning_rate": 3.8815117466802864e-07, "loss": 0.7743, "step": 38 }, { "epoch": 0.001195292386906951, "grad_norm": 20.153455522684244, "learning_rate": 3.9836567926455574e-07, "loss": 1.2387, "step": 39 }, { "epoch": 0.001225940909648155, "grad_norm": 16.787039031749995, "learning_rate": 4.085801838610828e-07, "loss": 1.3068, "step": 40 }, { "epoch": 0.001256589432389359, "grad_norm": 14.418101616000696, "learning_rate": 4.1879468845760983e-07, "loss": 1.3356, "step": 41 }, { "epoch": 0.0012872379551305626, "grad_norm": 12.486959274707939, "learning_rate": 4.2900919305413693e-07, "loss": 1.3184, "step": 42 }, { "epoch": 0.0013178864778717666, "grad_norm": 11.031494308816299, "learning_rate": 4.39223697650664e-07, "loss": 1.2462, "step": 43 }, { "epoch": 0.0013485350006129705, "grad_norm": 9.251975380193947, "learning_rate": 4.49438202247191e-07, "loss": 1.3023, "step": 44 }, { "epoch": 0.0013791835233541744, "grad_norm": 9.299659249810281, "learning_rate": 4.5965270684371807e-07, "loss": 1.1836, "step": 45 }, { "epoch": 0.0014098320460953781, "grad_norm": 7.791049211266313, "learning_rate": 4.698672114402452e-07, "loss": 1.2752, "step": 46 }, { "epoch": 0.001440480568836582, "grad_norm": 6.724387816941555, "learning_rate": 4.800817160367723e-07, "loss": 1.2019, "step": 47 }, { "epoch": 0.001471129091577786, "grad_norm": 7.926236892978478, "learning_rate": 4.902962206332993e-07, "loss": 1.1857, "step": 48 }, { "epoch": 0.00150177761431899, "grad_norm": 5.4109546665507064, "learning_rate": 5.005107252298265e-07, "loss": 0.7119, "step": 49 }, { "epoch": 0.0015324261370601936, "grad_norm": 8.049658477838308, "learning_rate": 5.107252298263535e-07, "loss": 1.1436, "step": 50 }, { "epoch": 0.0015630746598013976, "grad_norm": 6.1160754568602815, "learning_rate": 5.209397344228806e-07, "loss": 1.1807, "step": 51 }, { "epoch": 0.0015937231825426015, "grad_norm": 5.7258944741099, "learning_rate": 5.311542390194075e-07, "loss": 1.2282, "step": 52 }, { "epoch": 0.0016243717052838054, "grad_norm": 5.203516672024508, "learning_rate": 5.413687436159346e-07, "loss": 1.1893, "step": 53 }, { "epoch": 0.0016550202280250091, "grad_norm": 5.98723842711262, "learning_rate": 5.515832482124617e-07, "loss": 1.2128, "step": 54 }, { "epoch": 0.001685668750766213, "grad_norm": 5.253032263527259, "learning_rate": 5.617977528089888e-07, "loss": 1.1633, "step": 55 }, { "epoch": 0.001716317273507417, "grad_norm": 5.6500920181636864, "learning_rate": 5.720122574055159e-07, "loss": 1.105, "step": 56 }, { "epoch": 0.001746965796248621, "grad_norm": 5.151414897007073, "learning_rate": 5.822267620020429e-07, "loss": 0.7191, "step": 57 }, { "epoch": 0.0017776143189898246, "grad_norm": 4.943378448535567, "learning_rate": 5.9244126659857e-07, "loss": 1.1309, "step": 58 }, { "epoch": 0.0018082628417310286, "grad_norm": 4.456925974953797, "learning_rate": 6.02655771195097e-07, "loss": 1.0566, "step": 59 }, { "epoch": 0.0018389113644722325, "grad_norm": 4.516446150963775, "learning_rate": 6.128702757916242e-07, "loss": 1.2222, "step": 60 }, { "epoch": 0.0018695598872134364, "grad_norm": 5.198275128022614, "learning_rate": 6.230847803881512e-07, "loss": 0.727, "step": 61 }, { "epoch": 0.0019002084099546401, "grad_norm": 3.990987502049036, "learning_rate": 6.332992849846783e-07, "loss": 1.1488, "step": 62 }, { "epoch": 0.001930856932695844, "grad_norm": 5.313102010861105, "learning_rate": 6.435137895812053e-07, "loss": 1.1725, "step": 63 }, { "epoch": 0.001961505455437048, "grad_norm": 4.367896524099123, "learning_rate": 6.537282941777324e-07, "loss": 1.2599, "step": 64 }, { "epoch": 0.001992153978178252, "grad_norm": 3.807555030072833, "learning_rate": 6.639427987742594e-07, "loss": 1.0266, "step": 65 }, { "epoch": 0.002022802500919456, "grad_norm": 3.4054192704996717, "learning_rate": 6.741573033707865e-07, "loss": 1.0558, "step": 66 }, { "epoch": 0.0020534510236606593, "grad_norm": 4.974995302858568, "learning_rate": 6.843718079673137e-07, "loss": 0.7192, "step": 67 }, { "epoch": 0.0020840995464018633, "grad_norm": 4.254291294935033, "learning_rate": 6.945863125638406e-07, "loss": 1.0923, "step": 68 }, { "epoch": 0.002114748069143067, "grad_norm": 4.467974972326297, "learning_rate": 7.048008171603678e-07, "loss": 1.088, "step": 69 }, { "epoch": 0.002145396591884271, "grad_norm": 5.416955287481584, "learning_rate": 7.150153217568949e-07, "loss": 1.1006, "step": 70 }, { "epoch": 0.002176045114625475, "grad_norm": 3.786110278434636, "learning_rate": 7.252298263534219e-07, "loss": 1.0699, "step": 71 }, { "epoch": 0.002206693637366679, "grad_norm": 3.781489762655271, "learning_rate": 7.35444330949949e-07, "loss": 1.01, "step": 72 }, { "epoch": 0.002237342160107883, "grad_norm": 3.996822165102075, "learning_rate": 7.456588355464761e-07, "loss": 1.1456, "step": 73 }, { "epoch": 0.002267990682849087, "grad_norm": 3.834302254983052, "learning_rate": 7.558733401430031e-07, "loss": 0.9744, "step": 74 }, { "epoch": 0.0022986392055902903, "grad_norm": 4.10052613048204, "learning_rate": 7.660878447395302e-07, "loss": 0.9851, "step": 75 }, { "epoch": 0.0023292877283314943, "grad_norm": 3.6942561266403184, "learning_rate": 7.763023493360573e-07, "loss": 1.035, "step": 76 }, { "epoch": 0.002359936251072698, "grad_norm": 3.4161485471016335, "learning_rate": 7.865168539325843e-07, "loss": 1.0135, "step": 77 }, { "epoch": 0.002390584773813902, "grad_norm": 4.356088598469908, "learning_rate": 7.967313585291115e-07, "loss": 1.1297, "step": 78 }, { "epoch": 0.002421233296555106, "grad_norm": 3.4434990810076056, "learning_rate": 8.069458631256384e-07, "loss": 1.0819, "step": 79 }, { "epoch": 0.00245188181929631, "grad_norm": 3.880491700089945, "learning_rate": 8.171603677221656e-07, "loss": 1.1027, "step": 80 }, { "epoch": 0.002482530342037514, "grad_norm": 3.9487785004894023, "learning_rate": 8.273748723186927e-07, "loss": 0.6709, "step": 81 }, { "epoch": 0.002513178864778718, "grad_norm": 3.9416434036269496, "learning_rate": 8.375893769152197e-07, "loss": 0.6872, "step": 82 }, { "epoch": 0.0025438273875199213, "grad_norm": 3.6516709397826217, "learning_rate": 8.478038815117468e-07, "loss": 1.0285, "step": 83 }, { "epoch": 0.0025744759102611253, "grad_norm": 3.4981395610762496, "learning_rate": 8.580183861082739e-07, "loss": 1.0998, "step": 84 }, { "epoch": 0.002605124433002329, "grad_norm": 4.095477268478066, "learning_rate": 8.682328907048009e-07, "loss": 1.1315, "step": 85 }, { "epoch": 0.002635772955743533, "grad_norm": 3.4390679199229917, "learning_rate": 8.78447395301328e-07, "loss": 0.9974, "step": 86 }, { "epoch": 0.002666421478484737, "grad_norm": 3.9140690373969047, "learning_rate": 8.886618998978551e-07, "loss": 1.0622, "step": 87 }, { "epoch": 0.002697070001225941, "grad_norm": 3.2268371006486403, "learning_rate": 8.98876404494382e-07, "loss": 0.6661, "step": 88 }, { "epoch": 0.002727718523967145, "grad_norm": 3.6742147642940775, "learning_rate": 9.090909090909091e-07, "loss": 1.0184, "step": 89 }, { "epoch": 0.002758367046708349, "grad_norm": 3.2300214225135604, "learning_rate": 9.193054136874361e-07, "loss": 0.9918, "step": 90 }, { "epoch": 0.0027890155694495523, "grad_norm": 3.744473705561353, "learning_rate": 9.295199182839632e-07, "loss": 1.1148, "step": 91 }, { "epoch": 0.0028196640921907563, "grad_norm": 3.6200197727657066, "learning_rate": 9.397344228804904e-07, "loss": 1.0265, "step": 92 }, { "epoch": 0.00285031261493196, "grad_norm": 3.192295139375867, "learning_rate": 9.499489274770174e-07, "loss": 0.6898, "step": 93 }, { "epoch": 0.002880961137673164, "grad_norm": 3.477295683764482, "learning_rate": 9.601634320735445e-07, "loss": 1.0053, "step": 94 }, { "epoch": 0.002911609660414368, "grad_norm": 3.4875838217496598, "learning_rate": 9.703779366700715e-07, "loss": 1.065, "step": 95 }, { "epoch": 0.002942258183155572, "grad_norm": 4.005275121542596, "learning_rate": 9.805924412665985e-07, "loss": 1.0819, "step": 96 }, { "epoch": 0.002972906705896776, "grad_norm": 3.5313105915912755, "learning_rate": 9.908069458631257e-07, "loss": 1.0474, "step": 97 }, { "epoch": 0.00300355522863798, "grad_norm": 4.459997023500788, "learning_rate": 1.001021450459653e-06, "loss": 1.0753, "step": 98 }, { "epoch": 0.0030342037513791833, "grad_norm": 3.693073495477769, "learning_rate": 1.01123595505618e-06, "loss": 1.0984, "step": 99 }, { "epoch": 0.0030648522741203873, "grad_norm": 3.6723368677483874, "learning_rate": 1.021450459652707e-06, "loss": 1.0721, "step": 100 }, { "epoch": 0.003095500796861591, "grad_norm": 3.6373269065618143, "learning_rate": 1.031664964249234e-06, "loss": 1.1199, "step": 101 }, { "epoch": 0.003126149319602795, "grad_norm": 3.489422135604586, "learning_rate": 1.0418794688457611e-06, "loss": 1.0281, "step": 102 }, { "epoch": 0.003156797842343999, "grad_norm": 3.9190214097913025, "learning_rate": 1.052093973442288e-06, "loss": 1.1071, "step": 103 }, { "epoch": 0.003187446365085203, "grad_norm": 3.1445518105414694, "learning_rate": 1.062308478038815e-06, "loss": 0.9801, "step": 104 }, { "epoch": 0.003218094887826407, "grad_norm": 2.820514905590706, "learning_rate": 1.0725229826353423e-06, "loss": 0.6845, "step": 105 }, { "epoch": 0.003248743410567611, "grad_norm": 3.1696283254436404, "learning_rate": 1.0827374872318693e-06, "loss": 1.0023, "step": 106 }, { "epoch": 0.0032793919333088143, "grad_norm": 4.108702066866853, "learning_rate": 1.0929519918283963e-06, "loss": 1.047, "step": 107 }, { "epoch": 0.0033100404560500183, "grad_norm": 3.4279209112489344, "learning_rate": 1.1031664964249235e-06, "loss": 1.0335, "step": 108 }, { "epoch": 0.003340688978791222, "grad_norm": 3.428199449631726, "learning_rate": 1.1133810010214507e-06, "loss": 1.0308, "step": 109 }, { "epoch": 0.003371337501532426, "grad_norm": 3.388233010784284, "learning_rate": 1.1235955056179777e-06, "loss": 1.0805, "step": 110 }, { "epoch": 0.00340198602427363, "grad_norm": 3.9520612051111272, "learning_rate": 1.1338100102145047e-06, "loss": 1.0727, "step": 111 }, { "epoch": 0.003432634547014834, "grad_norm": 3.4480720647634446, "learning_rate": 1.1440245148110319e-06, "loss": 0.9411, "step": 112 }, { "epoch": 0.003463283069756038, "grad_norm": 3.3047390323141888, "learning_rate": 1.1542390194075589e-06, "loss": 1.1894, "step": 113 }, { "epoch": 0.003493931592497242, "grad_norm": 3.0477168052165986, "learning_rate": 1.1644535240040859e-06, "loss": 1.1532, "step": 114 }, { "epoch": 0.0035245801152384453, "grad_norm": 3.2078393845179685, "learning_rate": 1.1746680286006129e-06, "loss": 1.0153, "step": 115 }, { "epoch": 0.0035552286379796493, "grad_norm": 3.5674454274202096, "learning_rate": 1.18488253319714e-06, "loss": 1.051, "step": 116 }, { "epoch": 0.003585877160720853, "grad_norm": 3.0624871255854025, "learning_rate": 1.195097037793667e-06, "loss": 1.0116, "step": 117 }, { "epoch": 0.003616525683462057, "grad_norm": 3.439434271516326, "learning_rate": 1.205311542390194e-06, "loss": 0.9855, "step": 118 }, { "epoch": 0.003647174206203261, "grad_norm": 3.3519180895008263, "learning_rate": 1.2155260469867213e-06, "loss": 1.1147, "step": 119 }, { "epoch": 0.003677822728944465, "grad_norm": 2.583955506410239, "learning_rate": 1.2257405515832485e-06, "loss": 0.677, "step": 120 }, { "epoch": 0.003708471251685669, "grad_norm": 2.3050586894934626, "learning_rate": 1.2359550561797752e-06, "loss": 0.6313, "step": 121 }, { "epoch": 0.003739119774426873, "grad_norm": 2.8360220758966252, "learning_rate": 1.2461695607763025e-06, "loss": 1.0149, "step": 122 }, { "epoch": 0.0037697682971680763, "grad_norm": 3.7682033879316164, "learning_rate": 1.2563840653728297e-06, "loss": 0.9588, "step": 123 }, { "epoch": 0.0038004168199092803, "grad_norm": 4.153984443659916, "learning_rate": 1.2665985699693567e-06, "loss": 0.9886, "step": 124 }, { "epoch": 0.003831065342650484, "grad_norm": 3.8028541140199548, "learning_rate": 1.2768130745658836e-06, "loss": 1.04, "step": 125 }, { "epoch": 0.003861713865391688, "grad_norm": 3.4603010940565406, "learning_rate": 1.2870275791624106e-06, "loss": 0.9819, "step": 126 }, { "epoch": 0.003892362388132892, "grad_norm": 3.43598074030124, "learning_rate": 1.2972420837589378e-06, "loss": 1.0121, "step": 127 }, { "epoch": 0.003923010910874096, "grad_norm": 3.246986790784606, "learning_rate": 1.3074565883554648e-06, "loss": 1.0699, "step": 128 }, { "epoch": 0.0039536594336153, "grad_norm": 3.4927772671567725, "learning_rate": 1.317671092951992e-06, "loss": 0.9985, "step": 129 }, { "epoch": 0.003984307956356504, "grad_norm": 4.25878679662059, "learning_rate": 1.3278855975485188e-06, "loss": 1.0501, "step": 130 }, { "epoch": 0.004014956479097708, "grad_norm": 3.3799398458565593, "learning_rate": 1.338100102145046e-06, "loss": 0.9657, "step": 131 }, { "epoch": 0.004045605001838912, "grad_norm": 2.487454345743188, "learning_rate": 1.348314606741573e-06, "loss": 0.683, "step": 132 }, { "epoch": 0.004076253524580116, "grad_norm": 2.8012874772465004, "learning_rate": 1.3585291113381002e-06, "loss": 0.9319, "step": 133 }, { "epoch": 0.004106902047321319, "grad_norm": 3.3680665460644397, "learning_rate": 1.3687436159346274e-06, "loss": 1.0171, "step": 134 }, { "epoch": 0.004137550570062523, "grad_norm": 3.252892830992809, "learning_rate": 1.3789581205311544e-06, "loss": 0.9732, "step": 135 }, { "epoch": 0.0041681990928037265, "grad_norm": 3.802640287618769, "learning_rate": 1.3891726251276812e-06, "loss": 1.0121, "step": 136 }, { "epoch": 0.0041988476155449305, "grad_norm": 3.351631669051486, "learning_rate": 1.3993871297242084e-06, "loss": 0.9784, "step": 137 }, { "epoch": 0.004229496138286134, "grad_norm": 3.251398157849799, "learning_rate": 1.4096016343207356e-06, "loss": 1.0134, "step": 138 }, { "epoch": 0.004260144661027338, "grad_norm": 3.4009372770795108, "learning_rate": 1.4198161389172626e-06, "loss": 1.0324, "step": 139 }, { "epoch": 0.004290793183768542, "grad_norm": 2.3125921602601633, "learning_rate": 1.4300306435137898e-06, "loss": 0.6819, "step": 140 }, { "epoch": 0.004321441706509746, "grad_norm": 2.9100493595076244, "learning_rate": 1.4402451481103168e-06, "loss": 0.9165, "step": 141 }, { "epoch": 0.00435209022925095, "grad_norm": 3.2115352293155897, "learning_rate": 1.4504596527068438e-06, "loss": 0.9862, "step": 142 }, { "epoch": 0.004382738751992154, "grad_norm": 3.199105355709511, "learning_rate": 1.4606741573033708e-06, "loss": 1.0018, "step": 143 }, { "epoch": 0.004413387274733358, "grad_norm": 3.4748128155602838, "learning_rate": 1.470888661899898e-06, "loss": 0.9747, "step": 144 }, { "epoch": 0.004444035797474562, "grad_norm": 3.2891305674130873, "learning_rate": 1.4811031664964252e-06, "loss": 0.9475, "step": 145 }, { "epoch": 0.004474684320215766, "grad_norm": 3.1560075133340275, "learning_rate": 1.4913176710929522e-06, "loss": 0.9506, "step": 146 }, { "epoch": 0.00450533284295697, "grad_norm": 3.1761117994147328, "learning_rate": 1.501532175689479e-06, "loss": 0.9519, "step": 147 }, { "epoch": 0.004535981365698174, "grad_norm": 3.4436202554982906, "learning_rate": 1.5117466802860062e-06, "loss": 1.0429, "step": 148 }, { "epoch": 0.004566629888439378, "grad_norm": 3.4715026779583393, "learning_rate": 1.5219611848825334e-06, "loss": 1.0815, "step": 149 }, { "epoch": 0.004597278411180581, "grad_norm": 2.170857072885516, "learning_rate": 1.5321756894790604e-06, "loss": 0.6939, "step": 150 }, { "epoch": 0.004627926933921785, "grad_norm": 2.212521131912681, "learning_rate": 1.5423901940755876e-06, "loss": 0.6511, "step": 151 }, { "epoch": 0.0046585754566629885, "grad_norm": 4.466721883647417, "learning_rate": 1.5526046986721146e-06, "loss": 0.9391, "step": 152 }, { "epoch": 0.0046892239794041925, "grad_norm": 2.134647841855301, "learning_rate": 1.5628192032686416e-06, "loss": 0.6774, "step": 153 }, { "epoch": 0.004719872502145396, "grad_norm": 2.115748716997937, "learning_rate": 1.5730337078651686e-06, "loss": 0.658, "step": 154 }, { "epoch": 0.0047505210248866, "grad_norm": 2.0260612108613874, "learning_rate": 1.5832482124616958e-06, "loss": 0.6654, "step": 155 }, { "epoch": 0.004781169547627804, "grad_norm": 4.272038030535424, "learning_rate": 1.593462717058223e-06, "loss": 0.8623, "step": 156 }, { "epoch": 0.004811818070369008, "grad_norm": 3.343379292852195, "learning_rate": 1.60367722165475e-06, "loss": 0.9184, "step": 157 }, { "epoch": 0.004842466593110212, "grad_norm": 3.491992769755343, "learning_rate": 1.6138917262512767e-06, "loss": 0.9747, "step": 158 }, { "epoch": 0.004873115115851416, "grad_norm": 4.724495102420808, "learning_rate": 1.624106230847804e-06, "loss": 1.0154, "step": 159 }, { "epoch": 0.00490376363859262, "grad_norm": 3.3513672103385725, "learning_rate": 1.6343207354443311e-06, "loss": 0.9418, "step": 160 }, { "epoch": 0.004934412161333824, "grad_norm": 3.824925191693266, "learning_rate": 1.6445352400408581e-06, "loss": 0.9909, "step": 161 }, { "epoch": 0.004965060684075028, "grad_norm": 3.3744440391641835, "learning_rate": 1.6547497446373853e-06, "loss": 0.9643, "step": 162 }, { "epoch": 0.004995709206816232, "grad_norm": 3.4173097168613684, "learning_rate": 1.6649642492339123e-06, "loss": 0.912, "step": 163 }, { "epoch": 0.005026357729557436, "grad_norm": 3.019044387227756, "learning_rate": 1.6751787538304393e-06, "loss": 0.8663, "step": 164 }, { "epoch": 0.00505700625229864, "grad_norm": 3.1455003067520955, "learning_rate": 1.6853932584269663e-06, "loss": 0.8966, "step": 165 }, { "epoch": 0.005087654775039843, "grad_norm": 3.108785130598784, "learning_rate": 1.6956077630234935e-06, "loss": 0.957, "step": 166 }, { "epoch": 0.005118303297781047, "grad_norm": 3.5075835728701654, "learning_rate": 1.7058222676200205e-06, "loss": 0.9482, "step": 167 }, { "epoch": 0.0051489518205222505, "grad_norm": 2.9487671285535444, "learning_rate": 1.7160367722165477e-06, "loss": 1.001, "step": 168 }, { "epoch": 0.0051796003432634545, "grad_norm": 2.9554458147756106, "learning_rate": 1.7262512768130745e-06, "loss": 1.0078, "step": 169 }, { "epoch": 0.005210248866004658, "grad_norm": 4.155998320226826, "learning_rate": 1.7364657814096017e-06, "loss": 0.9881, "step": 170 }, { "epoch": 0.005240897388745862, "grad_norm": 4.07957118138219, "learning_rate": 1.746680286006129e-06, "loss": 0.8711, "step": 171 }, { "epoch": 0.005271545911487066, "grad_norm": 3.617947824051078, "learning_rate": 1.756894790602656e-06, "loss": 0.8177, "step": 172 }, { "epoch": 0.00530219443422827, "grad_norm": 3.6989039342917334, "learning_rate": 1.7671092951991831e-06, "loss": 0.9532, "step": 173 }, { "epoch": 0.005332842956969474, "grad_norm": 3.473308204810775, "learning_rate": 1.7773237997957101e-06, "loss": 0.9801, "step": 174 }, { "epoch": 0.005363491479710678, "grad_norm": 1.70651540295496, "learning_rate": 1.787538304392237e-06, "loss": 0.6389, "step": 175 }, { "epoch": 0.005394140002451882, "grad_norm": 3.1052002028436134, "learning_rate": 1.797752808988764e-06, "loss": 0.9566, "step": 176 }, { "epoch": 0.005424788525193086, "grad_norm": 3.2451167116582544, "learning_rate": 1.8079673135852913e-06, "loss": 0.8613, "step": 177 }, { "epoch": 0.00545543704793429, "grad_norm": 3.2279097139257, "learning_rate": 1.8181818181818183e-06, "loss": 0.958, "step": 178 }, { "epoch": 0.005486085570675494, "grad_norm": 3.577130990597013, "learning_rate": 1.8283963227783455e-06, "loss": 1.0718, "step": 179 }, { "epoch": 0.005516734093416698, "grad_norm": 2.8163857524374936, "learning_rate": 1.8386108273748723e-06, "loss": 0.8969, "step": 180 }, { "epoch": 0.005547382616157902, "grad_norm": 3.396610291986765, "learning_rate": 1.8488253319713995e-06, "loss": 0.8887, "step": 181 }, { "epoch": 0.005578031138899105, "grad_norm": 3.5220756541384075, "learning_rate": 1.8590398365679265e-06, "loss": 0.9661, "step": 182 }, { "epoch": 0.005608679661640309, "grad_norm": 1.8072339180147845, "learning_rate": 1.8692543411644537e-06, "loss": 0.6554, "step": 183 }, { "epoch": 0.0056393281843815125, "grad_norm": 1.7086600964575829, "learning_rate": 1.8794688457609809e-06, "loss": 0.6525, "step": 184 }, { "epoch": 0.0056699767071227165, "grad_norm": 3.8021707259666675, "learning_rate": 1.8896833503575079e-06, "loss": 1.0149, "step": 185 }, { "epoch": 0.00570062522986392, "grad_norm": 3.0824341123917347, "learning_rate": 1.8998978549540349e-06, "loss": 0.9505, "step": 186 }, { "epoch": 0.005731273752605124, "grad_norm": 3.2195912419120627, "learning_rate": 1.910112359550562e-06, "loss": 0.9943, "step": 187 }, { "epoch": 0.005761922275346328, "grad_norm": 1.584748377553122, "learning_rate": 1.920326864147089e-06, "loss": 0.6274, "step": 188 }, { "epoch": 0.005792570798087532, "grad_norm": 3.402958847184411, "learning_rate": 1.9305413687436163e-06, "loss": 0.9569, "step": 189 }, { "epoch": 0.005823219320828736, "grad_norm": 2.945358184897761, "learning_rate": 1.940755873340143e-06, "loss": 1.051, "step": 190 }, { "epoch": 0.00585386784356994, "grad_norm": 2.8915254148956167, "learning_rate": 1.9509703779366703e-06, "loss": 0.9587, "step": 191 }, { "epoch": 0.005884516366311144, "grad_norm": 3.8891486234004753, "learning_rate": 1.961184882533197e-06, "loss": 0.8889, "step": 192 }, { "epoch": 0.005915164889052348, "grad_norm": 3.895325232768456, "learning_rate": 1.9713993871297242e-06, "loss": 0.9869, "step": 193 }, { "epoch": 0.005945813411793552, "grad_norm": 3.354893261249651, "learning_rate": 1.9816138917262514e-06, "loss": 0.9624, "step": 194 }, { "epoch": 0.005976461934534756, "grad_norm": 3.628338176896766, "learning_rate": 1.9918283963227787e-06, "loss": 0.9495, "step": 195 }, { "epoch": 0.00600711045727596, "grad_norm": 3.5323553071453873, "learning_rate": 2.002042900919306e-06, "loss": 0.8969, "step": 196 }, { "epoch": 0.006037758980017163, "grad_norm": 3.031344466866221, "learning_rate": 2.0122574055158326e-06, "loss": 0.8913, "step": 197 }, { "epoch": 0.006068407502758367, "grad_norm": 4.11802180589706, "learning_rate": 2.02247191011236e-06, "loss": 0.9613, "step": 198 }, { "epoch": 0.006099056025499571, "grad_norm": 3.492610491947565, "learning_rate": 2.0326864147088866e-06, "loss": 1.023, "step": 199 }, { "epoch": 0.0061297045482407745, "grad_norm": 4.6942275026520806, "learning_rate": 2.042900919305414e-06, "loss": 1.1647, "step": 200 }, { "epoch": 0.0061603530709819785, "grad_norm": 3.4565299014965656, "learning_rate": 2.053115423901941e-06, "loss": 0.8833, "step": 201 }, { "epoch": 0.006191001593723182, "grad_norm": 3.644230511302545, "learning_rate": 2.063329928498468e-06, "loss": 0.9712, "step": 202 }, { "epoch": 0.006221650116464386, "grad_norm": 1.6968571303555406, "learning_rate": 2.073544433094995e-06, "loss": 0.6351, "step": 203 }, { "epoch": 0.00625229863920559, "grad_norm": 3.1848286499077387, "learning_rate": 2.0837589376915222e-06, "loss": 0.8456, "step": 204 }, { "epoch": 0.006282947161946794, "grad_norm": 3.78546693636232, "learning_rate": 2.0939734422880494e-06, "loss": 0.9182, "step": 205 }, { "epoch": 0.006313595684687998, "grad_norm": 3.175916076681233, "learning_rate": 2.104187946884576e-06, "loss": 0.9616, "step": 206 }, { "epoch": 0.006344244207429202, "grad_norm": 2.7917080235606226, "learning_rate": 2.1144024514811034e-06, "loss": 0.9161, "step": 207 }, { "epoch": 0.006374892730170406, "grad_norm": 1.5695735120259882, "learning_rate": 2.12461695607763e-06, "loss": 0.6476, "step": 208 }, { "epoch": 0.00640554125291161, "grad_norm": 3.7340818230175086, "learning_rate": 2.1348314606741574e-06, "loss": 1.0016, "step": 209 }, { "epoch": 0.006436189775652814, "grad_norm": 3.5509391533608863, "learning_rate": 2.1450459652706846e-06, "loss": 0.9495, "step": 210 }, { "epoch": 0.006466838298394018, "grad_norm": 3.4104898301445354, "learning_rate": 2.155260469867212e-06, "loss": 0.9637, "step": 211 }, { "epoch": 0.006497486821135222, "grad_norm": 3.4367476129750654, "learning_rate": 2.1654749744637386e-06, "loss": 0.9581, "step": 212 }, { "epoch": 0.006528135343876425, "grad_norm": 3.4871202243056563, "learning_rate": 2.175689479060266e-06, "loss": 0.9322, "step": 213 }, { "epoch": 0.006558783866617629, "grad_norm": 3.1154715314840016, "learning_rate": 2.1859039836567926e-06, "loss": 0.9855, "step": 214 }, { "epoch": 0.006589432389358833, "grad_norm": 3.3157629195787273, "learning_rate": 2.1961184882533198e-06, "loss": 0.8119, "step": 215 }, { "epoch": 0.0066200809121000365, "grad_norm": 2.912465812604796, "learning_rate": 2.206332992849847e-06, "loss": 0.8865, "step": 216 }, { "epoch": 0.0066507294348412405, "grad_norm": 3.0255597024987932, "learning_rate": 2.216547497446374e-06, "loss": 0.9906, "step": 217 }, { "epoch": 0.006681377957582444, "grad_norm": 3.7896284807404204, "learning_rate": 2.2267620020429014e-06, "loss": 0.9427, "step": 218 }, { "epoch": 0.006712026480323648, "grad_norm": 1.5126739039127572, "learning_rate": 2.236976506639428e-06, "loss": 0.6023, "step": 219 }, { "epoch": 0.006742675003064852, "grad_norm": 1.4478259315800626, "learning_rate": 2.2471910112359554e-06, "loss": 0.6374, "step": 220 }, { "epoch": 0.006773323525806056, "grad_norm": 3.128091088580022, "learning_rate": 2.257405515832482e-06, "loss": 0.9313, "step": 221 }, { "epoch": 0.00680397204854726, "grad_norm": 1.570123120001384, "learning_rate": 2.2676200204290094e-06, "loss": 0.6458, "step": 222 }, { "epoch": 0.006834620571288464, "grad_norm": 4.4883386692537774, "learning_rate": 2.2778345250255366e-06, "loss": 0.9949, "step": 223 }, { "epoch": 0.006865269094029668, "grad_norm": 3.5275024342403483, "learning_rate": 2.2880490296220638e-06, "loss": 0.9457, "step": 224 }, { "epoch": 0.006895917616770872, "grad_norm": 3.838269511990043, "learning_rate": 2.2982635342185906e-06, "loss": 0.9286, "step": 225 }, { "epoch": 0.006926566139512076, "grad_norm": 2.969449117064357, "learning_rate": 2.3084780388151178e-06, "loss": 0.8549, "step": 226 }, { "epoch": 0.00695721466225328, "grad_norm": 1.3529663649608028, "learning_rate": 2.3186925434116445e-06, "loss": 0.6271, "step": 227 }, { "epoch": 0.006987863184994484, "grad_norm": 4.831412189045911, "learning_rate": 2.3289070480081717e-06, "loss": 0.9244, "step": 228 }, { "epoch": 0.007018511707735687, "grad_norm": 3.4068773358338404, "learning_rate": 2.339121552604699e-06, "loss": 1.0225, "step": 229 }, { "epoch": 0.007049160230476891, "grad_norm": 3.1162020144088878, "learning_rate": 2.3493360572012257e-06, "loss": 0.953, "step": 230 }, { "epoch": 0.007079808753218095, "grad_norm": 1.31876673780315, "learning_rate": 2.359550561797753e-06, "loss": 0.623, "step": 231 }, { "epoch": 0.0071104572759592985, "grad_norm": 3.963931707540173, "learning_rate": 2.36976506639428e-06, "loss": 0.9648, "step": 232 }, { "epoch": 0.0071411057987005025, "grad_norm": 3.2611983131442823, "learning_rate": 2.3799795709908073e-06, "loss": 0.7852, "step": 233 }, { "epoch": 0.007171754321441706, "grad_norm": 3.1143191308964107, "learning_rate": 2.390194075587334e-06, "loss": 0.9833, "step": 234 }, { "epoch": 0.00720240284418291, "grad_norm": 3.563077023898005, "learning_rate": 2.4004085801838613e-06, "loss": 0.8967, "step": 235 }, { "epoch": 0.007233051366924114, "grad_norm": 2.9768192555020683, "learning_rate": 2.410623084780388e-06, "loss": 0.8465, "step": 236 }, { "epoch": 0.007263699889665318, "grad_norm": 3.3347625979665017, "learning_rate": 2.4208375893769153e-06, "loss": 0.976, "step": 237 }, { "epoch": 0.007294348412406522, "grad_norm": 2.7833419732233824, "learning_rate": 2.4310520939734425e-06, "loss": 0.9077, "step": 238 }, { "epoch": 0.007324996935147726, "grad_norm": 3.668609639664664, "learning_rate": 2.4412665985699697e-06, "loss": 0.9406, "step": 239 }, { "epoch": 0.00735564545788893, "grad_norm": 3.8137923072932165, "learning_rate": 2.451481103166497e-06, "loss": 0.8719, "step": 240 }, { "epoch": 0.007386293980630134, "grad_norm": 2.947444139435356, "learning_rate": 2.4616956077630237e-06, "loss": 0.932, "step": 241 }, { "epoch": 0.007416942503371338, "grad_norm": 3.7768728770534783, "learning_rate": 2.4719101123595505e-06, "loss": 0.8959, "step": 242 }, { "epoch": 0.007447591026112542, "grad_norm": 3.284586616508773, "learning_rate": 2.4821246169560777e-06, "loss": 0.8629, "step": 243 }, { "epoch": 0.007478239548853746, "grad_norm": 3.3649909671723415, "learning_rate": 2.492339121552605e-06, "loss": 0.8381, "step": 244 }, { "epoch": 0.007508888071594949, "grad_norm": 3.496289795428584, "learning_rate": 2.5025536261491317e-06, "loss": 0.9189, "step": 245 }, { "epoch": 0.007539536594336153, "grad_norm": 3.0710547598716618, "learning_rate": 2.5127681307456593e-06, "loss": 0.8905, "step": 246 }, { "epoch": 0.007570185117077357, "grad_norm": 3.838751065413459, "learning_rate": 2.522982635342186e-06, "loss": 0.8679, "step": 247 }, { "epoch": 0.0076008336398185605, "grad_norm": 3.062461013469598, "learning_rate": 2.5331971399387133e-06, "loss": 0.9768, "step": 248 }, { "epoch": 0.0076314821625597645, "grad_norm": 3.3316521886569803, "learning_rate": 2.54341164453524e-06, "loss": 0.8299, "step": 249 }, { "epoch": 0.007662130685300968, "grad_norm": 3.0248590818593146, "learning_rate": 2.5536261491317673e-06, "loss": 0.8794, "step": 250 }, { "epoch": 0.007692779208042172, "grad_norm": 3.456784157041569, "learning_rate": 2.5638406537282945e-06, "loss": 0.8729, "step": 251 }, { "epoch": 0.007723427730783376, "grad_norm": 2.761647645926438, "learning_rate": 2.5740551583248213e-06, "loss": 0.9185, "step": 252 }, { "epoch": 0.00775407625352458, "grad_norm": 2.950582587174075, "learning_rate": 2.584269662921349e-06, "loss": 0.9503, "step": 253 }, { "epoch": 0.007784724776265784, "grad_norm": 3.4234213708522345, "learning_rate": 2.5944841675178757e-06, "loss": 0.9499, "step": 254 }, { "epoch": 0.007815373299006988, "grad_norm": 3.368769285818423, "learning_rate": 2.6046986721144025e-06, "loss": 0.8869, "step": 255 }, { "epoch": 0.007846021821748192, "grad_norm": 3.956938858049432, "learning_rate": 2.6149131767109297e-06, "loss": 0.8755, "step": 256 }, { "epoch": 0.007876670344489396, "grad_norm": 3.3683244053666113, "learning_rate": 2.6251276813074565e-06, "loss": 0.9338, "step": 257 }, { "epoch": 0.0079073188672306, "grad_norm": 3.1117512713898416, "learning_rate": 2.635342185903984e-06, "loss": 0.8871, "step": 258 }, { "epoch": 0.007937967389971804, "grad_norm": 3.0936219587603393, "learning_rate": 2.645556690500511e-06, "loss": 0.8698, "step": 259 }, { "epoch": 0.007968615912713008, "grad_norm": 3.3080286477749463, "learning_rate": 2.6557711950970376e-06, "loss": 0.9051, "step": 260 }, { "epoch": 0.007999264435454212, "grad_norm": 1.325321405348781, "learning_rate": 2.6659856996935653e-06, "loss": 0.6003, "step": 261 }, { "epoch": 0.008029912958195416, "grad_norm": 3.0774188858172233, "learning_rate": 2.676200204290092e-06, "loss": 0.8745, "step": 262 }, { "epoch": 0.00806056148093662, "grad_norm": 2.9238034143692473, "learning_rate": 2.6864147088866193e-06, "loss": 0.9636, "step": 263 }, { "epoch": 0.008091210003677823, "grad_norm": 1.3877663216331586, "learning_rate": 2.696629213483146e-06, "loss": 0.6274, "step": 264 }, { "epoch": 0.008121858526419027, "grad_norm": 3.5530050725438946, "learning_rate": 2.7068437180796737e-06, "loss": 0.9121, "step": 265 }, { "epoch": 0.008152507049160231, "grad_norm": 2.9744806372221286, "learning_rate": 2.7170582226762004e-06, "loss": 0.8906, "step": 266 }, { "epoch": 0.008183155571901435, "grad_norm": 2.9868029297560006, "learning_rate": 2.7272727272727272e-06, "loss": 0.9627, "step": 267 }, { "epoch": 0.008213804094642637, "grad_norm": 1.2133921082687236, "learning_rate": 2.737487231869255e-06, "loss": 0.5921, "step": 268 }, { "epoch": 0.008244452617383841, "grad_norm": 3.408567737654011, "learning_rate": 2.7477017364657816e-06, "loss": 1.027, "step": 269 }, { "epoch": 0.008275101140125045, "grad_norm": 3.136925372366743, "learning_rate": 2.757916241062309e-06, "loss": 0.9307, "step": 270 }, { "epoch": 0.00830574966286625, "grad_norm": 1.202703434706634, "learning_rate": 2.7681307456588356e-06, "loss": 0.6134, "step": 271 }, { "epoch": 0.008336398185607453, "grad_norm": 1.4491471908613063, "learning_rate": 2.7783452502553624e-06, "loss": 0.6243, "step": 272 }, { "epoch": 0.008367046708348657, "grad_norm": 3.644689750424839, "learning_rate": 2.78855975485189e-06, "loss": 0.9169, "step": 273 }, { "epoch": 0.008397695231089861, "grad_norm": 3.124196865258082, "learning_rate": 2.798774259448417e-06, "loss": 0.8765, "step": 274 }, { "epoch": 0.008428343753831065, "grad_norm": 2.980329100992928, "learning_rate": 2.8089887640449444e-06, "loss": 0.8624, "step": 275 }, { "epoch": 0.008458992276572269, "grad_norm": 1.254692338126859, "learning_rate": 2.8192032686414712e-06, "loss": 0.6215, "step": 276 }, { "epoch": 0.008489640799313473, "grad_norm": 3.432027650921869, "learning_rate": 2.829417773237998e-06, "loss": 0.9062, "step": 277 }, { "epoch": 0.008520289322054677, "grad_norm": 3.4231401879558176, "learning_rate": 2.839632277834525e-06, "loss": 0.969, "step": 278 }, { "epoch": 0.00855093784479588, "grad_norm": 1.196910987376432, "learning_rate": 2.849846782431052e-06, "loss": 0.6257, "step": 279 }, { "epoch": 0.008581586367537085, "grad_norm": 2.905759025137128, "learning_rate": 2.8600612870275796e-06, "loss": 0.876, "step": 280 }, { "epoch": 0.008612234890278288, "grad_norm": 3.1684751541148772, "learning_rate": 2.8702757916241064e-06, "loss": 0.8966, "step": 281 }, { "epoch": 0.008642883413019492, "grad_norm": 3.2479875806829988, "learning_rate": 2.8804902962206336e-06, "loss": 0.9053, "step": 282 }, { "epoch": 0.008673531935760696, "grad_norm": 3.861480654797683, "learning_rate": 2.890704800817161e-06, "loss": 1.0196, "step": 283 }, { "epoch": 0.0087041804585019, "grad_norm": 2.6276883618947537, "learning_rate": 2.9009193054136876e-06, "loss": 0.9337, "step": 284 }, { "epoch": 0.008734828981243104, "grad_norm": 3.1526301701410064, "learning_rate": 2.911133810010215e-06, "loss": 0.9101, "step": 285 }, { "epoch": 0.008765477503984308, "grad_norm": 3.2400978103925566, "learning_rate": 2.9213483146067416e-06, "loss": 0.8961, "step": 286 }, { "epoch": 0.008796126026725512, "grad_norm": 3.0005165824272835, "learning_rate": 2.931562819203269e-06, "loss": 0.9566, "step": 287 }, { "epoch": 0.008826774549466716, "grad_norm": 2.9487717797007957, "learning_rate": 2.941777323799796e-06, "loss": 0.9033, "step": 288 }, { "epoch": 0.00885742307220792, "grad_norm": 3.730179151234975, "learning_rate": 2.9519918283963228e-06, "loss": 0.8556, "step": 289 }, { "epoch": 0.008888071594949124, "grad_norm": 3.7700974050309193, "learning_rate": 2.9622063329928504e-06, "loss": 0.8963, "step": 290 }, { "epoch": 0.008918720117690328, "grad_norm": 3.169834681090144, "learning_rate": 2.972420837589377e-06, "loss": 0.8297, "step": 291 }, { "epoch": 0.008949368640431532, "grad_norm": 3.1715819801310867, "learning_rate": 2.9826353421859044e-06, "loss": 0.781, "step": 292 }, { "epoch": 0.008980017163172736, "grad_norm": 1.196888349087265, "learning_rate": 2.992849846782431e-06, "loss": 0.6063, "step": 293 }, { "epoch": 0.00901066568591394, "grad_norm": 3.2023828031615325, "learning_rate": 3.003064351378958e-06, "loss": 0.9106, "step": 294 }, { "epoch": 0.009041314208655143, "grad_norm": 3.206293503182386, "learning_rate": 3.0132788559754856e-06, "loss": 0.9296, "step": 295 }, { "epoch": 0.009071962731396347, "grad_norm": 2.720214226482726, "learning_rate": 3.0234933605720124e-06, "loss": 0.8801, "step": 296 }, { "epoch": 0.009102611254137551, "grad_norm": 1.256799602771568, "learning_rate": 3.03370786516854e-06, "loss": 0.6041, "step": 297 }, { "epoch": 0.009133259776878755, "grad_norm": 1.1542869945939802, "learning_rate": 3.0439223697650668e-06, "loss": 0.5804, "step": 298 }, { "epoch": 0.00916390829961996, "grad_norm": 3.3196795926538893, "learning_rate": 3.0541368743615935e-06, "loss": 0.9078, "step": 299 }, { "epoch": 0.009194556822361161, "grad_norm": 3.285544830679664, "learning_rate": 3.0643513789581207e-06, "loss": 0.7894, "step": 300 }, { "epoch": 0.009225205345102365, "grad_norm": 3.2963910390492788, "learning_rate": 3.0745658835546475e-06, "loss": 0.8708, "step": 301 }, { "epoch": 0.00925585386784357, "grad_norm": 2.9521139312987987, "learning_rate": 3.084780388151175e-06, "loss": 0.868, "step": 302 }, { "epoch": 0.009286502390584773, "grad_norm": 1.145885534241748, "learning_rate": 3.094994892747702e-06, "loss": 0.6004, "step": 303 }, { "epoch": 0.009317150913325977, "grad_norm": 3.4916511900121714, "learning_rate": 3.105209397344229e-06, "loss": 0.8709, "step": 304 }, { "epoch": 0.009347799436067181, "grad_norm": 3.530825277936294, "learning_rate": 3.1154239019407563e-06, "loss": 0.8898, "step": 305 }, { "epoch": 0.009378447958808385, "grad_norm": 3.1063487790596156, "learning_rate": 3.125638406537283e-06, "loss": 0.7664, "step": 306 }, { "epoch": 0.009409096481549589, "grad_norm": 3.0430864825715553, "learning_rate": 3.1358529111338103e-06, "loss": 0.9034, "step": 307 }, { "epoch": 0.009439745004290793, "grad_norm": 2.9103349357785797, "learning_rate": 3.146067415730337e-06, "loss": 0.9009, "step": 308 }, { "epoch": 0.009470393527031997, "grad_norm": 3.4237820164263493, "learning_rate": 3.1562819203268647e-06, "loss": 1.0147, "step": 309 }, { "epoch": 0.0095010420497732, "grad_norm": 1.1021551560504406, "learning_rate": 3.1664964249233915e-06, "loss": 0.5914, "step": 310 }, { "epoch": 0.009531690572514405, "grad_norm": 3.323931624517119, "learning_rate": 3.1767109295199183e-06, "loss": 0.8696, "step": 311 }, { "epoch": 0.009562339095255609, "grad_norm": 3.492881975811172, "learning_rate": 3.186925434116446e-06, "loss": 0.8417, "step": 312 }, { "epoch": 0.009592987617996812, "grad_norm": 3.4256018723459283, "learning_rate": 3.1971399387129727e-06, "loss": 1.016, "step": 313 }, { "epoch": 0.009623636140738016, "grad_norm": 1.174838930809781, "learning_rate": 3.2073544433095e-06, "loss": 0.6117, "step": 314 }, { "epoch": 0.00965428466347922, "grad_norm": 3.9507426029881403, "learning_rate": 3.2175689479060267e-06, "loss": 0.9215, "step": 315 }, { "epoch": 0.009684933186220424, "grad_norm": 4.443852053675296, "learning_rate": 3.2277834525025535e-06, "loss": 0.7801, "step": 316 }, { "epoch": 0.009715581708961628, "grad_norm": 3.548063932940883, "learning_rate": 3.237997957099081e-06, "loss": 1.0531, "step": 317 }, { "epoch": 0.009746230231702832, "grad_norm": 3.4526896883745177, "learning_rate": 3.248212461695608e-06, "loss": 0.9079, "step": 318 }, { "epoch": 0.009776878754444036, "grad_norm": 3.203770670419235, "learning_rate": 3.258426966292135e-06, "loss": 0.8072, "step": 319 }, { "epoch": 0.00980752727718524, "grad_norm": 3.123528345930958, "learning_rate": 3.2686414708886623e-06, "loss": 0.7823, "step": 320 }, { "epoch": 0.009838175799926444, "grad_norm": 3.762634285651869, "learning_rate": 3.278855975485189e-06, "loss": 0.9414, "step": 321 }, { "epoch": 0.009868824322667648, "grad_norm": 3.6338575050821036, "learning_rate": 3.2890704800817163e-06, "loss": 0.8684, "step": 322 }, { "epoch": 0.009899472845408852, "grad_norm": 3.28653890140348, "learning_rate": 3.299284984678243e-06, "loss": 0.893, "step": 323 }, { "epoch": 0.009930121368150056, "grad_norm": 2.7837310717604447, "learning_rate": 3.3094994892747707e-06, "loss": 0.7737, "step": 324 }, { "epoch": 0.00996076989089126, "grad_norm": 3.3372920338551104, "learning_rate": 3.3197139938712975e-06, "loss": 0.9097, "step": 325 }, { "epoch": 0.009991418413632464, "grad_norm": 3.2469963777045976, "learning_rate": 3.3299284984678247e-06, "loss": 0.8756, "step": 326 }, { "epoch": 0.010022066936373667, "grad_norm": 3.2128483631098845, "learning_rate": 3.340143003064352e-06, "loss": 0.9081, "step": 327 }, { "epoch": 0.010052715459114871, "grad_norm": 3.8513228597554354, "learning_rate": 3.3503575076608787e-06, "loss": 0.9289, "step": 328 }, { "epoch": 0.010083363981856075, "grad_norm": 3.088653553752994, "learning_rate": 3.360572012257406e-06, "loss": 0.7953, "step": 329 }, { "epoch": 0.01011401250459728, "grad_norm": 2.967548543873015, "learning_rate": 3.3707865168539327e-06, "loss": 0.8443, "step": 330 }, { "epoch": 0.010144661027338481, "grad_norm": 2.9524092822709673, "learning_rate": 3.3810010214504603e-06, "loss": 0.8719, "step": 331 }, { "epoch": 0.010175309550079685, "grad_norm": 3.4395108755500416, "learning_rate": 3.391215526046987e-06, "loss": 0.9401, "step": 332 }, { "epoch": 0.01020595807282089, "grad_norm": 2.794432689185305, "learning_rate": 3.401430030643514e-06, "loss": 0.8456, "step": 333 }, { "epoch": 0.010236606595562093, "grad_norm": 3.1290674602092174, "learning_rate": 3.411644535240041e-06, "loss": 0.9206, "step": 334 }, { "epoch": 0.010267255118303297, "grad_norm": 2.8996280117902753, "learning_rate": 3.4218590398365683e-06, "loss": 0.8386, "step": 335 }, { "epoch": 0.010297903641044501, "grad_norm": 2.9679830803308827, "learning_rate": 3.4320735444330955e-06, "loss": 0.8024, "step": 336 }, { "epoch": 0.010328552163785705, "grad_norm": 3.2427858686387094, "learning_rate": 3.4422880490296222e-06, "loss": 0.9389, "step": 337 }, { "epoch": 0.010359200686526909, "grad_norm": 3.62830992137235, "learning_rate": 3.452502553626149e-06, "loss": 0.8063, "step": 338 }, { "epoch": 0.010389849209268113, "grad_norm": 3.6016582424842363, "learning_rate": 3.4627170582226766e-06, "loss": 0.8186, "step": 339 }, { "epoch": 0.010420497732009317, "grad_norm": 2.8348699545735205, "learning_rate": 3.4729315628192034e-06, "loss": 0.8903, "step": 340 }, { "epoch": 0.01045114625475052, "grad_norm": 2.923216936924085, "learning_rate": 3.4831460674157306e-06, "loss": 0.9081, "step": 341 }, { "epoch": 0.010481794777491725, "grad_norm": 3.148903452330976, "learning_rate": 3.493360572012258e-06, "loss": 0.8502, "step": 342 }, { "epoch": 0.010512443300232929, "grad_norm": 3.9910381398825026, "learning_rate": 3.5035750766087846e-06, "loss": 0.954, "step": 343 }, { "epoch": 0.010543091822974133, "grad_norm": 3.084968035257475, "learning_rate": 3.513789581205312e-06, "loss": 0.9003, "step": 344 }, { "epoch": 0.010573740345715336, "grad_norm": 2.845978021479907, "learning_rate": 3.5240040858018386e-06, "loss": 0.9905, "step": 345 }, { "epoch": 0.01060438886845654, "grad_norm": 1.209140998227631, "learning_rate": 3.5342185903983662e-06, "loss": 0.611, "step": 346 }, { "epoch": 0.010635037391197744, "grad_norm": 2.887730978389089, "learning_rate": 3.544433094994893e-06, "loss": 0.8483, "step": 347 }, { "epoch": 0.010665685913938948, "grad_norm": 3.2585524233911145, "learning_rate": 3.5546475995914202e-06, "loss": 0.892, "step": 348 }, { "epoch": 0.010696334436680152, "grad_norm": 3.6005153222646533, "learning_rate": 3.564862104187947e-06, "loss": 0.92, "step": 349 }, { "epoch": 0.010726982959421356, "grad_norm": 3.497421380095859, "learning_rate": 3.575076608784474e-06, "loss": 0.8971, "step": 350 }, { "epoch": 0.01075763148216256, "grad_norm": 2.820905183354147, "learning_rate": 3.5852911133810014e-06, "loss": 0.8145, "step": 351 }, { "epoch": 0.010788280004903764, "grad_norm": 3.2562703727312052, "learning_rate": 3.595505617977528e-06, "loss": 0.9036, "step": 352 }, { "epoch": 0.010818928527644968, "grad_norm": 2.820818421620116, "learning_rate": 3.605720122574056e-06, "loss": 0.8782, "step": 353 }, { "epoch": 0.010849577050386172, "grad_norm": 3.054396546470862, "learning_rate": 3.6159346271705826e-06, "loss": 0.9613, "step": 354 }, { "epoch": 0.010880225573127376, "grad_norm": 2.8020513916779604, "learning_rate": 3.6261491317671094e-06, "loss": 0.8948, "step": 355 }, { "epoch": 0.01091087409586858, "grad_norm": 3.1691640543283333, "learning_rate": 3.6363636363636366e-06, "loss": 0.8595, "step": 356 }, { "epoch": 0.010941522618609784, "grad_norm": 2.755821426341309, "learning_rate": 3.646578140960164e-06, "loss": 0.9276, "step": 357 }, { "epoch": 0.010972171141350988, "grad_norm": 3.0467755189675088, "learning_rate": 3.656792645556691e-06, "loss": 0.9529, "step": 358 }, { "epoch": 0.011002819664092191, "grad_norm": 2.8463240178459355, "learning_rate": 3.6670071501532178e-06, "loss": 0.8382, "step": 359 }, { "epoch": 0.011033468186833395, "grad_norm": 2.9422459128005247, "learning_rate": 3.6772216547497446e-06, "loss": 0.8948, "step": 360 }, { "epoch": 0.0110641167095746, "grad_norm": 3.1603288049184464, "learning_rate": 3.687436159346272e-06, "loss": 0.8442, "step": 361 }, { "epoch": 0.011094765232315803, "grad_norm": 3.164658989542033, "learning_rate": 3.697650663942799e-06, "loss": 0.89, "step": 362 }, { "epoch": 0.011125413755057005, "grad_norm": 1.1409301753838441, "learning_rate": 3.707865168539326e-06, "loss": 0.5911, "step": 363 }, { "epoch": 0.01115606227779821, "grad_norm": 2.7879491279335746, "learning_rate": 3.718079673135853e-06, "loss": 0.8923, "step": 364 }, { "epoch": 0.011186710800539413, "grad_norm": 2.9077301280810604, "learning_rate": 3.7282941777323806e-06, "loss": 0.8232, "step": 365 }, { "epoch": 0.011217359323280617, "grad_norm": 1.1461361593937374, "learning_rate": 3.7385086823289074e-06, "loss": 0.5908, "step": 366 }, { "epoch": 0.011248007846021821, "grad_norm": 2.8411152333722143, "learning_rate": 3.748723186925434e-06, "loss": 0.8451, "step": 367 }, { "epoch": 0.011278656368763025, "grad_norm": 1.1012079176578276, "learning_rate": 3.7589376915219618e-06, "loss": 0.5916, "step": 368 }, { "epoch": 0.011309304891504229, "grad_norm": 3.240576902151979, "learning_rate": 3.7691521961184886e-06, "loss": 0.9554, "step": 369 }, { "epoch": 0.011339953414245433, "grad_norm": 3.242171668872021, "learning_rate": 3.7793667007150158e-06, "loss": 0.8115, "step": 370 }, { "epoch": 0.011370601936986637, "grad_norm": 1.097455905226801, "learning_rate": 3.7895812053115425e-06, "loss": 0.617, "step": 371 }, { "epoch": 0.01140125045972784, "grad_norm": 3.06185297784939, "learning_rate": 3.7997957099080697e-06, "loss": 0.8526, "step": 372 }, { "epoch": 0.011431898982469045, "grad_norm": 2.6647185164077887, "learning_rate": 3.810010214504597e-06, "loss": 0.8372, "step": 373 }, { "epoch": 0.011462547505210249, "grad_norm": 2.7828450164086127, "learning_rate": 3.820224719101124e-06, "loss": 0.8637, "step": 374 }, { "epoch": 0.011493196027951453, "grad_norm": 2.907795184765188, "learning_rate": 3.830439223697651e-06, "loss": 0.8525, "step": 375 }, { "epoch": 0.011523844550692657, "grad_norm": 1.1270691778333481, "learning_rate": 3.840653728294178e-06, "loss": 0.571, "step": 376 }, { "epoch": 0.01155449307343386, "grad_norm": 2.954371068526859, "learning_rate": 3.850868232890705e-06, "loss": 0.8246, "step": 377 }, { "epoch": 0.011585141596175064, "grad_norm": 1.1165992979946426, "learning_rate": 3.8610827374872325e-06, "loss": 0.5577, "step": 378 }, { "epoch": 0.011615790118916268, "grad_norm": 1.1587227705176206, "learning_rate": 3.871297242083759e-06, "loss": 0.6087, "step": 379 }, { "epoch": 0.011646438641657472, "grad_norm": 3.1812256190265473, "learning_rate": 3.881511746680286e-06, "loss": 0.8291, "step": 380 }, { "epoch": 0.011677087164398676, "grad_norm": 2.8417051005728804, "learning_rate": 3.891726251276814e-06, "loss": 0.9287, "step": 381 }, { "epoch": 0.01170773568713988, "grad_norm": 3.089530794182103, "learning_rate": 3.9019407558733405e-06, "loss": 0.8419, "step": 382 }, { "epoch": 0.011738384209881084, "grad_norm": 2.9460995591158525, "learning_rate": 3.912155260469867e-06, "loss": 0.9367, "step": 383 }, { "epoch": 0.011769032732622288, "grad_norm": 3.0032635416848623, "learning_rate": 3.922369765066394e-06, "loss": 0.86, "step": 384 }, { "epoch": 0.011799681255363492, "grad_norm": 3.130418104080654, "learning_rate": 3.932584269662922e-06, "loss": 0.9164, "step": 385 }, { "epoch": 0.011830329778104696, "grad_norm": 3.2874669135575876, "learning_rate": 3.9427987742594485e-06, "loss": 0.882, "step": 386 }, { "epoch": 0.0118609783008459, "grad_norm": 2.7462663469800606, "learning_rate": 3.953013278855976e-06, "loss": 0.8852, "step": 387 }, { "epoch": 0.011891626823587104, "grad_norm": 3.0404997266571034, "learning_rate": 3.963227783452503e-06, "loss": 0.9054, "step": 388 }, { "epoch": 0.011922275346328308, "grad_norm": 2.646623183660799, "learning_rate": 3.97344228804903e-06, "loss": 0.8949, "step": 389 }, { "epoch": 0.011952923869069512, "grad_norm": 2.878337501045697, "learning_rate": 3.983656792645557e-06, "loss": 0.9149, "step": 390 }, { "epoch": 0.011983572391810715, "grad_norm": 2.817348320722246, "learning_rate": 3.993871297242084e-06, "loss": 0.8229, "step": 391 }, { "epoch": 0.01201422091455192, "grad_norm": 3.1649329560698107, "learning_rate": 4.004085801838612e-06, "loss": 0.8127, "step": 392 }, { "epoch": 0.012044869437293123, "grad_norm": 3.09471142301152, "learning_rate": 4.0143003064351385e-06, "loss": 0.7908, "step": 393 }, { "epoch": 0.012075517960034325, "grad_norm": 3.097526676154771, "learning_rate": 4.024514811031665e-06, "loss": 0.8022, "step": 394 }, { "epoch": 0.01210616648277553, "grad_norm": 2.836267512199918, "learning_rate": 4.034729315628192e-06, "loss": 0.9257, "step": 395 }, { "epoch": 0.012136815005516733, "grad_norm": 2.8740920124367495, "learning_rate": 4.04494382022472e-06, "loss": 0.9031, "step": 396 }, { "epoch": 0.012167463528257937, "grad_norm": 2.899631215834397, "learning_rate": 4.0551583248212465e-06, "loss": 0.9323, "step": 397 }, { "epoch": 0.012198112050999141, "grad_norm": 1.180737801166812, "learning_rate": 4.065372829417773e-06, "loss": 0.5903, "step": 398 }, { "epoch": 0.012228760573740345, "grad_norm": 3.3154310082963176, "learning_rate": 4.0755873340143e-06, "loss": 0.9241, "step": 399 }, { "epoch": 0.012259409096481549, "grad_norm": 1.2495535692961328, "learning_rate": 4.085801838610828e-06, "loss": 0.5871, "step": 400 }, { "epoch": 0.012290057619222753, "grad_norm": 3.145058778429256, "learning_rate": 4.0960163432073544e-06, "loss": 0.8722, "step": 401 }, { "epoch": 0.012320706141963957, "grad_norm": 1.1473574529907897, "learning_rate": 4.106230847803882e-06, "loss": 0.5696, "step": 402 }, { "epoch": 0.01235135466470516, "grad_norm": 3.1611926936694053, "learning_rate": 4.116445352400409e-06, "loss": 0.8728, "step": 403 }, { "epoch": 0.012382003187446365, "grad_norm": 3.147789428183099, "learning_rate": 4.126659856996936e-06, "loss": 0.9528, "step": 404 }, { "epoch": 0.012412651710187569, "grad_norm": 2.5184813900633, "learning_rate": 4.136874361593463e-06, "loss": 0.8784, "step": 405 }, { "epoch": 0.012443300232928773, "grad_norm": 5.731046367423496, "learning_rate": 4.14708886618999e-06, "loss": 0.9508, "step": 406 }, { "epoch": 0.012473948755669977, "grad_norm": 2.788289113040058, "learning_rate": 4.157303370786518e-06, "loss": 0.8914, "step": 407 }, { "epoch": 0.01250459727841118, "grad_norm": 3.0283790110999744, "learning_rate": 4.1675178753830445e-06, "loss": 0.8298, "step": 408 }, { "epoch": 0.012535245801152384, "grad_norm": 2.778791133345945, "learning_rate": 4.177732379979571e-06, "loss": 0.9067, "step": 409 }, { "epoch": 0.012565894323893588, "grad_norm": 2.651708445690339, "learning_rate": 4.187946884576099e-06, "loss": 0.9043, "step": 410 }, { "epoch": 0.012596542846634792, "grad_norm": 3.2551252660566967, "learning_rate": 4.198161389172626e-06, "loss": 0.9046, "step": 411 }, { "epoch": 0.012627191369375996, "grad_norm": 2.7536016669389407, "learning_rate": 4.208375893769152e-06, "loss": 0.8608, "step": 412 }, { "epoch": 0.0126578398921172, "grad_norm": 2.7224451511172276, "learning_rate": 4.218590398365679e-06, "loss": 0.819, "step": 413 }, { "epoch": 0.012688488414858404, "grad_norm": 2.932661532139795, "learning_rate": 4.228804902962207e-06, "loss": 0.8893, "step": 414 }, { "epoch": 0.012719136937599608, "grad_norm": 3.2867140708217484, "learning_rate": 4.239019407558734e-06, "loss": 0.8556, "step": 415 }, { "epoch": 0.012749785460340812, "grad_norm": 2.960744005422641, "learning_rate": 4.24923391215526e-06, "loss": 0.8267, "step": 416 }, { "epoch": 0.012780433983082016, "grad_norm": 1.107527803270888, "learning_rate": 4.259448416751788e-06, "loss": 0.571, "step": 417 }, { "epoch": 0.01281108250582322, "grad_norm": 1.128221203923007, "learning_rate": 4.269662921348315e-06, "loss": 0.5564, "step": 418 }, { "epoch": 0.012841731028564424, "grad_norm": 3.042497745475226, "learning_rate": 4.2798774259448424e-06, "loss": 0.9101, "step": 419 }, { "epoch": 0.012872379551305628, "grad_norm": 3.096263008958437, "learning_rate": 4.290091930541369e-06, "loss": 0.9037, "step": 420 }, { "epoch": 0.012903028074046832, "grad_norm": 3.338341059506719, "learning_rate": 4.300306435137896e-06, "loss": 0.7318, "step": 421 }, { "epoch": 0.012933676596788036, "grad_norm": 3.1289975038719504, "learning_rate": 4.310520939734424e-06, "loss": 0.9266, "step": 422 }, { "epoch": 0.01296432511952924, "grad_norm": 2.9784029503771117, "learning_rate": 4.32073544433095e-06, "loss": 0.9363, "step": 423 }, { "epoch": 0.012994973642270443, "grad_norm": 1.09953541753196, "learning_rate": 4.330949948927477e-06, "loss": 0.6152, "step": 424 }, { "epoch": 0.013025622165011647, "grad_norm": 2.5610935319953896, "learning_rate": 4.341164453524005e-06, "loss": 0.7599, "step": 425 }, { "epoch": 0.01305627068775285, "grad_norm": 2.938096626867143, "learning_rate": 4.351378958120532e-06, "loss": 0.9027, "step": 426 }, { "epoch": 0.013086919210494053, "grad_norm": 3.1339434744162538, "learning_rate": 4.361593462717058e-06, "loss": 0.8451, "step": 427 }, { "epoch": 0.013117567733235257, "grad_norm": 3.195467497167443, "learning_rate": 4.371807967313585e-06, "loss": 0.8248, "step": 428 }, { "epoch": 0.013148216255976461, "grad_norm": 5.0019649027346995, "learning_rate": 4.382022471910113e-06, "loss": 0.938, "step": 429 }, { "epoch": 0.013178864778717665, "grad_norm": 3.285980925845122, "learning_rate": 4.3922369765066396e-06, "loss": 0.7918, "step": 430 }, { "epoch": 0.01320951330145887, "grad_norm": 2.9571937939593105, "learning_rate": 4.402451481103167e-06, "loss": 0.9191, "step": 431 }, { "epoch": 0.013240161824200073, "grad_norm": 2.8765206307111084, "learning_rate": 4.412665985699694e-06, "loss": 0.806, "step": 432 }, { "epoch": 0.013270810346941277, "grad_norm": 3.584199807678198, "learning_rate": 4.422880490296221e-06, "loss": 0.8353, "step": 433 }, { "epoch": 0.013301458869682481, "grad_norm": 3.262627043539584, "learning_rate": 4.433094994892748e-06, "loss": 0.7976, "step": 434 }, { "epoch": 0.013332107392423685, "grad_norm": 2.8098804035975817, "learning_rate": 4.443309499489275e-06, "loss": 0.9153, "step": 435 }, { "epoch": 0.013362755915164889, "grad_norm": 3.949785753745224, "learning_rate": 4.453524004085803e-06, "loss": 0.856, "step": 436 }, { "epoch": 0.013393404437906093, "grad_norm": 2.9203226284938357, "learning_rate": 4.4637385086823296e-06, "loss": 0.9262, "step": 437 }, { "epoch": 0.013424052960647297, "grad_norm": 3.094861590254716, "learning_rate": 4.473953013278856e-06, "loss": 0.9133, "step": 438 }, { "epoch": 0.0134547014833885, "grad_norm": 3.354187999142606, "learning_rate": 4.484167517875383e-06, "loss": 0.9474, "step": 439 }, { "epoch": 0.013485350006129704, "grad_norm": 2.970212581147025, "learning_rate": 4.494382022471911e-06, "loss": 0.776, "step": 440 }, { "epoch": 0.013515998528870908, "grad_norm": 2.8504655623526363, "learning_rate": 4.5045965270684375e-06, "loss": 0.8646, "step": 441 }, { "epoch": 0.013546647051612112, "grad_norm": 2.8898271419501995, "learning_rate": 4.514811031664964e-06, "loss": 0.9165, "step": 442 }, { "epoch": 0.013577295574353316, "grad_norm": 2.6853383411158136, "learning_rate": 4.525025536261491e-06, "loss": 0.8422, "step": 443 }, { "epoch": 0.01360794409709452, "grad_norm": 3.2520016520407817, "learning_rate": 4.535240040858019e-06, "loss": 0.7906, "step": 444 }, { "epoch": 0.013638592619835724, "grad_norm": 2.796046099350526, "learning_rate": 4.5454545454545455e-06, "loss": 0.7794, "step": 445 }, { "epoch": 0.013669241142576928, "grad_norm": 2.8459727573133713, "learning_rate": 4.555669050051073e-06, "loss": 0.9145, "step": 446 }, { "epoch": 0.013699889665318132, "grad_norm": 2.8567568211903827, "learning_rate": 4.5658835546476e-06, "loss": 0.915, "step": 447 }, { "epoch": 0.013730538188059336, "grad_norm": 2.5996944607252166, "learning_rate": 4.5760980592441276e-06, "loss": 0.8483, "step": 448 }, { "epoch": 0.01376118671080054, "grad_norm": 3.0929327072666957, "learning_rate": 4.586312563840654e-06, "loss": 0.8443, "step": 449 }, { "epoch": 0.013791835233541744, "grad_norm": 3.14269667687473, "learning_rate": 4.596527068437181e-06, "loss": 0.8169, "step": 450 }, { "epoch": 0.013822483756282948, "grad_norm": 1.1130164188890606, "learning_rate": 4.606741573033709e-06, "loss": 0.5867, "step": 451 }, { "epoch": 0.013853132279024152, "grad_norm": 3.08583715579591, "learning_rate": 4.6169560776302355e-06, "loss": 0.9182, "step": 452 }, { "epoch": 0.013883780801765356, "grad_norm": 1.0242526582946794, "learning_rate": 4.627170582226762e-06, "loss": 0.5963, "step": 453 }, { "epoch": 0.01391442932450656, "grad_norm": 3.2229578706655357, "learning_rate": 4.637385086823289e-06, "loss": 0.9167, "step": 454 }, { "epoch": 0.013945077847247763, "grad_norm": 3.448179635243589, "learning_rate": 4.647599591419817e-06, "loss": 0.8293, "step": 455 }, { "epoch": 0.013975726369988967, "grad_norm": 3.1820848899471215, "learning_rate": 4.6578140960163435e-06, "loss": 0.7903, "step": 456 }, { "epoch": 0.01400637489273017, "grad_norm": 3.471281418457695, "learning_rate": 4.66802860061287e-06, "loss": 0.7659, "step": 457 }, { "epoch": 0.014037023415471373, "grad_norm": 3.3992705417565103, "learning_rate": 4.678243105209398e-06, "loss": 0.9349, "step": 458 }, { "epoch": 0.014067671938212577, "grad_norm": 2.980343166661902, "learning_rate": 4.688457609805925e-06, "loss": 0.8362, "step": 459 }, { "epoch": 0.014098320460953781, "grad_norm": 2.952374666583926, "learning_rate": 4.6986721144024515e-06, "loss": 0.8013, "step": 460 }, { "epoch": 0.014128968983694985, "grad_norm": 1.0328460162888402, "learning_rate": 4.708886618998979e-06, "loss": 0.582, "step": 461 }, { "epoch": 0.01415961750643619, "grad_norm": 2.8696244504384825, "learning_rate": 4.719101123595506e-06, "loss": 0.8233, "step": 462 }, { "epoch": 0.014190266029177393, "grad_norm": 1.0826929109753551, "learning_rate": 4.7293156281920335e-06, "loss": 0.5767, "step": 463 }, { "epoch": 0.014220914551918597, "grad_norm": 2.8960959539110016, "learning_rate": 4.73953013278856e-06, "loss": 0.9009, "step": 464 }, { "epoch": 0.014251563074659801, "grad_norm": 3.666953827474016, "learning_rate": 4.749744637385087e-06, "loss": 0.8415, "step": 465 }, { "epoch": 0.014282211597401005, "grad_norm": 2.7528536539261355, "learning_rate": 4.759959141981615e-06, "loss": 0.9336, "step": 466 }, { "epoch": 0.014312860120142209, "grad_norm": 2.9914210745375525, "learning_rate": 4.7701736465781415e-06, "loss": 0.7582, "step": 467 }, { "epoch": 0.014343508642883413, "grad_norm": 2.7279115162474215, "learning_rate": 4.780388151174668e-06, "loss": 0.8423, "step": 468 }, { "epoch": 0.014374157165624617, "grad_norm": 3.57434907291983, "learning_rate": 4.790602655771195e-06, "loss": 0.9274, "step": 469 }, { "epoch": 0.01440480568836582, "grad_norm": 2.8104257408627484, "learning_rate": 4.800817160367723e-06, "loss": 0.927, "step": 470 }, { "epoch": 0.014435454211107025, "grad_norm": 3.1718354878775212, "learning_rate": 4.8110316649642495e-06, "loss": 0.8937, "step": 471 }, { "epoch": 0.014466102733848228, "grad_norm": 3.027503797024198, "learning_rate": 4.821246169560776e-06, "loss": 0.8591, "step": 472 }, { "epoch": 0.014496751256589432, "grad_norm": 3.340988495033096, "learning_rate": 4.831460674157304e-06, "loss": 0.8902, "step": 473 }, { "epoch": 0.014527399779330636, "grad_norm": 3.0542153550347995, "learning_rate": 4.841675178753831e-06, "loss": 0.8944, "step": 474 }, { "epoch": 0.01455804830207184, "grad_norm": 3.1332988398310095, "learning_rate": 4.851889683350358e-06, "loss": 0.813, "step": 475 }, { "epoch": 0.014588696824813044, "grad_norm": 2.4289495181070624, "learning_rate": 4.862104187946885e-06, "loss": 0.7932, "step": 476 }, { "epoch": 0.014619345347554248, "grad_norm": 2.9236133797987716, "learning_rate": 4.872318692543412e-06, "loss": 0.8317, "step": 477 }, { "epoch": 0.014649993870295452, "grad_norm": 2.8958325070107396, "learning_rate": 4.8825331971399395e-06, "loss": 0.9119, "step": 478 }, { "epoch": 0.014680642393036656, "grad_norm": 3.2694425317001894, "learning_rate": 4.892747701736466e-06, "loss": 0.9648, "step": 479 }, { "epoch": 0.01471129091577786, "grad_norm": 2.888550415345471, "learning_rate": 4.902962206332994e-06, "loss": 0.852, "step": 480 }, { "epoch": 0.014741939438519064, "grad_norm": 1.0386803967777178, "learning_rate": 4.913176710929521e-06, "loss": 0.5676, "step": 481 }, { "epoch": 0.014772587961260268, "grad_norm": 2.9655098120878662, "learning_rate": 4.9233912155260474e-06, "loss": 0.9501, "step": 482 }, { "epoch": 0.014803236484001472, "grad_norm": 3.083486146377912, "learning_rate": 4.933605720122574e-06, "loss": 0.8985, "step": 483 }, { "epoch": 0.014833885006742676, "grad_norm": 2.8526146425085024, "learning_rate": 4.943820224719101e-06, "loss": 0.8646, "step": 484 }, { "epoch": 0.01486453352948388, "grad_norm": 2.9473752036235723, "learning_rate": 4.954034729315629e-06, "loss": 0.9521, "step": 485 }, { "epoch": 0.014895182052225083, "grad_norm": 1.0444232929877322, "learning_rate": 4.964249233912155e-06, "loss": 0.5651, "step": 486 }, { "epoch": 0.014925830574966287, "grad_norm": 2.795716172907219, "learning_rate": 4.974463738508682e-06, "loss": 0.7568, "step": 487 }, { "epoch": 0.014956479097707491, "grad_norm": 2.9334155532877366, "learning_rate": 4.98467824310521e-06, "loss": 0.8369, "step": 488 }, { "epoch": 0.014987127620448694, "grad_norm": 2.9299257923105584, "learning_rate": 4.994892747701737e-06, "loss": 0.6903, "step": 489 }, { "epoch": 0.015017776143189897, "grad_norm": 3.2340468250860788, "learning_rate": 5.005107252298263e-06, "loss": 0.9858, "step": 490 }, { "epoch": 0.015048424665931101, "grad_norm": 2.6522099811443067, "learning_rate": 5.015321756894791e-06, "loss": 0.9162, "step": 491 }, { "epoch": 0.015079073188672305, "grad_norm": 2.685659940829006, "learning_rate": 5.025536261491319e-06, "loss": 0.8806, "step": 492 }, { "epoch": 0.01510972171141351, "grad_norm": 3.2233546086130502, "learning_rate": 5.0357507660878446e-06, "loss": 0.9236, "step": 493 }, { "epoch": 0.015140370234154713, "grad_norm": 2.628225516132047, "learning_rate": 5.045965270684372e-06, "loss": 0.9174, "step": 494 }, { "epoch": 0.015171018756895917, "grad_norm": 2.767364830457831, "learning_rate": 5.0561797752809e-06, "loss": 0.8704, "step": 495 }, { "epoch": 0.015201667279637121, "grad_norm": 2.5565222313772944, "learning_rate": 5.066394279877427e-06, "loss": 0.7171, "step": 496 }, { "epoch": 0.015232315802378325, "grad_norm": 2.770539276188391, "learning_rate": 5.076608784473953e-06, "loss": 0.8492, "step": 497 }, { "epoch": 0.015262964325119529, "grad_norm": 3.170705818121551, "learning_rate": 5.08682328907048e-06, "loss": 0.8421, "step": 498 }, { "epoch": 0.015293612847860733, "grad_norm": 2.9768001338826675, "learning_rate": 5.097037793667008e-06, "loss": 0.8912, "step": 499 }, { "epoch": 0.015324261370601937, "grad_norm": 3.007056567795919, "learning_rate": 5.1072522982635346e-06, "loss": 0.8174, "step": 500 }, { "epoch": 0.01535490989334314, "grad_norm": 2.4140079863420816, "learning_rate": 5.117466802860061e-06, "loss": 0.8324, "step": 501 }, { "epoch": 0.015385558416084345, "grad_norm": 2.823686715642901, "learning_rate": 5.127681307456589e-06, "loss": 0.8449, "step": 502 }, { "epoch": 0.015416206938825549, "grad_norm": 2.663099685568847, "learning_rate": 5.137895812053117e-06, "loss": 0.9823, "step": 503 }, { "epoch": 0.015446855461566752, "grad_norm": 3.054160764382166, "learning_rate": 5.1481103166496425e-06, "loss": 0.8442, "step": 504 }, { "epoch": 0.015477503984307956, "grad_norm": 1.1161419776729158, "learning_rate": 5.15832482124617e-06, "loss": 0.579, "step": 505 }, { "epoch": 0.01550815250704916, "grad_norm": 2.8038774265936084, "learning_rate": 5.168539325842698e-06, "loss": 0.8442, "step": 506 }, { "epoch": 0.015538801029790364, "grad_norm": 3.0875416984118056, "learning_rate": 5.178753830439224e-06, "loss": 0.8803, "step": 507 }, { "epoch": 0.015569449552531568, "grad_norm": 2.790366391996132, "learning_rate": 5.188968335035751e-06, "loss": 0.7933, "step": 508 }, { "epoch": 0.015600098075272772, "grad_norm": 2.6929937146763905, "learning_rate": 5.199182839632278e-06, "loss": 0.7835, "step": 509 }, { "epoch": 0.015630746598013976, "grad_norm": 2.768157017632214, "learning_rate": 5.209397344228805e-06, "loss": 0.8846, "step": 510 }, { "epoch": 0.015661395120755178, "grad_norm": 1.1662187660538215, "learning_rate": 5.2196118488253326e-06, "loss": 0.583, "step": 511 }, { "epoch": 0.015692043643496384, "grad_norm": 1.2090409097308505, "learning_rate": 5.229826353421859e-06, "loss": 0.5691, "step": 512 }, { "epoch": 0.015722692166237586, "grad_norm": 0.997411867649856, "learning_rate": 5.240040858018387e-06, "loss": 0.5758, "step": 513 }, { "epoch": 0.015753340688978792, "grad_norm": 3.263583392402083, "learning_rate": 5.250255362614913e-06, "loss": 0.8199, "step": 514 }, { "epoch": 0.015783989211719994, "grad_norm": 2.718189600887344, "learning_rate": 5.2604698672114405e-06, "loss": 0.8913, "step": 515 }, { "epoch": 0.0158146377344612, "grad_norm": 2.7347809995400114, "learning_rate": 5.270684371807968e-06, "loss": 0.8671, "step": 516 }, { "epoch": 0.015845286257202402, "grad_norm": 2.5213211971330147, "learning_rate": 5.280898876404494e-06, "loss": 0.8832, "step": 517 }, { "epoch": 0.015875934779943607, "grad_norm": 3.1528725876060646, "learning_rate": 5.291113381001022e-06, "loss": 0.942, "step": 518 }, { "epoch": 0.01590658330268481, "grad_norm": 2.703919785733534, "learning_rate": 5.301327885597549e-06, "loss": 0.8025, "step": 519 }, { "epoch": 0.015937231825426015, "grad_norm": 2.8737458766193194, "learning_rate": 5.311542390194075e-06, "loss": 0.9299, "step": 520 }, { "epoch": 0.015967880348167218, "grad_norm": 3.1358694944620766, "learning_rate": 5.321756894790603e-06, "loss": 0.9045, "step": 521 }, { "epoch": 0.015998528870908423, "grad_norm": 2.8664733049171014, "learning_rate": 5.3319713993871305e-06, "loss": 0.9052, "step": 522 }, { "epoch": 0.016029177393649625, "grad_norm": 3.1698280642171426, "learning_rate": 5.342185903983657e-06, "loss": 0.8567, "step": 523 }, { "epoch": 0.01605982591639083, "grad_norm": 2.719269023736026, "learning_rate": 5.352400408580184e-06, "loss": 0.8134, "step": 524 }, { "epoch": 0.016090474439132033, "grad_norm": 2.8280406450324787, "learning_rate": 5.362614913176712e-06, "loss": 0.8276, "step": 525 }, { "epoch": 0.01612112296187324, "grad_norm": 3.0346837862716938, "learning_rate": 5.3728294177732385e-06, "loss": 0.8867, "step": 526 }, { "epoch": 0.01615177148461444, "grad_norm": 4.002814559771493, "learning_rate": 5.383043922369765e-06, "loss": 0.8305, "step": 527 }, { "epoch": 0.016182420007355647, "grad_norm": 2.5514522139142883, "learning_rate": 5.393258426966292e-06, "loss": 0.8009, "step": 528 }, { "epoch": 0.01621306853009685, "grad_norm": 2.633835597420876, "learning_rate": 5.40347293156282e-06, "loss": 0.8301, "step": 529 }, { "epoch": 0.016243717052838055, "grad_norm": 1.1754344140812931, "learning_rate": 5.413687436159347e-06, "loss": 0.6064, "step": 530 }, { "epoch": 0.016274365575579257, "grad_norm": 1.260386288407654, "learning_rate": 5.423901940755873e-06, "loss": 0.5556, "step": 531 }, { "epoch": 0.016305014098320463, "grad_norm": 3.188025585464316, "learning_rate": 5.434116445352401e-06, "loss": 0.8698, "step": 532 }, { "epoch": 0.016335662621061665, "grad_norm": 1.3281892469177676, "learning_rate": 5.4443309499489285e-06, "loss": 0.5705, "step": 533 }, { "epoch": 0.01636631114380287, "grad_norm": 2.7462738950625596, "learning_rate": 5.4545454545454545e-06, "loss": 0.8929, "step": 534 }, { "epoch": 0.016396959666544073, "grad_norm": 2.904992001849432, "learning_rate": 5.464759959141982e-06, "loss": 0.8524, "step": 535 }, { "epoch": 0.016427608189285275, "grad_norm": 2.7205965641653727, "learning_rate": 5.47497446373851e-06, "loss": 0.7128, "step": 536 }, { "epoch": 0.01645825671202648, "grad_norm": 2.636917527970757, "learning_rate": 5.485188968335036e-06, "loss": 0.9121, "step": 537 }, { "epoch": 0.016488905234767683, "grad_norm": 1.251555152883313, "learning_rate": 5.495403472931563e-06, "loss": 0.5828, "step": 538 }, { "epoch": 0.01651955375750889, "grad_norm": 2.711524991219316, "learning_rate": 5.50561797752809e-06, "loss": 0.8441, "step": 539 }, { "epoch": 0.01655020228025009, "grad_norm": 2.701050640658386, "learning_rate": 5.515832482124618e-06, "loss": 0.7686, "step": 540 }, { "epoch": 0.016580850802991296, "grad_norm": 3.4157544938738695, "learning_rate": 5.5260469867211445e-06, "loss": 0.8951, "step": 541 }, { "epoch": 0.0166114993257325, "grad_norm": 1.1453271308755009, "learning_rate": 5.536261491317671e-06, "loss": 0.5797, "step": 542 }, { "epoch": 0.016642147848473704, "grad_norm": 2.8687257310984715, "learning_rate": 5.546475995914199e-06, "loss": 0.8599, "step": 543 }, { "epoch": 0.016672796371214906, "grad_norm": 2.9557796997833075, "learning_rate": 5.556690500510725e-06, "loss": 0.7395, "step": 544 }, { "epoch": 0.016703444893956112, "grad_norm": 2.986792631796743, "learning_rate": 5.5669050051072524e-06, "loss": 0.8693, "step": 545 }, { "epoch": 0.016734093416697314, "grad_norm": 3.117748474276798, "learning_rate": 5.57711950970378e-06, "loss": 0.8093, "step": 546 }, { "epoch": 0.01676474193943852, "grad_norm": 1.0414129626698536, "learning_rate": 5.587334014300308e-06, "loss": 0.5861, "step": 547 }, { "epoch": 0.016795390462179722, "grad_norm": 2.9857882252922066, "learning_rate": 5.597548518896834e-06, "loss": 0.9073, "step": 548 }, { "epoch": 0.016826038984920928, "grad_norm": 2.9307943455619525, "learning_rate": 5.607763023493361e-06, "loss": 0.8396, "step": 549 }, { "epoch": 0.01685668750766213, "grad_norm": 2.9659640198224815, "learning_rate": 5.617977528089889e-06, "loss": 0.803, "step": 550 }, { "epoch": 0.016887336030403335, "grad_norm": 3.546023300442476, "learning_rate": 5.628192032686415e-06, "loss": 0.8525, "step": 551 }, { "epoch": 0.016917984553144538, "grad_norm": 2.9537243378825835, "learning_rate": 5.6384065372829424e-06, "loss": 0.9181, "step": 552 }, { "epoch": 0.016948633075885743, "grad_norm": 2.8132097850390707, "learning_rate": 5.648621041879469e-06, "loss": 0.8055, "step": 553 }, { "epoch": 0.016979281598626945, "grad_norm": 2.9218232676186693, "learning_rate": 5.658835546475996e-06, "loss": 0.8023, "step": 554 }, { "epoch": 0.01700993012136815, "grad_norm": 2.6295760018362127, "learning_rate": 5.669050051072524e-06, "loss": 0.7687, "step": 555 }, { "epoch": 0.017040578644109353, "grad_norm": 1.0257433997359908, "learning_rate": 5.67926455566905e-06, "loss": 0.5758, "step": 556 }, { "epoch": 0.01707122716685056, "grad_norm": 1.1531412642264425, "learning_rate": 5.689479060265578e-06, "loss": 0.5721, "step": 557 }, { "epoch": 0.01710187568959176, "grad_norm": 2.6263829071353717, "learning_rate": 5.699693564862104e-06, "loss": 0.8577, "step": 558 }, { "epoch": 0.017132524212332967, "grad_norm": 1.0171827490037695, "learning_rate": 5.709908069458632e-06, "loss": 0.5507, "step": 559 }, { "epoch": 0.01716317273507417, "grad_norm": 1.03795608507198, "learning_rate": 5.720122574055159e-06, "loss": 0.5948, "step": 560 }, { "epoch": 0.017193821257815375, "grad_norm": 2.598828276423245, "learning_rate": 5.730337078651685e-06, "loss": 0.8237, "step": 561 }, { "epoch": 0.017224469780556577, "grad_norm": 3.1653601544801253, "learning_rate": 5.740551583248213e-06, "loss": 0.7955, "step": 562 }, { "epoch": 0.017255118303297783, "grad_norm": 3.2619636937465057, "learning_rate": 5.7507660878447404e-06, "loss": 0.879, "step": 563 }, { "epoch": 0.017285766826038985, "grad_norm": 2.861099540146627, "learning_rate": 5.760980592441267e-06, "loss": 0.8892, "step": 564 }, { "epoch": 0.01731641534878019, "grad_norm": 2.894414129241923, "learning_rate": 5.771195097037794e-06, "loss": 0.8186, "step": 565 }, { "epoch": 0.017347063871521393, "grad_norm": 2.392148124633032, "learning_rate": 5.781409601634322e-06, "loss": 0.8013, "step": 566 }, { "epoch": 0.017377712394262595, "grad_norm": 2.899982590344122, "learning_rate": 5.791624106230848e-06, "loss": 0.8622, "step": 567 }, { "epoch": 0.0174083609170038, "grad_norm": 3.7612736050275792, "learning_rate": 5.801838610827375e-06, "loss": 0.8666, "step": 568 }, { "epoch": 0.017439009439745003, "grad_norm": 3.3171995030274952, "learning_rate": 5.812053115423902e-06, "loss": 0.8687, "step": 569 }, { "epoch": 0.01746965796248621, "grad_norm": 2.7945019573265273, "learning_rate": 5.82226762002043e-06, "loss": 0.8247, "step": 570 }, { "epoch": 0.01750030648522741, "grad_norm": 3.458234741947401, "learning_rate": 5.832482124616956e-06, "loss": 0.8104, "step": 571 }, { "epoch": 0.017530955007968616, "grad_norm": 3.0583534966361148, "learning_rate": 5.842696629213483e-06, "loss": 0.8188, "step": 572 }, { "epoch": 0.01756160353070982, "grad_norm": 2.810251125210679, "learning_rate": 5.852911133810011e-06, "loss": 0.7986, "step": 573 }, { "epoch": 0.017592252053451024, "grad_norm": 1.2484028505736617, "learning_rate": 5.863125638406538e-06, "loss": 0.5671, "step": 574 }, { "epoch": 0.017622900576192226, "grad_norm": 1.2033147952600964, "learning_rate": 5.873340143003064e-06, "loss": 0.57, "step": 575 }, { "epoch": 0.017653549098933432, "grad_norm": 3.2793462920357803, "learning_rate": 5.883554647599592e-06, "loss": 0.9753, "step": 576 }, { "epoch": 0.017684197621674634, "grad_norm": 3.0974791169839637, "learning_rate": 5.89376915219612e-06, "loss": 0.8733, "step": 577 }, { "epoch": 0.01771484614441584, "grad_norm": 2.749834964833258, "learning_rate": 5.9039836567926455e-06, "loss": 0.948, "step": 578 }, { "epoch": 0.017745494667157042, "grad_norm": 2.85623502138601, "learning_rate": 5.914198161389173e-06, "loss": 0.8174, "step": 579 }, { "epoch": 0.017776143189898248, "grad_norm": 2.6678156643880655, "learning_rate": 5.924412665985701e-06, "loss": 0.7862, "step": 580 }, { "epoch": 0.01780679171263945, "grad_norm": 2.761894998742353, "learning_rate": 5.934627170582227e-06, "loss": 0.9069, "step": 581 }, { "epoch": 0.017837440235380655, "grad_norm": 2.6215354906156514, "learning_rate": 5.944841675178754e-06, "loss": 0.8139, "step": 582 }, { "epoch": 0.017868088758121858, "grad_norm": 3.2719839757402185, "learning_rate": 5.955056179775281e-06, "loss": 0.7598, "step": 583 }, { "epoch": 0.017898737280863063, "grad_norm": 2.5819031582809235, "learning_rate": 5.965270684371809e-06, "loss": 0.8935, "step": 584 }, { "epoch": 0.017929385803604266, "grad_norm": 3.1348148453851077, "learning_rate": 5.9754851889683355e-06, "loss": 0.729, "step": 585 }, { "epoch": 0.01796003432634547, "grad_norm": 2.774641341373821, "learning_rate": 5.985699693564862e-06, "loss": 0.8368, "step": 586 }, { "epoch": 0.017990682849086673, "grad_norm": 2.884278109874713, "learning_rate": 5.99591419816139e-06, "loss": 0.8472, "step": 587 }, { "epoch": 0.01802133137182788, "grad_norm": 2.76196141967181, "learning_rate": 6.006128702757916e-06, "loss": 0.7917, "step": 588 }, { "epoch": 0.01805197989456908, "grad_norm": 3.572557269279883, "learning_rate": 6.0163432073544435e-06, "loss": 0.876, "step": 589 }, { "epoch": 0.018082628417310287, "grad_norm": 2.7195154064941773, "learning_rate": 6.026557711950971e-06, "loss": 0.8523, "step": 590 }, { "epoch": 0.01811327694005149, "grad_norm": 2.603345553891082, "learning_rate": 6.036772216547499e-06, "loss": 0.848, "step": 591 }, { "epoch": 0.018143925462792695, "grad_norm": 2.772086847124186, "learning_rate": 6.046986721144025e-06, "loss": 0.9359, "step": 592 }, { "epoch": 0.018174573985533897, "grad_norm": 2.7969987348880316, "learning_rate": 6.057201225740552e-06, "loss": 0.8389, "step": 593 }, { "epoch": 0.018205222508275103, "grad_norm": 1.6176007860306407, "learning_rate": 6.06741573033708e-06, "loss": 0.5551, "step": 594 }, { "epoch": 0.018235871031016305, "grad_norm": 1.5136407140855554, "learning_rate": 6.077630234933606e-06, "loss": 0.5612, "step": 595 }, { "epoch": 0.01826651955375751, "grad_norm": 2.8026386450997585, "learning_rate": 6.0878447395301335e-06, "loss": 0.9343, "step": 596 }, { "epoch": 0.018297168076498713, "grad_norm": 2.8882919707140675, "learning_rate": 6.09805924412666e-06, "loss": 0.7715, "step": 597 }, { "epoch": 0.01832781659923992, "grad_norm": 0.9996479262653694, "learning_rate": 6.108273748723187e-06, "loss": 0.5539, "step": 598 }, { "epoch": 0.01835846512198112, "grad_norm": 2.768014806606586, "learning_rate": 6.118488253319715e-06, "loss": 0.8599, "step": 599 }, { "epoch": 0.018389113644722323, "grad_norm": 3.1423799684036924, "learning_rate": 6.1287027579162415e-06, "loss": 0.8895, "step": 600 }, { "epoch": 0.01841976216746353, "grad_norm": 2.730528612615601, "learning_rate": 6.138917262512769e-06, "loss": 0.7893, "step": 601 }, { "epoch": 0.01845041069020473, "grad_norm": 3.276221868193066, "learning_rate": 6.149131767109295e-06, "loss": 0.9391, "step": 602 }, { "epoch": 0.018481059212945936, "grad_norm": 2.630175617409509, "learning_rate": 6.159346271705823e-06, "loss": 0.8645, "step": 603 }, { "epoch": 0.01851170773568714, "grad_norm": 2.99143463139179, "learning_rate": 6.16956077630235e-06, "loss": 0.8033, "step": 604 }, { "epoch": 0.018542356258428344, "grad_norm": 2.8508012371824725, "learning_rate": 6.179775280898876e-06, "loss": 0.8066, "step": 605 }, { "epoch": 0.018573004781169546, "grad_norm": 2.89285752515505, "learning_rate": 6.189989785495404e-06, "loss": 0.8479, "step": 606 }, { "epoch": 0.018603653303910752, "grad_norm": 2.9528851383172654, "learning_rate": 6.2002042900919315e-06, "loss": 0.821, "step": 607 }, { "epoch": 0.018634301826651954, "grad_norm": 2.0526909725217926, "learning_rate": 6.210418794688458e-06, "loss": 0.5781, "step": 608 }, { "epoch": 0.01866495034939316, "grad_norm": 3.460760246833261, "learning_rate": 6.220633299284985e-06, "loss": 0.6983, "step": 609 }, { "epoch": 0.018695598872134362, "grad_norm": 1.403228363636226, "learning_rate": 6.230847803881513e-06, "loss": 0.5758, "step": 610 }, { "epoch": 0.018726247394875568, "grad_norm": 2.21442261225321, "learning_rate": 6.2410623084780395e-06, "loss": 0.7274, "step": 611 }, { "epoch": 0.01875689591761677, "grad_norm": 1.0077094847678891, "learning_rate": 6.251276813074566e-06, "loss": 0.5477, "step": 612 }, { "epoch": 0.018787544440357976, "grad_norm": 2.852900006230864, "learning_rate": 6.261491317671093e-06, "loss": 0.8067, "step": 613 }, { "epoch": 0.018818192963099178, "grad_norm": 2.7407433017281573, "learning_rate": 6.271705822267621e-06, "loss": 0.9382, "step": 614 }, { "epoch": 0.018848841485840383, "grad_norm": 1.4432757695325467, "learning_rate": 6.2819203268641474e-06, "loss": 0.5788, "step": 615 }, { "epoch": 0.018879490008581586, "grad_norm": 2.6831020226317905, "learning_rate": 6.292134831460674e-06, "loss": 0.8722, "step": 616 }, { "epoch": 0.01891013853132279, "grad_norm": 1.3067365892669007, "learning_rate": 6.302349336057202e-06, "loss": 0.543, "step": 617 }, { "epoch": 0.018940787054063993, "grad_norm": 1.2654551436564394, "learning_rate": 6.3125638406537295e-06, "loss": 0.5682, "step": 618 }, { "epoch": 0.0189714355768052, "grad_norm": 2.6700287262027214, "learning_rate": 6.322778345250255e-06, "loss": 0.9075, "step": 619 }, { "epoch": 0.0190020840995464, "grad_norm": 3.164946438256333, "learning_rate": 6.332992849846783e-06, "loss": 0.7347, "step": 620 }, { "epoch": 0.019032732622287607, "grad_norm": 0.9984360528793536, "learning_rate": 6.343207354443311e-06, "loss": 0.5557, "step": 621 }, { "epoch": 0.01906338114502881, "grad_norm": 2.807612021980321, "learning_rate": 6.353421859039837e-06, "loss": 0.8786, "step": 622 }, { "epoch": 0.019094029667770015, "grad_norm": 3.550384550850249, "learning_rate": 6.363636363636364e-06, "loss": 0.9101, "step": 623 }, { "epoch": 0.019124678190511217, "grad_norm": 2.9589132313521525, "learning_rate": 6.373850868232892e-06, "loss": 0.9171, "step": 624 }, { "epoch": 0.019155326713252423, "grad_norm": 1.2355936921188393, "learning_rate": 6.384065372829419e-06, "loss": 0.5483, "step": 625 }, { "epoch": 0.019185975235993625, "grad_norm": 1.2562870119108291, "learning_rate": 6.3942798774259454e-06, "loss": 0.5616, "step": 626 }, { "epoch": 0.01921662375873483, "grad_norm": 3.189611270872149, "learning_rate": 6.404494382022472e-06, "loss": 0.8781, "step": 627 }, { "epoch": 0.019247272281476033, "grad_norm": 2.7894275274347624, "learning_rate": 6.414708886619e-06, "loss": 0.7685, "step": 628 }, { "epoch": 0.01927792080421724, "grad_norm": 2.8242915971659026, "learning_rate": 6.424923391215527e-06, "loss": 0.8482, "step": 629 }, { "epoch": 0.01930856932695844, "grad_norm": 2.7991407439137745, "learning_rate": 6.435137895812053e-06, "loss": 0.9109, "step": 630 }, { "epoch": 0.019339217849699643, "grad_norm": 2.6843293521114377, "learning_rate": 6.445352400408581e-06, "loss": 0.8108, "step": 631 }, { "epoch": 0.01936986637244085, "grad_norm": 1.0283206808343337, "learning_rate": 6.455566905005107e-06, "loss": 0.5456, "step": 632 }, { "epoch": 0.01940051489518205, "grad_norm": 2.8247267185715086, "learning_rate": 6.465781409601635e-06, "loss": 0.8759, "step": 633 }, { "epoch": 0.019431163417923256, "grad_norm": 2.517981728055398, "learning_rate": 6.475995914198162e-06, "loss": 0.8541, "step": 634 }, { "epoch": 0.01946181194066446, "grad_norm": 2.7346535408829284, "learning_rate": 6.48621041879469e-06, "loss": 0.8419, "step": 635 }, { "epoch": 0.019492460463405664, "grad_norm": 2.7057011425086874, "learning_rate": 6.496424923391216e-06, "loss": 0.7794, "step": 636 }, { "epoch": 0.019523108986146866, "grad_norm": 2.7885735566767282, "learning_rate": 6.506639427987743e-06, "loss": 0.7967, "step": 637 }, { "epoch": 0.019553757508888072, "grad_norm": 3.041792150725988, "learning_rate": 6.51685393258427e-06, "loss": 0.9763, "step": 638 }, { "epoch": 0.019584406031629274, "grad_norm": 2.6413226799912017, "learning_rate": 6.527068437180797e-06, "loss": 0.7732, "step": 639 }, { "epoch": 0.01961505455437048, "grad_norm": 2.7773728180774815, "learning_rate": 6.537282941777325e-06, "loss": 0.7418, "step": 640 }, { "epoch": 0.019645703077111682, "grad_norm": 1.1934708388950996, "learning_rate": 6.547497446373851e-06, "loss": 0.5592, "step": 641 }, { "epoch": 0.019676351599852888, "grad_norm": 1.112349364020128, "learning_rate": 6.557711950970378e-06, "loss": 0.5809, "step": 642 }, { "epoch": 0.01970700012259409, "grad_norm": 2.6273367435610187, "learning_rate": 6.567926455566905e-06, "loss": 0.8465, "step": 643 }, { "epoch": 0.019737648645335296, "grad_norm": 2.8488234349273176, "learning_rate": 6.5781409601634326e-06, "loss": 0.8247, "step": 644 }, { "epoch": 0.019768297168076498, "grad_norm": 2.949485471383936, "learning_rate": 6.58835546475996e-06, "loss": 0.8318, "step": 645 }, { "epoch": 0.019798945690817703, "grad_norm": 2.6173325685527837, "learning_rate": 6.598569969356486e-06, "loss": 0.7565, "step": 646 }, { "epoch": 0.019829594213558906, "grad_norm": 2.6205432344550603, "learning_rate": 6.608784473953014e-06, "loss": 0.8091, "step": 647 }, { "epoch": 0.01986024273630011, "grad_norm": 2.715241952471307, "learning_rate": 6.618998978549541e-06, "loss": 0.8474, "step": 648 }, { "epoch": 0.019890891259041314, "grad_norm": 2.83073456259497, "learning_rate": 6.629213483146067e-06, "loss": 0.926, "step": 649 }, { "epoch": 0.01992153978178252, "grad_norm": 3.425251747818931, "learning_rate": 6.639427987742595e-06, "loss": 0.9049, "step": 650 }, { "epoch": 0.01995218830452372, "grad_norm": 3.0564889529392376, "learning_rate": 6.649642492339123e-06, "loss": 0.8952, "step": 651 }, { "epoch": 0.019982836827264927, "grad_norm": 2.9826188894107597, "learning_rate": 6.659856996935649e-06, "loss": 0.8468, "step": 652 }, { "epoch": 0.02001348535000613, "grad_norm": 3.1087379914693116, "learning_rate": 6.670071501532176e-06, "loss": 0.8054, "step": 653 }, { "epoch": 0.020044133872747335, "grad_norm": 1.213320805171772, "learning_rate": 6.680286006128704e-06, "loss": 0.5433, "step": 654 }, { "epoch": 0.020074782395488537, "grad_norm": 2.5524467000391, "learning_rate": 6.6905005107252305e-06, "loss": 0.8343, "step": 655 }, { "epoch": 0.020105430918229743, "grad_norm": 2.4466106410054977, "learning_rate": 6.700715015321757e-06, "loss": 0.8358, "step": 656 }, { "epoch": 0.020136079440970945, "grad_norm": 2.8915819804631195, "learning_rate": 6.710929519918284e-06, "loss": 0.9236, "step": 657 }, { "epoch": 0.02016672796371215, "grad_norm": 2.7561821256361974, "learning_rate": 6.721144024514812e-06, "loss": 0.845, "step": 658 }, { "epoch": 0.020197376486453353, "grad_norm": 2.536582865711595, "learning_rate": 6.7313585291113385e-06, "loss": 0.7972, "step": 659 }, { "epoch": 0.02022802500919456, "grad_norm": 2.673525040282449, "learning_rate": 6.741573033707865e-06, "loss": 0.8967, "step": 660 }, { "epoch": 0.02025867353193576, "grad_norm": 2.331188146839733, "learning_rate": 6.751787538304393e-06, "loss": 0.861, "step": 661 }, { "epoch": 0.020289322054676963, "grad_norm": 2.953796728373752, "learning_rate": 6.7620020429009206e-06, "loss": 0.8163, "step": 662 }, { "epoch": 0.02031997057741817, "grad_norm": 2.2868528541214146, "learning_rate": 6.7722165474974465e-06, "loss": 0.7793, "step": 663 }, { "epoch": 0.02035061910015937, "grad_norm": 1.0639715278901762, "learning_rate": 6.782431052093974e-06, "loss": 0.5647, "step": 664 }, { "epoch": 0.020381267622900576, "grad_norm": 2.7423584796980314, "learning_rate": 6.792645556690502e-06, "loss": 0.894, "step": 665 }, { "epoch": 0.02041191614564178, "grad_norm": 2.2674153252479523, "learning_rate": 6.802860061287028e-06, "loss": 0.7764, "step": 666 }, { "epoch": 0.020442564668382984, "grad_norm": 2.6683260263425566, "learning_rate": 6.813074565883555e-06, "loss": 0.8451, "step": 667 }, { "epoch": 0.020473213191124186, "grad_norm": 3.096224768372658, "learning_rate": 6.823289070480082e-06, "loss": 0.8003, "step": 668 }, { "epoch": 0.020503861713865392, "grad_norm": 3.460956544474347, "learning_rate": 6.83350357507661e-06, "loss": 0.8372, "step": 669 }, { "epoch": 0.020534510236606594, "grad_norm": 2.789233343735804, "learning_rate": 6.8437180796731365e-06, "loss": 0.8058, "step": 670 }, { "epoch": 0.0205651587593478, "grad_norm": 2.783762162725732, "learning_rate": 6.853932584269663e-06, "loss": 0.7723, "step": 671 }, { "epoch": 0.020595807282089002, "grad_norm": 2.7222759403234984, "learning_rate": 6.864147088866191e-06, "loss": 0.8587, "step": 672 }, { "epoch": 0.020626455804830208, "grad_norm": 2.786726760322622, "learning_rate": 6.874361593462717e-06, "loss": 0.8461, "step": 673 }, { "epoch": 0.02065710432757141, "grad_norm": 2.8801111140139772, "learning_rate": 6.8845760980592445e-06, "loss": 0.8033, "step": 674 }, { "epoch": 0.020687752850312616, "grad_norm": 2.817091054288429, "learning_rate": 6.894790602655772e-06, "loss": 0.7478, "step": 675 }, { "epoch": 0.020718401373053818, "grad_norm": 2.669438590994199, "learning_rate": 6.905005107252298e-06, "loss": 0.8775, "step": 676 }, { "epoch": 0.020749049895795024, "grad_norm": 2.5952635953450893, "learning_rate": 6.915219611848826e-06, "loss": 0.8536, "step": 677 }, { "epoch": 0.020779698418536226, "grad_norm": 2.8293359197967454, "learning_rate": 6.925434116445353e-06, "loss": 0.8485, "step": 678 }, { "epoch": 0.02081034694127743, "grad_norm": 2.598128121441444, "learning_rate": 6.935648621041881e-06, "loss": 0.8077, "step": 679 }, { "epoch": 0.020840995464018634, "grad_norm": 2.835352084378578, "learning_rate": 6.945863125638407e-06, "loss": 0.8177, "step": 680 }, { "epoch": 0.02087164398675984, "grad_norm": 2.9649314779219784, "learning_rate": 6.9560776302349345e-06, "loss": 0.7993, "step": 681 }, { "epoch": 0.02090229250950104, "grad_norm": 2.656014321498177, "learning_rate": 6.966292134831461e-06, "loss": 0.8394, "step": 682 }, { "epoch": 0.020932941032242247, "grad_norm": 2.5038050851265297, "learning_rate": 6.976506639427988e-06, "loss": 0.7937, "step": 683 }, { "epoch": 0.02096358955498345, "grad_norm": 2.6303175152933833, "learning_rate": 6.986721144024516e-06, "loss": 0.8562, "step": 684 }, { "epoch": 0.020994238077724655, "grad_norm": 2.8313430829237207, "learning_rate": 6.9969356486210425e-06, "loss": 0.8633, "step": 685 }, { "epoch": 0.021024886600465857, "grad_norm": 2.4205584412312073, "learning_rate": 7.007150153217569e-06, "loss": 0.8408, "step": 686 }, { "epoch": 0.021055535123207063, "grad_norm": 2.284994523179326, "learning_rate": 7.017364657814096e-06, "loss": 0.8391, "step": 687 }, { "epoch": 0.021086183645948265, "grad_norm": 2.717770440063039, "learning_rate": 7.027579162410624e-06, "loss": 0.7649, "step": 688 }, { "epoch": 0.02111683216868947, "grad_norm": 2.626154837044685, "learning_rate": 7.037793667007151e-06, "loss": 0.7578, "step": 689 }, { "epoch": 0.021147480691430673, "grad_norm": 2.769978419899632, "learning_rate": 7.048008171603677e-06, "loss": 0.8155, "step": 690 }, { "epoch": 0.02117812921417188, "grad_norm": 2.7721533299361587, "learning_rate": 7.058222676200205e-06, "loss": 0.8543, "step": 691 }, { "epoch": 0.02120877773691308, "grad_norm": 1.1440968068972854, "learning_rate": 7.0684371807967325e-06, "loss": 0.5798, "step": 692 }, { "epoch": 0.021239426259654283, "grad_norm": 2.9201016320741013, "learning_rate": 7.078651685393258e-06, "loss": 0.8378, "step": 693 }, { "epoch": 0.02127007478239549, "grad_norm": 2.5047227255074316, "learning_rate": 7.088866189989786e-06, "loss": 0.8512, "step": 694 }, { "epoch": 0.02130072330513669, "grad_norm": 2.5520623067700643, "learning_rate": 7.099080694586314e-06, "loss": 0.8053, "step": 695 }, { "epoch": 0.021331371827877896, "grad_norm": 2.8273848300911806, "learning_rate": 7.1092951991828404e-06, "loss": 0.8415, "step": 696 }, { "epoch": 0.0213620203506191, "grad_norm": 2.800361618206784, "learning_rate": 7.119509703779367e-06, "loss": 0.857, "step": 697 }, { "epoch": 0.021392668873360304, "grad_norm": 2.6708916406627683, "learning_rate": 7.129724208375894e-06, "loss": 0.7528, "step": 698 }, { "epoch": 0.021423317396101507, "grad_norm": 1.0163027499694144, "learning_rate": 7.139938712972422e-06, "loss": 0.5596, "step": 699 }, { "epoch": 0.021453965918842712, "grad_norm": 2.714056486084067, "learning_rate": 7.150153217568948e-06, "loss": 0.8528, "step": 700 }, { "epoch": 0.021484614441583914, "grad_norm": 2.938723866408357, "learning_rate": 7.160367722165475e-06, "loss": 0.8392, "step": 701 }, { "epoch": 0.02151526296432512, "grad_norm": 2.6717498622124007, "learning_rate": 7.170582226762003e-06, "loss": 0.8429, "step": 702 }, { "epoch": 0.021545911487066322, "grad_norm": 2.612437461807258, "learning_rate": 7.180796731358529e-06, "loss": 0.8979, "step": 703 }, { "epoch": 0.021576560009807528, "grad_norm": 2.664763030538786, "learning_rate": 7.191011235955056e-06, "loss": 0.7547, "step": 704 }, { "epoch": 0.02160720853254873, "grad_norm": 2.5189912052957264, "learning_rate": 7.201225740551584e-06, "loss": 0.7004, "step": 705 }, { "epoch": 0.021637857055289936, "grad_norm": 2.874670182387559, "learning_rate": 7.211440245148112e-06, "loss": 0.8955, "step": 706 }, { "epoch": 0.021668505578031138, "grad_norm": 2.401389247097066, "learning_rate": 7.2216547497446376e-06, "loss": 0.8334, "step": 707 }, { "epoch": 0.021699154100772344, "grad_norm": 2.4694361220469965, "learning_rate": 7.231869254341165e-06, "loss": 0.8837, "step": 708 }, { "epoch": 0.021729802623513546, "grad_norm": 2.580423395889507, "learning_rate": 7.242083758937693e-06, "loss": 0.8679, "step": 709 }, { "epoch": 0.02176045114625475, "grad_norm": 2.6574643390849877, "learning_rate": 7.252298263534219e-06, "loss": 0.8469, "step": 710 }, { "epoch": 0.021791099668995954, "grad_norm": 2.5824103450648526, "learning_rate": 7.262512768130746e-06, "loss": 0.8683, "step": 711 }, { "epoch": 0.02182174819173716, "grad_norm": 2.459152536997915, "learning_rate": 7.272727272727273e-06, "loss": 0.8139, "step": 712 }, { "epoch": 0.02185239671447836, "grad_norm": 2.4166547444867503, "learning_rate": 7.282941777323801e-06, "loss": 0.8002, "step": 713 }, { "epoch": 0.021883045237219567, "grad_norm": 2.7827364521945226, "learning_rate": 7.293156281920328e-06, "loss": 0.8186, "step": 714 }, { "epoch": 0.02191369375996077, "grad_norm": 2.8855699938925845, "learning_rate": 7.303370786516854e-06, "loss": 0.8956, "step": 715 }, { "epoch": 0.021944342282701975, "grad_norm": 2.4432746684770983, "learning_rate": 7.313585291113382e-06, "loss": 0.8015, "step": 716 }, { "epoch": 0.021974990805443177, "grad_norm": 2.6078550849793642, "learning_rate": 7.323799795709908e-06, "loss": 0.9135, "step": 717 }, { "epoch": 0.022005639328184383, "grad_norm": 2.519133670855868, "learning_rate": 7.3340143003064355e-06, "loss": 0.8627, "step": 718 }, { "epoch": 0.022036287850925585, "grad_norm": 2.694599306419332, "learning_rate": 7.344228804902963e-06, "loss": 0.8938, "step": 719 }, { "epoch": 0.02206693637366679, "grad_norm": 2.9385697498089747, "learning_rate": 7.354443309499489e-06, "loss": 0.8138, "step": 720 }, { "epoch": 0.022097584896407993, "grad_norm": 3.909301591578764, "learning_rate": 7.364657814096017e-06, "loss": 0.7889, "step": 721 }, { "epoch": 0.0221282334191492, "grad_norm": 2.6572416916499892, "learning_rate": 7.374872318692544e-06, "loss": 0.7914, "step": 722 }, { "epoch": 0.0221588819418904, "grad_norm": 2.55358037634514, "learning_rate": 7.385086823289071e-06, "loss": 0.8487, "step": 723 }, { "epoch": 0.022189530464631606, "grad_norm": 2.642090787198194, "learning_rate": 7.395301327885598e-06, "loss": 0.7767, "step": 724 }, { "epoch": 0.02222017898737281, "grad_norm": 2.793396334148574, "learning_rate": 7.4055158324821256e-06, "loss": 0.8561, "step": 725 }, { "epoch": 0.02225082751011401, "grad_norm": 2.9901552475169764, "learning_rate": 7.415730337078652e-06, "loss": 0.9384, "step": 726 }, { "epoch": 0.022281476032855217, "grad_norm": 1.1491853303491966, "learning_rate": 7.425944841675179e-06, "loss": 0.5879, "step": 727 }, { "epoch": 0.02231212455559642, "grad_norm": 2.675949687916952, "learning_rate": 7.436159346271706e-06, "loss": 0.8772, "step": 728 }, { "epoch": 0.022342773078337624, "grad_norm": 2.4527923846353454, "learning_rate": 7.4463738508682335e-06, "loss": 0.8013, "step": 729 }, { "epoch": 0.022373421601078827, "grad_norm": 2.772561984425348, "learning_rate": 7.456588355464761e-06, "loss": 0.8045, "step": 730 }, { "epoch": 0.022404070123820032, "grad_norm": 2.3679091558353984, "learning_rate": 7.466802860061287e-06, "loss": 0.8082, "step": 731 }, { "epoch": 0.022434718646561234, "grad_norm": 2.424979845145818, "learning_rate": 7.477017364657815e-06, "loss": 0.8533, "step": 732 }, { "epoch": 0.02246536716930244, "grad_norm": 2.94724103547818, "learning_rate": 7.487231869254342e-06, "loss": 0.7972, "step": 733 }, { "epoch": 0.022496015692043642, "grad_norm": 2.556031971029626, "learning_rate": 7.497446373850868e-06, "loss": 0.8536, "step": 734 }, { "epoch": 0.022526664214784848, "grad_norm": 3.050099004606253, "learning_rate": 7.507660878447396e-06, "loss": 0.7765, "step": 735 }, { "epoch": 0.02255731273752605, "grad_norm": 1.1364557491821465, "learning_rate": 7.5178753830439235e-06, "loss": 0.5574, "step": 736 }, { "epoch": 0.022587961260267256, "grad_norm": 2.430942630719383, "learning_rate": 7.5280898876404495e-06, "loss": 0.8897, "step": 737 }, { "epoch": 0.022618609783008458, "grad_norm": 2.6203708101857206, "learning_rate": 7.538304392236977e-06, "loss": 0.8645, "step": 738 }, { "epoch": 0.022649258305749664, "grad_norm": 2.7324650267593076, "learning_rate": 7.548518896833505e-06, "loss": 0.7692, "step": 739 }, { "epoch": 0.022679906828490866, "grad_norm": 2.7644798057649145, "learning_rate": 7.5587334014300315e-06, "loss": 0.7196, "step": 740 }, { "epoch": 0.02271055535123207, "grad_norm": 1.0458700929406632, "learning_rate": 7.568947906026558e-06, "loss": 0.5399, "step": 741 }, { "epoch": 0.022741203873973274, "grad_norm": 3.067656427248182, "learning_rate": 7.579162410623085e-06, "loss": 0.8563, "step": 742 }, { "epoch": 0.02277185239671448, "grad_norm": 2.5480879320346483, "learning_rate": 7.589376915219613e-06, "loss": 0.8486, "step": 743 }, { "epoch": 0.02280250091945568, "grad_norm": 2.8913791709970704, "learning_rate": 7.5995914198161395e-06, "loss": 0.9296, "step": 744 }, { "epoch": 0.022833149442196887, "grad_norm": 2.7166576621215692, "learning_rate": 7.609805924412666e-06, "loss": 0.8437, "step": 745 }, { "epoch": 0.02286379796493809, "grad_norm": 2.868842318468182, "learning_rate": 7.620020429009194e-06, "loss": 0.9104, "step": 746 }, { "epoch": 0.022894446487679295, "grad_norm": 2.7230078276092735, "learning_rate": 7.63023493360572e-06, "loss": 0.8531, "step": 747 }, { "epoch": 0.022925095010420497, "grad_norm": 2.461871043070862, "learning_rate": 7.640449438202247e-06, "loss": 0.7421, "step": 748 }, { "epoch": 0.022955743533161703, "grad_norm": 2.554270964492917, "learning_rate": 7.650663942798775e-06, "loss": 0.8275, "step": 749 }, { "epoch": 0.022986392055902905, "grad_norm": 2.6039206706155626, "learning_rate": 7.660878447395303e-06, "loss": 0.8352, "step": 750 }, { "epoch": 0.02301704057864411, "grad_norm": 2.540145656757064, "learning_rate": 7.671092951991829e-06, "loss": 0.8256, "step": 751 }, { "epoch": 0.023047689101385313, "grad_norm": 2.7214261591154734, "learning_rate": 7.681307456588356e-06, "loss": 0.8706, "step": 752 }, { "epoch": 0.02307833762412652, "grad_norm": 2.7130829383392885, "learning_rate": 7.691521961184884e-06, "loss": 0.7934, "step": 753 }, { "epoch": 0.02310898614686772, "grad_norm": 2.8708390296858273, "learning_rate": 7.70173646578141e-06, "loss": 0.8044, "step": 754 }, { "epoch": 0.023139634669608927, "grad_norm": 3.0505888869524775, "learning_rate": 7.711950970377937e-06, "loss": 0.8254, "step": 755 }, { "epoch": 0.02317028319235013, "grad_norm": 3.6816750037513226, "learning_rate": 7.722165474974465e-06, "loss": 0.7782, "step": 756 }, { "epoch": 0.02320093171509133, "grad_norm": 2.789910921702266, "learning_rate": 7.732379979570993e-06, "loss": 0.7376, "step": 757 }, { "epoch": 0.023231580237832537, "grad_norm": 2.9090057000385916, "learning_rate": 7.742594484167519e-06, "loss": 0.8103, "step": 758 }, { "epoch": 0.02326222876057374, "grad_norm": 2.6081153061363542, "learning_rate": 7.752808988764046e-06, "loss": 0.7587, "step": 759 }, { "epoch": 0.023292877283314944, "grad_norm": 2.6110908510103603, "learning_rate": 7.763023493360572e-06, "loss": 0.909, "step": 760 }, { "epoch": 0.023323525806056147, "grad_norm": 2.9344419178507217, "learning_rate": 7.7732379979571e-06, "loss": 0.7932, "step": 761 }, { "epoch": 0.023354174328797352, "grad_norm": 2.7542743692200546, "learning_rate": 7.783452502553627e-06, "loss": 0.8531, "step": 762 }, { "epoch": 0.023384822851538555, "grad_norm": 2.871857832188165, "learning_rate": 7.793667007150153e-06, "loss": 0.9389, "step": 763 }, { "epoch": 0.02341547137427976, "grad_norm": 1.2972420445787027, "learning_rate": 7.803881511746681e-06, "loss": 0.5451, "step": 764 }, { "epoch": 0.023446119897020962, "grad_norm": 2.663419980525243, "learning_rate": 7.814096016343207e-06, "loss": 0.8129, "step": 765 }, { "epoch": 0.023476768419762168, "grad_norm": 1.207231539583585, "learning_rate": 7.824310520939735e-06, "loss": 0.5618, "step": 766 }, { "epoch": 0.02350741694250337, "grad_norm": 2.899009709608621, "learning_rate": 7.834525025536262e-06, "loss": 0.9287, "step": 767 }, { "epoch": 0.023538065465244576, "grad_norm": 2.494082803195622, "learning_rate": 7.844739530132788e-06, "loss": 0.8086, "step": 768 }, { "epoch": 0.023568713987985778, "grad_norm": 2.623792300924052, "learning_rate": 7.854954034729316e-06, "loss": 0.7774, "step": 769 }, { "epoch": 0.023599362510726984, "grad_norm": 2.610385170073888, "learning_rate": 7.865168539325843e-06, "loss": 0.7268, "step": 770 }, { "epoch": 0.023630011033468186, "grad_norm": 2.633520870832457, "learning_rate": 7.87538304392237e-06, "loss": 0.7838, "step": 771 }, { "epoch": 0.02366065955620939, "grad_norm": 2.624973703497371, "learning_rate": 7.885597548518897e-06, "loss": 0.9103, "step": 772 }, { "epoch": 0.023691308078950594, "grad_norm": 1.5227679154016525, "learning_rate": 7.895812053115425e-06, "loss": 0.5488, "step": 773 }, { "epoch": 0.0237219566016918, "grad_norm": 2.479268014090245, "learning_rate": 7.906026557711952e-06, "loss": 0.8612, "step": 774 }, { "epoch": 0.023752605124433, "grad_norm": 2.8903049664040714, "learning_rate": 7.916241062308478e-06, "loss": 0.8059, "step": 775 }, { "epoch": 0.023783253647174207, "grad_norm": 2.6500010261562834, "learning_rate": 7.926455566905006e-06, "loss": 0.8792, "step": 776 }, { "epoch": 0.02381390216991541, "grad_norm": 2.480893290639923, "learning_rate": 7.936670071501533e-06, "loss": 0.7673, "step": 777 }, { "epoch": 0.023844550692656615, "grad_norm": 2.815091717878676, "learning_rate": 7.94688457609806e-06, "loss": 0.8229, "step": 778 }, { "epoch": 0.023875199215397817, "grad_norm": 2.7115926648353224, "learning_rate": 7.957099080694587e-06, "loss": 0.8917, "step": 779 }, { "epoch": 0.023905847738139023, "grad_norm": 3.023148704765526, "learning_rate": 7.967313585291115e-06, "loss": 0.7239, "step": 780 }, { "epoch": 0.023936496260880225, "grad_norm": 1.272846888223202, "learning_rate": 7.97752808988764e-06, "loss": 0.5541, "step": 781 }, { "epoch": 0.02396714478362143, "grad_norm": 2.3781680149348223, "learning_rate": 7.987742594484168e-06, "loss": 0.7803, "step": 782 }, { "epoch": 0.023997793306362633, "grad_norm": 2.7809861263277096, "learning_rate": 7.997957099080696e-06, "loss": 0.8305, "step": 783 }, { "epoch": 0.02402844182910384, "grad_norm": 3.686101341042195, "learning_rate": 8.008171603677223e-06, "loss": 0.8876, "step": 784 }, { "epoch": 0.02405909035184504, "grad_norm": 2.3595418307545817, "learning_rate": 8.01838610827375e-06, "loss": 0.7125, "step": 785 }, { "epoch": 0.024089738874586247, "grad_norm": 2.362173168076082, "learning_rate": 8.028600612870277e-06, "loss": 0.8467, "step": 786 }, { "epoch": 0.02412038739732745, "grad_norm": 2.5116109332963856, "learning_rate": 8.038815117466805e-06, "loss": 0.8069, "step": 787 }, { "epoch": 0.02415103592006865, "grad_norm": 2.6575777419493827, "learning_rate": 8.04902962206333e-06, "loss": 0.9022, "step": 788 }, { "epoch": 0.024181684442809857, "grad_norm": 2.57174966345801, "learning_rate": 8.059244126659858e-06, "loss": 0.8693, "step": 789 }, { "epoch": 0.02421233296555106, "grad_norm": 2.3741826054283584, "learning_rate": 8.069458631256384e-06, "loss": 0.8724, "step": 790 }, { "epoch": 0.024242981488292265, "grad_norm": 1.3082340538282995, "learning_rate": 8.079673135852912e-06, "loss": 0.5586, "step": 791 }, { "epoch": 0.024273630011033467, "grad_norm": 2.7828894181453383, "learning_rate": 8.08988764044944e-06, "loss": 0.8388, "step": 792 }, { "epoch": 0.024304278533774672, "grad_norm": 2.427629568699509, "learning_rate": 8.100102145045965e-06, "loss": 0.8701, "step": 793 }, { "epoch": 0.024334927056515875, "grad_norm": 2.7552297337204688, "learning_rate": 8.110316649642493e-06, "loss": 0.8267, "step": 794 }, { "epoch": 0.02436557557925708, "grad_norm": 2.573239912320061, "learning_rate": 8.120531154239019e-06, "loss": 0.8433, "step": 795 }, { "epoch": 0.024396224101998282, "grad_norm": 1.0246386722689858, "learning_rate": 8.130745658835547e-06, "loss": 0.5294, "step": 796 }, { "epoch": 0.024426872624739488, "grad_norm": 2.432423185662239, "learning_rate": 8.140960163432074e-06, "loss": 0.8441, "step": 797 }, { "epoch": 0.02445752114748069, "grad_norm": 2.3419335503581187, "learning_rate": 8.1511746680286e-06, "loss": 0.7905, "step": 798 }, { "epoch": 0.024488169670221896, "grad_norm": 2.5703489722906836, "learning_rate": 8.161389172625128e-06, "loss": 0.826, "step": 799 }, { "epoch": 0.024518818192963098, "grad_norm": 2.548085923395877, "learning_rate": 8.171603677221655e-06, "loss": 0.8292, "step": 800 }, { "epoch": 0.024549466715704304, "grad_norm": 2.574537986347276, "learning_rate": 8.181818181818183e-06, "loss": 0.8566, "step": 801 }, { "epoch": 0.024580115238445506, "grad_norm": 2.798053270216666, "learning_rate": 8.192032686414709e-06, "loss": 0.7926, "step": 802 }, { "epoch": 0.02461076376118671, "grad_norm": 2.816179015560676, "learning_rate": 8.202247191011237e-06, "loss": 0.8615, "step": 803 }, { "epoch": 0.024641412283927914, "grad_norm": 2.7135238536985917, "learning_rate": 8.212461695607764e-06, "loss": 0.8315, "step": 804 }, { "epoch": 0.02467206080666912, "grad_norm": 2.3489425466151452, "learning_rate": 8.22267620020429e-06, "loss": 0.7628, "step": 805 }, { "epoch": 0.02470270932941032, "grad_norm": 2.2727347399448843, "learning_rate": 8.232890704800818e-06, "loss": 0.7686, "step": 806 }, { "epoch": 0.024733357852151527, "grad_norm": 2.650219634323542, "learning_rate": 8.243105209397345e-06, "loss": 0.8165, "step": 807 }, { "epoch": 0.02476400637489273, "grad_norm": 2.570779255473059, "learning_rate": 8.253319713993871e-06, "loss": 0.8439, "step": 808 }, { "epoch": 0.024794654897633935, "grad_norm": 1.147276085612331, "learning_rate": 8.263534218590399e-06, "loss": 0.5719, "step": 809 }, { "epoch": 0.024825303420375137, "grad_norm": 1.041089518221371, "learning_rate": 8.273748723186927e-06, "loss": 0.5477, "step": 810 }, { "epoch": 0.024855951943116343, "grad_norm": 2.5837806173658864, "learning_rate": 8.283963227783454e-06, "loss": 0.8565, "step": 811 }, { "epoch": 0.024886600465857545, "grad_norm": 2.3330345787058993, "learning_rate": 8.29417773237998e-06, "loss": 0.7899, "step": 812 }, { "epoch": 0.02491724898859875, "grad_norm": 2.484462669048185, "learning_rate": 8.304392236976508e-06, "loss": 0.9207, "step": 813 }, { "epoch": 0.024947897511339953, "grad_norm": 2.596343764073533, "learning_rate": 8.314606741573035e-06, "loss": 0.832, "step": 814 }, { "epoch": 0.02497854603408116, "grad_norm": 2.5174269083317236, "learning_rate": 8.324821246169561e-06, "loss": 0.8807, "step": 815 }, { "epoch": 0.02500919455682236, "grad_norm": 2.4532373482671597, "learning_rate": 8.335035750766089e-06, "loss": 0.6986, "step": 816 }, { "epoch": 0.025039843079563567, "grad_norm": 2.4661005564791347, "learning_rate": 8.345250255362617e-06, "loss": 0.7601, "step": 817 }, { "epoch": 0.02507049160230477, "grad_norm": 2.6323898354850357, "learning_rate": 8.355464759959142e-06, "loss": 0.8352, "step": 818 }, { "epoch": 0.02510114012504597, "grad_norm": 1.2874777248094558, "learning_rate": 8.36567926455567e-06, "loss": 0.5838, "step": 819 }, { "epoch": 0.025131788647787177, "grad_norm": 1.125204128242292, "learning_rate": 8.375893769152198e-06, "loss": 0.546, "step": 820 }, { "epoch": 0.02516243717052838, "grad_norm": 2.900243627582199, "learning_rate": 8.386108273748724e-06, "loss": 0.788, "step": 821 }, { "epoch": 0.025193085693269585, "grad_norm": 2.7760036339171745, "learning_rate": 8.396322778345251e-06, "loss": 0.8666, "step": 822 }, { "epoch": 0.025223734216010787, "grad_norm": 3.1126109144635428, "learning_rate": 8.406537282941777e-06, "loss": 0.8769, "step": 823 }, { "epoch": 0.025254382738751992, "grad_norm": 2.5415993714914227, "learning_rate": 8.416751787538305e-06, "loss": 0.8567, "step": 824 }, { "epoch": 0.025285031261493195, "grad_norm": 2.6340756389736084, "learning_rate": 8.426966292134832e-06, "loss": 0.8887, "step": 825 }, { "epoch": 0.0253156797842344, "grad_norm": 2.508698032242236, "learning_rate": 8.437180796731358e-06, "loss": 0.7914, "step": 826 }, { "epoch": 0.025346328306975602, "grad_norm": 2.5311781038835712, "learning_rate": 8.447395301327886e-06, "loss": 0.8157, "step": 827 }, { "epoch": 0.025376976829716808, "grad_norm": 2.4099391775740786, "learning_rate": 8.457609805924414e-06, "loss": 0.8328, "step": 828 }, { "epoch": 0.02540762535245801, "grad_norm": 2.8262460628277672, "learning_rate": 8.46782431052094e-06, "loss": 0.8953, "step": 829 }, { "epoch": 0.025438273875199216, "grad_norm": 2.6268744350574695, "learning_rate": 8.478038815117467e-06, "loss": 0.8844, "step": 830 }, { "epoch": 0.025468922397940418, "grad_norm": 2.775673570115626, "learning_rate": 8.488253319713995e-06, "loss": 0.9974, "step": 831 }, { "epoch": 0.025499570920681624, "grad_norm": 2.7274378400206225, "learning_rate": 8.49846782431052e-06, "loss": 0.8877, "step": 832 }, { "epoch": 0.025530219443422826, "grad_norm": 2.6667591907622525, "learning_rate": 8.508682328907048e-06, "loss": 0.8432, "step": 833 }, { "epoch": 0.02556086796616403, "grad_norm": 2.6132873214559322, "learning_rate": 8.518896833503576e-06, "loss": 0.8647, "step": 834 }, { "epoch": 0.025591516488905234, "grad_norm": 3.010548082282907, "learning_rate": 8.529111338100104e-06, "loss": 0.7312, "step": 835 }, { "epoch": 0.02562216501164644, "grad_norm": 2.6632537774454246, "learning_rate": 8.53932584269663e-06, "loss": 0.9057, "step": 836 }, { "epoch": 0.025652813534387642, "grad_norm": 2.246768439379882, "learning_rate": 8.549540347293157e-06, "loss": 0.822, "step": 837 }, { "epoch": 0.025683462057128847, "grad_norm": 2.854966289986523, "learning_rate": 8.559754851889685e-06, "loss": 0.9023, "step": 838 }, { "epoch": 0.02571411057987005, "grad_norm": 2.613408934721483, "learning_rate": 8.56996935648621e-06, "loss": 0.8117, "step": 839 }, { "epoch": 0.025744759102611255, "grad_norm": 2.3556484466166387, "learning_rate": 8.580183861082738e-06, "loss": 0.8202, "step": 840 }, { "epoch": 0.025775407625352457, "grad_norm": 2.532871511557852, "learning_rate": 8.590398365679266e-06, "loss": 0.8054, "step": 841 }, { "epoch": 0.025806056148093663, "grad_norm": 2.559305045378276, "learning_rate": 8.600612870275792e-06, "loss": 0.7622, "step": 842 }, { "epoch": 0.025836704670834865, "grad_norm": 3.1797481867617288, "learning_rate": 8.61082737487232e-06, "loss": 0.7494, "step": 843 }, { "epoch": 0.02586735319357607, "grad_norm": 2.551453691760712, "learning_rate": 8.621041879468847e-06, "loss": 0.9095, "step": 844 }, { "epoch": 0.025898001716317273, "grad_norm": 2.632911451559825, "learning_rate": 8.631256384065375e-06, "loss": 0.8499, "step": 845 }, { "epoch": 0.02592865023905848, "grad_norm": 2.660944977332849, "learning_rate": 8.6414708886619e-06, "loss": 0.7536, "step": 846 }, { "epoch": 0.02595929876179968, "grad_norm": 1.9569939502631544, "learning_rate": 8.651685393258428e-06, "loss": 0.5495, "step": 847 }, { "epoch": 0.025989947284540887, "grad_norm": 3.0209608680331264, "learning_rate": 8.661899897854954e-06, "loss": 0.772, "step": 848 }, { "epoch": 0.02602059580728209, "grad_norm": 2.7434468475357403, "learning_rate": 8.672114402451482e-06, "loss": 0.7995, "step": 849 }, { "epoch": 0.026051244330023295, "grad_norm": 2.650065750088603, "learning_rate": 8.68232890704801e-06, "loss": 0.905, "step": 850 }, { "epoch": 0.026081892852764497, "grad_norm": 2.6140517642519487, "learning_rate": 8.692543411644536e-06, "loss": 0.8161, "step": 851 }, { "epoch": 0.0261125413755057, "grad_norm": 2.6583794182956657, "learning_rate": 8.702757916241063e-06, "loss": 0.7274, "step": 852 }, { "epoch": 0.026143189898246905, "grad_norm": 2.8924937098838837, "learning_rate": 8.712972420837589e-06, "loss": 0.8863, "step": 853 }, { "epoch": 0.026173838420988107, "grad_norm": 2.5322419179387703, "learning_rate": 8.723186925434117e-06, "loss": 0.8366, "step": 854 }, { "epoch": 0.026204486943729313, "grad_norm": 2.6668562751637626, "learning_rate": 8.733401430030644e-06, "loss": 0.8899, "step": 855 }, { "epoch": 0.026235135466470515, "grad_norm": 3.1105253346763884, "learning_rate": 8.74361593462717e-06, "loss": 0.8593, "step": 856 }, { "epoch": 0.02626578398921172, "grad_norm": 2.745522928599825, "learning_rate": 8.753830439223698e-06, "loss": 0.8762, "step": 857 }, { "epoch": 0.026296432511952923, "grad_norm": 2.820371672904367, "learning_rate": 8.764044943820226e-06, "loss": 0.9065, "step": 858 }, { "epoch": 0.026327081034694128, "grad_norm": 2.4258146135636274, "learning_rate": 8.774259448416752e-06, "loss": 0.771, "step": 859 }, { "epoch": 0.02635772955743533, "grad_norm": 2.7328001343894956, "learning_rate": 8.784473953013279e-06, "loss": 0.7946, "step": 860 }, { "epoch": 0.026388378080176536, "grad_norm": 3.093589809640212, "learning_rate": 8.794688457609807e-06, "loss": 0.8917, "step": 861 }, { "epoch": 0.02641902660291774, "grad_norm": 2.673877697109671, "learning_rate": 8.804902962206334e-06, "loss": 0.8392, "step": 862 }, { "epoch": 0.026449675125658944, "grad_norm": 1.7171565847418424, "learning_rate": 8.81511746680286e-06, "loss": 0.554, "step": 863 }, { "epoch": 0.026480323648400146, "grad_norm": 2.8339918016366, "learning_rate": 8.825331971399388e-06, "loss": 0.8552, "step": 864 }, { "epoch": 0.026510972171141352, "grad_norm": 2.758202190881158, "learning_rate": 8.835546475995916e-06, "loss": 0.8577, "step": 865 }, { "epoch": 0.026541620693882554, "grad_norm": 2.666596170857121, "learning_rate": 8.845760980592442e-06, "loss": 0.8614, "step": 866 }, { "epoch": 0.02657226921662376, "grad_norm": 2.4626059530983433, "learning_rate": 8.855975485188969e-06, "loss": 0.8282, "step": 867 }, { "epoch": 0.026602917739364962, "grad_norm": 2.7101234752439294, "learning_rate": 8.866189989785497e-06, "loss": 0.8276, "step": 868 }, { "epoch": 0.026633566262106168, "grad_norm": 2.587988197149573, "learning_rate": 8.876404494382023e-06, "loss": 0.8328, "step": 869 }, { "epoch": 0.02666421478484737, "grad_norm": 2.6950444477476068, "learning_rate": 8.88661899897855e-06, "loss": 0.7748, "step": 870 }, { "epoch": 0.026694863307588575, "grad_norm": 2.354779587495378, "learning_rate": 8.896833503575078e-06, "loss": 0.7574, "step": 871 }, { "epoch": 0.026725511830329778, "grad_norm": 2.8575796679015713, "learning_rate": 8.907048008171606e-06, "loss": 0.8717, "step": 872 }, { "epoch": 0.026756160353070983, "grad_norm": 2.5830526486645824, "learning_rate": 8.917262512768132e-06, "loss": 0.8297, "step": 873 }, { "epoch": 0.026786808875812185, "grad_norm": 1.6344762189202922, "learning_rate": 8.927477017364659e-06, "loss": 0.5805, "step": 874 }, { "epoch": 0.02681745739855339, "grad_norm": 2.7164994883880635, "learning_rate": 8.937691521961187e-06, "loss": 0.8196, "step": 875 }, { "epoch": 0.026848105921294593, "grad_norm": 2.6181483221024937, "learning_rate": 8.947906026557713e-06, "loss": 0.7712, "step": 876 }, { "epoch": 0.0268787544440358, "grad_norm": 3.0806438010104786, "learning_rate": 8.95812053115424e-06, "loss": 0.8944, "step": 877 }, { "epoch": 0.026909402966777, "grad_norm": 2.5834843493950617, "learning_rate": 8.968335035750766e-06, "loss": 0.757, "step": 878 }, { "epoch": 0.026940051489518207, "grad_norm": 2.728155069145252, "learning_rate": 8.978549540347294e-06, "loss": 0.8445, "step": 879 }, { "epoch": 0.02697070001225941, "grad_norm": 2.4450448692081115, "learning_rate": 8.988764044943822e-06, "loss": 0.8559, "step": 880 }, { "epoch": 0.027001348535000615, "grad_norm": 2.3397505420037823, "learning_rate": 8.998978549540347e-06, "loss": 0.7991, "step": 881 }, { "epoch": 0.027031997057741817, "grad_norm": 1.3677263325671185, "learning_rate": 9.009193054136875e-06, "loss": 0.5804, "step": 882 }, { "epoch": 0.02706264558048302, "grad_norm": 2.6644083202892404, "learning_rate": 9.019407558733401e-06, "loss": 0.8311, "step": 883 }, { "epoch": 0.027093294103224225, "grad_norm": 2.6036955467444565, "learning_rate": 9.029622063329929e-06, "loss": 0.8554, "step": 884 }, { "epoch": 0.027123942625965427, "grad_norm": 2.724944389249544, "learning_rate": 9.039836567926456e-06, "loss": 0.825, "step": 885 }, { "epoch": 0.027154591148706633, "grad_norm": 2.414120821805667, "learning_rate": 9.050051072522982e-06, "loss": 0.9189, "step": 886 }, { "epoch": 0.027185239671447835, "grad_norm": 2.3614865687060163, "learning_rate": 9.06026557711951e-06, "loss": 0.7455, "step": 887 }, { "epoch": 0.02721588819418904, "grad_norm": 2.705471842485433, "learning_rate": 9.070480081716037e-06, "loss": 0.8847, "step": 888 }, { "epoch": 0.027246536716930243, "grad_norm": 2.2502470681806126, "learning_rate": 9.080694586312565e-06, "loss": 0.7791, "step": 889 }, { "epoch": 0.02727718523967145, "grad_norm": 2.8895300054915243, "learning_rate": 9.090909090909091e-06, "loss": 0.8423, "step": 890 }, { "epoch": 0.02730783376241265, "grad_norm": 2.400942036621106, "learning_rate": 9.101123595505619e-06, "loss": 0.7522, "step": 891 }, { "epoch": 0.027338482285153856, "grad_norm": 2.6433642995371707, "learning_rate": 9.111338100102146e-06, "loss": 0.825, "step": 892 }, { "epoch": 0.02736913080789506, "grad_norm": 2.393791008434286, "learning_rate": 9.121552604698672e-06, "loss": 0.8107, "step": 893 }, { "epoch": 0.027399779330636264, "grad_norm": 2.5776436532258318, "learning_rate": 9.1317671092952e-06, "loss": 0.7972, "step": 894 }, { "epoch": 0.027430427853377466, "grad_norm": 2.4560225081013414, "learning_rate": 9.141981613891727e-06, "loss": 0.7217, "step": 895 }, { "epoch": 0.027461076376118672, "grad_norm": 2.741831782917869, "learning_rate": 9.152196118488255e-06, "loss": 0.8873, "step": 896 }, { "epoch": 0.027491724898859874, "grad_norm": 2.612857117184384, "learning_rate": 9.162410623084781e-06, "loss": 0.7401, "step": 897 }, { "epoch": 0.02752237342160108, "grad_norm": 2.5049418916516837, "learning_rate": 9.172625127681309e-06, "loss": 0.8022, "step": 898 }, { "epoch": 0.027553021944342282, "grad_norm": 1.2746541085167293, "learning_rate": 9.182839632277836e-06, "loss": 0.5643, "step": 899 }, { "epoch": 0.027583670467083488, "grad_norm": 2.5069582853042007, "learning_rate": 9.193054136874362e-06, "loss": 0.7851, "step": 900 }, { "epoch": 0.02761431898982469, "grad_norm": 2.576669871917871, "learning_rate": 9.20326864147089e-06, "loss": 0.8603, "step": 901 }, { "epoch": 0.027644967512565895, "grad_norm": 2.450572685709724, "learning_rate": 9.213483146067417e-06, "loss": 0.8692, "step": 902 }, { "epoch": 0.027675616035307098, "grad_norm": 3.0014619683246617, "learning_rate": 9.223697650663943e-06, "loss": 0.8959, "step": 903 }, { "epoch": 0.027706264558048303, "grad_norm": 2.6948085119350242, "learning_rate": 9.233912155260471e-06, "loss": 0.8897, "step": 904 }, { "epoch": 0.027736913080789505, "grad_norm": 2.3499546042625745, "learning_rate": 9.244126659856999e-06, "loss": 0.8778, "step": 905 }, { "epoch": 0.02776756160353071, "grad_norm": 2.598433280340729, "learning_rate": 9.254341164453525e-06, "loss": 0.8472, "step": 906 }, { "epoch": 0.027798210126271913, "grad_norm": 2.511034311848889, "learning_rate": 9.264555669050052e-06, "loss": 0.7848, "step": 907 }, { "epoch": 0.02782885864901312, "grad_norm": 2.542738924306037, "learning_rate": 9.274770173646578e-06, "loss": 0.8482, "step": 908 }, { "epoch": 0.02785950717175432, "grad_norm": 1.2571803581608227, "learning_rate": 9.284984678243106e-06, "loss": 0.5664, "step": 909 }, { "epoch": 0.027890155694495527, "grad_norm": 2.540279628831256, "learning_rate": 9.295199182839633e-06, "loss": 0.729, "step": 910 }, { "epoch": 0.02792080421723673, "grad_norm": 2.7514703075998033, "learning_rate": 9.30541368743616e-06, "loss": 0.749, "step": 911 }, { "epoch": 0.027951452739977935, "grad_norm": 2.69877353940744, "learning_rate": 9.315628192032687e-06, "loss": 0.9042, "step": 912 }, { "epoch": 0.027982101262719137, "grad_norm": 2.61050720609846, "learning_rate": 9.325842696629213e-06, "loss": 0.8512, "step": 913 }, { "epoch": 0.02801274978546034, "grad_norm": 2.3527178696530133, "learning_rate": 9.33605720122574e-06, "loss": 0.9047, "step": 914 }, { "epoch": 0.028043398308201545, "grad_norm": 2.334150231626858, "learning_rate": 9.346271705822268e-06, "loss": 0.8385, "step": 915 }, { "epoch": 0.028074046830942747, "grad_norm": 2.8023560630501705, "learning_rate": 9.356486210418796e-06, "loss": 0.8636, "step": 916 }, { "epoch": 0.028104695353683953, "grad_norm": 2.3222114960685065, "learning_rate": 9.366700715015322e-06, "loss": 0.7902, "step": 917 }, { "epoch": 0.028135343876425155, "grad_norm": 2.359591455597825, "learning_rate": 9.37691521961185e-06, "loss": 0.8404, "step": 918 }, { "epoch": 0.02816599239916636, "grad_norm": 2.3866818116315662, "learning_rate": 9.387129724208377e-06, "loss": 0.8796, "step": 919 }, { "epoch": 0.028196640921907563, "grad_norm": 2.5353210704461837, "learning_rate": 9.397344228804903e-06, "loss": 0.8145, "step": 920 }, { "epoch": 0.02822728944464877, "grad_norm": 2.695016878806716, "learning_rate": 9.40755873340143e-06, "loss": 0.8114, "step": 921 }, { "epoch": 0.02825793796738997, "grad_norm": 2.465155074941893, "learning_rate": 9.417773237997958e-06, "loss": 0.8238, "step": 922 }, { "epoch": 0.028288586490131176, "grad_norm": 2.5536123728138254, "learning_rate": 9.427987742594486e-06, "loss": 0.7399, "step": 923 }, { "epoch": 0.02831923501287238, "grad_norm": 1.0604879350764393, "learning_rate": 9.438202247191012e-06, "loss": 0.5711, "step": 924 }, { "epoch": 0.028349883535613584, "grad_norm": 1.1363205112623014, "learning_rate": 9.44841675178754e-06, "loss": 0.5669, "step": 925 }, { "epoch": 0.028380532058354786, "grad_norm": 2.780920729399713, "learning_rate": 9.458631256384067e-06, "loss": 0.7395, "step": 926 }, { "epoch": 0.028411180581095992, "grad_norm": 2.5964547159078673, "learning_rate": 9.468845760980593e-06, "loss": 0.8742, "step": 927 }, { "epoch": 0.028441829103837194, "grad_norm": 2.5366869086366983, "learning_rate": 9.47906026557712e-06, "loss": 0.9066, "step": 928 }, { "epoch": 0.0284724776265784, "grad_norm": 2.4149473626153317, "learning_rate": 9.489274770173648e-06, "loss": 0.7666, "step": 929 }, { "epoch": 0.028503126149319602, "grad_norm": 2.5407076053745072, "learning_rate": 9.499489274770174e-06, "loss": 0.8175, "step": 930 }, { "epoch": 0.028533774672060808, "grad_norm": 2.3090942332571154, "learning_rate": 9.509703779366702e-06, "loss": 0.6784, "step": 931 }, { "epoch": 0.02856442319480201, "grad_norm": 2.5700660692300517, "learning_rate": 9.51991828396323e-06, "loss": 0.8682, "step": 932 }, { "epoch": 0.028595071717543216, "grad_norm": 2.517546560768532, "learning_rate": 9.530132788559755e-06, "loss": 0.7301, "step": 933 }, { "epoch": 0.028625720240284418, "grad_norm": 2.650916379972764, "learning_rate": 9.540347293156283e-06, "loss": 0.8852, "step": 934 }, { "epoch": 0.028656368763025623, "grad_norm": 1.4876408056573778, "learning_rate": 9.55056179775281e-06, "loss": 0.5577, "step": 935 }, { "epoch": 0.028687017285766826, "grad_norm": 2.7786317429846834, "learning_rate": 9.560776302349337e-06, "loss": 0.8532, "step": 936 }, { "epoch": 0.02871766580850803, "grad_norm": 2.549107278732885, "learning_rate": 9.570990806945864e-06, "loss": 0.7941, "step": 937 }, { "epoch": 0.028748314331249233, "grad_norm": 2.621561689797684, "learning_rate": 9.58120531154239e-06, "loss": 0.9105, "step": 938 }, { "epoch": 0.02877896285399044, "grad_norm": 2.621723014722738, "learning_rate": 9.591419816138918e-06, "loss": 0.6723, "step": 939 }, { "epoch": 0.02880961137673164, "grad_norm": 2.824985080602391, "learning_rate": 9.601634320735445e-06, "loss": 0.8584, "step": 940 }, { "epoch": 0.028840259899472847, "grad_norm": 2.646898070762316, "learning_rate": 9.611848825331971e-06, "loss": 0.8113, "step": 941 }, { "epoch": 0.02887090842221405, "grad_norm": 2.960425582015106, "learning_rate": 9.622063329928499e-06, "loss": 0.8674, "step": 942 }, { "epoch": 0.028901556944955255, "grad_norm": 1.4109932533501508, "learning_rate": 9.632277834525027e-06, "loss": 0.5681, "step": 943 }, { "epoch": 0.028932205467696457, "grad_norm": 2.2937909185956955, "learning_rate": 9.642492339121552e-06, "loss": 0.8622, "step": 944 }, { "epoch": 0.02896285399043766, "grad_norm": 2.4541806609726713, "learning_rate": 9.65270684371808e-06, "loss": 0.8571, "step": 945 }, { "epoch": 0.028993502513178865, "grad_norm": 2.494082243658257, "learning_rate": 9.662921348314608e-06, "loss": 0.8313, "step": 946 }, { "epoch": 0.029024151035920067, "grad_norm": 2.5137246009355603, "learning_rate": 9.673135852911134e-06, "loss": 0.8655, "step": 947 }, { "epoch": 0.029054799558661273, "grad_norm": 2.2016583627213824, "learning_rate": 9.683350357507661e-06, "loss": 0.7857, "step": 948 }, { "epoch": 0.029085448081402475, "grad_norm": 2.525177199514098, "learning_rate": 9.693564862104189e-06, "loss": 0.7948, "step": 949 }, { "epoch": 0.02911609660414368, "grad_norm": 2.2685819687037676, "learning_rate": 9.703779366700717e-06, "loss": 0.8242, "step": 950 }, { "epoch": 0.029146745126884883, "grad_norm": 2.250225463776507, "learning_rate": 9.713993871297242e-06, "loss": 0.7679, "step": 951 }, { "epoch": 0.02917739364962609, "grad_norm": 2.5462848063137478, "learning_rate": 9.72420837589377e-06, "loss": 0.8051, "step": 952 }, { "epoch": 0.02920804217236729, "grad_norm": 2.9267964873938292, "learning_rate": 9.734422880490298e-06, "loss": 0.8373, "step": 953 }, { "epoch": 0.029238690695108496, "grad_norm": 2.5980641880267843, "learning_rate": 9.744637385086824e-06, "loss": 0.9197, "step": 954 }, { "epoch": 0.0292693392178497, "grad_norm": 2.7070564447246355, "learning_rate": 9.754851889683351e-06, "loss": 0.9031, "step": 955 }, { "epoch": 0.029299987740590904, "grad_norm": 2.2661360942059723, "learning_rate": 9.765066394279879e-06, "loss": 0.8534, "step": 956 }, { "epoch": 0.029330636263332106, "grad_norm": 2.7702331342658035, "learning_rate": 9.775280898876405e-06, "loss": 0.8182, "step": 957 }, { "epoch": 0.029361284786073312, "grad_norm": 2.3820111219518667, "learning_rate": 9.785495403472932e-06, "loss": 0.8924, "step": 958 }, { "epoch": 0.029391933308814514, "grad_norm": 2.5425465214931693, "learning_rate": 9.79570990806946e-06, "loss": 0.8118, "step": 959 }, { "epoch": 0.02942258183155572, "grad_norm": 2.484143220617581, "learning_rate": 9.805924412665988e-06, "loss": 0.8123, "step": 960 }, { "epoch": 0.029453230354296922, "grad_norm": 2.8482979859598587, "learning_rate": 9.816138917262514e-06, "loss": 0.8288, "step": 961 }, { "epoch": 0.029483878877038128, "grad_norm": 1.8653644994384166, "learning_rate": 9.826353421859041e-06, "loss": 0.5886, "step": 962 }, { "epoch": 0.02951452739977933, "grad_norm": 2.454158731946066, "learning_rate": 9.836567926455567e-06, "loss": 0.8418, "step": 963 }, { "epoch": 0.029545175922520536, "grad_norm": 2.279549380638653, "learning_rate": 9.846782431052095e-06, "loss": 0.7624, "step": 964 }, { "epoch": 0.029575824445261738, "grad_norm": 2.2876376037345514, "learning_rate": 9.856996935648622e-06, "loss": 0.8286, "step": 965 }, { "epoch": 0.029606472968002943, "grad_norm": 2.3732154998779738, "learning_rate": 9.867211440245148e-06, "loss": 0.8217, "step": 966 }, { "epoch": 0.029637121490744146, "grad_norm": 2.5155757813672475, "learning_rate": 9.877425944841676e-06, "loss": 0.8498, "step": 967 }, { "epoch": 0.02966777001348535, "grad_norm": 2.4272298626355946, "learning_rate": 9.887640449438202e-06, "loss": 0.723, "step": 968 }, { "epoch": 0.029698418536226553, "grad_norm": 2.709633377377744, "learning_rate": 9.89785495403473e-06, "loss": 0.8837, "step": 969 }, { "epoch": 0.02972906705896776, "grad_norm": 2.4198269388239413, "learning_rate": 9.908069458631257e-06, "loss": 0.7477, "step": 970 }, { "epoch": 0.02975971558170896, "grad_norm": 3.224906379810377, "learning_rate": 9.918283963227783e-06, "loss": 0.8943, "step": 971 }, { "epoch": 0.029790364104450167, "grad_norm": 2.481287823904104, "learning_rate": 9.92849846782431e-06, "loss": 0.892, "step": 972 }, { "epoch": 0.02982101262719137, "grad_norm": 2.345120480787265, "learning_rate": 9.938712972420838e-06, "loss": 0.7125, "step": 973 }, { "epoch": 0.029851661149932575, "grad_norm": 1.766245848929486, "learning_rate": 9.948927477017364e-06, "loss": 0.582, "step": 974 }, { "epoch": 0.029882309672673777, "grad_norm": 2.3253765514266984, "learning_rate": 9.959141981613892e-06, "loss": 0.8224, "step": 975 }, { "epoch": 0.029912958195414983, "grad_norm": 2.592912671791218, "learning_rate": 9.96935648621042e-06, "loss": 0.8634, "step": 976 }, { "epoch": 0.029943606718156185, "grad_norm": 2.259563972118137, "learning_rate": 9.979570990806947e-06, "loss": 0.8094, "step": 977 }, { "epoch": 0.029974255240897387, "grad_norm": 2.512323072459396, "learning_rate": 9.989785495403473e-06, "loss": 0.8641, "step": 978 }, { "epoch": 0.030004903763638593, "grad_norm": 2.526037549441196, "learning_rate": 1e-05, "loss": 0.8622, "step": 979 }, { "epoch": 0.030035552286379795, "grad_norm": 2.352533466709703, "learning_rate": 9.999999975366861e-06, "loss": 0.9115, "step": 980 }, { "epoch": 0.030066200809121, "grad_norm": 2.5336464647919263, "learning_rate": 9.999999901467443e-06, "loss": 0.7955, "step": 981 }, { "epoch": 0.030096849331862203, "grad_norm": 2.372539279245691, "learning_rate": 9.999999778301746e-06, "loss": 0.8814, "step": 982 }, { "epoch": 0.03012749785460341, "grad_norm": 2.526050673424826, "learning_rate": 9.999999605869772e-06, "loss": 0.9574, "step": 983 }, { "epoch": 0.03015814637734461, "grad_norm": 2.7399580387122997, "learning_rate": 9.999999384171522e-06, "loss": 0.8249, "step": 984 }, { "epoch": 0.030188794900085816, "grad_norm": 1.5262334685412926, "learning_rate": 9.999999113207e-06, "loss": 0.5813, "step": 985 }, { "epoch": 0.03021944342282702, "grad_norm": 2.5409253727367975, "learning_rate": 9.999998792976206e-06, "loss": 0.8519, "step": 986 }, { "epoch": 0.030250091945568224, "grad_norm": 2.7311688988171263, "learning_rate": 9.999998423479145e-06, "loss": 0.7932, "step": 987 }, { "epoch": 0.030280740468309426, "grad_norm": 2.7220181968091333, "learning_rate": 9.99999800471582e-06, "loss": 0.8214, "step": 988 }, { "epoch": 0.030311388991050632, "grad_norm": 1.060146330888957, "learning_rate": 9.999997536686236e-06, "loss": 0.568, "step": 989 }, { "epoch": 0.030342037513791834, "grad_norm": 2.4304142914011804, "learning_rate": 9.999997019390398e-06, "loss": 0.8263, "step": 990 }, { "epoch": 0.03037268603653304, "grad_norm": 2.7619616639983797, "learning_rate": 9.999996452828306e-06, "loss": 0.8919, "step": 991 }, { "epoch": 0.030403334559274242, "grad_norm": 1.0750799408669391, "learning_rate": 9.999995836999975e-06, "loss": 0.5395, "step": 992 }, { "epoch": 0.030433983082015448, "grad_norm": 2.5877142451049595, "learning_rate": 9.999995171905401e-06, "loss": 0.727, "step": 993 }, { "epoch": 0.03046463160475665, "grad_norm": 2.479254893717689, "learning_rate": 9.999994457544599e-06, "loss": 0.9414, "step": 994 }, { "epoch": 0.030495280127497856, "grad_norm": 1.1023587352338724, "learning_rate": 9.99999369391757e-06, "loss": 0.5826, "step": 995 }, { "epoch": 0.030525928650239058, "grad_norm": 2.5982040603898953, "learning_rate": 9.999992881024326e-06, "loss": 0.7893, "step": 996 }, { "epoch": 0.030556577172980263, "grad_norm": 2.53764418194585, "learning_rate": 9.999992018864871e-06, "loss": 0.9024, "step": 997 }, { "epoch": 0.030587225695721466, "grad_norm": 2.313628360783178, "learning_rate": 9.999991107439215e-06, "loss": 0.8027, "step": 998 }, { "epoch": 0.03061787421846267, "grad_norm": 2.760143594788861, "learning_rate": 9.999990146747369e-06, "loss": 0.8229, "step": 999 }, { "epoch": 0.030648522741203874, "grad_norm": 1.217348630970878, "learning_rate": 9.999989136789339e-06, "loss": 0.5669, "step": 1000 }, { "epoch": 0.03067917126394508, "grad_norm": 2.2392593287604647, "learning_rate": 9.999988077565138e-06, "loss": 0.8416, "step": 1001 }, { "epoch": 0.03070981978668628, "grad_norm": 3.2186820645809084, "learning_rate": 9.999986969074775e-06, "loss": 0.7788, "step": 1002 }, { "epoch": 0.030740468309427487, "grad_norm": 2.524674290767132, "learning_rate": 9.99998581131826e-06, "loss": 0.772, "step": 1003 }, { "epoch": 0.03077111683216869, "grad_norm": 2.4988370697004454, "learning_rate": 9.999984604295606e-06, "loss": 0.8432, "step": 1004 }, { "epoch": 0.030801765354909895, "grad_norm": 2.4719805826059726, "learning_rate": 9.999983348006825e-06, "loss": 0.816, "step": 1005 }, { "epoch": 0.030832413877651097, "grad_norm": 2.379912027316148, "learning_rate": 9.999982042451927e-06, "loss": 0.8783, "step": 1006 }, { "epoch": 0.030863062400392303, "grad_norm": 2.529928388286035, "learning_rate": 9.999980687630931e-06, "loss": 0.7689, "step": 1007 }, { "epoch": 0.030893710923133505, "grad_norm": 2.338995616980002, "learning_rate": 9.999979283543842e-06, "loss": 0.8624, "step": 1008 }, { "epoch": 0.030924359445874707, "grad_norm": 2.1692702523563065, "learning_rate": 9.99997783019068e-06, "loss": 0.8628, "step": 1009 }, { "epoch": 0.030955007968615913, "grad_norm": 2.09877885626247, "learning_rate": 9.999976327571454e-06, "loss": 0.7293, "step": 1010 }, { "epoch": 0.030985656491357115, "grad_norm": 1.9866275813199457, "learning_rate": 9.999974775686186e-06, "loss": 0.7178, "step": 1011 }, { "epoch": 0.03101630501409832, "grad_norm": 2.5039500270241555, "learning_rate": 9.999973174534885e-06, "loss": 0.7968, "step": 1012 }, { "epoch": 0.031046953536839523, "grad_norm": 2.4827946679429136, "learning_rate": 9.999971524117569e-06, "loss": 0.8554, "step": 1013 }, { "epoch": 0.03107760205958073, "grad_norm": 2.416495488939969, "learning_rate": 9.999969824434255e-06, "loss": 0.8244, "step": 1014 }, { "epoch": 0.03110825058232193, "grad_norm": 2.6942390535072938, "learning_rate": 9.999968075484959e-06, "loss": 0.8268, "step": 1015 }, { "epoch": 0.031138899105063136, "grad_norm": 2.187838748640899, "learning_rate": 9.999966277269697e-06, "loss": 0.7432, "step": 1016 }, { "epoch": 0.03116954762780434, "grad_norm": 2.528431435558222, "learning_rate": 9.999964429788487e-06, "loss": 0.85, "step": 1017 }, { "epoch": 0.031200196150545544, "grad_norm": 2.6710595856670913, "learning_rate": 9.999962533041352e-06, "loss": 0.9239, "step": 1018 }, { "epoch": 0.031230844673286746, "grad_norm": 2.2519832162019995, "learning_rate": 9.999960587028303e-06, "loss": 0.853, "step": 1019 }, { "epoch": 0.03126149319602795, "grad_norm": 2.3440894648736084, "learning_rate": 9.999958591749366e-06, "loss": 0.8245, "step": 1020 }, { "epoch": 0.03129214171876916, "grad_norm": 2.5666871892799215, "learning_rate": 9.999956547204557e-06, "loss": 0.892, "step": 1021 }, { "epoch": 0.031322790241510357, "grad_norm": 2.0626268537706745, "learning_rate": 9.999954453393896e-06, "loss": 0.7472, "step": 1022 }, { "epoch": 0.03135343876425156, "grad_norm": 2.6684115925220686, "learning_rate": 9.999952310317404e-06, "loss": 0.8973, "step": 1023 }, { "epoch": 0.03138408728699277, "grad_norm": 2.7668706306313298, "learning_rate": 9.999950117975104e-06, "loss": 0.824, "step": 1024 }, { "epoch": 0.031414735809733974, "grad_norm": 2.4535671063428826, "learning_rate": 9.999947876367015e-06, "loss": 0.8186, "step": 1025 }, { "epoch": 0.03144538433247517, "grad_norm": 2.2294805398623327, "learning_rate": 9.999945585493163e-06, "loss": 0.7607, "step": 1026 }, { "epoch": 0.03147603285521638, "grad_norm": 2.3351619034187125, "learning_rate": 9.999943245353566e-06, "loss": 0.8186, "step": 1027 }, { "epoch": 0.031506681377957584, "grad_norm": 2.787552634649161, "learning_rate": 9.999940855948247e-06, "loss": 0.8853, "step": 1028 }, { "epoch": 0.03153732990069879, "grad_norm": 2.3361469019785694, "learning_rate": 9.999938417277234e-06, "loss": 0.8638, "step": 1029 }, { "epoch": 0.03156797842343999, "grad_norm": 2.491584992556915, "learning_rate": 9.999935929340548e-06, "loss": 0.8948, "step": 1030 }, { "epoch": 0.031598626946181194, "grad_norm": 2.6965460057727935, "learning_rate": 9.999933392138212e-06, "loss": 0.8461, "step": 1031 }, { "epoch": 0.0316292754689224, "grad_norm": 2.725897196398422, "learning_rate": 9.999930805670256e-06, "loss": 0.8945, "step": 1032 }, { "epoch": 0.031659923991663605, "grad_norm": 3.126771538097249, "learning_rate": 9.9999281699367e-06, "loss": 0.9015, "step": 1033 }, { "epoch": 0.031690572514404804, "grad_norm": 2.4226596940477476, "learning_rate": 9.999925484937574e-06, "loss": 0.871, "step": 1034 }, { "epoch": 0.03172122103714601, "grad_norm": 2.491777224082894, "learning_rate": 9.999922750672903e-06, "loss": 0.8139, "step": 1035 }, { "epoch": 0.031751869559887215, "grad_norm": 2.505264345182828, "learning_rate": 9.999919967142713e-06, "loss": 0.8192, "step": 1036 }, { "epoch": 0.03178251808262842, "grad_norm": 1.1807428169900465, "learning_rate": 9.999917134347032e-06, "loss": 0.5624, "step": 1037 }, { "epoch": 0.03181316660536962, "grad_norm": 1.147289844532225, "learning_rate": 9.999914252285889e-06, "loss": 0.5456, "step": 1038 }, { "epoch": 0.031843815128110825, "grad_norm": 2.2551127186372715, "learning_rate": 9.999911320959312e-06, "loss": 0.8384, "step": 1039 }, { "epoch": 0.03187446365085203, "grad_norm": 2.3824806922140302, "learning_rate": 9.999908340367331e-06, "loss": 0.9253, "step": 1040 }, { "epoch": 0.03190511217359323, "grad_norm": 2.4208312575025377, "learning_rate": 9.99990531050997e-06, "loss": 0.806, "step": 1041 }, { "epoch": 0.031935760696334435, "grad_norm": 2.3540003256942157, "learning_rate": 9.999902231387264e-06, "loss": 0.8191, "step": 1042 }, { "epoch": 0.03196640921907564, "grad_norm": 2.2293741864851926, "learning_rate": 9.999899102999243e-06, "loss": 0.7228, "step": 1043 }, { "epoch": 0.031997057741816846, "grad_norm": 1.6249912318772541, "learning_rate": 9.999895925345938e-06, "loss": 0.5705, "step": 1044 }, { "epoch": 0.032027706264558045, "grad_norm": 2.4995487209998064, "learning_rate": 9.999892698427377e-06, "loss": 0.8498, "step": 1045 }, { "epoch": 0.03205835478729925, "grad_norm": 2.3392331234514514, "learning_rate": 9.999889422243594e-06, "loss": 0.8853, "step": 1046 }, { "epoch": 0.032089003310040456, "grad_norm": 2.3207338200493877, "learning_rate": 9.999886096794623e-06, "loss": 0.8725, "step": 1047 }, { "epoch": 0.03211965183278166, "grad_norm": 0.9930054374497782, "learning_rate": 9.999882722080491e-06, "loss": 0.5614, "step": 1048 }, { "epoch": 0.03215030035552286, "grad_norm": 2.725149701687121, "learning_rate": 9.99987929810124e-06, "loss": 0.8051, "step": 1049 }, { "epoch": 0.032180948878264067, "grad_norm": 2.1615724413614257, "learning_rate": 9.999875824856897e-06, "loss": 0.8215, "step": 1050 }, { "epoch": 0.03221159740100527, "grad_norm": 2.61061790777966, "learning_rate": 9.999872302347498e-06, "loss": 0.8924, "step": 1051 }, { "epoch": 0.03224224592374648, "grad_norm": 2.4432782558867374, "learning_rate": 9.999868730573078e-06, "loss": 0.8192, "step": 1052 }, { "epoch": 0.03227289444648768, "grad_norm": 2.4316508050580032, "learning_rate": 9.999865109533672e-06, "loss": 0.8641, "step": 1053 }, { "epoch": 0.03230354296922888, "grad_norm": 1.2184657279460587, "learning_rate": 9.999861439229317e-06, "loss": 0.558, "step": 1054 }, { "epoch": 0.03233419149197009, "grad_norm": 1.1262646298539005, "learning_rate": 9.999857719660047e-06, "loss": 0.5644, "step": 1055 }, { "epoch": 0.032364840014711294, "grad_norm": 2.226972248339666, "learning_rate": 9.999853950825898e-06, "loss": 0.7586, "step": 1056 }, { "epoch": 0.03239548853745249, "grad_norm": 2.7764263722418314, "learning_rate": 9.999850132726912e-06, "loss": 0.8724, "step": 1057 }, { "epoch": 0.0324261370601937, "grad_norm": 2.3376827236125806, "learning_rate": 9.999846265363121e-06, "loss": 0.7445, "step": 1058 }, { "epoch": 0.032456785582934904, "grad_norm": 2.4856492994213837, "learning_rate": 9.999842348734566e-06, "loss": 0.8068, "step": 1059 }, { "epoch": 0.03248743410567611, "grad_norm": 2.3770349804539737, "learning_rate": 9.999838382841285e-06, "loss": 0.8208, "step": 1060 }, { "epoch": 0.03251808262841731, "grad_norm": 2.3568466755180855, "learning_rate": 9.99983436768332e-06, "loss": 0.7486, "step": 1061 }, { "epoch": 0.032548731151158514, "grad_norm": 1.6642085284394468, "learning_rate": 9.999830303260703e-06, "loss": 0.549, "step": 1062 }, { "epoch": 0.03257937967389972, "grad_norm": 2.3632521691491744, "learning_rate": 9.999826189573482e-06, "loss": 0.854, "step": 1063 }, { "epoch": 0.032610028196640925, "grad_norm": 2.5920999059132614, "learning_rate": 9.999822026621692e-06, "loss": 0.8114, "step": 1064 }, { "epoch": 0.032640676719382124, "grad_norm": 2.2699637742764334, "learning_rate": 9.999817814405379e-06, "loss": 0.748, "step": 1065 }, { "epoch": 0.03267132524212333, "grad_norm": 2.5939944981720746, "learning_rate": 9.99981355292458e-06, "loss": 0.9342, "step": 1066 }, { "epoch": 0.032701973764864535, "grad_norm": 2.2155506579062596, "learning_rate": 9.999809242179339e-06, "loss": 0.7644, "step": 1067 }, { "epoch": 0.03273262228760574, "grad_norm": 2.5201942129859325, "learning_rate": 9.9998048821697e-06, "loss": 0.9008, "step": 1068 }, { "epoch": 0.03276327081034694, "grad_norm": 2.472011512745823, "learning_rate": 9.999800472895701e-06, "loss": 0.8835, "step": 1069 }, { "epoch": 0.032793919333088145, "grad_norm": 2.494947955662713, "learning_rate": 9.99979601435739e-06, "loss": 0.8022, "step": 1070 }, { "epoch": 0.03282456785582935, "grad_norm": 2.040271663123092, "learning_rate": 9.99979150655481e-06, "loss": 0.748, "step": 1071 }, { "epoch": 0.03285521637857055, "grad_norm": 2.2710535630077446, "learning_rate": 9.999786949488007e-06, "loss": 0.7861, "step": 1072 }, { "epoch": 0.032885864901311755, "grad_norm": 2.646934598491452, "learning_rate": 9.99978234315702e-06, "loss": 0.8808, "step": 1073 }, { "epoch": 0.03291651342405296, "grad_norm": 2.525620782873754, "learning_rate": 9.999777687561901e-06, "loss": 0.789, "step": 1074 }, { "epoch": 0.032947161946794166, "grad_norm": 2.3310707702527593, "learning_rate": 9.999772982702694e-06, "loss": 0.8988, "step": 1075 }, { "epoch": 0.032977810469535365, "grad_norm": 2.6458181105796834, "learning_rate": 9.999768228579442e-06, "loss": 0.7743, "step": 1076 }, { "epoch": 0.03300845899227657, "grad_norm": 2.1170640293649536, "learning_rate": 9.999763425192195e-06, "loss": 0.5613, "step": 1077 }, { "epoch": 0.03303910751501778, "grad_norm": 2.400128379003866, "learning_rate": 9.999758572541001e-06, "loss": 0.8, "step": 1078 }, { "epoch": 0.03306975603775898, "grad_norm": 2.8142451399364368, "learning_rate": 9.999753670625905e-06, "loss": 0.6803, "step": 1079 }, { "epoch": 0.03310040456050018, "grad_norm": 2.6209025591595725, "learning_rate": 9.999748719446958e-06, "loss": 0.9168, "step": 1080 }, { "epoch": 0.03313105308324139, "grad_norm": 2.6515758826281237, "learning_rate": 9.999743719004206e-06, "loss": 0.8237, "step": 1081 }, { "epoch": 0.03316170160598259, "grad_norm": 2.3292769181387554, "learning_rate": 9.999738669297702e-06, "loss": 0.7961, "step": 1082 }, { "epoch": 0.0331923501287238, "grad_norm": 1.2490564090991496, "learning_rate": 9.999733570327492e-06, "loss": 0.566, "step": 1083 }, { "epoch": 0.033222998651465, "grad_norm": 2.3137931009194235, "learning_rate": 9.99972842209363e-06, "loss": 0.7587, "step": 1084 }, { "epoch": 0.0332536471742062, "grad_norm": 2.507608927343861, "learning_rate": 9.999723224596163e-06, "loss": 0.8198, "step": 1085 }, { "epoch": 0.03328429569694741, "grad_norm": 2.430802025399781, "learning_rate": 9.999717977835144e-06, "loss": 0.7868, "step": 1086 }, { "epoch": 0.033314944219688614, "grad_norm": 2.5033238227369923, "learning_rate": 9.999712681810626e-06, "loss": 0.9802, "step": 1087 }, { "epoch": 0.03334559274242981, "grad_norm": 2.5769256893074446, "learning_rate": 9.999707336522657e-06, "loss": 0.6047, "step": 1088 }, { "epoch": 0.03337624126517102, "grad_norm": 2.458828999837031, "learning_rate": 9.999701941971293e-06, "loss": 0.9125, "step": 1089 }, { "epoch": 0.033406889787912224, "grad_norm": 2.403726057655127, "learning_rate": 9.999696498156588e-06, "loss": 0.7567, "step": 1090 }, { "epoch": 0.03343753831065343, "grad_norm": 1.9811919812786096, "learning_rate": 9.999691005078593e-06, "loss": 0.7911, "step": 1091 }, { "epoch": 0.03346818683339463, "grad_norm": 1.035247123478199, "learning_rate": 9.999685462737364e-06, "loss": 0.5371, "step": 1092 }, { "epoch": 0.033498835356135834, "grad_norm": 2.158110953273258, "learning_rate": 9.999679871132955e-06, "loss": 0.824, "step": 1093 }, { "epoch": 0.03352948387887704, "grad_norm": 2.2691176151907677, "learning_rate": 9.999674230265421e-06, "loss": 0.7653, "step": 1094 }, { "epoch": 0.033560132401618245, "grad_norm": 2.293793414169252, "learning_rate": 9.999668540134817e-06, "loss": 0.867, "step": 1095 }, { "epoch": 0.033590780924359444, "grad_norm": 2.4699018487889095, "learning_rate": 9.9996628007412e-06, "loss": 0.918, "step": 1096 }, { "epoch": 0.03362142944710065, "grad_norm": 2.572940617252387, "learning_rate": 9.999657012084627e-06, "loss": 0.8195, "step": 1097 }, { "epoch": 0.033652077969841855, "grad_norm": 2.0490720523299393, "learning_rate": 9.999651174165152e-06, "loss": 0.7412, "step": 1098 }, { "epoch": 0.03368272649258306, "grad_norm": 2.262082679125583, "learning_rate": 9.999645286982838e-06, "loss": 0.8366, "step": 1099 }, { "epoch": 0.03371337501532426, "grad_norm": 2.348774998926491, "learning_rate": 9.999639350537736e-06, "loss": 0.8933, "step": 1100 }, { "epoch": 0.033744023538065465, "grad_norm": 2.1547763670433606, "learning_rate": 9.99963336482991e-06, "loss": 0.7725, "step": 1101 }, { "epoch": 0.03377467206080667, "grad_norm": 1.0132699143945691, "learning_rate": 9.999627329859418e-06, "loss": 0.5595, "step": 1102 }, { "epoch": 0.03380532058354787, "grad_norm": 2.557938089332334, "learning_rate": 9.999621245626318e-06, "loss": 0.8552, "step": 1103 }, { "epoch": 0.033835969106289075, "grad_norm": 2.46586989608345, "learning_rate": 9.999615112130671e-06, "loss": 0.8697, "step": 1104 }, { "epoch": 0.03386661762903028, "grad_norm": 2.439887275933391, "learning_rate": 9.999608929372537e-06, "loss": 0.7895, "step": 1105 }, { "epoch": 0.03389726615177149, "grad_norm": 2.2902043558871767, "learning_rate": 9.999602697351978e-06, "loss": 0.8417, "step": 1106 }, { "epoch": 0.033927914674512685, "grad_norm": 2.6627709575004044, "learning_rate": 9.999596416069052e-06, "loss": 0.7699, "step": 1107 }, { "epoch": 0.03395856319725389, "grad_norm": 2.568585532926355, "learning_rate": 9.999590085523825e-06, "loss": 0.7871, "step": 1108 }, { "epoch": 0.0339892117199951, "grad_norm": 1.989612905280849, "learning_rate": 9.999583705716357e-06, "loss": 0.7543, "step": 1109 }, { "epoch": 0.0340198602427363, "grad_norm": 2.1448993456587457, "learning_rate": 9.999577276646712e-06, "loss": 0.794, "step": 1110 }, { "epoch": 0.0340505087654775, "grad_norm": 2.023026456508637, "learning_rate": 9.999570798314952e-06, "loss": 0.8757, "step": 1111 }, { "epoch": 0.03408115728821871, "grad_norm": 2.3941191718720733, "learning_rate": 9.999564270721144e-06, "loss": 0.7707, "step": 1112 }, { "epoch": 0.03411180581095991, "grad_norm": 2.4323990874724357, "learning_rate": 9.999557693865349e-06, "loss": 0.7984, "step": 1113 }, { "epoch": 0.03414245433370112, "grad_norm": 2.0600381754821258, "learning_rate": 9.999551067747632e-06, "loss": 0.7814, "step": 1114 }, { "epoch": 0.03417310285644232, "grad_norm": 2.182277379463982, "learning_rate": 9.999544392368059e-06, "loss": 0.7568, "step": 1115 }, { "epoch": 0.03420375137918352, "grad_norm": 2.1902019503343753, "learning_rate": 9.999537667726697e-06, "loss": 0.9036, "step": 1116 }, { "epoch": 0.03423439990192473, "grad_norm": 2.5173505135317926, "learning_rate": 9.99953089382361e-06, "loss": 0.8676, "step": 1117 }, { "epoch": 0.034265048424665934, "grad_norm": 1.1163431360204223, "learning_rate": 9.999524070658865e-06, "loss": 0.5419, "step": 1118 }, { "epoch": 0.03429569694740713, "grad_norm": 2.4001408022031154, "learning_rate": 9.999517198232533e-06, "loss": 0.8907, "step": 1119 }, { "epoch": 0.03432634547014834, "grad_norm": 2.2387242416523945, "learning_rate": 9.999510276544677e-06, "loss": 0.7529, "step": 1120 }, { "epoch": 0.034356993992889544, "grad_norm": 3.2050856391832325, "learning_rate": 9.999503305595369e-06, "loss": 0.7505, "step": 1121 }, { "epoch": 0.03438764251563075, "grad_norm": 2.3926212355398007, "learning_rate": 9.999496285384674e-06, "loss": 0.8693, "step": 1122 }, { "epoch": 0.03441829103837195, "grad_norm": 2.6557710885195753, "learning_rate": 9.999489215912664e-06, "loss": 0.803, "step": 1123 }, { "epoch": 0.034448939561113154, "grad_norm": 2.6874205758410135, "learning_rate": 9.999482097179406e-06, "loss": 0.8458, "step": 1124 }, { "epoch": 0.03447958808385436, "grad_norm": 2.2936650368349456, "learning_rate": 9.999474929184972e-06, "loss": 0.7921, "step": 1125 }, { "epoch": 0.034510236606595565, "grad_norm": 2.3147048994528663, "learning_rate": 9.999467711929433e-06, "loss": 0.7233, "step": 1126 }, { "epoch": 0.034540885129336764, "grad_norm": 5.692900448486351, "learning_rate": 9.99946044541286e-06, "loss": 0.8591, "step": 1127 }, { "epoch": 0.03457153365207797, "grad_norm": 2.189037285226436, "learning_rate": 9.999453129635324e-06, "loss": 0.7533, "step": 1128 }, { "epoch": 0.034602182174819175, "grad_norm": 2.4674549585824375, "learning_rate": 9.999445764596896e-06, "loss": 0.7326, "step": 1129 }, { "epoch": 0.03463283069756038, "grad_norm": 2.20437537645294, "learning_rate": 9.99943835029765e-06, "loss": 0.7303, "step": 1130 }, { "epoch": 0.03466347922030158, "grad_norm": 2.296370408322298, "learning_rate": 9.99943088673766e-06, "loss": 0.8341, "step": 1131 }, { "epoch": 0.034694127743042785, "grad_norm": 1.340766069394169, "learning_rate": 9.999423373916997e-06, "loss": 0.5585, "step": 1132 }, { "epoch": 0.03472477626578399, "grad_norm": 2.390635369259343, "learning_rate": 9.999415811835737e-06, "loss": 0.9172, "step": 1133 }, { "epoch": 0.03475542478852519, "grad_norm": 2.4641885082240655, "learning_rate": 9.999408200493954e-06, "loss": 0.8435, "step": 1134 }, { "epoch": 0.034786073311266395, "grad_norm": 2.279881692362399, "learning_rate": 9.999400539891722e-06, "loss": 0.8371, "step": 1135 }, { "epoch": 0.0348167218340076, "grad_norm": 2.186729081161133, "learning_rate": 9.999392830029118e-06, "loss": 0.8186, "step": 1136 }, { "epoch": 0.03484737035674881, "grad_norm": 17.21561035671076, "learning_rate": 9.999385070906217e-06, "loss": 0.8274, "step": 1137 }, { "epoch": 0.034878018879490005, "grad_norm": 1.0693229787310432, "learning_rate": 9.999377262523095e-06, "loss": 0.5613, "step": 1138 }, { "epoch": 0.03490866740223121, "grad_norm": 2.3126547719716504, "learning_rate": 9.99936940487983e-06, "loss": 0.8647, "step": 1139 }, { "epoch": 0.03493931592497242, "grad_norm": 2.3307068656080365, "learning_rate": 9.9993614979765e-06, "loss": 0.8245, "step": 1140 }, { "epoch": 0.03496996444771362, "grad_norm": 2.352976815356306, "learning_rate": 9.999353541813182e-06, "loss": 0.8728, "step": 1141 }, { "epoch": 0.03500061297045482, "grad_norm": 2.165489248325459, "learning_rate": 9.999345536389955e-06, "loss": 0.7996, "step": 1142 }, { "epoch": 0.03503126149319603, "grad_norm": 2.349564845976544, "learning_rate": 9.999337481706897e-06, "loss": 0.7857, "step": 1143 }, { "epoch": 0.03506191001593723, "grad_norm": 2.587876007103602, "learning_rate": 9.999329377764086e-06, "loss": 0.7868, "step": 1144 }, { "epoch": 0.03509255853867844, "grad_norm": 2.269658881688955, "learning_rate": 9.999321224561604e-06, "loss": 0.842, "step": 1145 }, { "epoch": 0.03512320706141964, "grad_norm": 2.98957266016936, "learning_rate": 9.999313022099533e-06, "loss": 0.8809, "step": 1146 }, { "epoch": 0.03515385558416084, "grad_norm": 2.6150043031132375, "learning_rate": 9.999304770377948e-06, "loss": 0.7714, "step": 1147 }, { "epoch": 0.03518450410690205, "grad_norm": 2.578859124757857, "learning_rate": 9.999296469396938e-06, "loss": 0.8503, "step": 1148 }, { "epoch": 0.035215152629643254, "grad_norm": 2.069880551153864, "learning_rate": 9.999288119156578e-06, "loss": 0.6836, "step": 1149 }, { "epoch": 0.03524580115238445, "grad_norm": 2.508195734150611, "learning_rate": 9.999279719656953e-06, "loss": 0.908, "step": 1150 }, { "epoch": 0.03527644967512566, "grad_norm": 1.114813611820268, "learning_rate": 9.999271270898148e-06, "loss": 0.5538, "step": 1151 }, { "epoch": 0.035307098197866864, "grad_norm": 2.181033165165685, "learning_rate": 9.999262772880241e-06, "loss": 0.7737, "step": 1152 }, { "epoch": 0.03533774672060807, "grad_norm": 2.3185263148704585, "learning_rate": 9.99925422560332e-06, "loss": 0.7584, "step": 1153 }, { "epoch": 0.03536839524334927, "grad_norm": 2.2578022835719636, "learning_rate": 9.999245629067469e-06, "loss": 0.8076, "step": 1154 }, { "epoch": 0.035399043766090474, "grad_norm": 2.52497922928254, "learning_rate": 9.999236983272772e-06, "loss": 0.7871, "step": 1155 }, { "epoch": 0.03542969228883168, "grad_norm": 2.5642672420690715, "learning_rate": 9.999228288219314e-06, "loss": 0.8095, "step": 1156 }, { "epoch": 0.035460340811572885, "grad_norm": 1.0931384078123485, "learning_rate": 9.99921954390718e-06, "loss": 0.5532, "step": 1157 }, { "epoch": 0.035490989334314084, "grad_norm": 2.304652164571122, "learning_rate": 9.999210750336455e-06, "loss": 0.8847, "step": 1158 }, { "epoch": 0.03552163785705529, "grad_norm": 2.1737805925639493, "learning_rate": 9.999201907507227e-06, "loss": 0.8093, "step": 1159 }, { "epoch": 0.035552286379796495, "grad_norm": 2.3159717954284598, "learning_rate": 9.999193015419586e-06, "loss": 0.7536, "step": 1160 }, { "epoch": 0.0355829349025377, "grad_norm": 2.2435102327799332, "learning_rate": 9.999184074073618e-06, "loss": 0.7864, "step": 1161 }, { "epoch": 0.0356135834252789, "grad_norm": 2.7520783444048798, "learning_rate": 9.999175083469407e-06, "loss": 0.7822, "step": 1162 }, { "epoch": 0.035644231948020105, "grad_norm": 2.6329354194589225, "learning_rate": 9.999166043607048e-06, "loss": 0.7942, "step": 1163 }, { "epoch": 0.03567488047076131, "grad_norm": 2.309805879337428, "learning_rate": 9.999156954486624e-06, "loss": 0.77, "step": 1164 }, { "epoch": 0.03570552899350252, "grad_norm": 2.435278576632006, "learning_rate": 9.999147816108229e-06, "loss": 0.7738, "step": 1165 }, { "epoch": 0.035736177516243715, "grad_norm": 2.490029926748888, "learning_rate": 9.999138628471951e-06, "loss": 0.7987, "step": 1166 }, { "epoch": 0.03576682603898492, "grad_norm": 1.3000128070090873, "learning_rate": 9.999129391577882e-06, "loss": 0.5664, "step": 1167 }, { "epoch": 0.03579747456172613, "grad_norm": 2.5118180316722842, "learning_rate": 9.99912010542611e-06, "loss": 0.8497, "step": 1168 }, { "epoch": 0.035828123084467325, "grad_norm": 2.343991069668834, "learning_rate": 9.99911077001673e-06, "loss": 0.8863, "step": 1169 }, { "epoch": 0.03585877160720853, "grad_norm": 2.473300398434256, "learning_rate": 9.999101385349831e-06, "loss": 0.8104, "step": 1170 }, { "epoch": 0.03588942012994974, "grad_norm": 2.4629907656042374, "learning_rate": 9.99909195142551e-06, "loss": 0.7638, "step": 1171 }, { "epoch": 0.03592006865269094, "grad_norm": 2.2113459715155175, "learning_rate": 9.999082468243855e-06, "loss": 0.8351, "step": 1172 }, { "epoch": 0.03595071717543214, "grad_norm": 2.2758296986961284, "learning_rate": 9.99907293580496e-06, "loss": 0.7766, "step": 1173 }, { "epoch": 0.03598136569817335, "grad_norm": 2.4554081954752123, "learning_rate": 9.999063354108923e-06, "loss": 0.8179, "step": 1174 }, { "epoch": 0.03601201422091455, "grad_norm": 2.394345240768447, "learning_rate": 9.999053723155834e-06, "loss": 0.8555, "step": 1175 }, { "epoch": 0.03604266274365576, "grad_norm": 2.3045510599878245, "learning_rate": 9.99904404294579e-06, "loss": 0.7003, "step": 1176 }, { "epoch": 0.03607331126639696, "grad_norm": 2.6574272492988604, "learning_rate": 9.999034313478888e-06, "loss": 0.7545, "step": 1177 }, { "epoch": 0.03610395978913816, "grad_norm": 2.611496629034802, "learning_rate": 9.99902453475522e-06, "loss": 0.8053, "step": 1178 }, { "epoch": 0.03613460831187937, "grad_norm": 2.324609420303341, "learning_rate": 9.999014706774883e-06, "loss": 0.7734, "step": 1179 }, { "epoch": 0.036165256834620574, "grad_norm": 2.1973597974679784, "learning_rate": 9.999004829537976e-06, "loss": 0.8306, "step": 1180 }, { "epoch": 0.03619590535736177, "grad_norm": 2.5063279130867877, "learning_rate": 9.998994903044596e-06, "loss": 0.8456, "step": 1181 }, { "epoch": 0.03622655388010298, "grad_norm": 2.2406035395671284, "learning_rate": 9.99898492729484e-06, "loss": 0.7568, "step": 1182 }, { "epoch": 0.036257202402844184, "grad_norm": 2.3335594026309994, "learning_rate": 9.998974902288805e-06, "loss": 0.7324, "step": 1183 }, { "epoch": 0.03628785092558539, "grad_norm": 2.3759307749183205, "learning_rate": 9.998964828026594e-06, "loss": 0.8163, "step": 1184 }, { "epoch": 0.03631849944832659, "grad_norm": 3.1005749591746388, "learning_rate": 9.998954704508301e-06, "loss": 0.8395, "step": 1185 }, { "epoch": 0.036349147971067794, "grad_norm": 2.216687360826011, "learning_rate": 9.99894453173403e-06, "loss": 0.8065, "step": 1186 }, { "epoch": 0.036379796493809, "grad_norm": 2.56813275971025, "learning_rate": 9.998934309703878e-06, "loss": 0.7761, "step": 1187 }, { "epoch": 0.036410445016550205, "grad_norm": 1.6231557064244992, "learning_rate": 9.998924038417947e-06, "loss": 0.5608, "step": 1188 }, { "epoch": 0.036441093539291404, "grad_norm": 2.435695465148863, "learning_rate": 9.99891371787634e-06, "loss": 0.7934, "step": 1189 }, { "epoch": 0.03647174206203261, "grad_norm": 2.337609218110758, "learning_rate": 9.998903348079157e-06, "loss": 0.8472, "step": 1190 }, { "epoch": 0.036502390584773815, "grad_norm": 2.8174565275281527, "learning_rate": 9.998892929026499e-06, "loss": 0.7977, "step": 1191 }, { "epoch": 0.03653303910751502, "grad_norm": 2.4528373992318606, "learning_rate": 9.998882460718472e-06, "loss": 0.8889, "step": 1192 }, { "epoch": 0.03656368763025622, "grad_norm": 2.3138289630510727, "learning_rate": 9.998871943155175e-06, "loss": 0.853, "step": 1193 }, { "epoch": 0.036594336152997425, "grad_norm": 4.452215825315945, "learning_rate": 9.998861376336715e-06, "loss": 0.8206, "step": 1194 }, { "epoch": 0.03662498467573863, "grad_norm": 2.4563495698883955, "learning_rate": 9.998850760263194e-06, "loss": 0.8497, "step": 1195 }, { "epoch": 0.03665563319847984, "grad_norm": 2.4991425664663405, "learning_rate": 9.998840094934717e-06, "loss": 0.8638, "step": 1196 }, { "epoch": 0.036686281721221035, "grad_norm": 1.4002232172667024, "learning_rate": 9.998829380351391e-06, "loss": 0.5537, "step": 1197 }, { "epoch": 0.03671693024396224, "grad_norm": 2.388427552701879, "learning_rate": 9.99881861651332e-06, "loss": 0.8026, "step": 1198 }, { "epoch": 0.03674757876670345, "grad_norm": 2.521629931420432, "learning_rate": 9.998807803420609e-06, "loss": 0.819, "step": 1199 }, { "epoch": 0.036778227289444645, "grad_norm": 2.4461891535913987, "learning_rate": 9.998796941073365e-06, "loss": 0.8504, "step": 1200 }, { "epoch": 0.03680887581218585, "grad_norm": 2.491676746886056, "learning_rate": 9.998786029471698e-06, "loss": 0.8648, "step": 1201 }, { "epoch": 0.03683952433492706, "grad_norm": 2.385062744544643, "learning_rate": 9.998775068615713e-06, "loss": 0.8178, "step": 1202 }, { "epoch": 0.03687017285766826, "grad_norm": 2.4137661580618714, "learning_rate": 9.998764058505517e-06, "loss": 0.7524, "step": 1203 }, { "epoch": 0.03690082138040946, "grad_norm": 2.3732376372654964, "learning_rate": 9.99875299914122e-06, "loss": 0.7433, "step": 1204 }, { "epoch": 0.03693146990315067, "grad_norm": 2.546819248563387, "learning_rate": 9.99874189052293e-06, "loss": 0.8167, "step": 1205 }, { "epoch": 0.03696211842589187, "grad_norm": 3.39279348233648, "learning_rate": 9.99873073265076e-06, "loss": 0.8905, "step": 1206 }, { "epoch": 0.03699276694863308, "grad_norm": 2.4951013129548003, "learning_rate": 9.998719525524814e-06, "loss": 0.7438, "step": 1207 }, { "epoch": 0.03702341547137428, "grad_norm": 2.416194726365278, "learning_rate": 9.998708269145207e-06, "loss": 0.7208, "step": 1208 }, { "epoch": 0.03705406399411548, "grad_norm": 1.4300394333266837, "learning_rate": 9.99869696351205e-06, "loss": 0.5918, "step": 1209 }, { "epoch": 0.03708471251685669, "grad_norm": 2.458616117707772, "learning_rate": 9.99868560862545e-06, "loss": 0.8014, "step": 1210 }, { "epoch": 0.037115361039597894, "grad_norm": 1.1679012695246136, "learning_rate": 9.998674204485524e-06, "loss": 0.5858, "step": 1211 }, { "epoch": 0.03714600956233909, "grad_norm": 2.5346527295340935, "learning_rate": 9.99866275109238e-06, "loss": 0.7466, "step": 1212 }, { "epoch": 0.0371766580850803, "grad_norm": 2.8537656031248653, "learning_rate": 9.998651248446135e-06, "loss": 0.9287, "step": 1213 }, { "epoch": 0.037207306607821504, "grad_norm": 2.5152542815805483, "learning_rate": 9.9986396965469e-06, "loss": 0.8112, "step": 1214 }, { "epoch": 0.03723795513056271, "grad_norm": 2.423784493233649, "learning_rate": 9.998628095394786e-06, "loss": 0.7984, "step": 1215 }, { "epoch": 0.03726860365330391, "grad_norm": 2.2597857321255517, "learning_rate": 9.998616444989912e-06, "loss": 0.7576, "step": 1216 }, { "epoch": 0.037299252176045114, "grad_norm": 2.446026061604031, "learning_rate": 9.998604745332394e-06, "loss": 0.8393, "step": 1217 }, { "epoch": 0.03732990069878632, "grad_norm": 2.5362137685645663, "learning_rate": 9.998592996422342e-06, "loss": 0.7858, "step": 1218 }, { "epoch": 0.037360549221527525, "grad_norm": 2.335274359108287, "learning_rate": 9.998581198259875e-06, "loss": 0.7597, "step": 1219 }, { "epoch": 0.037391197744268724, "grad_norm": 2.7909850184132545, "learning_rate": 9.998569350845107e-06, "loss": 0.9007, "step": 1220 }, { "epoch": 0.03742184626700993, "grad_norm": 4.575709528844049, "learning_rate": 9.998557454178158e-06, "loss": 0.8208, "step": 1221 }, { "epoch": 0.037452494789751135, "grad_norm": 2.1997341708106632, "learning_rate": 9.998545508259143e-06, "loss": 0.5841, "step": 1222 }, { "epoch": 0.03748314331249234, "grad_norm": 2.43952295338665, "learning_rate": 9.99853351308818e-06, "loss": 0.8382, "step": 1223 }, { "epoch": 0.03751379183523354, "grad_norm": 2.68899216541948, "learning_rate": 9.998521468665388e-06, "loss": 0.7649, "step": 1224 }, { "epoch": 0.037544440357974745, "grad_norm": 2.3047228575432652, "learning_rate": 9.998509374990885e-06, "loss": 0.7827, "step": 1225 }, { "epoch": 0.03757508888071595, "grad_norm": 2.417226639677862, "learning_rate": 9.998497232064789e-06, "loss": 0.8708, "step": 1226 }, { "epoch": 0.03760573740345716, "grad_norm": 2.298338055701502, "learning_rate": 9.998485039887222e-06, "loss": 0.841, "step": 1227 }, { "epoch": 0.037636385926198355, "grad_norm": 2.7003783250773514, "learning_rate": 9.998472798458302e-06, "loss": 0.8449, "step": 1228 }, { "epoch": 0.03766703444893956, "grad_norm": 2.642130170114504, "learning_rate": 9.998460507778152e-06, "loss": 0.9184, "step": 1229 }, { "epoch": 0.03769768297168077, "grad_norm": 2.192112056969457, "learning_rate": 9.99844816784689e-06, "loss": 0.7915, "step": 1230 }, { "epoch": 0.037728331494421966, "grad_norm": 1.872364878386262, "learning_rate": 9.99843577866464e-06, "loss": 0.5601, "step": 1231 }, { "epoch": 0.03775898001716317, "grad_norm": 2.5319559474101285, "learning_rate": 9.998423340231524e-06, "loss": 0.8422, "step": 1232 }, { "epoch": 0.03778962853990438, "grad_norm": 2.2141416119707724, "learning_rate": 9.998410852547663e-06, "loss": 0.7951, "step": 1233 }, { "epoch": 0.03782027706264558, "grad_norm": 2.0029568131255986, "learning_rate": 9.998398315613182e-06, "loss": 0.7741, "step": 1234 }, { "epoch": 0.03785092558538678, "grad_norm": 2.2734719889639528, "learning_rate": 9.998385729428202e-06, "loss": 0.8576, "step": 1235 }, { "epoch": 0.03788157410812799, "grad_norm": 2.3087177324198107, "learning_rate": 9.998373093992851e-06, "loss": 0.847, "step": 1236 }, { "epoch": 0.03791222263086919, "grad_norm": 2.5756232248342084, "learning_rate": 9.998360409307248e-06, "loss": 0.8389, "step": 1237 }, { "epoch": 0.0379428711536104, "grad_norm": 2.2921474377958257, "learning_rate": 9.998347675371523e-06, "loss": 0.8194, "step": 1238 }, { "epoch": 0.0379735196763516, "grad_norm": 2.271328827023043, "learning_rate": 9.998334892185799e-06, "loss": 0.7748, "step": 1239 }, { "epoch": 0.0380041681990928, "grad_norm": 1.2352733126727184, "learning_rate": 9.998322059750203e-06, "loss": 0.5326, "step": 1240 }, { "epoch": 0.03803481672183401, "grad_norm": 2.535866704124961, "learning_rate": 9.99830917806486e-06, "loss": 0.824, "step": 1241 }, { "epoch": 0.038065465244575214, "grad_norm": 2.218663381781324, "learning_rate": 9.998296247129897e-06, "loss": 0.8023, "step": 1242 }, { "epoch": 0.03809611376731641, "grad_norm": 2.3999192543366865, "learning_rate": 9.998283266945444e-06, "loss": 0.8693, "step": 1243 }, { "epoch": 0.03812676229005762, "grad_norm": 2.280815314988098, "learning_rate": 9.998270237511627e-06, "loss": 0.7994, "step": 1244 }, { "epoch": 0.038157410812798824, "grad_norm": 2.5630116463088775, "learning_rate": 9.998257158828572e-06, "loss": 0.743, "step": 1245 }, { "epoch": 0.03818805933554003, "grad_norm": 2.5554817778268277, "learning_rate": 9.998244030896413e-06, "loss": 0.7884, "step": 1246 }, { "epoch": 0.03821870785828123, "grad_norm": 1.076889476204951, "learning_rate": 9.998230853715276e-06, "loss": 0.5809, "step": 1247 }, { "epoch": 0.038249356381022434, "grad_norm": 1.0757468502129648, "learning_rate": 9.99821762728529e-06, "loss": 0.538, "step": 1248 }, { "epoch": 0.03828000490376364, "grad_norm": 2.345118834006311, "learning_rate": 9.998204351606591e-06, "loss": 0.8564, "step": 1249 }, { "epoch": 0.038310653426504845, "grad_norm": 2.226306501626551, "learning_rate": 9.998191026679302e-06, "loss": 0.6967, "step": 1250 }, { "epoch": 0.038341301949246044, "grad_norm": 2.3457438917516202, "learning_rate": 9.99817765250356e-06, "loss": 0.7822, "step": 1251 }, { "epoch": 0.03837195047198725, "grad_norm": 2.429551463701764, "learning_rate": 9.998164229079491e-06, "loss": 0.7951, "step": 1252 }, { "epoch": 0.038402598994728455, "grad_norm": 2.7838052828345536, "learning_rate": 9.998150756407236e-06, "loss": 0.7795, "step": 1253 }, { "epoch": 0.03843324751746966, "grad_norm": 2.0825728479181733, "learning_rate": 9.99813723448692e-06, "loss": 0.785, "step": 1254 }, { "epoch": 0.03846389604021086, "grad_norm": 2.222400409695483, "learning_rate": 9.99812366331868e-06, "loss": 0.8755, "step": 1255 }, { "epoch": 0.038494544562952066, "grad_norm": 1.5256205615734886, "learning_rate": 9.998110042902648e-06, "loss": 0.549, "step": 1256 }, { "epoch": 0.03852519308569327, "grad_norm": 2.700863098160979, "learning_rate": 9.998096373238958e-06, "loss": 0.7568, "step": 1257 }, { "epoch": 0.03855584160843448, "grad_norm": 2.089226073574537, "learning_rate": 9.998082654327748e-06, "loss": 0.7135, "step": 1258 }, { "epoch": 0.038586490131175676, "grad_norm": 2.188881239896762, "learning_rate": 9.99806888616915e-06, "loss": 0.8559, "step": 1259 }, { "epoch": 0.03861713865391688, "grad_norm": 2.7174378760192, "learning_rate": 9.998055068763299e-06, "loss": 0.8502, "step": 1260 }, { "epoch": 0.03864778717665809, "grad_norm": 1.9949947306197129, "learning_rate": 9.998041202110334e-06, "loss": 0.7014, "step": 1261 }, { "epoch": 0.038678435699399286, "grad_norm": 1.046443732324221, "learning_rate": 9.99802728621039e-06, "loss": 0.5601, "step": 1262 }, { "epoch": 0.03870908422214049, "grad_norm": 2.435639508644738, "learning_rate": 9.998013321063604e-06, "loss": 0.9334, "step": 1263 }, { "epoch": 0.0387397327448817, "grad_norm": 2.136018925602849, "learning_rate": 9.997999306670114e-06, "loss": 0.8217, "step": 1264 }, { "epoch": 0.0387703812676229, "grad_norm": 2.2523974214495914, "learning_rate": 9.997985243030058e-06, "loss": 0.8331, "step": 1265 }, { "epoch": 0.0388010297903641, "grad_norm": 2.469106404279449, "learning_rate": 9.997971130143575e-06, "loss": 0.924, "step": 1266 }, { "epoch": 0.03883167831310531, "grad_norm": 2.585980535080345, "learning_rate": 9.997956968010803e-06, "loss": 0.7756, "step": 1267 }, { "epoch": 0.03886232683584651, "grad_norm": 1.0478295308717978, "learning_rate": 9.997942756631883e-06, "loss": 0.5419, "step": 1268 }, { "epoch": 0.03889297535858772, "grad_norm": 2.3001692835086898, "learning_rate": 9.997928496006954e-06, "loss": 0.8555, "step": 1269 }, { "epoch": 0.03892362388132892, "grad_norm": 2.2760293739429667, "learning_rate": 9.997914186136157e-06, "loss": 0.862, "step": 1270 }, { "epoch": 0.03895427240407012, "grad_norm": 2.56513413379102, "learning_rate": 9.997899827019632e-06, "loss": 0.857, "step": 1271 }, { "epoch": 0.03898492092681133, "grad_norm": 2.35993494544197, "learning_rate": 9.997885418657522e-06, "loss": 0.7945, "step": 1272 }, { "epoch": 0.039015569449552534, "grad_norm": 2.606988138816629, "learning_rate": 9.997870961049968e-06, "loss": 0.8466, "step": 1273 }, { "epoch": 0.03904621797229373, "grad_norm": 2.883900724456928, "learning_rate": 9.997856454197112e-06, "loss": 0.8733, "step": 1274 }, { "epoch": 0.03907686649503494, "grad_norm": 2.2494859768605755, "learning_rate": 9.997841898099098e-06, "loss": 0.8349, "step": 1275 }, { "epoch": 0.039107515017776144, "grad_norm": 0.9903774472244229, "learning_rate": 9.99782729275607e-06, "loss": 0.522, "step": 1276 }, { "epoch": 0.03913816354051735, "grad_norm": 2.47503948228917, "learning_rate": 9.997812638168169e-06, "loss": 0.805, "step": 1277 }, { "epoch": 0.03916881206325855, "grad_norm": 2.292389806535465, "learning_rate": 9.997797934335542e-06, "loss": 0.9322, "step": 1278 }, { "epoch": 0.039199460585999754, "grad_norm": 2.3067161104310316, "learning_rate": 9.997783181258335e-06, "loss": 0.7436, "step": 1279 }, { "epoch": 0.03923010910874096, "grad_norm": 2.4074077813182644, "learning_rate": 9.99776837893669e-06, "loss": 0.8355, "step": 1280 }, { "epoch": 0.039260757631482165, "grad_norm": 2.651540927831436, "learning_rate": 9.997753527370756e-06, "loss": 0.7749, "step": 1281 }, { "epoch": 0.039291406154223364, "grad_norm": 2.201433182875649, "learning_rate": 9.997738626560676e-06, "loss": 0.7422, "step": 1282 }, { "epoch": 0.03932205467696457, "grad_norm": 2.448083058906365, "learning_rate": 9.9977236765066e-06, "loss": 0.841, "step": 1283 }, { "epoch": 0.039352703199705776, "grad_norm": 2.4924173375472845, "learning_rate": 9.99770867720867e-06, "loss": 0.7892, "step": 1284 }, { "epoch": 0.03938335172244698, "grad_norm": 2.50706164478169, "learning_rate": 9.997693628667042e-06, "loss": 0.7902, "step": 1285 }, { "epoch": 0.03941400024518818, "grad_norm": 2.4094256889996544, "learning_rate": 9.997678530881858e-06, "loss": 0.6843, "step": 1286 }, { "epoch": 0.039444648767929386, "grad_norm": 2.2500524446880372, "learning_rate": 9.99766338385327e-06, "loss": 0.868, "step": 1287 }, { "epoch": 0.03947529729067059, "grad_norm": 5.412902297197953, "learning_rate": 9.997648187581425e-06, "loss": 0.9382, "step": 1288 }, { "epoch": 0.0395059458134118, "grad_norm": 1.151884197170638, "learning_rate": 9.997632942066473e-06, "loss": 0.5495, "step": 1289 }, { "epoch": 0.039536594336152996, "grad_norm": 2.299171579562735, "learning_rate": 9.997617647308566e-06, "loss": 0.7426, "step": 1290 }, { "epoch": 0.0395672428588942, "grad_norm": 2.198921878256667, "learning_rate": 9.997602303307854e-06, "loss": 0.7182, "step": 1291 }, { "epoch": 0.03959789138163541, "grad_norm": 1.8945985753091934, "learning_rate": 9.997586910064488e-06, "loss": 0.7775, "step": 1292 }, { "epoch": 0.039628539904376606, "grad_norm": 2.170921857912556, "learning_rate": 9.997571467578617e-06, "loss": 0.7186, "step": 1293 }, { "epoch": 0.03965918842711781, "grad_norm": 2.7025635450550927, "learning_rate": 9.997555975850398e-06, "loss": 0.7234, "step": 1294 }, { "epoch": 0.03968983694985902, "grad_norm": 2.5696421616467693, "learning_rate": 9.997540434879981e-06, "loss": 0.8466, "step": 1295 }, { "epoch": 0.03972048547260022, "grad_norm": 2.114008596553646, "learning_rate": 9.997524844667519e-06, "loss": 0.7869, "step": 1296 }, { "epoch": 0.03975113399534142, "grad_norm": 2.418323548822359, "learning_rate": 9.997509205213166e-06, "loss": 0.6592, "step": 1297 }, { "epoch": 0.03978178251808263, "grad_norm": 2.449355384008759, "learning_rate": 9.997493516517076e-06, "loss": 0.881, "step": 1298 }, { "epoch": 0.03981243104082383, "grad_norm": 2.6470930498257816, "learning_rate": 9.997477778579404e-06, "loss": 0.7878, "step": 1299 }, { "epoch": 0.03984307956356504, "grad_norm": 2.2706335094414407, "learning_rate": 9.997461991400306e-06, "loss": 0.8371, "step": 1300 }, { "epoch": 0.03987372808630624, "grad_norm": 2.234291857707326, "learning_rate": 9.997446154979936e-06, "loss": 0.8774, "step": 1301 }, { "epoch": 0.03990437660904744, "grad_norm": 1.2913561604202421, "learning_rate": 9.99743026931845e-06, "loss": 0.5811, "step": 1302 }, { "epoch": 0.03993502513178865, "grad_norm": 2.335897382626118, "learning_rate": 9.997414334416002e-06, "loss": 0.8589, "step": 1303 }, { "epoch": 0.039965673654529854, "grad_norm": 2.4296503278088224, "learning_rate": 9.997398350272755e-06, "loss": 0.8725, "step": 1304 }, { "epoch": 0.03999632217727105, "grad_norm": 2.266641878471248, "learning_rate": 9.997382316888864e-06, "loss": 0.7512, "step": 1305 }, { "epoch": 0.04002697070001226, "grad_norm": 2.6622273048903256, "learning_rate": 9.997366234264484e-06, "loss": 0.8638, "step": 1306 }, { "epoch": 0.040057619222753464, "grad_norm": 2.648553669397978, "learning_rate": 9.997350102399777e-06, "loss": 0.7743, "step": 1307 }, { "epoch": 0.04008826774549467, "grad_norm": 2.2589716161157196, "learning_rate": 9.997333921294901e-06, "loss": 0.7507, "step": 1308 }, { "epoch": 0.04011891626823587, "grad_norm": 0.9563017228979653, "learning_rate": 9.997317690950015e-06, "loss": 0.5251, "step": 1309 }, { "epoch": 0.040149564790977074, "grad_norm": 2.3757089249080217, "learning_rate": 9.997301411365279e-06, "loss": 0.839, "step": 1310 }, { "epoch": 0.04018021331371828, "grad_norm": 1.9882448957519219, "learning_rate": 9.997285082540854e-06, "loss": 0.8511, "step": 1311 }, { "epoch": 0.040210861836459486, "grad_norm": 2.682320876988619, "learning_rate": 9.9972687044769e-06, "loss": 0.9029, "step": 1312 }, { "epoch": 0.040241510359200684, "grad_norm": 2.0655720205043875, "learning_rate": 9.997252277173579e-06, "loss": 0.7971, "step": 1313 }, { "epoch": 0.04027215888194189, "grad_norm": 2.2462029332016065, "learning_rate": 9.997235800631053e-06, "loss": 0.8005, "step": 1314 }, { "epoch": 0.040302807404683096, "grad_norm": 2.4437254505947057, "learning_rate": 9.997219274849483e-06, "loss": 0.8512, "step": 1315 }, { "epoch": 0.0403334559274243, "grad_norm": 2.5546141514212732, "learning_rate": 9.997202699829035e-06, "loss": 0.8685, "step": 1316 }, { "epoch": 0.0403641044501655, "grad_norm": 2.521213911435786, "learning_rate": 9.997186075569869e-06, "loss": 0.7944, "step": 1317 }, { "epoch": 0.040394752972906706, "grad_norm": 2.1530617645070818, "learning_rate": 9.99716940207215e-06, "loss": 0.8684, "step": 1318 }, { "epoch": 0.04042540149564791, "grad_norm": 2.2982018401602673, "learning_rate": 9.997152679336041e-06, "loss": 0.8592, "step": 1319 }, { "epoch": 0.04045605001838912, "grad_norm": 2.3601117449170603, "learning_rate": 9.99713590736171e-06, "loss": 0.8102, "step": 1320 }, { "epoch": 0.040486698541130316, "grad_norm": 2.33911517287537, "learning_rate": 9.99711908614932e-06, "loss": 0.8434, "step": 1321 }, { "epoch": 0.04051734706387152, "grad_norm": 2.1711534775740686, "learning_rate": 9.997102215699037e-06, "loss": 0.8857, "step": 1322 }, { "epoch": 0.04054799558661273, "grad_norm": 1.3884990181295693, "learning_rate": 9.997085296011027e-06, "loss": 0.5762, "step": 1323 }, { "epoch": 0.040578644109353926, "grad_norm": 1.0255321809049662, "learning_rate": 9.997068327085458e-06, "loss": 0.5284, "step": 1324 }, { "epoch": 0.04060929263209513, "grad_norm": 2.2383344000106526, "learning_rate": 9.997051308922495e-06, "loss": 0.8858, "step": 1325 }, { "epoch": 0.04063994115483634, "grad_norm": 1.0157753160679976, "learning_rate": 9.997034241522308e-06, "loss": 0.5542, "step": 1326 }, { "epoch": 0.04067058967757754, "grad_norm": 2.287691432985838, "learning_rate": 9.997017124885063e-06, "loss": 0.7466, "step": 1327 }, { "epoch": 0.04070123820031874, "grad_norm": 2.267355793242582, "learning_rate": 9.99699995901093e-06, "loss": 0.7046, "step": 1328 }, { "epoch": 0.04073188672305995, "grad_norm": 2.3838617122971693, "learning_rate": 9.996982743900077e-06, "loss": 0.816, "step": 1329 }, { "epoch": 0.04076253524580115, "grad_norm": 2.5044310057175254, "learning_rate": 9.996965479552675e-06, "loss": 0.9252, "step": 1330 }, { "epoch": 0.04079318376854236, "grad_norm": 2.120903324988358, "learning_rate": 9.996948165968896e-06, "loss": 0.7706, "step": 1331 }, { "epoch": 0.04082383229128356, "grad_norm": 2.021878187893428, "learning_rate": 9.996930803148905e-06, "loss": 0.737, "step": 1332 }, { "epoch": 0.04085448081402476, "grad_norm": 2.2292262827547837, "learning_rate": 9.996913391092877e-06, "loss": 0.8274, "step": 1333 }, { "epoch": 0.04088512933676597, "grad_norm": 2.1845320944256033, "learning_rate": 9.996895929800986e-06, "loss": 0.9012, "step": 1334 }, { "epoch": 0.040915777859507174, "grad_norm": 2.2303532427433983, "learning_rate": 9.996878419273397e-06, "loss": 0.8404, "step": 1335 }, { "epoch": 0.04094642638224837, "grad_norm": 2.180591096674949, "learning_rate": 9.996860859510286e-06, "loss": 0.8249, "step": 1336 }, { "epoch": 0.04097707490498958, "grad_norm": 1.6165071565845, "learning_rate": 9.99684325051183e-06, "loss": 0.5809, "step": 1337 }, { "epoch": 0.041007723427730784, "grad_norm": 2.1970762115391547, "learning_rate": 9.996825592278197e-06, "loss": 0.7904, "step": 1338 }, { "epoch": 0.04103837195047199, "grad_norm": 2.074499786084035, "learning_rate": 9.996807884809563e-06, "loss": 0.8054, "step": 1339 }, { "epoch": 0.04106902047321319, "grad_norm": 2.1681083943906074, "learning_rate": 9.996790128106101e-06, "loss": 0.7352, "step": 1340 }, { "epoch": 0.041099668995954394, "grad_norm": 2.5583417995452526, "learning_rate": 9.99677232216799e-06, "loss": 0.7199, "step": 1341 }, { "epoch": 0.0411303175186956, "grad_norm": 2.248692509860964, "learning_rate": 9.996754466995401e-06, "loss": 0.7321, "step": 1342 }, { "epoch": 0.041160966041436806, "grad_norm": 2.1762386678111723, "learning_rate": 9.996736562588513e-06, "loss": 0.877, "step": 1343 }, { "epoch": 0.041191614564178004, "grad_norm": 2.27860301457575, "learning_rate": 9.9967186089475e-06, "loss": 0.7848, "step": 1344 }, { "epoch": 0.04122226308691921, "grad_norm": 2.1833397465462636, "learning_rate": 9.996700606072542e-06, "loss": 0.7928, "step": 1345 }, { "epoch": 0.041252911609660416, "grad_norm": 2.5833073338876247, "learning_rate": 9.996682553963813e-06, "loss": 0.8765, "step": 1346 }, { "epoch": 0.04128356013240162, "grad_norm": 1.1023062977183757, "learning_rate": 9.996664452621492e-06, "loss": 0.5374, "step": 1347 }, { "epoch": 0.04131420865514282, "grad_norm": 2.3129657921060063, "learning_rate": 9.996646302045758e-06, "loss": 0.8798, "step": 1348 }, { "epoch": 0.041344857177884026, "grad_norm": 2.405520708782671, "learning_rate": 9.996628102236789e-06, "loss": 0.8302, "step": 1349 }, { "epoch": 0.04137550570062523, "grad_norm": 2.485070992643439, "learning_rate": 9.996609853194766e-06, "loss": 0.8567, "step": 1350 }, { "epoch": 0.04140615422336644, "grad_norm": 2.145478298831264, "learning_rate": 9.996591554919868e-06, "loss": 0.8211, "step": 1351 }, { "epoch": 0.041436802746107636, "grad_norm": 2.1331639661426074, "learning_rate": 9.996573207412275e-06, "loss": 0.9342, "step": 1352 }, { "epoch": 0.04146745126884884, "grad_norm": 2.265424699159151, "learning_rate": 9.996554810672165e-06, "loss": 0.7994, "step": 1353 }, { "epoch": 0.04149809979159005, "grad_norm": 2.167335018909853, "learning_rate": 9.996536364699726e-06, "loss": 0.8161, "step": 1354 }, { "epoch": 0.041528748314331246, "grad_norm": 2.2302491093214933, "learning_rate": 9.996517869495133e-06, "loss": 0.8242, "step": 1355 }, { "epoch": 0.04155939683707245, "grad_norm": 2.2984493643372486, "learning_rate": 9.996499325058572e-06, "loss": 0.7293, "step": 1356 }, { "epoch": 0.04159004535981366, "grad_norm": 2.317884495280388, "learning_rate": 9.996480731390224e-06, "loss": 0.8205, "step": 1357 }, { "epoch": 0.04162069388255486, "grad_norm": 1.9569711982748266, "learning_rate": 9.996462088490273e-06, "loss": 0.7441, "step": 1358 }, { "epoch": 0.04165134240529606, "grad_norm": 2.10140415863538, "learning_rate": 9.996443396358904e-06, "loss": 0.7077, "step": 1359 }, { "epoch": 0.04168199092803727, "grad_norm": 2.2649946248824153, "learning_rate": 9.9964246549963e-06, "loss": 0.7208, "step": 1360 }, { "epoch": 0.04171263945077847, "grad_norm": 2.4327897745860576, "learning_rate": 9.996405864402644e-06, "loss": 0.824, "step": 1361 }, { "epoch": 0.04174328797351968, "grad_norm": 2.012786778228896, "learning_rate": 9.996387024578122e-06, "loss": 0.7996, "step": 1362 }, { "epoch": 0.04177393649626088, "grad_norm": 2.2117652007499617, "learning_rate": 9.996368135522922e-06, "loss": 0.7922, "step": 1363 }, { "epoch": 0.04180458501900208, "grad_norm": 1.1369740159029451, "learning_rate": 9.996349197237228e-06, "loss": 0.5625, "step": 1364 }, { "epoch": 0.04183523354174329, "grad_norm": 2.7350500753318467, "learning_rate": 9.996330209721226e-06, "loss": 0.8597, "step": 1365 }, { "epoch": 0.041865882064484494, "grad_norm": 2.269044278748801, "learning_rate": 9.996311172975105e-06, "loss": 0.7632, "step": 1366 }, { "epoch": 0.04189653058722569, "grad_norm": 2.107673027868904, "learning_rate": 9.996292086999051e-06, "loss": 0.7702, "step": 1367 }, { "epoch": 0.0419271791099669, "grad_norm": 2.2934655310278584, "learning_rate": 9.996272951793253e-06, "loss": 0.7708, "step": 1368 }, { "epoch": 0.041957827632708104, "grad_norm": 2.082228108408857, "learning_rate": 9.9962537673579e-06, "loss": 0.7392, "step": 1369 }, { "epoch": 0.04198847615544931, "grad_norm": 2.290297919512996, "learning_rate": 9.99623453369318e-06, "loss": 0.8138, "step": 1370 }, { "epoch": 0.04201912467819051, "grad_norm": 2.088306810321175, "learning_rate": 9.996215250799282e-06, "loss": 0.8494, "step": 1371 }, { "epoch": 0.042049773200931714, "grad_norm": 1.1728537512890453, "learning_rate": 9.996195918676397e-06, "loss": 0.5586, "step": 1372 }, { "epoch": 0.04208042172367292, "grad_norm": 1.0562431578239788, "learning_rate": 9.996176537324715e-06, "loss": 0.5457, "step": 1373 }, { "epoch": 0.042111070246414126, "grad_norm": 2.371750722825589, "learning_rate": 9.996157106744429e-06, "loss": 0.7425, "step": 1374 }, { "epoch": 0.042141718769155324, "grad_norm": 2.3118923129917506, "learning_rate": 9.996137626935727e-06, "loss": 0.7678, "step": 1375 }, { "epoch": 0.04217236729189653, "grad_norm": 2.1111055000079078, "learning_rate": 9.996118097898804e-06, "loss": 0.8184, "step": 1376 }, { "epoch": 0.042203015814637736, "grad_norm": 2.2576479129112967, "learning_rate": 9.99609851963385e-06, "loss": 0.7555, "step": 1377 }, { "epoch": 0.04223366433737894, "grad_norm": 2.5785019289216993, "learning_rate": 9.996078892141059e-06, "loss": 0.7113, "step": 1378 }, { "epoch": 0.04226431286012014, "grad_norm": 1.3161941647233533, "learning_rate": 9.996059215420625e-06, "loss": 0.5456, "step": 1379 }, { "epoch": 0.042294961382861346, "grad_norm": 2.374570111407064, "learning_rate": 9.996039489472741e-06, "loss": 0.7103, "step": 1380 }, { "epoch": 0.04232560990560255, "grad_norm": 2.430827379566154, "learning_rate": 9.996019714297601e-06, "loss": 0.8432, "step": 1381 }, { "epoch": 0.04235625842834376, "grad_norm": 2.433090693004498, "learning_rate": 9.9959998898954e-06, "loss": 0.7811, "step": 1382 }, { "epoch": 0.042386906951084956, "grad_norm": 2.2120115232690187, "learning_rate": 9.995980016266335e-06, "loss": 0.7, "step": 1383 }, { "epoch": 0.04241755547382616, "grad_norm": 2.127251025362776, "learning_rate": 9.995960093410601e-06, "loss": 0.8375, "step": 1384 }, { "epoch": 0.04244820399656737, "grad_norm": 1.1493440284220235, "learning_rate": 9.995940121328394e-06, "loss": 0.5556, "step": 1385 }, { "epoch": 0.042478852519308566, "grad_norm": 2.251593722468747, "learning_rate": 9.995920100019909e-06, "loss": 0.8324, "step": 1386 }, { "epoch": 0.04250950104204977, "grad_norm": 2.444311317227807, "learning_rate": 9.995900029485348e-06, "loss": 0.7401, "step": 1387 }, { "epoch": 0.04254014956479098, "grad_norm": 2.4633248781144035, "learning_rate": 9.995879909724903e-06, "loss": 0.7944, "step": 1388 }, { "epoch": 0.04257079808753218, "grad_norm": 2.3453521941999234, "learning_rate": 9.995859740738776e-06, "loss": 0.8654, "step": 1389 }, { "epoch": 0.04260144661027338, "grad_norm": 2.238733530861835, "learning_rate": 9.995839522527165e-06, "loss": 0.8498, "step": 1390 }, { "epoch": 0.04263209513301459, "grad_norm": 1.081993376456448, "learning_rate": 9.995819255090266e-06, "loss": 0.5451, "step": 1391 }, { "epoch": 0.04266274365575579, "grad_norm": 1.0117959035805268, "learning_rate": 9.995798938428285e-06, "loss": 0.5476, "step": 1392 }, { "epoch": 0.042693392178497, "grad_norm": 2.3287273825709915, "learning_rate": 9.995778572541419e-06, "loss": 0.8277, "step": 1393 }, { "epoch": 0.0427240407012382, "grad_norm": 2.440742196848512, "learning_rate": 9.995758157429867e-06, "loss": 0.8061, "step": 1394 }, { "epoch": 0.0427546892239794, "grad_norm": 2.284309967620969, "learning_rate": 9.995737693093833e-06, "loss": 0.8181, "step": 1395 }, { "epoch": 0.04278533774672061, "grad_norm": 1.9865078880130782, "learning_rate": 9.995717179533515e-06, "loss": 0.7393, "step": 1396 }, { "epoch": 0.042815986269461814, "grad_norm": 2.296326339696516, "learning_rate": 9.99569661674912e-06, "loss": 0.8189, "step": 1397 }, { "epoch": 0.04284663479220301, "grad_norm": 2.4344669213455044, "learning_rate": 9.995676004740846e-06, "loss": 0.9175, "step": 1398 }, { "epoch": 0.04287728331494422, "grad_norm": 2.143385920817151, "learning_rate": 9.9956553435089e-06, "loss": 0.8191, "step": 1399 }, { "epoch": 0.042907931837685424, "grad_norm": 2.270847644983126, "learning_rate": 9.995634633053481e-06, "loss": 0.8519, "step": 1400 }, { "epoch": 0.04293858036042663, "grad_norm": 2.1611776250322903, "learning_rate": 9.995613873374798e-06, "loss": 0.841, "step": 1401 }, { "epoch": 0.04296922888316783, "grad_norm": 2.491905195313811, "learning_rate": 9.995593064473053e-06, "loss": 0.7982, "step": 1402 }, { "epoch": 0.042999877405909034, "grad_norm": 2.214984256313521, "learning_rate": 9.99557220634845e-06, "loss": 0.689, "step": 1403 }, { "epoch": 0.04303052592865024, "grad_norm": 2.291000847393793, "learning_rate": 9.995551299001198e-06, "loss": 0.593, "step": 1404 }, { "epoch": 0.043061174451391446, "grad_norm": 2.685110065840251, "learning_rate": 9.9955303424315e-06, "loss": 0.8361, "step": 1405 }, { "epoch": 0.043091822974132644, "grad_norm": 2.4583113073267433, "learning_rate": 9.995509336639563e-06, "loss": 0.86, "step": 1406 }, { "epoch": 0.04312247149687385, "grad_norm": 2.2392325270173696, "learning_rate": 9.995488281625594e-06, "loss": 0.729, "step": 1407 }, { "epoch": 0.043153120019615056, "grad_norm": 2.3396021412950936, "learning_rate": 9.995467177389801e-06, "loss": 0.7383, "step": 1408 }, { "epoch": 0.04318376854235626, "grad_norm": 2.234101423941762, "learning_rate": 9.995446023932394e-06, "loss": 0.8028, "step": 1409 }, { "epoch": 0.04321441706509746, "grad_norm": 2.143633974168245, "learning_rate": 9.995424821253577e-06, "loss": 0.8815, "step": 1410 }, { "epoch": 0.043245065587838666, "grad_norm": 2.026892705377884, "learning_rate": 9.99540356935356e-06, "loss": 0.7441, "step": 1411 }, { "epoch": 0.04327571411057987, "grad_norm": 2.304444362589145, "learning_rate": 9.995382268232556e-06, "loss": 0.9004, "step": 1412 }, { "epoch": 0.04330636263332108, "grad_norm": 2.271113140149838, "learning_rate": 9.99536091789077e-06, "loss": 0.77, "step": 1413 }, { "epoch": 0.043337011156062276, "grad_norm": 2.2995220035530974, "learning_rate": 9.995339518328418e-06, "loss": 0.826, "step": 1414 }, { "epoch": 0.04336765967880348, "grad_norm": 2.2445720159718574, "learning_rate": 9.995318069545706e-06, "loss": 0.8447, "step": 1415 }, { "epoch": 0.04339830820154469, "grad_norm": 2.170528717903499, "learning_rate": 9.995296571542845e-06, "loss": 0.5544, "step": 1416 }, { "epoch": 0.04342895672428589, "grad_norm": 2.428881184332189, "learning_rate": 9.995275024320051e-06, "loss": 0.8313, "step": 1417 }, { "epoch": 0.04345960524702709, "grad_norm": 2.16347878880223, "learning_rate": 9.995253427877533e-06, "loss": 0.786, "step": 1418 }, { "epoch": 0.0434902537697683, "grad_norm": 2.271911459547079, "learning_rate": 9.995231782215506e-06, "loss": 0.7767, "step": 1419 }, { "epoch": 0.0435209022925095, "grad_norm": 1.086860238089838, "learning_rate": 9.995210087334182e-06, "loss": 0.5389, "step": 1420 }, { "epoch": 0.0435515508152507, "grad_norm": 2.2558505372937807, "learning_rate": 9.995188343233775e-06, "loss": 0.7467, "step": 1421 }, { "epoch": 0.04358219933799191, "grad_norm": 2.486740161267048, "learning_rate": 9.995166549914498e-06, "loss": 0.7525, "step": 1422 }, { "epoch": 0.04361284786073311, "grad_norm": 2.3366804349806993, "learning_rate": 9.995144707376568e-06, "loss": 0.8914, "step": 1423 }, { "epoch": 0.04364349638347432, "grad_norm": 2.4044276647261777, "learning_rate": 9.995122815620199e-06, "loss": 0.7545, "step": 1424 }, { "epoch": 0.04367414490621552, "grad_norm": 2.199760873419752, "learning_rate": 9.995100874645605e-06, "loss": 0.8445, "step": 1425 }, { "epoch": 0.04370479342895672, "grad_norm": 1.5271576596091754, "learning_rate": 9.995078884453006e-06, "loss": 0.543, "step": 1426 }, { "epoch": 0.04373544195169793, "grad_norm": 2.284250695425709, "learning_rate": 9.995056845042616e-06, "loss": 0.7912, "step": 1427 }, { "epoch": 0.043766090474439134, "grad_norm": 2.4835710950920777, "learning_rate": 9.995034756414655e-06, "loss": 0.8613, "step": 1428 }, { "epoch": 0.04379673899718033, "grad_norm": 2.064420580832537, "learning_rate": 9.995012618569335e-06, "loss": 0.8147, "step": 1429 }, { "epoch": 0.04382738751992154, "grad_norm": 2.165787133989808, "learning_rate": 9.99499043150688e-06, "loss": 0.8102, "step": 1430 }, { "epoch": 0.043858036042662744, "grad_norm": 2.179364396482116, "learning_rate": 9.994968195227505e-06, "loss": 0.8321, "step": 1431 }, { "epoch": 0.04388868456540395, "grad_norm": 2.162212419591497, "learning_rate": 9.994945909731432e-06, "loss": 0.8131, "step": 1432 }, { "epoch": 0.04391933308814515, "grad_norm": 1.0781343371685606, "learning_rate": 9.994923575018878e-06, "loss": 0.5648, "step": 1433 }, { "epoch": 0.043949981610886354, "grad_norm": 2.2623780039184247, "learning_rate": 9.994901191090063e-06, "loss": 0.879, "step": 1434 }, { "epoch": 0.04398063013362756, "grad_norm": 1.048867485391845, "learning_rate": 9.99487875794521e-06, "loss": 0.5571, "step": 1435 }, { "epoch": 0.044011278656368766, "grad_norm": 2.205242750388276, "learning_rate": 9.994856275584537e-06, "loss": 0.8668, "step": 1436 }, { "epoch": 0.044041927179109965, "grad_norm": 2.0703086928238106, "learning_rate": 9.99483374400827e-06, "loss": 0.7102, "step": 1437 }, { "epoch": 0.04407257570185117, "grad_norm": 2.287740639221629, "learning_rate": 9.994811163216625e-06, "loss": 0.7592, "step": 1438 }, { "epoch": 0.044103224224592376, "grad_norm": 2.2597116642212995, "learning_rate": 9.994788533209829e-06, "loss": 0.8037, "step": 1439 }, { "epoch": 0.04413387274733358, "grad_norm": 2.249875692803309, "learning_rate": 9.994765853988105e-06, "loss": 0.8981, "step": 1440 }, { "epoch": 0.04416452127007478, "grad_norm": 2.132558718081938, "learning_rate": 9.994743125551672e-06, "loss": 0.924, "step": 1441 }, { "epoch": 0.044195169792815986, "grad_norm": 2.0367865499119304, "learning_rate": 9.994720347900759e-06, "loss": 0.7754, "step": 1442 }, { "epoch": 0.04422581831555719, "grad_norm": 2.1349111231193003, "learning_rate": 9.994697521035588e-06, "loss": 0.7921, "step": 1443 }, { "epoch": 0.0442564668382984, "grad_norm": 2.269534884113159, "learning_rate": 9.994674644956385e-06, "loss": 0.8185, "step": 1444 }, { "epoch": 0.044287115361039596, "grad_norm": 2.6974020616837024, "learning_rate": 9.994651719663373e-06, "loss": 0.6834, "step": 1445 }, { "epoch": 0.0443177638837808, "grad_norm": 2.2668536714564755, "learning_rate": 9.994628745156782e-06, "loss": 0.8087, "step": 1446 }, { "epoch": 0.04434841240652201, "grad_norm": 2.242711689464131, "learning_rate": 9.994605721436836e-06, "loss": 0.7147, "step": 1447 }, { "epoch": 0.04437906092926321, "grad_norm": 2.073926938551995, "learning_rate": 9.99458264850376e-06, "loss": 0.7147, "step": 1448 }, { "epoch": 0.04440970945200441, "grad_norm": 1.9423028541071032, "learning_rate": 9.994559526357785e-06, "loss": 0.7545, "step": 1449 }, { "epoch": 0.04444035797474562, "grad_norm": 2.456717880705304, "learning_rate": 9.994536354999136e-06, "loss": 0.7724, "step": 1450 }, { "epoch": 0.04447100649748682, "grad_norm": 1.9300995677826185, "learning_rate": 9.994513134428042e-06, "loss": 0.734, "step": 1451 }, { "epoch": 0.04450165502022802, "grad_norm": 2.3857486647138857, "learning_rate": 9.994489864644733e-06, "loss": 0.8506, "step": 1452 }, { "epoch": 0.04453230354296923, "grad_norm": 1.9003265173836814, "learning_rate": 9.994466545649437e-06, "loss": 0.7814, "step": 1453 }, { "epoch": 0.04456295206571043, "grad_norm": 2.330218483129496, "learning_rate": 9.994443177442386e-06, "loss": 0.704, "step": 1454 }, { "epoch": 0.04459360058845164, "grad_norm": 2.1966224841284197, "learning_rate": 9.994419760023806e-06, "loss": 0.8376, "step": 1455 }, { "epoch": 0.04462424911119284, "grad_norm": 2.1637051973842367, "learning_rate": 9.994396293393932e-06, "loss": 0.7836, "step": 1456 }, { "epoch": 0.04465489763393404, "grad_norm": 1.9196486162317807, "learning_rate": 9.994372777552992e-06, "loss": 0.8039, "step": 1457 }, { "epoch": 0.04468554615667525, "grad_norm": 1.5726101373380235, "learning_rate": 9.99434921250122e-06, "loss": 0.5665, "step": 1458 }, { "epoch": 0.044716194679416454, "grad_norm": 2.194723795125612, "learning_rate": 9.994325598238847e-06, "loss": 0.7964, "step": 1459 }, { "epoch": 0.04474684320215765, "grad_norm": 2.2432830321391712, "learning_rate": 9.994301934766106e-06, "loss": 0.767, "step": 1460 }, { "epoch": 0.04477749172489886, "grad_norm": 2.244304738676307, "learning_rate": 9.99427822208323e-06, "loss": 0.7447, "step": 1461 }, { "epoch": 0.044808140247640064, "grad_norm": 2.1290143570812665, "learning_rate": 9.994254460190453e-06, "loss": 0.7857, "step": 1462 }, { "epoch": 0.04483878877038127, "grad_norm": 2.5212837321933, "learning_rate": 9.99423064908801e-06, "loss": 0.8543, "step": 1463 }, { "epoch": 0.04486943729312247, "grad_norm": 2.3852472486068232, "learning_rate": 9.994206788776133e-06, "loss": 0.8734, "step": 1464 }, { "epoch": 0.044900085815863675, "grad_norm": 2.428333902070855, "learning_rate": 9.99418287925506e-06, "loss": 0.7955, "step": 1465 }, { "epoch": 0.04493073433860488, "grad_norm": 2.3330409583273095, "learning_rate": 9.994158920525024e-06, "loss": 0.8244, "step": 1466 }, { "epoch": 0.044961382861346086, "grad_norm": 2.3431516393449083, "learning_rate": 9.994134912586265e-06, "loss": 0.7559, "step": 1467 }, { "epoch": 0.044992031384087285, "grad_norm": 2.34794540919881, "learning_rate": 9.994110855439014e-06, "loss": 0.7574, "step": 1468 }, { "epoch": 0.04502267990682849, "grad_norm": 2.4543615471522893, "learning_rate": 9.994086749083512e-06, "loss": 0.8522, "step": 1469 }, { "epoch": 0.045053328429569696, "grad_norm": 1.5307031787226704, "learning_rate": 9.994062593519995e-06, "loss": 0.5441, "step": 1470 }, { "epoch": 0.0450839769523109, "grad_norm": 2.8529408356706205, "learning_rate": 9.994038388748702e-06, "loss": 0.8658, "step": 1471 }, { "epoch": 0.0451146254750521, "grad_norm": 2.157871156961857, "learning_rate": 9.994014134769872e-06, "loss": 0.7359, "step": 1472 }, { "epoch": 0.045145273997793306, "grad_norm": 2.190216927047067, "learning_rate": 9.993989831583742e-06, "loss": 0.913, "step": 1473 }, { "epoch": 0.04517592252053451, "grad_norm": 2.227293800496843, "learning_rate": 9.99396547919055e-06, "loss": 0.7547, "step": 1474 }, { "epoch": 0.04520657104327572, "grad_norm": 2.2155770270069217, "learning_rate": 9.99394107759054e-06, "loss": 0.805, "step": 1475 }, { "epoch": 0.045237219566016916, "grad_norm": 2.199937013071588, "learning_rate": 9.993916626783952e-06, "loss": 0.7912, "step": 1476 }, { "epoch": 0.04526786808875812, "grad_norm": 2.347752082740451, "learning_rate": 9.993892126771026e-06, "loss": 0.9641, "step": 1477 }, { "epoch": 0.04529851661149933, "grad_norm": 2.3351507612422657, "learning_rate": 9.993867577552003e-06, "loss": 0.7439, "step": 1478 }, { "epoch": 0.04532916513424053, "grad_norm": 2.599036273224227, "learning_rate": 9.993842979127124e-06, "loss": 0.8478, "step": 1479 }, { "epoch": 0.04535981365698173, "grad_norm": 2.18415008356805, "learning_rate": 9.993818331496632e-06, "loss": 0.8127, "step": 1480 }, { "epoch": 0.04539046217972294, "grad_norm": 2.3255727244522713, "learning_rate": 9.99379363466077e-06, "loss": 0.8178, "step": 1481 }, { "epoch": 0.04542111070246414, "grad_norm": 2.3563630573957375, "learning_rate": 9.993768888619783e-06, "loss": 0.7338, "step": 1482 }, { "epoch": 0.04545175922520534, "grad_norm": 1.9087166312121584, "learning_rate": 9.993744093373915e-06, "loss": 0.7437, "step": 1483 }, { "epoch": 0.04548240774794655, "grad_norm": 1.2022184464606387, "learning_rate": 9.993719248923406e-06, "loss": 0.5491, "step": 1484 }, { "epoch": 0.04551305627068775, "grad_norm": 2.434305421984262, "learning_rate": 9.993694355268504e-06, "loss": 0.7503, "step": 1485 }, { "epoch": 0.04554370479342896, "grad_norm": 2.2345663334212547, "learning_rate": 9.993669412409455e-06, "loss": 0.7475, "step": 1486 }, { "epoch": 0.04557435331617016, "grad_norm": 2.2580284428908657, "learning_rate": 9.993644420346503e-06, "loss": 0.91, "step": 1487 }, { "epoch": 0.04560500183891136, "grad_norm": 2.0493193672796406, "learning_rate": 9.993619379079893e-06, "loss": 0.7917, "step": 1488 }, { "epoch": 0.04563565036165257, "grad_norm": 2.0664686841549194, "learning_rate": 9.993594288609876e-06, "loss": 0.7948, "step": 1489 }, { "epoch": 0.045666298884393774, "grad_norm": 2.247877835475181, "learning_rate": 9.993569148936695e-06, "loss": 0.7747, "step": 1490 }, { "epoch": 0.04569694740713497, "grad_norm": 2.2076143114504223, "learning_rate": 9.993543960060601e-06, "loss": 0.7455, "step": 1491 }, { "epoch": 0.04572759592987618, "grad_norm": 2.16360610159134, "learning_rate": 9.99351872198184e-06, "loss": 0.813, "step": 1492 }, { "epoch": 0.045758244452617385, "grad_norm": 2.408060961697643, "learning_rate": 9.99349343470066e-06, "loss": 0.8755, "step": 1493 }, { "epoch": 0.04578889297535859, "grad_norm": 2.1267682896780724, "learning_rate": 9.993468098217313e-06, "loss": 0.9123, "step": 1494 }, { "epoch": 0.04581954149809979, "grad_norm": 2.3456013238850932, "learning_rate": 9.993442712532048e-06, "loss": 0.8164, "step": 1495 }, { "epoch": 0.045850190020840995, "grad_norm": 2.278793091371842, "learning_rate": 9.993417277645114e-06, "loss": 0.7467, "step": 1496 }, { "epoch": 0.0458808385435822, "grad_norm": 2.1641553133735867, "learning_rate": 9.99339179355676e-06, "loss": 0.7661, "step": 1497 }, { "epoch": 0.045911487066323406, "grad_norm": 2.4047732457726303, "learning_rate": 9.993366260267243e-06, "loss": 0.8162, "step": 1498 }, { "epoch": 0.045942135589064605, "grad_norm": 2.302629761806154, "learning_rate": 9.993340677776809e-06, "loss": 0.6933, "step": 1499 }, { "epoch": 0.04597278411180581, "grad_norm": 1.3156016047843697, "learning_rate": 9.99331504608571e-06, "loss": 0.5364, "step": 1500 }, { "epoch": 0.046003432634547016, "grad_norm": 2.278665084283029, "learning_rate": 9.993289365194201e-06, "loss": 0.8045, "step": 1501 }, { "epoch": 0.04603408115728822, "grad_norm": 2.1464912345486153, "learning_rate": 9.993263635102534e-06, "loss": 0.8516, "step": 1502 }, { "epoch": 0.04606472968002942, "grad_norm": 0.9557001457412279, "learning_rate": 9.993237855810964e-06, "loss": 0.5481, "step": 1503 }, { "epoch": 0.046095378202770626, "grad_norm": 2.4669494850658342, "learning_rate": 9.993212027319742e-06, "loss": 0.8697, "step": 1504 }, { "epoch": 0.04612602672551183, "grad_norm": 0.9780316888470426, "learning_rate": 9.993186149629127e-06, "loss": 0.5269, "step": 1505 }, { "epoch": 0.04615667524825304, "grad_norm": 2.0146015978757417, "learning_rate": 9.993160222739369e-06, "loss": 0.8206, "step": 1506 }, { "epoch": 0.046187323770994236, "grad_norm": 2.2619544828142946, "learning_rate": 9.993134246650726e-06, "loss": 0.7552, "step": 1507 }, { "epoch": 0.04621797229373544, "grad_norm": 2.051845008989956, "learning_rate": 9.993108221363454e-06, "loss": 0.7846, "step": 1508 }, { "epoch": 0.04624862081647665, "grad_norm": 2.320196593352715, "learning_rate": 9.99308214687781e-06, "loss": 0.9886, "step": 1509 }, { "epoch": 0.04627926933921785, "grad_norm": 1.0431476707966199, "learning_rate": 9.99305602319405e-06, "loss": 0.5663, "step": 1510 }, { "epoch": 0.04630991786195905, "grad_norm": 2.1228387020743558, "learning_rate": 9.99302985031243e-06, "loss": 0.7548, "step": 1511 }, { "epoch": 0.04634056638470026, "grad_norm": 2.0888640201066866, "learning_rate": 9.993003628233211e-06, "loss": 0.7293, "step": 1512 }, { "epoch": 0.04637121490744146, "grad_norm": 2.272166700187362, "learning_rate": 9.99297735695665e-06, "loss": 0.862, "step": 1513 }, { "epoch": 0.04640186343018266, "grad_norm": 2.1822570864932054, "learning_rate": 9.992951036483003e-06, "loss": 0.7515, "step": 1514 }, { "epoch": 0.04643251195292387, "grad_norm": 1.0997606658180217, "learning_rate": 9.992924666812533e-06, "loss": 0.5433, "step": 1515 }, { "epoch": 0.04646316047566507, "grad_norm": 2.2121041659383054, "learning_rate": 9.9928982479455e-06, "loss": 0.7945, "step": 1516 }, { "epoch": 0.04649380899840628, "grad_norm": 2.109551214723022, "learning_rate": 9.992871779882164e-06, "loss": 0.7465, "step": 1517 }, { "epoch": 0.04652445752114748, "grad_norm": 2.440433728922343, "learning_rate": 9.992845262622782e-06, "loss": 0.7555, "step": 1518 }, { "epoch": 0.04655510604388868, "grad_norm": 1.0090824573448174, "learning_rate": 9.99281869616762e-06, "loss": 0.551, "step": 1519 }, { "epoch": 0.04658575456662989, "grad_norm": 2.515344620787977, "learning_rate": 9.992792080516938e-06, "loss": 0.8497, "step": 1520 }, { "epoch": 0.046616403089371095, "grad_norm": 1.0075554855056676, "learning_rate": 9.992765415670998e-06, "loss": 0.5551, "step": 1521 }, { "epoch": 0.04664705161211229, "grad_norm": 2.080535328124425, "learning_rate": 9.992738701630061e-06, "loss": 0.76, "step": 1522 }, { "epoch": 0.0466777001348535, "grad_norm": 0.9275864009661776, "learning_rate": 9.992711938394394e-06, "loss": 0.5329, "step": 1523 }, { "epoch": 0.046708348657594705, "grad_norm": 2.216368739269394, "learning_rate": 9.992685125964259e-06, "loss": 0.8035, "step": 1524 }, { "epoch": 0.04673899718033591, "grad_norm": 2.483526992787161, "learning_rate": 9.992658264339918e-06, "loss": 0.8702, "step": 1525 }, { "epoch": 0.04676964570307711, "grad_norm": 2.282147697165149, "learning_rate": 9.99263135352164e-06, "loss": 0.8533, "step": 1526 }, { "epoch": 0.046800294225818315, "grad_norm": 0.974270047040013, "learning_rate": 9.992604393509687e-06, "loss": 0.5266, "step": 1527 }, { "epoch": 0.04683094274855952, "grad_norm": 0.9553261893417786, "learning_rate": 9.992577384304325e-06, "loss": 0.5344, "step": 1528 }, { "epoch": 0.046861591271300726, "grad_norm": 2.256096841821275, "learning_rate": 9.992550325905821e-06, "loss": 0.7246, "step": 1529 }, { "epoch": 0.046892239794041925, "grad_norm": 2.0533917774154364, "learning_rate": 9.992523218314442e-06, "loss": 0.7784, "step": 1530 }, { "epoch": 0.04692288831678313, "grad_norm": 0.9825689084529211, "learning_rate": 9.992496061530454e-06, "loss": 0.5701, "step": 1531 }, { "epoch": 0.046953536839524336, "grad_norm": 2.2499559575345747, "learning_rate": 9.992468855554125e-06, "loss": 0.865, "step": 1532 }, { "epoch": 0.04698418536226554, "grad_norm": 2.0839156677819073, "learning_rate": 9.992441600385721e-06, "loss": 0.8337, "step": 1533 }, { "epoch": 0.04701483388500674, "grad_norm": 0.9479263916345423, "learning_rate": 9.992414296025514e-06, "loss": 0.5539, "step": 1534 }, { "epoch": 0.047045482407747946, "grad_norm": 2.24668634261317, "learning_rate": 9.992386942473773e-06, "loss": 0.7805, "step": 1535 }, { "epoch": 0.04707613093048915, "grad_norm": 2.0119379477455377, "learning_rate": 9.992359539730763e-06, "loss": 0.8361, "step": 1536 }, { "epoch": 0.04710677945323036, "grad_norm": 2.0590415058557467, "learning_rate": 9.99233208779676e-06, "loss": 0.7783, "step": 1537 }, { "epoch": 0.047137427975971556, "grad_norm": 0.999341501350408, "learning_rate": 9.99230458667203e-06, "loss": 0.544, "step": 1538 }, { "epoch": 0.04716807649871276, "grad_norm": 2.3464302413584472, "learning_rate": 9.992277036356846e-06, "loss": 0.8463, "step": 1539 }, { "epoch": 0.04719872502145397, "grad_norm": 1.8873596136578552, "learning_rate": 9.99224943685148e-06, "loss": 0.779, "step": 1540 }, { "epoch": 0.04722937354419517, "grad_norm": 2.259766077167158, "learning_rate": 9.992221788156202e-06, "loss": 0.7016, "step": 1541 }, { "epoch": 0.04726002206693637, "grad_norm": 2.2468863703266684, "learning_rate": 9.992194090271285e-06, "loss": 0.7517, "step": 1542 }, { "epoch": 0.04729067058967758, "grad_norm": 2.221028307623843, "learning_rate": 9.992166343197002e-06, "loss": 0.7601, "step": 1543 }, { "epoch": 0.04732131911241878, "grad_norm": 2.011766617457771, "learning_rate": 9.992138546933629e-06, "loss": 0.7397, "step": 1544 }, { "epoch": 0.04735196763515998, "grad_norm": 2.4961850559214325, "learning_rate": 9.992110701481436e-06, "loss": 0.79, "step": 1545 }, { "epoch": 0.04738261615790119, "grad_norm": 2.268198373134644, "learning_rate": 9.9920828068407e-06, "loss": 0.8422, "step": 1546 }, { "epoch": 0.04741326468064239, "grad_norm": 2.46609291328082, "learning_rate": 9.992054863011693e-06, "loss": 0.7151, "step": 1547 }, { "epoch": 0.0474439132033836, "grad_norm": 2.0080438622222743, "learning_rate": 9.992026869994694e-06, "loss": 0.7491, "step": 1548 }, { "epoch": 0.0474745617261248, "grad_norm": 2.5522209383293917, "learning_rate": 9.991998827789975e-06, "loss": 0.7906, "step": 1549 }, { "epoch": 0.047505210248866, "grad_norm": 2.0941462289814163, "learning_rate": 9.991970736397817e-06, "loss": 0.7089, "step": 1550 }, { "epoch": 0.04753585877160721, "grad_norm": 2.095478772482786, "learning_rate": 9.991942595818491e-06, "loss": 0.6989, "step": 1551 }, { "epoch": 0.047566507294348415, "grad_norm": 2.176402416776282, "learning_rate": 9.991914406052279e-06, "loss": 0.8467, "step": 1552 }, { "epoch": 0.04759715581708961, "grad_norm": 1.054218142627708, "learning_rate": 9.991886167099456e-06, "loss": 0.5438, "step": 1553 }, { "epoch": 0.04762780433983082, "grad_norm": 2.188993094042433, "learning_rate": 9.991857878960303e-06, "loss": 0.7348, "step": 1554 }, { "epoch": 0.047658452862572025, "grad_norm": 0.993328214613932, "learning_rate": 9.991829541635097e-06, "loss": 0.566, "step": 1555 }, { "epoch": 0.04768910138531323, "grad_norm": 0.8967822304498032, "learning_rate": 9.991801155124116e-06, "loss": 0.5353, "step": 1556 }, { "epoch": 0.04771974990805443, "grad_norm": 0.9266881311772347, "learning_rate": 9.991772719427642e-06, "loss": 0.553, "step": 1557 }, { "epoch": 0.047750398430795635, "grad_norm": 2.401205417688273, "learning_rate": 9.991744234545952e-06, "loss": 0.8428, "step": 1558 }, { "epoch": 0.04778104695353684, "grad_norm": 2.4499191217310754, "learning_rate": 9.991715700479333e-06, "loss": 0.8473, "step": 1559 }, { "epoch": 0.047811695476278046, "grad_norm": 2.008993083356474, "learning_rate": 9.99168711722806e-06, "loss": 0.8482, "step": 1560 }, { "epoch": 0.047842343999019245, "grad_norm": 2.1722996376305774, "learning_rate": 9.991658484792416e-06, "loss": 0.7843, "step": 1561 }, { "epoch": 0.04787299252176045, "grad_norm": 2.1830763529828268, "learning_rate": 9.991629803172684e-06, "loss": 0.8011, "step": 1562 }, { "epoch": 0.047903641044501656, "grad_norm": 2.1150838657136104, "learning_rate": 9.991601072369145e-06, "loss": 0.7337, "step": 1563 }, { "epoch": 0.04793428956724286, "grad_norm": 1.1899983551134405, "learning_rate": 9.991572292382086e-06, "loss": 0.5358, "step": 1564 }, { "epoch": 0.04796493808998406, "grad_norm": 2.1726250695828613, "learning_rate": 9.991543463211788e-06, "loss": 0.7746, "step": 1565 }, { "epoch": 0.047995586612725266, "grad_norm": 1.9268239508919724, "learning_rate": 9.991514584858534e-06, "loss": 0.7416, "step": 1566 }, { "epoch": 0.04802623513546647, "grad_norm": 2.198511583009963, "learning_rate": 9.991485657322609e-06, "loss": 0.7508, "step": 1567 }, { "epoch": 0.04805688365820768, "grad_norm": 1.025992046821334, "learning_rate": 9.9914566806043e-06, "loss": 0.5517, "step": 1568 }, { "epoch": 0.048087532180948876, "grad_norm": 2.2796568240972164, "learning_rate": 9.99142765470389e-06, "loss": 0.8111, "step": 1569 }, { "epoch": 0.04811818070369008, "grad_norm": 2.092582662968351, "learning_rate": 9.991398579621668e-06, "loss": 0.6874, "step": 1570 }, { "epoch": 0.04814882922643129, "grad_norm": 1.0012775849066038, "learning_rate": 9.991369455357918e-06, "loss": 0.5236, "step": 1571 }, { "epoch": 0.04817947774917249, "grad_norm": 0.9959983760549106, "learning_rate": 9.991340281912927e-06, "loss": 0.5481, "step": 1572 }, { "epoch": 0.04821012627191369, "grad_norm": 2.6323082563565277, "learning_rate": 9.991311059286984e-06, "loss": 0.7552, "step": 1573 }, { "epoch": 0.0482407747946549, "grad_norm": 1.9280676525071196, "learning_rate": 9.991281787480377e-06, "loss": 0.8073, "step": 1574 }, { "epoch": 0.0482714233173961, "grad_norm": 1.284053196638117, "learning_rate": 9.991252466493391e-06, "loss": 0.5343, "step": 1575 }, { "epoch": 0.0483020718401373, "grad_norm": 2.352242463053297, "learning_rate": 9.991223096326316e-06, "loss": 0.8454, "step": 1576 }, { "epoch": 0.04833272036287851, "grad_norm": 2.4279787959102292, "learning_rate": 9.991193676979448e-06, "loss": 0.7335, "step": 1577 }, { "epoch": 0.04836336888561971, "grad_norm": 2.283049715942153, "learning_rate": 9.991164208453069e-06, "loss": 0.846, "step": 1578 }, { "epoch": 0.04839401740836092, "grad_norm": 2.1826357798186544, "learning_rate": 9.99113469074747e-06, "loss": 0.8429, "step": 1579 }, { "epoch": 0.04842466593110212, "grad_norm": 2.284335451693949, "learning_rate": 9.991105123862944e-06, "loss": 0.7471, "step": 1580 }, { "epoch": 0.04845531445384332, "grad_norm": 2.1670240159322476, "learning_rate": 9.991075507799786e-06, "loss": 0.7414, "step": 1581 }, { "epoch": 0.04848596297658453, "grad_norm": 2.3133202903183676, "learning_rate": 9.991045842558282e-06, "loss": 0.9109, "step": 1582 }, { "epoch": 0.048516611499325735, "grad_norm": 2.4600045382060802, "learning_rate": 9.991016128138726e-06, "loss": 0.871, "step": 1583 }, { "epoch": 0.04854726002206693, "grad_norm": 2.3481497570688203, "learning_rate": 9.990986364541411e-06, "loss": 0.882, "step": 1584 }, { "epoch": 0.04857790854480814, "grad_norm": 2.0809473054035585, "learning_rate": 9.990956551766631e-06, "loss": 0.7809, "step": 1585 }, { "epoch": 0.048608557067549345, "grad_norm": 2.3638897517885242, "learning_rate": 9.990926689814678e-06, "loss": 0.7786, "step": 1586 }, { "epoch": 0.04863920559029055, "grad_norm": 2.12584704120499, "learning_rate": 9.990896778685847e-06, "loss": 0.8139, "step": 1587 }, { "epoch": 0.04866985411303175, "grad_norm": 2.4037723726716993, "learning_rate": 9.990866818380436e-06, "loss": 0.8011, "step": 1588 }, { "epoch": 0.048700502635772955, "grad_norm": 2.3089305141418746, "learning_rate": 9.990836808898736e-06, "loss": 0.8645, "step": 1589 }, { "epoch": 0.04873115115851416, "grad_norm": 1.9801498199323009, "learning_rate": 9.990806750241043e-06, "loss": 0.7068, "step": 1590 }, { "epoch": 0.048761799681255366, "grad_norm": 2.2780555349419775, "learning_rate": 9.990776642407653e-06, "loss": 0.7517, "step": 1591 }, { "epoch": 0.048792448203996565, "grad_norm": 2.352106120178732, "learning_rate": 9.990746485398866e-06, "loss": 0.7767, "step": 1592 }, { "epoch": 0.04882309672673777, "grad_norm": 2.0606966572713925, "learning_rate": 9.990716279214976e-06, "loss": 0.8177, "step": 1593 }, { "epoch": 0.048853745249478976, "grad_norm": 2.2766049026180784, "learning_rate": 9.990686023856282e-06, "loss": 0.8725, "step": 1594 }, { "epoch": 0.04888439377222018, "grad_norm": 2.5015773213663612, "learning_rate": 9.990655719323082e-06, "loss": 0.9104, "step": 1595 }, { "epoch": 0.04891504229496138, "grad_norm": 2.1851350333311266, "learning_rate": 9.990625365615674e-06, "loss": 0.7464, "step": 1596 }, { "epoch": 0.048945690817702586, "grad_norm": 2.2561517502916244, "learning_rate": 9.990594962734357e-06, "loss": 0.6561, "step": 1597 }, { "epoch": 0.04897633934044379, "grad_norm": 2.1135402182865946, "learning_rate": 9.99056451067943e-06, "loss": 0.8557, "step": 1598 }, { "epoch": 0.049006987863185, "grad_norm": 2.489787353294821, "learning_rate": 9.990534009451195e-06, "loss": 0.8966, "step": 1599 }, { "epoch": 0.049037636385926196, "grad_norm": 2.9350400113613277, "learning_rate": 9.990503459049951e-06, "loss": 0.8099, "step": 1600 }, { "epoch": 0.0490682849086674, "grad_norm": 2.214705285770832, "learning_rate": 9.990472859476002e-06, "loss": 0.7401, "step": 1601 }, { "epoch": 0.04909893343140861, "grad_norm": 2.2035408406340578, "learning_rate": 9.990442210729646e-06, "loss": 0.7346, "step": 1602 }, { "epoch": 0.04912958195414981, "grad_norm": 2.131028543474141, "learning_rate": 9.990411512811185e-06, "loss": 0.8536, "step": 1603 }, { "epoch": 0.04916023047689101, "grad_norm": 2.0136602050806953, "learning_rate": 9.990380765720922e-06, "loss": 0.7341, "step": 1604 }, { "epoch": 0.04919087899963222, "grad_norm": 2.369504129203594, "learning_rate": 9.990349969459162e-06, "loss": 0.8422, "step": 1605 }, { "epoch": 0.04922152752237342, "grad_norm": 1.4668070938316822, "learning_rate": 9.990319124026205e-06, "loss": 0.5428, "step": 1606 }, { "epoch": 0.04925217604511462, "grad_norm": 2.1931353003361793, "learning_rate": 9.990288229422357e-06, "loss": 0.8014, "step": 1607 }, { "epoch": 0.04928282456785583, "grad_norm": 2.4864880907607576, "learning_rate": 9.990257285647923e-06, "loss": 0.859, "step": 1608 }, { "epoch": 0.04931347309059703, "grad_norm": 2.3182245745471253, "learning_rate": 9.990226292703208e-06, "loss": 0.7914, "step": 1609 }, { "epoch": 0.04934412161333824, "grad_norm": 2.4210379523540206, "learning_rate": 9.990195250588515e-06, "loss": 0.8867, "step": 1610 }, { "epoch": 0.04937477013607944, "grad_norm": 1.122151809964259, "learning_rate": 9.990164159304152e-06, "loss": 0.5537, "step": 1611 }, { "epoch": 0.04940541865882064, "grad_norm": 2.413381521884658, "learning_rate": 9.990133018850427e-06, "loss": 0.8126, "step": 1612 }, { "epoch": 0.04943606718156185, "grad_norm": 2.080186484748652, "learning_rate": 9.990101829227643e-06, "loss": 0.811, "step": 1613 }, { "epoch": 0.049466715704303055, "grad_norm": 2.394258187418213, "learning_rate": 9.990070590436107e-06, "loss": 0.8895, "step": 1614 }, { "epoch": 0.049497364227044253, "grad_norm": 2.414905787455873, "learning_rate": 9.99003930247613e-06, "loss": 0.7041, "step": 1615 }, { "epoch": 0.04952801274978546, "grad_norm": 2.1419453036691576, "learning_rate": 9.99000796534802e-06, "loss": 0.7328, "step": 1616 }, { "epoch": 0.049558661272526665, "grad_norm": 2.2456969224490777, "learning_rate": 9.989976579052082e-06, "loss": 0.8374, "step": 1617 }, { "epoch": 0.04958930979526787, "grad_norm": 2.0993628422160526, "learning_rate": 9.989945143588633e-06, "loss": 0.7602, "step": 1618 }, { "epoch": 0.04961995831800907, "grad_norm": 2.5397942722737152, "learning_rate": 9.989913658957973e-06, "loss": 0.7686, "step": 1619 }, { "epoch": 0.049650606840750275, "grad_norm": 2.0159036222623605, "learning_rate": 9.989882125160419e-06, "loss": 0.7855, "step": 1620 }, { "epoch": 0.04968125536349148, "grad_norm": 2.130360057141736, "learning_rate": 9.98985054219628e-06, "loss": 0.7982, "step": 1621 }, { "epoch": 0.049711903886232686, "grad_norm": 2.1550804255299574, "learning_rate": 9.989818910065868e-06, "loss": 0.8883, "step": 1622 }, { "epoch": 0.049742552408973885, "grad_norm": 2.363877776960879, "learning_rate": 9.989787228769489e-06, "loss": 0.7135, "step": 1623 }, { "epoch": 0.04977320093171509, "grad_norm": 2.406258989014138, "learning_rate": 9.989755498307464e-06, "loss": 0.8858, "step": 1624 }, { "epoch": 0.049803849454456296, "grad_norm": 2.1448915090205767, "learning_rate": 9.9897237186801e-06, "loss": 0.7907, "step": 1625 }, { "epoch": 0.0498344979771975, "grad_norm": 2.3965375186235347, "learning_rate": 9.98969188988771e-06, "loss": 0.8074, "step": 1626 }, { "epoch": 0.0498651464999387, "grad_norm": 2.5125080880284436, "learning_rate": 9.989660011930612e-06, "loss": 0.834, "step": 1627 }, { "epoch": 0.049895795022679906, "grad_norm": 1.9955916351227934, "learning_rate": 9.989628084809116e-06, "loss": 0.8276, "step": 1628 }, { "epoch": 0.04992644354542111, "grad_norm": 2.3054380664028686, "learning_rate": 9.989596108523537e-06, "loss": 0.7996, "step": 1629 }, { "epoch": 0.04995709206816232, "grad_norm": 2.2942126472285165, "learning_rate": 9.989564083074193e-06, "loss": 0.836, "step": 1630 }, { "epoch": 0.049987740590903516, "grad_norm": 2.0814244723048114, "learning_rate": 9.989532008461396e-06, "loss": 0.8192, "step": 1631 }, { "epoch": 0.05001838911364472, "grad_norm": 1.1166918793317753, "learning_rate": 9.989499884685465e-06, "loss": 0.5444, "step": 1632 }, { "epoch": 0.05004903763638593, "grad_norm": 1.997778223768244, "learning_rate": 9.989467711746711e-06, "loss": 0.8126, "step": 1633 }, { "epoch": 0.05007968615912713, "grad_norm": 2.3154032667508013, "learning_rate": 9.98943548964546e-06, "loss": 0.8258, "step": 1634 }, { "epoch": 0.05011033468186833, "grad_norm": 0.9522792971073968, "learning_rate": 9.98940321838202e-06, "loss": 0.5454, "step": 1635 }, { "epoch": 0.05014098320460954, "grad_norm": 2.40935311235054, "learning_rate": 9.989370897956718e-06, "loss": 0.8147, "step": 1636 }, { "epoch": 0.05017163172735074, "grad_norm": 2.1397137449528354, "learning_rate": 9.989338528369866e-06, "loss": 0.8227, "step": 1637 }, { "epoch": 0.05020228025009194, "grad_norm": 2.3852089045422975, "learning_rate": 9.989306109621783e-06, "loss": 0.7977, "step": 1638 }, { "epoch": 0.05023292877283315, "grad_norm": 2.212449145128613, "learning_rate": 9.989273641712792e-06, "loss": 0.7656, "step": 1639 }, { "epoch": 0.05026357729557435, "grad_norm": 1.1788598560851444, "learning_rate": 9.989241124643212e-06, "loss": 0.5516, "step": 1640 }, { "epoch": 0.05029422581831556, "grad_norm": 1.0397141121237117, "learning_rate": 9.989208558413361e-06, "loss": 0.5518, "step": 1641 }, { "epoch": 0.05032487434105676, "grad_norm": 2.233134655957945, "learning_rate": 9.989175943023563e-06, "loss": 0.8179, "step": 1642 }, { "epoch": 0.050355522863797963, "grad_norm": 2.301555294017525, "learning_rate": 9.989143278474136e-06, "loss": 0.7698, "step": 1643 }, { "epoch": 0.05038617138653917, "grad_norm": 2.2730910962278443, "learning_rate": 9.989110564765406e-06, "loss": 0.8133, "step": 1644 }, { "epoch": 0.050416819909280375, "grad_norm": 2.056780503519396, "learning_rate": 9.989077801897691e-06, "loss": 0.7934, "step": 1645 }, { "epoch": 0.050447468432021574, "grad_norm": 2.2604544305651992, "learning_rate": 9.989044989871317e-06, "loss": 0.7984, "step": 1646 }, { "epoch": 0.05047811695476278, "grad_norm": 2.322907672732624, "learning_rate": 9.989012128686609e-06, "loss": 0.9275, "step": 1647 }, { "epoch": 0.050508765477503985, "grad_norm": 2.0801735274727235, "learning_rate": 9.988979218343884e-06, "loss": 0.7525, "step": 1648 }, { "epoch": 0.05053941400024519, "grad_norm": 2.4958763661431096, "learning_rate": 9.988946258843471e-06, "loss": 0.8154, "step": 1649 }, { "epoch": 0.05057006252298639, "grad_norm": 2.287989321116229, "learning_rate": 9.988913250185695e-06, "loss": 0.8532, "step": 1650 }, { "epoch": 0.050600711045727595, "grad_norm": 2.0427526955648405, "learning_rate": 9.98888019237088e-06, "loss": 0.7055, "step": 1651 }, { "epoch": 0.0506313595684688, "grad_norm": 2.2386462328154275, "learning_rate": 9.988847085399351e-06, "loss": 0.8376, "step": 1652 }, { "epoch": 0.050662008091210006, "grad_norm": 2.465729614683851, "learning_rate": 9.988813929271438e-06, "loss": 0.8173, "step": 1653 }, { "epoch": 0.050692656613951205, "grad_norm": 2.3268730890866225, "learning_rate": 9.988780723987461e-06, "loss": 0.785, "step": 1654 }, { "epoch": 0.05072330513669241, "grad_norm": 1.5631221918645464, "learning_rate": 9.988747469547752e-06, "loss": 0.5295, "step": 1655 }, { "epoch": 0.050753953659433616, "grad_norm": 2.3608964922170768, "learning_rate": 9.98871416595264e-06, "loss": 0.7881, "step": 1656 }, { "epoch": 0.05078460218217482, "grad_norm": 2.3049122995143083, "learning_rate": 9.98868081320245e-06, "loss": 0.7638, "step": 1657 }, { "epoch": 0.05081525070491602, "grad_norm": 2.346642591344067, "learning_rate": 9.988647411297512e-06, "loss": 0.8811, "step": 1658 }, { "epoch": 0.050845899227657226, "grad_norm": 2.3568567156184512, "learning_rate": 9.988613960238152e-06, "loss": 0.7893, "step": 1659 }, { "epoch": 0.05087654775039843, "grad_norm": 2.0398195348540633, "learning_rate": 9.988580460024705e-06, "loss": 0.7532, "step": 1660 }, { "epoch": 0.05090719627313964, "grad_norm": 2.1985507845244205, "learning_rate": 9.988546910657497e-06, "loss": 0.8123, "step": 1661 }, { "epoch": 0.050937844795880836, "grad_norm": 2.2159375477297596, "learning_rate": 9.98851331213686e-06, "loss": 0.7712, "step": 1662 }, { "epoch": 0.05096849331862204, "grad_norm": 2.022708375571199, "learning_rate": 9.988479664463127e-06, "loss": 0.7265, "step": 1663 }, { "epoch": 0.05099914184136325, "grad_norm": 2.3041414362477743, "learning_rate": 9.988445967636624e-06, "loss": 0.6888, "step": 1664 }, { "epoch": 0.05102979036410445, "grad_norm": 1.2544221670707971, "learning_rate": 9.988412221657689e-06, "loss": 0.551, "step": 1665 }, { "epoch": 0.05106043888684565, "grad_norm": 2.468830000630046, "learning_rate": 9.988378426526653e-06, "loss": 0.8172, "step": 1666 }, { "epoch": 0.05109108740958686, "grad_norm": 2.127800561268766, "learning_rate": 9.988344582243845e-06, "loss": 0.6761, "step": 1667 }, { "epoch": 0.05112173593232806, "grad_norm": 2.0751512867685467, "learning_rate": 9.988310688809603e-06, "loss": 0.7537, "step": 1668 }, { "epoch": 0.05115238445506927, "grad_norm": 2.3757249901031696, "learning_rate": 9.98827674622426e-06, "loss": 0.742, "step": 1669 }, { "epoch": 0.05118303297781047, "grad_norm": 2.167632815682499, "learning_rate": 9.988242754488149e-06, "loss": 0.7888, "step": 1670 }, { "epoch": 0.051213681500551674, "grad_norm": 2.153341693175387, "learning_rate": 9.988208713601606e-06, "loss": 0.6858, "step": 1671 }, { "epoch": 0.05124433002329288, "grad_norm": 1.0228534104961036, "learning_rate": 9.988174623564967e-06, "loss": 0.5454, "step": 1672 }, { "epoch": 0.05127497854603408, "grad_norm": 2.4615997831554037, "learning_rate": 9.988140484378567e-06, "loss": 0.7236, "step": 1673 }, { "epoch": 0.051305627068775284, "grad_norm": 2.0447316266068043, "learning_rate": 9.988106296042741e-06, "loss": 0.7067, "step": 1674 }, { "epoch": 0.05133627559151649, "grad_norm": 2.047848087296416, "learning_rate": 9.988072058557829e-06, "loss": 0.7532, "step": 1675 }, { "epoch": 0.051366924114257695, "grad_norm": 2.3586480316707283, "learning_rate": 9.988037771924167e-06, "loss": 0.781, "step": 1676 }, { "epoch": 0.051397572636998894, "grad_norm": 2.2751231149504623, "learning_rate": 9.98800343614209e-06, "loss": 0.8404, "step": 1677 }, { "epoch": 0.0514282211597401, "grad_norm": 0.9617417328846181, "learning_rate": 9.987969051211942e-06, "loss": 0.5372, "step": 1678 }, { "epoch": 0.051458869682481305, "grad_norm": 2.1873990819083953, "learning_rate": 9.98793461713406e-06, "loss": 0.6999, "step": 1679 }, { "epoch": 0.05148951820522251, "grad_norm": 2.3392412419555355, "learning_rate": 9.987900133908777e-06, "loss": 0.8196, "step": 1680 }, { "epoch": 0.05152016672796371, "grad_norm": 2.258210083790024, "learning_rate": 9.98786560153644e-06, "loss": 0.7812, "step": 1681 }, { "epoch": 0.051550815250704915, "grad_norm": 2.5184118584767328, "learning_rate": 9.987831020017389e-06, "loss": 0.8118, "step": 1682 }, { "epoch": 0.05158146377344612, "grad_norm": 2.3821221166720603, "learning_rate": 9.98779638935196e-06, "loss": 0.8068, "step": 1683 }, { "epoch": 0.051612112296187326, "grad_norm": 2.157760764230448, "learning_rate": 9.987761709540497e-06, "loss": 0.8455, "step": 1684 }, { "epoch": 0.051642760818928525, "grad_norm": 2.0705524868862684, "learning_rate": 9.987726980583343e-06, "loss": 0.789, "step": 1685 }, { "epoch": 0.05167340934166973, "grad_norm": 2.192715000462249, "learning_rate": 9.98769220248084e-06, "loss": 0.7805, "step": 1686 }, { "epoch": 0.051704057864410936, "grad_norm": 2.1343504044578725, "learning_rate": 9.987657375233329e-06, "loss": 0.8226, "step": 1687 }, { "epoch": 0.05173470638715214, "grad_norm": 2.5760686877135908, "learning_rate": 9.987622498841151e-06, "loss": 0.7794, "step": 1688 }, { "epoch": 0.05176535490989334, "grad_norm": 1.9455153357589585, "learning_rate": 9.987587573304655e-06, "loss": 0.7582, "step": 1689 }, { "epoch": 0.051796003432634546, "grad_norm": 2.196051478216042, "learning_rate": 9.987552598624182e-06, "loss": 0.8572, "step": 1690 }, { "epoch": 0.05182665195537575, "grad_norm": 2.0087440594643287, "learning_rate": 9.987517574800077e-06, "loss": 0.7654, "step": 1691 }, { "epoch": 0.05185730047811696, "grad_norm": 1.9111241303659623, "learning_rate": 9.987482501832686e-06, "loss": 0.7801, "step": 1692 }, { "epoch": 0.051887949000858156, "grad_norm": 2.239183063840295, "learning_rate": 9.987447379722353e-06, "loss": 0.7781, "step": 1693 }, { "epoch": 0.05191859752359936, "grad_norm": 2.12612564129964, "learning_rate": 9.987412208469424e-06, "loss": 0.79, "step": 1694 }, { "epoch": 0.05194924604634057, "grad_norm": 2.320439879382839, "learning_rate": 9.987376988074248e-06, "loss": 0.7339, "step": 1695 }, { "epoch": 0.051979894569081773, "grad_norm": 1.9048492142448357, "learning_rate": 9.987341718537169e-06, "loss": 0.7335, "step": 1696 }, { "epoch": 0.05201054309182297, "grad_norm": 2.1573021898571048, "learning_rate": 9.987306399858537e-06, "loss": 0.7735, "step": 1697 }, { "epoch": 0.05204119161456418, "grad_norm": 2.2720040267010915, "learning_rate": 9.987271032038698e-06, "loss": 0.8715, "step": 1698 }, { "epoch": 0.052071840137305384, "grad_norm": 2.0097556043725193, "learning_rate": 9.987235615078003e-06, "loss": 0.7599, "step": 1699 }, { "epoch": 0.05210248866004659, "grad_norm": 3.1679820260016984, "learning_rate": 9.987200148976798e-06, "loss": 0.7336, "step": 1700 }, { "epoch": 0.05213313718278779, "grad_norm": 2.3249559748002695, "learning_rate": 9.987164633735436e-06, "loss": 0.7538, "step": 1701 }, { "epoch": 0.052163785705528994, "grad_norm": 2.638907367190402, "learning_rate": 9.987129069354264e-06, "loss": 0.706, "step": 1702 }, { "epoch": 0.0521944342282702, "grad_norm": 1.209757492472639, "learning_rate": 9.987093455833632e-06, "loss": 0.541, "step": 1703 }, { "epoch": 0.0522250827510114, "grad_norm": 1.0165893828046557, "learning_rate": 9.987057793173893e-06, "loss": 0.5432, "step": 1704 }, { "epoch": 0.052255731273752604, "grad_norm": 2.3201732359809917, "learning_rate": 9.987022081375397e-06, "loss": 0.8571, "step": 1705 }, { "epoch": 0.05228637979649381, "grad_norm": 2.280809625087376, "learning_rate": 9.986986320438496e-06, "loss": 0.7841, "step": 1706 }, { "epoch": 0.052317028319235015, "grad_norm": 2.1815797408598017, "learning_rate": 9.986950510363544e-06, "loss": 0.7255, "step": 1707 }, { "epoch": 0.052347676841976214, "grad_norm": 2.119142982554711, "learning_rate": 9.986914651150894e-06, "loss": 0.8343, "step": 1708 }, { "epoch": 0.05237832536471742, "grad_norm": 2.0968938579953917, "learning_rate": 9.986878742800895e-06, "loss": 0.7148, "step": 1709 }, { "epoch": 0.052408973887458625, "grad_norm": 1.6205493025716804, "learning_rate": 9.986842785313906e-06, "loss": 0.5394, "step": 1710 }, { "epoch": 0.05243962241019983, "grad_norm": 1.238507120857092, "learning_rate": 9.986806778690277e-06, "loss": 0.528, "step": 1711 }, { "epoch": 0.05247027093294103, "grad_norm": 2.147560304839575, "learning_rate": 9.986770722930368e-06, "loss": 0.7619, "step": 1712 }, { "epoch": 0.052500919455682235, "grad_norm": 2.335992796844048, "learning_rate": 9.986734618034529e-06, "loss": 0.7488, "step": 1713 }, { "epoch": 0.05253156797842344, "grad_norm": 2.1431134530599407, "learning_rate": 9.986698464003117e-06, "loss": 0.7758, "step": 1714 }, { "epoch": 0.052562216501164646, "grad_norm": 1.9373571199339419, "learning_rate": 9.986662260836491e-06, "loss": 0.7341, "step": 1715 }, { "epoch": 0.052592865023905845, "grad_norm": 1.6174366956292199, "learning_rate": 9.986626008535005e-06, "loss": 0.5644, "step": 1716 }, { "epoch": 0.05262351354664705, "grad_norm": 2.437279245990581, "learning_rate": 9.986589707099017e-06, "loss": 0.7992, "step": 1717 }, { "epoch": 0.052654162069388256, "grad_norm": 2.41231060141157, "learning_rate": 9.986553356528885e-06, "loss": 0.7596, "step": 1718 }, { "epoch": 0.05268481059212946, "grad_norm": 1.9796487865215375, "learning_rate": 9.986516956824967e-06, "loss": 0.6205, "step": 1719 }, { "epoch": 0.05271545911487066, "grad_norm": 1.0867781164739536, "learning_rate": 9.98648050798762e-06, "loss": 0.5664, "step": 1720 }, { "epoch": 0.052746107637611866, "grad_norm": 2.191493143115638, "learning_rate": 9.986444010017207e-06, "loss": 0.8068, "step": 1721 }, { "epoch": 0.05277675616035307, "grad_norm": 0.9416542366229889, "learning_rate": 9.986407462914086e-06, "loss": 0.5216, "step": 1722 }, { "epoch": 0.05280740468309428, "grad_norm": 0.9589313154232326, "learning_rate": 9.986370866678614e-06, "loss": 0.5317, "step": 1723 }, { "epoch": 0.05283805320583548, "grad_norm": 2.0945086229942302, "learning_rate": 9.986334221311155e-06, "loss": 0.8377, "step": 1724 }, { "epoch": 0.05286870172857668, "grad_norm": 2.021724438611422, "learning_rate": 9.986297526812068e-06, "loss": 0.8668, "step": 1725 }, { "epoch": 0.05289935025131789, "grad_norm": 2.4249245074303913, "learning_rate": 9.986260783181718e-06, "loss": 0.8293, "step": 1726 }, { "epoch": 0.052929998774059094, "grad_norm": 1.1303515506598083, "learning_rate": 9.986223990420462e-06, "loss": 0.5363, "step": 1727 }, { "epoch": 0.05296064729680029, "grad_norm": 0.9964537276425889, "learning_rate": 9.986187148528668e-06, "loss": 0.4971, "step": 1728 }, { "epoch": 0.0529912958195415, "grad_norm": 1.9128725862623237, "learning_rate": 9.986150257506696e-06, "loss": 0.7325, "step": 1729 }, { "epoch": 0.053021944342282704, "grad_norm": 2.094666961830216, "learning_rate": 9.986113317354907e-06, "loss": 0.835, "step": 1730 }, { "epoch": 0.05305259286502391, "grad_norm": 0.9405516129167963, "learning_rate": 9.986076328073669e-06, "loss": 0.5254, "step": 1731 }, { "epoch": 0.05308324138776511, "grad_norm": 2.1664767052468155, "learning_rate": 9.986039289663346e-06, "loss": 0.8077, "step": 1732 }, { "epoch": 0.053113889910506314, "grad_norm": 0.9594887766036231, "learning_rate": 9.986002202124302e-06, "loss": 0.4928, "step": 1733 }, { "epoch": 0.05314453843324752, "grad_norm": 1.0019375112572841, "learning_rate": 9.985965065456902e-06, "loss": 0.5317, "step": 1734 }, { "epoch": 0.05317518695598872, "grad_norm": 2.4213816334588985, "learning_rate": 9.985927879661513e-06, "loss": 0.7986, "step": 1735 }, { "epoch": 0.053205835478729924, "grad_norm": 2.264941272583994, "learning_rate": 9.9858906447385e-06, "loss": 0.8029, "step": 1736 }, { "epoch": 0.05323648400147113, "grad_norm": 0.9423888720166836, "learning_rate": 9.985853360688232e-06, "loss": 0.5306, "step": 1737 }, { "epoch": 0.053267132524212335, "grad_norm": 2.048893492873479, "learning_rate": 9.985816027511075e-06, "loss": 0.7928, "step": 1738 }, { "epoch": 0.053297781046953534, "grad_norm": 1.9135643453660038, "learning_rate": 9.985778645207397e-06, "loss": 0.7726, "step": 1739 }, { "epoch": 0.05332842956969474, "grad_norm": 2.5824471656734462, "learning_rate": 9.985741213777566e-06, "loss": 0.8821, "step": 1740 }, { "epoch": 0.053359078092435945, "grad_norm": 1.9691646513296786, "learning_rate": 9.985703733221951e-06, "loss": 0.7206, "step": 1741 }, { "epoch": 0.05338972661517715, "grad_norm": 2.1374715634078205, "learning_rate": 9.985666203540923e-06, "loss": 0.7718, "step": 1742 }, { "epoch": 0.05342037513791835, "grad_norm": 2.380227554893973, "learning_rate": 9.985628624734847e-06, "loss": 0.8, "step": 1743 }, { "epoch": 0.053451023660659555, "grad_norm": 2.156729887703542, "learning_rate": 9.985590996804099e-06, "loss": 0.8299, "step": 1744 }, { "epoch": 0.05348167218340076, "grad_norm": 1.2252910319725205, "learning_rate": 9.985553319749047e-06, "loss": 0.5309, "step": 1745 }, { "epoch": 0.053512320706141966, "grad_norm": 1.0525365253581744, "learning_rate": 9.985515593570061e-06, "loss": 0.5372, "step": 1746 }, { "epoch": 0.053542969228883165, "grad_norm": 2.340990462293965, "learning_rate": 9.985477818267517e-06, "loss": 0.7604, "step": 1747 }, { "epoch": 0.05357361775162437, "grad_norm": 2.3805889715665622, "learning_rate": 9.985439993841783e-06, "loss": 0.8291, "step": 1748 }, { "epoch": 0.053604266274365577, "grad_norm": 2.194512288546915, "learning_rate": 9.985402120293232e-06, "loss": 0.7745, "step": 1749 }, { "epoch": 0.05363491479710678, "grad_norm": 2.429115731993358, "learning_rate": 9.98536419762224e-06, "loss": 0.8288, "step": 1750 }, { "epoch": 0.05366556331984798, "grad_norm": 1.2253884907519894, "learning_rate": 9.985326225829179e-06, "loss": 0.5013, "step": 1751 }, { "epoch": 0.05369621184258919, "grad_norm": 2.619652052138028, "learning_rate": 9.985288204914424e-06, "loss": 0.7936, "step": 1752 }, { "epoch": 0.05372686036533039, "grad_norm": 2.0888763535089705, "learning_rate": 9.985250134878346e-06, "loss": 0.822, "step": 1753 }, { "epoch": 0.0537575088880716, "grad_norm": 2.541351378437032, "learning_rate": 9.985212015721325e-06, "loss": 0.791, "step": 1754 }, { "epoch": 0.0537881574108128, "grad_norm": 2.1258164101373236, "learning_rate": 9.985173847443733e-06, "loss": 0.8569, "step": 1755 }, { "epoch": 0.053818805933554, "grad_norm": 1.800321450570832, "learning_rate": 9.985135630045948e-06, "loss": 0.7688, "step": 1756 }, { "epoch": 0.05384945445629521, "grad_norm": 2.0819980637082707, "learning_rate": 9.985097363528347e-06, "loss": 0.6871, "step": 1757 }, { "epoch": 0.053880102979036414, "grad_norm": 2.0046236907642165, "learning_rate": 9.985059047891305e-06, "loss": 0.6266, "step": 1758 }, { "epoch": 0.05391075150177761, "grad_norm": 2.322565878656705, "learning_rate": 9.9850206831352e-06, "loss": 0.8001, "step": 1759 }, { "epoch": 0.05394140002451882, "grad_norm": 2.2929263574184557, "learning_rate": 9.984982269260414e-06, "loss": 0.7902, "step": 1760 }, { "epoch": 0.053972048547260024, "grad_norm": 2.2767548873202106, "learning_rate": 9.984943806267319e-06, "loss": 0.7925, "step": 1761 }, { "epoch": 0.05400269707000123, "grad_norm": 2.193547522009201, "learning_rate": 9.9849052941563e-06, "loss": 0.8025, "step": 1762 }, { "epoch": 0.05403334559274243, "grad_norm": 2.016801128518426, "learning_rate": 9.984866732927731e-06, "loss": 0.7434, "step": 1763 }, { "epoch": 0.054063994115483634, "grad_norm": 1.0626413065952682, "learning_rate": 9.984828122581998e-06, "loss": 0.5347, "step": 1764 }, { "epoch": 0.05409464263822484, "grad_norm": 2.7988469224023516, "learning_rate": 9.984789463119475e-06, "loss": 0.7848, "step": 1765 }, { "epoch": 0.05412529116096604, "grad_norm": 2.044741502074766, "learning_rate": 9.984750754540547e-06, "loss": 0.8079, "step": 1766 }, { "epoch": 0.054155939683707244, "grad_norm": 2.203024944886088, "learning_rate": 9.984711996845596e-06, "loss": 0.8894, "step": 1767 }, { "epoch": 0.05418658820644845, "grad_norm": 0.9748598534463385, "learning_rate": 9.984673190034998e-06, "loss": 0.5456, "step": 1768 }, { "epoch": 0.054217236729189655, "grad_norm": 2.2985467844084932, "learning_rate": 9.984634334109143e-06, "loss": 0.7257, "step": 1769 }, { "epoch": 0.054247885251930854, "grad_norm": 2.2226639724004165, "learning_rate": 9.984595429068411e-06, "loss": 0.7205, "step": 1770 }, { "epoch": 0.05427853377467206, "grad_norm": 0.998109792944901, "learning_rate": 9.984556474913183e-06, "loss": 0.5088, "step": 1771 }, { "epoch": 0.054309182297413265, "grad_norm": 2.1682431524948282, "learning_rate": 9.984517471643846e-06, "loss": 0.7256, "step": 1772 }, { "epoch": 0.05433983082015447, "grad_norm": 2.65241217655717, "learning_rate": 9.984478419260782e-06, "loss": 0.77, "step": 1773 }, { "epoch": 0.05437047934289567, "grad_norm": 2.357042320123524, "learning_rate": 9.984439317764378e-06, "loss": 0.7917, "step": 1774 }, { "epoch": 0.054401127865636875, "grad_norm": 0.9638505130345171, "learning_rate": 9.984400167155017e-06, "loss": 0.5378, "step": 1775 }, { "epoch": 0.05443177638837808, "grad_norm": 2.2037799213466838, "learning_rate": 9.984360967433086e-06, "loss": 0.7528, "step": 1776 }, { "epoch": 0.054462424911119287, "grad_norm": 2.14173914858239, "learning_rate": 9.984321718598972e-06, "loss": 0.8032, "step": 1777 }, { "epoch": 0.054493073433860485, "grad_norm": 2.215972731763003, "learning_rate": 9.98428242065306e-06, "loss": 0.8097, "step": 1778 }, { "epoch": 0.05452372195660169, "grad_norm": 2.410529975463277, "learning_rate": 9.98424307359574e-06, "loss": 0.7632, "step": 1779 }, { "epoch": 0.0545543704793429, "grad_norm": 2.0495392792877025, "learning_rate": 9.984203677427393e-06, "loss": 0.7466, "step": 1780 }, { "epoch": 0.0545850190020841, "grad_norm": 2.093307823388011, "learning_rate": 9.984164232148415e-06, "loss": 0.6682, "step": 1781 }, { "epoch": 0.0546156675248253, "grad_norm": 2.4973440785999985, "learning_rate": 9.984124737759192e-06, "loss": 0.6969, "step": 1782 }, { "epoch": 0.05464631604756651, "grad_norm": 2.289172445216251, "learning_rate": 9.984085194260112e-06, "loss": 0.7867, "step": 1783 }, { "epoch": 0.05467696457030771, "grad_norm": 2.0098326728648934, "learning_rate": 9.984045601651566e-06, "loss": 0.8219, "step": 1784 }, { "epoch": 0.05470761309304892, "grad_norm": 2.234207398888604, "learning_rate": 9.984005959933942e-06, "loss": 0.7978, "step": 1785 }, { "epoch": 0.05473826161579012, "grad_norm": 2.212460051746992, "learning_rate": 9.983966269107634e-06, "loss": 0.7841, "step": 1786 }, { "epoch": 0.05476891013853132, "grad_norm": 1.0403415261031075, "learning_rate": 9.98392652917303e-06, "loss": 0.5436, "step": 1787 }, { "epoch": 0.05479955866127253, "grad_norm": 2.085164085483519, "learning_rate": 9.983886740130521e-06, "loss": 0.8134, "step": 1788 }, { "epoch": 0.054830207184013734, "grad_norm": 2.5978243494131537, "learning_rate": 9.983846901980505e-06, "loss": 0.7363, "step": 1789 }, { "epoch": 0.05486085570675493, "grad_norm": 2.085280177031837, "learning_rate": 9.983807014723367e-06, "loss": 0.7717, "step": 1790 }, { "epoch": 0.05489150422949614, "grad_norm": 2.097915599355097, "learning_rate": 9.983767078359505e-06, "loss": 0.8331, "step": 1791 }, { "epoch": 0.054922152752237344, "grad_norm": 2.156486417898269, "learning_rate": 9.983727092889309e-06, "loss": 0.7619, "step": 1792 }, { "epoch": 0.05495280127497855, "grad_norm": 2.304663572175765, "learning_rate": 9.983687058313177e-06, "loss": 0.6746, "step": 1793 }, { "epoch": 0.05498344979771975, "grad_norm": 2.44008250041993, "learning_rate": 9.9836469746315e-06, "loss": 0.8495, "step": 1794 }, { "epoch": 0.055014098320460954, "grad_norm": 2.0151423956739016, "learning_rate": 9.983606841844672e-06, "loss": 0.768, "step": 1795 }, { "epoch": 0.05504474684320216, "grad_norm": 2.1520603963605494, "learning_rate": 9.983566659953094e-06, "loss": 0.744, "step": 1796 }, { "epoch": 0.05507539536594336, "grad_norm": 1.0995297190458408, "learning_rate": 9.983526428957157e-06, "loss": 0.5276, "step": 1797 }, { "epoch": 0.055106043888684564, "grad_norm": 2.1278382311743917, "learning_rate": 9.98348614885726e-06, "loss": 0.7947, "step": 1798 }, { "epoch": 0.05513669241142577, "grad_norm": 2.3050703360233036, "learning_rate": 9.983445819653798e-06, "loss": 0.8548, "step": 1799 }, { "epoch": 0.055167340934166975, "grad_norm": 2.66671171257881, "learning_rate": 9.983405441347171e-06, "loss": 0.7084, "step": 1800 }, { "epoch": 0.055197989456908174, "grad_norm": 2.171458850719512, "learning_rate": 9.983365013937774e-06, "loss": 0.7909, "step": 1801 }, { "epoch": 0.05522863797964938, "grad_norm": 2.16892589061846, "learning_rate": 9.983324537426007e-06, "loss": 0.7864, "step": 1802 }, { "epoch": 0.055259286502390585, "grad_norm": 2.077696195501957, "learning_rate": 9.983284011812267e-06, "loss": 0.7147, "step": 1803 }, { "epoch": 0.05528993502513179, "grad_norm": 2.195674436465187, "learning_rate": 9.983243437096955e-06, "loss": 0.7613, "step": 1804 }, { "epoch": 0.05532058354787299, "grad_norm": 2.1471957628661924, "learning_rate": 9.983202813280472e-06, "loss": 0.8064, "step": 1805 }, { "epoch": 0.055351232070614195, "grad_norm": 2.2568570726756954, "learning_rate": 9.983162140363214e-06, "loss": 0.6167, "step": 1806 }, { "epoch": 0.0553818805933554, "grad_norm": 2.194477472803308, "learning_rate": 9.983121418345587e-06, "loss": 0.7154, "step": 1807 }, { "epoch": 0.05541252911609661, "grad_norm": 1.0601689040472042, "learning_rate": 9.983080647227987e-06, "loss": 0.5194, "step": 1808 }, { "epoch": 0.055443177638837805, "grad_norm": 2.180452678363023, "learning_rate": 9.98303982701082e-06, "loss": 0.8373, "step": 1809 }, { "epoch": 0.05547382616157901, "grad_norm": 2.5952292492801066, "learning_rate": 9.982998957694487e-06, "loss": 0.7622, "step": 1810 }, { "epoch": 0.05550447468432022, "grad_norm": 0.9073910656626829, "learning_rate": 9.98295803927939e-06, "loss": 0.5575, "step": 1811 }, { "epoch": 0.05553512320706142, "grad_norm": 2.1229427670774323, "learning_rate": 9.982917071765932e-06, "loss": 0.8795, "step": 1812 }, { "epoch": 0.05556577172980262, "grad_norm": 2.064046020571693, "learning_rate": 9.982876055154518e-06, "loss": 0.8498, "step": 1813 }, { "epoch": 0.05559642025254383, "grad_norm": 0.966879235125804, "learning_rate": 9.98283498944555e-06, "loss": 0.5389, "step": 1814 }, { "epoch": 0.05562706877528503, "grad_norm": 2.6197225907571524, "learning_rate": 9.982793874639436e-06, "loss": 0.8016, "step": 1815 }, { "epoch": 0.05565771729802624, "grad_norm": 2.474689797662183, "learning_rate": 9.982752710736577e-06, "loss": 0.8975, "step": 1816 }, { "epoch": 0.05568836582076744, "grad_norm": 2.1646660237565825, "learning_rate": 9.982711497737382e-06, "loss": 0.7613, "step": 1817 }, { "epoch": 0.05571901434350864, "grad_norm": 0.9065723245288918, "learning_rate": 9.982670235642255e-06, "loss": 0.5019, "step": 1818 }, { "epoch": 0.05574966286624985, "grad_norm": 2.284938270553212, "learning_rate": 9.982628924451603e-06, "loss": 0.7394, "step": 1819 }, { "epoch": 0.055780311388991054, "grad_norm": 2.1423879797722756, "learning_rate": 9.982587564165835e-06, "loss": 0.7957, "step": 1820 }, { "epoch": 0.05581095991173225, "grad_norm": 0.9570770320087169, "learning_rate": 9.982546154785355e-06, "loss": 0.5356, "step": 1821 }, { "epoch": 0.05584160843447346, "grad_norm": 2.6566745321293523, "learning_rate": 9.982504696310574e-06, "loss": 0.7868, "step": 1822 }, { "epoch": 0.055872256957214664, "grad_norm": 2.3646069039960183, "learning_rate": 9.982463188741897e-06, "loss": 0.7655, "step": 1823 }, { "epoch": 0.05590290547995587, "grad_norm": 0.9387525427329726, "learning_rate": 9.982421632079738e-06, "loss": 0.5351, "step": 1824 }, { "epoch": 0.05593355400269707, "grad_norm": 2.2452975820685594, "learning_rate": 9.982380026324505e-06, "loss": 0.7874, "step": 1825 }, { "epoch": 0.055964202525438274, "grad_norm": 0.9206123730241805, "learning_rate": 9.982338371476604e-06, "loss": 0.5485, "step": 1826 }, { "epoch": 0.05599485104817948, "grad_norm": 1.9222028879724846, "learning_rate": 9.982296667536449e-06, "loss": 0.7851, "step": 1827 }, { "epoch": 0.05602549957092068, "grad_norm": 2.0647086144757902, "learning_rate": 9.98225491450445e-06, "loss": 0.7382, "step": 1828 }, { "epoch": 0.056056148093661884, "grad_norm": 2.164707842224681, "learning_rate": 9.98221311238102e-06, "loss": 0.8248, "step": 1829 }, { "epoch": 0.05608679661640309, "grad_norm": 2.9271821344426527, "learning_rate": 9.982171261166568e-06, "loss": 0.7316, "step": 1830 }, { "epoch": 0.056117445139144295, "grad_norm": 2.2573693197025504, "learning_rate": 9.982129360861507e-06, "loss": 0.8605, "step": 1831 }, { "epoch": 0.056148093661885494, "grad_norm": 2.241913443846562, "learning_rate": 9.982087411466253e-06, "loss": 0.8007, "step": 1832 }, { "epoch": 0.0561787421846267, "grad_norm": 2.126656274600751, "learning_rate": 9.982045412981217e-06, "loss": 0.8861, "step": 1833 }, { "epoch": 0.056209390707367905, "grad_norm": 2.155124454525486, "learning_rate": 9.982003365406812e-06, "loss": 0.7309, "step": 1834 }, { "epoch": 0.05624003923010911, "grad_norm": 2.0690148574371623, "learning_rate": 9.981961268743453e-06, "loss": 0.7277, "step": 1835 }, { "epoch": 0.05627068775285031, "grad_norm": 1.9182496322793967, "learning_rate": 9.981919122991554e-06, "loss": 0.6944, "step": 1836 }, { "epoch": 0.056301336275591515, "grad_norm": 2.2939328122338485, "learning_rate": 9.981876928151532e-06, "loss": 0.9118, "step": 1837 }, { "epoch": 0.05633198479833272, "grad_norm": 2.27303742416203, "learning_rate": 9.9818346842238e-06, "loss": 0.8857, "step": 1838 }, { "epoch": 0.05636263332107393, "grad_norm": 2.2708189996553982, "learning_rate": 9.98179239120878e-06, "loss": 0.7928, "step": 1839 }, { "epoch": 0.056393281843815125, "grad_norm": 1.9678258277838205, "learning_rate": 9.981750049106882e-06, "loss": 0.7445, "step": 1840 }, { "epoch": 0.05642393036655633, "grad_norm": 2.3439487781029817, "learning_rate": 9.981707657918529e-06, "loss": 0.7807, "step": 1841 }, { "epoch": 0.05645457888929754, "grad_norm": 2.1484134689489647, "learning_rate": 9.981665217644134e-06, "loss": 0.764, "step": 1842 }, { "epoch": 0.05648522741203874, "grad_norm": 2.084359049381905, "learning_rate": 9.981622728284117e-06, "loss": 0.7396, "step": 1843 }, { "epoch": 0.05651587593477994, "grad_norm": 2.5110886543490842, "learning_rate": 9.981580189838896e-06, "loss": 0.7946, "step": 1844 }, { "epoch": 0.05654652445752115, "grad_norm": 2.2041313913734024, "learning_rate": 9.981537602308892e-06, "loss": 0.9313, "step": 1845 }, { "epoch": 0.05657717298026235, "grad_norm": 2.4909367398899813, "learning_rate": 9.981494965694522e-06, "loss": 0.7679, "step": 1846 }, { "epoch": 0.05660782150300356, "grad_norm": 2.353085329998533, "learning_rate": 9.981452279996208e-06, "loss": 0.8558, "step": 1847 }, { "epoch": 0.05663847002574476, "grad_norm": 2.0441766231276004, "learning_rate": 9.981409545214371e-06, "loss": 0.7994, "step": 1848 }, { "epoch": 0.05666911854848596, "grad_norm": 2.210481913595893, "learning_rate": 9.981366761349431e-06, "loss": 0.8669, "step": 1849 }, { "epoch": 0.05669976707122717, "grad_norm": 2.388321045995909, "learning_rate": 9.981323928401809e-06, "loss": 0.6814, "step": 1850 }, { "epoch": 0.056730415593968374, "grad_norm": 2.2725363632415183, "learning_rate": 9.981281046371928e-06, "loss": 0.8031, "step": 1851 }, { "epoch": 0.05676106411670957, "grad_norm": 2.174530945812015, "learning_rate": 9.981238115260212e-06, "loss": 0.7705, "step": 1852 }, { "epoch": 0.05679171263945078, "grad_norm": 2.167415919240571, "learning_rate": 9.981195135067081e-06, "loss": 0.8684, "step": 1853 }, { "epoch": 0.056822361162191984, "grad_norm": 2.200782228017217, "learning_rate": 9.981152105792959e-06, "loss": 0.7244, "step": 1854 }, { "epoch": 0.05685300968493319, "grad_norm": 2.521037942324619, "learning_rate": 9.981109027438273e-06, "loss": 0.8036, "step": 1855 }, { "epoch": 0.05688365820767439, "grad_norm": 2.099286464554581, "learning_rate": 9.981065900003444e-06, "loss": 0.7527, "step": 1856 }, { "epoch": 0.056914306730415594, "grad_norm": 2.3814309908702196, "learning_rate": 9.981022723488897e-06, "loss": 0.8363, "step": 1857 }, { "epoch": 0.0569449552531568, "grad_norm": 2.043385494963043, "learning_rate": 9.980979497895061e-06, "loss": 0.7816, "step": 1858 }, { "epoch": 0.056975603775898, "grad_norm": 1.9637899089890583, "learning_rate": 9.980936223222358e-06, "loss": 0.6476, "step": 1859 }, { "epoch": 0.057006252298639204, "grad_norm": 2.078915290200768, "learning_rate": 9.980892899471216e-06, "loss": 0.7412, "step": 1860 }, { "epoch": 0.05703690082138041, "grad_norm": 2.2179333856493417, "learning_rate": 9.980849526642063e-06, "loss": 0.7393, "step": 1861 }, { "epoch": 0.057067549344121615, "grad_norm": 2.180820601137096, "learning_rate": 9.980806104735325e-06, "loss": 0.6696, "step": 1862 }, { "epoch": 0.057098197866862814, "grad_norm": 2.129242169549953, "learning_rate": 9.980762633751429e-06, "loss": 0.7184, "step": 1863 }, { "epoch": 0.05712884638960402, "grad_norm": 2.152140651835846, "learning_rate": 9.980719113690805e-06, "loss": 0.7577, "step": 1864 }, { "epoch": 0.057159494912345225, "grad_norm": 1.0527734117414005, "learning_rate": 9.980675544553881e-06, "loss": 0.5228, "step": 1865 }, { "epoch": 0.05719014343508643, "grad_norm": 2.0382157858634438, "learning_rate": 9.980631926341086e-06, "loss": 0.7296, "step": 1866 }, { "epoch": 0.05722079195782763, "grad_norm": 2.0932922936835774, "learning_rate": 9.980588259052853e-06, "loss": 0.7889, "step": 1867 }, { "epoch": 0.057251440480568835, "grad_norm": 2.3069749385299647, "learning_rate": 9.980544542689606e-06, "loss": 0.7777, "step": 1868 }, { "epoch": 0.05728208900331004, "grad_norm": 1.9693740830675626, "learning_rate": 9.98050077725178e-06, "loss": 0.6602, "step": 1869 }, { "epoch": 0.05731273752605125, "grad_norm": 1.8929865334286036, "learning_rate": 9.980456962739808e-06, "loss": 0.739, "step": 1870 }, { "epoch": 0.057343386048792445, "grad_norm": 2.161153891348112, "learning_rate": 9.980413099154116e-06, "loss": 0.8819, "step": 1871 }, { "epoch": 0.05737403457153365, "grad_norm": 2.9507127357101215, "learning_rate": 9.98036918649514e-06, "loss": 0.7698, "step": 1872 }, { "epoch": 0.05740468309427486, "grad_norm": 2.3561365508880074, "learning_rate": 9.980325224763315e-06, "loss": 0.8715, "step": 1873 }, { "epoch": 0.05743533161701606, "grad_norm": 2.198016607651309, "learning_rate": 9.980281213959069e-06, "loss": 0.8971, "step": 1874 }, { "epoch": 0.05746598013975726, "grad_norm": 2.2512642725866976, "learning_rate": 9.980237154082838e-06, "loss": 0.8458, "step": 1875 }, { "epoch": 0.05749662866249847, "grad_norm": 2.1987700119500846, "learning_rate": 9.980193045135056e-06, "loss": 0.8485, "step": 1876 }, { "epoch": 0.05752727718523967, "grad_norm": 2.1195451258981275, "learning_rate": 9.980148887116158e-06, "loss": 0.808, "step": 1877 }, { "epoch": 0.05755792570798088, "grad_norm": 2.2128921090045175, "learning_rate": 9.980104680026579e-06, "loss": 0.7796, "step": 1878 }, { "epoch": 0.05758857423072208, "grad_norm": 2.1416732145145163, "learning_rate": 9.980060423866756e-06, "loss": 0.7894, "step": 1879 }, { "epoch": 0.05761922275346328, "grad_norm": 1.87723349755078, "learning_rate": 9.98001611863712e-06, "loss": 0.6922, "step": 1880 }, { "epoch": 0.05764987127620449, "grad_norm": 2.2039468829849294, "learning_rate": 9.979971764338112e-06, "loss": 0.7831, "step": 1881 }, { "epoch": 0.057680519798945694, "grad_norm": 2.226952051451656, "learning_rate": 9.97992736097017e-06, "loss": 0.7702, "step": 1882 }, { "epoch": 0.05771116832168689, "grad_norm": 2.355119227863165, "learning_rate": 9.979882908533728e-06, "loss": 0.893, "step": 1883 }, { "epoch": 0.0577418168444281, "grad_norm": 2.3051273406597685, "learning_rate": 9.979838407029226e-06, "loss": 0.7865, "step": 1884 }, { "epoch": 0.057772465367169304, "grad_norm": 2.102250414758317, "learning_rate": 9.9797938564571e-06, "loss": 0.8675, "step": 1885 }, { "epoch": 0.05780311388991051, "grad_norm": 2.3684612924498083, "learning_rate": 9.979749256817794e-06, "loss": 0.7477, "step": 1886 }, { "epoch": 0.05783376241265171, "grad_norm": 2.126629855752258, "learning_rate": 9.979704608111742e-06, "loss": 0.7327, "step": 1887 }, { "epoch": 0.057864410935392914, "grad_norm": 1.040264207708446, "learning_rate": 9.97965991033939e-06, "loss": 0.53, "step": 1888 }, { "epoch": 0.05789505945813412, "grad_norm": 2.3035386776962303, "learning_rate": 9.979615163501172e-06, "loss": 0.8017, "step": 1889 }, { "epoch": 0.05792570798087532, "grad_norm": 2.3130780326705045, "learning_rate": 9.979570367597532e-06, "loss": 0.723, "step": 1890 }, { "epoch": 0.057956356503616524, "grad_norm": 2.39164867047449, "learning_rate": 9.97952552262891e-06, "loss": 0.81, "step": 1891 }, { "epoch": 0.05798700502635773, "grad_norm": 0.9328590183789807, "learning_rate": 9.97948062859575e-06, "loss": 0.5215, "step": 1892 }, { "epoch": 0.058017653549098935, "grad_norm": 2.000846710857981, "learning_rate": 9.979435685498496e-06, "loss": 0.6929, "step": 1893 }, { "epoch": 0.058048302071840134, "grad_norm": 2.05935610248606, "learning_rate": 9.979390693337585e-06, "loss": 0.7522, "step": 1894 }, { "epoch": 0.05807895059458134, "grad_norm": 2.0275085963867916, "learning_rate": 9.979345652113464e-06, "loss": 0.7332, "step": 1895 }, { "epoch": 0.058109599117322545, "grad_norm": 2.1529447929797176, "learning_rate": 9.979300561826576e-06, "loss": 0.7352, "step": 1896 }, { "epoch": 0.05814024764006375, "grad_norm": 1.91912532021819, "learning_rate": 9.979255422477366e-06, "loss": 0.744, "step": 1897 }, { "epoch": 0.05817089616280495, "grad_norm": 2.225340012370184, "learning_rate": 9.979210234066278e-06, "loss": 0.924, "step": 1898 }, { "epoch": 0.058201544685546155, "grad_norm": 1.1476346124121435, "learning_rate": 9.979164996593757e-06, "loss": 0.538, "step": 1899 }, { "epoch": 0.05823219320828736, "grad_norm": 2.061005064970415, "learning_rate": 9.979119710060252e-06, "loss": 0.8074, "step": 1900 }, { "epoch": 0.05826284173102857, "grad_norm": 2.210600741751947, "learning_rate": 9.979074374466203e-06, "loss": 0.8046, "step": 1901 }, { "epoch": 0.058293490253769766, "grad_norm": 2.356030986386718, "learning_rate": 9.979028989812064e-06, "loss": 0.7438, "step": 1902 }, { "epoch": 0.05832413877651097, "grad_norm": 2.3234815925181116, "learning_rate": 9.978983556098274e-06, "loss": 0.8454, "step": 1903 }, { "epoch": 0.05835478729925218, "grad_norm": 1.2180285554578802, "learning_rate": 9.978938073325288e-06, "loss": 0.5602, "step": 1904 }, { "epoch": 0.05838543582199338, "grad_norm": 2.1011121234373085, "learning_rate": 9.97889254149355e-06, "loss": 0.8249, "step": 1905 }, { "epoch": 0.05841608434473458, "grad_norm": 1.911493065271801, "learning_rate": 9.978846960603512e-06, "loss": 0.7687, "step": 1906 }, { "epoch": 0.05844673286747579, "grad_norm": 2.3872691533270873, "learning_rate": 9.97880133065562e-06, "loss": 0.7519, "step": 1907 }, { "epoch": 0.05847738139021699, "grad_norm": 2.184230267168088, "learning_rate": 9.978755651650322e-06, "loss": 0.8791, "step": 1908 }, { "epoch": 0.0585080299129582, "grad_norm": 1.955245636167583, "learning_rate": 9.978709923588074e-06, "loss": 0.7665, "step": 1909 }, { "epoch": 0.0585386784356994, "grad_norm": 2.112832356322987, "learning_rate": 9.978664146469323e-06, "loss": 0.8126, "step": 1910 }, { "epoch": 0.0585693269584406, "grad_norm": 2.087930103437995, "learning_rate": 9.978618320294518e-06, "loss": 0.773, "step": 1911 }, { "epoch": 0.05859997548118181, "grad_norm": 2.420909915990041, "learning_rate": 9.978572445064114e-06, "loss": 0.7943, "step": 1912 }, { "epoch": 0.058630624003923014, "grad_norm": 2.497554058111714, "learning_rate": 9.978526520778564e-06, "loss": 0.861, "step": 1913 }, { "epoch": 0.05866127252666421, "grad_norm": 2.0460595061315168, "learning_rate": 9.978480547438317e-06, "loss": 0.7358, "step": 1914 }, { "epoch": 0.05869192104940542, "grad_norm": 2.1981389648610024, "learning_rate": 9.978434525043825e-06, "loss": 0.7608, "step": 1915 }, { "epoch": 0.058722569572146624, "grad_norm": 2.036052862184877, "learning_rate": 9.978388453595547e-06, "loss": 0.851, "step": 1916 }, { "epoch": 0.05875321809488783, "grad_norm": 1.9492666537567593, "learning_rate": 9.978342333093932e-06, "loss": 0.6748, "step": 1917 }, { "epoch": 0.05878386661762903, "grad_norm": 2.010123440838686, "learning_rate": 9.978296163539436e-06, "loss": 0.7627, "step": 1918 }, { "epoch": 0.058814515140370234, "grad_norm": 2.5778714967232936, "learning_rate": 9.978249944932515e-06, "loss": 0.7767, "step": 1919 }, { "epoch": 0.05884516366311144, "grad_norm": 1.1631946366788732, "learning_rate": 9.978203677273623e-06, "loss": 0.5496, "step": 1920 }, { "epoch": 0.058875812185852645, "grad_norm": 2.2476649060778984, "learning_rate": 9.97815736056322e-06, "loss": 0.826, "step": 1921 }, { "epoch": 0.058906460708593844, "grad_norm": 2.2169137470193037, "learning_rate": 9.978110994801754e-06, "loss": 0.8095, "step": 1922 }, { "epoch": 0.05893710923133505, "grad_norm": 2.047255820874383, "learning_rate": 9.978064579989688e-06, "loss": 0.8386, "step": 1923 }, { "epoch": 0.058967757754076255, "grad_norm": 2.007214889494969, "learning_rate": 9.97801811612748e-06, "loss": 0.8458, "step": 1924 }, { "epoch": 0.058998406276817454, "grad_norm": 2.247814769795314, "learning_rate": 9.977971603215583e-06, "loss": 0.7283, "step": 1925 }, { "epoch": 0.05902905479955866, "grad_norm": 2.3852586820892583, "learning_rate": 9.97792504125446e-06, "loss": 0.8759, "step": 1926 }, { "epoch": 0.059059703322299865, "grad_norm": 2.239708916754809, "learning_rate": 9.977878430244566e-06, "loss": 0.8246, "step": 1927 }, { "epoch": 0.05909035184504107, "grad_norm": 1.2150014645517213, "learning_rate": 9.977831770186364e-06, "loss": 0.5519, "step": 1928 }, { "epoch": 0.05912100036778227, "grad_norm": 0.9585743212984494, "learning_rate": 9.977785061080312e-06, "loss": 0.5082, "step": 1929 }, { "epoch": 0.059151648890523476, "grad_norm": 0.9150215007361415, "learning_rate": 9.97773830292687e-06, "loss": 0.5308, "step": 1930 }, { "epoch": 0.05918229741326468, "grad_norm": 2.231329172435063, "learning_rate": 9.977691495726498e-06, "loss": 0.8567, "step": 1931 }, { "epoch": 0.05921294593600589, "grad_norm": 2.218860122879323, "learning_rate": 9.977644639479658e-06, "loss": 0.8188, "step": 1932 }, { "epoch": 0.059243594458747086, "grad_norm": 2.456189543943653, "learning_rate": 9.977597734186813e-06, "loss": 0.9036, "step": 1933 }, { "epoch": 0.05927424298148829, "grad_norm": 2.4235301522633987, "learning_rate": 9.977550779848422e-06, "loss": 0.9153, "step": 1934 }, { "epoch": 0.0593048915042295, "grad_norm": 1.9489336690846524, "learning_rate": 9.977503776464952e-06, "loss": 0.7621, "step": 1935 }, { "epoch": 0.0593355400269707, "grad_norm": 1.9828054881494837, "learning_rate": 9.977456724036862e-06, "loss": 0.7633, "step": 1936 }, { "epoch": 0.0593661885497119, "grad_norm": 1.932716505172929, "learning_rate": 9.977409622564619e-06, "loss": 0.7921, "step": 1937 }, { "epoch": 0.05939683707245311, "grad_norm": 2.2134951083157595, "learning_rate": 9.977362472048685e-06, "loss": 0.7817, "step": 1938 }, { "epoch": 0.05942748559519431, "grad_norm": 2.261029832640137, "learning_rate": 9.977315272489523e-06, "loss": 0.8799, "step": 1939 }, { "epoch": 0.05945813411793552, "grad_norm": 2.1125899012899167, "learning_rate": 9.9772680238876e-06, "loss": 0.5525, "step": 1940 }, { "epoch": 0.05948878264067672, "grad_norm": 2.2406645782818297, "learning_rate": 9.977220726243384e-06, "loss": 0.8131, "step": 1941 }, { "epoch": 0.05951943116341792, "grad_norm": 1.9369865950664453, "learning_rate": 9.977173379557338e-06, "loss": 0.7815, "step": 1942 }, { "epoch": 0.05955007968615913, "grad_norm": 1.997385965124388, "learning_rate": 9.97712598382993e-06, "loss": 0.7106, "step": 1943 }, { "epoch": 0.059580728208900334, "grad_norm": 2.1223098201520227, "learning_rate": 9.977078539061625e-06, "loss": 0.7542, "step": 1944 }, { "epoch": 0.05961137673164153, "grad_norm": 2.0269813209545595, "learning_rate": 9.977031045252892e-06, "loss": 0.7515, "step": 1945 }, { "epoch": 0.05964202525438274, "grad_norm": 2.2585764837467015, "learning_rate": 9.976983502404199e-06, "loss": 0.8692, "step": 1946 }, { "epoch": 0.059672673777123944, "grad_norm": 2.076685310168226, "learning_rate": 9.976935910516015e-06, "loss": 0.7222, "step": 1947 }, { "epoch": 0.05970332229986515, "grad_norm": 2.0517963698568544, "learning_rate": 9.976888269588806e-06, "loss": 0.7064, "step": 1948 }, { "epoch": 0.05973397082260635, "grad_norm": 2.1783713571123773, "learning_rate": 9.976840579623045e-06, "loss": 0.8688, "step": 1949 }, { "epoch": 0.059764619345347554, "grad_norm": 1.981713043240793, "learning_rate": 9.9767928406192e-06, "loss": 0.7474, "step": 1950 }, { "epoch": 0.05979526786808876, "grad_norm": 1.8857387120031441, "learning_rate": 9.976745052577741e-06, "loss": 0.7668, "step": 1951 }, { "epoch": 0.059825916390829965, "grad_norm": 2.1169989875284103, "learning_rate": 9.97669721549914e-06, "loss": 0.5748, "step": 1952 }, { "epoch": 0.059856564913571164, "grad_norm": 2.1296441038946763, "learning_rate": 9.97664932938387e-06, "loss": 0.7332, "step": 1953 }, { "epoch": 0.05988721343631237, "grad_norm": 2.1251565378415824, "learning_rate": 9.9766013942324e-06, "loss": 0.803, "step": 1954 }, { "epoch": 0.059917861959053575, "grad_norm": 2.42350787445781, "learning_rate": 9.9765534100452e-06, "loss": 0.75, "step": 1955 }, { "epoch": 0.059948510481794774, "grad_norm": 1.0098582488733845, "learning_rate": 9.97650537682275e-06, "loss": 0.5364, "step": 1956 }, { "epoch": 0.05997915900453598, "grad_norm": 2.245707312815582, "learning_rate": 9.976457294565515e-06, "loss": 0.8404, "step": 1957 }, { "epoch": 0.060009807527277186, "grad_norm": 1.0747523278992153, "learning_rate": 9.976409163273977e-06, "loss": 0.5282, "step": 1958 }, { "epoch": 0.06004045605001839, "grad_norm": 2.168856562621754, "learning_rate": 9.976360982948605e-06, "loss": 0.7933, "step": 1959 }, { "epoch": 0.06007110457275959, "grad_norm": 2.4407541883244153, "learning_rate": 9.976312753589874e-06, "loss": 0.8623, "step": 1960 }, { "epoch": 0.060101753095500796, "grad_norm": 1.1490975660374343, "learning_rate": 9.976264475198261e-06, "loss": 0.5389, "step": 1961 }, { "epoch": 0.060132401618242, "grad_norm": 1.875089586757832, "learning_rate": 9.976216147774242e-06, "loss": 0.8015, "step": 1962 }, { "epoch": 0.06016305014098321, "grad_norm": 0.9431089340876715, "learning_rate": 9.97616777131829e-06, "loss": 0.5514, "step": 1963 }, { "epoch": 0.060193698663724406, "grad_norm": 2.057865774677342, "learning_rate": 9.976119345830885e-06, "loss": 0.8338, "step": 1964 }, { "epoch": 0.06022434718646561, "grad_norm": 2.042173093513659, "learning_rate": 9.976070871312502e-06, "loss": 0.7544, "step": 1965 }, { "epoch": 0.06025499570920682, "grad_norm": 2.452882263024842, "learning_rate": 9.976022347763621e-06, "loss": 0.8625, "step": 1966 }, { "epoch": 0.06028564423194802, "grad_norm": 2.1447400594755632, "learning_rate": 9.975973775184718e-06, "loss": 0.818, "step": 1967 }, { "epoch": 0.06031629275468922, "grad_norm": 2.3095269466945405, "learning_rate": 9.975925153576271e-06, "loss": 0.9016, "step": 1968 }, { "epoch": 0.06034694127743043, "grad_norm": 2.213654649288472, "learning_rate": 9.97587648293876e-06, "loss": 0.7511, "step": 1969 }, { "epoch": 0.06037758980017163, "grad_norm": 2.3583294444323917, "learning_rate": 9.975827763272667e-06, "loss": 0.8416, "step": 1970 }, { "epoch": 0.06040823832291284, "grad_norm": 2.3319732686770953, "learning_rate": 9.975778994578469e-06, "loss": 0.8162, "step": 1971 }, { "epoch": 0.06043888684565404, "grad_norm": 2.279358694708987, "learning_rate": 9.975730176856648e-06, "loss": 0.721, "step": 1972 }, { "epoch": 0.06046953536839524, "grad_norm": 2.1413089629564745, "learning_rate": 9.975681310107683e-06, "loss": 0.7509, "step": 1973 }, { "epoch": 0.06050018389113645, "grad_norm": 1.504631225787842, "learning_rate": 9.975632394332057e-06, "loss": 0.5419, "step": 1974 }, { "epoch": 0.060530832413877654, "grad_norm": 2.1594285437049496, "learning_rate": 9.975583429530255e-06, "loss": 0.8169, "step": 1975 }, { "epoch": 0.06056148093661885, "grad_norm": 2.204446252169093, "learning_rate": 9.975534415702753e-06, "loss": 0.8063, "step": 1976 }, { "epoch": 0.06059212945936006, "grad_norm": 2.1905882328915505, "learning_rate": 9.97548535285004e-06, "loss": 0.8741, "step": 1977 }, { "epoch": 0.060622777982101264, "grad_norm": 2.3501792902239353, "learning_rate": 9.975436240972594e-06, "loss": 0.7987, "step": 1978 }, { "epoch": 0.06065342650484247, "grad_norm": 2.232349076885057, "learning_rate": 9.975387080070904e-06, "loss": 0.7254, "step": 1979 }, { "epoch": 0.06068407502758367, "grad_norm": 0.9706607964413632, "learning_rate": 9.975337870145451e-06, "loss": 0.5367, "step": 1980 }, { "epoch": 0.060714723550324874, "grad_norm": 2.198941817045538, "learning_rate": 9.975288611196721e-06, "loss": 0.8036, "step": 1981 }, { "epoch": 0.06074537207306608, "grad_norm": 2.1456784352543936, "learning_rate": 9.975239303225199e-06, "loss": 0.8279, "step": 1982 }, { "epoch": 0.060776020595807285, "grad_norm": 0.9915247517452496, "learning_rate": 9.975189946231372e-06, "loss": 0.5311, "step": 1983 }, { "epoch": 0.060806669118548484, "grad_norm": 2.187647554391678, "learning_rate": 9.975140540215725e-06, "loss": 0.7601, "step": 1984 }, { "epoch": 0.06083731764128969, "grad_norm": 2.188721138621731, "learning_rate": 9.975091085178745e-06, "loss": 0.8489, "step": 1985 }, { "epoch": 0.060867966164030896, "grad_norm": 2.1999279208935354, "learning_rate": 9.975041581120922e-06, "loss": 0.7398, "step": 1986 }, { "epoch": 0.060898614686772094, "grad_norm": 2.1847594758570135, "learning_rate": 9.974992028042738e-06, "loss": 0.7942, "step": 1987 }, { "epoch": 0.0609292632095133, "grad_norm": 0.9398887879313389, "learning_rate": 9.974942425944687e-06, "loss": 0.5279, "step": 1988 }, { "epoch": 0.060959911732254506, "grad_norm": 2.418252248649921, "learning_rate": 9.974892774827254e-06, "loss": 0.7577, "step": 1989 }, { "epoch": 0.06099056025499571, "grad_norm": 2.062353725852378, "learning_rate": 9.974843074690929e-06, "loss": 0.8711, "step": 1990 }, { "epoch": 0.06102120877773691, "grad_norm": 2.0485357221725664, "learning_rate": 9.974793325536206e-06, "loss": 0.7639, "step": 1991 }, { "epoch": 0.061051857300478116, "grad_norm": 2.093144680733953, "learning_rate": 9.974743527363569e-06, "loss": 0.8778, "step": 1992 }, { "epoch": 0.06108250582321932, "grad_norm": 2.2436712123982705, "learning_rate": 9.97469368017351e-06, "loss": 0.9099, "step": 1993 }, { "epoch": 0.06111315434596053, "grad_norm": 2.3456973347632335, "learning_rate": 9.974643783966522e-06, "loss": 0.7401, "step": 1994 }, { "epoch": 0.061143802868701726, "grad_norm": 2.064030462692603, "learning_rate": 9.974593838743097e-06, "loss": 0.7789, "step": 1995 }, { "epoch": 0.06117445139144293, "grad_norm": 2.426426453877442, "learning_rate": 9.974543844503726e-06, "loss": 0.7885, "step": 1996 }, { "epoch": 0.06120509991418414, "grad_norm": 2.1491819893797715, "learning_rate": 9.9744938012489e-06, "loss": 0.7827, "step": 1997 }, { "epoch": 0.06123574843692534, "grad_norm": 2.031215948059911, "learning_rate": 9.974443708979116e-06, "loss": 0.7136, "step": 1998 }, { "epoch": 0.06126639695966654, "grad_norm": 1.066167246081794, "learning_rate": 9.974393567694864e-06, "loss": 0.5546, "step": 1999 }, { "epoch": 0.06129704548240775, "grad_norm": 2.219519140802758, "learning_rate": 9.97434337739664e-06, "loss": 0.8362, "step": 2000 }, { "epoch": 0.06132769400514895, "grad_norm": 2.797114755529844, "learning_rate": 9.974293138084939e-06, "loss": 0.7519, "step": 2001 }, { "epoch": 0.06135834252789016, "grad_norm": 2.323220601134036, "learning_rate": 9.974242849760253e-06, "loss": 0.7186, "step": 2002 }, { "epoch": 0.06138899105063136, "grad_norm": 1.9879881839387583, "learning_rate": 9.97419251242308e-06, "loss": 0.7242, "step": 2003 }, { "epoch": 0.06141963957337256, "grad_norm": 2.215658169692654, "learning_rate": 9.974142126073915e-06, "loss": 0.7585, "step": 2004 }, { "epoch": 0.06145028809611377, "grad_norm": 2.2173122218148142, "learning_rate": 9.974091690713256e-06, "loss": 0.8237, "step": 2005 }, { "epoch": 0.061480936618854974, "grad_norm": 2.043869206547236, "learning_rate": 9.974041206341599e-06, "loss": 0.7775, "step": 2006 }, { "epoch": 0.06151158514159617, "grad_norm": 0.9384817859659448, "learning_rate": 9.97399067295944e-06, "loss": 0.5265, "step": 2007 }, { "epoch": 0.06154223366433738, "grad_norm": 2.0722726023090154, "learning_rate": 9.97394009056728e-06, "loss": 0.8015, "step": 2008 }, { "epoch": 0.061572882187078584, "grad_norm": 2.483481432848903, "learning_rate": 9.973889459165615e-06, "loss": 0.7958, "step": 2009 }, { "epoch": 0.06160353070981979, "grad_norm": 2.202414667352168, "learning_rate": 9.973838778754944e-06, "loss": 0.7597, "step": 2010 }, { "epoch": 0.06163417923256099, "grad_norm": 2.191341162700266, "learning_rate": 9.973788049335768e-06, "loss": 0.8808, "step": 2011 }, { "epoch": 0.061664827755302194, "grad_norm": 2.1951685055516297, "learning_rate": 9.973737270908584e-06, "loss": 0.8287, "step": 2012 }, { "epoch": 0.0616954762780434, "grad_norm": 2.1648654833177887, "learning_rate": 9.973686443473895e-06, "loss": 0.7125, "step": 2013 }, { "epoch": 0.061726124800784606, "grad_norm": 1.935442968941545, "learning_rate": 9.973635567032201e-06, "loss": 0.6988, "step": 2014 }, { "epoch": 0.061756773323525804, "grad_norm": 1.003605928328952, "learning_rate": 9.973584641584005e-06, "loss": 0.5361, "step": 2015 }, { "epoch": 0.06178742184626701, "grad_norm": 0.9775443677453762, "learning_rate": 9.973533667129804e-06, "loss": 0.5277, "step": 2016 }, { "epoch": 0.061818070369008216, "grad_norm": 2.044809058237664, "learning_rate": 9.973482643670106e-06, "loss": 0.7977, "step": 2017 }, { "epoch": 0.061848718891749414, "grad_norm": 2.17891925499273, "learning_rate": 9.973431571205408e-06, "loss": 0.7306, "step": 2018 }, { "epoch": 0.06187936741449062, "grad_norm": 2.0590470946869472, "learning_rate": 9.973380449736218e-06, "loss": 0.8407, "step": 2019 }, { "epoch": 0.061910015937231826, "grad_norm": 2.05762958043724, "learning_rate": 9.973329279263038e-06, "loss": 0.7554, "step": 2020 }, { "epoch": 0.06194066445997303, "grad_norm": 2.1561478336810542, "learning_rate": 9.97327805978637e-06, "loss": 0.8197, "step": 2021 }, { "epoch": 0.06197131298271423, "grad_norm": 2.0171074483690354, "learning_rate": 9.973226791306723e-06, "loss": 0.6965, "step": 2022 }, { "epoch": 0.062001961505455436, "grad_norm": 2.747444439280233, "learning_rate": 9.9731754738246e-06, "loss": 0.9501, "step": 2023 }, { "epoch": 0.06203261002819664, "grad_norm": 2.639251044104261, "learning_rate": 9.973124107340506e-06, "loss": 0.7791, "step": 2024 }, { "epoch": 0.06206325855093785, "grad_norm": 1.9039415900768146, "learning_rate": 9.973072691854949e-06, "loss": 0.7751, "step": 2025 }, { "epoch": 0.062093907073679046, "grad_norm": 2.094116672334243, "learning_rate": 9.97302122736843e-06, "loss": 0.8419, "step": 2026 }, { "epoch": 0.06212455559642025, "grad_norm": 2.400587979137635, "learning_rate": 9.972969713881466e-06, "loss": 0.8446, "step": 2027 }, { "epoch": 0.06215520411916146, "grad_norm": 1.9180443499812394, "learning_rate": 9.972918151394556e-06, "loss": 0.7303, "step": 2028 }, { "epoch": 0.06218585264190266, "grad_norm": 2.1668262670735667, "learning_rate": 9.972866539908212e-06, "loss": 0.8103, "step": 2029 }, { "epoch": 0.06221650116464386, "grad_norm": 1.4628965303221813, "learning_rate": 9.97281487942294e-06, "loss": 0.5473, "step": 2030 }, { "epoch": 0.06224714968738507, "grad_norm": 1.863164648982354, "learning_rate": 9.972763169939252e-06, "loss": 0.7393, "step": 2031 }, { "epoch": 0.06227779821012627, "grad_norm": 2.0520059650541427, "learning_rate": 9.972711411457657e-06, "loss": 0.7681, "step": 2032 }, { "epoch": 0.06230844673286748, "grad_norm": 2.111205291465274, "learning_rate": 9.972659603978664e-06, "loss": 0.7241, "step": 2033 }, { "epoch": 0.06233909525560868, "grad_norm": 1.055545612014101, "learning_rate": 9.972607747502782e-06, "loss": 0.5477, "step": 2034 }, { "epoch": 0.06236974377834988, "grad_norm": 1.9828693661713261, "learning_rate": 9.972555842030525e-06, "loss": 0.7957, "step": 2035 }, { "epoch": 0.06240039230109109, "grad_norm": 2.1878920162564928, "learning_rate": 9.972503887562403e-06, "loss": 0.6662, "step": 2036 }, { "epoch": 0.062431040823832294, "grad_norm": 2.423218717253532, "learning_rate": 9.972451884098927e-06, "loss": 0.8484, "step": 2037 }, { "epoch": 0.06246168934657349, "grad_norm": 1.0368261479534426, "learning_rate": 9.97239983164061e-06, "loss": 0.5321, "step": 2038 }, { "epoch": 0.0624923378693147, "grad_norm": 1.864479562835654, "learning_rate": 9.972347730187967e-06, "loss": 0.8047, "step": 2039 }, { "epoch": 0.0625229863920559, "grad_norm": 2.1442246655387955, "learning_rate": 9.972295579741508e-06, "loss": 0.7303, "step": 2040 }, { "epoch": 0.06255363491479711, "grad_norm": 1.8585813936181628, "learning_rate": 9.972243380301749e-06, "loss": 0.7692, "step": 2041 }, { "epoch": 0.06258428343753832, "grad_norm": 2.175793897414943, "learning_rate": 9.972191131869204e-06, "loss": 0.8831, "step": 2042 }, { "epoch": 0.06261493196027952, "grad_norm": 2.0551907415164528, "learning_rate": 9.972138834444387e-06, "loss": 0.7767, "step": 2043 }, { "epoch": 0.06264558048302071, "grad_norm": 2.2905181354263426, "learning_rate": 9.972086488027815e-06, "loss": 0.7963, "step": 2044 }, { "epoch": 0.06267622900576192, "grad_norm": 2.3602048689108477, "learning_rate": 9.97203409262e-06, "loss": 0.7591, "step": 2045 }, { "epoch": 0.06270687752850312, "grad_norm": 2.3749023564821283, "learning_rate": 9.971981648221463e-06, "loss": 0.72, "step": 2046 }, { "epoch": 0.06273752605124433, "grad_norm": 2.234566812614006, "learning_rate": 9.97192915483272e-06, "loss": 0.8082, "step": 2047 }, { "epoch": 0.06276817457398554, "grad_norm": 2.203293717995487, "learning_rate": 9.971876612454285e-06, "loss": 0.8159, "step": 2048 }, { "epoch": 0.06279882309672674, "grad_norm": 1.2669334089074398, "learning_rate": 9.971824021086677e-06, "loss": 0.5377, "step": 2049 }, { "epoch": 0.06282947161946795, "grad_norm": 2.230991559285099, "learning_rate": 9.971771380730418e-06, "loss": 0.817, "step": 2050 }, { "epoch": 0.06286012014220914, "grad_norm": 2.153672417532638, "learning_rate": 9.97171869138602e-06, "loss": 0.7175, "step": 2051 }, { "epoch": 0.06289076866495034, "grad_norm": 1.9747445964638857, "learning_rate": 9.971665953054007e-06, "loss": 0.8351, "step": 2052 }, { "epoch": 0.06292141718769155, "grad_norm": 2.120288357625989, "learning_rate": 9.971613165734897e-06, "loss": 0.7828, "step": 2053 }, { "epoch": 0.06295206571043276, "grad_norm": 2.0467486910135384, "learning_rate": 9.971560329429211e-06, "loss": 0.7138, "step": 2054 }, { "epoch": 0.06298271423317396, "grad_norm": 2.0860603634995916, "learning_rate": 9.971507444137469e-06, "loss": 0.7622, "step": 2055 }, { "epoch": 0.06301336275591517, "grad_norm": 1.970908481824912, "learning_rate": 9.971454509860192e-06, "loss": 0.7894, "step": 2056 }, { "epoch": 0.06304401127865637, "grad_norm": 2.351028947935282, "learning_rate": 9.971401526597902e-06, "loss": 0.7863, "step": 2057 }, { "epoch": 0.06307465980139758, "grad_norm": 2.3476567850094128, "learning_rate": 9.97134849435112e-06, "loss": 0.7491, "step": 2058 }, { "epoch": 0.06310530832413877, "grad_norm": 1.2789193219211434, "learning_rate": 9.97129541312037e-06, "loss": 0.5409, "step": 2059 }, { "epoch": 0.06313595684687998, "grad_norm": 2.1444822550913636, "learning_rate": 9.971242282906174e-06, "loss": 0.7199, "step": 2060 }, { "epoch": 0.06316660536962118, "grad_norm": 2.1675940718720232, "learning_rate": 9.971189103709056e-06, "loss": 0.795, "step": 2061 }, { "epoch": 0.06319725389236239, "grad_norm": 2.0359234767886223, "learning_rate": 9.97113587552954e-06, "loss": 0.8698, "step": 2062 }, { "epoch": 0.06322790241510359, "grad_norm": 1.8650027029537368, "learning_rate": 9.97108259836815e-06, "loss": 0.8209, "step": 2063 }, { "epoch": 0.0632585509378448, "grad_norm": 1.9974046332205118, "learning_rate": 9.971029272225411e-06, "loss": 0.7714, "step": 2064 }, { "epoch": 0.063289199460586, "grad_norm": 2.0750579412268766, "learning_rate": 9.970975897101849e-06, "loss": 0.812, "step": 2065 }, { "epoch": 0.06331984798332721, "grad_norm": 2.0978151301161634, "learning_rate": 9.97092247299799e-06, "loss": 0.8284, "step": 2066 }, { "epoch": 0.0633504965060684, "grad_norm": 2.1473816265494694, "learning_rate": 9.97086899991436e-06, "loss": 0.8042, "step": 2067 }, { "epoch": 0.06338114502880961, "grad_norm": 2.1866361602612057, "learning_rate": 9.970815477851485e-06, "loss": 0.8169, "step": 2068 }, { "epoch": 0.06341179355155081, "grad_norm": 2.682441778691228, "learning_rate": 9.970761906809893e-06, "loss": 0.7176, "step": 2069 }, { "epoch": 0.06344244207429202, "grad_norm": 2.171405778735718, "learning_rate": 9.970708286790114e-06, "loss": 0.7473, "step": 2070 }, { "epoch": 0.06347309059703322, "grad_norm": 1.206786605535976, "learning_rate": 9.970654617792672e-06, "loss": 0.5348, "step": 2071 }, { "epoch": 0.06350373911977443, "grad_norm": 1.0005110449034555, "learning_rate": 9.9706008998181e-06, "loss": 0.5366, "step": 2072 }, { "epoch": 0.06353438764251564, "grad_norm": 2.2136384948817844, "learning_rate": 9.970547132866925e-06, "loss": 0.7987, "step": 2073 }, { "epoch": 0.06356503616525684, "grad_norm": 0.9846022918193877, "learning_rate": 9.970493316939678e-06, "loss": 0.5177, "step": 2074 }, { "epoch": 0.06359568468799803, "grad_norm": 2.2918406736872, "learning_rate": 9.970439452036888e-06, "loss": 0.8659, "step": 2075 }, { "epoch": 0.06362633321073924, "grad_norm": 1.7885602044347395, "learning_rate": 9.970385538159086e-06, "loss": 0.7207, "step": 2076 }, { "epoch": 0.06365698173348044, "grad_norm": 2.1043429975281813, "learning_rate": 9.970331575306804e-06, "loss": 0.8042, "step": 2077 }, { "epoch": 0.06368763025622165, "grad_norm": 2.1269443710634177, "learning_rate": 9.970277563480573e-06, "loss": 0.7283, "step": 2078 }, { "epoch": 0.06371827877896286, "grad_norm": 2.094042444978855, "learning_rate": 9.970223502680926e-06, "loss": 0.8354, "step": 2079 }, { "epoch": 0.06374892730170406, "grad_norm": 2.2691856040213727, "learning_rate": 9.970169392908396e-06, "loss": 0.8973, "step": 2080 }, { "epoch": 0.06377957582444527, "grad_norm": 2.146786875749665, "learning_rate": 9.970115234163513e-06, "loss": 0.7608, "step": 2081 }, { "epoch": 0.06381022434718646, "grad_norm": 1.33817470027106, "learning_rate": 9.970061026446813e-06, "loss": 0.5371, "step": 2082 }, { "epoch": 0.06384087286992766, "grad_norm": 2.113295349030361, "learning_rate": 9.970006769758832e-06, "loss": 0.7241, "step": 2083 }, { "epoch": 0.06387152139266887, "grad_norm": 2.189394459460152, "learning_rate": 9.969952464100102e-06, "loss": 0.8261, "step": 2084 }, { "epoch": 0.06390216991541008, "grad_norm": 2.096821085285166, "learning_rate": 9.969898109471159e-06, "loss": 0.6992, "step": 2085 }, { "epoch": 0.06393281843815128, "grad_norm": 0.983307030508446, "learning_rate": 9.969843705872537e-06, "loss": 0.5524, "step": 2086 }, { "epoch": 0.06396346696089249, "grad_norm": 2.0282459388447807, "learning_rate": 9.969789253304775e-06, "loss": 0.8415, "step": 2087 }, { "epoch": 0.06399411548363369, "grad_norm": 2.2293020046284115, "learning_rate": 9.969734751768407e-06, "loss": 0.8148, "step": 2088 }, { "epoch": 0.0640247640063749, "grad_norm": 2.1807114615735137, "learning_rate": 9.969680201263972e-06, "loss": 0.7978, "step": 2089 }, { "epoch": 0.06405541252911609, "grad_norm": 2.246099194161644, "learning_rate": 9.969625601792005e-06, "loss": 0.738, "step": 2090 }, { "epoch": 0.0640860610518573, "grad_norm": 2.0631276755339405, "learning_rate": 9.969570953353044e-06, "loss": 0.7432, "step": 2091 }, { "epoch": 0.0641167095745985, "grad_norm": 2.314551191410228, "learning_rate": 9.969516255947633e-06, "loss": 0.7931, "step": 2092 }, { "epoch": 0.06414735809733971, "grad_norm": 2.023814050497669, "learning_rate": 9.969461509576303e-06, "loss": 0.6928, "step": 2093 }, { "epoch": 0.06417800662008091, "grad_norm": 0.975612991370605, "learning_rate": 9.9694067142396e-06, "loss": 0.5236, "step": 2094 }, { "epoch": 0.06420865514282212, "grad_norm": 1.9482713458557115, "learning_rate": 9.96935186993806e-06, "loss": 0.8752, "step": 2095 }, { "epoch": 0.06423930366556332, "grad_norm": 2.043165740362296, "learning_rate": 9.969296976672224e-06, "loss": 0.8623, "step": 2096 }, { "epoch": 0.06426995218830453, "grad_norm": 2.595054972460468, "learning_rate": 9.969242034442634e-06, "loss": 0.8543, "step": 2097 }, { "epoch": 0.06430060071104572, "grad_norm": 2.023617374028626, "learning_rate": 9.96918704324983e-06, "loss": 0.7684, "step": 2098 }, { "epoch": 0.06433124923378693, "grad_norm": 2.0782316341818587, "learning_rate": 9.969132003094357e-06, "loss": 0.7714, "step": 2099 }, { "epoch": 0.06436189775652813, "grad_norm": 2.1398860858768343, "learning_rate": 9.969076913976755e-06, "loss": 0.8764, "step": 2100 }, { "epoch": 0.06439254627926934, "grad_norm": 2.0924601057602437, "learning_rate": 9.969021775897563e-06, "loss": 0.7774, "step": 2101 }, { "epoch": 0.06442319480201054, "grad_norm": 2.0135083202828676, "learning_rate": 9.968966588857331e-06, "loss": 0.8245, "step": 2102 }, { "epoch": 0.06445384332475175, "grad_norm": 2.1455932850832298, "learning_rate": 9.968911352856598e-06, "loss": 0.7763, "step": 2103 }, { "epoch": 0.06448449184749296, "grad_norm": 2.296649605643846, "learning_rate": 9.968856067895913e-06, "loss": 0.7419, "step": 2104 }, { "epoch": 0.06451514037023416, "grad_norm": 1.9180768075295855, "learning_rate": 9.968800733975816e-06, "loss": 0.7121, "step": 2105 }, { "epoch": 0.06454578889297535, "grad_norm": 2.1598450292397615, "learning_rate": 9.968745351096854e-06, "loss": 0.7761, "step": 2106 }, { "epoch": 0.06457643741571656, "grad_norm": 1.9737258952132486, "learning_rate": 9.968689919259572e-06, "loss": 0.7588, "step": 2107 }, { "epoch": 0.06460708593845776, "grad_norm": 2.263063970854475, "learning_rate": 9.968634438464517e-06, "loss": 0.7745, "step": 2108 }, { "epoch": 0.06463773446119897, "grad_norm": 2.0907945255674663, "learning_rate": 9.968578908712236e-06, "loss": 0.7192, "step": 2109 }, { "epoch": 0.06466838298394018, "grad_norm": 2.013874502684691, "learning_rate": 9.968523330003276e-06, "loss": 0.8259, "step": 2110 }, { "epoch": 0.06469903150668138, "grad_norm": 1.9465671037758647, "learning_rate": 9.968467702338186e-06, "loss": 0.7602, "step": 2111 }, { "epoch": 0.06472968002942259, "grad_norm": 2.183053837980145, "learning_rate": 9.968412025717511e-06, "loss": 0.8279, "step": 2112 }, { "epoch": 0.06476032855216378, "grad_norm": 1.9894163772572004, "learning_rate": 9.968356300141802e-06, "loss": 0.7742, "step": 2113 }, { "epoch": 0.06479097707490498, "grad_norm": 1.0665853376295422, "learning_rate": 9.968300525611605e-06, "loss": 0.5235, "step": 2114 }, { "epoch": 0.06482162559764619, "grad_norm": 1.9246734933022092, "learning_rate": 9.968244702127473e-06, "loss": 0.8614, "step": 2115 }, { "epoch": 0.0648522741203874, "grad_norm": 2.1688156503734657, "learning_rate": 9.968188829689955e-06, "loss": 0.8233, "step": 2116 }, { "epoch": 0.0648829226431286, "grad_norm": 0.8763840515998065, "learning_rate": 9.968132908299602e-06, "loss": 0.4822, "step": 2117 }, { "epoch": 0.06491357116586981, "grad_norm": 2.019337317627924, "learning_rate": 9.968076937956962e-06, "loss": 0.7458, "step": 2118 }, { "epoch": 0.06494421968861101, "grad_norm": 2.006076773812726, "learning_rate": 9.968020918662591e-06, "loss": 0.7624, "step": 2119 }, { "epoch": 0.06497486821135222, "grad_norm": 2.186142943173811, "learning_rate": 9.967964850417039e-06, "loss": 0.8353, "step": 2120 }, { "epoch": 0.06500551673409341, "grad_norm": 1.8901383576565194, "learning_rate": 9.967908733220854e-06, "loss": 0.7913, "step": 2121 }, { "epoch": 0.06503616525683462, "grad_norm": 2.283952769820935, "learning_rate": 9.967852567074598e-06, "loss": 0.8044, "step": 2122 }, { "epoch": 0.06506681377957582, "grad_norm": 2.0111622241916955, "learning_rate": 9.967796351978817e-06, "loss": 0.818, "step": 2123 }, { "epoch": 0.06509746230231703, "grad_norm": 2.2003744665371707, "learning_rate": 9.967740087934069e-06, "loss": 0.8497, "step": 2124 }, { "epoch": 0.06512811082505823, "grad_norm": 1.1281888041021124, "learning_rate": 9.967683774940905e-06, "loss": 0.5113, "step": 2125 }, { "epoch": 0.06515875934779944, "grad_norm": 2.1173133824619903, "learning_rate": 9.967627412999883e-06, "loss": 0.7661, "step": 2126 }, { "epoch": 0.06518940787054064, "grad_norm": 1.9972725894448196, "learning_rate": 9.967571002111558e-06, "loss": 0.7477, "step": 2127 }, { "epoch": 0.06522005639328185, "grad_norm": 2.108709650343693, "learning_rate": 9.967514542276484e-06, "loss": 0.757, "step": 2128 }, { "epoch": 0.06525070491602304, "grad_norm": 0.9321693755694129, "learning_rate": 9.967458033495219e-06, "loss": 0.5353, "step": 2129 }, { "epoch": 0.06528135343876425, "grad_norm": 2.3836923400380825, "learning_rate": 9.967401475768316e-06, "loss": 0.8741, "step": 2130 }, { "epoch": 0.06531200196150545, "grad_norm": 2.2126358048161348, "learning_rate": 9.967344869096338e-06, "loss": 0.7301, "step": 2131 }, { "epoch": 0.06534265048424666, "grad_norm": 2.103803106155121, "learning_rate": 9.96728821347984e-06, "loss": 0.8903, "step": 2132 }, { "epoch": 0.06537329900698786, "grad_norm": 2.3551240204044395, "learning_rate": 9.96723150891938e-06, "loss": 0.6542, "step": 2133 }, { "epoch": 0.06540394752972907, "grad_norm": 2.1938712037236967, "learning_rate": 9.967174755415516e-06, "loss": 0.8673, "step": 2134 }, { "epoch": 0.06543459605247028, "grad_norm": 2.081366678419273, "learning_rate": 9.96711795296881e-06, "loss": 0.8446, "step": 2135 }, { "epoch": 0.06546524457521148, "grad_norm": 1.9792243244272119, "learning_rate": 9.967061101579818e-06, "loss": 0.7746, "step": 2136 }, { "epoch": 0.06549589309795267, "grad_norm": 2.103405999211665, "learning_rate": 9.967004201249105e-06, "loss": 0.7664, "step": 2137 }, { "epoch": 0.06552654162069388, "grad_norm": 2.4899757070075887, "learning_rate": 9.966947251977226e-06, "loss": 0.7625, "step": 2138 }, { "epoch": 0.06555719014343508, "grad_norm": 2.1202105381408436, "learning_rate": 9.966890253764746e-06, "loss": 0.7688, "step": 2139 }, { "epoch": 0.06558783866617629, "grad_norm": 1.89867709299372, "learning_rate": 9.966833206612225e-06, "loss": 0.7678, "step": 2140 }, { "epoch": 0.0656184871889175, "grad_norm": 2.1848520907538886, "learning_rate": 9.966776110520224e-06, "loss": 0.7785, "step": 2141 }, { "epoch": 0.0656491357116587, "grad_norm": 2.098025078853529, "learning_rate": 9.96671896548931e-06, "loss": 0.8328, "step": 2142 }, { "epoch": 0.06567978423439991, "grad_norm": 2.007601127865126, "learning_rate": 9.966661771520042e-06, "loss": 0.8349, "step": 2143 }, { "epoch": 0.0657104327571411, "grad_norm": 2.044808825554177, "learning_rate": 9.966604528612986e-06, "loss": 0.7358, "step": 2144 }, { "epoch": 0.0657410812798823, "grad_norm": 1.8598107359942524, "learning_rate": 9.966547236768703e-06, "loss": 0.7508, "step": 2145 }, { "epoch": 0.06577172980262351, "grad_norm": 2.0557068269963064, "learning_rate": 9.96648989598776e-06, "loss": 0.8378, "step": 2146 }, { "epoch": 0.06580237832536472, "grad_norm": 2.271084784178762, "learning_rate": 9.966432506270723e-06, "loss": 0.8084, "step": 2147 }, { "epoch": 0.06583302684810592, "grad_norm": 1.964374004653288, "learning_rate": 9.966375067618152e-06, "loss": 0.7987, "step": 2148 }, { "epoch": 0.06586367537084713, "grad_norm": 2.4313752994034834, "learning_rate": 9.96631758003062e-06, "loss": 0.8199, "step": 2149 }, { "epoch": 0.06589432389358833, "grad_norm": 2.1622493966032517, "learning_rate": 9.966260043508688e-06, "loss": 0.8115, "step": 2150 }, { "epoch": 0.06592497241632954, "grad_norm": 1.9868125355512196, "learning_rate": 9.966202458052927e-06, "loss": 0.8526, "step": 2151 }, { "epoch": 0.06595562093907073, "grad_norm": 2.0640701998437505, "learning_rate": 9.966144823663903e-06, "loss": 0.7902, "step": 2152 }, { "epoch": 0.06598626946181194, "grad_norm": 1.1968513502688158, "learning_rate": 9.966087140342182e-06, "loss": 0.5393, "step": 2153 }, { "epoch": 0.06601691798455314, "grad_norm": 2.421201180272247, "learning_rate": 9.966029408088333e-06, "loss": 0.835, "step": 2154 }, { "epoch": 0.06604756650729435, "grad_norm": 2.0967431386138897, "learning_rate": 9.965971626902928e-06, "loss": 0.6672, "step": 2155 }, { "epoch": 0.06607821503003555, "grad_norm": 2.3320211344739468, "learning_rate": 9.965913796786532e-06, "loss": 0.8802, "step": 2156 }, { "epoch": 0.06610886355277676, "grad_norm": 2.094658365673452, "learning_rate": 9.965855917739718e-06, "loss": 0.8902, "step": 2157 }, { "epoch": 0.06613951207551796, "grad_norm": 0.9830070493747071, "learning_rate": 9.965797989763053e-06, "loss": 0.5074, "step": 2158 }, { "epoch": 0.06617016059825917, "grad_norm": 2.0147241959818043, "learning_rate": 9.965740012857113e-06, "loss": 0.7685, "step": 2159 }, { "epoch": 0.06620080912100036, "grad_norm": 2.0468522774264595, "learning_rate": 9.965681987022463e-06, "loss": 0.78, "step": 2160 }, { "epoch": 0.06623145764374157, "grad_norm": 2.080727431703376, "learning_rate": 9.96562391225968e-06, "loss": 0.795, "step": 2161 }, { "epoch": 0.06626210616648277, "grad_norm": 2.2541985790710566, "learning_rate": 9.965565788569333e-06, "loss": 0.7191, "step": 2162 }, { "epoch": 0.06629275468922398, "grad_norm": 1.9846537291227921, "learning_rate": 9.965507615951997e-06, "loss": 0.84, "step": 2163 }, { "epoch": 0.06632340321196518, "grad_norm": 2.4495402687177545, "learning_rate": 9.965449394408243e-06, "loss": 0.8865, "step": 2164 }, { "epoch": 0.06635405173470639, "grad_norm": 2.2116034314909827, "learning_rate": 9.965391123938645e-06, "loss": 0.8055, "step": 2165 }, { "epoch": 0.0663847002574476, "grad_norm": 1.0851129448292867, "learning_rate": 9.96533280454378e-06, "loss": 0.5296, "step": 2166 }, { "epoch": 0.0664153487801888, "grad_norm": 2.2528595477746665, "learning_rate": 9.965274436224217e-06, "loss": 0.8141, "step": 2167 }, { "epoch": 0.06644599730293, "grad_norm": 2.2223346411464577, "learning_rate": 9.965216018980537e-06, "loss": 0.7688, "step": 2168 }, { "epoch": 0.0664766458256712, "grad_norm": 2.379975756503146, "learning_rate": 9.965157552813313e-06, "loss": 0.801, "step": 2169 }, { "epoch": 0.0665072943484124, "grad_norm": 1.9877300064413226, "learning_rate": 9.96509903772312e-06, "loss": 0.8012, "step": 2170 }, { "epoch": 0.06653794287115361, "grad_norm": 1.9607862005108565, "learning_rate": 9.96504047371054e-06, "loss": 0.7899, "step": 2171 }, { "epoch": 0.06656859139389482, "grad_norm": 0.9990388204966087, "learning_rate": 9.96498186077614e-06, "loss": 0.5242, "step": 2172 }, { "epoch": 0.06659923991663602, "grad_norm": 2.1659737701518633, "learning_rate": 9.964923198920507e-06, "loss": 0.7851, "step": 2173 }, { "epoch": 0.06662988843937723, "grad_norm": 2.035379607767426, "learning_rate": 9.964864488144215e-06, "loss": 0.7819, "step": 2174 }, { "epoch": 0.06666053696211842, "grad_norm": 2.037625499523693, "learning_rate": 9.964805728447842e-06, "loss": 0.7815, "step": 2175 }, { "epoch": 0.06669118548485962, "grad_norm": 2.316002450605538, "learning_rate": 9.964746919831969e-06, "loss": 0.7168, "step": 2176 }, { "epoch": 0.06672183400760083, "grad_norm": 2.2507434940689306, "learning_rate": 9.964688062297173e-06, "loss": 0.7938, "step": 2177 }, { "epoch": 0.06675248253034204, "grad_norm": 2.100984483480584, "learning_rate": 9.964629155844034e-06, "loss": 0.7947, "step": 2178 }, { "epoch": 0.06678313105308324, "grad_norm": 1.902341322755823, "learning_rate": 9.964570200473136e-06, "loss": 0.8098, "step": 2179 }, { "epoch": 0.06681377957582445, "grad_norm": 1.7980057264256, "learning_rate": 9.964511196185058e-06, "loss": 0.6298, "step": 2180 }, { "epoch": 0.06684442809856565, "grad_norm": 1.1829040753543272, "learning_rate": 9.964452142980379e-06, "loss": 0.546, "step": 2181 }, { "epoch": 0.06687507662130686, "grad_norm": 2.032482654360311, "learning_rate": 9.964393040859683e-06, "loss": 0.8095, "step": 2182 }, { "epoch": 0.06690572514404805, "grad_norm": 1.9451687226408776, "learning_rate": 9.964333889823555e-06, "loss": 0.687, "step": 2183 }, { "epoch": 0.06693637366678926, "grad_norm": 2.0208977104092076, "learning_rate": 9.964274689872571e-06, "loss": 0.721, "step": 2184 }, { "epoch": 0.06696702218953046, "grad_norm": 1.9188952970600646, "learning_rate": 9.96421544100732e-06, "loss": 0.7435, "step": 2185 }, { "epoch": 0.06699767071227167, "grad_norm": 2.0820174325374095, "learning_rate": 9.964156143228386e-06, "loss": 0.7623, "step": 2186 }, { "epoch": 0.06702831923501287, "grad_norm": 1.9171615323939455, "learning_rate": 9.964096796536349e-06, "loss": 0.8264, "step": 2187 }, { "epoch": 0.06705896775775408, "grad_norm": 2.0596004568367228, "learning_rate": 9.964037400931798e-06, "loss": 0.8959, "step": 2188 }, { "epoch": 0.06708961628049528, "grad_norm": 2.2531996917131476, "learning_rate": 9.963977956415315e-06, "loss": 0.8154, "step": 2189 }, { "epoch": 0.06712026480323649, "grad_norm": 1.9187939971203034, "learning_rate": 9.963918462987488e-06, "loss": 0.7388, "step": 2190 }, { "epoch": 0.06715091332597768, "grad_norm": 1.726025261124731, "learning_rate": 9.9638589206489e-06, "loss": 0.6565, "step": 2191 }, { "epoch": 0.06718156184871889, "grad_norm": 2.3690641046137015, "learning_rate": 9.963799329400142e-06, "loss": 0.6963, "step": 2192 }, { "epoch": 0.0672122103714601, "grad_norm": 1.8444013738588496, "learning_rate": 9.9637396892418e-06, "loss": 0.7816, "step": 2193 }, { "epoch": 0.0672428588942013, "grad_norm": 2.110519170680331, "learning_rate": 9.963680000174458e-06, "loss": 0.7446, "step": 2194 }, { "epoch": 0.0672735074169425, "grad_norm": 1.9842619970088375, "learning_rate": 9.96362026219871e-06, "loss": 0.727, "step": 2195 }, { "epoch": 0.06730415593968371, "grad_norm": 2.1153026627747153, "learning_rate": 9.96356047531514e-06, "loss": 0.7701, "step": 2196 }, { "epoch": 0.06733480446242492, "grad_norm": 1.9539012440373302, "learning_rate": 9.96350063952434e-06, "loss": 0.7595, "step": 2197 }, { "epoch": 0.06736545298516612, "grad_norm": 2.013744379744089, "learning_rate": 9.963440754826897e-06, "loss": 0.7252, "step": 2198 }, { "epoch": 0.06739610150790731, "grad_norm": 2.330238198039784, "learning_rate": 9.9633808212234e-06, "loss": 0.8312, "step": 2199 }, { "epoch": 0.06742675003064852, "grad_norm": 1.7300227536515687, "learning_rate": 9.963320838714445e-06, "loss": 0.5436, "step": 2200 }, { "epoch": 0.06745739855338972, "grad_norm": 1.9353692053153488, "learning_rate": 9.96326080730062e-06, "loss": 0.8492, "step": 2201 }, { "epoch": 0.06748804707613093, "grad_norm": 1.927794536193327, "learning_rate": 9.963200726982515e-06, "loss": 0.8063, "step": 2202 }, { "epoch": 0.06751869559887214, "grad_norm": 0.9874130908523474, "learning_rate": 9.963140597760723e-06, "loss": 0.5279, "step": 2203 }, { "epoch": 0.06754934412161334, "grad_norm": 2.4205650565527717, "learning_rate": 9.963080419635838e-06, "loss": 0.7716, "step": 2204 }, { "epoch": 0.06757999264435455, "grad_norm": 2.0072256647599485, "learning_rate": 9.963020192608452e-06, "loss": 0.776, "step": 2205 }, { "epoch": 0.06761064116709574, "grad_norm": 2.226562111877214, "learning_rate": 9.962959916679158e-06, "loss": 0.8651, "step": 2206 }, { "epoch": 0.06764128968983694, "grad_norm": 1.9983765105705928, "learning_rate": 9.962899591848549e-06, "loss": 0.7769, "step": 2207 }, { "epoch": 0.06767193821257815, "grad_norm": 2.5650750075665, "learning_rate": 9.962839218117222e-06, "loss": 0.8, "step": 2208 }, { "epoch": 0.06770258673531936, "grad_norm": 2.2523718352211044, "learning_rate": 9.962778795485768e-06, "loss": 0.72, "step": 2209 }, { "epoch": 0.06773323525806056, "grad_norm": 2.1319206521492164, "learning_rate": 9.962718323954787e-06, "loss": 0.6884, "step": 2210 }, { "epoch": 0.06776388378080177, "grad_norm": 1.5393376285490057, "learning_rate": 9.96265780352487e-06, "loss": 0.529, "step": 2211 }, { "epoch": 0.06779453230354297, "grad_norm": 2.144240920787987, "learning_rate": 9.962597234196621e-06, "loss": 0.7214, "step": 2212 }, { "epoch": 0.06782518082628418, "grad_norm": 1.0653868037529317, "learning_rate": 9.962536615970626e-06, "loss": 0.5173, "step": 2213 }, { "epoch": 0.06785582934902537, "grad_norm": 2.529703575529376, "learning_rate": 9.962475948847492e-06, "loss": 0.835, "step": 2214 }, { "epoch": 0.06788647787176658, "grad_norm": 2.530920872253848, "learning_rate": 9.962415232827811e-06, "loss": 0.7281, "step": 2215 }, { "epoch": 0.06791712639450778, "grad_norm": 2.2357292994161972, "learning_rate": 9.962354467912183e-06, "loss": 0.6943, "step": 2216 }, { "epoch": 0.06794777491724899, "grad_norm": 2.240005488155496, "learning_rate": 9.962293654101207e-06, "loss": 0.8399, "step": 2217 }, { "epoch": 0.0679784234399902, "grad_norm": 2.111450627861137, "learning_rate": 9.962232791395483e-06, "loss": 0.8586, "step": 2218 }, { "epoch": 0.0680090719627314, "grad_norm": 2.539758313959378, "learning_rate": 9.962171879795607e-06, "loss": 0.7742, "step": 2219 }, { "epoch": 0.0680397204854726, "grad_norm": 1.7455274455994347, "learning_rate": 9.962110919302184e-06, "loss": 0.5173, "step": 2220 }, { "epoch": 0.06807036900821381, "grad_norm": 2.1825217971222743, "learning_rate": 9.962049909915812e-06, "loss": 0.8258, "step": 2221 }, { "epoch": 0.068101017530955, "grad_norm": 2.3728366985893397, "learning_rate": 9.961988851637094e-06, "loss": 0.7138, "step": 2222 }, { "epoch": 0.06813166605369621, "grad_norm": 2.2525720355465637, "learning_rate": 9.961927744466628e-06, "loss": 0.8432, "step": 2223 }, { "epoch": 0.06816231457643741, "grad_norm": 2.2614464379828823, "learning_rate": 9.96186658840502e-06, "loss": 0.8263, "step": 2224 }, { "epoch": 0.06819296309917862, "grad_norm": 1.9095359935812997, "learning_rate": 9.96180538345287e-06, "loss": 0.7632, "step": 2225 }, { "epoch": 0.06822361162191982, "grad_norm": 2.1415237124381172, "learning_rate": 9.961744129610781e-06, "loss": 0.7972, "step": 2226 }, { "epoch": 0.06825426014466103, "grad_norm": 1.1390680027420395, "learning_rate": 9.961682826879359e-06, "loss": 0.5388, "step": 2227 }, { "epoch": 0.06828490866740224, "grad_norm": 2.2793037231933493, "learning_rate": 9.961621475259208e-06, "loss": 0.733, "step": 2228 }, { "epoch": 0.06831555719014344, "grad_norm": 2.236780976885697, "learning_rate": 9.961560074750929e-06, "loss": 0.7858, "step": 2229 }, { "epoch": 0.06834620571288463, "grad_norm": 2.127100387764417, "learning_rate": 9.96149862535513e-06, "loss": 0.7114, "step": 2230 }, { "epoch": 0.06837685423562584, "grad_norm": 2.235587093406675, "learning_rate": 9.961437127072415e-06, "loss": 0.7605, "step": 2231 }, { "epoch": 0.06840750275836704, "grad_norm": 2.2999758163737702, "learning_rate": 9.961375579903392e-06, "loss": 0.814, "step": 2232 }, { "epoch": 0.06843815128110825, "grad_norm": 2.2874812946920793, "learning_rate": 9.961313983848665e-06, "loss": 0.7675, "step": 2233 }, { "epoch": 0.06846879980384946, "grad_norm": 1.9842297260534474, "learning_rate": 9.96125233890884e-06, "loss": 0.7789, "step": 2234 }, { "epoch": 0.06849944832659066, "grad_norm": 2.200797772771613, "learning_rate": 9.961190645084529e-06, "loss": 0.7679, "step": 2235 }, { "epoch": 0.06853009684933187, "grad_norm": 2.0686038551102444, "learning_rate": 9.961128902376335e-06, "loss": 0.7344, "step": 2236 }, { "epoch": 0.06856074537207306, "grad_norm": 3.9381461279701524, "learning_rate": 9.96106711078487e-06, "loss": 0.8906, "step": 2237 }, { "epoch": 0.06859139389481426, "grad_norm": 1.7575409156762989, "learning_rate": 9.961005270310742e-06, "loss": 0.6814, "step": 2238 }, { "epoch": 0.06862204241755547, "grad_norm": 2.2981082103273804, "learning_rate": 9.96094338095456e-06, "loss": 0.7402, "step": 2239 }, { "epoch": 0.06865269094029668, "grad_norm": 2.109675772869491, "learning_rate": 9.960881442716931e-06, "loss": 0.8654, "step": 2240 }, { "epoch": 0.06868333946303788, "grad_norm": 2.089347543776716, "learning_rate": 9.96081945559847e-06, "loss": 0.7832, "step": 2241 }, { "epoch": 0.06871398798577909, "grad_norm": 2.122791794612436, "learning_rate": 9.960757419599785e-06, "loss": 0.8012, "step": 2242 }, { "epoch": 0.0687446365085203, "grad_norm": 2.3408780391481656, "learning_rate": 9.960695334721489e-06, "loss": 0.7147, "step": 2243 }, { "epoch": 0.0687752850312615, "grad_norm": 2.1414144518233096, "learning_rate": 9.960633200964192e-06, "loss": 0.7923, "step": 2244 }, { "epoch": 0.06880593355400269, "grad_norm": 2.025301655839275, "learning_rate": 9.960571018328505e-06, "loss": 0.803, "step": 2245 }, { "epoch": 0.0688365820767439, "grad_norm": 2.012102201741091, "learning_rate": 9.960508786815045e-06, "loss": 0.7159, "step": 2246 }, { "epoch": 0.0688672305994851, "grad_norm": 1.937147346864909, "learning_rate": 9.96044650642442e-06, "loss": 0.8162, "step": 2247 }, { "epoch": 0.06889787912222631, "grad_norm": 1.9953158464492042, "learning_rate": 9.96038417715725e-06, "loss": 0.747, "step": 2248 }, { "epoch": 0.06892852764496751, "grad_norm": 2.12837025166506, "learning_rate": 9.960321799014142e-06, "loss": 0.7859, "step": 2249 }, { "epoch": 0.06895917616770872, "grad_norm": 2.159668545485188, "learning_rate": 9.960259371995715e-06, "loss": 0.7257, "step": 2250 }, { "epoch": 0.06898982469044992, "grad_norm": 2.4127701179407035, "learning_rate": 9.960196896102585e-06, "loss": 0.7779, "step": 2251 }, { "epoch": 0.06902047321319113, "grad_norm": 1.410171261029431, "learning_rate": 9.960134371335364e-06, "loss": 0.5516, "step": 2252 }, { "epoch": 0.06905112173593232, "grad_norm": 2.301345209981521, "learning_rate": 9.960071797694671e-06, "loss": 0.7542, "step": 2253 }, { "epoch": 0.06908177025867353, "grad_norm": 2.279861608136347, "learning_rate": 9.960009175181122e-06, "loss": 0.8157, "step": 2254 }, { "epoch": 0.06911241878141473, "grad_norm": 1.9899700246862795, "learning_rate": 9.959946503795333e-06, "loss": 0.6722, "step": 2255 }, { "epoch": 0.06914306730415594, "grad_norm": 2.1432719250662817, "learning_rate": 9.959883783537922e-06, "loss": 0.7443, "step": 2256 }, { "epoch": 0.06917371582689714, "grad_norm": 1.220000324072857, "learning_rate": 9.959821014409506e-06, "loss": 0.5315, "step": 2257 }, { "epoch": 0.06920436434963835, "grad_norm": 2.136524355626567, "learning_rate": 9.959758196410705e-06, "loss": 0.7464, "step": 2258 }, { "epoch": 0.06923501287237956, "grad_norm": 1.926801000050369, "learning_rate": 9.959695329542138e-06, "loss": 0.8048, "step": 2259 }, { "epoch": 0.06926566139512076, "grad_norm": 2.2339970240509603, "learning_rate": 9.959632413804424e-06, "loss": 0.8672, "step": 2260 }, { "epoch": 0.06929630991786195, "grad_norm": 2.30043659206228, "learning_rate": 9.959569449198183e-06, "loss": 0.8633, "step": 2261 }, { "epoch": 0.06932695844060316, "grad_norm": 2.0891099381468217, "learning_rate": 9.959506435724036e-06, "loss": 0.7645, "step": 2262 }, { "epoch": 0.06935760696334436, "grad_norm": 2.1507611072298767, "learning_rate": 9.959443373382602e-06, "loss": 0.7655, "step": 2263 }, { "epoch": 0.06938825548608557, "grad_norm": 2.1889686325531983, "learning_rate": 9.959380262174502e-06, "loss": 0.808, "step": 2264 }, { "epoch": 0.06941890400882678, "grad_norm": 2.129697684809936, "learning_rate": 9.959317102100362e-06, "loss": 0.7479, "step": 2265 }, { "epoch": 0.06944955253156798, "grad_norm": 2.3585225259210847, "learning_rate": 9.9592538931608e-06, "loss": 0.7833, "step": 2266 }, { "epoch": 0.06948020105430919, "grad_norm": 2.0061671112099395, "learning_rate": 9.959190635356441e-06, "loss": 0.8749, "step": 2267 }, { "epoch": 0.06951084957705038, "grad_norm": 1.971412482041802, "learning_rate": 9.959127328687908e-06, "loss": 0.7458, "step": 2268 }, { "epoch": 0.06954149809979158, "grad_norm": 2.315981718256524, "learning_rate": 9.959063973155824e-06, "loss": 0.8122, "step": 2269 }, { "epoch": 0.06957214662253279, "grad_norm": 2.0945932986398614, "learning_rate": 9.959000568760815e-06, "loss": 0.7472, "step": 2270 }, { "epoch": 0.069602795145274, "grad_norm": 2.016658760924455, "learning_rate": 9.958937115503505e-06, "loss": 0.8201, "step": 2271 }, { "epoch": 0.0696334436680152, "grad_norm": 1.1349457608207056, "learning_rate": 9.958873613384516e-06, "loss": 0.5316, "step": 2272 }, { "epoch": 0.06966409219075641, "grad_norm": 1.9411666908337206, "learning_rate": 9.958810062404479e-06, "loss": 0.7576, "step": 2273 }, { "epoch": 0.06969474071349761, "grad_norm": 1.7566906057567735, "learning_rate": 9.958746462564017e-06, "loss": 0.7375, "step": 2274 }, { "epoch": 0.06972538923623882, "grad_norm": 1.980634315404257, "learning_rate": 9.958682813863758e-06, "loss": 0.7497, "step": 2275 }, { "epoch": 0.06975603775898001, "grad_norm": 2.0957189401415954, "learning_rate": 9.958619116304327e-06, "loss": 0.8038, "step": 2276 }, { "epoch": 0.06978668628172122, "grad_norm": 3.6028781662981313, "learning_rate": 9.958555369886354e-06, "loss": 0.7109, "step": 2277 }, { "epoch": 0.06981733480446242, "grad_norm": 2.3183718327495155, "learning_rate": 9.958491574610467e-06, "loss": 0.8936, "step": 2278 }, { "epoch": 0.06984798332720363, "grad_norm": 3.558992980172275, "learning_rate": 9.958427730477292e-06, "loss": 0.704, "step": 2279 }, { "epoch": 0.06987863184994483, "grad_norm": 1.773075922891207, "learning_rate": 9.958363837487462e-06, "loss": 0.73, "step": 2280 }, { "epoch": 0.06990928037268604, "grad_norm": 1.1251303050554349, "learning_rate": 9.958299895641603e-06, "loss": 0.5277, "step": 2281 }, { "epoch": 0.06993992889542724, "grad_norm": 2.0084229602634296, "learning_rate": 9.958235904940346e-06, "loss": 0.8704, "step": 2282 }, { "epoch": 0.06997057741816845, "grad_norm": 0.9305158334400182, "learning_rate": 9.958171865384322e-06, "loss": 0.53, "step": 2283 }, { "epoch": 0.07000122594090964, "grad_norm": 2.116327639015216, "learning_rate": 9.958107776974164e-06, "loss": 0.8644, "step": 2284 }, { "epoch": 0.07003187446365085, "grad_norm": 1.0993500859313505, "learning_rate": 9.958043639710501e-06, "loss": 0.5251, "step": 2285 }, { "epoch": 0.07006252298639205, "grad_norm": 1.9445683281201769, "learning_rate": 9.957979453593964e-06, "loss": 0.6999, "step": 2286 }, { "epoch": 0.07009317150913326, "grad_norm": 2.2734606426813126, "learning_rate": 9.957915218625188e-06, "loss": 0.9442, "step": 2287 }, { "epoch": 0.07012382003187446, "grad_norm": 2.0900671089800915, "learning_rate": 9.957850934804805e-06, "loss": 0.7945, "step": 2288 }, { "epoch": 0.07015446855461567, "grad_norm": 2.0836648811688208, "learning_rate": 9.957786602133448e-06, "loss": 0.6713, "step": 2289 }, { "epoch": 0.07018511707735688, "grad_norm": 1.88867304493332, "learning_rate": 9.95772222061175e-06, "loss": 0.7712, "step": 2290 }, { "epoch": 0.07021576560009808, "grad_norm": 1.2502033956281147, "learning_rate": 9.957657790240347e-06, "loss": 0.5339, "step": 2291 }, { "epoch": 0.07024641412283927, "grad_norm": 2.1242277129954887, "learning_rate": 9.957593311019875e-06, "loss": 0.7955, "step": 2292 }, { "epoch": 0.07027706264558048, "grad_norm": 1.8733258140929518, "learning_rate": 9.957528782950965e-06, "loss": 0.6389, "step": 2293 }, { "epoch": 0.07030771116832168, "grad_norm": 2.2053643787839077, "learning_rate": 9.957464206034258e-06, "loss": 0.8507, "step": 2294 }, { "epoch": 0.07033835969106289, "grad_norm": 1.9756747402991037, "learning_rate": 9.957399580270386e-06, "loss": 0.7365, "step": 2295 }, { "epoch": 0.0703690082138041, "grad_norm": 2.105506429757199, "learning_rate": 9.957334905659987e-06, "loss": 0.8492, "step": 2296 }, { "epoch": 0.0703996567365453, "grad_norm": 2.161648797505632, "learning_rate": 9.9572701822037e-06, "loss": 0.7824, "step": 2297 }, { "epoch": 0.07043030525928651, "grad_norm": 2.1997741460810136, "learning_rate": 9.957205409902163e-06, "loss": 0.8099, "step": 2298 }, { "epoch": 0.07046095378202771, "grad_norm": 1.9041341739660858, "learning_rate": 9.95714058875601e-06, "loss": 0.7398, "step": 2299 }, { "epoch": 0.0704916023047689, "grad_norm": 1.1579663901710928, "learning_rate": 9.957075718765882e-06, "loss": 0.5374, "step": 2300 }, { "epoch": 0.07052225082751011, "grad_norm": 2.122681627512686, "learning_rate": 9.95701079993242e-06, "loss": 0.8533, "step": 2301 }, { "epoch": 0.07055289935025132, "grad_norm": 2.247602678547781, "learning_rate": 9.956945832256264e-06, "loss": 0.7632, "step": 2302 }, { "epoch": 0.07058354787299252, "grad_norm": 1.0040932122104247, "learning_rate": 9.956880815738051e-06, "loss": 0.5255, "step": 2303 }, { "epoch": 0.07061419639573373, "grad_norm": 2.0486265132336934, "learning_rate": 9.956815750378423e-06, "loss": 0.7718, "step": 2304 }, { "epoch": 0.07064484491847493, "grad_norm": 2.212818869553762, "learning_rate": 9.956750636178021e-06, "loss": 0.8079, "step": 2305 }, { "epoch": 0.07067549344121614, "grad_norm": 2.2056746599809545, "learning_rate": 9.956685473137486e-06, "loss": 0.8227, "step": 2306 }, { "epoch": 0.07070614196395733, "grad_norm": 2.184958372037437, "learning_rate": 9.956620261257462e-06, "loss": 0.8093, "step": 2307 }, { "epoch": 0.07073679048669854, "grad_norm": 1.0251598790100613, "learning_rate": 9.95655500053859e-06, "loss": 0.5329, "step": 2308 }, { "epoch": 0.07076743900943974, "grad_norm": 2.0762823403000987, "learning_rate": 9.956489690981513e-06, "loss": 0.8089, "step": 2309 }, { "epoch": 0.07079808753218095, "grad_norm": 2.0237324744245844, "learning_rate": 9.956424332586876e-06, "loss": 0.8065, "step": 2310 }, { "epoch": 0.07082873605492215, "grad_norm": 2.0174862889472, "learning_rate": 9.956358925355321e-06, "loss": 0.7073, "step": 2311 }, { "epoch": 0.07085938457766336, "grad_norm": 2.1165186825939415, "learning_rate": 9.956293469287494e-06, "loss": 0.8557, "step": 2312 }, { "epoch": 0.07089003310040456, "grad_norm": 0.9239901399183491, "learning_rate": 9.956227964384038e-06, "loss": 0.5212, "step": 2313 }, { "epoch": 0.07092068162314577, "grad_norm": 0.9844516899825204, "learning_rate": 9.9561624106456e-06, "loss": 0.5372, "step": 2314 }, { "epoch": 0.07095133014588696, "grad_norm": 2.43111042007713, "learning_rate": 9.956096808072827e-06, "loss": 0.7436, "step": 2315 }, { "epoch": 0.07098197866862817, "grad_norm": 2.8007240673552434, "learning_rate": 9.956031156666364e-06, "loss": 0.7616, "step": 2316 }, { "epoch": 0.07101262719136937, "grad_norm": 0.8621651657339893, "learning_rate": 9.955965456426856e-06, "loss": 0.5208, "step": 2317 }, { "epoch": 0.07104327571411058, "grad_norm": 2.0359959395934695, "learning_rate": 9.955899707354954e-06, "loss": 0.9252, "step": 2318 }, { "epoch": 0.07107392423685178, "grad_norm": 2.323913280957989, "learning_rate": 9.955833909451304e-06, "loss": 0.9017, "step": 2319 }, { "epoch": 0.07110457275959299, "grad_norm": 2.081039609552139, "learning_rate": 9.955768062716553e-06, "loss": 0.8027, "step": 2320 }, { "epoch": 0.0711352212823342, "grad_norm": 2.198253971449805, "learning_rate": 9.955702167151355e-06, "loss": 0.7978, "step": 2321 }, { "epoch": 0.0711658698050754, "grad_norm": 2.292421528859222, "learning_rate": 9.955636222756353e-06, "loss": 0.8421, "step": 2322 }, { "epoch": 0.0711965183278166, "grad_norm": 1.9150821868603327, "learning_rate": 9.955570229532198e-06, "loss": 0.7252, "step": 2323 }, { "epoch": 0.0712271668505578, "grad_norm": 1.9117849292917966, "learning_rate": 9.955504187479542e-06, "loss": 0.7022, "step": 2324 }, { "epoch": 0.071257815373299, "grad_norm": 2.0564686996029535, "learning_rate": 9.955438096599038e-06, "loss": 0.7961, "step": 2325 }, { "epoch": 0.07128846389604021, "grad_norm": 2.1537348271735004, "learning_rate": 9.955371956891334e-06, "loss": 0.7216, "step": 2326 }, { "epoch": 0.07131911241878142, "grad_norm": 2.2418362883656844, "learning_rate": 9.95530576835708e-06, "loss": 0.818, "step": 2327 }, { "epoch": 0.07134976094152262, "grad_norm": 2.2426250538090624, "learning_rate": 9.955239530996932e-06, "loss": 0.7842, "step": 2328 }, { "epoch": 0.07138040946426383, "grad_norm": 2.043711310497986, "learning_rate": 9.95517324481154e-06, "loss": 0.8246, "step": 2329 }, { "epoch": 0.07141105798700503, "grad_norm": 1.8715842089675616, "learning_rate": 9.95510690980156e-06, "loss": 0.6835, "step": 2330 }, { "epoch": 0.07144170650974623, "grad_norm": 1.2530717107207856, "learning_rate": 9.955040525967643e-06, "loss": 0.5113, "step": 2331 }, { "epoch": 0.07147235503248743, "grad_norm": 1.997130846871725, "learning_rate": 9.954974093310443e-06, "loss": 0.7639, "step": 2332 }, { "epoch": 0.07150300355522864, "grad_norm": 2.1259047516889007, "learning_rate": 9.954907611830615e-06, "loss": 0.8995, "step": 2333 }, { "epoch": 0.07153365207796984, "grad_norm": 2.256633582298184, "learning_rate": 9.954841081528817e-06, "loss": 0.8027, "step": 2334 }, { "epoch": 0.07156430060071105, "grad_norm": 2.143700428365341, "learning_rate": 9.954774502405699e-06, "loss": 0.8998, "step": 2335 }, { "epoch": 0.07159494912345225, "grad_norm": 2.2708748976899424, "learning_rate": 9.954707874461921e-06, "loss": 0.7861, "step": 2336 }, { "epoch": 0.07162559764619346, "grad_norm": 2.107543554087462, "learning_rate": 9.95464119769814e-06, "loss": 0.7424, "step": 2337 }, { "epoch": 0.07165624616893465, "grad_norm": 2.0202001603035664, "learning_rate": 9.954574472115011e-06, "loss": 0.7813, "step": 2338 }, { "epoch": 0.07168689469167586, "grad_norm": 1.994890850270316, "learning_rate": 9.954507697713192e-06, "loss": 0.7989, "step": 2339 }, { "epoch": 0.07171754321441706, "grad_norm": 1.9942301190882383, "learning_rate": 9.95444087449334e-06, "loss": 0.7688, "step": 2340 }, { "epoch": 0.07174819173715827, "grad_norm": 1.840454829414237, "learning_rate": 9.954374002456116e-06, "loss": 0.7598, "step": 2341 }, { "epoch": 0.07177884025989947, "grad_norm": 2.0859220888487635, "learning_rate": 9.954307081602176e-06, "loss": 0.711, "step": 2342 }, { "epoch": 0.07180948878264068, "grad_norm": 2.234223985251637, "learning_rate": 9.954240111932182e-06, "loss": 0.8012, "step": 2343 }, { "epoch": 0.07184013730538188, "grad_norm": 2.163057883919565, "learning_rate": 9.954173093446792e-06, "loss": 0.8664, "step": 2344 }, { "epoch": 0.07187078582812309, "grad_norm": 1.9610925495235778, "learning_rate": 9.954106026146667e-06, "loss": 0.6918, "step": 2345 }, { "epoch": 0.07190143435086428, "grad_norm": 2.0506703576129537, "learning_rate": 9.954038910032468e-06, "loss": 0.7797, "step": 2346 }, { "epoch": 0.07193208287360549, "grad_norm": 2.3287840826210537, "learning_rate": 9.953971745104855e-06, "loss": 0.816, "step": 2347 }, { "epoch": 0.0719627313963467, "grad_norm": 2.236976134186717, "learning_rate": 9.95390453136449e-06, "loss": 0.8008, "step": 2348 }, { "epoch": 0.0719933799190879, "grad_norm": 2.074841759483753, "learning_rate": 9.953837268812039e-06, "loss": 0.642, "step": 2349 }, { "epoch": 0.0720240284418291, "grad_norm": 1.9174105289703707, "learning_rate": 9.95376995744816e-06, "loss": 0.7384, "step": 2350 }, { "epoch": 0.07205467696457031, "grad_norm": 1.9840227093516738, "learning_rate": 9.95370259727352e-06, "loss": 0.8143, "step": 2351 }, { "epoch": 0.07208532548731152, "grad_norm": 2.1195520179640033, "learning_rate": 9.95363518828878e-06, "loss": 0.7685, "step": 2352 }, { "epoch": 0.07211597401005272, "grad_norm": 1.3243825497937183, "learning_rate": 9.953567730494604e-06, "loss": 0.5333, "step": 2353 }, { "epoch": 0.07214662253279391, "grad_norm": 1.802680791522805, "learning_rate": 9.953500223891657e-06, "loss": 0.6822, "step": 2354 }, { "epoch": 0.07217727105553512, "grad_norm": 0.9865670520705674, "learning_rate": 9.953432668480607e-06, "loss": 0.508, "step": 2355 }, { "epoch": 0.07220791957827633, "grad_norm": 2.0364961056053508, "learning_rate": 9.953365064262117e-06, "loss": 0.8026, "step": 2356 }, { "epoch": 0.07223856810101753, "grad_norm": 2.304593169297076, "learning_rate": 9.953297411236853e-06, "loss": 0.7622, "step": 2357 }, { "epoch": 0.07226921662375874, "grad_norm": 2.113923099326523, "learning_rate": 9.953229709405483e-06, "loss": 0.7865, "step": 2358 }, { "epoch": 0.07229986514649994, "grad_norm": 1.9040093398457312, "learning_rate": 9.953161958768673e-06, "loss": 0.6946, "step": 2359 }, { "epoch": 0.07233051366924115, "grad_norm": 1.5844849276883763, "learning_rate": 9.95309415932709e-06, "loss": 0.5493, "step": 2360 }, { "epoch": 0.07236116219198235, "grad_norm": 1.9246178676523136, "learning_rate": 9.953026311081404e-06, "loss": 0.746, "step": 2361 }, { "epoch": 0.07239181071472355, "grad_norm": 2.1129987734135804, "learning_rate": 9.952958414032283e-06, "loss": 0.7985, "step": 2362 }, { "epoch": 0.07242245923746475, "grad_norm": 1.828587294656579, "learning_rate": 9.952890468180396e-06, "loss": 0.7873, "step": 2363 }, { "epoch": 0.07245310776020596, "grad_norm": 2.122151298885558, "learning_rate": 9.95282247352641e-06, "loss": 0.8465, "step": 2364 }, { "epoch": 0.07248375628294716, "grad_norm": 1.8422994071413745, "learning_rate": 9.952754430070997e-06, "loss": 0.6145, "step": 2365 }, { "epoch": 0.07251440480568837, "grad_norm": 1.816051859453394, "learning_rate": 9.952686337814827e-06, "loss": 0.8394, "step": 2366 }, { "epoch": 0.07254505332842957, "grad_norm": 1.9276603122909988, "learning_rate": 9.952618196758574e-06, "loss": 0.7234, "step": 2367 }, { "epoch": 0.07257570185117078, "grad_norm": 2.2896530056026108, "learning_rate": 9.952550006902905e-06, "loss": 0.6613, "step": 2368 }, { "epoch": 0.07260635037391197, "grad_norm": 2.085789402417831, "learning_rate": 9.952481768248495e-06, "loss": 0.8405, "step": 2369 }, { "epoch": 0.07263699889665318, "grad_norm": 1.851515271266915, "learning_rate": 9.952413480796013e-06, "loss": 0.7459, "step": 2370 }, { "epoch": 0.07266764741939438, "grad_norm": 2.0629016880702338, "learning_rate": 9.952345144546135e-06, "loss": 0.7892, "step": 2371 }, { "epoch": 0.07269829594213559, "grad_norm": 2.3085819001223102, "learning_rate": 9.952276759499531e-06, "loss": 0.8055, "step": 2372 }, { "epoch": 0.0727289444648768, "grad_norm": 2.15327267602673, "learning_rate": 9.95220832565688e-06, "loss": 0.7889, "step": 2373 }, { "epoch": 0.072759592987618, "grad_norm": 1.1580089364260648, "learning_rate": 9.95213984301885e-06, "loss": 0.5266, "step": 2374 }, { "epoch": 0.0727902415103592, "grad_norm": 1.0968870104213087, "learning_rate": 9.952071311586123e-06, "loss": 0.5288, "step": 2375 }, { "epoch": 0.07282089003310041, "grad_norm": 2.0530390886668335, "learning_rate": 9.952002731359368e-06, "loss": 0.7287, "step": 2376 }, { "epoch": 0.0728515385558416, "grad_norm": 2.0433877804860083, "learning_rate": 9.951934102339263e-06, "loss": 0.7396, "step": 2377 }, { "epoch": 0.07288218707858281, "grad_norm": 2.6663417137799206, "learning_rate": 9.951865424526486e-06, "loss": 0.8207, "step": 2378 }, { "epoch": 0.07291283560132401, "grad_norm": 2.271280053445808, "learning_rate": 9.95179669792171e-06, "loss": 0.816, "step": 2379 }, { "epoch": 0.07294348412406522, "grad_norm": 1.107793175664813, "learning_rate": 9.951727922525615e-06, "loss": 0.536, "step": 2380 }, { "epoch": 0.07297413264680642, "grad_norm": 2.0299498079595604, "learning_rate": 9.951659098338878e-06, "loss": 0.7367, "step": 2381 }, { "epoch": 0.07300478116954763, "grad_norm": 2.1917539521624594, "learning_rate": 9.951590225362176e-06, "loss": 0.8437, "step": 2382 }, { "epoch": 0.07303542969228884, "grad_norm": 2.318702848845766, "learning_rate": 9.95152130359619e-06, "loss": 0.8504, "step": 2383 }, { "epoch": 0.07306607821503004, "grad_norm": 2.393834930581342, "learning_rate": 9.951452333041596e-06, "loss": 0.7144, "step": 2384 }, { "epoch": 0.07309672673777123, "grad_norm": 0.9865067315116456, "learning_rate": 9.951383313699077e-06, "loss": 0.5258, "step": 2385 }, { "epoch": 0.07312737526051244, "grad_norm": 2.339742463952982, "learning_rate": 9.951314245569311e-06, "loss": 0.8598, "step": 2386 }, { "epoch": 0.07315802378325365, "grad_norm": 2.0032337778192226, "learning_rate": 9.951245128652978e-06, "loss": 0.8121, "step": 2387 }, { "epoch": 0.07318867230599485, "grad_norm": 2.1919879385112226, "learning_rate": 9.95117596295076e-06, "loss": 0.7158, "step": 2388 }, { "epoch": 0.07321932082873606, "grad_norm": 2.0058179771200466, "learning_rate": 9.951106748463339e-06, "loss": 0.7654, "step": 2389 }, { "epoch": 0.07324996935147726, "grad_norm": 1.9808913238388546, "learning_rate": 9.951037485191395e-06, "loss": 0.8043, "step": 2390 }, { "epoch": 0.07328061787421847, "grad_norm": 0.9647237200640969, "learning_rate": 9.950968173135614e-06, "loss": 0.538, "step": 2391 }, { "epoch": 0.07331126639695967, "grad_norm": 1.9200481643411278, "learning_rate": 9.950898812296676e-06, "loss": 0.7328, "step": 2392 }, { "epoch": 0.07334191491970087, "grad_norm": 2.2581302388579125, "learning_rate": 9.950829402675264e-06, "loss": 0.8173, "step": 2393 }, { "epoch": 0.07337256344244207, "grad_norm": 0.8660349141010847, "learning_rate": 9.950759944272066e-06, "loss": 0.529, "step": 2394 }, { "epoch": 0.07340321196518328, "grad_norm": 1.7759415147027164, "learning_rate": 9.950690437087763e-06, "loss": 0.7549, "step": 2395 }, { "epoch": 0.07343386048792448, "grad_norm": 2.1051595180598324, "learning_rate": 9.950620881123039e-06, "loss": 0.8733, "step": 2396 }, { "epoch": 0.07346450901066569, "grad_norm": 2.0622141365735462, "learning_rate": 9.950551276378579e-06, "loss": 0.7172, "step": 2397 }, { "epoch": 0.0734951575334069, "grad_norm": 2.2010468730194876, "learning_rate": 9.950481622855073e-06, "loss": 0.8318, "step": 2398 }, { "epoch": 0.0735258060561481, "grad_norm": 2.3005499963861973, "learning_rate": 9.950411920553205e-06, "loss": 0.7938, "step": 2399 }, { "epoch": 0.07355645457888929, "grad_norm": 1.9752658379271069, "learning_rate": 9.950342169473661e-06, "loss": 0.8062, "step": 2400 }, { "epoch": 0.0735871031016305, "grad_norm": 2.1417987413544104, "learning_rate": 9.950272369617132e-06, "loss": 0.6836, "step": 2401 }, { "epoch": 0.0736177516243717, "grad_norm": 0.9625795563493352, "learning_rate": 9.9502025209843e-06, "loss": 0.5209, "step": 2402 }, { "epoch": 0.07364840014711291, "grad_norm": 1.9319837574834204, "learning_rate": 9.950132623575855e-06, "loss": 0.8127, "step": 2403 }, { "epoch": 0.07367904866985411, "grad_norm": 2.28019346269713, "learning_rate": 9.950062677392488e-06, "loss": 0.7701, "step": 2404 }, { "epoch": 0.07370969719259532, "grad_norm": 1.8434735363991641, "learning_rate": 9.949992682434887e-06, "loss": 0.7403, "step": 2405 }, { "epoch": 0.07374034571533652, "grad_norm": 0.9580831561003115, "learning_rate": 9.949922638703742e-06, "loss": 0.5125, "step": 2406 }, { "epoch": 0.07377099423807773, "grad_norm": 2.1313367676844415, "learning_rate": 9.949852546199741e-06, "loss": 0.877, "step": 2407 }, { "epoch": 0.07380164276081892, "grad_norm": 2.189804339716585, "learning_rate": 9.949782404923579e-06, "loss": 0.7919, "step": 2408 }, { "epoch": 0.07383229128356013, "grad_norm": 2.324731172151757, "learning_rate": 9.949712214875942e-06, "loss": 0.833, "step": 2409 }, { "epoch": 0.07386293980630133, "grad_norm": 2.0876640173487373, "learning_rate": 9.949641976057525e-06, "loss": 0.7685, "step": 2410 }, { "epoch": 0.07389358832904254, "grad_norm": 2.138972642832821, "learning_rate": 9.94957168846902e-06, "loss": 0.9369, "step": 2411 }, { "epoch": 0.07392423685178375, "grad_norm": 1.8524999880780226, "learning_rate": 9.949501352111118e-06, "loss": 0.8028, "step": 2412 }, { "epoch": 0.07395488537452495, "grad_norm": 1.7814942450531153, "learning_rate": 9.949430966984512e-06, "loss": 0.7489, "step": 2413 }, { "epoch": 0.07398553389726616, "grad_norm": 2.17751065747944, "learning_rate": 9.949360533089898e-06, "loss": 0.8953, "step": 2414 }, { "epoch": 0.07401618242000736, "grad_norm": 2.020054124053781, "learning_rate": 9.949290050427967e-06, "loss": 0.8045, "step": 2415 }, { "epoch": 0.07404683094274855, "grad_norm": 1.4071217303538606, "learning_rate": 9.949219518999416e-06, "loss": 0.5469, "step": 2416 }, { "epoch": 0.07407747946548976, "grad_norm": 2.1932441271972807, "learning_rate": 9.949148938804938e-06, "loss": 0.7871, "step": 2417 }, { "epoch": 0.07410812798823097, "grad_norm": 2.0369743835433725, "learning_rate": 9.949078309845229e-06, "loss": 0.8051, "step": 2418 }, { "epoch": 0.07413877651097217, "grad_norm": 1.952965174437804, "learning_rate": 9.949007632120986e-06, "loss": 0.7259, "step": 2419 }, { "epoch": 0.07416942503371338, "grad_norm": 2.124558225728616, "learning_rate": 9.948936905632905e-06, "loss": 0.7998, "step": 2420 }, { "epoch": 0.07420007355645458, "grad_norm": 2.0292873048216316, "learning_rate": 9.94886613038168e-06, "loss": 0.7561, "step": 2421 }, { "epoch": 0.07423072207919579, "grad_norm": 1.9087602798638492, "learning_rate": 9.948795306368012e-06, "loss": 0.6522, "step": 2422 }, { "epoch": 0.074261370601937, "grad_norm": 2.2154691398617343, "learning_rate": 9.948724433592599e-06, "loss": 0.7118, "step": 2423 }, { "epoch": 0.07429201912467819, "grad_norm": 1.9397081059552408, "learning_rate": 9.948653512056136e-06, "loss": 0.7612, "step": 2424 }, { "epoch": 0.07432266764741939, "grad_norm": 1.8464531859905036, "learning_rate": 9.948582541759324e-06, "loss": 0.7493, "step": 2425 }, { "epoch": 0.0743533161701606, "grad_norm": 1.5412597989509653, "learning_rate": 9.948511522702864e-06, "loss": 0.5435, "step": 2426 }, { "epoch": 0.0743839646929018, "grad_norm": 2.4048770886051107, "learning_rate": 9.94844045488745e-06, "loss": 0.9019, "step": 2427 }, { "epoch": 0.07441461321564301, "grad_norm": 1.8891503120925406, "learning_rate": 9.94836933831379e-06, "loss": 0.7482, "step": 2428 }, { "epoch": 0.07444526173838421, "grad_norm": 2.43126195267096, "learning_rate": 9.94829817298258e-06, "loss": 0.8009, "step": 2429 }, { "epoch": 0.07447591026112542, "grad_norm": 2.219406957267496, "learning_rate": 9.94822695889452e-06, "loss": 0.9824, "step": 2430 }, { "epoch": 0.07450655878386661, "grad_norm": 2.124437207032232, "learning_rate": 9.948155696050316e-06, "loss": 0.8258, "step": 2431 }, { "epoch": 0.07453720730660782, "grad_norm": 2.386570933224071, "learning_rate": 9.948084384450667e-06, "loss": 0.8945, "step": 2432 }, { "epoch": 0.07456785582934902, "grad_norm": 2.0733285369404704, "learning_rate": 9.948013024096277e-06, "loss": 0.879, "step": 2433 }, { "epoch": 0.07459850435209023, "grad_norm": 1.9827299124982838, "learning_rate": 9.947941614987848e-06, "loss": 0.8229, "step": 2434 }, { "epoch": 0.07462915287483143, "grad_norm": 2.3065877021447823, "learning_rate": 9.947870157126085e-06, "loss": 0.7648, "step": 2435 }, { "epoch": 0.07465980139757264, "grad_norm": 2.2104104461815464, "learning_rate": 9.94779865051169e-06, "loss": 0.7612, "step": 2436 }, { "epoch": 0.07469044992031385, "grad_norm": 1.2447217181226216, "learning_rate": 9.947727095145371e-06, "loss": 0.524, "step": 2437 }, { "epoch": 0.07472109844305505, "grad_norm": 1.9128371833040507, "learning_rate": 9.94765549102783e-06, "loss": 0.7745, "step": 2438 }, { "epoch": 0.07475174696579624, "grad_norm": 2.0951163971424918, "learning_rate": 9.947583838159774e-06, "loss": 0.7969, "step": 2439 }, { "epoch": 0.07478239548853745, "grad_norm": 1.8937482078979568, "learning_rate": 9.947512136541906e-06, "loss": 0.7418, "step": 2440 }, { "epoch": 0.07481304401127865, "grad_norm": 1.8025607079668318, "learning_rate": 9.947440386174938e-06, "loss": 0.7232, "step": 2441 }, { "epoch": 0.07484369253401986, "grad_norm": 2.113830910552414, "learning_rate": 9.947368587059574e-06, "loss": 0.7817, "step": 2442 }, { "epoch": 0.07487434105676107, "grad_norm": 2.597309306297494, "learning_rate": 9.94729673919652e-06, "loss": 0.9525, "step": 2443 }, { "epoch": 0.07490498957950227, "grad_norm": 1.8238973049026954, "learning_rate": 9.947224842586484e-06, "loss": 0.688, "step": 2444 }, { "epoch": 0.07493563810224348, "grad_norm": 1.9293514437442676, "learning_rate": 9.947152897230179e-06, "loss": 0.7567, "step": 2445 }, { "epoch": 0.07496628662498468, "grad_norm": 2.0268955366794863, "learning_rate": 9.94708090312831e-06, "loss": 0.7881, "step": 2446 }, { "epoch": 0.07499693514772587, "grad_norm": 2.1514440082339643, "learning_rate": 9.947008860281586e-06, "loss": 0.662, "step": 2447 }, { "epoch": 0.07502758367046708, "grad_norm": 2.0220424706853253, "learning_rate": 9.946936768690719e-06, "loss": 0.6793, "step": 2448 }, { "epoch": 0.07505823219320829, "grad_norm": 2.418173844251185, "learning_rate": 9.946864628356418e-06, "loss": 0.7132, "step": 2449 }, { "epoch": 0.07508888071594949, "grad_norm": 2.166402727625687, "learning_rate": 9.946792439279393e-06, "loss": 0.7231, "step": 2450 }, { "epoch": 0.0751195292386907, "grad_norm": 1.9391688374504192, "learning_rate": 9.946720201460358e-06, "loss": 0.7059, "step": 2451 }, { "epoch": 0.0751501777614319, "grad_norm": 1.1213477336362034, "learning_rate": 9.946647914900023e-06, "loss": 0.5088, "step": 2452 }, { "epoch": 0.07518082628417311, "grad_norm": 2.10429622072135, "learning_rate": 9.946575579599098e-06, "loss": 0.8425, "step": 2453 }, { "epoch": 0.07521147480691431, "grad_norm": 0.945466667897788, "learning_rate": 9.946503195558302e-06, "loss": 0.5077, "step": 2454 }, { "epoch": 0.0752421233296555, "grad_norm": 2.0980044741099295, "learning_rate": 9.94643076277834e-06, "loss": 0.8593, "step": 2455 }, { "epoch": 0.07527277185239671, "grad_norm": 2.0435121029779935, "learning_rate": 9.946358281259933e-06, "loss": 0.7731, "step": 2456 }, { "epoch": 0.07530342037513792, "grad_norm": 2.024204347171377, "learning_rate": 9.946285751003793e-06, "loss": 0.7812, "step": 2457 }, { "epoch": 0.07533406889787912, "grad_norm": 2.24886884277932, "learning_rate": 9.946213172010633e-06, "loss": 0.8007, "step": 2458 }, { "epoch": 0.07536471742062033, "grad_norm": 2.3714306793549365, "learning_rate": 9.946140544281168e-06, "loss": 0.778, "step": 2459 }, { "epoch": 0.07539536594336153, "grad_norm": 1.2474521211209728, "learning_rate": 9.946067867816116e-06, "loss": 0.5462, "step": 2460 }, { "epoch": 0.07542601446610274, "grad_norm": 1.9709703146936286, "learning_rate": 9.945995142616192e-06, "loss": 0.788, "step": 2461 }, { "epoch": 0.07545666298884393, "grad_norm": 0.9772789020232173, "learning_rate": 9.945922368682111e-06, "loss": 0.5082, "step": 2462 }, { "epoch": 0.07548731151158514, "grad_norm": 1.8840522159465025, "learning_rate": 9.945849546014591e-06, "loss": 0.7057, "step": 2463 }, { "epoch": 0.07551796003432634, "grad_norm": 2.0140654513140714, "learning_rate": 9.945776674614353e-06, "loss": 0.7592, "step": 2464 }, { "epoch": 0.07554860855706755, "grad_norm": 0.9705242509695273, "learning_rate": 9.94570375448211e-06, "loss": 0.509, "step": 2465 }, { "epoch": 0.07557925707980875, "grad_norm": 2.12649521164474, "learning_rate": 9.945630785618583e-06, "loss": 0.8113, "step": 2466 }, { "epoch": 0.07560990560254996, "grad_norm": 2.1882550944957817, "learning_rate": 9.94555776802449e-06, "loss": 0.8094, "step": 2467 }, { "epoch": 0.07564055412529117, "grad_norm": 1.1384629623669473, "learning_rate": 9.94548470170055e-06, "loss": 0.5453, "step": 2468 }, { "epoch": 0.07567120264803237, "grad_norm": 2.1038741464515547, "learning_rate": 9.945411586647486e-06, "loss": 0.759, "step": 2469 }, { "epoch": 0.07570185117077356, "grad_norm": 2.006687776493445, "learning_rate": 9.945338422866015e-06, "loss": 0.8351, "step": 2470 }, { "epoch": 0.07573249969351477, "grad_norm": 2.1192717447328393, "learning_rate": 9.945265210356858e-06, "loss": 0.8439, "step": 2471 }, { "epoch": 0.07576314821625597, "grad_norm": 1.906266596811565, "learning_rate": 9.94519194912074e-06, "loss": 0.7584, "step": 2472 }, { "epoch": 0.07579379673899718, "grad_norm": 2.028393544072328, "learning_rate": 9.94511863915838e-06, "loss": 0.7329, "step": 2473 }, { "epoch": 0.07582444526173839, "grad_norm": 2.1008767564621253, "learning_rate": 9.9450452804705e-06, "loss": 0.8347, "step": 2474 }, { "epoch": 0.07585509378447959, "grad_norm": 1.935780727882793, "learning_rate": 9.944971873057822e-06, "loss": 0.8131, "step": 2475 }, { "epoch": 0.0758857423072208, "grad_norm": 2.884611397534499, "learning_rate": 9.944898416921073e-06, "loss": 0.7837, "step": 2476 }, { "epoch": 0.075916390829962, "grad_norm": 1.8486855706695913, "learning_rate": 9.944824912060975e-06, "loss": 0.7736, "step": 2477 }, { "epoch": 0.0759470393527032, "grad_norm": 1.9713283835204514, "learning_rate": 9.944751358478253e-06, "loss": 0.7787, "step": 2478 }, { "epoch": 0.0759776878754444, "grad_norm": 2.1081649724111933, "learning_rate": 9.944677756173629e-06, "loss": 0.7519, "step": 2479 }, { "epoch": 0.0760083363981856, "grad_norm": 2.0099040321514923, "learning_rate": 9.944604105147828e-06, "loss": 0.7977, "step": 2480 }, { "epoch": 0.07603898492092681, "grad_norm": 2.056858176796959, "learning_rate": 9.94453040540158e-06, "loss": 0.6969, "step": 2481 }, { "epoch": 0.07606963344366802, "grad_norm": 1.3535756478739112, "learning_rate": 9.944456656935609e-06, "loss": 0.518, "step": 2482 }, { "epoch": 0.07610028196640922, "grad_norm": 1.1545413050091684, "learning_rate": 9.94438285975064e-06, "loss": 0.5247, "step": 2483 }, { "epoch": 0.07613093048915043, "grad_norm": 2.1163112358759393, "learning_rate": 9.944309013847403e-06, "loss": 0.8188, "step": 2484 }, { "epoch": 0.07616157901189163, "grad_norm": 2.0222274534343647, "learning_rate": 9.944235119226624e-06, "loss": 0.7873, "step": 2485 }, { "epoch": 0.07619222753463283, "grad_norm": 2.7286202539857465, "learning_rate": 9.944161175889031e-06, "loss": 0.7645, "step": 2486 }, { "epoch": 0.07622287605737403, "grad_norm": 2.009773837154301, "learning_rate": 9.944087183835353e-06, "loss": 0.7479, "step": 2487 }, { "epoch": 0.07625352458011524, "grad_norm": 2.3393415950390115, "learning_rate": 9.944013143066318e-06, "loss": 0.8115, "step": 2488 }, { "epoch": 0.07628417310285644, "grad_norm": 1.8259822960116994, "learning_rate": 9.943939053582659e-06, "loss": 0.5438, "step": 2489 }, { "epoch": 0.07631482162559765, "grad_norm": 1.5395345459106202, "learning_rate": 9.943864915385102e-06, "loss": 0.5408, "step": 2490 }, { "epoch": 0.07634547014833885, "grad_norm": 2.124634743919251, "learning_rate": 9.943790728474378e-06, "loss": 0.7987, "step": 2491 }, { "epoch": 0.07637611867108006, "grad_norm": 0.9868660787976484, "learning_rate": 9.943716492851221e-06, "loss": 0.545, "step": 2492 }, { "epoch": 0.07640676719382125, "grad_norm": 2.1613573422883205, "learning_rate": 9.94364220851636e-06, "loss": 0.7416, "step": 2493 }, { "epoch": 0.07643741571656246, "grad_norm": 2.1455107383246865, "learning_rate": 9.943567875470526e-06, "loss": 0.8122, "step": 2494 }, { "epoch": 0.07646806423930366, "grad_norm": 2.300314333141909, "learning_rate": 9.943493493714453e-06, "loss": 0.8407, "step": 2495 }, { "epoch": 0.07649871276204487, "grad_norm": 2.0778972946735705, "learning_rate": 9.943419063248876e-06, "loss": 0.5203, "step": 2496 }, { "epoch": 0.07652936128478607, "grad_norm": 2.2623669972857527, "learning_rate": 9.943344584074525e-06, "loss": 0.8415, "step": 2497 }, { "epoch": 0.07656000980752728, "grad_norm": 1.7786644621354666, "learning_rate": 9.943270056192133e-06, "loss": 0.7362, "step": 2498 }, { "epoch": 0.07659065833026849, "grad_norm": 1.8530739799348115, "learning_rate": 9.94319547960244e-06, "loss": 0.6911, "step": 2499 }, { "epoch": 0.07662130685300969, "grad_norm": 2.1768178599880974, "learning_rate": 9.943120854306175e-06, "loss": 0.6386, "step": 2500 }, { "epoch": 0.07665195537575088, "grad_norm": 2.023852027506842, "learning_rate": 9.943046180304075e-06, "loss": 0.7697, "step": 2501 }, { "epoch": 0.07668260389849209, "grad_norm": 1.09031393365183, "learning_rate": 9.942971457596877e-06, "loss": 0.5313, "step": 2502 }, { "epoch": 0.0767132524212333, "grad_norm": 2.066412995009834, "learning_rate": 9.942896686185317e-06, "loss": 0.73, "step": 2503 }, { "epoch": 0.0767439009439745, "grad_norm": 2.166527453036614, "learning_rate": 9.942821866070131e-06, "loss": 0.7614, "step": 2504 }, { "epoch": 0.0767745494667157, "grad_norm": 2.253396779382133, "learning_rate": 9.942746997252056e-06, "loss": 0.757, "step": 2505 }, { "epoch": 0.07680519798945691, "grad_norm": 1.0825589528730823, "learning_rate": 9.94267207973183e-06, "loss": 0.5233, "step": 2506 }, { "epoch": 0.07683584651219812, "grad_norm": 2.07321931849457, "learning_rate": 9.942597113510193e-06, "loss": 0.7137, "step": 2507 }, { "epoch": 0.07686649503493932, "grad_norm": 2.4612401485322, "learning_rate": 9.94252209858788e-06, "loss": 0.8127, "step": 2508 }, { "epoch": 0.07689714355768051, "grad_norm": 2.0940000336636615, "learning_rate": 9.942447034965634e-06, "loss": 0.7655, "step": 2509 }, { "epoch": 0.07692779208042172, "grad_norm": 2.1667714562354443, "learning_rate": 9.942371922644191e-06, "loss": 0.7105, "step": 2510 }, { "epoch": 0.07695844060316293, "grad_norm": 2.3222254763635024, "learning_rate": 9.942296761624293e-06, "loss": 0.833, "step": 2511 }, { "epoch": 0.07698908912590413, "grad_norm": 1.9075345516116033, "learning_rate": 9.94222155190668e-06, "loss": 0.6072, "step": 2512 }, { "epoch": 0.07701973764864534, "grad_norm": 2.1799482508438786, "learning_rate": 9.942146293492097e-06, "loss": 0.743, "step": 2513 }, { "epoch": 0.07705038617138654, "grad_norm": 3.2180970268347973, "learning_rate": 9.942070986381279e-06, "loss": 0.7245, "step": 2514 }, { "epoch": 0.07708103469412775, "grad_norm": 2.14825275019501, "learning_rate": 9.941995630574972e-06, "loss": 0.7057, "step": 2515 }, { "epoch": 0.07711168321686895, "grad_norm": 1.8938140784613668, "learning_rate": 9.941920226073917e-06, "loss": 0.8, "step": 2516 }, { "epoch": 0.07714233173961015, "grad_norm": 1.830360346938496, "learning_rate": 9.941844772878857e-06, "loss": 0.7474, "step": 2517 }, { "epoch": 0.07717298026235135, "grad_norm": 1.7738020077568564, "learning_rate": 9.941769270990538e-06, "loss": 0.7416, "step": 2518 }, { "epoch": 0.07720362878509256, "grad_norm": 1.8312561147436017, "learning_rate": 9.941693720409701e-06, "loss": 0.7619, "step": 2519 }, { "epoch": 0.07723427730783376, "grad_norm": 1.9844344825904607, "learning_rate": 9.941618121137091e-06, "loss": 0.826, "step": 2520 }, { "epoch": 0.07726492583057497, "grad_norm": 2.177445618905062, "learning_rate": 9.941542473173454e-06, "loss": 0.7479, "step": 2521 }, { "epoch": 0.07729557435331617, "grad_norm": 2.168174992915219, "learning_rate": 9.941466776519535e-06, "loss": 0.7833, "step": 2522 }, { "epoch": 0.07732622287605738, "grad_norm": 2.1766856548823412, "learning_rate": 9.941391031176077e-06, "loss": 0.843, "step": 2523 }, { "epoch": 0.07735687139879857, "grad_norm": 2.004747541946515, "learning_rate": 9.941315237143831e-06, "loss": 0.7594, "step": 2524 }, { "epoch": 0.07738751992153978, "grad_norm": 1.2796013893014515, "learning_rate": 9.941239394423544e-06, "loss": 0.5256, "step": 2525 }, { "epoch": 0.07741816844428098, "grad_norm": 1.9486174256625346, "learning_rate": 9.941163503015956e-06, "loss": 0.7447, "step": 2526 }, { "epoch": 0.07744881696702219, "grad_norm": 2.1457868621214784, "learning_rate": 9.941087562921823e-06, "loss": 0.7766, "step": 2527 }, { "epoch": 0.0774794654897634, "grad_norm": 2.0344530140817083, "learning_rate": 9.941011574141888e-06, "loss": 0.8272, "step": 2528 }, { "epoch": 0.0775101140125046, "grad_norm": 2.0091323461346016, "learning_rate": 9.940935536676903e-06, "loss": 0.6206, "step": 2529 }, { "epoch": 0.0775407625352458, "grad_norm": 2.251220095218001, "learning_rate": 9.940859450527615e-06, "loss": 0.7569, "step": 2530 }, { "epoch": 0.07757141105798701, "grad_norm": 1.1250010820286345, "learning_rate": 9.940783315694778e-06, "loss": 0.5174, "step": 2531 }, { "epoch": 0.0776020595807282, "grad_norm": 1.0460033311579888, "learning_rate": 9.940707132179134e-06, "loss": 0.503, "step": 2532 }, { "epoch": 0.07763270810346941, "grad_norm": 2.1939018914896797, "learning_rate": 9.940630899981442e-06, "loss": 0.6655, "step": 2533 }, { "epoch": 0.07766335662621061, "grad_norm": 2.298825303022901, "learning_rate": 9.940554619102448e-06, "loss": 0.7867, "step": 2534 }, { "epoch": 0.07769400514895182, "grad_norm": 2.1387828617046987, "learning_rate": 9.940478289542906e-06, "loss": 0.7103, "step": 2535 }, { "epoch": 0.07772465367169303, "grad_norm": 2.0147981472418106, "learning_rate": 9.940401911303568e-06, "loss": 0.7536, "step": 2536 }, { "epoch": 0.07775530219443423, "grad_norm": 2.3272818203993224, "learning_rate": 9.940325484385185e-06, "loss": 0.8691, "step": 2537 }, { "epoch": 0.07778595071717544, "grad_norm": 2.1379520455168737, "learning_rate": 9.94024900878851e-06, "loss": 0.8241, "step": 2538 }, { "epoch": 0.07781659923991664, "grad_norm": 2.1346762078971833, "learning_rate": 9.9401724845143e-06, "loss": 0.7754, "step": 2539 }, { "epoch": 0.07784724776265783, "grad_norm": 2.030065689364255, "learning_rate": 9.940095911563303e-06, "loss": 0.8266, "step": 2540 }, { "epoch": 0.07787789628539904, "grad_norm": 2.0966309439363733, "learning_rate": 9.94001928993628e-06, "loss": 0.8146, "step": 2541 }, { "epoch": 0.07790854480814025, "grad_norm": 2.1793571578072357, "learning_rate": 9.93994261963398e-06, "loss": 0.7726, "step": 2542 }, { "epoch": 0.07793919333088145, "grad_norm": 2.1478898111054954, "learning_rate": 9.939865900657164e-06, "loss": 0.747, "step": 2543 }, { "epoch": 0.07796984185362266, "grad_norm": 2.161908541933987, "learning_rate": 9.939789133006586e-06, "loss": 0.7832, "step": 2544 }, { "epoch": 0.07800049037636386, "grad_norm": 2.0360500114766977, "learning_rate": 9.939712316682998e-06, "loss": 0.7375, "step": 2545 }, { "epoch": 0.07803113889910507, "grad_norm": 2.1732304460672793, "learning_rate": 9.939635451687164e-06, "loss": 0.8383, "step": 2546 }, { "epoch": 0.07806178742184627, "grad_norm": 1.90691320399302, "learning_rate": 9.939558538019838e-06, "loss": 0.794, "step": 2547 }, { "epoch": 0.07809243594458747, "grad_norm": 2.109639243899091, "learning_rate": 9.939481575681777e-06, "loss": 0.7439, "step": 2548 }, { "epoch": 0.07812308446732867, "grad_norm": 1.745226856812606, "learning_rate": 9.93940456467374e-06, "loss": 0.7647, "step": 2549 }, { "epoch": 0.07815373299006988, "grad_norm": 2.2282965622374755, "learning_rate": 9.939327504996485e-06, "loss": 0.7292, "step": 2550 }, { "epoch": 0.07818438151281108, "grad_norm": 2.2904837586503, "learning_rate": 9.939250396650775e-06, "loss": 0.8609, "step": 2551 }, { "epoch": 0.07821503003555229, "grad_norm": 2.2155342692928466, "learning_rate": 9.939173239637365e-06, "loss": 0.7903, "step": 2552 }, { "epoch": 0.0782456785582935, "grad_norm": 2.100582229348818, "learning_rate": 9.939096033957019e-06, "loss": 0.7766, "step": 2553 }, { "epoch": 0.0782763270810347, "grad_norm": 1.888542769169912, "learning_rate": 9.939018779610494e-06, "loss": 0.7398, "step": 2554 }, { "epoch": 0.07830697560377589, "grad_norm": 2.050323055484214, "learning_rate": 9.938941476598554e-06, "loss": 0.711, "step": 2555 }, { "epoch": 0.0783376241265171, "grad_norm": 1.8626247571765295, "learning_rate": 9.938864124921961e-06, "loss": 0.707, "step": 2556 }, { "epoch": 0.0783682726492583, "grad_norm": 2.2899169897329004, "learning_rate": 9.938786724581476e-06, "loss": 0.7663, "step": 2557 }, { "epoch": 0.07839892117199951, "grad_norm": 2.3046169357327098, "learning_rate": 9.93870927557786e-06, "loss": 0.763, "step": 2558 }, { "epoch": 0.07842956969474071, "grad_norm": 2.0933369966051036, "learning_rate": 9.93863177791188e-06, "loss": 0.747, "step": 2559 }, { "epoch": 0.07846021821748192, "grad_norm": 2.31918137785533, "learning_rate": 9.938554231584296e-06, "loss": 0.8604, "step": 2560 }, { "epoch": 0.07849086674022313, "grad_norm": 2.125158031184007, "learning_rate": 9.938476636595874e-06, "loss": 0.8189, "step": 2561 }, { "epoch": 0.07852151526296433, "grad_norm": 1.9451010158633402, "learning_rate": 9.938398992947379e-06, "loss": 0.7744, "step": 2562 }, { "epoch": 0.07855216378570552, "grad_norm": 2.166808321550343, "learning_rate": 9.938321300639574e-06, "loss": 0.8271, "step": 2563 }, { "epoch": 0.07858281230844673, "grad_norm": 2.176376158591177, "learning_rate": 9.938243559673227e-06, "loss": 0.8324, "step": 2564 }, { "epoch": 0.07861346083118793, "grad_norm": 2.788663994102116, "learning_rate": 9.938165770049101e-06, "loss": 0.5617, "step": 2565 }, { "epoch": 0.07864410935392914, "grad_norm": 2.103875192234296, "learning_rate": 9.938087931767965e-06, "loss": 0.7503, "step": 2566 }, { "epoch": 0.07867475787667035, "grad_norm": 0.9765301103912973, "learning_rate": 9.938010044830585e-06, "loss": 0.5234, "step": 2567 }, { "epoch": 0.07870540639941155, "grad_norm": 2.190577472584811, "learning_rate": 9.937932109237729e-06, "loss": 0.8036, "step": 2568 }, { "epoch": 0.07873605492215276, "grad_norm": 1.4420305137674148, "learning_rate": 9.937854124990163e-06, "loss": 0.5339, "step": 2569 }, { "epoch": 0.07876670344489396, "grad_norm": 2.2003917708839458, "learning_rate": 9.937776092088659e-06, "loss": 0.7495, "step": 2570 }, { "epoch": 0.07879735196763515, "grad_norm": 1.9831153788979285, "learning_rate": 9.937698010533982e-06, "loss": 0.7157, "step": 2571 }, { "epoch": 0.07882800049037636, "grad_norm": 2.311629142726717, "learning_rate": 9.937619880326905e-06, "loss": 0.8535, "step": 2572 }, { "epoch": 0.07885864901311757, "grad_norm": 2.2787987080729986, "learning_rate": 9.937541701468194e-06, "loss": 0.8667, "step": 2573 }, { "epoch": 0.07888929753585877, "grad_norm": 1.811000671819652, "learning_rate": 9.937463473958624e-06, "loss": 0.8188, "step": 2574 }, { "epoch": 0.07891994605859998, "grad_norm": 1.3801825195064847, "learning_rate": 9.93738519779896e-06, "loss": 0.5323, "step": 2575 }, { "epoch": 0.07895059458134118, "grad_norm": 2.1794108687546494, "learning_rate": 9.937306872989977e-06, "loss": 0.7684, "step": 2576 }, { "epoch": 0.07898124310408239, "grad_norm": 2.403611169339792, "learning_rate": 9.937228499532448e-06, "loss": 0.8548, "step": 2577 }, { "epoch": 0.0790118916268236, "grad_norm": 2.2087779855122402, "learning_rate": 9.937150077427143e-06, "loss": 0.83, "step": 2578 }, { "epoch": 0.07904254014956479, "grad_norm": 2.1391945803906607, "learning_rate": 9.937071606674834e-06, "loss": 0.7663, "step": 2579 }, { "epoch": 0.07907318867230599, "grad_norm": 2.0799386228872723, "learning_rate": 9.936993087276294e-06, "loss": 0.7587, "step": 2580 }, { "epoch": 0.0791038371950472, "grad_norm": 2.037031490567396, "learning_rate": 9.9369145192323e-06, "loss": 0.7661, "step": 2581 }, { "epoch": 0.0791344857177884, "grad_norm": 2.019856237979349, "learning_rate": 9.936835902543624e-06, "loss": 0.6924, "step": 2582 }, { "epoch": 0.07916513424052961, "grad_norm": 1.0371816094717494, "learning_rate": 9.93675723721104e-06, "loss": 0.5148, "step": 2583 }, { "epoch": 0.07919578276327081, "grad_norm": 2.187577715457417, "learning_rate": 9.936678523235324e-06, "loss": 0.7335, "step": 2584 }, { "epoch": 0.07922643128601202, "grad_norm": 2.2465378543568963, "learning_rate": 9.936599760617251e-06, "loss": 0.8691, "step": 2585 }, { "epoch": 0.07925707980875321, "grad_norm": 2.2359897724365094, "learning_rate": 9.936520949357599e-06, "loss": 0.766, "step": 2586 }, { "epoch": 0.07928772833149442, "grad_norm": 0.9168083956045001, "learning_rate": 9.936442089457142e-06, "loss": 0.5273, "step": 2587 }, { "epoch": 0.07931837685423562, "grad_norm": 2.147544622626064, "learning_rate": 9.936363180916657e-06, "loss": 0.7532, "step": 2588 }, { "epoch": 0.07934902537697683, "grad_norm": 2.0416807194276556, "learning_rate": 9.936284223736924e-06, "loss": 0.7343, "step": 2589 }, { "epoch": 0.07937967389971803, "grad_norm": 2.0182264326452675, "learning_rate": 9.93620521791872e-06, "loss": 0.6877, "step": 2590 }, { "epoch": 0.07941032242245924, "grad_norm": 2.18679603598116, "learning_rate": 9.936126163462821e-06, "loss": 0.8142, "step": 2591 }, { "epoch": 0.07944097094520045, "grad_norm": 2.106315843736519, "learning_rate": 9.936047060370008e-06, "loss": 0.8552, "step": 2592 }, { "epoch": 0.07947161946794165, "grad_norm": 2.1488116859042807, "learning_rate": 9.935967908641063e-06, "loss": 0.6992, "step": 2593 }, { "epoch": 0.07950226799068284, "grad_norm": 2.114070633283222, "learning_rate": 9.935888708276761e-06, "loss": 0.7667, "step": 2594 }, { "epoch": 0.07953291651342405, "grad_norm": 1.0180557307937597, "learning_rate": 9.935809459277885e-06, "loss": 0.5268, "step": 2595 }, { "epoch": 0.07956356503616525, "grad_norm": 1.8667549270959345, "learning_rate": 9.935730161645216e-06, "loss": 0.7414, "step": 2596 }, { "epoch": 0.07959421355890646, "grad_norm": 2.0843885404405884, "learning_rate": 9.935650815379536e-06, "loss": 0.8309, "step": 2597 }, { "epoch": 0.07962486208164767, "grad_norm": 2.031838922173489, "learning_rate": 9.935571420481625e-06, "loss": 0.812, "step": 2598 }, { "epoch": 0.07965551060438887, "grad_norm": 2.124027439853411, "learning_rate": 9.935491976952265e-06, "loss": 0.8297, "step": 2599 }, { "epoch": 0.07968615912713008, "grad_norm": 1.8257753473502356, "learning_rate": 9.935412484792239e-06, "loss": 0.7276, "step": 2600 }, { "epoch": 0.07971680764987128, "grad_norm": 1.924028728170998, "learning_rate": 9.935332944002333e-06, "loss": 0.7586, "step": 2601 }, { "epoch": 0.07974745617261247, "grad_norm": 1.0461856966138636, "learning_rate": 9.935253354583327e-06, "loss": 0.5272, "step": 2602 }, { "epoch": 0.07977810469535368, "grad_norm": 1.99881612067181, "learning_rate": 9.935173716536008e-06, "loss": 0.7212, "step": 2603 }, { "epoch": 0.07980875321809489, "grad_norm": 2.2283936063898317, "learning_rate": 9.93509402986116e-06, "loss": 0.8235, "step": 2604 }, { "epoch": 0.07983940174083609, "grad_norm": 2.2239395676265103, "learning_rate": 9.935014294559567e-06, "loss": 0.8449, "step": 2605 }, { "epoch": 0.0798700502635773, "grad_norm": 2.2827843979036575, "learning_rate": 9.934934510632017e-06, "loss": 0.7598, "step": 2606 }, { "epoch": 0.0799006987863185, "grad_norm": 2.006446476930678, "learning_rate": 9.934854678079293e-06, "loss": 0.7861, "step": 2607 }, { "epoch": 0.07993134730905971, "grad_norm": 2.1928678949209544, "learning_rate": 9.934774796902182e-06, "loss": 0.7899, "step": 2608 }, { "epoch": 0.07996199583180091, "grad_norm": 1.839823536379241, "learning_rate": 9.934694867101475e-06, "loss": 0.696, "step": 2609 }, { "epoch": 0.0799926443545421, "grad_norm": 2.2542016581762403, "learning_rate": 9.934614888677955e-06, "loss": 0.7394, "step": 2610 }, { "epoch": 0.08002329287728331, "grad_norm": 1.9045915921386953, "learning_rate": 9.934534861632413e-06, "loss": 0.7996, "step": 2611 }, { "epoch": 0.08005394140002452, "grad_norm": 1.9065529046097507, "learning_rate": 9.934454785965636e-06, "loss": 0.8082, "step": 2612 }, { "epoch": 0.08008458992276572, "grad_norm": 2.2721349521013323, "learning_rate": 9.934374661678415e-06, "loss": 0.774, "step": 2613 }, { "epoch": 0.08011523844550693, "grad_norm": 1.9089382003669308, "learning_rate": 9.934294488771537e-06, "loss": 0.7629, "step": 2614 }, { "epoch": 0.08014588696824813, "grad_norm": 1.069085022848832, "learning_rate": 9.934214267245792e-06, "loss": 0.5049, "step": 2615 }, { "epoch": 0.08017653549098934, "grad_norm": 0.9669515884752163, "learning_rate": 9.934133997101972e-06, "loss": 0.5366, "step": 2616 }, { "epoch": 0.08020718401373053, "grad_norm": 2.357819841967656, "learning_rate": 9.934053678340867e-06, "loss": 0.8923, "step": 2617 }, { "epoch": 0.08023783253647174, "grad_norm": 2.401765129219085, "learning_rate": 9.933973310963269e-06, "loss": 0.725, "step": 2618 }, { "epoch": 0.08026848105921294, "grad_norm": 1.9715511235868584, "learning_rate": 9.933892894969968e-06, "loss": 0.6728, "step": 2619 }, { "epoch": 0.08029912958195415, "grad_norm": 0.9839184626850886, "learning_rate": 9.93381243036176e-06, "loss": 0.5338, "step": 2620 }, { "epoch": 0.08032977810469535, "grad_norm": 2.1689170320517173, "learning_rate": 9.933731917139436e-06, "loss": 0.8297, "step": 2621 }, { "epoch": 0.08036042662743656, "grad_norm": 1.9980084390065578, "learning_rate": 9.933651355303788e-06, "loss": 0.7464, "step": 2622 }, { "epoch": 0.08039107515017777, "grad_norm": 2.0828406606007492, "learning_rate": 9.933570744855609e-06, "loss": 0.734, "step": 2623 }, { "epoch": 0.08042172367291897, "grad_norm": 2.035964382954254, "learning_rate": 9.933490085795698e-06, "loss": 0.7864, "step": 2624 }, { "epoch": 0.08045237219566016, "grad_norm": 2.0418723961646803, "learning_rate": 9.933409378124847e-06, "loss": 0.8813, "step": 2625 }, { "epoch": 0.08048302071840137, "grad_norm": 1.8278905549834152, "learning_rate": 9.93332862184385e-06, "loss": 0.7725, "step": 2626 }, { "epoch": 0.08051366924114257, "grad_norm": 1.9241733404871277, "learning_rate": 9.933247816953505e-06, "loss": 0.8085, "step": 2627 }, { "epoch": 0.08054431776388378, "grad_norm": 2.068445952228297, "learning_rate": 9.933166963454604e-06, "loss": 0.8738, "step": 2628 }, { "epoch": 0.08057496628662499, "grad_norm": 2.301132557195125, "learning_rate": 9.93308606134795e-06, "loss": 0.8263, "step": 2629 }, { "epoch": 0.08060561480936619, "grad_norm": 1.9621064719857866, "learning_rate": 9.933005110634334e-06, "loss": 0.7056, "step": 2630 }, { "epoch": 0.0806362633321074, "grad_norm": 2.223031877667249, "learning_rate": 9.93292411131456e-06, "loss": 0.9015, "step": 2631 }, { "epoch": 0.0806669118548486, "grad_norm": 2.258594794098356, "learning_rate": 9.932843063389418e-06, "loss": 0.8222, "step": 2632 }, { "epoch": 0.0806975603775898, "grad_norm": 2.0417823344218426, "learning_rate": 9.932761966859716e-06, "loss": 0.7689, "step": 2633 }, { "epoch": 0.080728208900331, "grad_norm": 2.1300510153308765, "learning_rate": 9.932680821726246e-06, "loss": 0.6587, "step": 2634 }, { "epoch": 0.0807588574230722, "grad_norm": 1.9237699927895895, "learning_rate": 9.93259962798981e-06, "loss": 0.7909, "step": 2635 }, { "epoch": 0.08078950594581341, "grad_norm": 2.050053023510852, "learning_rate": 9.932518385651207e-06, "loss": 0.895, "step": 2636 }, { "epoch": 0.08082015446855462, "grad_norm": 3.9952859888658927, "learning_rate": 9.932437094711238e-06, "loss": 0.7414, "step": 2637 }, { "epoch": 0.08085080299129582, "grad_norm": 1.991961550741085, "learning_rate": 9.932355755170706e-06, "loss": 0.7644, "step": 2638 }, { "epoch": 0.08088145151403703, "grad_norm": 2.3592341507318784, "learning_rate": 9.93227436703041e-06, "loss": 0.6936, "step": 2639 }, { "epoch": 0.08091210003677823, "grad_norm": 2.0062711404642646, "learning_rate": 9.932192930291152e-06, "loss": 0.7602, "step": 2640 }, { "epoch": 0.08094274855951943, "grad_norm": 2.243147355842606, "learning_rate": 9.932111444953735e-06, "loss": 0.7479, "step": 2641 }, { "epoch": 0.08097339708226063, "grad_norm": 1.0916969280237623, "learning_rate": 9.932029911018965e-06, "loss": 0.5011, "step": 2642 }, { "epoch": 0.08100404560500184, "grad_norm": 2.094093949376908, "learning_rate": 9.931948328487639e-06, "loss": 0.7276, "step": 2643 }, { "epoch": 0.08103469412774304, "grad_norm": 2.1802407499350416, "learning_rate": 9.931866697360566e-06, "loss": 0.6982, "step": 2644 }, { "epoch": 0.08106534265048425, "grad_norm": 2.023893917699585, "learning_rate": 9.931785017638547e-06, "loss": 0.8371, "step": 2645 }, { "epoch": 0.08109599117322545, "grad_norm": 1.9962995688865126, "learning_rate": 9.931703289322389e-06, "loss": 0.766, "step": 2646 }, { "epoch": 0.08112663969596666, "grad_norm": 2.041102653432929, "learning_rate": 9.931621512412897e-06, "loss": 0.7356, "step": 2647 }, { "epoch": 0.08115728821870785, "grad_norm": 1.819214664267237, "learning_rate": 9.931539686910877e-06, "loss": 0.6833, "step": 2648 }, { "epoch": 0.08118793674144906, "grad_norm": 2.1809213089941637, "learning_rate": 9.931457812817135e-06, "loss": 0.7648, "step": 2649 }, { "epoch": 0.08121858526419026, "grad_norm": 2.0250108659952732, "learning_rate": 9.931375890132475e-06, "loss": 0.7682, "step": 2650 }, { "epoch": 0.08124923378693147, "grad_norm": 1.9957946820813048, "learning_rate": 9.93129391885771e-06, "loss": 0.7788, "step": 2651 }, { "epoch": 0.08127988230967267, "grad_norm": 2.1546047947104823, "learning_rate": 9.93121189899364e-06, "loss": 0.7635, "step": 2652 }, { "epoch": 0.08131053083241388, "grad_norm": 0.9653337726776143, "learning_rate": 9.931129830541082e-06, "loss": 0.54, "step": 2653 }, { "epoch": 0.08134117935515509, "grad_norm": 2.3429695538460447, "learning_rate": 9.931047713500836e-06, "loss": 0.8396, "step": 2654 }, { "epoch": 0.08137182787789629, "grad_norm": 1.9589407064520505, "learning_rate": 9.930965547873719e-06, "loss": 0.7868, "step": 2655 }, { "epoch": 0.08140247640063748, "grad_norm": 2.2595705882534323, "learning_rate": 9.930883333660535e-06, "loss": 0.7781, "step": 2656 }, { "epoch": 0.08143312492337869, "grad_norm": 2.2845229311101956, "learning_rate": 9.930801070862095e-06, "loss": 0.8132, "step": 2657 }, { "epoch": 0.0814637734461199, "grad_norm": 2.4357689847699997, "learning_rate": 9.930718759479213e-06, "loss": 0.6953, "step": 2658 }, { "epoch": 0.0814944219688611, "grad_norm": 2.1688507623400652, "learning_rate": 9.930636399512694e-06, "loss": 0.7458, "step": 2659 }, { "epoch": 0.0815250704916023, "grad_norm": 1.9581255447888435, "learning_rate": 9.930553990963355e-06, "loss": 0.8095, "step": 2660 }, { "epoch": 0.08155571901434351, "grad_norm": 2.7088544412410602, "learning_rate": 9.930471533832005e-06, "loss": 0.8161, "step": 2661 }, { "epoch": 0.08158636753708472, "grad_norm": 2.0000037823606873, "learning_rate": 9.930389028119458e-06, "loss": 0.82, "step": 2662 }, { "epoch": 0.08161701605982592, "grad_norm": 2.0898535002012144, "learning_rate": 9.930306473826528e-06, "loss": 0.7601, "step": 2663 }, { "epoch": 0.08164766458256711, "grad_norm": 2.040329955896837, "learning_rate": 9.930223870954025e-06, "loss": 0.6739, "step": 2664 }, { "epoch": 0.08167831310530832, "grad_norm": 2.1972264361178007, "learning_rate": 9.930141219502765e-06, "loss": 0.7748, "step": 2665 }, { "epoch": 0.08170896162804953, "grad_norm": 2.139138856620026, "learning_rate": 9.930058519473562e-06, "loss": 0.7579, "step": 2666 }, { "epoch": 0.08173961015079073, "grad_norm": 1.765016123623868, "learning_rate": 9.929975770867231e-06, "loss": 0.8372, "step": 2667 }, { "epoch": 0.08177025867353194, "grad_norm": 1.9538766749452612, "learning_rate": 9.92989297368459e-06, "loss": 0.722, "step": 2668 }, { "epoch": 0.08180090719627314, "grad_norm": 1.9265619439713244, "learning_rate": 9.92981012792645e-06, "loss": 0.7931, "step": 2669 }, { "epoch": 0.08183155571901435, "grad_norm": 2.1070003175380005, "learning_rate": 9.929727233593628e-06, "loss": 0.7889, "step": 2670 }, { "epoch": 0.08186220424175555, "grad_norm": 2.2159299511155495, "learning_rate": 9.929644290686945e-06, "loss": 0.8527, "step": 2671 }, { "epoch": 0.08189285276449675, "grad_norm": 2.1711587538201074, "learning_rate": 9.929561299207213e-06, "loss": 0.8594, "step": 2672 }, { "epoch": 0.08192350128723795, "grad_norm": 0.946021742397934, "learning_rate": 9.929478259155253e-06, "loss": 0.5197, "step": 2673 }, { "epoch": 0.08195414980997916, "grad_norm": 1.9399775228684495, "learning_rate": 9.929395170531883e-06, "loss": 0.6095, "step": 2674 }, { "epoch": 0.08198479833272036, "grad_norm": 1.827118468392208, "learning_rate": 9.929312033337922e-06, "loss": 0.7473, "step": 2675 }, { "epoch": 0.08201544685546157, "grad_norm": 2.2128249938303024, "learning_rate": 9.929228847574186e-06, "loss": 0.7377, "step": 2676 }, { "epoch": 0.08204609537820277, "grad_norm": 2.0421937595756985, "learning_rate": 9.929145613241497e-06, "loss": 0.7468, "step": 2677 }, { "epoch": 0.08207674390094398, "grad_norm": 1.9750412302344689, "learning_rate": 9.929062330340678e-06, "loss": 0.7513, "step": 2678 }, { "epoch": 0.08210739242368517, "grad_norm": 1.9468377988511165, "learning_rate": 9.928978998872543e-06, "loss": 0.8253, "step": 2679 }, { "epoch": 0.08213804094642638, "grad_norm": 2.3948610411582627, "learning_rate": 9.92889561883792e-06, "loss": 0.7849, "step": 2680 }, { "epoch": 0.08216868946916758, "grad_norm": 0.9700834429882567, "learning_rate": 9.928812190237627e-06, "loss": 0.5113, "step": 2681 }, { "epoch": 0.08219933799190879, "grad_norm": 2.4819788713890905, "learning_rate": 9.928728713072485e-06, "loss": 0.7308, "step": 2682 }, { "epoch": 0.08222998651465, "grad_norm": 2.1376906176995107, "learning_rate": 9.928645187343318e-06, "loss": 0.726, "step": 2683 }, { "epoch": 0.0822606350373912, "grad_norm": 2.043297350865831, "learning_rate": 9.92856161305095e-06, "loss": 0.7985, "step": 2684 }, { "epoch": 0.0822912835601324, "grad_norm": 2.243795773612064, "learning_rate": 9.928477990196202e-06, "loss": 0.6816, "step": 2685 }, { "epoch": 0.08232193208287361, "grad_norm": 2.024446809071369, "learning_rate": 9.928394318779901e-06, "loss": 0.8322, "step": 2686 }, { "epoch": 0.0823525806056148, "grad_norm": 0.9903457498452554, "learning_rate": 9.928310598802868e-06, "loss": 0.5125, "step": 2687 }, { "epoch": 0.08238322912835601, "grad_norm": 1.9963175500175137, "learning_rate": 9.92822683026593e-06, "loss": 0.6784, "step": 2688 }, { "epoch": 0.08241387765109721, "grad_norm": 2.077286574291309, "learning_rate": 9.928143013169912e-06, "loss": 0.7553, "step": 2689 }, { "epoch": 0.08244452617383842, "grad_norm": 2.1417329533038574, "learning_rate": 9.928059147515643e-06, "loss": 0.8, "step": 2690 }, { "epoch": 0.08247517469657963, "grad_norm": 1.8886306218841271, "learning_rate": 9.927975233303943e-06, "loss": 0.6654, "step": 2691 }, { "epoch": 0.08250582321932083, "grad_norm": 2.147768198441069, "learning_rate": 9.927891270535643e-06, "loss": 0.7314, "step": 2692 }, { "epoch": 0.08253647174206204, "grad_norm": 1.9054752595366884, "learning_rate": 9.92780725921157e-06, "loss": 0.692, "step": 2693 }, { "epoch": 0.08256712026480324, "grad_norm": 2.3081428084371183, "learning_rate": 9.927723199332551e-06, "loss": 0.9531, "step": 2694 }, { "epoch": 0.08259776878754443, "grad_norm": 2.2627684623907487, "learning_rate": 9.927639090899415e-06, "loss": 0.6746, "step": 2695 }, { "epoch": 0.08262841731028564, "grad_norm": 1.83490780301932, "learning_rate": 9.927554933912988e-06, "loss": 0.785, "step": 2696 }, { "epoch": 0.08265906583302685, "grad_norm": 2.1151988906287023, "learning_rate": 9.927470728374105e-06, "loss": 0.821, "step": 2697 }, { "epoch": 0.08268971435576805, "grad_norm": 1.7993013637147561, "learning_rate": 9.927386474283589e-06, "loss": 0.6034, "step": 2698 }, { "epoch": 0.08272036287850926, "grad_norm": 2.083375037821424, "learning_rate": 9.927302171642275e-06, "loss": 0.8317, "step": 2699 }, { "epoch": 0.08275101140125046, "grad_norm": 2.14812827308437, "learning_rate": 9.927217820450992e-06, "loss": 0.7342, "step": 2700 }, { "epoch": 0.08278165992399167, "grad_norm": 2.035772923944073, "learning_rate": 9.92713342071057e-06, "loss": 0.7323, "step": 2701 }, { "epoch": 0.08281230844673287, "grad_norm": 2.347430087626188, "learning_rate": 9.927048972421843e-06, "loss": 0.7008, "step": 2702 }, { "epoch": 0.08284295696947407, "grad_norm": 2.0012923141000982, "learning_rate": 9.92696447558564e-06, "loss": 0.7278, "step": 2703 }, { "epoch": 0.08287360549221527, "grad_norm": 2.183570756749403, "learning_rate": 9.926879930202798e-06, "loss": 0.7851, "step": 2704 }, { "epoch": 0.08290425401495648, "grad_norm": 1.9484025493035653, "learning_rate": 9.926795336274146e-06, "loss": 0.7313, "step": 2705 }, { "epoch": 0.08293490253769768, "grad_norm": 1.0248378547134052, "learning_rate": 9.926710693800518e-06, "loss": 0.5219, "step": 2706 }, { "epoch": 0.08296555106043889, "grad_norm": 2.002229902468972, "learning_rate": 9.92662600278275e-06, "loss": 0.7521, "step": 2707 }, { "epoch": 0.0829961995831801, "grad_norm": 2.1153105030156896, "learning_rate": 9.926541263221676e-06, "loss": 0.8354, "step": 2708 }, { "epoch": 0.0830268481059213, "grad_norm": 1.994100166828228, "learning_rate": 9.926456475118131e-06, "loss": 0.7543, "step": 2709 }, { "epoch": 0.08305749662866249, "grad_norm": 0.957293138763307, "learning_rate": 9.926371638472949e-06, "loss": 0.5265, "step": 2710 }, { "epoch": 0.0830881451514037, "grad_norm": 1.944753651996318, "learning_rate": 9.926286753286966e-06, "loss": 0.8413, "step": 2711 }, { "epoch": 0.0831187936741449, "grad_norm": 2.118739396257106, "learning_rate": 9.92620181956102e-06, "loss": 0.9347, "step": 2712 }, { "epoch": 0.08314944219688611, "grad_norm": 0.9545202180601586, "learning_rate": 9.926116837295948e-06, "loss": 0.5313, "step": 2713 }, { "epoch": 0.08318009071962731, "grad_norm": 1.997013036715125, "learning_rate": 9.926031806492584e-06, "loss": 0.7559, "step": 2714 }, { "epoch": 0.08321073924236852, "grad_norm": 1.9193049315574184, "learning_rate": 9.92594672715177e-06, "loss": 0.7974, "step": 2715 }, { "epoch": 0.08324138776510973, "grad_norm": 1.9299760119240283, "learning_rate": 9.925861599274342e-06, "loss": 0.7567, "step": 2716 }, { "epoch": 0.08327203628785093, "grad_norm": 2.0124862569358455, "learning_rate": 9.92577642286114e-06, "loss": 0.7183, "step": 2717 }, { "epoch": 0.08330268481059212, "grad_norm": 2.0810786675985535, "learning_rate": 9.925691197913001e-06, "loss": 0.8251, "step": 2718 }, { "epoch": 0.08333333333333333, "grad_norm": 2.2528894819449516, "learning_rate": 9.925605924430768e-06, "loss": 0.8267, "step": 2719 }, { "epoch": 0.08336398185607453, "grad_norm": 2.1494875925984767, "learning_rate": 9.925520602415278e-06, "loss": 0.8846, "step": 2720 }, { "epoch": 0.08339463037881574, "grad_norm": 2.0074835076572652, "learning_rate": 9.925435231867374e-06, "loss": 0.7752, "step": 2721 }, { "epoch": 0.08342527890155695, "grad_norm": 1.188088684695165, "learning_rate": 9.925349812787897e-06, "loss": 0.5281, "step": 2722 }, { "epoch": 0.08345592742429815, "grad_norm": 1.9276448470183487, "learning_rate": 9.925264345177687e-06, "loss": 0.7177, "step": 2723 }, { "epoch": 0.08348657594703936, "grad_norm": 2.1413251103192117, "learning_rate": 9.925178829037588e-06, "loss": 0.7859, "step": 2724 }, { "epoch": 0.08351722446978056, "grad_norm": 2.4491100160445765, "learning_rate": 9.925093264368441e-06, "loss": 0.9074, "step": 2725 }, { "epoch": 0.08354787299252175, "grad_norm": 1.9192015340379056, "learning_rate": 9.925007651171091e-06, "loss": 0.7473, "step": 2726 }, { "epoch": 0.08357852151526296, "grad_norm": 2.097733156929076, "learning_rate": 9.924921989446382e-06, "loss": 0.832, "step": 2727 }, { "epoch": 0.08360917003800417, "grad_norm": 2.0486124273728294, "learning_rate": 9.924836279195153e-06, "loss": 0.77, "step": 2728 }, { "epoch": 0.08363981856074537, "grad_norm": 2.2337014709340113, "learning_rate": 9.924750520418254e-06, "loss": 0.7067, "step": 2729 }, { "epoch": 0.08367046708348658, "grad_norm": 2.207780097592895, "learning_rate": 9.924664713116528e-06, "loss": 0.773, "step": 2730 }, { "epoch": 0.08370111560622778, "grad_norm": 1.8683094047192397, "learning_rate": 9.92457885729082e-06, "loss": 0.8015, "step": 2731 }, { "epoch": 0.08373176412896899, "grad_norm": 1.941709115891972, "learning_rate": 9.924492952941977e-06, "loss": 0.7653, "step": 2732 }, { "epoch": 0.0837624126517102, "grad_norm": 2.190193418384482, "learning_rate": 9.924407000070844e-06, "loss": 0.7633, "step": 2733 }, { "epoch": 0.08379306117445139, "grad_norm": 2.0476205163926484, "learning_rate": 9.924320998678271e-06, "loss": 0.772, "step": 2734 }, { "epoch": 0.08382370969719259, "grad_norm": 2.6153375650690553, "learning_rate": 9.924234948765101e-06, "loss": 0.8117, "step": 2735 }, { "epoch": 0.0838543582199338, "grad_norm": 2.0024920246901576, "learning_rate": 9.924148850332185e-06, "loss": 0.7457, "step": 2736 }, { "epoch": 0.083885006742675, "grad_norm": 1.8692939517643228, "learning_rate": 9.92406270338037e-06, "loss": 0.6931, "step": 2737 }, { "epoch": 0.08391565526541621, "grad_norm": 2.014969772318779, "learning_rate": 9.923976507910506e-06, "loss": 0.8039, "step": 2738 }, { "epoch": 0.08394630378815741, "grad_norm": 2.075020302485697, "learning_rate": 9.923890263923443e-06, "loss": 0.8775, "step": 2739 }, { "epoch": 0.08397695231089862, "grad_norm": 2.2322615673794353, "learning_rate": 9.923803971420027e-06, "loss": 0.8261, "step": 2740 }, { "epoch": 0.08400760083363981, "grad_norm": 2.040541391123931, "learning_rate": 9.923717630401113e-06, "loss": 0.8056, "step": 2741 }, { "epoch": 0.08403824935638102, "grad_norm": 1.7140626235277618, "learning_rate": 9.923631240867546e-06, "loss": 0.7164, "step": 2742 }, { "epoch": 0.08406889787912222, "grad_norm": 1.8448586815668266, "learning_rate": 9.923544802820183e-06, "loss": 0.6785, "step": 2743 }, { "epoch": 0.08409954640186343, "grad_norm": 2.077553388298157, "learning_rate": 9.923458316259872e-06, "loss": 0.7819, "step": 2744 }, { "epoch": 0.08413019492460463, "grad_norm": 2.137741980802605, "learning_rate": 9.923371781187468e-06, "loss": 0.7578, "step": 2745 }, { "epoch": 0.08416084344734584, "grad_norm": 2.0637183769713956, "learning_rate": 9.923285197603823e-06, "loss": 0.7628, "step": 2746 }, { "epoch": 0.08419149197008705, "grad_norm": 2.0008803421924286, "learning_rate": 9.923198565509787e-06, "loss": 0.7916, "step": 2747 }, { "epoch": 0.08422214049282825, "grad_norm": 3.432094555275299, "learning_rate": 9.923111884906216e-06, "loss": 0.7469, "step": 2748 }, { "epoch": 0.08425278901556944, "grad_norm": 2.2027503547809526, "learning_rate": 9.923025155793965e-06, "loss": 0.8059, "step": 2749 }, { "epoch": 0.08428343753831065, "grad_norm": 1.9071661588274997, "learning_rate": 9.922938378173887e-06, "loss": 0.7496, "step": 2750 }, { "epoch": 0.08431408606105185, "grad_norm": 2.1196374313843975, "learning_rate": 9.922851552046837e-06, "loss": 0.8323, "step": 2751 }, { "epoch": 0.08434473458379306, "grad_norm": 2.287333915391311, "learning_rate": 9.922764677413672e-06, "loss": 0.7613, "step": 2752 }, { "epoch": 0.08437538310653427, "grad_norm": 2.158343207595034, "learning_rate": 9.922677754275248e-06, "loss": 0.7573, "step": 2753 }, { "epoch": 0.08440603162927547, "grad_norm": 2.31561648838972, "learning_rate": 9.922590782632419e-06, "loss": 0.8704, "step": 2754 }, { "epoch": 0.08443668015201668, "grad_norm": 1.9088130753049506, "learning_rate": 9.922503762486044e-06, "loss": 0.8272, "step": 2755 }, { "epoch": 0.08446732867475788, "grad_norm": 2.216923668357904, "learning_rate": 9.92241669383698e-06, "loss": 0.807, "step": 2756 }, { "epoch": 0.08449797719749907, "grad_norm": 2.14709551239781, "learning_rate": 9.922329576686084e-06, "loss": 0.7609, "step": 2757 }, { "epoch": 0.08452862572024028, "grad_norm": 2.2135466892303044, "learning_rate": 9.922242411034216e-06, "loss": 0.7439, "step": 2758 }, { "epoch": 0.08455927424298149, "grad_norm": 1.7462872991461085, "learning_rate": 9.922155196882234e-06, "loss": 0.7314, "step": 2759 }, { "epoch": 0.08458992276572269, "grad_norm": 1.1629891457508263, "learning_rate": 9.922067934230999e-06, "loss": 0.5376, "step": 2760 }, { "epoch": 0.0846205712884639, "grad_norm": 2.006020400700199, "learning_rate": 9.921980623081366e-06, "loss": 0.6975, "step": 2761 }, { "epoch": 0.0846512198112051, "grad_norm": 1.8153580475909896, "learning_rate": 9.9218932634342e-06, "loss": 0.6997, "step": 2762 }, { "epoch": 0.08468186833394631, "grad_norm": 1.8826183142717494, "learning_rate": 9.921805855290362e-06, "loss": 0.6794, "step": 2763 }, { "epoch": 0.08471251685668751, "grad_norm": 1.8651210347453582, "learning_rate": 9.92171839865071e-06, "loss": 0.7024, "step": 2764 }, { "epoch": 0.0847431653794287, "grad_norm": 2.0305734068417727, "learning_rate": 9.921630893516108e-06, "loss": 0.753, "step": 2765 }, { "epoch": 0.08477381390216991, "grad_norm": 1.9568156123874307, "learning_rate": 9.921543339887419e-06, "loss": 0.7509, "step": 2766 }, { "epoch": 0.08480446242491112, "grad_norm": 1.0154494948611328, "learning_rate": 9.921455737765502e-06, "loss": 0.5409, "step": 2767 }, { "epoch": 0.08483511094765232, "grad_norm": 2.233116986633063, "learning_rate": 9.921368087151222e-06, "loss": 0.6884, "step": 2768 }, { "epoch": 0.08486575947039353, "grad_norm": 2.2276675947506526, "learning_rate": 9.921280388045444e-06, "loss": 0.8746, "step": 2769 }, { "epoch": 0.08489640799313473, "grad_norm": 1.9130941458752173, "learning_rate": 9.92119264044903e-06, "loss": 0.7472, "step": 2770 }, { "epoch": 0.08492705651587594, "grad_norm": 2.1490052151715444, "learning_rate": 9.921104844362849e-06, "loss": 0.793, "step": 2771 }, { "epoch": 0.08495770503861713, "grad_norm": 2.144562356683047, "learning_rate": 9.921016999787761e-06, "loss": 0.7797, "step": 2772 }, { "epoch": 0.08498835356135834, "grad_norm": 2.072712436070739, "learning_rate": 9.920929106724633e-06, "loss": 0.7052, "step": 2773 }, { "epoch": 0.08501900208409954, "grad_norm": 2.2028093459776796, "learning_rate": 9.92084116517433e-06, "loss": 0.8755, "step": 2774 }, { "epoch": 0.08504965060684075, "grad_norm": 0.9803874169237852, "learning_rate": 9.920753175137723e-06, "loss": 0.5332, "step": 2775 }, { "epoch": 0.08508029912958195, "grad_norm": 2.119179255792458, "learning_rate": 9.920665136615675e-06, "loss": 0.7684, "step": 2776 }, { "epoch": 0.08511094765232316, "grad_norm": 1.8056766449545016, "learning_rate": 9.920577049609054e-06, "loss": 0.7032, "step": 2777 }, { "epoch": 0.08514159617506437, "grad_norm": 0.8637155928681947, "learning_rate": 9.920488914118727e-06, "loss": 0.5199, "step": 2778 }, { "epoch": 0.08517224469780557, "grad_norm": 2.205405936080973, "learning_rate": 9.920400730145566e-06, "loss": 0.7786, "step": 2779 }, { "epoch": 0.08520289322054676, "grad_norm": 0.8901938560095461, "learning_rate": 9.920312497690436e-06, "loss": 0.5298, "step": 2780 }, { "epoch": 0.08523354174328797, "grad_norm": 2.117339772048953, "learning_rate": 9.92022421675421e-06, "loss": 0.8161, "step": 2781 }, { "epoch": 0.08526419026602917, "grad_norm": 1.965102640845952, "learning_rate": 9.920135887337754e-06, "loss": 0.8807, "step": 2782 }, { "epoch": 0.08529483878877038, "grad_norm": 1.8508862540395732, "learning_rate": 9.92004750944194e-06, "loss": 0.7359, "step": 2783 }, { "epoch": 0.08532548731151159, "grad_norm": 1.9851518627279487, "learning_rate": 9.919959083067641e-06, "loss": 0.6464, "step": 2784 }, { "epoch": 0.08535613583425279, "grad_norm": 0.8965453283956124, "learning_rate": 9.919870608215726e-06, "loss": 0.5099, "step": 2785 }, { "epoch": 0.085386784356994, "grad_norm": 1.8538893773509266, "learning_rate": 9.919782084887066e-06, "loss": 0.7797, "step": 2786 }, { "epoch": 0.0854174328797352, "grad_norm": 2.1269979505346663, "learning_rate": 9.919693513082534e-06, "loss": 0.8173, "step": 2787 }, { "epoch": 0.0854480814024764, "grad_norm": 2.0300480643624623, "learning_rate": 9.919604892803003e-06, "loss": 0.7424, "step": 2788 }, { "epoch": 0.0854787299252176, "grad_norm": 1.9366762021611592, "learning_rate": 9.919516224049348e-06, "loss": 0.7304, "step": 2789 }, { "epoch": 0.0855093784479588, "grad_norm": 1.8564847842167818, "learning_rate": 9.91942750682244e-06, "loss": 0.7088, "step": 2790 }, { "epoch": 0.08554002697070001, "grad_norm": 1.9958945138848723, "learning_rate": 9.919338741123155e-06, "loss": 0.6621, "step": 2791 }, { "epoch": 0.08557067549344122, "grad_norm": 2.2490696362600273, "learning_rate": 9.919249926952365e-06, "loss": 0.7708, "step": 2792 }, { "epoch": 0.08560132401618242, "grad_norm": 1.8122948864437145, "learning_rate": 9.919161064310948e-06, "loss": 0.7992, "step": 2793 }, { "epoch": 0.08563197253892363, "grad_norm": 2.072339789315475, "learning_rate": 9.919072153199778e-06, "loss": 0.7765, "step": 2794 }, { "epoch": 0.08566262106166483, "grad_norm": 1.0701218058652677, "learning_rate": 9.91898319361973e-06, "loss": 0.5221, "step": 2795 }, { "epoch": 0.08569326958440603, "grad_norm": 1.9093408097506683, "learning_rate": 9.918894185571684e-06, "loss": 0.7663, "step": 2796 }, { "epoch": 0.08572391810714723, "grad_norm": 1.7303197479772467, "learning_rate": 9.918805129056514e-06, "loss": 0.7675, "step": 2797 }, { "epoch": 0.08575456662988844, "grad_norm": 2.0328303553946863, "learning_rate": 9.9187160240751e-06, "loss": 0.7939, "step": 2798 }, { "epoch": 0.08578521515262964, "grad_norm": 1.9480008159992905, "learning_rate": 9.918626870628317e-06, "loss": 0.7056, "step": 2799 }, { "epoch": 0.08581586367537085, "grad_norm": 1.8710461832947172, "learning_rate": 9.918537668717045e-06, "loss": 0.7747, "step": 2800 }, { "epoch": 0.08584651219811205, "grad_norm": 1.9437641460997337, "learning_rate": 9.918448418342164e-06, "loss": 0.8291, "step": 2801 }, { "epoch": 0.08587716072085326, "grad_norm": 1.9915250233439314, "learning_rate": 9.918359119504552e-06, "loss": 0.8349, "step": 2802 }, { "epoch": 0.08590780924359445, "grad_norm": 2.1426013232700685, "learning_rate": 9.918269772205089e-06, "loss": 0.7124, "step": 2803 }, { "epoch": 0.08593845776633566, "grad_norm": 2.1261012120825624, "learning_rate": 9.918180376444655e-06, "loss": 0.7489, "step": 2804 }, { "epoch": 0.08596910628907686, "grad_norm": 1.9429376180166744, "learning_rate": 9.918090932224131e-06, "loss": 0.6202, "step": 2805 }, { "epoch": 0.08599975481181807, "grad_norm": 2.117855517430449, "learning_rate": 9.9180014395444e-06, "loss": 0.8366, "step": 2806 }, { "epoch": 0.08603040333455927, "grad_norm": 1.845240015292876, "learning_rate": 9.917911898406343e-06, "loss": 0.7839, "step": 2807 }, { "epoch": 0.08606105185730048, "grad_norm": 2.1682175476347494, "learning_rate": 9.91782230881084e-06, "loss": 0.7597, "step": 2808 }, { "epoch": 0.08609170038004169, "grad_norm": 2.327857199602804, "learning_rate": 9.917732670758776e-06, "loss": 0.6906, "step": 2809 }, { "epoch": 0.08612234890278289, "grad_norm": 2.1002146628530327, "learning_rate": 9.917642984251034e-06, "loss": 0.7516, "step": 2810 }, { "epoch": 0.08615299742552408, "grad_norm": 2.3170323169153697, "learning_rate": 9.9175532492885e-06, "loss": 0.8391, "step": 2811 }, { "epoch": 0.08618364594826529, "grad_norm": 1.1865599874969224, "learning_rate": 9.917463465872051e-06, "loss": 0.5243, "step": 2812 }, { "epoch": 0.0862142944710065, "grad_norm": 1.9264069232973606, "learning_rate": 9.91737363400258e-06, "loss": 0.7552, "step": 2813 }, { "epoch": 0.0862449429937477, "grad_norm": 1.8834524415368594, "learning_rate": 9.917283753680966e-06, "loss": 0.8211, "step": 2814 }, { "epoch": 0.0862755915164889, "grad_norm": 0.9323671543326179, "learning_rate": 9.917193824908097e-06, "loss": 0.5378, "step": 2815 }, { "epoch": 0.08630624003923011, "grad_norm": 1.9771381434887447, "learning_rate": 9.91710384768486e-06, "loss": 0.8271, "step": 2816 }, { "epoch": 0.08633688856197132, "grad_norm": 1.9993794305312345, "learning_rate": 9.91701382201214e-06, "loss": 0.8479, "step": 2817 }, { "epoch": 0.08636753708471252, "grad_norm": 2.0805328763980198, "learning_rate": 9.916923747890825e-06, "loss": 0.6801, "step": 2818 }, { "epoch": 0.08639818560745371, "grad_norm": 1.9917716460625376, "learning_rate": 9.916833625321804e-06, "loss": 0.842, "step": 2819 }, { "epoch": 0.08642883413019492, "grad_norm": 2.118796769684119, "learning_rate": 9.91674345430596e-06, "loss": 0.8499, "step": 2820 }, { "epoch": 0.08645948265293613, "grad_norm": 2.2622989212996196, "learning_rate": 9.916653234844188e-06, "loss": 0.783, "step": 2821 }, { "epoch": 0.08649013117567733, "grad_norm": 1.7289177033957353, "learning_rate": 9.916562966937371e-06, "loss": 0.7306, "step": 2822 }, { "epoch": 0.08652077969841854, "grad_norm": 2.1357310562439884, "learning_rate": 9.916472650586404e-06, "loss": 0.713, "step": 2823 }, { "epoch": 0.08655142822115974, "grad_norm": 1.5069078748728122, "learning_rate": 9.916382285792172e-06, "loss": 0.5351, "step": 2824 }, { "epoch": 0.08658207674390095, "grad_norm": 1.2114369162341239, "learning_rate": 9.916291872555568e-06, "loss": 0.5523, "step": 2825 }, { "epoch": 0.08661272526664215, "grad_norm": 1.9300664885396714, "learning_rate": 9.916201410877481e-06, "loss": 0.7412, "step": 2826 }, { "epoch": 0.08664337378938335, "grad_norm": 2.156616208077294, "learning_rate": 9.916110900758806e-06, "loss": 0.7746, "step": 2827 }, { "epoch": 0.08667402231212455, "grad_norm": 2.1482388124053173, "learning_rate": 9.916020342200432e-06, "loss": 0.8164, "step": 2828 }, { "epoch": 0.08670467083486576, "grad_norm": 1.8587918641781775, "learning_rate": 9.915929735203252e-06, "loss": 0.6577, "step": 2829 }, { "epoch": 0.08673531935760696, "grad_norm": 1.9129004184248062, "learning_rate": 9.915839079768156e-06, "loss": 0.7364, "step": 2830 }, { "epoch": 0.08676596788034817, "grad_norm": 1.887620758025398, "learning_rate": 9.915748375896041e-06, "loss": 0.7813, "step": 2831 }, { "epoch": 0.08679661640308937, "grad_norm": 1.9748703226235613, "learning_rate": 9.9156576235878e-06, "loss": 0.8062, "step": 2832 }, { "epoch": 0.08682726492583058, "grad_norm": 1.8398942114196957, "learning_rate": 9.915566822844326e-06, "loss": 0.7561, "step": 2833 }, { "epoch": 0.08685791344857179, "grad_norm": 2.5202876586753393, "learning_rate": 9.915475973666516e-06, "loss": 0.5579, "step": 2834 }, { "epoch": 0.08688856197131298, "grad_norm": 2.1937201868788536, "learning_rate": 9.915385076055262e-06, "loss": 0.8184, "step": 2835 }, { "epoch": 0.08691921049405418, "grad_norm": 2.5510772539046958, "learning_rate": 9.915294130011461e-06, "loss": 0.7483, "step": 2836 }, { "epoch": 0.08694985901679539, "grad_norm": 2.3327852394131185, "learning_rate": 9.915203135536011e-06, "loss": 0.7549, "step": 2837 }, { "epoch": 0.0869805075395366, "grad_norm": 2.1868755801900805, "learning_rate": 9.915112092629806e-06, "loss": 0.7461, "step": 2838 }, { "epoch": 0.0870111560622778, "grad_norm": 2.2241405061038044, "learning_rate": 9.915021001293743e-06, "loss": 0.6821, "step": 2839 }, { "epoch": 0.087041804585019, "grad_norm": 2.210451552625109, "learning_rate": 9.914929861528722e-06, "loss": 0.793, "step": 2840 }, { "epoch": 0.08707245310776021, "grad_norm": 2.0257172398183663, "learning_rate": 9.914838673335639e-06, "loss": 0.7354, "step": 2841 }, { "epoch": 0.0871031016305014, "grad_norm": 2.294219070356522, "learning_rate": 9.914747436715394e-06, "loss": 0.8216, "step": 2842 }, { "epoch": 0.08713375015324261, "grad_norm": 2.110928279767287, "learning_rate": 9.914656151668884e-06, "loss": 0.8283, "step": 2843 }, { "epoch": 0.08716439867598381, "grad_norm": 2.085429446062461, "learning_rate": 9.914564818197008e-06, "loss": 0.7515, "step": 2844 }, { "epoch": 0.08719504719872502, "grad_norm": 2.5638782517425756, "learning_rate": 9.914473436300668e-06, "loss": 0.8064, "step": 2845 }, { "epoch": 0.08722569572146623, "grad_norm": 2.1528600674030542, "learning_rate": 9.914382005980766e-06, "loss": 0.6968, "step": 2846 }, { "epoch": 0.08725634424420743, "grad_norm": 2.129417509092398, "learning_rate": 9.9142905272382e-06, "loss": 0.7473, "step": 2847 }, { "epoch": 0.08728699276694864, "grad_norm": 2.092657300024219, "learning_rate": 9.914199000073871e-06, "loss": 0.817, "step": 2848 }, { "epoch": 0.08731764128968984, "grad_norm": 1.9888538811286924, "learning_rate": 9.91410742448868e-06, "loss": 0.5512, "step": 2849 }, { "epoch": 0.08734828981243103, "grad_norm": 1.8617642335116646, "learning_rate": 9.914015800483536e-06, "loss": 0.7391, "step": 2850 }, { "epoch": 0.08737893833517224, "grad_norm": 1.9433247028397045, "learning_rate": 9.913924128059334e-06, "loss": 0.6766, "step": 2851 }, { "epoch": 0.08740958685791345, "grad_norm": 4.096813949910427, "learning_rate": 9.91383240721698e-06, "loss": 0.7933, "step": 2852 }, { "epoch": 0.08744023538065465, "grad_norm": 2.0959789326737615, "learning_rate": 9.91374063795738e-06, "loss": 0.8039, "step": 2853 }, { "epoch": 0.08747088390339586, "grad_norm": 2.4470140220927297, "learning_rate": 9.913648820281435e-06, "loss": 0.7235, "step": 2854 }, { "epoch": 0.08750153242613706, "grad_norm": 1.1532677709384356, "learning_rate": 9.913556954190051e-06, "loss": 0.5377, "step": 2855 }, { "epoch": 0.08753218094887827, "grad_norm": 1.8770022539800963, "learning_rate": 9.913465039684134e-06, "loss": 0.8043, "step": 2856 }, { "epoch": 0.08756282947161947, "grad_norm": 1.8365524786004934, "learning_rate": 9.913373076764587e-06, "loss": 0.6646, "step": 2857 }, { "epoch": 0.08759347799436067, "grad_norm": 2.1329778267103636, "learning_rate": 9.913281065432318e-06, "loss": 0.7744, "step": 2858 }, { "epoch": 0.08762412651710187, "grad_norm": 2.0881869550217456, "learning_rate": 9.913189005688235e-06, "loss": 0.8447, "step": 2859 }, { "epoch": 0.08765477503984308, "grad_norm": 2.1564853414418694, "learning_rate": 9.913096897533244e-06, "loss": 0.8696, "step": 2860 }, { "epoch": 0.08768542356258428, "grad_norm": 2.2517471059167424, "learning_rate": 9.913004740968251e-06, "loss": 0.8151, "step": 2861 }, { "epoch": 0.08771607208532549, "grad_norm": 2.027943471793319, "learning_rate": 9.912912535994166e-06, "loss": 0.777, "step": 2862 }, { "epoch": 0.0877467206080667, "grad_norm": 2.444486172452427, "learning_rate": 9.912820282611896e-06, "loss": 0.7992, "step": 2863 }, { "epoch": 0.0877773691308079, "grad_norm": 1.9828726954379376, "learning_rate": 9.912727980822352e-06, "loss": 0.8082, "step": 2864 }, { "epoch": 0.0878080176535491, "grad_norm": 1.9887370362106622, "learning_rate": 9.91263563062644e-06, "loss": 0.8303, "step": 2865 }, { "epoch": 0.0878386661762903, "grad_norm": 1.2223453248510276, "learning_rate": 9.912543232025074e-06, "loss": 0.5141, "step": 2866 }, { "epoch": 0.0878693146990315, "grad_norm": 1.8984356655418029, "learning_rate": 9.912450785019162e-06, "loss": 0.7457, "step": 2867 }, { "epoch": 0.08789996322177271, "grad_norm": 1.9823626732246962, "learning_rate": 9.912358289609616e-06, "loss": 0.7825, "step": 2868 }, { "epoch": 0.08793061174451391, "grad_norm": 0.94575546012522, "learning_rate": 9.912265745797347e-06, "loss": 0.4961, "step": 2869 }, { "epoch": 0.08796126026725512, "grad_norm": 1.9841375396378416, "learning_rate": 9.912173153583266e-06, "loss": 0.7848, "step": 2870 }, { "epoch": 0.08799190878999633, "grad_norm": 1.9891731370255552, "learning_rate": 9.912080512968286e-06, "loss": 0.7947, "step": 2871 }, { "epoch": 0.08802255731273753, "grad_norm": 1.8262005723409052, "learning_rate": 9.91198782395332e-06, "loss": 0.8045, "step": 2872 }, { "epoch": 0.08805320583547872, "grad_norm": 0.9882362844135113, "learning_rate": 9.911895086539281e-06, "loss": 0.5209, "step": 2873 }, { "epoch": 0.08808385435821993, "grad_norm": 1.955458933969871, "learning_rate": 9.911802300727084e-06, "loss": 0.7864, "step": 2874 }, { "epoch": 0.08811450288096113, "grad_norm": 2.009053965669366, "learning_rate": 9.911709466517641e-06, "loss": 0.7911, "step": 2875 }, { "epoch": 0.08814515140370234, "grad_norm": 2.036255083101178, "learning_rate": 9.91161658391187e-06, "loss": 0.7158, "step": 2876 }, { "epoch": 0.08817579992644355, "grad_norm": 2.349161529043961, "learning_rate": 9.911523652910681e-06, "loss": 0.8049, "step": 2877 }, { "epoch": 0.08820644844918475, "grad_norm": 2.0382357063010215, "learning_rate": 9.911430673514994e-06, "loss": 0.7915, "step": 2878 }, { "epoch": 0.08823709697192596, "grad_norm": 0.9159809771023902, "learning_rate": 9.911337645725725e-06, "loss": 0.5265, "step": 2879 }, { "epoch": 0.08826774549466716, "grad_norm": 1.9818103422722864, "learning_rate": 9.91124456954379e-06, "loss": 0.7004, "step": 2880 }, { "epoch": 0.08829839401740835, "grad_norm": 1.9410107926493472, "learning_rate": 9.911151444970104e-06, "loss": 0.8303, "step": 2881 }, { "epoch": 0.08832904254014956, "grad_norm": 1.9021517583368106, "learning_rate": 9.911058272005587e-06, "loss": 0.6714, "step": 2882 }, { "epoch": 0.08835969106289077, "grad_norm": 2.0102072738708956, "learning_rate": 9.910965050651155e-06, "loss": 0.8246, "step": 2883 }, { "epoch": 0.08839033958563197, "grad_norm": 1.8540414080567469, "learning_rate": 9.910871780907729e-06, "loss": 0.772, "step": 2884 }, { "epoch": 0.08842098810837318, "grad_norm": 1.8997254953767237, "learning_rate": 9.910778462776227e-06, "loss": 0.8002, "step": 2885 }, { "epoch": 0.08845163663111438, "grad_norm": 1.873952239072599, "learning_rate": 9.910685096257568e-06, "loss": 0.6901, "step": 2886 }, { "epoch": 0.08848228515385559, "grad_norm": 1.8658751588450884, "learning_rate": 9.910591681352673e-06, "loss": 0.7237, "step": 2887 }, { "epoch": 0.0885129336765968, "grad_norm": 1.01946461146687, "learning_rate": 9.910498218062461e-06, "loss": 0.5236, "step": 2888 }, { "epoch": 0.08854358219933799, "grad_norm": 2.1835457162529073, "learning_rate": 9.910404706387853e-06, "loss": 0.7284, "step": 2889 }, { "epoch": 0.08857423072207919, "grad_norm": 1.9532211310141454, "learning_rate": 9.910311146329772e-06, "loss": 0.7747, "step": 2890 }, { "epoch": 0.0886048792448204, "grad_norm": 2.0211868559209196, "learning_rate": 9.910217537889139e-06, "loss": 0.7528, "step": 2891 }, { "epoch": 0.0886355277675616, "grad_norm": 1.865222362117812, "learning_rate": 9.910123881066875e-06, "loss": 0.6305, "step": 2892 }, { "epoch": 0.08866617629030281, "grad_norm": 0.9098295397149683, "learning_rate": 9.910030175863905e-06, "loss": 0.5294, "step": 2893 }, { "epoch": 0.08869682481304401, "grad_norm": 2.0562036597042095, "learning_rate": 9.909936422281152e-06, "loss": 0.7973, "step": 2894 }, { "epoch": 0.08872747333578522, "grad_norm": 1.9608743670047886, "learning_rate": 9.909842620319539e-06, "loss": 0.8057, "step": 2895 }, { "epoch": 0.08875812185852643, "grad_norm": 0.9372065121088602, "learning_rate": 9.90974876997999e-06, "loss": 0.5302, "step": 2896 }, { "epoch": 0.08878877038126762, "grad_norm": 2.045061394056276, "learning_rate": 9.90965487126343e-06, "loss": 0.7589, "step": 2897 }, { "epoch": 0.08881941890400882, "grad_norm": 1.8824008495028193, "learning_rate": 9.909560924170784e-06, "loss": 0.7578, "step": 2898 }, { "epoch": 0.08885006742675003, "grad_norm": 2.1116835643554044, "learning_rate": 9.90946692870298e-06, "loss": 0.749, "step": 2899 }, { "epoch": 0.08888071594949123, "grad_norm": 2.0209850449067104, "learning_rate": 9.90937288486094e-06, "loss": 0.7986, "step": 2900 }, { "epoch": 0.08891136447223244, "grad_norm": 0.9842847336165884, "learning_rate": 9.909278792645594e-06, "loss": 0.5214, "step": 2901 }, { "epoch": 0.08894201299497365, "grad_norm": 1.0776668670616094, "learning_rate": 9.909184652057866e-06, "loss": 0.5473, "step": 2902 }, { "epoch": 0.08897266151771485, "grad_norm": 2.665439336846594, "learning_rate": 9.909090463098688e-06, "loss": 0.7292, "step": 2903 }, { "epoch": 0.08900331004045604, "grad_norm": 1.862324502287059, "learning_rate": 9.908996225768985e-06, "loss": 0.6705, "step": 2904 }, { "epoch": 0.08903395856319725, "grad_norm": 0.9297149131548258, "learning_rate": 9.908901940069686e-06, "loss": 0.5239, "step": 2905 }, { "epoch": 0.08906460708593845, "grad_norm": 2.229772718119466, "learning_rate": 9.908807606001721e-06, "loss": 0.8952, "step": 2906 }, { "epoch": 0.08909525560867966, "grad_norm": 2.0818775582039817, "learning_rate": 9.908713223566018e-06, "loss": 0.8677, "step": 2907 }, { "epoch": 0.08912590413142087, "grad_norm": 1.0406719897624879, "learning_rate": 9.908618792763507e-06, "loss": 0.5067, "step": 2908 }, { "epoch": 0.08915655265416207, "grad_norm": 1.850991669300992, "learning_rate": 9.90852431359512e-06, "loss": 0.7096, "step": 2909 }, { "epoch": 0.08918720117690328, "grad_norm": 2.100679323619573, "learning_rate": 9.908429786061787e-06, "loss": 0.7611, "step": 2910 }, { "epoch": 0.08921784969964448, "grad_norm": 1.87974596180356, "learning_rate": 9.908335210164438e-06, "loss": 0.7131, "step": 2911 }, { "epoch": 0.08924849822238567, "grad_norm": 1.9945212256282383, "learning_rate": 9.908240585904008e-06, "loss": 0.7514, "step": 2912 }, { "epoch": 0.08927914674512688, "grad_norm": 1.8973035029125709, "learning_rate": 9.908145913281426e-06, "loss": 0.6969, "step": 2913 }, { "epoch": 0.08930979526786809, "grad_norm": 2.0489122361958905, "learning_rate": 9.908051192297628e-06, "loss": 0.811, "step": 2914 }, { "epoch": 0.08934044379060929, "grad_norm": 1.9303286703673397, "learning_rate": 9.907956422953546e-06, "loss": 0.721, "step": 2915 }, { "epoch": 0.0893710923133505, "grad_norm": 1.9182256603242847, "learning_rate": 9.907861605250114e-06, "loss": 0.7437, "step": 2916 }, { "epoch": 0.0894017408360917, "grad_norm": 1.914908034182179, "learning_rate": 9.907766739188264e-06, "loss": 0.8115, "step": 2917 }, { "epoch": 0.08943238935883291, "grad_norm": 1.9158828968568766, "learning_rate": 9.907671824768933e-06, "loss": 0.8052, "step": 2918 }, { "epoch": 0.08946303788157411, "grad_norm": 1.0378280903687027, "learning_rate": 9.907576861993056e-06, "loss": 0.5157, "step": 2919 }, { "epoch": 0.0894936864043153, "grad_norm": 2.003290205150268, "learning_rate": 9.90748185086157e-06, "loss": 0.7279, "step": 2920 }, { "epoch": 0.08952433492705651, "grad_norm": 2.141964164447751, "learning_rate": 9.907386791375408e-06, "loss": 0.8353, "step": 2921 }, { "epoch": 0.08955498344979772, "grad_norm": 2.2499066311921094, "learning_rate": 9.90729168353551e-06, "loss": 0.8374, "step": 2922 }, { "epoch": 0.08958563197253892, "grad_norm": 2.1426305392153386, "learning_rate": 9.907196527342809e-06, "loss": 0.7369, "step": 2923 }, { "epoch": 0.08961628049528013, "grad_norm": 1.8714171767116958, "learning_rate": 9.907101322798247e-06, "loss": 0.7942, "step": 2924 }, { "epoch": 0.08964692901802133, "grad_norm": 1.8531762816208448, "learning_rate": 9.90700606990276e-06, "loss": 0.7328, "step": 2925 }, { "epoch": 0.08967757754076254, "grad_norm": 2.59543229773488, "learning_rate": 9.906910768657286e-06, "loss": 0.8091, "step": 2926 }, { "epoch": 0.08970822606350375, "grad_norm": 2.134864044098138, "learning_rate": 9.906815419062763e-06, "loss": 0.8339, "step": 2927 }, { "epoch": 0.08973887458624494, "grad_norm": 0.991415961756191, "learning_rate": 9.906720021120136e-06, "loss": 0.5125, "step": 2928 }, { "epoch": 0.08976952310898614, "grad_norm": 2.1253611163810957, "learning_rate": 9.90662457483034e-06, "loss": 0.8055, "step": 2929 }, { "epoch": 0.08980017163172735, "grad_norm": 1.7703652280275948, "learning_rate": 9.906529080194315e-06, "loss": 0.7505, "step": 2930 }, { "epoch": 0.08983082015446855, "grad_norm": 1.6737703868703893, "learning_rate": 9.906433537213006e-06, "loss": 0.7381, "step": 2931 }, { "epoch": 0.08986146867720976, "grad_norm": 1.825643182951097, "learning_rate": 9.90633794588735e-06, "loss": 0.7395, "step": 2932 }, { "epoch": 0.08989211719995097, "grad_norm": 1.8526142558836871, "learning_rate": 9.90624230621829e-06, "loss": 0.7432, "step": 2933 }, { "epoch": 0.08992276572269217, "grad_norm": 0.891150292893928, "learning_rate": 9.906146618206772e-06, "loss": 0.5036, "step": 2934 }, { "epoch": 0.08995341424543336, "grad_norm": 0.9198376109055253, "learning_rate": 9.906050881853735e-06, "loss": 0.5108, "step": 2935 }, { "epoch": 0.08998406276817457, "grad_norm": 2.1674358242975327, "learning_rate": 9.905955097160122e-06, "loss": 0.7217, "step": 2936 }, { "epoch": 0.09001471129091577, "grad_norm": 1.8977329271145595, "learning_rate": 9.90585926412688e-06, "loss": 0.8347, "step": 2937 }, { "epoch": 0.09004535981365698, "grad_norm": 0.9205005392765547, "learning_rate": 9.90576338275495e-06, "loss": 0.5155, "step": 2938 }, { "epoch": 0.09007600833639819, "grad_norm": 1.7794589296148104, "learning_rate": 9.90566745304528e-06, "loss": 0.7623, "step": 2939 }, { "epoch": 0.09010665685913939, "grad_norm": 2.038122386746521, "learning_rate": 9.905571474998812e-06, "loss": 0.7717, "step": 2940 }, { "epoch": 0.0901373053818806, "grad_norm": 1.7522717529784557, "learning_rate": 9.905475448616493e-06, "loss": 0.7367, "step": 2941 }, { "epoch": 0.0901679539046218, "grad_norm": 2.014206048753149, "learning_rate": 9.90537937389927e-06, "loss": 0.7937, "step": 2942 }, { "epoch": 0.090198602427363, "grad_norm": 2.075321598480121, "learning_rate": 9.905283250848089e-06, "loss": 0.8471, "step": 2943 }, { "epoch": 0.0902292509501042, "grad_norm": 3.822099402861195, "learning_rate": 9.905187079463895e-06, "loss": 0.7383, "step": 2944 }, { "epoch": 0.0902598994728454, "grad_norm": 0.9260166158876769, "learning_rate": 9.90509085974764e-06, "loss": 0.5145, "step": 2945 }, { "epoch": 0.09029054799558661, "grad_norm": 2.0006451004551367, "learning_rate": 9.90499459170027e-06, "loss": 0.7397, "step": 2946 }, { "epoch": 0.09032119651832782, "grad_norm": 0.8754019312261665, "learning_rate": 9.904898275322734e-06, "loss": 0.5138, "step": 2947 }, { "epoch": 0.09035184504106902, "grad_norm": 2.2469663051391255, "learning_rate": 9.904801910615978e-06, "loss": 0.6924, "step": 2948 }, { "epoch": 0.09038249356381023, "grad_norm": 2.50139243277914, "learning_rate": 9.904705497580954e-06, "loss": 0.7636, "step": 2949 }, { "epoch": 0.09041314208655143, "grad_norm": 2.1178248618903956, "learning_rate": 9.904609036218613e-06, "loss": 0.8459, "step": 2950 }, { "epoch": 0.09044379060929263, "grad_norm": 2.058614617254843, "learning_rate": 9.904512526529904e-06, "loss": 0.8735, "step": 2951 }, { "epoch": 0.09047443913203383, "grad_norm": 1.9400316968602513, "learning_rate": 9.904415968515777e-06, "loss": 0.6138, "step": 2952 }, { "epoch": 0.09050508765477504, "grad_norm": 1.976848237097893, "learning_rate": 9.904319362177186e-06, "loss": 0.7192, "step": 2953 }, { "epoch": 0.09053573617751624, "grad_norm": 2.2227198685398872, "learning_rate": 9.90422270751508e-06, "loss": 0.7256, "step": 2954 }, { "epoch": 0.09056638470025745, "grad_norm": 2.0671852874241035, "learning_rate": 9.904126004530415e-06, "loss": 0.7668, "step": 2955 }, { "epoch": 0.09059703322299865, "grad_norm": 2.094290385139489, "learning_rate": 9.904029253224142e-06, "loss": 0.7491, "step": 2956 }, { "epoch": 0.09062768174573986, "grad_norm": 2.120106247833941, "learning_rate": 9.903932453597212e-06, "loss": 0.7691, "step": 2957 }, { "epoch": 0.09065833026848107, "grad_norm": 2.352868824539404, "learning_rate": 9.90383560565058e-06, "loss": 0.7368, "step": 2958 }, { "epoch": 0.09068897879122226, "grad_norm": 1.923810283829918, "learning_rate": 9.903738709385203e-06, "loss": 0.6941, "step": 2959 }, { "epoch": 0.09071962731396346, "grad_norm": 2.0967394291025063, "learning_rate": 9.903641764802033e-06, "loss": 0.7393, "step": 2960 }, { "epoch": 0.09075027583670467, "grad_norm": 1.110152452920369, "learning_rate": 9.903544771902027e-06, "loss": 0.5058, "step": 2961 }, { "epoch": 0.09078092435944587, "grad_norm": 2.010657323402387, "learning_rate": 9.903447730686139e-06, "loss": 0.7774, "step": 2962 }, { "epoch": 0.09081157288218708, "grad_norm": 2.0201186718157604, "learning_rate": 9.903350641155325e-06, "loss": 0.8047, "step": 2963 }, { "epoch": 0.09084222140492829, "grad_norm": 0.9092581404126574, "learning_rate": 9.903253503310544e-06, "loss": 0.5376, "step": 2964 }, { "epoch": 0.09087286992766949, "grad_norm": 2.1333269628889737, "learning_rate": 9.90315631715275e-06, "loss": 0.8767, "step": 2965 }, { "epoch": 0.09090351845041068, "grad_norm": 0.9016797769891856, "learning_rate": 9.903059082682906e-06, "loss": 0.5054, "step": 2966 }, { "epoch": 0.09093416697315189, "grad_norm": 1.9827941519728578, "learning_rate": 9.902961799901964e-06, "loss": 0.7353, "step": 2967 }, { "epoch": 0.0909648154958931, "grad_norm": 2.1621176837490346, "learning_rate": 9.902864468810884e-06, "loss": 0.8014, "step": 2968 }, { "epoch": 0.0909954640186343, "grad_norm": 1.873406710219088, "learning_rate": 9.902767089410627e-06, "loss": 0.7985, "step": 2969 }, { "epoch": 0.0910261125413755, "grad_norm": 0.9069094138864114, "learning_rate": 9.902669661702151e-06, "loss": 0.5137, "step": 2970 }, { "epoch": 0.09105676106411671, "grad_norm": 1.9760880279360868, "learning_rate": 9.902572185686416e-06, "loss": 0.8907, "step": 2971 }, { "epoch": 0.09108740958685792, "grad_norm": 2.060128020015315, "learning_rate": 9.902474661364383e-06, "loss": 0.7038, "step": 2972 }, { "epoch": 0.09111805810959912, "grad_norm": 1.8666993274614836, "learning_rate": 9.902377088737014e-06, "loss": 0.7399, "step": 2973 }, { "epoch": 0.09114870663234032, "grad_norm": 0.9510888458673106, "learning_rate": 9.90227946780527e-06, "loss": 0.5184, "step": 2974 }, { "epoch": 0.09117935515508152, "grad_norm": 1.8099936427382226, "learning_rate": 9.90218179857011e-06, "loss": 0.7409, "step": 2975 }, { "epoch": 0.09121000367782273, "grad_norm": 2.011166807129866, "learning_rate": 9.902084081032499e-06, "loss": 0.8063, "step": 2976 }, { "epoch": 0.09124065220056393, "grad_norm": 1.9949168011773546, "learning_rate": 9.901986315193399e-06, "loss": 0.7379, "step": 2977 }, { "epoch": 0.09127130072330514, "grad_norm": 2.144512973248358, "learning_rate": 9.901888501053773e-06, "loss": 0.7745, "step": 2978 }, { "epoch": 0.09130194924604634, "grad_norm": 0.9126491600515183, "learning_rate": 9.901790638614588e-06, "loss": 0.5244, "step": 2979 }, { "epoch": 0.09133259776878755, "grad_norm": 0.8840413160326305, "learning_rate": 9.901692727876804e-06, "loss": 0.5058, "step": 2980 }, { "epoch": 0.09136324629152875, "grad_norm": 2.073430894262685, "learning_rate": 9.901594768841386e-06, "loss": 0.6115, "step": 2981 }, { "epoch": 0.09139389481426995, "grad_norm": 1.829077839768486, "learning_rate": 9.901496761509304e-06, "loss": 0.7866, "step": 2982 }, { "epoch": 0.09142454333701115, "grad_norm": 0.9246032474752768, "learning_rate": 9.901398705881518e-06, "loss": 0.528, "step": 2983 }, { "epoch": 0.09145519185975236, "grad_norm": 2.0370978524302954, "learning_rate": 9.901300601958997e-06, "loss": 0.7731, "step": 2984 }, { "epoch": 0.09148584038249356, "grad_norm": 1.9063728668772513, "learning_rate": 9.901202449742706e-06, "loss": 0.7686, "step": 2985 }, { "epoch": 0.09151648890523477, "grad_norm": 2.0840171644338117, "learning_rate": 9.901104249233614e-06, "loss": 0.8157, "step": 2986 }, { "epoch": 0.09154713742797597, "grad_norm": 1.973417714282018, "learning_rate": 9.901006000432688e-06, "loss": 0.7406, "step": 2987 }, { "epoch": 0.09157778595071718, "grad_norm": 1.9506065717084464, "learning_rate": 9.900907703340897e-06, "loss": 0.7099, "step": 2988 }, { "epoch": 0.09160843447345839, "grad_norm": 2.1383953137734655, "learning_rate": 9.900809357959206e-06, "loss": 0.7033, "step": 2989 }, { "epoch": 0.09163908299619958, "grad_norm": 1.0224251282248615, "learning_rate": 9.900710964288588e-06, "loss": 0.523, "step": 2990 }, { "epoch": 0.09166973151894078, "grad_norm": 1.8040411008390507, "learning_rate": 9.900612522330012e-06, "loss": 0.7042, "step": 2991 }, { "epoch": 0.09170038004168199, "grad_norm": 1.856735447542042, "learning_rate": 9.900514032084445e-06, "loss": 0.7557, "step": 2992 }, { "epoch": 0.0917310285644232, "grad_norm": 1.7680089410707587, "learning_rate": 9.90041549355286e-06, "loss": 0.8022, "step": 2993 }, { "epoch": 0.0917616770871644, "grad_norm": 2.105603823036762, "learning_rate": 9.900316906736227e-06, "loss": 0.769, "step": 2994 }, { "epoch": 0.0917923256099056, "grad_norm": 2.0853762697803275, "learning_rate": 9.900218271635517e-06, "loss": 0.7752, "step": 2995 }, { "epoch": 0.09182297413264681, "grad_norm": 1.9886762084870182, "learning_rate": 9.900119588251706e-06, "loss": 0.7479, "step": 2996 }, { "epoch": 0.091853622655388, "grad_norm": 1.9069456173894421, "learning_rate": 9.90002085658576e-06, "loss": 0.7864, "step": 2997 }, { "epoch": 0.09188427117812921, "grad_norm": 1.8556038914980573, "learning_rate": 9.899922076638655e-06, "loss": 0.7503, "step": 2998 }, { "epoch": 0.09191491970087041, "grad_norm": 1.950633639797168, "learning_rate": 9.899823248411364e-06, "loss": 0.7378, "step": 2999 }, { "epoch": 0.09194556822361162, "grad_norm": 1.8262543287561022, "learning_rate": 9.899724371904862e-06, "loss": 0.71, "step": 3000 }, { "epoch": 0.09197621674635283, "grad_norm": 2.0523927321967492, "learning_rate": 9.899625447120122e-06, "loss": 0.7773, "step": 3001 }, { "epoch": 0.09200686526909403, "grad_norm": 2.125326676669027, "learning_rate": 9.899526474058118e-06, "loss": 0.7972, "step": 3002 }, { "epoch": 0.09203751379183524, "grad_norm": 0.9702465332780518, "learning_rate": 9.899427452719826e-06, "loss": 0.5186, "step": 3003 }, { "epoch": 0.09206816231457644, "grad_norm": 2.350098362531575, "learning_rate": 9.899328383106224e-06, "loss": 0.7834, "step": 3004 }, { "epoch": 0.09209881083731764, "grad_norm": 1.8618077438709997, "learning_rate": 9.899229265218284e-06, "loss": 0.8113, "step": 3005 }, { "epoch": 0.09212945936005884, "grad_norm": 2.069782826442215, "learning_rate": 9.899130099056983e-06, "loss": 0.7664, "step": 3006 }, { "epoch": 0.09216010788280005, "grad_norm": 1.9369975374593769, "learning_rate": 9.899030884623302e-06, "loss": 0.8463, "step": 3007 }, { "epoch": 0.09219075640554125, "grad_norm": 1.947463006416339, "learning_rate": 9.898931621918215e-06, "loss": 0.7283, "step": 3008 }, { "epoch": 0.09222140492828246, "grad_norm": 1.897455243748763, "learning_rate": 9.898832310942702e-06, "loss": 0.723, "step": 3009 }, { "epoch": 0.09225205345102366, "grad_norm": 2.1482012001554023, "learning_rate": 9.89873295169774e-06, "loss": 0.7799, "step": 3010 }, { "epoch": 0.09228270197376487, "grad_norm": 1.755712412491787, "learning_rate": 9.89863354418431e-06, "loss": 0.6366, "step": 3011 }, { "epoch": 0.09231335049650607, "grad_norm": 1.9475302459807515, "learning_rate": 9.89853408840339e-06, "loss": 0.7284, "step": 3012 }, { "epoch": 0.09234399901924727, "grad_norm": 1.969885484402543, "learning_rate": 9.89843458435596e-06, "loss": 0.8301, "step": 3013 }, { "epoch": 0.09237464754198847, "grad_norm": 1.9713679645604654, "learning_rate": 9.898335032043001e-06, "loss": 0.8374, "step": 3014 }, { "epoch": 0.09240529606472968, "grad_norm": 1.8377723086417188, "learning_rate": 9.898235431465492e-06, "loss": 0.743, "step": 3015 }, { "epoch": 0.09243594458747088, "grad_norm": 1.852987917966043, "learning_rate": 9.898135782624418e-06, "loss": 0.6962, "step": 3016 }, { "epoch": 0.09246659311021209, "grad_norm": 1.0150261263012186, "learning_rate": 9.898036085520759e-06, "loss": 0.5365, "step": 3017 }, { "epoch": 0.0924972416329533, "grad_norm": 2.086328600955086, "learning_rate": 9.897936340155496e-06, "loss": 0.808, "step": 3018 }, { "epoch": 0.0925278901556945, "grad_norm": 0.9389717959249938, "learning_rate": 9.897836546529614e-06, "loss": 0.5271, "step": 3019 }, { "epoch": 0.0925585386784357, "grad_norm": 0.8467040850317221, "learning_rate": 9.897736704644093e-06, "loss": 0.4897, "step": 3020 }, { "epoch": 0.0925891872011769, "grad_norm": 2.0014875730578887, "learning_rate": 9.897636814499923e-06, "loss": 0.7398, "step": 3021 }, { "epoch": 0.0926198357239181, "grad_norm": 1.8497617764043035, "learning_rate": 9.897536876098081e-06, "loss": 0.7176, "step": 3022 }, { "epoch": 0.09265048424665931, "grad_norm": 0.9704261810261902, "learning_rate": 9.897436889439558e-06, "loss": 0.5365, "step": 3023 }, { "epoch": 0.09268113276940051, "grad_norm": 0.9522709631084986, "learning_rate": 9.897336854525334e-06, "loss": 0.5264, "step": 3024 }, { "epoch": 0.09271178129214172, "grad_norm": 2.3434626242495975, "learning_rate": 9.897236771356397e-06, "loss": 0.8587, "step": 3025 }, { "epoch": 0.09274242981488293, "grad_norm": 1.8830160736142323, "learning_rate": 9.897136639933734e-06, "loss": 0.6929, "step": 3026 }, { "epoch": 0.09277307833762413, "grad_norm": 2.2420881646689197, "learning_rate": 9.89703646025833e-06, "loss": 0.969, "step": 3027 }, { "epoch": 0.09280372686036532, "grad_norm": 1.9449867685052686, "learning_rate": 9.896936232331173e-06, "loss": 0.6601, "step": 3028 }, { "epoch": 0.09283437538310653, "grad_norm": 1.9270472513232544, "learning_rate": 9.896835956153251e-06, "loss": 0.704, "step": 3029 }, { "epoch": 0.09286502390584774, "grad_norm": 1.861633103112164, "learning_rate": 9.896735631725551e-06, "loss": 0.7009, "step": 3030 }, { "epoch": 0.09289567242858894, "grad_norm": 1.8969363212991495, "learning_rate": 9.896635259049062e-06, "loss": 0.6976, "step": 3031 }, { "epoch": 0.09292632095133015, "grad_norm": 1.8024356091617366, "learning_rate": 9.896534838124773e-06, "loss": 0.7443, "step": 3032 }, { "epoch": 0.09295696947407135, "grad_norm": 2.1662350399407395, "learning_rate": 9.896434368953673e-06, "loss": 0.7279, "step": 3033 }, { "epoch": 0.09298761799681256, "grad_norm": 2.021870055976263, "learning_rate": 9.896333851536753e-06, "loss": 0.8532, "step": 3034 }, { "epoch": 0.09301826651955376, "grad_norm": 1.9529552841239664, "learning_rate": 9.896233285875003e-06, "loss": 0.7839, "step": 3035 }, { "epoch": 0.09304891504229496, "grad_norm": 1.9853581109600422, "learning_rate": 9.896132671969412e-06, "loss": 0.7379, "step": 3036 }, { "epoch": 0.09307956356503616, "grad_norm": 1.9183420815340442, "learning_rate": 9.896032009820975e-06, "loss": 0.737, "step": 3037 }, { "epoch": 0.09311021208777737, "grad_norm": 1.9625728573532395, "learning_rate": 9.895931299430681e-06, "loss": 0.7241, "step": 3038 }, { "epoch": 0.09314086061051857, "grad_norm": 1.9934100589264947, "learning_rate": 9.895830540799523e-06, "loss": 0.7277, "step": 3039 }, { "epoch": 0.09317150913325978, "grad_norm": 1.727014578910172, "learning_rate": 9.895729733928494e-06, "loss": 0.7058, "step": 3040 }, { "epoch": 0.09320215765600098, "grad_norm": 2.5106432439268365, "learning_rate": 9.895628878818588e-06, "loss": 0.688, "step": 3041 }, { "epoch": 0.09323280617874219, "grad_norm": 2.029478554818263, "learning_rate": 9.895527975470799e-06, "loss": 0.7834, "step": 3042 }, { "epoch": 0.0932634547014834, "grad_norm": 2.0564191242991323, "learning_rate": 9.895427023886118e-06, "loss": 0.7741, "step": 3043 }, { "epoch": 0.09329410322422459, "grad_norm": 2.4570054086912463, "learning_rate": 9.895326024065542e-06, "loss": 0.7287, "step": 3044 }, { "epoch": 0.09332475174696579, "grad_norm": 2.5473811407175173, "learning_rate": 9.895224976010067e-06, "loss": 0.748, "step": 3045 }, { "epoch": 0.093355400269707, "grad_norm": 2.0498239318943634, "learning_rate": 9.895123879720688e-06, "loss": 0.7376, "step": 3046 }, { "epoch": 0.0933860487924482, "grad_norm": 2.0075399506018927, "learning_rate": 9.8950227351984e-06, "loss": 0.7376, "step": 3047 }, { "epoch": 0.09341669731518941, "grad_norm": 1.896162057273456, "learning_rate": 9.894921542444202e-06, "loss": 0.6852, "step": 3048 }, { "epoch": 0.09344734583793061, "grad_norm": 2.2331461971274593, "learning_rate": 9.894820301459089e-06, "loss": 0.7883, "step": 3049 }, { "epoch": 0.09347799436067182, "grad_norm": 2.1301935894727038, "learning_rate": 9.89471901224406e-06, "loss": 0.8313, "step": 3050 }, { "epoch": 0.09350864288341303, "grad_norm": 2.0308293748567725, "learning_rate": 9.89461767480011e-06, "loss": 0.7504, "step": 3051 }, { "epoch": 0.09353929140615422, "grad_norm": 2.2142792969142118, "learning_rate": 9.894516289128242e-06, "loss": 0.7295, "step": 3052 }, { "epoch": 0.09356993992889542, "grad_norm": 2.251992854968373, "learning_rate": 9.894414855229453e-06, "loss": 0.7547, "step": 3053 }, { "epoch": 0.09360058845163663, "grad_norm": 1.6994240981410307, "learning_rate": 9.89431337310474e-06, "loss": 0.7362, "step": 3054 }, { "epoch": 0.09363123697437783, "grad_norm": 2.0887714684008194, "learning_rate": 9.894211842755107e-06, "loss": 0.766, "step": 3055 }, { "epoch": 0.09366188549711904, "grad_norm": 1.6037998534577527, "learning_rate": 9.894110264181551e-06, "loss": 0.545, "step": 3056 }, { "epoch": 0.09369253401986025, "grad_norm": 1.9083373856858015, "learning_rate": 9.894008637385075e-06, "loss": 0.6871, "step": 3057 }, { "epoch": 0.09372318254260145, "grad_norm": 0.9192843892741529, "learning_rate": 9.89390696236668e-06, "loss": 0.5018, "step": 3058 }, { "epoch": 0.09375383106534264, "grad_norm": 2.4320183971177087, "learning_rate": 9.893805239127366e-06, "loss": 0.6762, "step": 3059 }, { "epoch": 0.09378447958808385, "grad_norm": 2.3805530645323674, "learning_rate": 9.893703467668139e-06, "loss": 0.7946, "step": 3060 }, { "epoch": 0.09381512811082506, "grad_norm": 2.107261029701774, "learning_rate": 9.893601647989997e-06, "loss": 0.7825, "step": 3061 }, { "epoch": 0.09384577663356626, "grad_norm": 1.34307615809011, "learning_rate": 9.893499780093948e-06, "loss": 0.5278, "step": 3062 }, { "epoch": 0.09387642515630747, "grad_norm": 1.899362857688762, "learning_rate": 9.893397863980993e-06, "loss": 0.7351, "step": 3063 }, { "epoch": 0.09390707367904867, "grad_norm": 2.2416826012807474, "learning_rate": 9.893295899652137e-06, "loss": 0.9459, "step": 3064 }, { "epoch": 0.09393772220178988, "grad_norm": 1.015750767767657, "learning_rate": 9.893193887108385e-06, "loss": 0.5237, "step": 3065 }, { "epoch": 0.09396837072453108, "grad_norm": 1.965344979951572, "learning_rate": 9.893091826350741e-06, "loss": 0.8151, "step": 3066 }, { "epoch": 0.09399901924727228, "grad_norm": 2.280701029356865, "learning_rate": 9.892989717380211e-06, "loss": 0.8547, "step": 3067 }, { "epoch": 0.09402966777001348, "grad_norm": 2.127059399383621, "learning_rate": 9.892887560197802e-06, "loss": 0.7657, "step": 3068 }, { "epoch": 0.09406031629275469, "grad_norm": 1.004689569729217, "learning_rate": 9.892785354804519e-06, "loss": 0.5015, "step": 3069 }, { "epoch": 0.09409096481549589, "grad_norm": 2.305732839802527, "learning_rate": 9.89268310120137e-06, "loss": 0.8174, "step": 3070 }, { "epoch": 0.0941216133382371, "grad_norm": 2.063180806492626, "learning_rate": 9.892580799389364e-06, "loss": 0.8766, "step": 3071 }, { "epoch": 0.0941522618609783, "grad_norm": 2.2237382419465326, "learning_rate": 9.892478449369507e-06, "loss": 0.8899, "step": 3072 }, { "epoch": 0.09418291038371951, "grad_norm": 0.931775740022436, "learning_rate": 9.892376051142807e-06, "loss": 0.5292, "step": 3073 }, { "epoch": 0.09421355890646071, "grad_norm": 2.032611807935302, "learning_rate": 9.892273604710275e-06, "loss": 0.8422, "step": 3074 }, { "epoch": 0.0942442074292019, "grad_norm": 1.7826180569732668, "learning_rate": 9.89217111007292e-06, "loss": 0.6895, "step": 3075 }, { "epoch": 0.09427485595194311, "grad_norm": 1.9187119923650222, "learning_rate": 9.89206856723175e-06, "loss": 0.8113, "step": 3076 }, { "epoch": 0.09430550447468432, "grad_norm": 1.8216478992839453, "learning_rate": 9.891965976187778e-06, "loss": 0.8409, "step": 3077 }, { "epoch": 0.09433615299742552, "grad_norm": 1.7978049427019784, "learning_rate": 9.891863336942012e-06, "loss": 0.7052, "step": 3078 }, { "epoch": 0.09436680152016673, "grad_norm": 1.8450558510309216, "learning_rate": 9.891760649495465e-06, "loss": 0.7577, "step": 3079 }, { "epoch": 0.09439745004290793, "grad_norm": 1.904999274334601, "learning_rate": 9.89165791384915e-06, "loss": 0.8058, "step": 3080 }, { "epoch": 0.09442809856564914, "grad_norm": 1.9878184522393483, "learning_rate": 9.891555130004078e-06, "loss": 0.8118, "step": 3081 }, { "epoch": 0.09445874708839035, "grad_norm": 2.057346883525178, "learning_rate": 9.891452297961261e-06, "loss": 0.7788, "step": 3082 }, { "epoch": 0.09448939561113154, "grad_norm": 1.7762225719119498, "learning_rate": 9.891349417721713e-06, "loss": 0.7422, "step": 3083 }, { "epoch": 0.09452004413387274, "grad_norm": 1.0228753007032234, "learning_rate": 9.891246489286448e-06, "loss": 0.5226, "step": 3084 }, { "epoch": 0.09455069265661395, "grad_norm": 1.9519897559718322, "learning_rate": 9.89114351265648e-06, "loss": 0.8521, "step": 3085 }, { "epoch": 0.09458134117935516, "grad_norm": 2.213732107119467, "learning_rate": 9.891040487832824e-06, "loss": 0.7693, "step": 3086 }, { "epoch": 0.09461198970209636, "grad_norm": 1.9990540740805318, "learning_rate": 9.890937414816493e-06, "loss": 0.8033, "step": 3087 }, { "epoch": 0.09464263822483757, "grad_norm": 1.804513231643838, "learning_rate": 9.890834293608506e-06, "loss": 0.733, "step": 3088 }, { "epoch": 0.09467328674757877, "grad_norm": 2.0149275931463166, "learning_rate": 9.890731124209875e-06, "loss": 0.7566, "step": 3089 }, { "epoch": 0.09470393527031996, "grad_norm": 1.9701806024468351, "learning_rate": 9.890627906621622e-06, "loss": 0.7915, "step": 3090 }, { "epoch": 0.09473458379306117, "grad_norm": 1.9213739451871277, "learning_rate": 9.890524640844759e-06, "loss": 0.8123, "step": 3091 }, { "epoch": 0.09476523231580238, "grad_norm": 1.9487427121272856, "learning_rate": 9.890421326880306e-06, "loss": 0.6846, "step": 3092 }, { "epoch": 0.09479588083854358, "grad_norm": 1.8435513975579743, "learning_rate": 9.89031796472928e-06, "loss": 0.6791, "step": 3093 }, { "epoch": 0.09482652936128479, "grad_norm": 1.9231023651801011, "learning_rate": 9.8902145543927e-06, "loss": 0.7728, "step": 3094 }, { "epoch": 0.09485717788402599, "grad_norm": 1.305507074656505, "learning_rate": 9.890111095871584e-06, "loss": 0.5165, "step": 3095 }, { "epoch": 0.0948878264067672, "grad_norm": 2.072991295687858, "learning_rate": 9.890007589166954e-06, "loss": 0.6796, "step": 3096 }, { "epoch": 0.0949184749295084, "grad_norm": 1.8334515427323135, "learning_rate": 9.889904034279827e-06, "loss": 0.7676, "step": 3097 }, { "epoch": 0.0949491234522496, "grad_norm": 2.3119762317203225, "learning_rate": 9.889800431211224e-06, "loss": 0.8066, "step": 3098 }, { "epoch": 0.0949797719749908, "grad_norm": 1.8359676017658741, "learning_rate": 9.889696779962167e-06, "loss": 0.5975, "step": 3099 }, { "epoch": 0.095010420497732, "grad_norm": 2.004630206500889, "learning_rate": 9.889593080533675e-06, "loss": 0.7866, "step": 3100 }, { "epoch": 0.09504106902047321, "grad_norm": 1.0774832179924643, "learning_rate": 9.889489332926773e-06, "loss": 0.5289, "step": 3101 }, { "epoch": 0.09507171754321442, "grad_norm": 2.191040227936995, "learning_rate": 9.889385537142482e-06, "loss": 0.7609, "step": 3102 }, { "epoch": 0.09510236606595562, "grad_norm": 0.9079817635876531, "learning_rate": 9.889281693181823e-06, "loss": 0.4997, "step": 3103 }, { "epoch": 0.09513301458869683, "grad_norm": 2.101644022077999, "learning_rate": 9.889177801045821e-06, "loss": 0.8222, "step": 3104 }, { "epoch": 0.09516366311143803, "grad_norm": 2.1762898620928404, "learning_rate": 9.889073860735499e-06, "loss": 0.7182, "step": 3105 }, { "epoch": 0.09519431163417923, "grad_norm": 1.855633024011805, "learning_rate": 9.888969872251881e-06, "loss": 0.7311, "step": 3106 }, { "epoch": 0.09522496015692043, "grad_norm": 1.078879568707824, "learning_rate": 9.888865835595994e-06, "loss": 0.5044, "step": 3107 }, { "epoch": 0.09525560867966164, "grad_norm": 2.0448343724535296, "learning_rate": 9.888761750768858e-06, "loss": 0.7595, "step": 3108 }, { "epoch": 0.09528625720240284, "grad_norm": 2.1311234390608647, "learning_rate": 9.888657617771503e-06, "loss": 0.8242, "step": 3109 }, { "epoch": 0.09531690572514405, "grad_norm": 0.8927257330655198, "learning_rate": 9.888553436604954e-06, "loss": 0.5167, "step": 3110 }, { "epoch": 0.09534755424788526, "grad_norm": 0.8899331855328574, "learning_rate": 9.888449207270237e-06, "loss": 0.512, "step": 3111 }, { "epoch": 0.09537820277062646, "grad_norm": 1.885540235188503, "learning_rate": 9.888344929768378e-06, "loss": 0.6832, "step": 3112 }, { "epoch": 0.09540885129336767, "grad_norm": 2.264751816417359, "learning_rate": 9.888240604100407e-06, "loss": 0.8106, "step": 3113 }, { "epoch": 0.09543949981610886, "grad_norm": 2.0242456101203072, "learning_rate": 9.888136230267351e-06, "loss": 0.8261, "step": 3114 }, { "epoch": 0.09547014833885006, "grad_norm": 2.161224918375079, "learning_rate": 9.888031808270237e-06, "loss": 0.635, "step": 3115 }, { "epoch": 0.09550079686159127, "grad_norm": 1.7796970474503073, "learning_rate": 9.887927338110095e-06, "loss": 0.7119, "step": 3116 }, { "epoch": 0.09553144538433248, "grad_norm": 1.2662247653614425, "learning_rate": 9.887822819787955e-06, "loss": 0.5176, "step": 3117 }, { "epoch": 0.09556209390707368, "grad_norm": 2.135431423503074, "learning_rate": 9.887718253304847e-06, "loss": 0.6808, "step": 3118 }, { "epoch": 0.09559274242981489, "grad_norm": 2.029203656123572, "learning_rate": 9.8876136386618e-06, "loss": 0.7552, "step": 3119 }, { "epoch": 0.09562339095255609, "grad_norm": 1.8390378616231604, "learning_rate": 9.887508975859843e-06, "loss": 0.7299, "step": 3120 }, { "epoch": 0.09565403947529728, "grad_norm": 0.960875332197341, "learning_rate": 9.887404264900012e-06, "loss": 0.4997, "step": 3121 }, { "epoch": 0.09568468799803849, "grad_norm": 1.9828069127113512, "learning_rate": 9.887299505783334e-06, "loss": 0.7095, "step": 3122 }, { "epoch": 0.0957153365207797, "grad_norm": 2.1965817510581265, "learning_rate": 9.887194698510846e-06, "loss": 0.7172, "step": 3123 }, { "epoch": 0.0957459850435209, "grad_norm": 1.9049828001380131, "learning_rate": 9.887089843083577e-06, "loss": 0.7501, "step": 3124 }, { "epoch": 0.0957766335662621, "grad_norm": 2.0739160120818, "learning_rate": 9.886984939502562e-06, "loss": 0.7655, "step": 3125 }, { "epoch": 0.09580728208900331, "grad_norm": 1.7362355106912433, "learning_rate": 9.886879987768833e-06, "loss": 0.7289, "step": 3126 }, { "epoch": 0.09583793061174452, "grad_norm": 1.8871283027268784, "learning_rate": 9.886774987883426e-06, "loss": 0.7786, "step": 3127 }, { "epoch": 0.09586857913448572, "grad_norm": 2.2580630918740914, "learning_rate": 9.886669939847373e-06, "loss": 0.7668, "step": 3128 }, { "epoch": 0.09589922765722692, "grad_norm": 2.07137866589592, "learning_rate": 9.886564843661713e-06, "loss": 0.7766, "step": 3129 }, { "epoch": 0.09592987617996812, "grad_norm": 1.1760084331272502, "learning_rate": 9.886459699327478e-06, "loss": 0.5012, "step": 3130 }, { "epoch": 0.09596052470270933, "grad_norm": 2.2474347051137893, "learning_rate": 9.886354506845706e-06, "loss": 0.7593, "step": 3131 }, { "epoch": 0.09599117322545053, "grad_norm": 2.016696818197017, "learning_rate": 9.886249266217432e-06, "loss": 0.7475, "step": 3132 }, { "epoch": 0.09602182174819174, "grad_norm": 1.8443705103947232, "learning_rate": 9.886143977443694e-06, "loss": 0.7756, "step": 3133 }, { "epoch": 0.09605247027093294, "grad_norm": 1.969644942978252, "learning_rate": 9.886038640525531e-06, "loss": 0.7741, "step": 3134 }, { "epoch": 0.09608311879367415, "grad_norm": 1.8956639124688641, "learning_rate": 9.885933255463978e-06, "loss": 0.7679, "step": 3135 }, { "epoch": 0.09611376731641535, "grad_norm": 2.023725536939127, "learning_rate": 9.885827822260073e-06, "loss": 0.7748, "step": 3136 }, { "epoch": 0.09614441583915655, "grad_norm": 2.1340901126688254, "learning_rate": 9.885722340914857e-06, "loss": 0.7719, "step": 3137 }, { "epoch": 0.09617506436189775, "grad_norm": 1.0941695559545437, "learning_rate": 9.88561681142937e-06, "loss": 0.5284, "step": 3138 }, { "epoch": 0.09620571288463896, "grad_norm": 2.360018517790863, "learning_rate": 9.88551123380465e-06, "loss": 0.7866, "step": 3139 }, { "epoch": 0.09623636140738016, "grad_norm": 2.001019546668572, "learning_rate": 9.885405608041738e-06, "loss": 0.8982, "step": 3140 }, { "epoch": 0.09626700993012137, "grad_norm": 1.743282854477548, "learning_rate": 9.885299934141674e-06, "loss": 0.8399, "step": 3141 }, { "epoch": 0.09629765845286258, "grad_norm": 0.9127530759771358, "learning_rate": 9.885194212105498e-06, "loss": 0.5022, "step": 3142 }, { "epoch": 0.09632830697560378, "grad_norm": 1.883934417878318, "learning_rate": 9.885088441934257e-06, "loss": 0.7603, "step": 3143 }, { "epoch": 0.09635895549834499, "grad_norm": 1.8772890553821155, "learning_rate": 9.884982623628987e-06, "loss": 0.7297, "step": 3144 }, { "epoch": 0.09638960402108618, "grad_norm": 2.0639564985082473, "learning_rate": 9.884876757190736e-06, "loss": 0.7847, "step": 3145 }, { "epoch": 0.09642025254382738, "grad_norm": 2.4254010058073936, "learning_rate": 9.884770842620541e-06, "loss": 0.8096, "step": 3146 }, { "epoch": 0.09645090106656859, "grad_norm": 0.9544661373127116, "learning_rate": 9.884664879919452e-06, "loss": 0.5119, "step": 3147 }, { "epoch": 0.0964815495893098, "grad_norm": 1.94302472192901, "learning_rate": 9.88455886908851e-06, "loss": 0.7738, "step": 3148 }, { "epoch": 0.096512198112051, "grad_norm": 1.8986480881130983, "learning_rate": 9.884452810128757e-06, "loss": 0.5832, "step": 3149 }, { "epoch": 0.0965428466347922, "grad_norm": 1.835629283485844, "learning_rate": 9.884346703041243e-06, "loss": 0.6869, "step": 3150 }, { "epoch": 0.09657349515753341, "grad_norm": 1.9596889728030968, "learning_rate": 9.88424054782701e-06, "loss": 0.8015, "step": 3151 }, { "epoch": 0.0966041436802746, "grad_norm": 1.9577383839908424, "learning_rate": 9.884134344487106e-06, "loss": 0.7906, "step": 3152 }, { "epoch": 0.09663479220301581, "grad_norm": 1.9887874349626393, "learning_rate": 9.884028093022577e-06, "loss": 0.7341, "step": 3153 }, { "epoch": 0.09666544072575702, "grad_norm": 1.8460487455767838, "learning_rate": 9.88392179343447e-06, "loss": 0.6699, "step": 3154 }, { "epoch": 0.09669608924849822, "grad_norm": 1.970601467745264, "learning_rate": 9.88381544572383e-06, "loss": 0.7812, "step": 3155 }, { "epoch": 0.09672673777123943, "grad_norm": 0.9996326784576466, "learning_rate": 9.883709049891709e-06, "loss": 0.5151, "step": 3156 }, { "epoch": 0.09675738629398063, "grad_norm": 1.8210783678889526, "learning_rate": 9.883602605939151e-06, "loss": 0.7054, "step": 3157 }, { "epoch": 0.09678803481672184, "grad_norm": 2.1411798110278752, "learning_rate": 9.883496113867209e-06, "loss": 0.8208, "step": 3158 }, { "epoch": 0.09681868333946304, "grad_norm": 1.7605945795994768, "learning_rate": 9.883389573676929e-06, "loss": 0.8128, "step": 3159 }, { "epoch": 0.09684933186220424, "grad_norm": 1.745836413964514, "learning_rate": 9.883282985369362e-06, "loss": 0.6826, "step": 3160 }, { "epoch": 0.09687998038494544, "grad_norm": 1.8639985818750056, "learning_rate": 9.88317634894556e-06, "loss": 0.7358, "step": 3161 }, { "epoch": 0.09691062890768665, "grad_norm": 2.004590921901454, "learning_rate": 9.883069664406571e-06, "loss": 0.7167, "step": 3162 }, { "epoch": 0.09694127743042785, "grad_norm": 0.8706534899914451, "learning_rate": 9.882962931753446e-06, "loss": 0.5057, "step": 3163 }, { "epoch": 0.09697192595316906, "grad_norm": 1.9123380768522737, "learning_rate": 9.88285615098724e-06, "loss": 0.7448, "step": 3164 }, { "epoch": 0.09700257447591026, "grad_norm": 1.719084803459663, "learning_rate": 9.882749322109002e-06, "loss": 0.794, "step": 3165 }, { "epoch": 0.09703322299865147, "grad_norm": 1.9730991698784919, "learning_rate": 9.882642445119784e-06, "loss": 0.6757, "step": 3166 }, { "epoch": 0.09706387152139268, "grad_norm": 2.021802651029125, "learning_rate": 9.882535520020641e-06, "loss": 0.9024, "step": 3167 }, { "epoch": 0.09709452004413387, "grad_norm": 1.8791866763738625, "learning_rate": 9.88242854681263e-06, "loss": 0.787, "step": 3168 }, { "epoch": 0.09712516856687507, "grad_norm": 1.946588997564343, "learning_rate": 9.882321525496799e-06, "loss": 0.7028, "step": 3169 }, { "epoch": 0.09715581708961628, "grad_norm": 1.957075250550083, "learning_rate": 9.882214456074204e-06, "loss": 0.8063, "step": 3170 }, { "epoch": 0.09718646561235748, "grad_norm": 1.0202594536853047, "learning_rate": 9.882107338545902e-06, "loss": 0.5317, "step": 3171 }, { "epoch": 0.09721711413509869, "grad_norm": 1.879528320031204, "learning_rate": 9.882000172912946e-06, "loss": 0.7898, "step": 3172 }, { "epoch": 0.0972477626578399, "grad_norm": 0.8580450193614779, "learning_rate": 9.881892959176394e-06, "loss": 0.5137, "step": 3173 }, { "epoch": 0.0972784111805811, "grad_norm": 1.7688457361696521, "learning_rate": 9.8817856973373e-06, "loss": 0.769, "step": 3174 }, { "epoch": 0.0973090597033223, "grad_norm": 1.913719303835709, "learning_rate": 9.881678387396724e-06, "loss": 0.7216, "step": 3175 }, { "epoch": 0.0973397082260635, "grad_norm": 2.880182839379903, "learning_rate": 9.881571029355724e-06, "loss": 0.8087, "step": 3176 }, { "epoch": 0.0973703567488047, "grad_norm": 1.8739205845765692, "learning_rate": 9.881463623215352e-06, "loss": 0.7341, "step": 3177 }, { "epoch": 0.09740100527154591, "grad_norm": 2.0294702946810843, "learning_rate": 9.88135616897667e-06, "loss": 0.7623, "step": 3178 }, { "epoch": 0.09743165379428712, "grad_norm": 2.242445423686008, "learning_rate": 9.88124866664074e-06, "loss": 0.8428, "step": 3179 }, { "epoch": 0.09746230231702832, "grad_norm": 2.08721651838326, "learning_rate": 9.881141116208614e-06, "loss": 0.7868, "step": 3180 }, { "epoch": 0.09749295083976953, "grad_norm": 1.7510215468263186, "learning_rate": 9.88103351768136e-06, "loss": 0.6951, "step": 3181 }, { "epoch": 0.09752359936251073, "grad_norm": 1.9081783433671262, "learning_rate": 9.88092587106003e-06, "loss": 0.8246, "step": 3182 }, { "epoch": 0.09755424788525192, "grad_norm": 2.180843408756449, "learning_rate": 9.88081817634569e-06, "loss": 0.7097, "step": 3183 }, { "epoch": 0.09758489640799313, "grad_norm": 2.151772218546445, "learning_rate": 9.8807104335394e-06, "loss": 0.6847, "step": 3184 }, { "epoch": 0.09761554493073434, "grad_norm": 2.0825450609906375, "learning_rate": 9.88060264264222e-06, "loss": 0.7928, "step": 3185 }, { "epoch": 0.09764619345347554, "grad_norm": 2.0964817187516074, "learning_rate": 9.880494803655216e-06, "loss": 0.7167, "step": 3186 }, { "epoch": 0.09767684197621675, "grad_norm": 1.8974306783112393, "learning_rate": 9.880386916579446e-06, "loss": 0.7316, "step": 3187 }, { "epoch": 0.09770749049895795, "grad_norm": 1.8018454447100123, "learning_rate": 9.880278981415975e-06, "loss": 0.7595, "step": 3188 }, { "epoch": 0.09773813902169916, "grad_norm": 2.269916958650838, "learning_rate": 9.880170998165868e-06, "loss": 0.7981, "step": 3189 }, { "epoch": 0.09776878754444036, "grad_norm": 2.034776139013544, "learning_rate": 9.880062966830186e-06, "loss": 0.847, "step": 3190 }, { "epoch": 0.09779943606718156, "grad_norm": 2.0148172986083486, "learning_rate": 9.879954887409996e-06, "loss": 0.7871, "step": 3191 }, { "epoch": 0.09783008458992276, "grad_norm": 2.0063430637094775, "learning_rate": 9.879846759906361e-06, "loss": 0.6791, "step": 3192 }, { "epoch": 0.09786073311266397, "grad_norm": 1.5690474620393513, "learning_rate": 9.87973858432035e-06, "loss": 0.6968, "step": 3193 }, { "epoch": 0.09789138163540517, "grad_norm": 1.896185144749186, "learning_rate": 9.879630360653022e-06, "loss": 0.7865, "step": 3194 }, { "epoch": 0.09792203015814638, "grad_norm": 1.2319826568718453, "learning_rate": 9.879522088905448e-06, "loss": 0.5177, "step": 3195 }, { "epoch": 0.09795267868088758, "grad_norm": 2.155809411328702, "learning_rate": 9.879413769078697e-06, "loss": 0.7917, "step": 3196 }, { "epoch": 0.09798332720362879, "grad_norm": 2.1244073602513986, "learning_rate": 9.879305401173832e-06, "loss": 0.7365, "step": 3197 }, { "epoch": 0.09801397572637, "grad_norm": 0.901133685798479, "learning_rate": 9.879196985191923e-06, "loss": 0.5313, "step": 3198 }, { "epoch": 0.09804462424911119, "grad_norm": 2.1641980841787802, "learning_rate": 9.87908852113404e-06, "loss": 0.8165, "step": 3199 }, { "epoch": 0.09807527277185239, "grad_norm": 1.9078519011508377, "learning_rate": 9.878980009001245e-06, "loss": 0.7285, "step": 3200 }, { "epoch": 0.0981059212945936, "grad_norm": 1.9217851238125245, "learning_rate": 9.878871448794615e-06, "loss": 0.7739, "step": 3201 }, { "epoch": 0.0981365698173348, "grad_norm": 1.0518819079195778, "learning_rate": 9.878762840515215e-06, "loss": 0.5086, "step": 3202 }, { "epoch": 0.09816721834007601, "grad_norm": 2.0278005087145377, "learning_rate": 9.878654184164116e-06, "loss": 0.7691, "step": 3203 }, { "epoch": 0.09819786686281722, "grad_norm": 0.9283745697957672, "learning_rate": 9.87854547974239e-06, "loss": 0.519, "step": 3204 }, { "epoch": 0.09822851538555842, "grad_norm": 1.8760075800902942, "learning_rate": 9.878436727251106e-06, "loss": 0.7133, "step": 3205 }, { "epoch": 0.09825916390829963, "grad_norm": 0.8624524541652753, "learning_rate": 9.878327926691338e-06, "loss": 0.5092, "step": 3206 }, { "epoch": 0.09828981243104082, "grad_norm": 2.0746144309209407, "learning_rate": 9.878219078064156e-06, "loss": 0.7634, "step": 3207 }, { "epoch": 0.09832046095378202, "grad_norm": 2.1299586293615276, "learning_rate": 9.878110181370634e-06, "loss": 0.707, "step": 3208 }, { "epoch": 0.09835110947652323, "grad_norm": 2.052442168661215, "learning_rate": 9.878001236611842e-06, "loss": 0.7332, "step": 3209 }, { "epoch": 0.09838175799926444, "grad_norm": 1.7587214488179168, "learning_rate": 9.877892243788858e-06, "loss": 0.7768, "step": 3210 }, { "epoch": 0.09841240652200564, "grad_norm": 1.9466496664165014, "learning_rate": 9.877783202902754e-06, "loss": 0.7572, "step": 3211 }, { "epoch": 0.09844305504474685, "grad_norm": 2.0916118445604974, "learning_rate": 9.877674113954603e-06, "loss": 0.7069, "step": 3212 }, { "epoch": 0.09847370356748805, "grad_norm": 1.15069458655777, "learning_rate": 9.877564976945482e-06, "loss": 0.5156, "step": 3213 }, { "epoch": 0.09850435209022924, "grad_norm": 2.006388524480147, "learning_rate": 9.877455791876464e-06, "loss": 0.7848, "step": 3214 }, { "epoch": 0.09853500061297045, "grad_norm": 2.038119925949998, "learning_rate": 9.877346558748626e-06, "loss": 0.6634, "step": 3215 }, { "epoch": 0.09856564913571166, "grad_norm": 1.9342967158314261, "learning_rate": 9.877237277563046e-06, "loss": 0.725, "step": 3216 }, { "epoch": 0.09859629765845286, "grad_norm": 1.9653537254438755, "learning_rate": 9.877127948320798e-06, "loss": 0.7376, "step": 3217 }, { "epoch": 0.09862694618119407, "grad_norm": 1.9659671125987035, "learning_rate": 9.87701857102296e-06, "loss": 0.8113, "step": 3218 }, { "epoch": 0.09865759470393527, "grad_norm": 0.9374740099067715, "learning_rate": 9.876909145670612e-06, "loss": 0.5091, "step": 3219 }, { "epoch": 0.09868824322667648, "grad_norm": 1.7781949543840276, "learning_rate": 9.876799672264828e-06, "loss": 0.7055, "step": 3220 }, { "epoch": 0.09871889174941768, "grad_norm": 2.0423266896493364, "learning_rate": 9.876690150806692e-06, "loss": 0.9437, "step": 3221 }, { "epoch": 0.09874954027215888, "grad_norm": 2.0004263402224134, "learning_rate": 9.876580581297277e-06, "loss": 0.7698, "step": 3222 }, { "epoch": 0.09878018879490008, "grad_norm": 2.009042189179956, "learning_rate": 9.876470963737667e-06, "loss": 0.7774, "step": 3223 }, { "epoch": 0.09881083731764129, "grad_norm": 1.8094690977907182, "learning_rate": 9.876361298128942e-06, "loss": 0.7168, "step": 3224 }, { "epoch": 0.09884148584038249, "grad_norm": 1.0775869869758814, "learning_rate": 9.876251584472181e-06, "loss": 0.5085, "step": 3225 }, { "epoch": 0.0988721343631237, "grad_norm": 1.0249273878587346, "learning_rate": 9.876141822768464e-06, "loss": 0.5244, "step": 3226 }, { "epoch": 0.0989027828858649, "grad_norm": 1.8299871154790213, "learning_rate": 9.876032013018875e-06, "loss": 0.6791, "step": 3227 }, { "epoch": 0.09893343140860611, "grad_norm": 1.9244729840576908, "learning_rate": 9.875922155224495e-06, "loss": 0.8648, "step": 3228 }, { "epoch": 0.09896407993134732, "grad_norm": 2.1330491830475924, "learning_rate": 9.875812249386407e-06, "loss": 0.7902, "step": 3229 }, { "epoch": 0.09899472845408851, "grad_norm": 2.2492504624816556, "learning_rate": 9.875702295505694e-06, "loss": 0.8205, "step": 3230 }, { "epoch": 0.09902537697682971, "grad_norm": 1.9587598453939024, "learning_rate": 9.875592293583438e-06, "loss": 0.761, "step": 3231 }, { "epoch": 0.09905602549957092, "grad_norm": 2.263616978296009, "learning_rate": 9.875482243620722e-06, "loss": 0.7683, "step": 3232 }, { "epoch": 0.09908667402231212, "grad_norm": 2.0315963841288265, "learning_rate": 9.875372145618633e-06, "loss": 0.6955, "step": 3233 }, { "epoch": 0.09911732254505333, "grad_norm": 1.8582954571320818, "learning_rate": 9.875261999578257e-06, "loss": 0.7517, "step": 3234 }, { "epoch": 0.09914797106779454, "grad_norm": 1.6153113574381734, "learning_rate": 9.875151805500675e-06, "loss": 0.5115, "step": 3235 }, { "epoch": 0.09917861959053574, "grad_norm": 1.3375701805620093, "learning_rate": 9.875041563386975e-06, "loss": 0.5179, "step": 3236 }, { "epoch": 0.09920926811327695, "grad_norm": 2.151514070330498, "learning_rate": 9.874931273238244e-06, "loss": 0.8259, "step": 3237 }, { "epoch": 0.09923991663601814, "grad_norm": 2.005925588374603, "learning_rate": 9.874820935055566e-06, "loss": 0.7804, "step": 3238 }, { "epoch": 0.09927056515875934, "grad_norm": 2.2280053048189914, "learning_rate": 9.874710548840032e-06, "loss": 0.7073, "step": 3239 }, { "epoch": 0.09930121368150055, "grad_norm": 2.0641089223358176, "learning_rate": 9.874600114592728e-06, "loss": 0.8326, "step": 3240 }, { "epoch": 0.09933186220424176, "grad_norm": 1.7671291535116802, "learning_rate": 9.87448963231474e-06, "loss": 0.8363, "step": 3241 }, { "epoch": 0.09936251072698296, "grad_norm": 1.9251167787675554, "learning_rate": 9.874379102007159e-06, "loss": 0.7227, "step": 3242 }, { "epoch": 0.09939315924972417, "grad_norm": 1.9566117854644487, "learning_rate": 9.874268523671074e-06, "loss": 0.7429, "step": 3243 }, { "epoch": 0.09942380777246537, "grad_norm": 1.9422960535065812, "learning_rate": 9.874157897307575e-06, "loss": 0.7377, "step": 3244 }, { "epoch": 0.09945445629520656, "grad_norm": 2.36596129949425, "learning_rate": 9.87404722291775e-06, "loss": 0.5547, "step": 3245 }, { "epoch": 0.09948510481794777, "grad_norm": 2.381953260260112, "learning_rate": 9.87393650050269e-06, "loss": 0.7536, "step": 3246 }, { "epoch": 0.09951575334068898, "grad_norm": 2.1931599278106555, "learning_rate": 9.873825730063488e-06, "loss": 0.7966, "step": 3247 }, { "epoch": 0.09954640186343018, "grad_norm": 1.018274146684996, "learning_rate": 9.873714911601234e-06, "loss": 0.5152, "step": 3248 }, { "epoch": 0.09957705038617139, "grad_norm": 1.839953225961578, "learning_rate": 9.873604045117018e-06, "loss": 0.692, "step": 3249 }, { "epoch": 0.09960769890891259, "grad_norm": 2.08708066188575, "learning_rate": 9.873493130611937e-06, "loss": 0.7683, "step": 3250 }, { "epoch": 0.0996383474316538, "grad_norm": 1.8561721977102414, "learning_rate": 9.87338216808708e-06, "loss": 0.8214, "step": 3251 }, { "epoch": 0.099668995954395, "grad_norm": 2.237482391162869, "learning_rate": 9.87327115754354e-06, "loss": 0.8687, "step": 3252 }, { "epoch": 0.0996996444771362, "grad_norm": 1.9466179651202504, "learning_rate": 9.873160098982415e-06, "loss": 0.8375, "step": 3253 }, { "epoch": 0.0997302929998774, "grad_norm": 1.9675676150136092, "learning_rate": 9.873048992404795e-06, "loss": 0.7838, "step": 3254 }, { "epoch": 0.0997609415226186, "grad_norm": 1.9212794873699468, "learning_rate": 9.872937837811778e-06, "loss": 0.6264, "step": 3255 }, { "epoch": 0.09979159004535981, "grad_norm": 2.4765108726741483, "learning_rate": 9.872826635204457e-06, "loss": 0.7965, "step": 3256 }, { "epoch": 0.09982223856810102, "grad_norm": 2.073278478589844, "learning_rate": 9.872715384583928e-06, "loss": 0.7432, "step": 3257 }, { "epoch": 0.09985288709084222, "grad_norm": 2.485815008419135, "learning_rate": 9.872604085951288e-06, "loss": 0.909, "step": 3258 }, { "epoch": 0.09988353561358343, "grad_norm": 1.9006286576993616, "learning_rate": 9.872492739307633e-06, "loss": 0.7789, "step": 3259 }, { "epoch": 0.09991418413632464, "grad_norm": 2.148361198660419, "learning_rate": 9.87238134465406e-06, "loss": 0.7066, "step": 3260 }, { "epoch": 0.09994483265906583, "grad_norm": 1.7593300740047142, "learning_rate": 9.872269901991668e-06, "loss": 0.7627, "step": 3261 }, { "epoch": 0.09997548118180703, "grad_norm": 2.155733006019818, "learning_rate": 9.872158411321552e-06, "loss": 0.873, "step": 3262 }, { "epoch": 0.10000612970454824, "grad_norm": 1.8733920795522887, "learning_rate": 9.872046872644815e-06, "loss": 0.7688, "step": 3263 }, { "epoch": 0.10003677822728944, "grad_norm": 2.0476401620364846, "learning_rate": 9.871935285962553e-06, "loss": 0.7024, "step": 3264 }, { "epoch": 0.10006742675003065, "grad_norm": 2.1200100733723692, "learning_rate": 9.871823651275865e-06, "loss": 0.7612, "step": 3265 }, { "epoch": 0.10009807527277186, "grad_norm": 2.1657792618663945, "learning_rate": 9.871711968585854e-06, "loss": 0.7458, "step": 3266 }, { "epoch": 0.10012872379551306, "grad_norm": 2.0303334175294676, "learning_rate": 9.871600237893617e-06, "loss": 0.8692, "step": 3267 }, { "epoch": 0.10015937231825427, "grad_norm": 1.9129794393179627, "learning_rate": 9.871488459200256e-06, "loss": 0.6889, "step": 3268 }, { "epoch": 0.10019002084099546, "grad_norm": 1.9043329749788893, "learning_rate": 9.871376632506872e-06, "loss": 0.7825, "step": 3269 }, { "epoch": 0.10022066936373666, "grad_norm": 1.982639948488753, "learning_rate": 9.87126475781457e-06, "loss": 0.7288, "step": 3270 }, { "epoch": 0.10025131788647787, "grad_norm": 2.2202283022302187, "learning_rate": 9.871152835124448e-06, "loss": 0.5604, "step": 3271 }, { "epoch": 0.10028196640921908, "grad_norm": 1.9745534427125302, "learning_rate": 9.871040864437613e-06, "loss": 0.8308, "step": 3272 }, { "epoch": 0.10031261493196028, "grad_norm": 2.4965000758145073, "learning_rate": 9.870928845755165e-06, "loss": 0.8974, "step": 3273 }, { "epoch": 0.10034326345470149, "grad_norm": 2.061319010007779, "learning_rate": 9.870816779078209e-06, "loss": 0.7094, "step": 3274 }, { "epoch": 0.10037391197744269, "grad_norm": 2.154273548526055, "learning_rate": 9.870704664407849e-06, "loss": 0.8112, "step": 3275 }, { "epoch": 0.10040456050018388, "grad_norm": 2.0980997102763435, "learning_rate": 9.870592501745189e-06, "loss": 0.8532, "step": 3276 }, { "epoch": 0.10043520902292509, "grad_norm": 1.8582525863970936, "learning_rate": 9.870480291091336e-06, "loss": 0.7667, "step": 3277 }, { "epoch": 0.1004658575456663, "grad_norm": 1.2895449947610842, "learning_rate": 9.870368032447393e-06, "loss": 0.5396, "step": 3278 }, { "epoch": 0.1004965060684075, "grad_norm": 1.8903999085205088, "learning_rate": 9.870255725814468e-06, "loss": 0.7487, "step": 3279 }, { "epoch": 0.1005271545911487, "grad_norm": 2.4476305814779735, "learning_rate": 9.870143371193668e-06, "loss": 0.7843, "step": 3280 }, { "epoch": 0.10055780311388991, "grad_norm": 1.9749681747092513, "learning_rate": 9.8700309685861e-06, "loss": 0.7796, "step": 3281 }, { "epoch": 0.10058845163663112, "grad_norm": 2.3532050997378047, "learning_rate": 9.86991851799287e-06, "loss": 0.7091, "step": 3282 }, { "epoch": 0.10061910015937232, "grad_norm": 1.0226076050991484, "learning_rate": 9.869806019415086e-06, "loss": 0.5011, "step": 3283 }, { "epoch": 0.10064974868211352, "grad_norm": 1.8361525128355767, "learning_rate": 9.869693472853858e-06, "loss": 0.7136, "step": 3284 }, { "epoch": 0.10068039720485472, "grad_norm": 2.1129954135872744, "learning_rate": 9.869580878310294e-06, "loss": 0.7474, "step": 3285 }, { "epoch": 0.10071104572759593, "grad_norm": 2.029249903712124, "learning_rate": 9.869468235785504e-06, "loss": 0.7976, "step": 3286 }, { "epoch": 0.10074169425033713, "grad_norm": 1.943745820663761, "learning_rate": 9.869355545280596e-06, "loss": 0.7509, "step": 3287 }, { "epoch": 0.10077234277307834, "grad_norm": 1.677609612556868, "learning_rate": 9.869242806796684e-06, "loss": 0.6448, "step": 3288 }, { "epoch": 0.10080299129581954, "grad_norm": 2.1251576686102225, "learning_rate": 9.869130020334876e-06, "loss": 0.7829, "step": 3289 }, { "epoch": 0.10083363981856075, "grad_norm": 1.9981924363323105, "learning_rate": 9.869017185896284e-06, "loss": 0.7417, "step": 3290 }, { "epoch": 0.10086428834130196, "grad_norm": 1.696036958822324, "learning_rate": 9.86890430348202e-06, "loss": 0.8902, "step": 3291 }, { "epoch": 0.10089493686404315, "grad_norm": 1.909475919363897, "learning_rate": 9.868791373093197e-06, "loss": 0.7679, "step": 3292 }, { "epoch": 0.10092558538678435, "grad_norm": 2.1826872843462017, "learning_rate": 9.868678394730925e-06, "loss": 0.76, "step": 3293 }, { "epoch": 0.10095623390952556, "grad_norm": 2.1446784573691353, "learning_rate": 9.868565368396321e-06, "loss": 0.6375, "step": 3294 }, { "epoch": 0.10098688243226676, "grad_norm": 1.9721750626928225, "learning_rate": 9.868452294090496e-06, "loss": 0.7277, "step": 3295 }, { "epoch": 0.10101753095500797, "grad_norm": 2.0206127079673184, "learning_rate": 9.868339171814565e-06, "loss": 0.8368, "step": 3296 }, { "epoch": 0.10104817947774918, "grad_norm": 1.7151611633186716, "learning_rate": 9.868226001569643e-06, "loss": 0.7622, "step": 3297 }, { "epoch": 0.10107882800049038, "grad_norm": 1.9544837982624697, "learning_rate": 9.868112783356843e-06, "loss": 0.6962, "step": 3298 }, { "epoch": 0.10110947652323159, "grad_norm": 2.047702220852074, "learning_rate": 9.867999517177284e-06, "loss": 0.8418, "step": 3299 }, { "epoch": 0.10114012504597278, "grad_norm": 2.1423074024163427, "learning_rate": 9.867886203032079e-06, "loss": 0.7779, "step": 3300 }, { "epoch": 0.10117077356871398, "grad_norm": 2.127523287193254, "learning_rate": 9.867772840922346e-06, "loss": 0.793, "step": 3301 }, { "epoch": 0.10120142209145519, "grad_norm": 2.0753149544134137, "learning_rate": 9.8676594308492e-06, "loss": 0.8097, "step": 3302 }, { "epoch": 0.1012320706141964, "grad_norm": 2.168458418442501, "learning_rate": 9.867545972813763e-06, "loss": 0.7917, "step": 3303 }, { "epoch": 0.1012627191369376, "grad_norm": 2.035512955757485, "learning_rate": 9.867432466817151e-06, "loss": 0.8014, "step": 3304 }, { "epoch": 0.1012933676596788, "grad_norm": 1.9088334710993347, "learning_rate": 9.867318912860479e-06, "loss": 0.6566, "step": 3305 }, { "epoch": 0.10132401618242001, "grad_norm": 2.285234916054993, "learning_rate": 9.867205310944868e-06, "loss": 0.7716, "step": 3306 }, { "epoch": 0.1013546647051612, "grad_norm": 2.314217686729217, "learning_rate": 9.867091661071439e-06, "loss": 0.8563, "step": 3307 }, { "epoch": 0.10138531322790241, "grad_norm": 1.7159920579219774, "learning_rate": 9.866977963241312e-06, "loss": 0.7063, "step": 3308 }, { "epoch": 0.10141596175064362, "grad_norm": 2.052521889143315, "learning_rate": 9.866864217455603e-06, "loss": 0.7328, "step": 3309 }, { "epoch": 0.10144661027338482, "grad_norm": 1.9656363582150504, "learning_rate": 9.866750423715437e-06, "loss": 0.7762, "step": 3310 }, { "epoch": 0.10147725879612603, "grad_norm": 2.0864739504126226, "learning_rate": 9.866636582021934e-06, "loss": 0.8276, "step": 3311 }, { "epoch": 0.10150790731886723, "grad_norm": 1.9502741170786484, "learning_rate": 9.866522692376215e-06, "loss": 0.775, "step": 3312 }, { "epoch": 0.10153855584160844, "grad_norm": 1.0238737071083501, "learning_rate": 9.866408754779402e-06, "loss": 0.5294, "step": 3313 }, { "epoch": 0.10156920436434964, "grad_norm": 0.9294288968472466, "learning_rate": 9.866294769232621e-06, "loss": 0.5125, "step": 3314 }, { "epoch": 0.10159985288709084, "grad_norm": 2.0859998740421637, "learning_rate": 9.86618073573699e-06, "loss": 0.8018, "step": 3315 }, { "epoch": 0.10163050140983204, "grad_norm": 1.0587081351925636, "learning_rate": 9.866066654293635e-06, "loss": 0.531, "step": 3316 }, { "epoch": 0.10166114993257325, "grad_norm": 1.7674176048978776, "learning_rate": 9.865952524903682e-06, "loss": 0.6435, "step": 3317 }, { "epoch": 0.10169179845531445, "grad_norm": 0.869045350132348, "learning_rate": 9.865838347568252e-06, "loss": 0.5365, "step": 3318 }, { "epoch": 0.10172244697805566, "grad_norm": 1.9780281596633964, "learning_rate": 9.865724122288474e-06, "loss": 0.8403, "step": 3319 }, { "epoch": 0.10175309550079686, "grad_norm": 1.9046770451025408, "learning_rate": 9.865609849065471e-06, "loss": 0.778, "step": 3320 }, { "epoch": 0.10178374402353807, "grad_norm": 2.316741996834157, "learning_rate": 9.865495527900369e-06, "loss": 0.8801, "step": 3321 }, { "epoch": 0.10181439254627928, "grad_norm": 1.77023889417196, "learning_rate": 9.865381158794293e-06, "loss": 0.753, "step": 3322 }, { "epoch": 0.10184504106902047, "grad_norm": 2.028515833875039, "learning_rate": 9.865266741748372e-06, "loss": 0.7805, "step": 3323 }, { "epoch": 0.10187568959176167, "grad_norm": 1.9888453501065764, "learning_rate": 9.865152276763735e-06, "loss": 0.7482, "step": 3324 }, { "epoch": 0.10190633811450288, "grad_norm": 1.9884453002949687, "learning_rate": 9.865037763841505e-06, "loss": 0.6361, "step": 3325 }, { "epoch": 0.10193698663724408, "grad_norm": 1.2465582785383895, "learning_rate": 9.864923202982815e-06, "loss": 0.5212, "step": 3326 }, { "epoch": 0.10196763515998529, "grad_norm": 1.7189831875676653, "learning_rate": 9.864808594188792e-06, "loss": 0.7746, "step": 3327 }, { "epoch": 0.1019982836827265, "grad_norm": 2.1702212944950072, "learning_rate": 9.864693937460565e-06, "loss": 0.741, "step": 3328 }, { "epoch": 0.1020289322054677, "grad_norm": 2.0236660714091905, "learning_rate": 9.864579232799263e-06, "loss": 0.8418, "step": 3329 }, { "epoch": 0.1020595807282089, "grad_norm": 1.7826780103267497, "learning_rate": 9.864464480206017e-06, "loss": 0.7443, "step": 3330 }, { "epoch": 0.1020902292509501, "grad_norm": 2.048389088130823, "learning_rate": 9.86434967968196e-06, "loss": 0.8108, "step": 3331 }, { "epoch": 0.1021208777736913, "grad_norm": 1.793435549234387, "learning_rate": 9.864234831228218e-06, "loss": 0.77, "step": 3332 }, { "epoch": 0.10215152629643251, "grad_norm": 1.8697426120305034, "learning_rate": 9.864119934845928e-06, "loss": 0.7568, "step": 3333 }, { "epoch": 0.10218217481917372, "grad_norm": 1.1247384604433528, "learning_rate": 9.86400499053622e-06, "loss": 0.5337, "step": 3334 }, { "epoch": 0.10221282334191492, "grad_norm": 0.9664996484315944, "learning_rate": 9.863889998300225e-06, "loss": 0.5073, "step": 3335 }, { "epoch": 0.10224347186465613, "grad_norm": 2.0986165254267846, "learning_rate": 9.863774958139078e-06, "loss": 0.759, "step": 3336 }, { "epoch": 0.10227412038739733, "grad_norm": 1.8849468042765154, "learning_rate": 9.863659870053912e-06, "loss": 0.8399, "step": 3337 }, { "epoch": 0.10230476891013854, "grad_norm": 2.0364647422751374, "learning_rate": 9.86354473404586e-06, "loss": 0.8049, "step": 3338 }, { "epoch": 0.10233541743287973, "grad_norm": 2.084005392014446, "learning_rate": 9.863429550116056e-06, "loss": 0.785, "step": 3339 }, { "epoch": 0.10236606595562094, "grad_norm": 1.9506389308905057, "learning_rate": 9.86331431826564e-06, "loss": 0.8099, "step": 3340 }, { "epoch": 0.10239671447836214, "grad_norm": 1.9876772446061046, "learning_rate": 9.863199038495741e-06, "loss": 0.7491, "step": 3341 }, { "epoch": 0.10242736300110335, "grad_norm": 1.7618386908108465, "learning_rate": 9.8630837108075e-06, "loss": 0.6849, "step": 3342 }, { "epoch": 0.10245801152384455, "grad_norm": 1.334403973050647, "learning_rate": 9.862968335202048e-06, "loss": 0.5423, "step": 3343 }, { "epoch": 0.10248866004658576, "grad_norm": 1.8877156589099788, "learning_rate": 9.862852911680527e-06, "loss": 0.8023, "step": 3344 }, { "epoch": 0.10251930856932696, "grad_norm": 2.0550252049257307, "learning_rate": 9.86273744024407e-06, "loss": 0.7626, "step": 3345 }, { "epoch": 0.10254995709206816, "grad_norm": 2.239664601669099, "learning_rate": 9.862621920893817e-06, "loss": 0.7011, "step": 3346 }, { "epoch": 0.10258060561480936, "grad_norm": 1.8063846513397017, "learning_rate": 9.862506353630908e-06, "loss": 0.707, "step": 3347 }, { "epoch": 0.10261125413755057, "grad_norm": 1.7754205051332632, "learning_rate": 9.86239073845648e-06, "loss": 0.733, "step": 3348 }, { "epoch": 0.10264190266029177, "grad_norm": 1.752841479592276, "learning_rate": 9.86227507537167e-06, "loss": 0.76, "step": 3349 }, { "epoch": 0.10267255118303298, "grad_norm": 0.949762443106377, "learning_rate": 9.86215936437762e-06, "loss": 0.5163, "step": 3350 }, { "epoch": 0.10270319970577418, "grad_norm": 2.2271368774351203, "learning_rate": 9.86204360547547e-06, "loss": 0.732, "step": 3351 }, { "epoch": 0.10273384822851539, "grad_norm": 1.8794123866454127, "learning_rate": 9.861927798666361e-06, "loss": 0.8053, "step": 3352 }, { "epoch": 0.1027644967512566, "grad_norm": 1.8605751804037651, "learning_rate": 9.861811943951432e-06, "loss": 0.7615, "step": 3353 }, { "epoch": 0.10279514527399779, "grad_norm": 2.040164550036215, "learning_rate": 9.861696041331828e-06, "loss": 0.7372, "step": 3354 }, { "epoch": 0.10282579379673899, "grad_norm": 2.5992955527778463, "learning_rate": 9.861580090808687e-06, "loss": 0.7815, "step": 3355 }, { "epoch": 0.1028564423194802, "grad_norm": 2.15843160965356, "learning_rate": 9.861464092383155e-06, "loss": 0.8411, "step": 3356 }, { "epoch": 0.1028870908422214, "grad_norm": 0.8965247873747452, "learning_rate": 9.86134804605637e-06, "loss": 0.5252, "step": 3357 }, { "epoch": 0.10291773936496261, "grad_norm": 2.127408927768419, "learning_rate": 9.861231951829484e-06, "loss": 0.8161, "step": 3358 }, { "epoch": 0.10294838788770382, "grad_norm": 1.7746405971719903, "learning_rate": 9.861115809703633e-06, "loss": 0.733, "step": 3359 }, { "epoch": 0.10297903641044502, "grad_norm": 1.8976954157705956, "learning_rate": 9.860999619679965e-06, "loss": 0.6941, "step": 3360 }, { "epoch": 0.10300968493318623, "grad_norm": 1.9372499014131155, "learning_rate": 9.860883381759622e-06, "loss": 0.8611, "step": 3361 }, { "epoch": 0.10304033345592742, "grad_norm": 1.9216925648227183, "learning_rate": 9.860767095943754e-06, "loss": 0.7107, "step": 3362 }, { "epoch": 0.10307098197866862, "grad_norm": 1.9247978866463764, "learning_rate": 9.860650762233504e-06, "loss": 0.7401, "step": 3363 }, { "epoch": 0.10310163050140983, "grad_norm": 2.0120639994926153, "learning_rate": 9.860534380630016e-06, "loss": 0.8481, "step": 3364 }, { "epoch": 0.10313227902415104, "grad_norm": 2.055111778332105, "learning_rate": 9.860417951134441e-06, "loss": 0.9191, "step": 3365 }, { "epoch": 0.10316292754689224, "grad_norm": 0.9661033746155321, "learning_rate": 9.860301473747923e-06, "loss": 0.4827, "step": 3366 }, { "epoch": 0.10319357606963345, "grad_norm": 1.9763417018184317, "learning_rate": 9.860184948471613e-06, "loss": 0.8029, "step": 3367 }, { "epoch": 0.10322422459237465, "grad_norm": 1.7606451711765014, "learning_rate": 9.860068375306655e-06, "loss": 0.6759, "step": 3368 }, { "epoch": 0.10325487311511586, "grad_norm": 1.8587996711665913, "learning_rate": 9.859951754254203e-06, "loss": 0.749, "step": 3369 }, { "epoch": 0.10328552163785705, "grad_norm": 1.8020954369199185, "learning_rate": 9.859835085315399e-06, "loss": 0.8178, "step": 3370 }, { "epoch": 0.10331617016059826, "grad_norm": 1.9291517126398061, "learning_rate": 9.859718368491398e-06, "loss": 0.8434, "step": 3371 }, { "epoch": 0.10334681868333946, "grad_norm": 1.794283709415034, "learning_rate": 9.85960160378335e-06, "loss": 0.7331, "step": 3372 }, { "epoch": 0.10337746720608067, "grad_norm": 1.976305253465015, "learning_rate": 9.859484791192402e-06, "loss": 0.8231, "step": 3373 }, { "epoch": 0.10340811572882187, "grad_norm": 1.9568111254858573, "learning_rate": 9.859367930719708e-06, "loss": 0.726, "step": 3374 }, { "epoch": 0.10343876425156308, "grad_norm": 1.9662485005680252, "learning_rate": 9.859251022366418e-06, "loss": 0.8185, "step": 3375 }, { "epoch": 0.10346941277430428, "grad_norm": 0.9669851871740194, "learning_rate": 9.859134066133685e-06, "loss": 0.5256, "step": 3376 }, { "epoch": 0.10350006129704548, "grad_norm": 1.967094325560716, "learning_rate": 9.85901706202266e-06, "loss": 0.7563, "step": 3377 }, { "epoch": 0.10353070981978668, "grad_norm": 1.951027562747449, "learning_rate": 9.858900010034498e-06, "loss": 0.7596, "step": 3378 }, { "epoch": 0.10356135834252789, "grad_norm": 1.97136216305659, "learning_rate": 9.858782910170348e-06, "loss": 0.7748, "step": 3379 }, { "epoch": 0.10359200686526909, "grad_norm": 1.8523293371693412, "learning_rate": 9.85866576243137e-06, "loss": 0.6748, "step": 3380 }, { "epoch": 0.1036226553880103, "grad_norm": 1.8899013632198438, "learning_rate": 9.858548566818712e-06, "loss": 0.7613, "step": 3381 }, { "epoch": 0.1036533039107515, "grad_norm": 1.6126234848173633, "learning_rate": 9.858431323333535e-06, "loss": 0.7234, "step": 3382 }, { "epoch": 0.10368395243349271, "grad_norm": 2.0943469341012597, "learning_rate": 9.85831403197699e-06, "loss": 0.7862, "step": 3383 }, { "epoch": 0.10371460095623392, "grad_norm": 1.9783226767083921, "learning_rate": 9.858196692750233e-06, "loss": 0.7186, "step": 3384 }, { "epoch": 0.10374524947897511, "grad_norm": 2.1863105356768027, "learning_rate": 9.858079305654421e-06, "loss": 0.8247, "step": 3385 }, { "epoch": 0.10377589800171631, "grad_norm": 1.5416822913850479, "learning_rate": 9.857961870690712e-06, "loss": 0.6566, "step": 3386 }, { "epoch": 0.10380654652445752, "grad_norm": 2.0246202023490603, "learning_rate": 9.85784438786026e-06, "loss": 0.8187, "step": 3387 }, { "epoch": 0.10383719504719872, "grad_norm": 1.7470817386904833, "learning_rate": 9.857726857164227e-06, "loss": 0.6718, "step": 3388 }, { "epoch": 0.10386784356993993, "grad_norm": 1.8915248731316423, "learning_rate": 9.857609278603766e-06, "loss": 0.7491, "step": 3389 }, { "epoch": 0.10389849209268114, "grad_norm": 1.8951994298049348, "learning_rate": 9.857491652180038e-06, "loss": 0.7428, "step": 3390 }, { "epoch": 0.10392914061542234, "grad_norm": 1.9174888612575596, "learning_rate": 9.857373977894202e-06, "loss": 0.7153, "step": 3391 }, { "epoch": 0.10395978913816355, "grad_norm": 1.9478080429944078, "learning_rate": 9.857256255747418e-06, "loss": 0.8533, "step": 3392 }, { "epoch": 0.10399043766090474, "grad_norm": 1.7054965872170793, "learning_rate": 9.857138485740845e-06, "loss": 0.7147, "step": 3393 }, { "epoch": 0.10402108618364594, "grad_norm": 1.951528642540221, "learning_rate": 9.857020667875645e-06, "loss": 0.7213, "step": 3394 }, { "epoch": 0.10405173470638715, "grad_norm": 1.7715960367938648, "learning_rate": 9.856902802152977e-06, "loss": 0.811, "step": 3395 }, { "epoch": 0.10408238322912836, "grad_norm": 1.7663989444942718, "learning_rate": 9.856784888574e-06, "loss": 0.7038, "step": 3396 }, { "epoch": 0.10411303175186956, "grad_norm": 1.7441800861234025, "learning_rate": 9.856666927139882e-06, "loss": 0.7783, "step": 3397 }, { "epoch": 0.10414368027461077, "grad_norm": 1.943100973957482, "learning_rate": 9.856548917851782e-06, "loss": 0.7042, "step": 3398 }, { "epoch": 0.10417432879735197, "grad_norm": 1.7806277838897804, "learning_rate": 9.856430860710862e-06, "loss": 0.7364, "step": 3399 }, { "epoch": 0.10420497732009318, "grad_norm": 1.8231775756091178, "learning_rate": 9.856312755718286e-06, "loss": 0.767, "step": 3400 }, { "epoch": 0.10423562584283437, "grad_norm": 1.845130596851287, "learning_rate": 9.85619460287522e-06, "loss": 0.7244, "step": 3401 }, { "epoch": 0.10426627436557558, "grad_norm": 1.8466730492514754, "learning_rate": 9.856076402182824e-06, "loss": 0.7215, "step": 3402 }, { "epoch": 0.10429692288831678, "grad_norm": 2.069180900969566, "learning_rate": 9.855958153642265e-06, "loss": 0.7938, "step": 3403 }, { "epoch": 0.10432757141105799, "grad_norm": 1.7616843571988896, "learning_rate": 9.855839857254709e-06, "loss": 0.6228, "step": 3404 }, { "epoch": 0.10435821993379919, "grad_norm": 1.817916295715751, "learning_rate": 9.855721513021319e-06, "loss": 0.7292, "step": 3405 }, { "epoch": 0.1043888684565404, "grad_norm": 2.149134016429619, "learning_rate": 9.855603120943263e-06, "loss": 0.7677, "step": 3406 }, { "epoch": 0.1044195169792816, "grad_norm": 1.7823200503006205, "learning_rate": 9.855484681021708e-06, "loss": 0.8291, "step": 3407 }, { "epoch": 0.1044501655020228, "grad_norm": 1.9915742772368141, "learning_rate": 9.855366193257818e-06, "loss": 0.8144, "step": 3408 }, { "epoch": 0.104480814024764, "grad_norm": 2.2686934413561906, "learning_rate": 9.855247657652764e-06, "loss": 0.8958, "step": 3409 }, { "epoch": 0.10451146254750521, "grad_norm": 1.9504772686285592, "learning_rate": 9.855129074207714e-06, "loss": 0.7124, "step": 3410 }, { "epoch": 0.10454211107024641, "grad_norm": 0.9195438563876296, "learning_rate": 9.855010442923832e-06, "loss": 0.5116, "step": 3411 }, { "epoch": 0.10457275959298762, "grad_norm": 2.1987103548423663, "learning_rate": 9.854891763802292e-06, "loss": 0.8782, "step": 3412 }, { "epoch": 0.10460340811572882, "grad_norm": 2.1032521390605323, "learning_rate": 9.85477303684426e-06, "loss": 0.6942, "step": 3413 }, { "epoch": 0.10463405663847003, "grad_norm": 0.8938263832214811, "learning_rate": 9.85465426205091e-06, "loss": 0.5199, "step": 3414 }, { "epoch": 0.10466470516121124, "grad_norm": 1.9284637768679715, "learning_rate": 9.854535439423404e-06, "loss": 0.852, "step": 3415 }, { "epoch": 0.10469535368395243, "grad_norm": 2.0394588249447896, "learning_rate": 9.854416568962924e-06, "loss": 0.7737, "step": 3416 }, { "epoch": 0.10472600220669363, "grad_norm": 0.9022644134442472, "learning_rate": 9.854297650670632e-06, "loss": 0.5242, "step": 3417 }, { "epoch": 0.10475665072943484, "grad_norm": 1.9727589950465614, "learning_rate": 9.854178684547704e-06, "loss": 0.7456, "step": 3418 }, { "epoch": 0.10478729925217604, "grad_norm": 1.7546696629227836, "learning_rate": 9.85405967059531e-06, "loss": 0.8299, "step": 3419 }, { "epoch": 0.10481794777491725, "grad_norm": 1.8388203292837142, "learning_rate": 9.853940608814628e-06, "loss": 0.8046, "step": 3420 }, { "epoch": 0.10484859629765846, "grad_norm": 1.906513364628526, "learning_rate": 9.853821499206824e-06, "loss": 0.7497, "step": 3421 }, { "epoch": 0.10487924482039966, "grad_norm": 1.9178955842834906, "learning_rate": 9.853702341773075e-06, "loss": 0.7971, "step": 3422 }, { "epoch": 0.10490989334314087, "grad_norm": 1.8995258725378348, "learning_rate": 9.853583136514557e-06, "loss": 0.7642, "step": 3423 }, { "epoch": 0.10494054186588206, "grad_norm": 1.9572938069104808, "learning_rate": 9.85346388343244e-06, "loss": 0.7246, "step": 3424 }, { "epoch": 0.10497119038862326, "grad_norm": 0.971491576045539, "learning_rate": 9.853344582527903e-06, "loss": 0.5077, "step": 3425 }, { "epoch": 0.10500183891136447, "grad_norm": 2.0400971962376024, "learning_rate": 9.85322523380212e-06, "loss": 0.8013, "step": 3426 }, { "epoch": 0.10503248743410568, "grad_norm": 1.7541364741140228, "learning_rate": 9.853105837256267e-06, "loss": 0.8695, "step": 3427 }, { "epoch": 0.10506313595684688, "grad_norm": 2.032836682235508, "learning_rate": 9.85298639289152e-06, "loss": 0.8271, "step": 3428 }, { "epoch": 0.10509378447958809, "grad_norm": 1.876576207374924, "learning_rate": 9.852866900709058e-06, "loss": 0.7188, "step": 3429 }, { "epoch": 0.10512443300232929, "grad_norm": 2.0707135866323516, "learning_rate": 9.852747360710055e-06, "loss": 0.7747, "step": 3430 }, { "epoch": 0.1051550815250705, "grad_norm": 1.7031550377170925, "learning_rate": 9.85262777289569e-06, "loss": 0.7583, "step": 3431 }, { "epoch": 0.10518573004781169, "grad_norm": 2.0478577704723535, "learning_rate": 9.852508137267143e-06, "loss": 0.7526, "step": 3432 }, { "epoch": 0.1052163785705529, "grad_norm": 1.850219576902033, "learning_rate": 9.852388453825592e-06, "loss": 0.7673, "step": 3433 }, { "epoch": 0.1052470270932941, "grad_norm": 1.8288105224958613, "learning_rate": 9.852268722572216e-06, "loss": 0.7235, "step": 3434 }, { "epoch": 0.10527767561603531, "grad_norm": 1.0943549381203692, "learning_rate": 9.852148943508195e-06, "loss": 0.5162, "step": 3435 }, { "epoch": 0.10530832413877651, "grad_norm": 2.1287883423194622, "learning_rate": 9.852029116634708e-06, "loss": 0.763, "step": 3436 }, { "epoch": 0.10533897266151772, "grad_norm": 2.084992860152075, "learning_rate": 9.851909241952938e-06, "loss": 0.7562, "step": 3437 }, { "epoch": 0.10536962118425892, "grad_norm": 0.8729156347121957, "learning_rate": 9.851789319464064e-06, "loss": 0.5213, "step": 3438 }, { "epoch": 0.10540026970700012, "grad_norm": 2.354787565214842, "learning_rate": 9.851669349169269e-06, "loss": 0.8182, "step": 3439 }, { "epoch": 0.10543091822974132, "grad_norm": 1.9733395546914139, "learning_rate": 9.851549331069734e-06, "loss": 0.803, "step": 3440 }, { "epoch": 0.10546156675248253, "grad_norm": 1.8952983693081684, "learning_rate": 9.85142926516664e-06, "loss": 0.7096, "step": 3441 }, { "epoch": 0.10549221527522373, "grad_norm": 0.9546695178247402, "learning_rate": 9.851309151461176e-06, "loss": 0.5215, "step": 3442 }, { "epoch": 0.10552286379796494, "grad_norm": 1.959058560639718, "learning_rate": 9.85118898995452e-06, "loss": 0.7837, "step": 3443 }, { "epoch": 0.10555351232070614, "grad_norm": 1.885958498865122, "learning_rate": 9.851068780647857e-06, "loss": 0.799, "step": 3444 }, { "epoch": 0.10558416084344735, "grad_norm": 1.7365840007929305, "learning_rate": 9.850948523542373e-06, "loss": 0.7031, "step": 3445 }, { "epoch": 0.10561480936618856, "grad_norm": 1.8840408590769417, "learning_rate": 9.850828218639252e-06, "loss": 0.6794, "step": 3446 }, { "epoch": 0.10564545788892975, "grad_norm": 0.9125811777371574, "learning_rate": 9.85070786593968e-06, "loss": 0.4937, "step": 3447 }, { "epoch": 0.10567610641167095, "grad_norm": 1.9156843811575894, "learning_rate": 9.850587465444841e-06, "loss": 0.7944, "step": 3448 }, { "epoch": 0.10570675493441216, "grad_norm": 1.9301497077104062, "learning_rate": 9.850467017155922e-06, "loss": 0.6034, "step": 3449 }, { "epoch": 0.10573740345715336, "grad_norm": 1.6813582253270296, "learning_rate": 9.850346521074112e-06, "loss": 0.69, "step": 3450 }, { "epoch": 0.10576805197989457, "grad_norm": 1.929648718523143, "learning_rate": 9.850225977200596e-06, "loss": 0.8481, "step": 3451 }, { "epoch": 0.10579870050263578, "grad_norm": 2.0335329910731885, "learning_rate": 9.850105385536564e-06, "loss": 0.7747, "step": 3452 }, { "epoch": 0.10582934902537698, "grad_norm": 2.150751013027864, "learning_rate": 9.849984746083202e-06, "loss": 0.7533, "step": 3453 }, { "epoch": 0.10585999754811819, "grad_norm": 2.0002935222484215, "learning_rate": 9.849864058841699e-06, "loss": 0.8071, "step": 3454 }, { "epoch": 0.10589064607085938, "grad_norm": 1.9807250174425348, "learning_rate": 9.849743323813243e-06, "loss": 0.7472, "step": 3455 }, { "epoch": 0.10592129459360058, "grad_norm": 1.8749731046675853, "learning_rate": 9.849622540999027e-06, "loss": 0.7543, "step": 3456 }, { "epoch": 0.10595194311634179, "grad_norm": 1.0215085226886347, "learning_rate": 9.849501710400238e-06, "loss": 0.511, "step": 3457 }, { "epoch": 0.105982591639083, "grad_norm": 2.3995366251130648, "learning_rate": 9.84938083201807e-06, "loss": 0.8441, "step": 3458 }, { "epoch": 0.1060132401618242, "grad_norm": 2.1547902635329055, "learning_rate": 9.84925990585371e-06, "loss": 0.8197, "step": 3459 }, { "epoch": 0.10604388868456541, "grad_norm": 2.276678789596579, "learning_rate": 9.849138931908352e-06, "loss": 0.7576, "step": 3460 }, { "epoch": 0.10607453720730661, "grad_norm": 1.8722300840836583, "learning_rate": 9.849017910183187e-06, "loss": 0.7652, "step": 3461 }, { "epoch": 0.10610518573004782, "grad_norm": 2.0026060604189375, "learning_rate": 9.848896840679408e-06, "loss": 0.6869, "step": 3462 }, { "epoch": 0.10613583425278901, "grad_norm": 1.9008937081813915, "learning_rate": 9.848775723398207e-06, "loss": 0.8644, "step": 3463 }, { "epoch": 0.10616648277553022, "grad_norm": 1.797626390991786, "learning_rate": 9.84865455834078e-06, "loss": 0.7392, "step": 3464 }, { "epoch": 0.10619713129827142, "grad_norm": 1.841066465713901, "learning_rate": 9.848533345508318e-06, "loss": 0.7445, "step": 3465 }, { "epoch": 0.10622777982101263, "grad_norm": 2.66661881858276, "learning_rate": 9.848412084902017e-06, "loss": 0.9385, "step": 3466 }, { "epoch": 0.10625842834375383, "grad_norm": 0.9443273191258454, "learning_rate": 9.848290776523071e-06, "loss": 0.5011, "step": 3467 }, { "epoch": 0.10628907686649504, "grad_norm": 1.7461835680378532, "learning_rate": 9.848169420372675e-06, "loss": 0.7667, "step": 3468 }, { "epoch": 0.10631972538923624, "grad_norm": 1.9726695664471732, "learning_rate": 9.848048016452025e-06, "loss": 0.7407, "step": 3469 }, { "epoch": 0.10635037391197744, "grad_norm": 1.6345912277439651, "learning_rate": 9.847926564762318e-06, "loss": 0.6027, "step": 3470 }, { "epoch": 0.10638102243471864, "grad_norm": 1.7589837545735427, "learning_rate": 9.84780506530475e-06, "loss": 0.6685, "step": 3471 }, { "epoch": 0.10641167095745985, "grad_norm": 2.1257637918142014, "learning_rate": 9.84768351808052e-06, "loss": 0.8253, "step": 3472 }, { "epoch": 0.10644231948020105, "grad_norm": 1.9678743812558626, "learning_rate": 9.847561923090823e-06, "loss": 0.8214, "step": 3473 }, { "epoch": 0.10647296800294226, "grad_norm": 2.3594635857606856, "learning_rate": 9.847440280336856e-06, "loss": 0.8665, "step": 3474 }, { "epoch": 0.10650361652568346, "grad_norm": 2.1116057099883707, "learning_rate": 9.847318589819821e-06, "loss": 0.7657, "step": 3475 }, { "epoch": 0.10653426504842467, "grad_norm": 1.9797082415985445, "learning_rate": 9.847196851540916e-06, "loss": 0.7443, "step": 3476 }, { "epoch": 0.10656491357116588, "grad_norm": 1.8322297876459641, "learning_rate": 9.84707506550134e-06, "loss": 0.7379, "step": 3477 }, { "epoch": 0.10659556209390707, "grad_norm": 2.0488398205846146, "learning_rate": 9.846953231702294e-06, "loss": 0.806, "step": 3478 }, { "epoch": 0.10662621061664827, "grad_norm": 1.763243889151968, "learning_rate": 9.846831350144977e-06, "loss": 0.808, "step": 3479 }, { "epoch": 0.10665685913938948, "grad_norm": 2.388555909219983, "learning_rate": 9.84670942083059e-06, "loss": 0.8321, "step": 3480 }, { "epoch": 0.10668750766213068, "grad_norm": 1.9358414851476886, "learning_rate": 9.846587443760337e-06, "loss": 0.8751, "step": 3481 }, { "epoch": 0.10671815618487189, "grad_norm": 2.0191139355321504, "learning_rate": 9.846465418935415e-06, "loss": 0.7018, "step": 3482 }, { "epoch": 0.1067488047076131, "grad_norm": 1.980583269869422, "learning_rate": 9.84634334635703e-06, "loss": 0.8109, "step": 3483 }, { "epoch": 0.1067794532303543, "grad_norm": 1.8995612674891607, "learning_rate": 9.846221226026386e-06, "loss": 0.8122, "step": 3484 }, { "epoch": 0.10681010175309551, "grad_norm": 1.886385085832981, "learning_rate": 9.846099057944683e-06, "loss": 0.7484, "step": 3485 }, { "epoch": 0.1068407502758367, "grad_norm": 2.001841442043668, "learning_rate": 9.845976842113125e-06, "loss": 0.7073, "step": 3486 }, { "epoch": 0.1068713987985779, "grad_norm": 1.9293388805333684, "learning_rate": 9.845854578532918e-06, "loss": 0.6934, "step": 3487 }, { "epoch": 0.10690204732131911, "grad_norm": 1.6475458449704203, "learning_rate": 9.845732267205266e-06, "loss": 0.7595, "step": 3488 }, { "epoch": 0.10693269584406032, "grad_norm": 1.9066042714976086, "learning_rate": 9.845609908131374e-06, "loss": 0.7447, "step": 3489 }, { "epoch": 0.10696334436680152, "grad_norm": 1.9136830580504547, "learning_rate": 9.845487501312449e-06, "loss": 0.7721, "step": 3490 }, { "epoch": 0.10699399288954273, "grad_norm": 2.1988839479312077, "learning_rate": 9.845365046749695e-06, "loss": 0.8332, "step": 3491 }, { "epoch": 0.10702464141228393, "grad_norm": 1.9719822506576345, "learning_rate": 9.84524254444432e-06, "loss": 0.7543, "step": 3492 }, { "epoch": 0.10705528993502514, "grad_norm": 2.0344324033137373, "learning_rate": 9.845119994397529e-06, "loss": 0.829, "step": 3493 }, { "epoch": 0.10708593845776633, "grad_norm": 2.013386073183352, "learning_rate": 9.844997396610535e-06, "loss": 0.8434, "step": 3494 }, { "epoch": 0.10711658698050754, "grad_norm": 2.047869469281476, "learning_rate": 9.844874751084536e-06, "loss": 0.6862, "step": 3495 }, { "epoch": 0.10714723550324874, "grad_norm": 1.953750013724405, "learning_rate": 9.84475205782075e-06, "loss": 0.789, "step": 3496 }, { "epoch": 0.10717788402598995, "grad_norm": 1.9300999332824784, "learning_rate": 9.844629316820382e-06, "loss": 0.8264, "step": 3497 }, { "epoch": 0.10720853254873115, "grad_norm": 1.974098450021844, "learning_rate": 9.844506528084643e-06, "loss": 0.8953, "step": 3498 }, { "epoch": 0.10723918107147236, "grad_norm": 2.0091371675421823, "learning_rate": 9.84438369161474e-06, "loss": 0.7056, "step": 3499 }, { "epoch": 0.10726982959421356, "grad_norm": 2.0652921848904064, "learning_rate": 9.844260807411886e-06, "loss": 0.7919, "step": 3500 }, { "epoch": 0.10730047811695476, "grad_norm": 1.9216555309559173, "learning_rate": 9.844137875477288e-06, "loss": 0.7884, "step": 3501 }, { "epoch": 0.10733112663969596, "grad_norm": 2.175468917599524, "learning_rate": 9.844014895812163e-06, "loss": 0.7308, "step": 3502 }, { "epoch": 0.10736177516243717, "grad_norm": 2.088778088265246, "learning_rate": 9.843891868417718e-06, "loss": 0.7243, "step": 3503 }, { "epoch": 0.10739242368517837, "grad_norm": 1.9315879930098343, "learning_rate": 9.84376879329517e-06, "loss": 0.7134, "step": 3504 }, { "epoch": 0.10742307220791958, "grad_norm": 1.0674948016365813, "learning_rate": 9.843645670445726e-06, "loss": 0.5261, "step": 3505 }, { "epoch": 0.10745372073066078, "grad_norm": 1.8391531971537969, "learning_rate": 9.843522499870602e-06, "loss": 0.8351, "step": 3506 }, { "epoch": 0.10748436925340199, "grad_norm": 2.11411081761559, "learning_rate": 9.843399281571013e-06, "loss": 0.7853, "step": 3507 }, { "epoch": 0.1075150177761432, "grad_norm": 1.8527900978436702, "learning_rate": 9.843276015548171e-06, "loss": 0.6815, "step": 3508 }, { "epoch": 0.10754566629888439, "grad_norm": 2.239179012032572, "learning_rate": 9.843152701803292e-06, "loss": 0.7019, "step": 3509 }, { "epoch": 0.1075763148216256, "grad_norm": 2.2361545822909927, "learning_rate": 9.843029340337589e-06, "loss": 0.7014, "step": 3510 }, { "epoch": 0.1076069633443668, "grad_norm": 1.8951584146623983, "learning_rate": 9.84290593115228e-06, "loss": 0.7908, "step": 3511 }, { "epoch": 0.107637611867108, "grad_norm": 1.7693167504323868, "learning_rate": 9.842782474248578e-06, "loss": 0.7408, "step": 3512 }, { "epoch": 0.10766826038984921, "grad_norm": 1.9605083111782962, "learning_rate": 9.842658969627702e-06, "loss": 0.7175, "step": 3513 }, { "epoch": 0.10769890891259042, "grad_norm": 2.1968622843200882, "learning_rate": 9.842535417290868e-06, "loss": 0.7745, "step": 3514 }, { "epoch": 0.10772955743533162, "grad_norm": 1.9014558769711003, "learning_rate": 9.842411817239293e-06, "loss": 0.743, "step": 3515 }, { "epoch": 0.10776020595807283, "grad_norm": 2.1888007014999284, "learning_rate": 9.842288169474197e-06, "loss": 0.8442, "step": 3516 }, { "epoch": 0.10779085448081402, "grad_norm": 2.026768862605879, "learning_rate": 9.842164473996797e-06, "loss": 0.8479, "step": 3517 }, { "epoch": 0.10782150300355522, "grad_norm": 2.194102337599011, "learning_rate": 9.842040730808308e-06, "loss": 0.7499, "step": 3518 }, { "epoch": 0.10785215152629643, "grad_norm": 2.068504703906561, "learning_rate": 9.841916939909956e-06, "loss": 0.7179, "step": 3519 }, { "epoch": 0.10788280004903764, "grad_norm": 1.784889937002916, "learning_rate": 9.841793101302957e-06, "loss": 0.7649, "step": 3520 }, { "epoch": 0.10791344857177884, "grad_norm": 1.794775598301373, "learning_rate": 9.841669214988532e-06, "loss": 0.7224, "step": 3521 }, { "epoch": 0.10794409709452005, "grad_norm": 1.7392090621413825, "learning_rate": 9.8415452809679e-06, "loss": 0.7873, "step": 3522 }, { "epoch": 0.10797474561726125, "grad_norm": 1.9352945299673272, "learning_rate": 9.841421299242284e-06, "loss": 0.7174, "step": 3523 }, { "epoch": 0.10800539414000246, "grad_norm": 1.9079236516411968, "learning_rate": 9.841297269812906e-06, "loss": 0.8018, "step": 3524 }, { "epoch": 0.10803604266274365, "grad_norm": 1.9700777994072227, "learning_rate": 9.841173192680987e-06, "loss": 0.7189, "step": 3525 }, { "epoch": 0.10806669118548486, "grad_norm": 1.7601099254223052, "learning_rate": 9.84104906784775e-06, "loss": 0.7208, "step": 3526 }, { "epoch": 0.10809733970822606, "grad_norm": 1.861012884857957, "learning_rate": 9.840924895314418e-06, "loss": 0.7582, "step": 3527 }, { "epoch": 0.10812798823096727, "grad_norm": 2.065824192278561, "learning_rate": 9.840800675082214e-06, "loss": 0.9042, "step": 3528 }, { "epoch": 0.10815863675370847, "grad_norm": 1.8164141764516684, "learning_rate": 9.840676407152363e-06, "loss": 0.7434, "step": 3529 }, { "epoch": 0.10818928527644968, "grad_norm": 2.1958780352975986, "learning_rate": 9.840552091526088e-06, "loss": 0.7683, "step": 3530 }, { "epoch": 0.10821993379919088, "grad_norm": 1.9752733091212074, "learning_rate": 9.840427728204615e-06, "loss": 0.8445, "step": 3531 }, { "epoch": 0.10825058232193208, "grad_norm": 1.6263225432292943, "learning_rate": 9.84030331718917e-06, "loss": 0.6487, "step": 3532 }, { "epoch": 0.10828123084467328, "grad_norm": 2.129336850980548, "learning_rate": 9.840178858480976e-06, "loss": 0.7367, "step": 3533 }, { "epoch": 0.10831187936741449, "grad_norm": 2.271575048600106, "learning_rate": 9.840054352081262e-06, "loss": 0.8852, "step": 3534 }, { "epoch": 0.1083425278901557, "grad_norm": 1.7627417472039368, "learning_rate": 9.839929797991256e-06, "loss": 0.764, "step": 3535 }, { "epoch": 0.1083731764128969, "grad_norm": 1.9930803522538303, "learning_rate": 9.839805196212183e-06, "loss": 0.7654, "step": 3536 }, { "epoch": 0.1084038249356381, "grad_norm": 1.9184579045792873, "learning_rate": 9.839680546745268e-06, "loss": 0.875, "step": 3537 }, { "epoch": 0.10843447345837931, "grad_norm": 1.6986116025330609, "learning_rate": 9.839555849591744e-06, "loss": 0.716, "step": 3538 }, { "epoch": 0.10846512198112052, "grad_norm": 1.9222198143960825, "learning_rate": 9.83943110475284e-06, "loss": 0.7765, "step": 3539 }, { "epoch": 0.10849577050386171, "grad_norm": 1.859070585864703, "learning_rate": 9.839306312229779e-06, "loss": 0.7878, "step": 3540 }, { "epoch": 0.10852641902660291, "grad_norm": 2.034075588767715, "learning_rate": 9.839181472023798e-06, "loss": 0.6829, "step": 3541 }, { "epoch": 0.10855706754934412, "grad_norm": 2.1172625069942126, "learning_rate": 9.839056584136123e-06, "loss": 0.6978, "step": 3542 }, { "epoch": 0.10858771607208532, "grad_norm": 1.961777187445112, "learning_rate": 9.838931648567986e-06, "loss": 0.7924, "step": 3543 }, { "epoch": 0.10861836459482653, "grad_norm": 2.0296717340651447, "learning_rate": 9.838806665320615e-06, "loss": 0.8429, "step": 3544 }, { "epoch": 0.10864901311756774, "grad_norm": 1.9105599980588657, "learning_rate": 9.838681634395245e-06, "loss": 0.6868, "step": 3545 }, { "epoch": 0.10867966164030894, "grad_norm": 2.1347550051551654, "learning_rate": 9.838556555793108e-06, "loss": 0.6395, "step": 3546 }, { "epoch": 0.10871031016305015, "grad_norm": 2.114683295881984, "learning_rate": 9.838431429515434e-06, "loss": 0.7225, "step": 3547 }, { "epoch": 0.10874095868579134, "grad_norm": 2.1556271468136288, "learning_rate": 9.838306255563459e-06, "loss": 0.8295, "step": 3548 }, { "epoch": 0.10877160720853254, "grad_norm": 1.9890654576111741, "learning_rate": 9.838181033938413e-06, "loss": 0.7116, "step": 3549 }, { "epoch": 0.10880225573127375, "grad_norm": 2.1773562158045863, "learning_rate": 9.838055764641533e-06, "loss": 0.7734, "step": 3550 }, { "epoch": 0.10883290425401496, "grad_norm": 1.3204364829832096, "learning_rate": 9.83793044767405e-06, "loss": 0.5196, "step": 3551 }, { "epoch": 0.10886355277675616, "grad_norm": 1.8663152830774417, "learning_rate": 9.837805083037199e-06, "loss": 0.7823, "step": 3552 }, { "epoch": 0.10889420129949737, "grad_norm": 1.8842343130410473, "learning_rate": 9.83767967073222e-06, "loss": 0.7647, "step": 3553 }, { "epoch": 0.10892484982223857, "grad_norm": 0.8648107193723986, "learning_rate": 9.837554210760344e-06, "loss": 0.527, "step": 3554 }, { "epoch": 0.10895549834497978, "grad_norm": 1.8985725432129588, "learning_rate": 9.837428703122807e-06, "loss": 0.7539, "step": 3555 }, { "epoch": 0.10898614686772097, "grad_norm": 1.8896555373878599, "learning_rate": 9.837303147820849e-06, "loss": 0.7489, "step": 3556 }, { "epoch": 0.10901679539046218, "grad_norm": 2.000106399513636, "learning_rate": 9.837177544855705e-06, "loss": 0.7004, "step": 3557 }, { "epoch": 0.10904744391320338, "grad_norm": 1.8969074048333752, "learning_rate": 9.837051894228614e-06, "loss": 0.8274, "step": 3558 }, { "epoch": 0.10907809243594459, "grad_norm": 1.2611669667442054, "learning_rate": 9.836926195940811e-06, "loss": 0.5408, "step": 3559 }, { "epoch": 0.1091087409586858, "grad_norm": 1.8230227610670315, "learning_rate": 9.836800449993538e-06, "loss": 0.746, "step": 3560 }, { "epoch": 0.109139389481427, "grad_norm": 2.147881085583073, "learning_rate": 9.836674656388032e-06, "loss": 0.7875, "step": 3561 }, { "epoch": 0.1091700380041682, "grad_norm": 0.9329479770983513, "learning_rate": 9.836548815125536e-06, "loss": 0.5365, "step": 3562 }, { "epoch": 0.1092006865269094, "grad_norm": 1.846265633877356, "learning_rate": 9.836422926207283e-06, "loss": 0.5972, "step": 3563 }, { "epoch": 0.1092313350496506, "grad_norm": 2.0880970813206026, "learning_rate": 9.83629698963452e-06, "loss": 0.7246, "step": 3564 }, { "epoch": 0.10926198357239181, "grad_norm": 1.9755429748478295, "learning_rate": 9.836171005408483e-06, "loss": 0.6478, "step": 3565 }, { "epoch": 0.10929263209513301, "grad_norm": 1.9774078872687606, "learning_rate": 9.836044973530417e-06, "loss": 0.731, "step": 3566 }, { "epoch": 0.10932328061787422, "grad_norm": 2.031323949532182, "learning_rate": 9.835918894001564e-06, "loss": 0.7863, "step": 3567 }, { "epoch": 0.10935392914061542, "grad_norm": 1.1720043004897012, "learning_rate": 9.835792766823162e-06, "loss": 0.53, "step": 3568 }, { "epoch": 0.10938457766335663, "grad_norm": 2.1386905860627645, "learning_rate": 9.835666591996458e-06, "loss": 0.7697, "step": 3569 }, { "epoch": 0.10941522618609784, "grad_norm": 1.6921085764265809, "learning_rate": 9.835540369522694e-06, "loss": 0.7507, "step": 3570 }, { "epoch": 0.10944587470883903, "grad_norm": 1.8166177206669303, "learning_rate": 9.835414099403113e-06, "loss": 0.7452, "step": 3571 }, { "epoch": 0.10947652323158023, "grad_norm": 1.778948183717474, "learning_rate": 9.83528778163896e-06, "loss": 0.757, "step": 3572 }, { "epoch": 0.10950717175432144, "grad_norm": 2.0860666760823166, "learning_rate": 9.83516141623148e-06, "loss": 0.7755, "step": 3573 }, { "epoch": 0.10953782027706264, "grad_norm": 0.9372621007896001, "learning_rate": 9.835035003181917e-06, "loss": 0.538, "step": 3574 }, { "epoch": 0.10956846879980385, "grad_norm": 1.979362195395861, "learning_rate": 9.834908542491517e-06, "loss": 0.7155, "step": 3575 }, { "epoch": 0.10959911732254506, "grad_norm": 2.043661000463172, "learning_rate": 9.834782034161525e-06, "loss": 0.7978, "step": 3576 }, { "epoch": 0.10962976584528626, "grad_norm": 1.9859083654004157, "learning_rate": 9.834655478193188e-06, "loss": 0.7361, "step": 3577 }, { "epoch": 0.10966041436802747, "grad_norm": 1.8207033619109483, "learning_rate": 9.834528874587756e-06, "loss": 0.7149, "step": 3578 }, { "epoch": 0.10969106289076866, "grad_norm": 2.011749385708135, "learning_rate": 9.834402223346475e-06, "loss": 0.7601, "step": 3579 }, { "epoch": 0.10972171141350986, "grad_norm": 2.4785689218375055, "learning_rate": 9.834275524470588e-06, "loss": 0.9429, "step": 3580 }, { "epoch": 0.10975235993625107, "grad_norm": 0.9360366272553071, "learning_rate": 9.83414877796135e-06, "loss": 0.5183, "step": 3581 }, { "epoch": 0.10978300845899228, "grad_norm": 1.0028370780670568, "learning_rate": 9.834021983820007e-06, "loss": 0.5411, "step": 3582 }, { "epoch": 0.10981365698173348, "grad_norm": 1.977717476252743, "learning_rate": 9.833895142047809e-06, "loss": 0.7611, "step": 3583 }, { "epoch": 0.10984430550447469, "grad_norm": 2.087292681066615, "learning_rate": 9.833768252646003e-06, "loss": 0.8325, "step": 3584 }, { "epoch": 0.1098749540272159, "grad_norm": 2.1542026030124095, "learning_rate": 9.833641315615844e-06, "loss": 0.7683, "step": 3585 }, { "epoch": 0.1099056025499571, "grad_norm": 1.8942899496213799, "learning_rate": 9.83351433095858e-06, "loss": 0.7394, "step": 3586 }, { "epoch": 0.10993625107269829, "grad_norm": 1.832175375363711, "learning_rate": 9.833387298675461e-06, "loss": 0.709, "step": 3587 }, { "epoch": 0.1099668995954395, "grad_norm": 1.9427710674242282, "learning_rate": 9.833260218767741e-06, "loss": 0.7149, "step": 3588 }, { "epoch": 0.1099975481181807, "grad_norm": 2.1449715103081743, "learning_rate": 9.833133091236673e-06, "loss": 0.7549, "step": 3589 }, { "epoch": 0.11002819664092191, "grad_norm": 1.8332162224060884, "learning_rate": 9.833005916083506e-06, "loss": 0.7438, "step": 3590 }, { "epoch": 0.11005884516366311, "grad_norm": 2.038573089713471, "learning_rate": 9.832878693309495e-06, "loss": 0.7948, "step": 3591 }, { "epoch": 0.11008949368640432, "grad_norm": 1.9159744459901185, "learning_rate": 9.832751422915896e-06, "loss": 0.7178, "step": 3592 }, { "epoch": 0.11012014220914552, "grad_norm": 2.54432596721499, "learning_rate": 9.83262410490396e-06, "loss": 0.8398, "step": 3593 }, { "epoch": 0.11015079073188672, "grad_norm": 1.869249037104161, "learning_rate": 9.832496739274942e-06, "loss": 0.6916, "step": 3594 }, { "epoch": 0.11018143925462792, "grad_norm": 1.8930749539278358, "learning_rate": 9.832369326030096e-06, "loss": 0.7048, "step": 3595 }, { "epoch": 0.11021208777736913, "grad_norm": 1.7708018490227393, "learning_rate": 9.83224186517068e-06, "loss": 0.7014, "step": 3596 }, { "epoch": 0.11024273630011033, "grad_norm": 2.0367353377156006, "learning_rate": 9.832114356697948e-06, "loss": 0.6915, "step": 3597 }, { "epoch": 0.11027338482285154, "grad_norm": 1.483313490890426, "learning_rate": 9.831986800613157e-06, "loss": 0.5334, "step": 3598 }, { "epoch": 0.11030403334559274, "grad_norm": 1.8954996133778115, "learning_rate": 9.831859196917563e-06, "loss": 0.7596, "step": 3599 }, { "epoch": 0.11033468186833395, "grad_norm": 1.8775097338604614, "learning_rate": 9.831731545612423e-06, "loss": 0.6842, "step": 3600 }, { "epoch": 0.11036533039107516, "grad_norm": 1.9169192086922093, "learning_rate": 9.831603846698998e-06, "loss": 0.8186, "step": 3601 }, { "epoch": 0.11039597891381635, "grad_norm": 1.9055971017110995, "learning_rate": 9.831476100178543e-06, "loss": 0.7578, "step": 3602 }, { "epoch": 0.11042662743655755, "grad_norm": 2.0982789415909306, "learning_rate": 9.83134830605232e-06, "loss": 0.7568, "step": 3603 }, { "epoch": 0.11045727595929876, "grad_norm": 2.014038356047331, "learning_rate": 9.831220464321584e-06, "loss": 0.7176, "step": 3604 }, { "epoch": 0.11048792448203996, "grad_norm": 1.6464943747702472, "learning_rate": 9.831092574987596e-06, "loss": 0.6801, "step": 3605 }, { "epoch": 0.11051857300478117, "grad_norm": 2.0349364968965244, "learning_rate": 9.830964638051618e-06, "loss": 0.7256, "step": 3606 }, { "epoch": 0.11054922152752238, "grad_norm": 1.760575420791142, "learning_rate": 9.830836653514909e-06, "loss": 0.7097, "step": 3607 }, { "epoch": 0.11057987005026358, "grad_norm": 1.7560422938284508, "learning_rate": 9.830708621378731e-06, "loss": 0.7549, "step": 3608 }, { "epoch": 0.11061051857300479, "grad_norm": 1.7721609518512156, "learning_rate": 9.830580541644343e-06, "loss": 0.8169, "step": 3609 }, { "epoch": 0.11064116709574598, "grad_norm": 1.3103012017125695, "learning_rate": 9.830452414313012e-06, "loss": 0.5525, "step": 3610 }, { "epoch": 0.11067181561848718, "grad_norm": 1.8742880282124994, "learning_rate": 9.830324239385996e-06, "loss": 0.7716, "step": 3611 }, { "epoch": 0.11070246414122839, "grad_norm": 1.8898968059670085, "learning_rate": 9.830196016864558e-06, "loss": 0.8571, "step": 3612 }, { "epoch": 0.1107331126639696, "grad_norm": 2.1064977655612416, "learning_rate": 9.830067746749964e-06, "loss": 0.7956, "step": 3613 }, { "epoch": 0.1107637611867108, "grad_norm": 2.079058110412958, "learning_rate": 9.829939429043478e-06, "loss": 0.7799, "step": 3614 }, { "epoch": 0.11079440970945201, "grad_norm": 1.8933878608008137, "learning_rate": 9.82981106374636e-06, "loss": 0.7337, "step": 3615 }, { "epoch": 0.11082505823219321, "grad_norm": 2.0624392245880907, "learning_rate": 9.82968265085988e-06, "loss": 0.7847, "step": 3616 }, { "epoch": 0.11085570675493442, "grad_norm": 2.136167101671275, "learning_rate": 9.8295541903853e-06, "loss": 0.7771, "step": 3617 }, { "epoch": 0.11088635527767561, "grad_norm": 2.0007030795818674, "learning_rate": 9.829425682323889e-06, "loss": 0.7958, "step": 3618 }, { "epoch": 0.11091700380041682, "grad_norm": 1.709018707980955, "learning_rate": 9.829297126676909e-06, "loss": 0.7007, "step": 3619 }, { "epoch": 0.11094765232315802, "grad_norm": 1.8331451832983106, "learning_rate": 9.82916852344563e-06, "loss": 0.694, "step": 3620 }, { "epoch": 0.11097830084589923, "grad_norm": 1.8691667599276773, "learning_rate": 9.829039872631317e-06, "loss": 0.7626, "step": 3621 }, { "epoch": 0.11100894936864043, "grad_norm": 0.9778227084024581, "learning_rate": 9.828911174235238e-06, "loss": 0.5135, "step": 3622 }, { "epoch": 0.11103959789138164, "grad_norm": 1.7597029644994544, "learning_rate": 9.828782428258663e-06, "loss": 0.8027, "step": 3623 }, { "epoch": 0.11107024641412284, "grad_norm": 2.154708343517572, "learning_rate": 9.828653634702858e-06, "loss": 0.7772, "step": 3624 }, { "epoch": 0.11110089493686404, "grad_norm": 1.8667860623610841, "learning_rate": 9.828524793569095e-06, "loss": 0.7609, "step": 3625 }, { "epoch": 0.11113154345960524, "grad_norm": 1.960683828713328, "learning_rate": 9.82839590485864e-06, "loss": 0.7218, "step": 3626 }, { "epoch": 0.11116219198234645, "grad_norm": 2.0079107964434626, "learning_rate": 9.828266968572765e-06, "loss": 0.7938, "step": 3627 }, { "epoch": 0.11119284050508765, "grad_norm": 2.1750011600095083, "learning_rate": 9.828137984712741e-06, "loss": 0.7307, "step": 3628 }, { "epoch": 0.11122348902782886, "grad_norm": 1.8835895304504142, "learning_rate": 9.828008953279839e-06, "loss": 0.6675, "step": 3629 }, { "epoch": 0.11125413755057006, "grad_norm": 1.8626846228534426, "learning_rate": 9.827879874275328e-06, "loss": 0.8185, "step": 3630 }, { "epoch": 0.11128478607331127, "grad_norm": 2.052893091289707, "learning_rate": 9.827750747700481e-06, "loss": 0.7966, "step": 3631 }, { "epoch": 0.11131543459605248, "grad_norm": 0.9724001401700975, "learning_rate": 9.827621573556573e-06, "loss": 0.5564, "step": 3632 }, { "epoch": 0.11134608311879367, "grad_norm": 1.9551084207794032, "learning_rate": 9.827492351844872e-06, "loss": 0.7961, "step": 3633 }, { "epoch": 0.11137673164153487, "grad_norm": 0.8795016544577685, "learning_rate": 9.827363082566655e-06, "loss": 0.5119, "step": 3634 }, { "epoch": 0.11140738016427608, "grad_norm": 1.6217675129355866, "learning_rate": 9.827233765723193e-06, "loss": 0.7281, "step": 3635 }, { "epoch": 0.11143802868701728, "grad_norm": 1.915160346885111, "learning_rate": 9.827104401315764e-06, "loss": 0.7206, "step": 3636 }, { "epoch": 0.11146867720975849, "grad_norm": 2.044590300673938, "learning_rate": 9.82697498934564e-06, "loss": 0.7061, "step": 3637 }, { "epoch": 0.1114993257324997, "grad_norm": 1.816608680404952, "learning_rate": 9.826845529814093e-06, "loss": 0.7641, "step": 3638 }, { "epoch": 0.1115299742552409, "grad_norm": 2.1140613021610513, "learning_rate": 9.826716022722405e-06, "loss": 0.7351, "step": 3639 }, { "epoch": 0.11156062277798211, "grad_norm": 2.0312117560019285, "learning_rate": 9.826586468071848e-06, "loss": 0.8043, "step": 3640 }, { "epoch": 0.1115912713007233, "grad_norm": 1.7284006332310986, "learning_rate": 9.826456865863699e-06, "loss": 0.8198, "step": 3641 }, { "epoch": 0.1116219198234645, "grad_norm": 1.8960056377382297, "learning_rate": 9.826327216099237e-06, "loss": 0.7043, "step": 3642 }, { "epoch": 0.11165256834620571, "grad_norm": 2.2154404272948605, "learning_rate": 9.826197518779738e-06, "loss": 0.9002, "step": 3643 }, { "epoch": 0.11168321686894692, "grad_norm": 2.2565413968162624, "learning_rate": 9.826067773906479e-06, "loss": 0.8629, "step": 3644 }, { "epoch": 0.11171386539168812, "grad_norm": 1.9586233432635087, "learning_rate": 9.82593798148074e-06, "loss": 0.7695, "step": 3645 }, { "epoch": 0.11174451391442933, "grad_norm": 1.8516132437702248, "learning_rate": 9.825808141503798e-06, "loss": 0.7544, "step": 3646 }, { "epoch": 0.11177516243717053, "grad_norm": 2.1112032511971357, "learning_rate": 9.825678253976935e-06, "loss": 0.7482, "step": 3647 }, { "epoch": 0.11180581095991174, "grad_norm": 1.8011206600516239, "learning_rate": 9.82554831890143e-06, "loss": 0.7202, "step": 3648 }, { "epoch": 0.11183645948265293, "grad_norm": 1.9344237641934832, "learning_rate": 9.825418336278563e-06, "loss": 0.7048, "step": 3649 }, { "epoch": 0.11186710800539414, "grad_norm": 1.911194214022531, "learning_rate": 9.825288306109612e-06, "loss": 0.7204, "step": 3650 }, { "epoch": 0.11189775652813534, "grad_norm": 2.1641381566688738, "learning_rate": 9.825158228395863e-06, "loss": 0.8468, "step": 3651 }, { "epoch": 0.11192840505087655, "grad_norm": 2.1545149024733345, "learning_rate": 9.825028103138596e-06, "loss": 0.8022, "step": 3652 }, { "epoch": 0.11195905357361775, "grad_norm": 2.1590752564990043, "learning_rate": 9.82489793033909e-06, "loss": 0.7205, "step": 3653 }, { "epoch": 0.11198970209635896, "grad_norm": 2.112471377796669, "learning_rate": 9.824767709998632e-06, "loss": 0.654, "step": 3654 }, { "epoch": 0.11202035061910016, "grad_norm": 1.9815354861618384, "learning_rate": 9.824637442118503e-06, "loss": 0.751, "step": 3655 }, { "epoch": 0.11205099914184136, "grad_norm": 1.8870941582473197, "learning_rate": 9.824507126699986e-06, "loss": 0.6825, "step": 3656 }, { "epoch": 0.11208164766458256, "grad_norm": 1.9613357570402972, "learning_rate": 9.824376763744367e-06, "loss": 0.7459, "step": 3657 }, { "epoch": 0.11211229618732377, "grad_norm": 2.1199409105638205, "learning_rate": 9.824246353252928e-06, "loss": 0.7806, "step": 3658 }, { "epoch": 0.11214294471006497, "grad_norm": 2.051246385117295, "learning_rate": 9.824115895226956e-06, "loss": 0.717, "step": 3659 }, { "epoch": 0.11217359323280618, "grad_norm": 1.3414303423663991, "learning_rate": 9.823985389667736e-06, "loss": 0.5321, "step": 3660 }, { "epoch": 0.11220424175554738, "grad_norm": 2.192841684402074, "learning_rate": 9.823854836576554e-06, "loss": 0.7457, "step": 3661 }, { "epoch": 0.11223489027828859, "grad_norm": 1.8276190481338674, "learning_rate": 9.823724235954696e-06, "loss": 0.7996, "step": 3662 }, { "epoch": 0.1122655388010298, "grad_norm": 1.8278847365462105, "learning_rate": 9.823593587803448e-06, "loss": 0.7896, "step": 3663 }, { "epoch": 0.11229618732377099, "grad_norm": 1.9041858129438338, "learning_rate": 9.823462892124098e-06, "loss": 0.8189, "step": 3664 }, { "epoch": 0.1123268358465122, "grad_norm": 1.8139264946369593, "learning_rate": 9.823332148917937e-06, "loss": 0.7533, "step": 3665 }, { "epoch": 0.1123574843692534, "grad_norm": 1.7974066669788964, "learning_rate": 9.823201358186248e-06, "loss": 0.731, "step": 3666 }, { "epoch": 0.1123881328919946, "grad_norm": 1.815726735353267, "learning_rate": 9.823070519930321e-06, "loss": 0.7874, "step": 3667 }, { "epoch": 0.11241878141473581, "grad_norm": 1.869688384544121, "learning_rate": 9.822939634151447e-06, "loss": 0.7733, "step": 3668 }, { "epoch": 0.11244942993747702, "grad_norm": 2.1554937976461512, "learning_rate": 9.822808700850914e-06, "loss": 0.711, "step": 3669 }, { "epoch": 0.11248007846021822, "grad_norm": 1.9903980003114987, "learning_rate": 9.822677720030015e-06, "loss": 0.6575, "step": 3670 }, { "epoch": 0.11251072698295943, "grad_norm": 1.4092377661790751, "learning_rate": 9.822546691690038e-06, "loss": 0.5153, "step": 3671 }, { "epoch": 0.11254137550570062, "grad_norm": 1.719259732230371, "learning_rate": 9.822415615832272e-06, "loss": 0.709, "step": 3672 }, { "epoch": 0.11257202402844182, "grad_norm": 2.480804606357399, "learning_rate": 9.822284492458014e-06, "loss": 0.7786, "step": 3673 }, { "epoch": 0.11260267255118303, "grad_norm": 1.8280733816862578, "learning_rate": 9.822153321568552e-06, "loss": 0.7661, "step": 3674 }, { "epoch": 0.11263332107392424, "grad_norm": 2.0843992877176687, "learning_rate": 9.822022103165178e-06, "loss": 0.8043, "step": 3675 }, { "epoch": 0.11266396959666544, "grad_norm": 1.984177848762459, "learning_rate": 9.821890837249189e-06, "loss": 0.7992, "step": 3676 }, { "epoch": 0.11269461811940665, "grad_norm": 1.742127522115925, "learning_rate": 9.821759523821874e-06, "loss": 0.7141, "step": 3677 }, { "epoch": 0.11272526664214785, "grad_norm": 1.8173532054359318, "learning_rate": 9.821628162884532e-06, "loss": 0.6949, "step": 3678 }, { "epoch": 0.11275591516488906, "grad_norm": 1.9600088784485488, "learning_rate": 9.82149675443845e-06, "loss": 0.7929, "step": 3679 }, { "epoch": 0.11278656368763025, "grad_norm": 1.0886634599276426, "learning_rate": 9.821365298484929e-06, "loss": 0.5142, "step": 3680 }, { "epoch": 0.11281721221037146, "grad_norm": 1.7591578757623698, "learning_rate": 9.82123379502526e-06, "loss": 0.7572, "step": 3681 }, { "epoch": 0.11284786073311266, "grad_norm": 1.9666148295853225, "learning_rate": 9.821102244060743e-06, "loss": 0.7821, "step": 3682 }, { "epoch": 0.11287850925585387, "grad_norm": 1.7951463351374148, "learning_rate": 9.82097064559267e-06, "loss": 0.6832, "step": 3683 }, { "epoch": 0.11290915777859507, "grad_norm": 1.8205767227500531, "learning_rate": 9.820838999622341e-06, "loss": 0.7392, "step": 3684 }, { "epoch": 0.11293980630133628, "grad_norm": 2.114192087728003, "learning_rate": 9.820707306151055e-06, "loss": 0.8331, "step": 3685 }, { "epoch": 0.11297045482407748, "grad_norm": 1.9248573833925755, "learning_rate": 9.820575565180102e-06, "loss": 0.7867, "step": 3686 }, { "epoch": 0.11300110334681868, "grad_norm": 2.1583512611192397, "learning_rate": 9.820443776710786e-06, "loss": 0.8944, "step": 3687 }, { "epoch": 0.11303175186955988, "grad_norm": 1.9174928736695869, "learning_rate": 9.820311940744405e-06, "loss": 0.7734, "step": 3688 }, { "epoch": 0.11306240039230109, "grad_norm": 1.817762145989838, "learning_rate": 9.820180057282256e-06, "loss": 0.7108, "step": 3689 }, { "epoch": 0.1130930489150423, "grad_norm": 1.9406802337759628, "learning_rate": 9.82004812632564e-06, "loss": 0.7428, "step": 3690 }, { "epoch": 0.1131236974377835, "grad_norm": 1.788138649746012, "learning_rate": 9.819916147875857e-06, "loss": 0.8083, "step": 3691 }, { "epoch": 0.1131543459605247, "grad_norm": 2.0589716091128585, "learning_rate": 9.819784121934206e-06, "loss": 0.9205, "step": 3692 }, { "epoch": 0.11318499448326591, "grad_norm": 1.8350279838793235, "learning_rate": 9.819652048501988e-06, "loss": 0.7385, "step": 3693 }, { "epoch": 0.11321564300600712, "grad_norm": 1.9255857796479403, "learning_rate": 9.819519927580507e-06, "loss": 0.7535, "step": 3694 }, { "epoch": 0.11324629152874831, "grad_norm": 1.8501215524127381, "learning_rate": 9.819387759171062e-06, "loss": 0.7216, "step": 3695 }, { "epoch": 0.11327694005148951, "grad_norm": 1.9380558440644036, "learning_rate": 9.819255543274957e-06, "loss": 0.7245, "step": 3696 }, { "epoch": 0.11330758857423072, "grad_norm": 0.9840159921809034, "learning_rate": 9.819123279893494e-06, "loss": 0.5197, "step": 3697 }, { "epoch": 0.11333823709697192, "grad_norm": 1.880425415033134, "learning_rate": 9.818990969027977e-06, "loss": 0.7645, "step": 3698 }, { "epoch": 0.11336888561971313, "grad_norm": 1.913339117554567, "learning_rate": 9.818858610679706e-06, "loss": 0.7745, "step": 3699 }, { "epoch": 0.11339953414245434, "grad_norm": 1.8170240230736314, "learning_rate": 9.818726204849991e-06, "loss": 0.8591, "step": 3700 }, { "epoch": 0.11343018266519554, "grad_norm": 0.8567833406513337, "learning_rate": 9.818593751540134e-06, "loss": 0.5212, "step": 3701 }, { "epoch": 0.11346083118793675, "grad_norm": 0.8565191011254415, "learning_rate": 9.818461250751438e-06, "loss": 0.5223, "step": 3702 }, { "epoch": 0.11349147971067794, "grad_norm": 2.000666810381929, "learning_rate": 9.818328702485212e-06, "loss": 0.8643, "step": 3703 }, { "epoch": 0.11352212823341915, "grad_norm": 0.8081151990338308, "learning_rate": 9.81819610674276e-06, "loss": 0.5228, "step": 3704 }, { "epoch": 0.11355277675616035, "grad_norm": 1.9216145936053692, "learning_rate": 9.81806346352539e-06, "loss": 0.8421, "step": 3705 }, { "epoch": 0.11358342527890156, "grad_norm": 0.879650448369826, "learning_rate": 9.817930772834406e-06, "loss": 0.5082, "step": 3706 }, { "epoch": 0.11361407380164276, "grad_norm": 1.8119138964610508, "learning_rate": 9.817798034671117e-06, "loss": 0.8231, "step": 3707 }, { "epoch": 0.11364472232438397, "grad_norm": 2.071512758920977, "learning_rate": 9.817665249036833e-06, "loss": 0.8481, "step": 3708 }, { "epoch": 0.11367537084712517, "grad_norm": 1.9940261766534664, "learning_rate": 9.81753241593286e-06, "loss": 0.6298, "step": 3709 }, { "epoch": 0.11370601936986638, "grad_norm": 0.8601261856052457, "learning_rate": 9.817399535360507e-06, "loss": 0.4949, "step": 3710 }, { "epoch": 0.11373666789260757, "grad_norm": 1.933719079983175, "learning_rate": 9.817266607321085e-06, "loss": 0.8596, "step": 3711 }, { "epoch": 0.11376731641534878, "grad_norm": 0.9084155245185468, "learning_rate": 9.817133631815902e-06, "loss": 0.5194, "step": 3712 }, { "epoch": 0.11379796493808998, "grad_norm": 0.8884352804692812, "learning_rate": 9.817000608846269e-06, "loss": 0.5165, "step": 3713 }, { "epoch": 0.11382861346083119, "grad_norm": 0.865252513734952, "learning_rate": 9.816867538413497e-06, "loss": 0.4977, "step": 3714 }, { "epoch": 0.1138592619835724, "grad_norm": 1.8619902862946007, "learning_rate": 9.816734420518895e-06, "loss": 0.7932, "step": 3715 }, { "epoch": 0.1138899105063136, "grad_norm": 1.944463813498919, "learning_rate": 9.816601255163777e-06, "loss": 0.8555, "step": 3716 }, { "epoch": 0.1139205590290548, "grad_norm": 1.9428689899393619, "learning_rate": 9.816468042349456e-06, "loss": 0.8377, "step": 3717 }, { "epoch": 0.113951207551796, "grad_norm": 1.8402962779769099, "learning_rate": 9.81633478207724e-06, "loss": 0.7631, "step": 3718 }, { "epoch": 0.1139818560745372, "grad_norm": 1.922598435594532, "learning_rate": 9.816201474348448e-06, "loss": 0.7255, "step": 3719 }, { "epoch": 0.11401250459727841, "grad_norm": 1.8693601145226908, "learning_rate": 9.81606811916439e-06, "loss": 0.7199, "step": 3720 }, { "epoch": 0.11404315312001961, "grad_norm": 1.8417455770836921, "learning_rate": 9.815934716526378e-06, "loss": 0.7353, "step": 3721 }, { "epoch": 0.11407380164276082, "grad_norm": 1.8157553090896585, "learning_rate": 9.815801266435731e-06, "loss": 0.7902, "step": 3722 }, { "epoch": 0.11410445016550202, "grad_norm": 1.9438941010465904, "learning_rate": 9.815667768893763e-06, "loss": 0.841, "step": 3723 }, { "epoch": 0.11413509868824323, "grad_norm": 1.8760624605548688, "learning_rate": 9.815534223901788e-06, "loss": 0.7572, "step": 3724 }, { "epoch": 0.11416574721098444, "grad_norm": 2.0435507872588046, "learning_rate": 9.81540063146112e-06, "loss": 0.7584, "step": 3725 }, { "epoch": 0.11419639573372563, "grad_norm": 2.059346956802741, "learning_rate": 9.81526699157308e-06, "loss": 0.7211, "step": 3726 }, { "epoch": 0.11422704425646683, "grad_norm": 1.7842103273716676, "learning_rate": 9.815133304238982e-06, "loss": 0.6085, "step": 3727 }, { "epoch": 0.11425769277920804, "grad_norm": 2.251198179154489, "learning_rate": 9.814999569460143e-06, "loss": 0.7462, "step": 3728 }, { "epoch": 0.11428834130194925, "grad_norm": 2.0144196973537856, "learning_rate": 9.81486578723788e-06, "loss": 0.7704, "step": 3729 }, { "epoch": 0.11431898982469045, "grad_norm": 1.651485881954418, "learning_rate": 9.814731957573514e-06, "loss": 0.706, "step": 3730 }, { "epoch": 0.11434963834743166, "grad_norm": 1.8257664089955838, "learning_rate": 9.81459808046836e-06, "loss": 0.76, "step": 3731 }, { "epoch": 0.11438028687017286, "grad_norm": 1.7915027122232876, "learning_rate": 9.814464155923741e-06, "loss": 0.5401, "step": 3732 }, { "epoch": 0.11441093539291407, "grad_norm": 1.7870722227859082, "learning_rate": 9.814330183940976e-06, "loss": 0.6989, "step": 3733 }, { "epoch": 0.11444158391565526, "grad_norm": 1.9616024809658892, "learning_rate": 9.814196164521384e-06, "loss": 0.758, "step": 3734 }, { "epoch": 0.11447223243839647, "grad_norm": 1.9749270483821875, "learning_rate": 9.814062097666284e-06, "loss": 0.7167, "step": 3735 }, { "epoch": 0.11450288096113767, "grad_norm": 0.9251884030366325, "learning_rate": 9.813927983376998e-06, "loss": 0.495, "step": 3736 }, { "epoch": 0.11453352948387888, "grad_norm": 2.045989203310227, "learning_rate": 9.813793821654849e-06, "loss": 0.8896, "step": 3737 }, { "epoch": 0.11456417800662008, "grad_norm": 1.8744416980503567, "learning_rate": 9.813659612501156e-06, "loss": 0.7972, "step": 3738 }, { "epoch": 0.11459482652936129, "grad_norm": 1.9438285383634692, "learning_rate": 9.813525355917244e-06, "loss": 0.6837, "step": 3739 }, { "epoch": 0.1146254750521025, "grad_norm": 2.102968744781413, "learning_rate": 9.813391051904436e-06, "loss": 0.8419, "step": 3740 }, { "epoch": 0.1146561235748437, "grad_norm": 1.9770826450384813, "learning_rate": 9.813256700464054e-06, "loss": 0.7025, "step": 3741 }, { "epoch": 0.11468677209758489, "grad_norm": 2.029302891138796, "learning_rate": 9.813122301597422e-06, "loss": 0.8101, "step": 3742 }, { "epoch": 0.1147174206203261, "grad_norm": 2.1614026113364573, "learning_rate": 9.812987855305864e-06, "loss": 0.8253, "step": 3743 }, { "epoch": 0.1147480691430673, "grad_norm": 2.139651627493704, "learning_rate": 9.812853361590707e-06, "loss": 0.7742, "step": 3744 }, { "epoch": 0.11477871766580851, "grad_norm": 1.8921191148405903, "learning_rate": 9.812718820453273e-06, "loss": 0.7066, "step": 3745 }, { "epoch": 0.11480936618854971, "grad_norm": 2.314070941776921, "learning_rate": 9.81258423189489e-06, "loss": 0.7593, "step": 3746 }, { "epoch": 0.11484001471129092, "grad_norm": 1.9965528594425601, "learning_rate": 9.812449595916883e-06, "loss": 0.7279, "step": 3747 }, { "epoch": 0.11487066323403212, "grad_norm": 1.8635214017257726, "learning_rate": 9.812314912520577e-06, "loss": 0.7406, "step": 3748 }, { "epoch": 0.11490131175677332, "grad_norm": 2.030672583448243, "learning_rate": 9.812180181707303e-06, "loss": 0.8597, "step": 3749 }, { "epoch": 0.11493196027951452, "grad_norm": 2.011383315537991, "learning_rate": 9.812045403478385e-06, "loss": 0.7051, "step": 3750 }, { "epoch": 0.11496260880225573, "grad_norm": 1.9793466837128657, "learning_rate": 9.811910577835154e-06, "loss": 0.7621, "step": 3751 }, { "epoch": 0.11499325732499693, "grad_norm": 1.493412150033542, "learning_rate": 9.811775704778934e-06, "loss": 0.5341, "step": 3752 }, { "epoch": 0.11502390584773814, "grad_norm": 1.9139318774933343, "learning_rate": 9.81164078431106e-06, "loss": 0.6506, "step": 3753 }, { "epoch": 0.11505455437047934, "grad_norm": 1.9863912086282778, "learning_rate": 9.811505816432857e-06, "loss": 0.8179, "step": 3754 }, { "epoch": 0.11508520289322055, "grad_norm": 1.997848009196362, "learning_rate": 9.811370801145656e-06, "loss": 0.7101, "step": 3755 }, { "epoch": 0.11511585141596176, "grad_norm": 2.0584525656466837, "learning_rate": 9.811235738450787e-06, "loss": 0.7696, "step": 3756 }, { "epoch": 0.11514649993870295, "grad_norm": 1.773322252394143, "learning_rate": 9.811100628349582e-06, "loss": 0.8263, "step": 3757 }, { "epoch": 0.11517714846144415, "grad_norm": 1.8728062594337926, "learning_rate": 9.810965470843373e-06, "loss": 0.7314, "step": 3758 }, { "epoch": 0.11520779698418536, "grad_norm": 1.32820663509091, "learning_rate": 9.810830265933488e-06, "loss": 0.5359, "step": 3759 }, { "epoch": 0.11523844550692657, "grad_norm": 1.8581243123426174, "learning_rate": 9.810695013621261e-06, "loss": 0.779, "step": 3760 }, { "epoch": 0.11526909402966777, "grad_norm": 1.8243345010374756, "learning_rate": 9.810559713908027e-06, "loss": 0.7086, "step": 3761 }, { "epoch": 0.11529974255240898, "grad_norm": 2.3416321312312527, "learning_rate": 9.810424366795116e-06, "loss": 0.8637, "step": 3762 }, { "epoch": 0.11533039107515018, "grad_norm": 2.048262502924628, "learning_rate": 9.810288972283864e-06, "loss": 0.7943, "step": 3763 }, { "epoch": 0.11536103959789139, "grad_norm": 1.7379995587093897, "learning_rate": 9.810153530375604e-06, "loss": 0.7623, "step": 3764 }, { "epoch": 0.11539168812063258, "grad_norm": 1.953494596153021, "learning_rate": 9.810018041071669e-06, "loss": 0.7846, "step": 3765 }, { "epoch": 0.11542233664337379, "grad_norm": 2.063468713466126, "learning_rate": 9.809882504373397e-06, "loss": 0.7375, "step": 3766 }, { "epoch": 0.11545298516611499, "grad_norm": 0.8816610062944062, "learning_rate": 9.809746920282121e-06, "loss": 0.518, "step": 3767 }, { "epoch": 0.1154836336888562, "grad_norm": 1.7150933644056758, "learning_rate": 9.809611288799178e-06, "loss": 0.7187, "step": 3768 }, { "epoch": 0.1155142822115974, "grad_norm": 1.8360145378530874, "learning_rate": 9.809475609925908e-06, "loss": 0.7365, "step": 3769 }, { "epoch": 0.11554493073433861, "grad_norm": 0.8618860850625815, "learning_rate": 9.80933988366364e-06, "loss": 0.5252, "step": 3770 }, { "epoch": 0.11557557925707981, "grad_norm": 2.0319106708734966, "learning_rate": 9.809204110013717e-06, "loss": 0.7926, "step": 3771 }, { "epoch": 0.11560622777982102, "grad_norm": 1.9912818646911232, "learning_rate": 9.809068288977475e-06, "loss": 0.7993, "step": 3772 }, { "epoch": 0.11563687630256221, "grad_norm": 1.9666472179242462, "learning_rate": 9.808932420556252e-06, "loss": 0.7175, "step": 3773 }, { "epoch": 0.11566752482530342, "grad_norm": 1.9854920634389124, "learning_rate": 9.80879650475139e-06, "loss": 0.783, "step": 3774 }, { "epoch": 0.11569817334804462, "grad_norm": 0.8523527736131906, "learning_rate": 9.808660541564224e-06, "loss": 0.5182, "step": 3775 }, { "epoch": 0.11572882187078583, "grad_norm": 0.8731763141364749, "learning_rate": 9.808524530996095e-06, "loss": 0.518, "step": 3776 }, { "epoch": 0.11575947039352703, "grad_norm": 0.8330208000299373, "learning_rate": 9.808388473048343e-06, "loss": 0.5285, "step": 3777 }, { "epoch": 0.11579011891626824, "grad_norm": 1.7859791218206003, "learning_rate": 9.808252367722311e-06, "loss": 0.7358, "step": 3778 }, { "epoch": 0.11582076743900944, "grad_norm": 2.0312958875741276, "learning_rate": 9.808116215019336e-06, "loss": 0.7373, "step": 3779 }, { "epoch": 0.11585141596175064, "grad_norm": 0.8162398413375672, "learning_rate": 9.807980014940764e-06, "loss": 0.5001, "step": 3780 }, { "epoch": 0.11588206448449184, "grad_norm": 1.7237605408552374, "learning_rate": 9.807843767487933e-06, "loss": 0.7605, "step": 3781 }, { "epoch": 0.11591271300723305, "grad_norm": 0.8315187878922815, "learning_rate": 9.807707472662188e-06, "loss": 0.5035, "step": 3782 }, { "epoch": 0.11594336152997425, "grad_norm": 2.273306833481674, "learning_rate": 9.80757113046487e-06, "loss": 0.7759, "step": 3783 }, { "epoch": 0.11597401005271546, "grad_norm": 0.8340881117833319, "learning_rate": 9.807434740897325e-06, "loss": 0.4976, "step": 3784 }, { "epoch": 0.11600465857545667, "grad_norm": 2.0952601039063574, "learning_rate": 9.807298303960895e-06, "loss": 0.7387, "step": 3785 }, { "epoch": 0.11603530709819787, "grad_norm": 1.7711522603893561, "learning_rate": 9.807161819656925e-06, "loss": 0.7513, "step": 3786 }, { "epoch": 0.11606595562093908, "grad_norm": 1.865472840229448, "learning_rate": 9.80702528798676e-06, "loss": 0.7372, "step": 3787 }, { "epoch": 0.11609660414368027, "grad_norm": 1.882568214122031, "learning_rate": 9.806888708951743e-06, "loss": 0.8177, "step": 3788 }, { "epoch": 0.11612725266642147, "grad_norm": 1.9226433802088447, "learning_rate": 9.806752082553223e-06, "loss": 0.7137, "step": 3789 }, { "epoch": 0.11615790118916268, "grad_norm": 1.8097671400531072, "learning_rate": 9.806615408792545e-06, "loss": 0.7255, "step": 3790 }, { "epoch": 0.11618854971190389, "grad_norm": 1.9472058455945491, "learning_rate": 9.806478687671055e-06, "loss": 0.731, "step": 3791 }, { "epoch": 0.11621919823464509, "grad_norm": 2.0249911227069077, "learning_rate": 9.806341919190102e-06, "loss": 0.7333, "step": 3792 }, { "epoch": 0.1162498467573863, "grad_norm": 1.8686813229713204, "learning_rate": 9.806205103351031e-06, "loss": 0.7942, "step": 3793 }, { "epoch": 0.1162804952801275, "grad_norm": 2.0832069775403554, "learning_rate": 9.806068240155193e-06, "loss": 0.7399, "step": 3794 }, { "epoch": 0.11631114380286871, "grad_norm": 0.9537600509098805, "learning_rate": 9.805931329603932e-06, "loss": 0.5276, "step": 3795 }, { "epoch": 0.1163417923256099, "grad_norm": 2.0412429273362345, "learning_rate": 9.805794371698603e-06, "loss": 0.8054, "step": 3796 }, { "epoch": 0.1163724408483511, "grad_norm": 1.9580210209520539, "learning_rate": 9.80565736644055e-06, "loss": 0.8718, "step": 3797 }, { "epoch": 0.11640308937109231, "grad_norm": 1.6703957321441234, "learning_rate": 9.805520313831127e-06, "loss": 0.7424, "step": 3798 }, { "epoch": 0.11643373789383352, "grad_norm": 2.3458929987640658, "learning_rate": 9.805383213871683e-06, "loss": 0.8638, "step": 3799 }, { "epoch": 0.11646438641657472, "grad_norm": 0.922516920051, "learning_rate": 9.805246066563569e-06, "loss": 0.498, "step": 3800 }, { "epoch": 0.11649503493931593, "grad_norm": 1.8654934826175822, "learning_rate": 9.805108871908134e-06, "loss": 0.76, "step": 3801 }, { "epoch": 0.11652568346205713, "grad_norm": 1.7412811384995615, "learning_rate": 9.804971629906733e-06, "loss": 0.7254, "step": 3802 }, { "epoch": 0.11655633198479834, "grad_norm": 1.9143158328076915, "learning_rate": 9.804834340560717e-06, "loss": 0.7947, "step": 3803 }, { "epoch": 0.11658698050753953, "grad_norm": 0.890519580348268, "learning_rate": 9.80469700387144e-06, "loss": 0.5102, "step": 3804 }, { "epoch": 0.11661762903028074, "grad_norm": 1.8294743357026237, "learning_rate": 9.804559619840253e-06, "loss": 0.7568, "step": 3805 }, { "epoch": 0.11664827755302194, "grad_norm": 1.895750708321489, "learning_rate": 9.80442218846851e-06, "loss": 0.7292, "step": 3806 }, { "epoch": 0.11667892607576315, "grad_norm": 1.9443311618269812, "learning_rate": 9.804284709757567e-06, "loss": 0.7198, "step": 3807 }, { "epoch": 0.11670957459850435, "grad_norm": 0.8735579924609916, "learning_rate": 9.804147183708776e-06, "loss": 0.5038, "step": 3808 }, { "epoch": 0.11674022312124556, "grad_norm": 2.3643450004514808, "learning_rate": 9.804009610323496e-06, "loss": 0.7444, "step": 3809 }, { "epoch": 0.11677087164398676, "grad_norm": 2.054688829290974, "learning_rate": 9.803871989603078e-06, "loss": 0.7905, "step": 3810 }, { "epoch": 0.11680152016672796, "grad_norm": 1.9089664503880681, "learning_rate": 9.803734321548883e-06, "loss": 0.767, "step": 3811 }, { "epoch": 0.11683216868946916, "grad_norm": 0.8612547700048758, "learning_rate": 9.803596606162262e-06, "loss": 0.5052, "step": 3812 }, { "epoch": 0.11686281721221037, "grad_norm": 1.7732023066400475, "learning_rate": 9.803458843444576e-06, "loss": 0.7597, "step": 3813 }, { "epoch": 0.11689346573495157, "grad_norm": 1.9794736667229902, "learning_rate": 9.80332103339718e-06, "loss": 0.7026, "step": 3814 }, { "epoch": 0.11692411425769278, "grad_norm": 1.681955115594938, "learning_rate": 9.803183176021434e-06, "loss": 0.6358, "step": 3815 }, { "epoch": 0.11695476278043399, "grad_norm": 1.8878219249151842, "learning_rate": 9.803045271318694e-06, "loss": 0.7467, "step": 3816 }, { "epoch": 0.11698541130317519, "grad_norm": 1.8282352615508428, "learning_rate": 9.80290731929032e-06, "loss": 0.7812, "step": 3817 }, { "epoch": 0.1170160598259164, "grad_norm": 1.9063746072194345, "learning_rate": 9.802769319937672e-06, "loss": 0.8279, "step": 3818 }, { "epoch": 0.11704670834865759, "grad_norm": 1.9460180288462554, "learning_rate": 9.802631273262109e-06, "loss": 0.7061, "step": 3819 }, { "epoch": 0.1170773568713988, "grad_norm": 2.050871825080161, "learning_rate": 9.802493179264991e-06, "loss": 0.7774, "step": 3820 }, { "epoch": 0.11710800539414, "grad_norm": 2.098026438958997, "learning_rate": 9.802355037947679e-06, "loss": 0.7871, "step": 3821 }, { "epoch": 0.1171386539168812, "grad_norm": 1.7236889331188405, "learning_rate": 9.802216849311535e-06, "loss": 0.7029, "step": 3822 }, { "epoch": 0.11716930243962241, "grad_norm": 1.8235977196934763, "learning_rate": 9.802078613357916e-06, "loss": 0.7234, "step": 3823 }, { "epoch": 0.11719995096236362, "grad_norm": 0.9344633451569525, "learning_rate": 9.801940330088192e-06, "loss": 0.4983, "step": 3824 }, { "epoch": 0.11723059948510482, "grad_norm": 1.8129620263087216, "learning_rate": 9.801801999503719e-06, "loss": 0.7712, "step": 3825 }, { "epoch": 0.11726124800784603, "grad_norm": 1.6752370148382356, "learning_rate": 9.801663621605864e-06, "loss": 0.7097, "step": 3826 }, { "epoch": 0.11729189653058722, "grad_norm": 1.8196327921362825, "learning_rate": 9.801525196395987e-06, "loss": 0.8087, "step": 3827 }, { "epoch": 0.11732254505332843, "grad_norm": 1.8284914631764275, "learning_rate": 9.801386723875455e-06, "loss": 0.9023, "step": 3828 }, { "epoch": 0.11735319357606963, "grad_norm": 2.1867634391354205, "learning_rate": 9.80124820404563e-06, "loss": 0.7865, "step": 3829 }, { "epoch": 0.11738384209881084, "grad_norm": 0.9650604740967921, "learning_rate": 9.801109636907881e-06, "loss": 0.5354, "step": 3830 }, { "epoch": 0.11741449062155204, "grad_norm": 2.5732326599298028, "learning_rate": 9.800971022463568e-06, "loss": 0.7415, "step": 3831 }, { "epoch": 0.11744513914429325, "grad_norm": 1.7308670682487317, "learning_rate": 9.800832360714058e-06, "loss": 0.6827, "step": 3832 }, { "epoch": 0.11747578766703445, "grad_norm": 0.9919615888649522, "learning_rate": 9.80069365166072e-06, "loss": 0.5102, "step": 3833 }, { "epoch": 0.11750643618977566, "grad_norm": 1.858996495433463, "learning_rate": 9.80055489530492e-06, "loss": 0.8116, "step": 3834 }, { "epoch": 0.11753708471251685, "grad_norm": 1.8106149161318876, "learning_rate": 9.800416091648022e-06, "loss": 0.7422, "step": 3835 }, { "epoch": 0.11756773323525806, "grad_norm": 0.8536540649526568, "learning_rate": 9.800277240691399e-06, "loss": 0.5262, "step": 3836 }, { "epoch": 0.11759838175799926, "grad_norm": 0.8354754311469161, "learning_rate": 9.800138342436413e-06, "loss": 0.5161, "step": 3837 }, { "epoch": 0.11762903028074047, "grad_norm": 0.8398047724552523, "learning_rate": 9.799999396884436e-06, "loss": 0.5091, "step": 3838 }, { "epoch": 0.11765967880348167, "grad_norm": 2.0040459274390434, "learning_rate": 9.799860404036838e-06, "loss": 0.795, "step": 3839 }, { "epoch": 0.11769032732622288, "grad_norm": 0.8181714170927724, "learning_rate": 9.799721363894988e-06, "loss": 0.5138, "step": 3840 }, { "epoch": 0.11772097584896409, "grad_norm": 1.668154646623028, "learning_rate": 9.799582276460254e-06, "loss": 0.7013, "step": 3841 }, { "epoch": 0.11775162437170529, "grad_norm": 2.616748773827447, "learning_rate": 9.799443141734009e-06, "loss": 0.8319, "step": 3842 }, { "epoch": 0.11778227289444648, "grad_norm": 2.393471699152147, "learning_rate": 9.79930395971762e-06, "loss": 0.763, "step": 3843 }, { "epoch": 0.11781292141718769, "grad_norm": 1.911711226195945, "learning_rate": 9.799164730412464e-06, "loss": 0.7562, "step": 3844 }, { "epoch": 0.1178435699399289, "grad_norm": 1.823363119207298, "learning_rate": 9.79902545381991e-06, "loss": 0.8212, "step": 3845 }, { "epoch": 0.1178742184626701, "grad_norm": 1.8384187692622416, "learning_rate": 9.79888612994133e-06, "loss": 0.6663, "step": 3846 }, { "epoch": 0.1179048669854113, "grad_norm": 1.9752577530162125, "learning_rate": 9.798746758778097e-06, "loss": 0.8846, "step": 3847 }, { "epoch": 0.11793551550815251, "grad_norm": 2.0963918545102698, "learning_rate": 9.798607340331583e-06, "loss": 0.7716, "step": 3848 }, { "epoch": 0.11796616403089372, "grad_norm": 1.0978041333833677, "learning_rate": 9.798467874603164e-06, "loss": 0.5165, "step": 3849 }, { "epoch": 0.11799681255363491, "grad_norm": 0.9919531106331221, "learning_rate": 9.798328361594214e-06, "loss": 0.5059, "step": 3850 }, { "epoch": 0.11802746107637611, "grad_norm": 1.9741670151255237, "learning_rate": 9.798188801306105e-06, "loss": 0.7231, "step": 3851 }, { "epoch": 0.11805810959911732, "grad_norm": 1.9145308837909456, "learning_rate": 9.798049193740215e-06, "loss": 0.738, "step": 3852 }, { "epoch": 0.11808875812185853, "grad_norm": 1.9353698742177252, "learning_rate": 9.79790953889792e-06, "loss": 0.8026, "step": 3853 }, { "epoch": 0.11811940664459973, "grad_norm": 1.7836709811693507, "learning_rate": 9.797769836780594e-06, "loss": 0.7955, "step": 3854 }, { "epoch": 0.11815005516734094, "grad_norm": 1.8518598808912976, "learning_rate": 9.797630087389614e-06, "loss": 0.6671, "step": 3855 }, { "epoch": 0.11818070369008214, "grad_norm": 1.9769399886785606, "learning_rate": 9.797490290726356e-06, "loss": 0.7104, "step": 3856 }, { "epoch": 0.11821135221282335, "grad_norm": 2.0750340294065635, "learning_rate": 9.797350446792202e-06, "loss": 0.8885, "step": 3857 }, { "epoch": 0.11824200073556454, "grad_norm": 1.8897077178295012, "learning_rate": 9.797210555588523e-06, "loss": 0.7812, "step": 3858 }, { "epoch": 0.11827264925830575, "grad_norm": 1.7347616446409677, "learning_rate": 9.797070617116704e-06, "loss": 0.7255, "step": 3859 }, { "epoch": 0.11830329778104695, "grad_norm": 1.9590842939590662, "learning_rate": 9.796930631378118e-06, "loss": 0.7946, "step": 3860 }, { "epoch": 0.11833394630378816, "grad_norm": 2.0795936788213956, "learning_rate": 9.796790598374149e-06, "loss": 0.8842, "step": 3861 }, { "epoch": 0.11836459482652936, "grad_norm": 1.8141258530052218, "learning_rate": 9.796650518106175e-06, "loss": 0.7889, "step": 3862 }, { "epoch": 0.11839524334927057, "grad_norm": 1.7778167232884605, "learning_rate": 9.796510390575575e-06, "loss": 0.8595, "step": 3863 }, { "epoch": 0.11842589187201177, "grad_norm": 1.9279492673547622, "learning_rate": 9.796370215783732e-06, "loss": 0.7939, "step": 3864 }, { "epoch": 0.11845654039475298, "grad_norm": 1.9904090928120948, "learning_rate": 9.796229993732026e-06, "loss": 0.7839, "step": 3865 }, { "epoch": 0.11848718891749417, "grad_norm": 1.9955921370302643, "learning_rate": 9.796089724421837e-06, "loss": 0.8201, "step": 3866 }, { "epoch": 0.11851783744023538, "grad_norm": 1.9264722895269155, "learning_rate": 9.795949407854551e-06, "loss": 0.7622, "step": 3867 }, { "epoch": 0.11854848596297658, "grad_norm": 1.9482911904303026, "learning_rate": 9.795809044031546e-06, "loss": 0.8153, "step": 3868 }, { "epoch": 0.11857913448571779, "grad_norm": 1.888988668878405, "learning_rate": 9.795668632954209e-06, "loss": 0.6786, "step": 3869 }, { "epoch": 0.118609783008459, "grad_norm": 1.855108042329308, "learning_rate": 9.795528174623922e-06, "loss": 0.6974, "step": 3870 }, { "epoch": 0.1186404315312002, "grad_norm": 2.6534332463503896, "learning_rate": 9.795387669042069e-06, "loss": 0.8038, "step": 3871 }, { "epoch": 0.1186710800539414, "grad_norm": 1.9274208248339366, "learning_rate": 9.795247116210035e-06, "loss": 0.7159, "step": 3872 }, { "epoch": 0.11870172857668261, "grad_norm": 1.8647138618978674, "learning_rate": 9.795106516129203e-06, "loss": 0.8194, "step": 3873 }, { "epoch": 0.1187323770994238, "grad_norm": 1.7914707846402174, "learning_rate": 9.794965868800958e-06, "loss": 0.7364, "step": 3874 }, { "epoch": 0.11876302562216501, "grad_norm": 1.8642960762237157, "learning_rate": 9.79482517422669e-06, "loss": 0.7868, "step": 3875 }, { "epoch": 0.11879367414490621, "grad_norm": 1.9697476545700374, "learning_rate": 9.794684432407781e-06, "loss": 0.7767, "step": 3876 }, { "epoch": 0.11882432266764742, "grad_norm": 1.789531433345558, "learning_rate": 9.794543643345622e-06, "loss": 0.8307, "step": 3877 }, { "epoch": 0.11885497119038863, "grad_norm": 1.7060158836795396, "learning_rate": 9.794402807041596e-06, "loss": 0.7939, "step": 3878 }, { "epoch": 0.11888561971312983, "grad_norm": 1.9513524817726566, "learning_rate": 9.794261923497092e-06, "loss": 0.7791, "step": 3879 }, { "epoch": 0.11891626823587104, "grad_norm": 2.1657437303029727, "learning_rate": 9.7941209927135e-06, "loss": 0.8023, "step": 3880 }, { "epoch": 0.11894691675861223, "grad_norm": 2.2643535396471237, "learning_rate": 9.793980014692207e-06, "loss": 0.6654, "step": 3881 }, { "epoch": 0.11897756528135343, "grad_norm": 1.4347719578696314, "learning_rate": 9.793838989434602e-06, "loss": 0.5246, "step": 3882 }, { "epoch": 0.11900821380409464, "grad_norm": 1.9320010435449868, "learning_rate": 9.793697916942074e-06, "loss": 0.7774, "step": 3883 }, { "epoch": 0.11903886232683585, "grad_norm": 1.7824604645604654, "learning_rate": 9.793556797216016e-06, "loss": 0.7312, "step": 3884 }, { "epoch": 0.11906951084957705, "grad_norm": 1.7274702202457328, "learning_rate": 9.793415630257815e-06, "loss": 0.7289, "step": 3885 }, { "epoch": 0.11910015937231826, "grad_norm": 2.0238738713745743, "learning_rate": 9.793274416068862e-06, "loss": 0.7333, "step": 3886 }, { "epoch": 0.11913080789505946, "grad_norm": 1.8222022271946259, "learning_rate": 9.793133154650552e-06, "loss": 0.6984, "step": 3887 }, { "epoch": 0.11916145641780067, "grad_norm": 1.9212000528514053, "learning_rate": 9.792991846004274e-06, "loss": 0.756, "step": 3888 }, { "epoch": 0.11919210494054186, "grad_norm": 2.0065067066543265, "learning_rate": 9.792850490131421e-06, "loss": 0.716, "step": 3889 }, { "epoch": 0.11922275346328307, "grad_norm": 1.1878581543924933, "learning_rate": 9.792709087033386e-06, "loss": 0.5234, "step": 3890 }, { "epoch": 0.11925340198602427, "grad_norm": 1.9263557188005953, "learning_rate": 9.792567636711561e-06, "loss": 0.7042, "step": 3891 }, { "epoch": 0.11928405050876548, "grad_norm": 1.8457511289272206, "learning_rate": 9.792426139167341e-06, "loss": 0.878, "step": 3892 }, { "epoch": 0.11931469903150668, "grad_norm": 0.9218374289904986, "learning_rate": 9.792284594402122e-06, "loss": 0.5205, "step": 3893 }, { "epoch": 0.11934534755424789, "grad_norm": 1.9252580801621884, "learning_rate": 9.792143002417295e-06, "loss": 0.7057, "step": 3894 }, { "epoch": 0.1193759960769891, "grad_norm": 2.051068820543772, "learning_rate": 9.792001363214257e-06, "loss": 0.7423, "step": 3895 }, { "epoch": 0.1194066445997303, "grad_norm": 0.9200537440425371, "learning_rate": 9.791859676794403e-06, "loss": 0.5053, "step": 3896 }, { "epoch": 0.11943729312247149, "grad_norm": 1.8564203421706564, "learning_rate": 9.791717943159131e-06, "loss": 0.795, "step": 3897 }, { "epoch": 0.1194679416452127, "grad_norm": 1.9249054478262562, "learning_rate": 9.791576162309835e-06, "loss": 0.7508, "step": 3898 }, { "epoch": 0.1194985901679539, "grad_norm": 1.8974588151098368, "learning_rate": 9.791434334247914e-06, "loss": 0.7825, "step": 3899 }, { "epoch": 0.11952923869069511, "grad_norm": 0.8901970268007301, "learning_rate": 9.791292458974764e-06, "loss": 0.5104, "step": 3900 }, { "epoch": 0.11955988721343631, "grad_norm": 1.7054795060111076, "learning_rate": 9.791150536491784e-06, "loss": 0.6516, "step": 3901 }, { "epoch": 0.11959053573617752, "grad_norm": 1.862077099453833, "learning_rate": 9.79100856680037e-06, "loss": 0.7586, "step": 3902 }, { "epoch": 0.11962118425891873, "grad_norm": 2.083623704952197, "learning_rate": 9.790866549901924e-06, "loss": 0.7518, "step": 3903 }, { "epoch": 0.11965183278165993, "grad_norm": 1.9641864229432904, "learning_rate": 9.790724485797846e-06, "loss": 0.8089, "step": 3904 }, { "epoch": 0.11968248130440112, "grad_norm": 1.9645586692126367, "learning_rate": 9.790582374489532e-06, "loss": 0.7683, "step": 3905 }, { "epoch": 0.11971312982714233, "grad_norm": 2.109234739342282, "learning_rate": 9.790440215978383e-06, "loss": 0.7406, "step": 3906 }, { "epoch": 0.11974377834988353, "grad_norm": 1.9087490486610936, "learning_rate": 9.790298010265803e-06, "loss": 0.6865, "step": 3907 }, { "epoch": 0.11977442687262474, "grad_norm": 0.9933002050855174, "learning_rate": 9.79015575735319e-06, "loss": 0.5001, "step": 3908 }, { "epoch": 0.11980507539536595, "grad_norm": 1.8164187697402456, "learning_rate": 9.790013457241948e-06, "loss": 0.7745, "step": 3909 }, { "epoch": 0.11983572391810715, "grad_norm": 1.9965517356864608, "learning_rate": 9.789871109933477e-06, "loss": 0.7957, "step": 3910 }, { "epoch": 0.11986637244084836, "grad_norm": 1.9120268297377856, "learning_rate": 9.78972871542918e-06, "loss": 0.7714, "step": 3911 }, { "epoch": 0.11989702096358955, "grad_norm": 2.05092879297758, "learning_rate": 9.78958627373046e-06, "loss": 0.7921, "step": 3912 }, { "epoch": 0.11992766948633075, "grad_norm": 2.221842051924001, "learning_rate": 9.789443784838722e-06, "loss": 0.7986, "step": 3913 }, { "epoch": 0.11995831800907196, "grad_norm": 0.8947080751367145, "learning_rate": 9.789301248755368e-06, "loss": 0.5167, "step": 3914 }, { "epoch": 0.11998896653181317, "grad_norm": 1.7694232126191867, "learning_rate": 9.789158665481804e-06, "loss": 0.7763, "step": 3915 }, { "epoch": 0.12001961505455437, "grad_norm": 1.6602438572243863, "learning_rate": 9.789016035019435e-06, "loss": 0.715, "step": 3916 }, { "epoch": 0.12005026357729558, "grad_norm": 1.8417573062918349, "learning_rate": 9.788873357369665e-06, "loss": 0.7368, "step": 3917 }, { "epoch": 0.12008091210003678, "grad_norm": 2.2736995601659014, "learning_rate": 9.7887306325339e-06, "loss": 0.7817, "step": 3918 }, { "epoch": 0.12011156062277799, "grad_norm": 2.032070703018031, "learning_rate": 9.788587860513547e-06, "loss": 0.6596, "step": 3919 }, { "epoch": 0.12014220914551918, "grad_norm": 1.871091164032451, "learning_rate": 9.788445041310013e-06, "loss": 0.6818, "step": 3920 }, { "epoch": 0.12017285766826039, "grad_norm": 1.917903958683791, "learning_rate": 9.788302174924705e-06, "loss": 0.8452, "step": 3921 }, { "epoch": 0.12020350619100159, "grad_norm": 1.8047431909119485, "learning_rate": 9.788159261359031e-06, "loss": 0.7792, "step": 3922 }, { "epoch": 0.1202341547137428, "grad_norm": 0.9338457211971057, "learning_rate": 9.788016300614397e-06, "loss": 0.5254, "step": 3923 }, { "epoch": 0.120264803236484, "grad_norm": 0.8927252427717907, "learning_rate": 9.787873292692216e-06, "loss": 0.5082, "step": 3924 }, { "epoch": 0.12029545175922521, "grad_norm": 0.8202109349512836, "learning_rate": 9.78773023759389e-06, "loss": 0.4896, "step": 3925 }, { "epoch": 0.12032610028196641, "grad_norm": 2.0481891876737723, "learning_rate": 9.787587135320837e-06, "loss": 0.7703, "step": 3926 }, { "epoch": 0.12035674880470762, "grad_norm": 1.940735978674249, "learning_rate": 9.787443985874463e-06, "loss": 0.771, "step": 3927 }, { "epoch": 0.12038739732744881, "grad_norm": 1.9352589305253196, "learning_rate": 9.787300789256176e-06, "loss": 0.8211, "step": 3928 }, { "epoch": 0.12041804585019002, "grad_norm": 1.8996709994532546, "learning_rate": 9.787157545467392e-06, "loss": 0.7777, "step": 3929 }, { "epoch": 0.12044869437293122, "grad_norm": 1.8084701385001196, "learning_rate": 9.787014254509517e-06, "loss": 0.7881, "step": 3930 }, { "epoch": 0.12047934289567243, "grad_norm": 1.8797729291956977, "learning_rate": 9.786870916383969e-06, "loss": 0.7392, "step": 3931 }, { "epoch": 0.12050999141841363, "grad_norm": 1.6110108802496155, "learning_rate": 9.786727531092154e-06, "loss": 0.6906, "step": 3932 }, { "epoch": 0.12054063994115484, "grad_norm": 1.7560620644258056, "learning_rate": 9.78658409863549e-06, "loss": 0.7445, "step": 3933 }, { "epoch": 0.12057128846389605, "grad_norm": 1.8179792927475642, "learning_rate": 9.786440619015387e-06, "loss": 0.7725, "step": 3934 }, { "epoch": 0.12060193698663725, "grad_norm": 2.5069290133681283, "learning_rate": 9.78629709223326e-06, "loss": 0.7608, "step": 3935 }, { "epoch": 0.12063258550937844, "grad_norm": 1.1545882786971595, "learning_rate": 9.786153518290524e-06, "loss": 0.5236, "step": 3936 }, { "epoch": 0.12066323403211965, "grad_norm": 1.7529411261106982, "learning_rate": 9.78600989718859e-06, "loss": 0.6871, "step": 3937 }, { "epoch": 0.12069388255486085, "grad_norm": 2.1583624429744828, "learning_rate": 9.78586622892888e-06, "loss": 0.84, "step": 3938 }, { "epoch": 0.12072453107760206, "grad_norm": 2.468055097766941, "learning_rate": 9.785722513512803e-06, "loss": 0.7883, "step": 3939 }, { "epoch": 0.12075517960034327, "grad_norm": 2.0230721339277826, "learning_rate": 9.785578750941779e-06, "loss": 0.8173, "step": 3940 }, { "epoch": 0.12078582812308447, "grad_norm": 1.881611057845351, "learning_rate": 9.785434941217222e-06, "loss": 0.7365, "step": 3941 }, { "epoch": 0.12081647664582568, "grad_norm": 1.9728852351512325, "learning_rate": 9.785291084340551e-06, "loss": 0.8345, "step": 3942 }, { "epoch": 0.12084712516856687, "grad_norm": 2.108136492305265, "learning_rate": 9.785147180313181e-06, "loss": 0.7553, "step": 3943 }, { "epoch": 0.12087777369130807, "grad_norm": 0.9209703106352586, "learning_rate": 9.785003229136534e-06, "loss": 0.5087, "step": 3944 }, { "epoch": 0.12090842221404928, "grad_norm": 2.1161976906065596, "learning_rate": 9.784859230812024e-06, "loss": 0.7681, "step": 3945 }, { "epoch": 0.12093907073679049, "grad_norm": 1.7040083806759214, "learning_rate": 9.784715185341072e-06, "loss": 0.7084, "step": 3946 }, { "epoch": 0.12096971925953169, "grad_norm": 1.6922289832657282, "learning_rate": 9.784571092725097e-06, "loss": 0.7442, "step": 3947 }, { "epoch": 0.1210003677822729, "grad_norm": 2.172328839443821, "learning_rate": 9.78442695296552e-06, "loss": 0.7815, "step": 3948 }, { "epoch": 0.1210310163050141, "grad_norm": 0.8527328405492085, "learning_rate": 9.784282766063758e-06, "loss": 0.4764, "step": 3949 }, { "epoch": 0.12106166482775531, "grad_norm": 2.0163456608246095, "learning_rate": 9.784138532021236e-06, "loss": 0.7989, "step": 3950 }, { "epoch": 0.1210923133504965, "grad_norm": 1.7109951420252174, "learning_rate": 9.783994250839371e-06, "loss": 0.67, "step": 3951 }, { "epoch": 0.1211229618732377, "grad_norm": 0.8908728555158328, "learning_rate": 9.783849922519589e-06, "loss": 0.5215, "step": 3952 }, { "epoch": 0.12115361039597891, "grad_norm": 1.911029978523564, "learning_rate": 9.783705547063306e-06, "loss": 0.8163, "step": 3953 }, { "epoch": 0.12118425891872012, "grad_norm": 1.9060133719234424, "learning_rate": 9.783561124471951e-06, "loss": 0.7524, "step": 3954 }, { "epoch": 0.12121490744146132, "grad_norm": 1.8098395574781434, "learning_rate": 9.783416654746945e-06, "loss": 0.6954, "step": 3955 }, { "epoch": 0.12124555596420253, "grad_norm": 2.081397973091131, "learning_rate": 9.78327213788971e-06, "loss": 0.7788, "step": 3956 }, { "epoch": 0.12127620448694373, "grad_norm": 1.7829205438582147, "learning_rate": 9.78312757390167e-06, "loss": 0.7716, "step": 3957 }, { "epoch": 0.12130685300968494, "grad_norm": 1.8479551103348368, "learning_rate": 9.782982962784252e-06, "loss": 0.7337, "step": 3958 }, { "epoch": 0.12133750153242613, "grad_norm": 2.187426181912062, "learning_rate": 9.782838304538878e-06, "loss": 0.8148, "step": 3959 }, { "epoch": 0.12136815005516734, "grad_norm": 1.1260909599601068, "learning_rate": 9.782693599166973e-06, "loss": 0.528, "step": 3960 }, { "epoch": 0.12139879857790854, "grad_norm": 2.0790460510798674, "learning_rate": 9.782548846669966e-06, "loss": 0.7513, "step": 3961 }, { "epoch": 0.12142944710064975, "grad_norm": 1.9619627130507744, "learning_rate": 9.782404047049281e-06, "loss": 0.7577, "step": 3962 }, { "epoch": 0.12146009562339095, "grad_norm": 2.0124561640302274, "learning_rate": 9.782259200306345e-06, "loss": 0.7146, "step": 3963 }, { "epoch": 0.12149074414613216, "grad_norm": 2.3256677592416106, "learning_rate": 9.782114306442586e-06, "loss": 0.7403, "step": 3964 }, { "epoch": 0.12152139266887337, "grad_norm": 1.7308345489105843, "learning_rate": 9.78196936545943e-06, "loss": 0.6525, "step": 3965 }, { "epoch": 0.12155204119161457, "grad_norm": 1.734505053173351, "learning_rate": 9.781824377358308e-06, "loss": 0.7404, "step": 3966 }, { "epoch": 0.12158268971435576, "grad_norm": 2.0096897466571346, "learning_rate": 9.781679342140647e-06, "loss": 0.7593, "step": 3967 }, { "epoch": 0.12161333823709697, "grad_norm": 2.0081337715986285, "learning_rate": 9.781534259807874e-06, "loss": 0.7712, "step": 3968 }, { "epoch": 0.12164398675983817, "grad_norm": 1.8609828623807834, "learning_rate": 9.781389130361422e-06, "loss": 0.7264, "step": 3969 }, { "epoch": 0.12167463528257938, "grad_norm": 2.019480436225165, "learning_rate": 9.781243953802719e-06, "loss": 0.8323, "step": 3970 }, { "epoch": 0.12170528380532059, "grad_norm": 1.3635191954641133, "learning_rate": 9.781098730133196e-06, "loss": 0.5272, "step": 3971 }, { "epoch": 0.12173593232806179, "grad_norm": 1.0829703566634012, "learning_rate": 9.780953459354285e-06, "loss": 0.5374, "step": 3972 }, { "epoch": 0.121766580850803, "grad_norm": 1.8326340329449362, "learning_rate": 9.780808141467414e-06, "loss": 0.6836, "step": 3973 }, { "epoch": 0.12179722937354419, "grad_norm": 1.8619192789115784, "learning_rate": 9.78066277647402e-06, "loss": 0.7337, "step": 3974 }, { "epoch": 0.1218278778962854, "grad_norm": 2.267811738289442, "learning_rate": 9.780517364375531e-06, "loss": 0.9009, "step": 3975 }, { "epoch": 0.1218585264190266, "grad_norm": 1.491678643749368, "learning_rate": 9.780371905173381e-06, "loss": 0.5394, "step": 3976 }, { "epoch": 0.1218891749417678, "grad_norm": 1.9623338123713694, "learning_rate": 9.780226398869004e-06, "loss": 0.7914, "step": 3977 }, { "epoch": 0.12191982346450901, "grad_norm": 1.9132763383634876, "learning_rate": 9.780080845463832e-06, "loss": 0.857, "step": 3978 }, { "epoch": 0.12195047198725022, "grad_norm": 2.1490485355330695, "learning_rate": 9.779935244959303e-06, "loss": 0.8079, "step": 3979 }, { "epoch": 0.12198112050999142, "grad_norm": 1.7985076562759308, "learning_rate": 9.779789597356848e-06, "loss": 0.7496, "step": 3980 }, { "epoch": 0.12201176903273263, "grad_norm": 1.9321751444269748, "learning_rate": 9.779643902657902e-06, "loss": 0.8531, "step": 3981 }, { "epoch": 0.12204241755547382, "grad_norm": 1.7316224266530633, "learning_rate": 9.779498160863903e-06, "loss": 0.7646, "step": 3982 }, { "epoch": 0.12207306607821503, "grad_norm": 1.7655244911793255, "learning_rate": 9.779352371976284e-06, "loss": 0.6843, "step": 3983 }, { "epoch": 0.12210371460095623, "grad_norm": 0.9188301595227462, "learning_rate": 9.779206535996487e-06, "loss": 0.5227, "step": 3984 }, { "epoch": 0.12213436312369744, "grad_norm": 2.4316908200003913, "learning_rate": 9.77906065292594e-06, "loss": 0.6633, "step": 3985 }, { "epoch": 0.12216501164643864, "grad_norm": 2.1711361550157817, "learning_rate": 9.778914722766089e-06, "loss": 0.7381, "step": 3986 }, { "epoch": 0.12219566016917985, "grad_norm": 1.9180977952833558, "learning_rate": 9.778768745518367e-06, "loss": 0.876, "step": 3987 }, { "epoch": 0.12222630869192105, "grad_norm": 1.935085227818858, "learning_rate": 9.778622721184216e-06, "loss": 0.7891, "step": 3988 }, { "epoch": 0.12225695721466226, "grad_norm": 1.9775532937357845, "learning_rate": 9.778476649765071e-06, "loss": 0.8173, "step": 3989 }, { "epoch": 0.12228760573740345, "grad_norm": 1.9675779596522587, "learning_rate": 9.778330531262373e-06, "loss": 0.8053, "step": 3990 }, { "epoch": 0.12231825426014466, "grad_norm": 1.7564154888670522, "learning_rate": 9.778184365677561e-06, "loss": 0.6353, "step": 3991 }, { "epoch": 0.12234890278288586, "grad_norm": 0.965467559716903, "learning_rate": 9.778038153012078e-06, "loss": 0.5178, "step": 3992 }, { "epoch": 0.12237955130562707, "grad_norm": 1.7429045991779435, "learning_rate": 9.77789189326736e-06, "loss": 0.7368, "step": 3993 }, { "epoch": 0.12241019982836827, "grad_norm": 1.7428018709021957, "learning_rate": 9.777745586444853e-06, "loss": 0.7564, "step": 3994 }, { "epoch": 0.12244084835110948, "grad_norm": 2.1050538608062754, "learning_rate": 9.777599232545994e-06, "loss": 0.839, "step": 3995 }, { "epoch": 0.12247149687385069, "grad_norm": 1.9581609541851468, "learning_rate": 9.777452831572229e-06, "loss": 0.6994, "step": 3996 }, { "epoch": 0.12250214539659189, "grad_norm": 2.0541389442883946, "learning_rate": 9.777306383524999e-06, "loss": 0.7921, "step": 3997 }, { "epoch": 0.12253279391933308, "grad_norm": 1.8078280563017146, "learning_rate": 9.777159888405746e-06, "loss": 0.7254, "step": 3998 }, { "epoch": 0.12256344244207429, "grad_norm": 2.0968954802259003, "learning_rate": 9.777013346215915e-06, "loss": 0.7368, "step": 3999 }, { "epoch": 0.1225940909648155, "grad_norm": 1.8829968294227226, "learning_rate": 9.776866756956948e-06, "loss": 0.7256, "step": 4000 }, { "epoch": 0.1226247394875567, "grad_norm": 1.6992076135172227, "learning_rate": 9.776720120630293e-06, "loss": 0.7001, "step": 4001 }, { "epoch": 0.1226553880102979, "grad_norm": 2.0195854076648074, "learning_rate": 9.776573437237391e-06, "loss": 0.7223, "step": 4002 }, { "epoch": 0.12268603653303911, "grad_norm": 1.993591967956816, "learning_rate": 9.776426706779688e-06, "loss": 0.8202, "step": 4003 }, { "epoch": 0.12271668505578032, "grad_norm": 2.152757971021305, "learning_rate": 9.776279929258632e-06, "loss": 0.7362, "step": 4004 }, { "epoch": 0.12274733357852151, "grad_norm": 1.9875419570641666, "learning_rate": 9.776133104675667e-06, "loss": 0.7729, "step": 4005 }, { "epoch": 0.12277798210126271, "grad_norm": 1.9196287072367106, "learning_rate": 9.775986233032241e-06, "loss": 0.7426, "step": 4006 }, { "epoch": 0.12280863062400392, "grad_norm": 1.8310620383826612, "learning_rate": 9.7758393143298e-06, "loss": 0.7656, "step": 4007 }, { "epoch": 0.12283927914674513, "grad_norm": 1.0046543301046023, "learning_rate": 9.775692348569792e-06, "loss": 0.493, "step": 4008 }, { "epoch": 0.12286992766948633, "grad_norm": 1.8191125001663875, "learning_rate": 9.775545335753667e-06, "loss": 0.8134, "step": 4009 }, { "epoch": 0.12290057619222754, "grad_norm": 1.62894729182304, "learning_rate": 9.77539827588287e-06, "loss": 0.6989, "step": 4010 }, { "epoch": 0.12293122471496874, "grad_norm": 1.9428801694628954, "learning_rate": 9.775251168958853e-06, "loss": 0.7122, "step": 4011 }, { "epoch": 0.12296187323770995, "grad_norm": 1.8723098484763487, "learning_rate": 9.775104014983066e-06, "loss": 0.7049, "step": 4012 }, { "epoch": 0.12299252176045114, "grad_norm": 1.8599124781024214, "learning_rate": 9.774956813956954e-06, "loss": 0.7375, "step": 4013 }, { "epoch": 0.12302317028319235, "grad_norm": 1.060433824126987, "learning_rate": 9.774809565881973e-06, "loss": 0.528, "step": 4014 }, { "epoch": 0.12305381880593355, "grad_norm": 1.6471215006406288, "learning_rate": 9.774662270759571e-06, "loss": 0.76, "step": 4015 }, { "epoch": 0.12308446732867476, "grad_norm": 2.255333115868993, "learning_rate": 9.7745149285912e-06, "loss": 0.8009, "step": 4016 }, { "epoch": 0.12311511585141596, "grad_norm": 1.85207463151369, "learning_rate": 9.774367539378313e-06, "loss": 0.6971, "step": 4017 }, { "epoch": 0.12314576437415717, "grad_norm": 2.139202657361986, "learning_rate": 9.77422010312236e-06, "loss": 0.7945, "step": 4018 }, { "epoch": 0.12317641289689837, "grad_norm": 1.7658888893557316, "learning_rate": 9.774072619824794e-06, "loss": 0.734, "step": 4019 }, { "epoch": 0.12320706141963958, "grad_norm": 1.9805725936284189, "learning_rate": 9.773925089487069e-06, "loss": 0.6777, "step": 4020 }, { "epoch": 0.12323770994238077, "grad_norm": 2.097339492826017, "learning_rate": 9.773777512110641e-06, "loss": 0.7292, "step": 4021 }, { "epoch": 0.12326835846512198, "grad_norm": 2.0479281422007656, "learning_rate": 9.77362988769696e-06, "loss": 0.7066, "step": 4022 }, { "epoch": 0.12329900698786318, "grad_norm": 1.727068083401802, "learning_rate": 9.773482216247482e-06, "loss": 0.7851, "step": 4023 }, { "epoch": 0.12332965551060439, "grad_norm": 1.9149756549241077, "learning_rate": 9.773334497763663e-06, "loss": 0.8197, "step": 4024 }, { "epoch": 0.1233603040333456, "grad_norm": 0.9066421207728026, "learning_rate": 9.773186732246957e-06, "loss": 0.5103, "step": 4025 }, { "epoch": 0.1233909525560868, "grad_norm": 1.9765321698175817, "learning_rate": 9.773038919698821e-06, "loss": 0.7019, "step": 4026 }, { "epoch": 0.123421601078828, "grad_norm": 1.9559550544396924, "learning_rate": 9.772891060120713e-06, "loss": 0.6656, "step": 4027 }, { "epoch": 0.12345224960156921, "grad_norm": 1.758700521214489, "learning_rate": 9.772743153514088e-06, "loss": 0.7164, "step": 4028 }, { "epoch": 0.1234828981243104, "grad_norm": 0.8122173399393205, "learning_rate": 9.772595199880402e-06, "loss": 0.4707, "step": 4029 }, { "epoch": 0.12351354664705161, "grad_norm": 1.9979244760642902, "learning_rate": 9.772447199221114e-06, "loss": 0.7216, "step": 4030 }, { "epoch": 0.12354419516979281, "grad_norm": 1.9717210409885437, "learning_rate": 9.772299151537684e-06, "loss": 0.7492, "step": 4031 }, { "epoch": 0.12357484369253402, "grad_norm": 1.937681325197825, "learning_rate": 9.77215105683157e-06, "loss": 0.7517, "step": 4032 }, { "epoch": 0.12360549221527523, "grad_norm": 0.8618332261734779, "learning_rate": 9.772002915104228e-06, "loss": 0.4818, "step": 4033 }, { "epoch": 0.12363614073801643, "grad_norm": 2.1086549860215738, "learning_rate": 9.771854726357123e-06, "loss": 0.7603, "step": 4034 }, { "epoch": 0.12366678926075764, "grad_norm": 1.8002314655781981, "learning_rate": 9.771706490591711e-06, "loss": 0.831, "step": 4035 }, { "epoch": 0.12369743778349883, "grad_norm": 1.7778485772448616, "learning_rate": 9.771558207809455e-06, "loss": 0.713, "step": 4036 }, { "epoch": 0.12372808630624003, "grad_norm": 1.8771421329346885, "learning_rate": 9.771409878011814e-06, "loss": 0.7922, "step": 4037 }, { "epoch": 0.12375873482898124, "grad_norm": 1.7219897062512775, "learning_rate": 9.771261501200251e-06, "loss": 0.627, "step": 4038 }, { "epoch": 0.12378938335172245, "grad_norm": 1.8407978638367677, "learning_rate": 9.771113077376229e-06, "loss": 0.8138, "step": 4039 }, { "epoch": 0.12382003187446365, "grad_norm": 1.9845268244905363, "learning_rate": 9.770964606541208e-06, "loss": 0.7784, "step": 4040 }, { "epoch": 0.12385068039720486, "grad_norm": 1.9842380966222226, "learning_rate": 9.770816088696652e-06, "loss": 0.8735, "step": 4041 }, { "epoch": 0.12388132891994606, "grad_norm": 1.6700040130159928, "learning_rate": 9.770667523844024e-06, "loss": 0.7232, "step": 4042 }, { "epoch": 0.12391197744268727, "grad_norm": 1.7494771879862894, "learning_rate": 9.77051891198479e-06, "loss": 0.7952, "step": 4043 }, { "epoch": 0.12394262596542846, "grad_norm": 2.0582193479846427, "learning_rate": 9.770370253120411e-06, "loss": 0.6535, "step": 4044 }, { "epoch": 0.12397327448816967, "grad_norm": 2.012259012846718, "learning_rate": 9.770221547252354e-06, "loss": 0.7738, "step": 4045 }, { "epoch": 0.12400392301091087, "grad_norm": 2.6739081742876145, "learning_rate": 9.770072794382082e-06, "loss": 0.8296, "step": 4046 }, { "epoch": 0.12403457153365208, "grad_norm": 1.765892747207758, "learning_rate": 9.769923994511064e-06, "loss": 0.7208, "step": 4047 }, { "epoch": 0.12406522005639328, "grad_norm": 1.6617874474353278, "learning_rate": 9.769775147640762e-06, "loss": 0.7377, "step": 4048 }, { "epoch": 0.12409586857913449, "grad_norm": 2.316036159733407, "learning_rate": 9.769626253772648e-06, "loss": 0.7829, "step": 4049 }, { "epoch": 0.1241265171018757, "grad_norm": 1.9865160888705902, "learning_rate": 9.769477312908186e-06, "loss": 0.7355, "step": 4050 }, { "epoch": 0.1241571656246169, "grad_norm": 2.1085425289654434, "learning_rate": 9.769328325048844e-06, "loss": 0.7047, "step": 4051 }, { "epoch": 0.12418781414735809, "grad_norm": 2.0355507976798632, "learning_rate": 9.769179290196089e-06, "loss": 0.7977, "step": 4052 }, { "epoch": 0.1242184626700993, "grad_norm": 1.8237547971229258, "learning_rate": 9.769030208351389e-06, "loss": 0.7065, "step": 4053 }, { "epoch": 0.1242491111928405, "grad_norm": 1.9216431127520825, "learning_rate": 9.768881079516214e-06, "loss": 0.8196, "step": 4054 }, { "epoch": 0.12427975971558171, "grad_norm": 0.9673162840940205, "learning_rate": 9.768731903692035e-06, "loss": 0.5049, "step": 4055 }, { "epoch": 0.12431040823832291, "grad_norm": 1.8708545183045635, "learning_rate": 9.76858268088032e-06, "loss": 0.7487, "step": 4056 }, { "epoch": 0.12434105676106412, "grad_norm": 2.044135194350074, "learning_rate": 9.76843341108254e-06, "loss": 0.8344, "step": 4057 }, { "epoch": 0.12437170528380533, "grad_norm": 2.217709248286808, "learning_rate": 9.768284094300165e-06, "loss": 0.7925, "step": 4058 }, { "epoch": 0.12440235380654653, "grad_norm": 1.9542245570765469, "learning_rate": 9.768134730534667e-06, "loss": 0.7272, "step": 4059 }, { "epoch": 0.12443300232928772, "grad_norm": 1.829193009550636, "learning_rate": 9.767985319787519e-06, "loss": 0.7466, "step": 4060 }, { "epoch": 0.12446365085202893, "grad_norm": 2.0194277147252984, "learning_rate": 9.767835862060188e-06, "loss": 0.7637, "step": 4061 }, { "epoch": 0.12449429937477013, "grad_norm": 1.6695630257123428, "learning_rate": 9.767686357354154e-06, "loss": 0.8079, "step": 4062 }, { "epoch": 0.12452494789751134, "grad_norm": 2.1998972767729534, "learning_rate": 9.767536805670884e-06, "loss": 0.8016, "step": 4063 }, { "epoch": 0.12455559642025255, "grad_norm": 0.9464763781440771, "learning_rate": 9.767387207011856e-06, "loss": 0.5271, "step": 4064 }, { "epoch": 0.12458624494299375, "grad_norm": 1.9684418612775954, "learning_rate": 9.767237561378541e-06, "loss": 0.7493, "step": 4065 }, { "epoch": 0.12461689346573496, "grad_norm": 1.9575445515388634, "learning_rate": 9.767087868772415e-06, "loss": 0.697, "step": 4066 }, { "epoch": 0.12464754198847615, "grad_norm": 2.356846035480588, "learning_rate": 9.766938129194952e-06, "loss": 0.8001, "step": 4067 }, { "epoch": 0.12467819051121735, "grad_norm": 1.7536424929670622, "learning_rate": 9.76678834264763e-06, "loss": 0.8382, "step": 4068 }, { "epoch": 0.12470883903395856, "grad_norm": 0.8940801028960911, "learning_rate": 9.766638509131919e-06, "loss": 0.5134, "step": 4069 }, { "epoch": 0.12473948755669977, "grad_norm": 1.8364682971730653, "learning_rate": 9.766488628649303e-06, "loss": 0.7319, "step": 4070 }, { "epoch": 0.12477013607944097, "grad_norm": 1.9033532048781003, "learning_rate": 9.766338701201252e-06, "loss": 0.7913, "step": 4071 }, { "epoch": 0.12480078460218218, "grad_norm": 2.013594443622582, "learning_rate": 9.766188726789248e-06, "loss": 0.7892, "step": 4072 }, { "epoch": 0.12483143312492338, "grad_norm": 1.929980948966591, "learning_rate": 9.766038705414766e-06, "loss": 0.7515, "step": 4073 }, { "epoch": 0.12486208164766459, "grad_norm": 1.7828285601991771, "learning_rate": 9.765888637079287e-06, "loss": 0.6883, "step": 4074 }, { "epoch": 0.12489273017040578, "grad_norm": 1.754232367905929, "learning_rate": 9.765738521784285e-06, "loss": 0.7315, "step": 4075 }, { "epoch": 0.12492337869314699, "grad_norm": 1.8692756496423744, "learning_rate": 9.765588359531243e-06, "loss": 0.7025, "step": 4076 }, { "epoch": 0.12495402721588819, "grad_norm": 1.9804987822482374, "learning_rate": 9.76543815032164e-06, "loss": 0.7794, "step": 4077 }, { "epoch": 0.1249846757386294, "grad_norm": 1.7808439767389075, "learning_rate": 9.765287894156957e-06, "loss": 0.6636, "step": 4078 }, { "epoch": 0.1250153242613706, "grad_norm": 1.835123519352648, "learning_rate": 9.765137591038671e-06, "loss": 0.66, "step": 4079 }, { "epoch": 0.1250459727841118, "grad_norm": 1.891618406346714, "learning_rate": 9.764987240968266e-06, "loss": 0.7444, "step": 4080 }, { "epoch": 0.12507662130685301, "grad_norm": 1.0522397726543828, "learning_rate": 9.764836843947222e-06, "loss": 0.5012, "step": 4081 }, { "epoch": 0.12510726982959422, "grad_norm": 1.7963211368785483, "learning_rate": 9.764686399977021e-06, "loss": 0.8477, "step": 4082 }, { "epoch": 0.12513791835233543, "grad_norm": 1.7071933804970616, "learning_rate": 9.764535909059147e-06, "loss": 0.8118, "step": 4083 }, { "epoch": 0.12516856687507663, "grad_norm": 1.782325433502545, "learning_rate": 9.764385371195082e-06, "loss": 0.7912, "step": 4084 }, { "epoch": 0.12519921539781784, "grad_norm": 1.88863232077035, "learning_rate": 9.764234786386308e-06, "loss": 0.75, "step": 4085 }, { "epoch": 0.12522986392055904, "grad_norm": 1.8865812337035655, "learning_rate": 9.764084154634311e-06, "loss": 0.7356, "step": 4086 }, { "epoch": 0.12526051244330022, "grad_norm": 2.213037199062368, "learning_rate": 9.763933475940571e-06, "loss": 0.7901, "step": 4087 }, { "epoch": 0.12529116096604143, "grad_norm": 1.9561922080732654, "learning_rate": 9.763782750306578e-06, "loss": 0.8036, "step": 4088 }, { "epoch": 0.12532180948878263, "grad_norm": 1.8596577728985701, "learning_rate": 9.763631977733815e-06, "loss": 0.736, "step": 4089 }, { "epoch": 0.12535245801152384, "grad_norm": 1.816794750570996, "learning_rate": 9.763481158223764e-06, "loss": 0.7502, "step": 4090 }, { "epoch": 0.12538310653426504, "grad_norm": 1.7675616573177617, "learning_rate": 9.763330291777918e-06, "loss": 0.6099, "step": 4091 }, { "epoch": 0.12541375505700625, "grad_norm": 1.680417324196581, "learning_rate": 9.763179378397759e-06, "loss": 0.6721, "step": 4092 }, { "epoch": 0.12544440357974745, "grad_norm": 1.795715124211618, "learning_rate": 9.763028418084773e-06, "loss": 0.7816, "step": 4093 }, { "epoch": 0.12547505210248866, "grad_norm": 1.885880837190289, "learning_rate": 9.76287741084045e-06, "loss": 0.8179, "step": 4094 }, { "epoch": 0.12550570062522987, "grad_norm": 2.035005883678667, "learning_rate": 9.762726356666279e-06, "loss": 0.8098, "step": 4095 }, { "epoch": 0.12553634914797107, "grad_norm": 2.1293643222476666, "learning_rate": 9.762575255563747e-06, "loss": 0.7931, "step": 4096 }, { "epoch": 0.12556699767071228, "grad_norm": 1.8620303830990386, "learning_rate": 9.76242410753434e-06, "loss": 0.7889, "step": 4097 }, { "epoch": 0.12559764619345348, "grad_norm": 2.007224177321235, "learning_rate": 9.762272912579551e-06, "loss": 0.663, "step": 4098 }, { "epoch": 0.1256282947161947, "grad_norm": 1.1971495346516854, "learning_rate": 9.762121670700867e-06, "loss": 0.5372, "step": 4099 }, { "epoch": 0.1256589432389359, "grad_norm": 1.964657185142504, "learning_rate": 9.761970381899782e-06, "loss": 0.8028, "step": 4100 }, { "epoch": 0.1256895917616771, "grad_norm": 2.0469598191967115, "learning_rate": 9.761819046177782e-06, "loss": 0.6998, "step": 4101 }, { "epoch": 0.12572024028441828, "grad_norm": 2.169000400070458, "learning_rate": 9.761667663536363e-06, "loss": 0.8178, "step": 4102 }, { "epoch": 0.12575088880715948, "grad_norm": 1.9663683610579652, "learning_rate": 9.761516233977014e-06, "loss": 0.7049, "step": 4103 }, { "epoch": 0.1257815373299007, "grad_norm": 1.7998840962385514, "learning_rate": 9.761364757501227e-06, "loss": 0.7436, "step": 4104 }, { "epoch": 0.1258121858526419, "grad_norm": 2.044815774388784, "learning_rate": 9.761213234110494e-06, "loss": 0.6831, "step": 4105 }, { "epoch": 0.1258428343753831, "grad_norm": 1.94123924346665, "learning_rate": 9.761061663806308e-06, "loss": 0.683, "step": 4106 }, { "epoch": 0.1258734828981243, "grad_norm": 2.0048597628630676, "learning_rate": 9.760910046590164e-06, "loss": 0.7863, "step": 4107 }, { "epoch": 0.1259041314208655, "grad_norm": 1.7803839313949708, "learning_rate": 9.760758382463555e-06, "loss": 0.7191, "step": 4108 }, { "epoch": 0.12593477994360672, "grad_norm": 1.9260505755487507, "learning_rate": 9.760606671427976e-06, "loss": 0.7019, "step": 4109 }, { "epoch": 0.12596542846634792, "grad_norm": 1.99567555351415, "learning_rate": 9.760454913484923e-06, "loss": 0.6904, "step": 4110 }, { "epoch": 0.12599607698908913, "grad_norm": 1.4597745344758335, "learning_rate": 9.760303108635887e-06, "loss": 0.5247, "step": 4111 }, { "epoch": 0.12602672551183033, "grad_norm": 1.0834631443515697, "learning_rate": 9.760151256882368e-06, "loss": 0.5084, "step": 4112 }, { "epoch": 0.12605737403457154, "grad_norm": 1.8313300545578, "learning_rate": 9.759999358225861e-06, "loss": 0.8067, "step": 4113 }, { "epoch": 0.12608802255731275, "grad_norm": 1.8475498875746441, "learning_rate": 9.759847412667862e-06, "loss": 0.7873, "step": 4114 }, { "epoch": 0.12611867108005395, "grad_norm": 2.070410744879872, "learning_rate": 9.759695420209867e-06, "loss": 0.7845, "step": 4115 }, { "epoch": 0.12614931960279516, "grad_norm": 2.4109166131841624, "learning_rate": 9.759543380853379e-06, "loss": 0.662, "step": 4116 }, { "epoch": 0.12617996812553636, "grad_norm": 1.821076589099308, "learning_rate": 9.759391294599889e-06, "loss": 0.7286, "step": 4117 }, { "epoch": 0.12621061664827754, "grad_norm": 2.0555897747301866, "learning_rate": 9.7592391614509e-06, "loss": 0.7191, "step": 4118 }, { "epoch": 0.12624126517101875, "grad_norm": 1.9869055572379595, "learning_rate": 9.759086981407909e-06, "loss": 0.8261, "step": 4119 }, { "epoch": 0.12627191369375995, "grad_norm": 2.0197555521265045, "learning_rate": 9.758934754472418e-06, "loss": 0.704, "step": 4120 }, { "epoch": 0.12630256221650116, "grad_norm": 2.216145815091527, "learning_rate": 9.758782480645923e-06, "loss": 0.7672, "step": 4121 }, { "epoch": 0.12633321073924236, "grad_norm": 1.9535525902021684, "learning_rate": 9.758630159929928e-06, "loss": 0.7213, "step": 4122 }, { "epoch": 0.12636385926198357, "grad_norm": 1.8611265572818791, "learning_rate": 9.758477792325932e-06, "loss": 0.8029, "step": 4123 }, { "epoch": 0.12639450778472477, "grad_norm": 1.9290418535193892, "learning_rate": 9.758325377835437e-06, "loss": 0.7248, "step": 4124 }, { "epoch": 0.12642515630746598, "grad_norm": 2.324750513924422, "learning_rate": 9.758172916459944e-06, "loss": 0.8725, "step": 4125 }, { "epoch": 0.12645580483020719, "grad_norm": 2.4850016135445387, "learning_rate": 9.758020408200956e-06, "loss": 0.5623, "step": 4126 }, { "epoch": 0.1264864533529484, "grad_norm": 1.9491080618107037, "learning_rate": 9.757867853059976e-06, "loss": 0.6811, "step": 4127 }, { "epoch": 0.1265171018756896, "grad_norm": 1.9899123386896436, "learning_rate": 9.757715251038508e-06, "loss": 0.7293, "step": 4128 }, { "epoch": 0.1265477503984308, "grad_norm": 1.8852195815376236, "learning_rate": 9.757562602138054e-06, "loss": 0.7472, "step": 4129 }, { "epoch": 0.126578398921172, "grad_norm": 1.8404413878276127, "learning_rate": 9.757409906360116e-06, "loss": 0.7235, "step": 4130 }, { "epoch": 0.12660904744391321, "grad_norm": 1.8899256628638303, "learning_rate": 9.757257163706203e-06, "loss": 0.8466, "step": 4131 }, { "epoch": 0.12663969596665442, "grad_norm": 2.109203739946818, "learning_rate": 9.757104374177817e-06, "loss": 0.7757, "step": 4132 }, { "epoch": 0.1266703444893956, "grad_norm": 1.8506594729040282, "learning_rate": 9.756951537776464e-06, "loss": 0.7306, "step": 4133 }, { "epoch": 0.1267009930121368, "grad_norm": 1.899324349972023, "learning_rate": 9.756798654503652e-06, "loss": 0.7414, "step": 4134 }, { "epoch": 0.126731641534878, "grad_norm": 1.9234255730445213, "learning_rate": 9.756645724360884e-06, "loss": 0.6709, "step": 4135 }, { "epoch": 0.12676229005761921, "grad_norm": 1.6720137033168974, "learning_rate": 9.75649274734967e-06, "loss": 0.7012, "step": 4136 }, { "epoch": 0.12679293858036042, "grad_norm": 1.8169400023096633, "learning_rate": 9.756339723471516e-06, "loss": 0.8005, "step": 4137 }, { "epoch": 0.12682358710310163, "grad_norm": 1.470342627711797, "learning_rate": 9.75618665272793e-06, "loss": 0.5504, "step": 4138 }, { "epoch": 0.12685423562584283, "grad_norm": 1.9758341204030871, "learning_rate": 9.75603353512042e-06, "loss": 0.7679, "step": 4139 }, { "epoch": 0.12688488414858404, "grad_norm": 2.2826157028384406, "learning_rate": 9.755880370650492e-06, "loss": 0.6831, "step": 4140 }, { "epoch": 0.12691553267132524, "grad_norm": 1.8458910356281029, "learning_rate": 9.755727159319661e-06, "loss": 0.7813, "step": 4141 }, { "epoch": 0.12694618119406645, "grad_norm": 1.9817921098211582, "learning_rate": 9.755573901129431e-06, "loss": 0.7181, "step": 4142 }, { "epoch": 0.12697682971680765, "grad_norm": 1.8450518283343222, "learning_rate": 9.755420596081316e-06, "loss": 0.786, "step": 4143 }, { "epoch": 0.12700747823954886, "grad_norm": 1.9468100026022637, "learning_rate": 9.755267244176826e-06, "loss": 0.8058, "step": 4144 }, { "epoch": 0.12703812676229007, "grad_norm": 2.6846710452301825, "learning_rate": 9.75511384541747e-06, "loss": 0.8025, "step": 4145 }, { "epoch": 0.12706877528503127, "grad_norm": 1.5807853652429626, "learning_rate": 9.75496039980476e-06, "loss": 0.7042, "step": 4146 }, { "epoch": 0.12709942380777248, "grad_norm": 2.733682106131754, "learning_rate": 9.75480690734021e-06, "loss": 0.7589, "step": 4147 }, { "epoch": 0.12713007233051368, "grad_norm": 1.8414533442470464, "learning_rate": 9.75465336802533e-06, "loss": 0.6647, "step": 4148 }, { "epoch": 0.12716072085325486, "grad_norm": 1.9205723688179699, "learning_rate": 9.754499781861634e-06, "loss": 0.7278, "step": 4149 }, { "epoch": 0.12719136937599607, "grad_norm": 1.82531644566895, "learning_rate": 9.754346148850635e-06, "loss": 0.7801, "step": 4150 }, { "epoch": 0.12722201789873727, "grad_norm": 3.9581356088019817, "learning_rate": 9.754192468993849e-06, "loss": 0.7528, "step": 4151 }, { "epoch": 0.12725266642147848, "grad_norm": 1.8782851107116356, "learning_rate": 9.754038742292786e-06, "loss": 0.7695, "step": 4152 }, { "epoch": 0.12728331494421968, "grad_norm": 2.0124721331043482, "learning_rate": 9.753884968748964e-06, "loss": 0.8061, "step": 4153 }, { "epoch": 0.1273139634669609, "grad_norm": 2.1637892812157515, "learning_rate": 9.753731148363897e-06, "loss": 0.7421, "step": 4154 }, { "epoch": 0.1273446119897021, "grad_norm": 1.953106417277667, "learning_rate": 9.7535772811391e-06, "loss": 0.8363, "step": 4155 }, { "epoch": 0.1273752605124433, "grad_norm": 1.7974536301555517, "learning_rate": 9.753423367076088e-06, "loss": 0.7083, "step": 4156 }, { "epoch": 0.1274059090351845, "grad_norm": 1.904072816192806, "learning_rate": 9.753269406176382e-06, "loss": 0.8006, "step": 4157 }, { "epoch": 0.1274365575579257, "grad_norm": 1.995871873302226, "learning_rate": 9.753115398441496e-06, "loss": 0.7491, "step": 4158 }, { "epoch": 0.12746720608066692, "grad_norm": 1.883982231352526, "learning_rate": 9.752961343872947e-06, "loss": 0.7494, "step": 4159 }, { "epoch": 0.12749785460340812, "grad_norm": 1.715339420978535, "learning_rate": 9.752807242472255e-06, "loss": 0.7385, "step": 4160 }, { "epoch": 0.12752850312614933, "grad_norm": 1.7316095208566717, "learning_rate": 9.752653094240935e-06, "loss": 0.6732, "step": 4161 }, { "epoch": 0.12755915164889053, "grad_norm": 1.1740354591590012, "learning_rate": 9.752498899180512e-06, "loss": 0.5284, "step": 4162 }, { "epoch": 0.12758980017163174, "grad_norm": 1.8449882331582736, "learning_rate": 9.752344657292497e-06, "loss": 0.7102, "step": 4163 }, { "epoch": 0.12762044869437292, "grad_norm": 0.9153786581222129, "learning_rate": 9.752190368578415e-06, "loss": 0.5188, "step": 4164 }, { "epoch": 0.12765109721711412, "grad_norm": 1.9590798102102196, "learning_rate": 9.752036033039788e-06, "loss": 0.7693, "step": 4165 }, { "epoch": 0.12768174573985533, "grad_norm": 0.8778040323118738, "learning_rate": 9.751881650678132e-06, "loss": 0.5078, "step": 4166 }, { "epoch": 0.12771239426259653, "grad_norm": 2.076583884353312, "learning_rate": 9.751727221494971e-06, "loss": 0.7941, "step": 4167 }, { "epoch": 0.12774304278533774, "grad_norm": 2.3025139742606267, "learning_rate": 9.751572745491827e-06, "loss": 0.8145, "step": 4168 }, { "epoch": 0.12777369130807895, "grad_norm": 1.0289337624629065, "learning_rate": 9.751418222670219e-06, "loss": 0.5337, "step": 4169 }, { "epoch": 0.12780433983082015, "grad_norm": 1.9841980171281777, "learning_rate": 9.751263653031673e-06, "loss": 0.8658, "step": 4170 }, { "epoch": 0.12783498835356136, "grad_norm": 2.07607844320513, "learning_rate": 9.751109036577709e-06, "loss": 0.7516, "step": 4171 }, { "epoch": 0.12786563687630256, "grad_norm": 2.1204547258015483, "learning_rate": 9.750954373309854e-06, "loss": 0.7925, "step": 4172 }, { "epoch": 0.12789628539904377, "grad_norm": 1.8426074991213233, "learning_rate": 9.750799663229627e-06, "loss": 0.767, "step": 4173 }, { "epoch": 0.12792693392178497, "grad_norm": 2.075884579374969, "learning_rate": 9.750644906338559e-06, "loss": 0.786, "step": 4174 }, { "epoch": 0.12795758244452618, "grad_norm": 2.0826038243219442, "learning_rate": 9.750490102638169e-06, "loss": 0.7376, "step": 4175 }, { "epoch": 0.12798823096726739, "grad_norm": 2.044517671461473, "learning_rate": 9.750335252129985e-06, "loss": 0.7064, "step": 4176 }, { "epoch": 0.1280188794900086, "grad_norm": 1.7721234534369399, "learning_rate": 9.750180354815531e-06, "loss": 0.7282, "step": 4177 }, { "epoch": 0.1280495280127498, "grad_norm": 1.736138345594517, "learning_rate": 9.750025410696337e-06, "loss": 0.777, "step": 4178 }, { "epoch": 0.128080176535491, "grad_norm": 1.7366253086067862, "learning_rate": 9.749870419773926e-06, "loss": 0.7448, "step": 4179 }, { "epoch": 0.12811082505823218, "grad_norm": 1.7949615607797078, "learning_rate": 9.749715382049827e-06, "loss": 0.6706, "step": 4180 }, { "epoch": 0.1281414735809734, "grad_norm": 2.1642459039199973, "learning_rate": 9.749560297525567e-06, "loss": 0.7643, "step": 4181 }, { "epoch": 0.1281721221037146, "grad_norm": 1.7470979065011887, "learning_rate": 9.749405166202673e-06, "loss": 0.7265, "step": 4182 }, { "epoch": 0.1282027706264558, "grad_norm": 1.8659982782768716, "learning_rate": 9.749249988082677e-06, "loss": 0.7832, "step": 4183 }, { "epoch": 0.128233419149197, "grad_norm": 1.0077753279480168, "learning_rate": 9.749094763167104e-06, "loss": 0.5028, "step": 4184 }, { "epoch": 0.1282640676719382, "grad_norm": 1.626319085511487, "learning_rate": 9.748939491457485e-06, "loss": 0.6946, "step": 4185 }, { "epoch": 0.12829471619467941, "grad_norm": 1.8285816514072017, "learning_rate": 9.74878417295535e-06, "loss": 0.6892, "step": 4186 }, { "epoch": 0.12832536471742062, "grad_norm": 1.9676866903786934, "learning_rate": 9.74862880766223e-06, "loss": 0.8204, "step": 4187 }, { "epoch": 0.12835601324016183, "grad_norm": 0.8430114979443267, "learning_rate": 9.748473395579656e-06, "loss": 0.5038, "step": 4188 }, { "epoch": 0.12838666176290303, "grad_norm": 1.7181615809467377, "learning_rate": 9.748317936709158e-06, "loss": 0.7098, "step": 4189 }, { "epoch": 0.12841731028564424, "grad_norm": 2.208567725787999, "learning_rate": 9.748162431052269e-06, "loss": 0.8129, "step": 4190 }, { "epoch": 0.12844795880838544, "grad_norm": 1.9825345687532747, "learning_rate": 9.74800687861052e-06, "loss": 0.7929, "step": 4191 }, { "epoch": 0.12847860733112665, "grad_norm": 0.8586253175764985, "learning_rate": 9.747851279385445e-06, "loss": 0.5165, "step": 4192 }, { "epoch": 0.12850925585386785, "grad_norm": 1.7883260793208409, "learning_rate": 9.747695633378576e-06, "loss": 0.7553, "step": 4193 }, { "epoch": 0.12853990437660906, "grad_norm": 2.1010117688283816, "learning_rate": 9.747539940591449e-06, "loss": 0.7358, "step": 4194 }, { "epoch": 0.12857055289935024, "grad_norm": 1.8455471165416437, "learning_rate": 9.747384201025593e-06, "loss": 0.7309, "step": 4195 }, { "epoch": 0.12860120142209144, "grad_norm": 1.6680756286365441, "learning_rate": 9.747228414682547e-06, "loss": 0.7294, "step": 4196 }, { "epoch": 0.12863184994483265, "grad_norm": 1.6690536630095476, "learning_rate": 9.747072581563845e-06, "loss": 0.687, "step": 4197 }, { "epoch": 0.12866249846757385, "grad_norm": 0.9151578794190413, "learning_rate": 9.746916701671023e-06, "loss": 0.5094, "step": 4198 }, { "epoch": 0.12869314699031506, "grad_norm": 0.9078973089081683, "learning_rate": 9.746760775005618e-06, "loss": 0.5234, "step": 4199 }, { "epoch": 0.12872379551305627, "grad_norm": 2.0262240513363374, "learning_rate": 9.746604801569162e-06, "loss": 0.7788, "step": 4200 }, { "epoch": 0.12875444403579747, "grad_norm": 1.9251148549869341, "learning_rate": 9.746448781363194e-06, "loss": 0.7295, "step": 4201 }, { "epoch": 0.12878509255853868, "grad_norm": 1.8753648824842013, "learning_rate": 9.746292714389251e-06, "loss": 0.8024, "step": 4202 }, { "epoch": 0.12881574108127988, "grad_norm": 1.9165797918888843, "learning_rate": 9.746136600648874e-06, "loss": 0.8347, "step": 4203 }, { "epoch": 0.1288463896040211, "grad_norm": 2.1283377582383456, "learning_rate": 9.745980440143598e-06, "loss": 0.748, "step": 4204 }, { "epoch": 0.1288770381267623, "grad_norm": 1.017631530392389, "learning_rate": 9.745824232874963e-06, "loss": 0.5141, "step": 4205 }, { "epoch": 0.1289076866495035, "grad_norm": 1.9073968926786211, "learning_rate": 9.745667978844506e-06, "loss": 0.7208, "step": 4206 }, { "epoch": 0.1289383351722447, "grad_norm": 1.9425748171485273, "learning_rate": 9.745511678053769e-06, "loss": 0.8476, "step": 4207 }, { "epoch": 0.1289689836949859, "grad_norm": 2.5531552842820426, "learning_rate": 9.74535533050429e-06, "loss": 0.8045, "step": 4208 }, { "epoch": 0.12899963221772712, "grad_norm": 0.8524393948004705, "learning_rate": 9.745198936197613e-06, "loss": 0.4783, "step": 4209 }, { "epoch": 0.12903028074046832, "grad_norm": 0.8816481030659492, "learning_rate": 9.745042495135275e-06, "loss": 0.5126, "step": 4210 }, { "epoch": 0.1290609292632095, "grad_norm": 2.031947350003892, "learning_rate": 9.744886007318821e-06, "loss": 0.8624, "step": 4211 }, { "epoch": 0.1290915777859507, "grad_norm": 2.057737993348231, "learning_rate": 9.74472947274979e-06, "loss": 0.8657, "step": 4212 }, { "epoch": 0.1291222263086919, "grad_norm": 2.0877379337387088, "learning_rate": 9.744572891429725e-06, "loss": 0.8846, "step": 4213 }, { "epoch": 0.12915287483143312, "grad_norm": 2.525202396685265, "learning_rate": 9.74441626336017e-06, "loss": 0.7588, "step": 4214 }, { "epoch": 0.12918352335417432, "grad_norm": 2.665759471368125, "learning_rate": 9.744259588542666e-06, "loss": 0.8283, "step": 4215 }, { "epoch": 0.12921417187691553, "grad_norm": 2.0868755066370284, "learning_rate": 9.744102866978761e-06, "loss": 0.753, "step": 4216 }, { "epoch": 0.12924482039965673, "grad_norm": 1.6663412290835704, "learning_rate": 9.743946098669996e-06, "loss": 0.6704, "step": 4217 }, { "epoch": 0.12927546892239794, "grad_norm": 1.8845628284583138, "learning_rate": 9.743789283617915e-06, "loss": 0.7449, "step": 4218 }, { "epoch": 0.12930611744513915, "grad_norm": 2.054978933180123, "learning_rate": 9.743632421824065e-06, "loss": 0.7715, "step": 4219 }, { "epoch": 0.12933676596788035, "grad_norm": 1.9274766497962048, "learning_rate": 9.743475513289993e-06, "loss": 0.7243, "step": 4220 }, { "epoch": 0.12936741449062156, "grad_norm": 1.0526700924858932, "learning_rate": 9.743318558017242e-06, "loss": 0.4923, "step": 4221 }, { "epoch": 0.12939806301336276, "grad_norm": 1.8415829216134407, "learning_rate": 9.743161556007358e-06, "loss": 0.7533, "step": 4222 }, { "epoch": 0.12942871153610397, "grad_norm": 1.8655542373684648, "learning_rate": 9.74300450726189e-06, "loss": 0.766, "step": 4223 }, { "epoch": 0.12945936005884517, "grad_norm": 1.976126862494008, "learning_rate": 9.742847411782385e-06, "loss": 0.7312, "step": 4224 }, { "epoch": 0.12949000858158638, "grad_norm": 1.791837110646353, "learning_rate": 9.742690269570392e-06, "loss": 0.7236, "step": 4225 }, { "epoch": 0.12952065710432756, "grad_norm": 1.8100995790084788, "learning_rate": 9.74253308062746e-06, "loss": 0.7534, "step": 4226 }, { "epoch": 0.12955130562706876, "grad_norm": 1.8034754154606778, "learning_rate": 9.742375844955131e-06, "loss": 0.7665, "step": 4227 }, { "epoch": 0.12958195414980997, "grad_norm": 1.6598709568798133, "learning_rate": 9.742218562554964e-06, "loss": 0.7164, "step": 4228 }, { "epoch": 0.12961260267255117, "grad_norm": 0.9217520984125834, "learning_rate": 9.742061233428502e-06, "loss": 0.5022, "step": 4229 }, { "epoch": 0.12964325119529238, "grad_norm": 1.9354857222087931, "learning_rate": 9.741903857577298e-06, "loss": 0.7997, "step": 4230 }, { "epoch": 0.1296738997180336, "grad_norm": 1.9402417380309183, "learning_rate": 9.741746435002904e-06, "loss": 0.7717, "step": 4231 }, { "epoch": 0.1297045482407748, "grad_norm": 0.8346256028051151, "learning_rate": 9.741588965706865e-06, "loss": 0.4961, "step": 4232 }, { "epoch": 0.129735196763516, "grad_norm": 1.6942983460384657, "learning_rate": 9.74143144969074e-06, "loss": 0.7534, "step": 4233 }, { "epoch": 0.1297658452862572, "grad_norm": 1.9187347702121547, "learning_rate": 9.741273886956077e-06, "loss": 0.729, "step": 4234 }, { "epoch": 0.1297964938089984, "grad_norm": 1.6929493232865136, "learning_rate": 9.741116277504427e-06, "loss": 0.6939, "step": 4235 }, { "epoch": 0.12982714233173961, "grad_norm": 0.8680908866229128, "learning_rate": 9.740958621337348e-06, "loss": 0.5339, "step": 4236 }, { "epoch": 0.12985779085448082, "grad_norm": 0.9280248803835275, "learning_rate": 9.74080091845639e-06, "loss": 0.5233, "step": 4237 }, { "epoch": 0.12988843937722203, "grad_norm": 2.3244795352395218, "learning_rate": 9.740643168863108e-06, "loss": 0.8266, "step": 4238 }, { "epoch": 0.12991908789996323, "grad_norm": 2.0708774396727945, "learning_rate": 9.740485372559056e-06, "loss": 0.814, "step": 4239 }, { "epoch": 0.12994973642270444, "grad_norm": 1.8707025199608531, "learning_rate": 9.740327529545787e-06, "loss": 0.679, "step": 4240 }, { "epoch": 0.12998038494544564, "grad_norm": 0.9353920382774733, "learning_rate": 9.740169639824858e-06, "loss": 0.5239, "step": 4241 }, { "epoch": 0.13001103346818682, "grad_norm": 2.1347676213434097, "learning_rate": 9.740011703397827e-06, "loss": 0.716, "step": 4242 }, { "epoch": 0.13004168199092803, "grad_norm": 2.0038107024271303, "learning_rate": 9.739853720266247e-06, "loss": 0.8316, "step": 4243 }, { "epoch": 0.13007233051366923, "grad_norm": 2.1723081609069825, "learning_rate": 9.739695690431674e-06, "loss": 0.7665, "step": 4244 }, { "epoch": 0.13010297903641044, "grad_norm": 2.0816123268297164, "learning_rate": 9.739537613895668e-06, "loss": 0.8078, "step": 4245 }, { "epoch": 0.13013362755915164, "grad_norm": 1.8283764793552224, "learning_rate": 9.739379490659786e-06, "loss": 0.6996, "step": 4246 }, { "epoch": 0.13016427608189285, "grad_norm": 2.0127921751372937, "learning_rate": 9.739221320725585e-06, "loss": 0.7954, "step": 4247 }, { "epoch": 0.13019492460463405, "grad_norm": 1.7384552828244122, "learning_rate": 9.739063104094622e-06, "loss": 0.7369, "step": 4248 }, { "epoch": 0.13022557312737526, "grad_norm": 2.0544048440003597, "learning_rate": 9.73890484076846e-06, "loss": 0.7954, "step": 4249 }, { "epoch": 0.13025622165011647, "grad_norm": 1.6885290890477809, "learning_rate": 9.738746530748654e-06, "loss": 0.7665, "step": 4250 }, { "epoch": 0.13028687017285767, "grad_norm": 1.8994205481871898, "learning_rate": 9.738588174036767e-06, "loss": 0.6969, "step": 4251 }, { "epoch": 0.13031751869559888, "grad_norm": 2.081166507047817, "learning_rate": 9.738429770634359e-06, "loss": 0.8171, "step": 4252 }, { "epoch": 0.13034816721834008, "grad_norm": 2.0946130864393773, "learning_rate": 9.738271320542989e-06, "loss": 0.7483, "step": 4253 }, { "epoch": 0.1303788157410813, "grad_norm": 1.7251600582252895, "learning_rate": 9.738112823764219e-06, "loss": 0.6583, "step": 4254 }, { "epoch": 0.1304094642638225, "grad_norm": 1.8183488305424782, "learning_rate": 9.737954280299612e-06, "loss": 0.6909, "step": 4255 }, { "epoch": 0.1304401127865637, "grad_norm": 1.78018238833152, "learning_rate": 9.737795690150729e-06, "loss": 0.6911, "step": 4256 }, { "epoch": 0.13047076130930488, "grad_norm": 1.917547028728641, "learning_rate": 9.737637053319133e-06, "loss": 0.8138, "step": 4257 }, { "epoch": 0.13050140983204608, "grad_norm": 1.9044496403314175, "learning_rate": 9.737478369806387e-06, "loss": 0.7394, "step": 4258 }, { "epoch": 0.1305320583547873, "grad_norm": 1.8639277754390315, "learning_rate": 9.737319639614053e-06, "loss": 0.7868, "step": 4259 }, { "epoch": 0.1305627068775285, "grad_norm": 2.2331169653450065, "learning_rate": 9.737160862743697e-06, "loss": 0.8021, "step": 4260 }, { "epoch": 0.1305933554002697, "grad_norm": 1.8599978653896838, "learning_rate": 9.737002039196884e-06, "loss": 0.8736, "step": 4261 }, { "epoch": 0.1306240039230109, "grad_norm": 1.8102083903887995, "learning_rate": 9.73684316897518e-06, "loss": 0.7883, "step": 4262 }, { "epoch": 0.1306546524457521, "grad_norm": 1.8373755338230209, "learning_rate": 9.736684252080145e-06, "loss": 0.8103, "step": 4263 }, { "epoch": 0.13068530096849332, "grad_norm": 1.7994461472813585, "learning_rate": 9.736525288513348e-06, "loss": 0.8352, "step": 4264 }, { "epoch": 0.13071594949123452, "grad_norm": 1.9152036925996756, "learning_rate": 9.736366278276355e-06, "loss": 0.6876, "step": 4265 }, { "epoch": 0.13074659801397573, "grad_norm": 1.8230299373074803, "learning_rate": 9.736207221370735e-06, "loss": 0.7948, "step": 4266 }, { "epoch": 0.13077724653671693, "grad_norm": 0.957168982908417, "learning_rate": 9.736048117798054e-06, "loss": 0.5124, "step": 4267 }, { "epoch": 0.13080789505945814, "grad_norm": 2.319161599499677, "learning_rate": 9.735888967559877e-06, "loss": 0.8604, "step": 4268 }, { "epoch": 0.13083854358219935, "grad_norm": 1.9524025868035813, "learning_rate": 9.735729770657775e-06, "loss": 0.7474, "step": 4269 }, { "epoch": 0.13086919210494055, "grad_norm": 0.8877787607876692, "learning_rate": 9.735570527093316e-06, "loss": 0.5075, "step": 4270 }, { "epoch": 0.13089984062768176, "grad_norm": 1.859993229337721, "learning_rate": 9.735411236868071e-06, "loss": 0.7, "step": 4271 }, { "epoch": 0.13093048915042296, "grad_norm": 1.9221157009868217, "learning_rate": 9.735251899983605e-06, "loss": 0.6622, "step": 4272 }, { "epoch": 0.13096113767316414, "grad_norm": 1.9481954833887394, "learning_rate": 9.735092516441491e-06, "loss": 0.8137, "step": 4273 }, { "epoch": 0.13099178619590535, "grad_norm": 0.9485744463040388, "learning_rate": 9.7349330862433e-06, "loss": 0.4964, "step": 4274 }, { "epoch": 0.13102243471864655, "grad_norm": 2.136477027844876, "learning_rate": 9.7347736093906e-06, "loss": 0.7814, "step": 4275 }, { "epoch": 0.13105308324138776, "grad_norm": 1.8340848209531122, "learning_rate": 9.734614085884967e-06, "loss": 0.7364, "step": 4276 }, { "epoch": 0.13108373176412896, "grad_norm": 2.2064513600268825, "learning_rate": 9.734454515727967e-06, "loss": 0.7709, "step": 4277 }, { "epoch": 0.13111438028687017, "grad_norm": 1.8651839282639557, "learning_rate": 9.734294898921175e-06, "loss": 0.6637, "step": 4278 }, { "epoch": 0.13114502880961137, "grad_norm": 2.1379662566029016, "learning_rate": 9.734135235466167e-06, "loss": 0.7937, "step": 4279 }, { "epoch": 0.13117567733235258, "grad_norm": 28.905946873477347, "learning_rate": 9.73397552536451e-06, "loss": 0.7297, "step": 4280 }, { "epoch": 0.13120632585509379, "grad_norm": 2.309230758946344, "learning_rate": 9.733815768617784e-06, "loss": 0.8057, "step": 4281 }, { "epoch": 0.131236974377835, "grad_norm": 1.0523322309757919, "learning_rate": 9.733655965227557e-06, "loss": 0.5081, "step": 4282 }, { "epoch": 0.1312676229005762, "grad_norm": 38.515172775793346, "learning_rate": 9.733496115195408e-06, "loss": 1.0114, "step": 4283 }, { "epoch": 0.1312982714233174, "grad_norm": 2.143626448128499, "learning_rate": 9.733336218522914e-06, "loss": 0.771, "step": 4284 }, { "epoch": 0.1313289199460586, "grad_norm": 2.2535873492701355, "learning_rate": 9.733176275211643e-06, "loss": 0.7983, "step": 4285 }, { "epoch": 0.13135956846879981, "grad_norm": 2.2272954540954597, "learning_rate": 9.733016285263175e-06, "loss": 0.6542, "step": 4286 }, { "epoch": 0.13139021699154102, "grad_norm": 2.1376354199180643, "learning_rate": 9.732856248679088e-06, "loss": 0.8031, "step": 4287 }, { "epoch": 0.1314208655142822, "grad_norm": 2.275407706808748, "learning_rate": 9.732696165460957e-06, "loss": 0.6871, "step": 4288 }, { "epoch": 0.1314515140370234, "grad_norm": 2.040187550126789, "learning_rate": 9.73253603561036e-06, "loss": 0.8951, "step": 4289 }, { "epoch": 0.1314821625597646, "grad_norm": 2.1239740995029694, "learning_rate": 9.732375859128876e-06, "loss": 0.7192, "step": 4290 }, { "epoch": 0.13151281108250581, "grad_norm": 4.999153262930207, "learning_rate": 9.73221563601808e-06, "loss": 0.7666, "step": 4291 }, { "epoch": 0.13154345960524702, "grad_norm": 62.36625078452832, "learning_rate": 9.732055366279552e-06, "loss": 0.8158, "step": 4292 }, { "epoch": 0.13157410812798823, "grad_norm": 10.090853879157493, "learning_rate": 9.731895049914873e-06, "loss": 1.0172, "step": 4293 }, { "epoch": 0.13160475665072943, "grad_norm": 6.734872914287785, "learning_rate": 9.73173468692562e-06, "loss": 0.917, "step": 4294 }, { "epoch": 0.13163540517347064, "grad_norm": 4.048774429662168, "learning_rate": 9.731574277313377e-06, "loss": 0.8908, "step": 4295 }, { "epoch": 0.13166605369621184, "grad_norm": 4.543826838008357, "learning_rate": 9.731413821079719e-06, "loss": 0.6144, "step": 4296 }, { "epoch": 0.13169670221895305, "grad_norm": 3.40707915331608, "learning_rate": 9.731253318226232e-06, "loss": 0.7639, "step": 4297 }, { "epoch": 0.13172735074169425, "grad_norm": 3.182877765763194, "learning_rate": 9.731092768754496e-06, "loss": 0.7516, "step": 4298 }, { "epoch": 0.13175799926443546, "grad_norm": 2.3592574520444964, "learning_rate": 9.73093217266609e-06, "loss": 0.8002, "step": 4299 }, { "epoch": 0.13178864778717667, "grad_norm": 2.882726555218591, "learning_rate": 9.7307715299626e-06, "loss": 0.9086, "step": 4300 }, { "epoch": 0.13181929630991787, "grad_norm": 2.276410603358051, "learning_rate": 9.73061084064561e-06, "loss": 0.8583, "step": 4301 }, { "epoch": 0.13184994483265908, "grad_norm": 2.478665288504387, "learning_rate": 9.730450104716697e-06, "loss": 0.8446, "step": 4302 }, { "epoch": 0.13188059335540028, "grad_norm": 2.2937847305462506, "learning_rate": 9.73028932217745e-06, "loss": 0.7208, "step": 4303 }, { "epoch": 0.13191124187814146, "grad_norm": 2.2953399466992606, "learning_rate": 9.730128493029454e-06, "loss": 0.615, "step": 4304 }, { "epoch": 0.13194189040088267, "grad_norm": 2.310958441968829, "learning_rate": 9.729967617274291e-06, "loss": 0.724, "step": 4305 }, { "epoch": 0.13197253892362387, "grad_norm": 2.1781339779108286, "learning_rate": 9.729806694913546e-06, "loss": 0.8702, "step": 4306 }, { "epoch": 0.13200318744636508, "grad_norm": 2.262077142328397, "learning_rate": 9.729645725948807e-06, "loss": 0.8162, "step": 4307 }, { "epoch": 0.13203383596910628, "grad_norm": 1.7836946973736219, "learning_rate": 9.729484710381656e-06, "loss": 0.7018, "step": 4308 }, { "epoch": 0.1320644844918475, "grad_norm": 2.0472612371240158, "learning_rate": 9.729323648213684e-06, "loss": 0.7792, "step": 4309 }, { "epoch": 0.1320951330145887, "grad_norm": 2.1927073742965018, "learning_rate": 9.729162539446476e-06, "loss": 0.7995, "step": 4310 }, { "epoch": 0.1321257815373299, "grad_norm": 2.2211844655754334, "learning_rate": 9.729001384081617e-06, "loss": 0.8501, "step": 4311 }, { "epoch": 0.1321564300600711, "grad_norm": 1.9911564321646955, "learning_rate": 9.7288401821207e-06, "loss": 0.7953, "step": 4312 }, { "epoch": 0.1321870785828123, "grad_norm": 2.2994855570573995, "learning_rate": 9.72867893356531e-06, "loss": 0.7435, "step": 4313 }, { "epoch": 0.13221772710555352, "grad_norm": 8.133818101585026, "learning_rate": 9.728517638417037e-06, "loss": 0.6185, "step": 4314 }, { "epoch": 0.13224837562829472, "grad_norm": 3.191519265989249, "learning_rate": 9.728356296677469e-06, "loss": 0.5826, "step": 4315 }, { "epoch": 0.13227902415103593, "grad_norm": 1.91960515461799, "learning_rate": 9.728194908348197e-06, "loss": 0.6292, "step": 4316 }, { "epoch": 0.13230967267377713, "grad_norm": 1.9727360432137708, "learning_rate": 9.728033473430812e-06, "loss": 0.7888, "step": 4317 }, { "epoch": 0.13234032119651834, "grad_norm": 1.950026666883897, "learning_rate": 9.7278719919269e-06, "loss": 0.8407, "step": 4318 }, { "epoch": 0.13237096971925952, "grad_norm": 2.129118079048573, "learning_rate": 9.72771046383806e-06, "loss": 0.846, "step": 4319 }, { "epoch": 0.13240161824200072, "grad_norm": 2.0423971519067803, "learning_rate": 9.727548889165876e-06, "loss": 0.763, "step": 4320 }, { "epoch": 0.13243226676474193, "grad_norm": 2.1332421076022485, "learning_rate": 9.727387267911944e-06, "loss": 0.87, "step": 4321 }, { "epoch": 0.13246291528748314, "grad_norm": 17.326002327996076, "learning_rate": 9.727225600077856e-06, "loss": 0.6814, "step": 4322 }, { "epoch": 0.13249356381022434, "grad_norm": 8.623220715275393, "learning_rate": 9.727063885665206e-06, "loss": 0.8702, "step": 4323 }, { "epoch": 0.13252421233296555, "grad_norm": 5.525876269600761, "learning_rate": 9.726902124675585e-06, "loss": 0.9604, "step": 4324 }, { "epoch": 0.13255486085570675, "grad_norm": 3.3991927740390873, "learning_rate": 9.726740317110588e-06, "loss": 0.8989, "step": 4325 }, { "epoch": 0.13258550937844796, "grad_norm": 2.1885727426647645, "learning_rate": 9.726578462971808e-06, "loss": 0.7941, "step": 4326 }, { "epoch": 0.13261615790118916, "grad_norm": 2.206438476620381, "learning_rate": 9.726416562260842e-06, "loss": 0.7818, "step": 4327 }, { "epoch": 0.13264680642393037, "grad_norm": 2.508881042899862, "learning_rate": 9.726254614979284e-06, "loss": 0.7887, "step": 4328 }, { "epoch": 0.13267745494667157, "grad_norm": 3.5970123489253165, "learning_rate": 9.726092621128731e-06, "loss": 0.8932, "step": 4329 }, { "epoch": 0.13270810346941278, "grad_norm": 2.9588811396211825, "learning_rate": 9.725930580710777e-06, "loss": 0.8084, "step": 4330 }, { "epoch": 0.13273875199215399, "grad_norm": 2.1524020664803074, "learning_rate": 9.725768493727021e-06, "loss": 0.7441, "step": 4331 }, { "epoch": 0.1327694005148952, "grad_norm": 2.062229632221437, "learning_rate": 9.725606360179058e-06, "loss": 0.7761, "step": 4332 }, { "epoch": 0.1328000490376364, "grad_norm": 1.9899465379966927, "learning_rate": 9.725444180068487e-06, "loss": 0.7249, "step": 4333 }, { "epoch": 0.1328306975603776, "grad_norm": 1.978389803346329, "learning_rate": 9.725281953396905e-06, "loss": 0.7487, "step": 4334 }, { "epoch": 0.13286134608311878, "grad_norm": 2.0960007446697584, "learning_rate": 9.725119680165911e-06, "loss": 0.7454, "step": 4335 }, { "epoch": 0.13289199460586, "grad_norm": 2.4978622373399135, "learning_rate": 9.724957360377103e-06, "loss": 0.7571, "step": 4336 }, { "epoch": 0.1329226431286012, "grad_norm": 1.9143040020971256, "learning_rate": 9.724794994032082e-06, "loss": 0.8035, "step": 4337 }, { "epoch": 0.1329532916513424, "grad_norm": 1.9080543848389633, "learning_rate": 9.724632581132447e-06, "loss": 0.6231, "step": 4338 }, { "epoch": 0.1329839401740836, "grad_norm": 2.052760319299078, "learning_rate": 9.7244701216798e-06, "loss": 0.8396, "step": 4339 }, { "epoch": 0.1330145886968248, "grad_norm": 1.8563658701638526, "learning_rate": 9.724307615675737e-06, "loss": 0.6625, "step": 4340 }, { "epoch": 0.13304523721956601, "grad_norm": 1.9159596286562957, "learning_rate": 9.724145063121863e-06, "loss": 0.7451, "step": 4341 }, { "epoch": 0.13307588574230722, "grad_norm": 2.0423771680864093, "learning_rate": 9.723982464019781e-06, "loss": 0.7565, "step": 4342 }, { "epoch": 0.13310653426504843, "grad_norm": 2.6672550152633216, "learning_rate": 9.723819818371089e-06, "loss": 0.8003, "step": 4343 }, { "epoch": 0.13313718278778963, "grad_norm": 1.9487985424611356, "learning_rate": 9.723657126177393e-06, "loss": 0.79, "step": 4344 }, { "epoch": 0.13316783131053084, "grad_norm": 2.1233275555359823, "learning_rate": 9.723494387440295e-06, "loss": 0.7344, "step": 4345 }, { "epoch": 0.13319847983327204, "grad_norm": 2.098607051651493, "learning_rate": 9.723331602161396e-06, "loss": 0.6986, "step": 4346 }, { "epoch": 0.13322912835601325, "grad_norm": 2.04750214121753, "learning_rate": 9.723168770342304e-06, "loss": 0.8887, "step": 4347 }, { "epoch": 0.13325977687875445, "grad_norm": 2.6349447939448942, "learning_rate": 9.723005891984622e-06, "loss": 0.7341, "step": 4348 }, { "epoch": 0.13329042540149566, "grad_norm": 1.9593506609949785, "learning_rate": 9.722842967089953e-06, "loss": 0.8015, "step": 4349 }, { "epoch": 0.13332107392423684, "grad_norm": 1.854441918851124, "learning_rate": 9.722679995659904e-06, "loss": 0.7919, "step": 4350 }, { "epoch": 0.13335172244697804, "grad_norm": 2.2694964481326374, "learning_rate": 9.722516977696083e-06, "loss": 0.7761, "step": 4351 }, { "epoch": 0.13338237096971925, "grad_norm": 1.8279949352032696, "learning_rate": 9.722353913200091e-06, "loss": 0.8064, "step": 4352 }, { "epoch": 0.13341301949246046, "grad_norm": 2.042466809674826, "learning_rate": 9.72219080217354e-06, "loss": 0.8401, "step": 4353 }, { "epoch": 0.13344366801520166, "grad_norm": 1.7677596593035292, "learning_rate": 9.722027644618033e-06, "loss": 0.7647, "step": 4354 }, { "epoch": 0.13347431653794287, "grad_norm": 7.9991340030947855, "learning_rate": 9.72186444053518e-06, "loss": 0.7365, "step": 4355 }, { "epoch": 0.13350496506068407, "grad_norm": 2.102599926923018, "learning_rate": 9.72170118992659e-06, "loss": 0.8412, "step": 4356 }, { "epoch": 0.13353561358342528, "grad_norm": 2.1366965479609266, "learning_rate": 9.721537892793868e-06, "loss": 0.798, "step": 4357 }, { "epoch": 0.13356626210616648, "grad_norm": 2.1592725198792277, "learning_rate": 9.721374549138626e-06, "loss": 0.7554, "step": 4358 }, { "epoch": 0.1335969106289077, "grad_norm": 1.5765636231098645, "learning_rate": 9.721211158962471e-06, "loss": 0.7303, "step": 4359 }, { "epoch": 0.1336275591516489, "grad_norm": 1.8771518511899583, "learning_rate": 9.721047722267016e-06, "loss": 0.7454, "step": 4360 }, { "epoch": 0.1336582076743901, "grad_norm": 2.4999468150951585, "learning_rate": 9.72088423905387e-06, "loss": 0.8387, "step": 4361 }, { "epoch": 0.1336888561971313, "grad_norm": 3.316215734004627, "learning_rate": 9.720720709324644e-06, "loss": 0.7498, "step": 4362 }, { "epoch": 0.1337195047198725, "grad_norm": 2.128388601668882, "learning_rate": 9.720557133080948e-06, "loss": 0.7851, "step": 4363 }, { "epoch": 0.13375015324261372, "grad_norm": 1.8738443801440898, "learning_rate": 9.720393510324395e-06, "loss": 0.7611, "step": 4364 }, { "epoch": 0.13378080176535492, "grad_norm": 2.008605194746792, "learning_rate": 9.720229841056598e-06, "loss": 0.8511, "step": 4365 }, { "epoch": 0.1338114502880961, "grad_norm": 1.846418714383518, "learning_rate": 9.720066125279167e-06, "loss": 0.8076, "step": 4366 }, { "epoch": 0.1338420988108373, "grad_norm": 1.730463774377677, "learning_rate": 9.719902362993719e-06, "loss": 0.7125, "step": 4367 }, { "epoch": 0.1338727473335785, "grad_norm": 38.391207760026965, "learning_rate": 9.719738554201863e-06, "loss": 0.702, "step": 4368 }, { "epoch": 0.13390339585631972, "grad_norm": 2.213862948345738, "learning_rate": 9.719574698905216e-06, "loss": 0.8565, "step": 4369 }, { "epoch": 0.13393404437906092, "grad_norm": 2.4186159877570144, "learning_rate": 9.719410797105393e-06, "loss": 0.6147, "step": 4370 }, { "epoch": 0.13396469290180213, "grad_norm": 2.295374552642242, "learning_rate": 9.719246848804008e-06, "loss": 0.8533, "step": 4371 }, { "epoch": 0.13399534142454333, "grad_norm": 1.8965441155093075, "learning_rate": 9.719082854002675e-06, "loss": 0.6982, "step": 4372 }, { "epoch": 0.13402598994728454, "grad_norm": 4.2472972669098, "learning_rate": 9.71891881270301e-06, "loss": 0.6394, "step": 4373 }, { "epoch": 0.13405663847002575, "grad_norm": 1.9571560518846498, "learning_rate": 9.718754724906634e-06, "loss": 0.8107, "step": 4374 }, { "epoch": 0.13408728699276695, "grad_norm": 1.4163836065919577, "learning_rate": 9.718590590615157e-06, "loss": 0.5422, "step": 4375 }, { "epoch": 0.13411793551550816, "grad_norm": 1.9815967177021734, "learning_rate": 9.718426409830201e-06, "loss": 0.8303, "step": 4376 }, { "epoch": 0.13414858403824936, "grad_norm": 1.6773222734071884, "learning_rate": 9.718262182553384e-06, "loss": 0.7466, "step": 4377 }, { "epoch": 0.13417923256099057, "grad_norm": 3.1085597586788976, "learning_rate": 9.71809790878632e-06, "loss": 0.5702, "step": 4378 }, { "epoch": 0.13420988108373177, "grad_norm": 1.717942585005802, "learning_rate": 9.717933588530632e-06, "loss": 0.7137, "step": 4379 }, { "epoch": 0.13424052960647298, "grad_norm": 1.7235014720470594, "learning_rate": 9.717769221787936e-06, "loss": 0.7103, "step": 4380 }, { "epoch": 0.13427117812921416, "grad_norm": 1.9973212535784604, "learning_rate": 9.717604808559854e-06, "loss": 0.7069, "step": 4381 }, { "epoch": 0.13430182665195536, "grad_norm": 1.8972697925305166, "learning_rate": 9.717440348848004e-06, "loss": 0.8695, "step": 4382 }, { "epoch": 0.13433247517469657, "grad_norm": 1.9338506957104857, "learning_rate": 9.717275842654006e-06, "loss": 0.698, "step": 4383 }, { "epoch": 0.13436312369743778, "grad_norm": 1.869138775272314, "learning_rate": 9.717111289979484e-06, "loss": 0.6708, "step": 4384 }, { "epoch": 0.13439377222017898, "grad_norm": 2.2415410680427073, "learning_rate": 9.716946690826056e-06, "loss": 0.5562, "step": 4385 }, { "epoch": 0.1344244207429202, "grad_norm": 1.7389616133697272, "learning_rate": 9.716782045195348e-06, "loss": 0.7996, "step": 4386 }, { "epoch": 0.1344550692656614, "grad_norm": 2.016624008171407, "learning_rate": 9.716617353088977e-06, "loss": 0.9199, "step": 4387 }, { "epoch": 0.1344857177884026, "grad_norm": 1.636515909576831, "learning_rate": 9.716452614508569e-06, "loss": 0.8546, "step": 4388 }, { "epoch": 0.1345163663111438, "grad_norm": 1.9441487910438018, "learning_rate": 9.716287829455748e-06, "loss": 0.7557, "step": 4389 }, { "epoch": 0.134547014833885, "grad_norm": 1.0302647681145105, "learning_rate": 9.716122997932135e-06, "loss": 0.5394, "step": 4390 }, { "epoch": 0.13457766335662621, "grad_norm": 1.0832543501514242, "learning_rate": 9.715958119939355e-06, "loss": 0.5318, "step": 4391 }, { "epoch": 0.13460831187936742, "grad_norm": 1.8537132937685292, "learning_rate": 9.715793195479035e-06, "loss": 0.759, "step": 4392 }, { "epoch": 0.13463896040210863, "grad_norm": 3.1052153758112233, "learning_rate": 9.715628224552795e-06, "loss": 0.8251, "step": 4393 }, { "epoch": 0.13466960892484983, "grad_norm": 2.0940315620435515, "learning_rate": 9.715463207162267e-06, "loss": 0.7708, "step": 4394 }, { "epoch": 0.13470025744759104, "grad_norm": 1.9212176429850405, "learning_rate": 9.71529814330907e-06, "loss": 0.7468, "step": 4395 }, { "epoch": 0.13473090597033224, "grad_norm": 1.8008155297729327, "learning_rate": 9.715133032994837e-06, "loss": 0.7273, "step": 4396 }, { "epoch": 0.13476155449307342, "grad_norm": 1.0028262170129458, "learning_rate": 9.71496787622119e-06, "loss": 0.5083, "step": 4397 }, { "epoch": 0.13479220301581463, "grad_norm": 1.883556224282165, "learning_rate": 9.71480267298976e-06, "loss": 0.7346, "step": 4398 }, { "epoch": 0.13482285153855583, "grad_norm": 2.4307306186635036, "learning_rate": 9.71463742330217e-06, "loss": 0.7519, "step": 4399 }, { "epoch": 0.13485350006129704, "grad_norm": 0.9916701414372555, "learning_rate": 9.714472127160054e-06, "loss": 0.5542, "step": 4400 }, { "epoch": 0.13488414858403824, "grad_norm": 1.882247855940592, "learning_rate": 9.714306784565037e-06, "loss": 0.7052, "step": 4401 }, { "epoch": 0.13491479710677945, "grad_norm": 1.9249132065611598, "learning_rate": 9.714141395518748e-06, "loss": 0.7724, "step": 4402 }, { "epoch": 0.13494544562952066, "grad_norm": 1.7908998783296453, "learning_rate": 9.71397596002282e-06, "loss": 0.7175, "step": 4403 }, { "epoch": 0.13497609415226186, "grad_norm": 2.0275400944388378, "learning_rate": 9.713810478078878e-06, "loss": 0.8158, "step": 4404 }, { "epoch": 0.13500674267500307, "grad_norm": 2.0666559879920037, "learning_rate": 9.713644949688556e-06, "loss": 0.7738, "step": 4405 }, { "epoch": 0.13503739119774427, "grad_norm": 1.7664049377098454, "learning_rate": 9.713479374853486e-06, "loss": 0.6862, "step": 4406 }, { "epoch": 0.13506803972048548, "grad_norm": 1.65144824990935, "learning_rate": 9.713313753575296e-06, "loss": 0.7071, "step": 4407 }, { "epoch": 0.13509868824322668, "grad_norm": 1.9376786012481435, "learning_rate": 9.713148085855619e-06, "loss": 0.7202, "step": 4408 }, { "epoch": 0.1351293367659679, "grad_norm": 1.226724094473204, "learning_rate": 9.71298237169609e-06, "loss": 0.533, "step": 4409 }, { "epoch": 0.1351599852887091, "grad_norm": 2.049356105380898, "learning_rate": 9.712816611098339e-06, "loss": 0.6994, "step": 4410 }, { "epoch": 0.1351906338114503, "grad_norm": 1.3617576468909505, "learning_rate": 9.712650804064e-06, "loss": 0.5154, "step": 4411 }, { "epoch": 0.13522128233419148, "grad_norm": 0.8468617563370583, "learning_rate": 9.712484950594707e-06, "loss": 0.5334, "step": 4412 }, { "epoch": 0.13525193085693268, "grad_norm": 2.0536954174826465, "learning_rate": 9.712319050692093e-06, "loss": 0.8271, "step": 4413 }, { "epoch": 0.1352825793796739, "grad_norm": 1.6297918040722232, "learning_rate": 9.712153104357796e-06, "loss": 0.6535, "step": 4414 }, { "epoch": 0.1353132279024151, "grad_norm": 1.8907288003001324, "learning_rate": 9.711987111593446e-06, "loss": 0.8041, "step": 4415 }, { "epoch": 0.1353438764251563, "grad_norm": 1.7958801241217897, "learning_rate": 9.711821072400683e-06, "loss": 0.8301, "step": 4416 }, { "epoch": 0.1353745249478975, "grad_norm": 1.6180376562264416, "learning_rate": 9.711654986781142e-06, "loss": 0.6871, "step": 4417 }, { "epoch": 0.1354051734706387, "grad_norm": 1.9227090486964815, "learning_rate": 9.711488854736457e-06, "loss": 0.7175, "step": 4418 }, { "epoch": 0.13543582199337992, "grad_norm": 1.9267597068641478, "learning_rate": 9.711322676268269e-06, "loss": 0.7932, "step": 4419 }, { "epoch": 0.13546647051612112, "grad_norm": 1.6978639762769645, "learning_rate": 9.711156451378212e-06, "loss": 0.7654, "step": 4420 }, { "epoch": 0.13549711903886233, "grad_norm": 1.886882605504737, "learning_rate": 9.710990180067926e-06, "loss": 0.7503, "step": 4421 }, { "epoch": 0.13552776756160353, "grad_norm": 1.812487448574536, "learning_rate": 9.710823862339048e-06, "loss": 0.7693, "step": 4422 }, { "epoch": 0.13555841608434474, "grad_norm": 1.8275466412607622, "learning_rate": 9.710657498193215e-06, "loss": 0.7913, "step": 4423 }, { "epoch": 0.13558906460708595, "grad_norm": 1.6819394089044097, "learning_rate": 9.71049108763207e-06, "loss": 0.7386, "step": 4424 }, { "epoch": 0.13561971312982715, "grad_norm": 1.8991673739057116, "learning_rate": 9.710324630657252e-06, "loss": 0.7035, "step": 4425 }, { "epoch": 0.13565036165256836, "grad_norm": 1.8855050012801107, "learning_rate": 9.7101581272704e-06, "loss": 0.7646, "step": 4426 }, { "epoch": 0.13568101017530956, "grad_norm": 1.836772671569591, "learning_rate": 9.709991577473154e-06, "loss": 0.8006, "step": 4427 }, { "epoch": 0.13571165869805074, "grad_norm": 1.9920895022295142, "learning_rate": 9.709824981267155e-06, "loss": 0.8233, "step": 4428 }, { "epoch": 0.13574230722079195, "grad_norm": 1.7967135370472707, "learning_rate": 9.709658338654046e-06, "loss": 0.8005, "step": 4429 }, { "epoch": 0.13577295574353315, "grad_norm": 1.7763982432254377, "learning_rate": 9.70949164963547e-06, "loss": 0.7329, "step": 4430 }, { "epoch": 0.13580360426627436, "grad_norm": 1.9184599994789755, "learning_rate": 9.709324914213068e-06, "loss": 0.7659, "step": 4431 }, { "epoch": 0.13583425278901556, "grad_norm": 2.34302703759835, "learning_rate": 9.70915813238848e-06, "loss": 0.8135, "step": 4432 }, { "epoch": 0.13586490131175677, "grad_norm": 1.6458663743879542, "learning_rate": 9.708991304163353e-06, "loss": 0.7733, "step": 4433 }, { "epoch": 0.13589554983449798, "grad_norm": 2.4920143056817037, "learning_rate": 9.708824429539332e-06, "loss": 0.6763, "step": 4434 }, { "epoch": 0.13592619835723918, "grad_norm": 1.8298778583352155, "learning_rate": 9.708657508518056e-06, "loss": 0.7233, "step": 4435 }, { "epoch": 0.1359568468799804, "grad_norm": 1.8892386626749809, "learning_rate": 9.708490541101174e-06, "loss": 0.8618, "step": 4436 }, { "epoch": 0.1359874954027216, "grad_norm": 2.0917210294430992, "learning_rate": 9.70832352729033e-06, "loss": 0.7361, "step": 4437 }, { "epoch": 0.1360181439254628, "grad_norm": 1.864258844868636, "learning_rate": 9.70815646708717e-06, "loss": 0.7729, "step": 4438 }, { "epoch": 0.136048792448204, "grad_norm": 2.0505198753786424, "learning_rate": 9.707989360493339e-06, "loss": 0.7233, "step": 4439 }, { "epoch": 0.1360794409709452, "grad_norm": 1.9495732911190298, "learning_rate": 9.707822207510486e-06, "loss": 0.5352, "step": 4440 }, { "epoch": 0.13611008949368641, "grad_norm": 1.7811866704476615, "learning_rate": 9.707655008140255e-06, "loss": 0.7161, "step": 4441 }, { "epoch": 0.13614073801642762, "grad_norm": 1.0469547161656636, "learning_rate": 9.707487762384294e-06, "loss": 0.5287, "step": 4442 }, { "epoch": 0.1361713865391688, "grad_norm": 1.7209573535644478, "learning_rate": 9.707320470244253e-06, "loss": 0.6872, "step": 4443 }, { "epoch": 0.13620203506191, "grad_norm": 1.1243535677955965, "learning_rate": 9.707153131721777e-06, "loss": 0.537, "step": 4444 }, { "epoch": 0.1362326835846512, "grad_norm": 2.076487425194706, "learning_rate": 9.706985746818519e-06, "loss": 0.8213, "step": 4445 }, { "epoch": 0.13626333210739242, "grad_norm": 1.9470841539368955, "learning_rate": 9.706818315536127e-06, "loss": 0.6997, "step": 4446 }, { "epoch": 0.13629398063013362, "grad_norm": 1.3342028948292817, "learning_rate": 9.706650837876246e-06, "loss": 0.55, "step": 4447 }, { "epoch": 0.13632462915287483, "grad_norm": 2.015957909063176, "learning_rate": 9.706483313840533e-06, "loss": 0.7823, "step": 4448 }, { "epoch": 0.13635527767561603, "grad_norm": 1.55088933420369, "learning_rate": 9.706315743430635e-06, "loss": 0.7524, "step": 4449 }, { "epoch": 0.13638592619835724, "grad_norm": 1.5990697082681746, "learning_rate": 9.706148126648203e-06, "loss": 0.6542, "step": 4450 }, { "epoch": 0.13641657472109844, "grad_norm": 0.9360702819457677, "learning_rate": 9.70598046349489e-06, "loss": 0.5213, "step": 4451 }, { "epoch": 0.13644722324383965, "grad_norm": 1.9080195639869317, "learning_rate": 9.705812753972348e-06, "loss": 0.6833, "step": 4452 }, { "epoch": 0.13647787176658085, "grad_norm": 2.0576387934702725, "learning_rate": 9.705644998082228e-06, "loss": 0.8436, "step": 4453 }, { "epoch": 0.13650852028932206, "grad_norm": 1.769287613096271, "learning_rate": 9.705477195826183e-06, "loss": 0.7394, "step": 4454 }, { "epoch": 0.13653916881206327, "grad_norm": 2.196560242094752, "learning_rate": 9.705309347205869e-06, "loss": 0.7958, "step": 4455 }, { "epoch": 0.13656981733480447, "grad_norm": 2.0151997209959993, "learning_rate": 9.705141452222937e-06, "loss": 0.7015, "step": 4456 }, { "epoch": 0.13660046585754568, "grad_norm": 2.05236260013077, "learning_rate": 9.704973510879044e-06, "loss": 0.8075, "step": 4457 }, { "epoch": 0.13663111438028688, "grad_norm": 1.1135420243325846, "learning_rate": 9.704805523175842e-06, "loss": 0.5111, "step": 4458 }, { "epoch": 0.13666176290302806, "grad_norm": 2.1630578647772403, "learning_rate": 9.704637489114987e-06, "loss": 0.6595, "step": 4459 }, { "epoch": 0.13669241142576927, "grad_norm": 1.7742686258462343, "learning_rate": 9.704469408698136e-06, "loss": 0.8239, "step": 4460 }, { "epoch": 0.13672305994851047, "grad_norm": 1.8508426665170508, "learning_rate": 9.704301281926943e-06, "loss": 0.6354, "step": 4461 }, { "epoch": 0.13675370847125168, "grad_norm": 2.0854682127638573, "learning_rate": 9.704133108803067e-06, "loss": 0.7962, "step": 4462 }, { "epoch": 0.13678435699399288, "grad_norm": 1.708533473587686, "learning_rate": 9.703964889328164e-06, "loss": 0.6628, "step": 4463 }, { "epoch": 0.1368150055167341, "grad_norm": 1.863380040935953, "learning_rate": 9.703796623503891e-06, "loss": 0.7618, "step": 4464 }, { "epoch": 0.1368456540394753, "grad_norm": 1.6551041321991629, "learning_rate": 9.703628311331904e-06, "loss": 0.7799, "step": 4465 }, { "epoch": 0.1368763025622165, "grad_norm": 0.8974367186385043, "learning_rate": 9.703459952813868e-06, "loss": 0.5106, "step": 4466 }, { "epoch": 0.1369069510849577, "grad_norm": 1.7097860207583526, "learning_rate": 9.703291547951434e-06, "loss": 0.6393, "step": 4467 }, { "epoch": 0.1369375996076989, "grad_norm": 1.9637865872449407, "learning_rate": 9.703123096746267e-06, "loss": 0.7912, "step": 4468 }, { "epoch": 0.13696824813044012, "grad_norm": 1.8076812739796644, "learning_rate": 9.702954599200025e-06, "loss": 0.7585, "step": 4469 }, { "epoch": 0.13699889665318132, "grad_norm": 1.7485349193010524, "learning_rate": 9.702786055314368e-06, "loss": 0.6587, "step": 4470 }, { "epoch": 0.13702954517592253, "grad_norm": 1.7575719637422815, "learning_rate": 9.702617465090955e-06, "loss": 0.703, "step": 4471 }, { "epoch": 0.13706019369866373, "grad_norm": 1.883728848897849, "learning_rate": 9.70244882853145e-06, "loss": 0.7637, "step": 4472 }, { "epoch": 0.13709084222140494, "grad_norm": 1.9845998719276963, "learning_rate": 9.702280145637516e-06, "loss": 0.7493, "step": 4473 }, { "epoch": 0.13712149074414612, "grad_norm": 2.1570584975086895, "learning_rate": 9.702111416410809e-06, "loss": 0.7715, "step": 4474 }, { "epoch": 0.13715213926688732, "grad_norm": 2.1479344153467657, "learning_rate": 9.701942640852996e-06, "loss": 0.8455, "step": 4475 }, { "epoch": 0.13718278778962853, "grad_norm": 1.88836487612854, "learning_rate": 9.70177381896574e-06, "loss": 0.7295, "step": 4476 }, { "epoch": 0.13721343631236974, "grad_norm": 2.2359443580211384, "learning_rate": 9.701604950750703e-06, "loss": 0.8044, "step": 4477 }, { "epoch": 0.13724408483511094, "grad_norm": 1.7888826644856013, "learning_rate": 9.701436036209549e-06, "loss": 0.7498, "step": 4478 }, { "epoch": 0.13727473335785215, "grad_norm": 2.014097142276588, "learning_rate": 9.701267075343943e-06, "loss": 0.7772, "step": 4479 }, { "epoch": 0.13730538188059335, "grad_norm": 1.7857455678144656, "learning_rate": 9.70109806815555e-06, "loss": 0.8191, "step": 4480 }, { "epoch": 0.13733603040333456, "grad_norm": 1.7860840972262582, "learning_rate": 9.700929014646035e-06, "loss": 0.6699, "step": 4481 }, { "epoch": 0.13736667892607576, "grad_norm": 1.8719270586313759, "learning_rate": 9.700759914817064e-06, "loss": 0.6938, "step": 4482 }, { "epoch": 0.13739732744881697, "grad_norm": 0.9887606759079871, "learning_rate": 9.700590768670302e-06, "loss": 0.5144, "step": 4483 }, { "epoch": 0.13742797597155818, "grad_norm": 1.6777755675766262, "learning_rate": 9.700421576207417e-06, "loss": 0.6409, "step": 4484 }, { "epoch": 0.13745862449429938, "grad_norm": 2.02210521602702, "learning_rate": 9.700252337430075e-06, "loss": 0.8019, "step": 4485 }, { "epoch": 0.1374892730170406, "grad_norm": 0.8747731465428763, "learning_rate": 9.700083052339944e-06, "loss": 0.5057, "step": 4486 }, { "epoch": 0.1375199215397818, "grad_norm": 1.739879634897783, "learning_rate": 9.699913720938694e-06, "loss": 0.7189, "step": 4487 }, { "epoch": 0.137550570062523, "grad_norm": 1.7831281512000337, "learning_rate": 9.69974434322799e-06, "loss": 0.6669, "step": 4488 }, { "epoch": 0.1375812185852642, "grad_norm": 1.7631218358684435, "learning_rate": 9.699574919209502e-06, "loss": 0.7179, "step": 4489 }, { "epoch": 0.13761186710800538, "grad_norm": 1.895531455715369, "learning_rate": 9.6994054488849e-06, "loss": 0.815, "step": 4490 }, { "epoch": 0.1376425156307466, "grad_norm": 1.6469451508772281, "learning_rate": 9.699235932255855e-06, "loss": 0.7248, "step": 4491 }, { "epoch": 0.1376731641534878, "grad_norm": 2.0982929762787403, "learning_rate": 9.699066369324034e-06, "loss": 0.6378, "step": 4492 }, { "epoch": 0.137703812676229, "grad_norm": 1.9576665723263762, "learning_rate": 9.698896760091112e-06, "loss": 0.7733, "step": 4493 }, { "epoch": 0.1377344611989702, "grad_norm": 1.902721751844345, "learning_rate": 9.698727104558756e-06, "loss": 0.7971, "step": 4494 }, { "epoch": 0.1377651097217114, "grad_norm": 1.6866614786587157, "learning_rate": 9.698557402728642e-06, "loss": 0.7979, "step": 4495 }, { "epoch": 0.13779575824445262, "grad_norm": 1.952069749336564, "learning_rate": 9.698387654602437e-06, "loss": 0.7751, "step": 4496 }, { "epoch": 0.13782640676719382, "grad_norm": 1.014717844528978, "learning_rate": 9.698217860181817e-06, "loss": 0.542, "step": 4497 }, { "epoch": 0.13785705528993503, "grad_norm": 1.8753932923150154, "learning_rate": 9.698048019468455e-06, "loss": 0.824, "step": 4498 }, { "epoch": 0.13788770381267623, "grad_norm": 1.8310764853545036, "learning_rate": 9.697878132464024e-06, "loss": 0.7328, "step": 4499 }, { "epoch": 0.13791835233541744, "grad_norm": 1.7884663500894944, "learning_rate": 9.697708199170198e-06, "loss": 0.7872, "step": 4500 }, { "epoch": 0.13794900085815864, "grad_norm": 1.63609618379203, "learning_rate": 9.697538219588652e-06, "loss": 0.8256, "step": 4501 }, { "epoch": 0.13797964938089985, "grad_norm": 1.7623001161209668, "learning_rate": 9.697368193721057e-06, "loss": 0.7309, "step": 4502 }, { "epoch": 0.13801029790364105, "grad_norm": 0.7993896292923013, "learning_rate": 9.697198121569093e-06, "loss": 0.5019, "step": 4503 }, { "epoch": 0.13804094642638226, "grad_norm": 2.0789954568192353, "learning_rate": 9.697028003134434e-06, "loss": 0.8266, "step": 4504 }, { "epoch": 0.13807159494912344, "grad_norm": 2.090774757174744, "learning_rate": 9.696857838418755e-06, "loss": 0.6996, "step": 4505 }, { "epoch": 0.13810224347186464, "grad_norm": 0.8856417728220688, "learning_rate": 9.696687627423738e-06, "loss": 0.544, "step": 4506 }, { "epoch": 0.13813289199460585, "grad_norm": 1.92700621131547, "learning_rate": 9.696517370151053e-06, "loss": 0.7549, "step": 4507 }, { "epoch": 0.13816354051734706, "grad_norm": 1.9809006475231181, "learning_rate": 9.696347066602381e-06, "loss": 0.7996, "step": 4508 }, { "epoch": 0.13819418904008826, "grad_norm": 1.7375697965065409, "learning_rate": 9.6961767167794e-06, "loss": 0.7096, "step": 4509 }, { "epoch": 0.13822483756282947, "grad_norm": 1.957057501903984, "learning_rate": 9.696006320683787e-06, "loss": 0.8123, "step": 4510 }, { "epoch": 0.13825548608557067, "grad_norm": 1.8581756609857913, "learning_rate": 9.695835878317223e-06, "loss": 0.7816, "step": 4511 }, { "epoch": 0.13828613460831188, "grad_norm": 1.8211935904379264, "learning_rate": 9.695665389681389e-06, "loss": 0.7244, "step": 4512 }, { "epoch": 0.13831678313105308, "grad_norm": 1.8728854037777547, "learning_rate": 9.69549485477796e-06, "loss": 0.6612, "step": 4513 }, { "epoch": 0.1383474316537943, "grad_norm": 1.8562296297052183, "learning_rate": 9.695324273608619e-06, "loss": 0.7186, "step": 4514 }, { "epoch": 0.1383780801765355, "grad_norm": 1.8860974282612177, "learning_rate": 9.695153646175047e-06, "loss": 0.7766, "step": 4515 }, { "epoch": 0.1384087286992767, "grad_norm": 1.0197702617360107, "learning_rate": 9.694982972478923e-06, "loss": 0.5403, "step": 4516 }, { "epoch": 0.1384393772220179, "grad_norm": 3.193616274260521, "learning_rate": 9.694812252521933e-06, "loss": 0.7394, "step": 4517 }, { "epoch": 0.1384700257447591, "grad_norm": 1.9315484985759312, "learning_rate": 9.694641486305756e-06, "loss": 0.7819, "step": 4518 }, { "epoch": 0.13850067426750032, "grad_norm": 1.8383009441786728, "learning_rate": 9.694470673832075e-06, "loss": 0.8371, "step": 4519 }, { "epoch": 0.13853132279024152, "grad_norm": 2.1580423236793935, "learning_rate": 9.694299815102572e-06, "loss": 0.6926, "step": 4520 }, { "epoch": 0.1385619713129827, "grad_norm": 1.916505113883462, "learning_rate": 9.694128910118934e-06, "loss": 0.741, "step": 4521 }, { "epoch": 0.1385926198357239, "grad_norm": 1.8622488549410645, "learning_rate": 9.693957958882843e-06, "loss": 0.7374, "step": 4522 }, { "epoch": 0.1386232683584651, "grad_norm": 0.9119258352955781, "learning_rate": 9.693786961395982e-06, "loss": 0.5197, "step": 4523 }, { "epoch": 0.13865391688120632, "grad_norm": 1.9297042421131025, "learning_rate": 9.693615917660036e-06, "loss": 0.7231, "step": 4524 }, { "epoch": 0.13868456540394752, "grad_norm": 1.724739070261407, "learning_rate": 9.693444827676694e-06, "loss": 0.7189, "step": 4525 }, { "epoch": 0.13871521392668873, "grad_norm": 1.975594559376815, "learning_rate": 9.693273691447637e-06, "loss": 0.7913, "step": 4526 }, { "epoch": 0.13874586244942994, "grad_norm": 0.843970452006816, "learning_rate": 9.693102508974555e-06, "loss": 0.4835, "step": 4527 }, { "epoch": 0.13877651097217114, "grad_norm": 0.8514219831747067, "learning_rate": 9.692931280259133e-06, "loss": 0.5143, "step": 4528 }, { "epoch": 0.13880715949491235, "grad_norm": 1.8177949527539492, "learning_rate": 9.692760005303057e-06, "loss": 0.7172, "step": 4529 }, { "epoch": 0.13883780801765355, "grad_norm": 2.573028378590129, "learning_rate": 9.692588684108018e-06, "loss": 0.8848, "step": 4530 }, { "epoch": 0.13886845654039476, "grad_norm": 0.8427184132862107, "learning_rate": 9.6924173166757e-06, "loss": 0.4951, "step": 4531 }, { "epoch": 0.13889910506313596, "grad_norm": 0.8624239654654199, "learning_rate": 9.692245903007795e-06, "loss": 0.5316, "step": 4532 }, { "epoch": 0.13892975358587717, "grad_norm": 1.9006329792651984, "learning_rate": 9.69207444310599e-06, "loss": 0.7008, "step": 4533 }, { "epoch": 0.13896040210861837, "grad_norm": 2.040045036608528, "learning_rate": 9.691902936971975e-06, "loss": 0.7802, "step": 4534 }, { "epoch": 0.13899105063135958, "grad_norm": 1.8239596991445857, "learning_rate": 9.691731384607441e-06, "loss": 0.6783, "step": 4535 }, { "epoch": 0.13902169915410076, "grad_norm": 1.79047823423201, "learning_rate": 9.691559786014076e-06, "loss": 0.7492, "step": 4536 }, { "epoch": 0.13905234767684196, "grad_norm": 2.091987767748127, "learning_rate": 9.691388141193571e-06, "loss": 0.772, "step": 4537 }, { "epoch": 0.13908299619958317, "grad_norm": 1.8693925860882536, "learning_rate": 9.691216450147622e-06, "loss": 0.767, "step": 4538 }, { "epoch": 0.13911364472232438, "grad_norm": 1.037968945332575, "learning_rate": 9.691044712877914e-06, "loss": 0.5198, "step": 4539 }, { "epoch": 0.13914429324506558, "grad_norm": 1.9020902454548585, "learning_rate": 9.690872929386143e-06, "loss": 0.7877, "step": 4540 }, { "epoch": 0.1391749417678068, "grad_norm": 1.5689890321282969, "learning_rate": 9.690701099674e-06, "loss": 0.7759, "step": 4541 }, { "epoch": 0.139205590290548, "grad_norm": 1.8506223884869804, "learning_rate": 9.69052922374318e-06, "loss": 0.6683, "step": 4542 }, { "epoch": 0.1392362388132892, "grad_norm": 0.8627939996962602, "learning_rate": 9.690357301595375e-06, "loss": 0.5267, "step": 4543 }, { "epoch": 0.1392668873360304, "grad_norm": 1.662718086550571, "learning_rate": 9.690185333232278e-06, "loss": 0.8327, "step": 4544 }, { "epoch": 0.1392975358587716, "grad_norm": 2.0408489525168494, "learning_rate": 9.690013318655588e-06, "loss": 0.7894, "step": 4545 }, { "epoch": 0.13932818438151282, "grad_norm": 1.7917950845600379, "learning_rate": 9.689841257866994e-06, "loss": 0.7884, "step": 4546 }, { "epoch": 0.13935883290425402, "grad_norm": 0.8935001906430371, "learning_rate": 9.689669150868196e-06, "loss": 0.4839, "step": 4547 }, { "epoch": 0.13938948142699523, "grad_norm": 2.1508889411435033, "learning_rate": 9.689496997660887e-06, "loss": 0.7286, "step": 4548 }, { "epoch": 0.13942012994973643, "grad_norm": 2.358871374965612, "learning_rate": 9.689324798246765e-06, "loss": 0.8054, "step": 4549 }, { "epoch": 0.13945077847247764, "grad_norm": 1.8435403755232007, "learning_rate": 9.689152552627526e-06, "loss": 0.7333, "step": 4550 }, { "epoch": 0.13948142699521884, "grad_norm": 1.7799599639257386, "learning_rate": 9.688980260804865e-06, "loss": 0.674, "step": 4551 }, { "epoch": 0.13951207551796002, "grad_norm": 0.8557509761856589, "learning_rate": 9.688807922780483e-06, "loss": 0.4916, "step": 4552 }, { "epoch": 0.13954272404070123, "grad_norm": 1.6137992103463712, "learning_rate": 9.688635538556079e-06, "loss": 0.6212, "step": 4553 }, { "epoch": 0.13957337256344243, "grad_norm": 1.9124875852428607, "learning_rate": 9.688463108133345e-06, "loss": 0.7687, "step": 4554 }, { "epoch": 0.13960402108618364, "grad_norm": 1.8245492326240678, "learning_rate": 9.688290631513989e-06, "loss": 0.7682, "step": 4555 }, { "epoch": 0.13963466960892484, "grad_norm": 0.9108542822257696, "learning_rate": 9.688118108699703e-06, "loss": 0.5264, "step": 4556 }, { "epoch": 0.13966531813166605, "grad_norm": 1.9596846156951737, "learning_rate": 9.687945539692191e-06, "loss": 0.6938, "step": 4557 }, { "epoch": 0.13969596665440726, "grad_norm": 0.9035046231728185, "learning_rate": 9.68777292449315e-06, "loss": 0.53, "step": 4558 }, { "epoch": 0.13972661517714846, "grad_norm": 0.8328674608167904, "learning_rate": 9.687600263104287e-06, "loss": 0.504, "step": 4559 }, { "epoch": 0.13975726369988967, "grad_norm": 2.0496166912372606, "learning_rate": 9.687427555527296e-06, "loss": 0.7784, "step": 4560 }, { "epoch": 0.13978791222263087, "grad_norm": 1.8574214632218584, "learning_rate": 9.687254801763883e-06, "loss": 0.7058, "step": 4561 }, { "epoch": 0.13981856074537208, "grad_norm": 1.8906147235129158, "learning_rate": 9.687082001815749e-06, "loss": 0.7594, "step": 4562 }, { "epoch": 0.13984920926811328, "grad_norm": 1.626571276329655, "learning_rate": 9.686909155684596e-06, "loss": 0.7809, "step": 4563 }, { "epoch": 0.1398798577908545, "grad_norm": 1.9944846625238144, "learning_rate": 9.68673626337213e-06, "loss": 0.749, "step": 4564 }, { "epoch": 0.1399105063135957, "grad_norm": 0.9821265583894822, "learning_rate": 9.68656332488005e-06, "loss": 0.5222, "step": 4565 }, { "epoch": 0.1399411548363369, "grad_norm": 1.918619289500038, "learning_rate": 9.686390340210064e-06, "loss": 0.7878, "step": 4566 }, { "epoch": 0.13997180335907808, "grad_norm": 2.243386450926643, "learning_rate": 9.686217309363875e-06, "loss": 0.7671, "step": 4567 }, { "epoch": 0.14000245188181928, "grad_norm": 1.8515884313556517, "learning_rate": 9.686044232343186e-06, "loss": 0.7671, "step": 4568 }, { "epoch": 0.1400331004045605, "grad_norm": 1.890448567960161, "learning_rate": 9.685871109149706e-06, "loss": 0.8004, "step": 4569 }, { "epoch": 0.1400637489273017, "grad_norm": 1.885493053675027, "learning_rate": 9.68569793978514e-06, "loss": 0.6923, "step": 4570 }, { "epoch": 0.1400943974500429, "grad_norm": 1.8419056156292881, "learning_rate": 9.68552472425119e-06, "loss": 0.7398, "step": 4571 }, { "epoch": 0.1401250459727841, "grad_norm": 1.9710324288187853, "learning_rate": 9.685351462549568e-06, "loss": 0.8632, "step": 4572 }, { "epoch": 0.1401556944955253, "grad_norm": 1.7808038412756524, "learning_rate": 9.68517815468198e-06, "loss": 0.723, "step": 4573 }, { "epoch": 0.14018634301826652, "grad_norm": 0.8797137747595021, "learning_rate": 9.68500480065013e-06, "loss": 0.5198, "step": 4574 }, { "epoch": 0.14021699154100772, "grad_norm": 1.9385943518272317, "learning_rate": 9.684831400455731e-06, "loss": 0.6555, "step": 4575 }, { "epoch": 0.14024764006374893, "grad_norm": 2.0997365752539, "learning_rate": 9.684657954100492e-06, "loss": 0.7762, "step": 4576 }, { "epoch": 0.14027828858649014, "grad_norm": 1.7624322611930856, "learning_rate": 9.684484461586117e-06, "loss": 0.599, "step": 4577 }, { "epoch": 0.14030893710923134, "grad_norm": 1.9861423067025354, "learning_rate": 9.684310922914318e-06, "loss": 0.6133, "step": 4578 }, { "epoch": 0.14033958563197255, "grad_norm": 1.6821564027264786, "learning_rate": 9.684137338086805e-06, "loss": 0.7454, "step": 4579 }, { "epoch": 0.14037023415471375, "grad_norm": 1.9364285176938536, "learning_rate": 9.683963707105288e-06, "loss": 0.7441, "step": 4580 }, { "epoch": 0.14040088267745496, "grad_norm": 1.884311395059368, "learning_rate": 9.683790029971478e-06, "loss": 0.6699, "step": 4581 }, { "epoch": 0.14043153120019616, "grad_norm": 1.9470127051233839, "learning_rate": 9.683616306687086e-06, "loss": 0.7729, "step": 4582 }, { "epoch": 0.14046217972293734, "grad_norm": 0.8531954229152967, "learning_rate": 9.683442537253826e-06, "loss": 0.5269, "step": 4583 }, { "epoch": 0.14049282824567855, "grad_norm": 2.0019363357390616, "learning_rate": 9.683268721673408e-06, "loss": 0.7614, "step": 4584 }, { "epoch": 0.14052347676841975, "grad_norm": 0.8425241503343531, "learning_rate": 9.683094859947544e-06, "loss": 0.5131, "step": 4585 }, { "epoch": 0.14055412529116096, "grad_norm": 1.7296819502725604, "learning_rate": 9.68292095207795e-06, "loss": 0.7638, "step": 4586 }, { "epoch": 0.14058477381390216, "grad_norm": 1.7420223144418805, "learning_rate": 9.682746998066335e-06, "loss": 0.8076, "step": 4587 }, { "epoch": 0.14061542233664337, "grad_norm": 0.843698794419924, "learning_rate": 9.682572997914417e-06, "loss": 0.5077, "step": 4588 }, { "epoch": 0.14064607085938458, "grad_norm": 2.1061120215507305, "learning_rate": 9.68239895162391e-06, "loss": 0.8442, "step": 4589 }, { "epoch": 0.14067671938212578, "grad_norm": 1.8627347903271168, "learning_rate": 9.682224859196528e-06, "loss": 0.6824, "step": 4590 }, { "epoch": 0.140707367904867, "grad_norm": 2.2559528834545612, "learning_rate": 9.682050720633985e-06, "loss": 0.6732, "step": 4591 }, { "epoch": 0.1407380164276082, "grad_norm": 1.8328484475643534, "learning_rate": 9.681876535937999e-06, "loss": 0.7609, "step": 4592 }, { "epoch": 0.1407686649503494, "grad_norm": 1.855891260478722, "learning_rate": 9.681702305110285e-06, "loss": 0.6991, "step": 4593 }, { "epoch": 0.1407993134730906, "grad_norm": 1.8769519534946322, "learning_rate": 9.681528028152562e-06, "loss": 0.7869, "step": 4594 }, { "epoch": 0.1408299619958318, "grad_norm": 1.8746128640097124, "learning_rate": 9.681353705066544e-06, "loss": 0.7861, "step": 4595 }, { "epoch": 0.14086061051857302, "grad_norm": 2.0228895934172266, "learning_rate": 9.681179335853951e-06, "loss": 0.6836, "step": 4596 }, { "epoch": 0.14089125904131422, "grad_norm": 0.9488712801208768, "learning_rate": 9.6810049205165e-06, "loss": 0.5239, "step": 4597 }, { "epoch": 0.14092190756405543, "grad_norm": 1.831176847434413, "learning_rate": 9.68083045905591e-06, "loss": 0.8046, "step": 4598 }, { "epoch": 0.1409525560867966, "grad_norm": 1.7998335690184608, "learning_rate": 9.6806559514739e-06, "loss": 0.6496, "step": 4599 }, { "epoch": 0.1409832046095378, "grad_norm": 0.8861339810612073, "learning_rate": 9.680481397772187e-06, "loss": 0.4924, "step": 4600 }, { "epoch": 0.14101385313227902, "grad_norm": 1.7632967407333813, "learning_rate": 9.680306797952496e-06, "loss": 0.783, "step": 4601 }, { "epoch": 0.14104450165502022, "grad_norm": 1.80633611896751, "learning_rate": 9.680132152016544e-06, "loss": 0.8134, "step": 4602 }, { "epoch": 0.14107515017776143, "grad_norm": 1.712487440215308, "learning_rate": 9.679957459966053e-06, "loss": 0.6812, "step": 4603 }, { "epoch": 0.14110579870050263, "grad_norm": 1.9856585571553766, "learning_rate": 9.679782721802742e-06, "loss": 0.7541, "step": 4604 }, { "epoch": 0.14113644722324384, "grad_norm": 1.93091310059048, "learning_rate": 9.679607937528335e-06, "loss": 0.7048, "step": 4605 }, { "epoch": 0.14116709574598504, "grad_norm": 1.737932091502883, "learning_rate": 9.679433107144555e-06, "loss": 0.7089, "step": 4606 }, { "epoch": 0.14119774426872625, "grad_norm": 1.8391266730387914, "learning_rate": 9.679258230653122e-06, "loss": 0.8052, "step": 4607 }, { "epoch": 0.14122839279146746, "grad_norm": 2.069645103326678, "learning_rate": 9.679083308055761e-06, "loss": 0.7564, "step": 4608 }, { "epoch": 0.14125904131420866, "grad_norm": 1.88398870297276, "learning_rate": 9.678908339354194e-06, "loss": 0.6966, "step": 4609 }, { "epoch": 0.14128968983694987, "grad_norm": 1.9465968751083556, "learning_rate": 9.678733324550147e-06, "loss": 0.7268, "step": 4610 }, { "epoch": 0.14132033835969107, "grad_norm": 1.030919981604569, "learning_rate": 9.678558263645343e-06, "loss": 0.5037, "step": 4611 }, { "epoch": 0.14135098688243228, "grad_norm": 1.9195886698435929, "learning_rate": 9.678383156641507e-06, "loss": 0.8167, "step": 4612 }, { "epoch": 0.14138163540517348, "grad_norm": 1.8731609053549816, "learning_rate": 9.678208003540366e-06, "loss": 0.7087, "step": 4613 }, { "epoch": 0.14141228392791466, "grad_norm": 0.865671046180436, "learning_rate": 9.678032804343644e-06, "loss": 0.4922, "step": 4614 }, { "epoch": 0.14144293245065587, "grad_norm": 1.6317859820613836, "learning_rate": 9.677857559053068e-06, "loss": 0.7001, "step": 4615 }, { "epoch": 0.14147358097339707, "grad_norm": 1.7784017800913616, "learning_rate": 9.677682267670365e-06, "loss": 0.7724, "step": 4616 }, { "epoch": 0.14150422949613828, "grad_norm": 1.8380640210624268, "learning_rate": 9.677506930197261e-06, "loss": 0.7218, "step": 4617 }, { "epoch": 0.14153487801887948, "grad_norm": 0.9738149228882358, "learning_rate": 9.677331546635483e-06, "loss": 0.5082, "step": 4618 }, { "epoch": 0.1415655265416207, "grad_norm": 1.7596671080022428, "learning_rate": 9.677156116986764e-06, "loss": 0.699, "step": 4619 }, { "epoch": 0.1415961750643619, "grad_norm": 1.975701320092645, "learning_rate": 9.676980641252826e-06, "loss": 0.7197, "step": 4620 }, { "epoch": 0.1416268235871031, "grad_norm": 1.8598784552685064, "learning_rate": 9.676805119435402e-06, "loss": 0.6532, "step": 4621 }, { "epoch": 0.1416574721098443, "grad_norm": 0.8244020670882576, "learning_rate": 9.676629551536221e-06, "loss": 0.4965, "step": 4622 }, { "epoch": 0.1416881206325855, "grad_norm": 2.0195131878397894, "learning_rate": 9.676453937557013e-06, "loss": 0.7406, "step": 4623 }, { "epoch": 0.14171876915532672, "grad_norm": 2.0035635349472676, "learning_rate": 9.676278277499507e-06, "loss": 0.7198, "step": 4624 }, { "epoch": 0.14174941767806792, "grad_norm": 1.9516702152393814, "learning_rate": 9.676102571365433e-06, "loss": 0.7879, "step": 4625 }, { "epoch": 0.14178006620080913, "grad_norm": 0.8556250947610435, "learning_rate": 9.675926819156527e-06, "loss": 0.5198, "step": 4626 }, { "epoch": 0.14181071472355034, "grad_norm": 1.7408121372608478, "learning_rate": 9.675751020874516e-06, "loss": 0.6728, "step": 4627 }, { "epoch": 0.14184136324629154, "grad_norm": 2.028642828376145, "learning_rate": 9.675575176521134e-06, "loss": 0.7434, "step": 4628 }, { "epoch": 0.14187201176903275, "grad_norm": 1.9600697375309037, "learning_rate": 9.675399286098113e-06, "loss": 0.7534, "step": 4629 }, { "epoch": 0.14190266029177392, "grad_norm": 2.1909587011004197, "learning_rate": 9.675223349607187e-06, "loss": 0.7603, "step": 4630 }, { "epoch": 0.14193330881451513, "grad_norm": 2.0226806983853, "learning_rate": 9.67504736705009e-06, "loss": 0.8849, "step": 4631 }, { "epoch": 0.14196395733725634, "grad_norm": 1.7877952176413714, "learning_rate": 9.674871338428555e-06, "loss": 0.7335, "step": 4632 }, { "epoch": 0.14199460585999754, "grad_norm": 1.719591335600712, "learning_rate": 9.674695263744315e-06, "loss": 0.7267, "step": 4633 }, { "epoch": 0.14202525438273875, "grad_norm": 1.8683838759671463, "learning_rate": 9.674519142999108e-06, "loss": 0.636, "step": 4634 }, { "epoch": 0.14205590290547995, "grad_norm": 1.8163765833855932, "learning_rate": 9.674342976194667e-06, "loss": 0.7298, "step": 4635 }, { "epoch": 0.14208655142822116, "grad_norm": 1.8365100837528556, "learning_rate": 9.67416676333273e-06, "loss": 0.7256, "step": 4636 }, { "epoch": 0.14211719995096236, "grad_norm": 1.8245516160155941, "learning_rate": 9.67399050441503e-06, "loss": 0.6826, "step": 4637 }, { "epoch": 0.14214784847370357, "grad_norm": 1.5522859890594094, "learning_rate": 9.673814199443308e-06, "loss": 0.6933, "step": 4638 }, { "epoch": 0.14217849699644478, "grad_norm": 2.2014966282106103, "learning_rate": 9.673637848419297e-06, "loss": 0.914, "step": 4639 }, { "epoch": 0.14220914551918598, "grad_norm": 2.0626387096918015, "learning_rate": 9.673461451344736e-06, "loss": 0.7283, "step": 4640 }, { "epoch": 0.1422397940419272, "grad_norm": 0.9939850829578208, "learning_rate": 9.673285008221364e-06, "loss": 0.495, "step": 4641 }, { "epoch": 0.1422704425646684, "grad_norm": 1.9852037339377415, "learning_rate": 9.673108519050922e-06, "loss": 0.6911, "step": 4642 }, { "epoch": 0.1423010910874096, "grad_norm": 1.8674590002570668, "learning_rate": 9.672931983835143e-06, "loss": 0.6949, "step": 4643 }, { "epoch": 0.1423317396101508, "grad_norm": 2.1323422103307834, "learning_rate": 9.672755402575771e-06, "loss": 0.6454, "step": 4644 }, { "epoch": 0.14236238813289198, "grad_norm": 1.7967238226662534, "learning_rate": 9.672578775274543e-06, "loss": 0.778, "step": 4645 }, { "epoch": 0.1423930366556332, "grad_norm": 2.1340588020195215, "learning_rate": 9.672402101933201e-06, "loss": 0.8214, "step": 4646 }, { "epoch": 0.1424236851783744, "grad_norm": 1.8889542173467422, "learning_rate": 9.672225382553486e-06, "loss": 0.7177, "step": 4647 }, { "epoch": 0.1424543337011156, "grad_norm": 1.7134926663204497, "learning_rate": 9.67204861713714e-06, "loss": 0.763, "step": 4648 }, { "epoch": 0.1424849822238568, "grad_norm": 2.113972366028378, "learning_rate": 9.671871805685902e-06, "loss": 0.7563, "step": 4649 }, { "epoch": 0.142515630746598, "grad_norm": 1.6736234533083243, "learning_rate": 9.671694948201517e-06, "loss": 0.6907, "step": 4650 }, { "epoch": 0.14254627926933922, "grad_norm": 1.857281029513005, "learning_rate": 9.671518044685726e-06, "loss": 0.7744, "step": 4651 }, { "epoch": 0.14257692779208042, "grad_norm": 1.7437852200306623, "learning_rate": 9.671341095140273e-06, "loss": 0.7173, "step": 4652 }, { "epoch": 0.14260757631482163, "grad_norm": 2.0781555646326266, "learning_rate": 9.671164099566898e-06, "loss": 0.8261, "step": 4653 }, { "epoch": 0.14263822483756283, "grad_norm": 1.957297785973761, "learning_rate": 9.67098705796735e-06, "loss": 0.7483, "step": 4654 }, { "epoch": 0.14266887336030404, "grad_norm": 2.0783909681519575, "learning_rate": 9.670809970343372e-06, "loss": 0.7584, "step": 4655 }, { "epoch": 0.14269952188304524, "grad_norm": 1.8573239340553396, "learning_rate": 9.670632836696707e-06, "loss": 0.792, "step": 4656 }, { "epoch": 0.14273017040578645, "grad_norm": 1.9689135528675448, "learning_rate": 9.670455657029104e-06, "loss": 0.7884, "step": 4657 }, { "epoch": 0.14276081892852766, "grad_norm": 1.7468187800251964, "learning_rate": 9.670278431342304e-06, "loss": 0.6768, "step": 4658 }, { "epoch": 0.14279146745126886, "grad_norm": 1.65153295453724, "learning_rate": 9.670101159638057e-06, "loss": 0.8801, "step": 4659 }, { "epoch": 0.14282211597401007, "grad_norm": 1.5839926455895212, "learning_rate": 9.669923841918107e-06, "loss": 0.6247, "step": 4660 }, { "epoch": 0.14285276449675124, "grad_norm": 1.775213242786564, "learning_rate": 9.669746478184204e-06, "loss": 0.7786, "step": 4661 }, { "epoch": 0.14288341301949245, "grad_norm": 1.9544642850604157, "learning_rate": 9.669569068438094e-06, "loss": 0.8323, "step": 4662 }, { "epoch": 0.14291406154223366, "grad_norm": 1.5603070940400945, "learning_rate": 9.669391612681524e-06, "loss": 0.6469, "step": 4663 }, { "epoch": 0.14294471006497486, "grad_norm": 1.695815206499114, "learning_rate": 9.669214110916246e-06, "loss": 0.6611, "step": 4664 }, { "epoch": 0.14297535858771607, "grad_norm": 1.9753775160929192, "learning_rate": 9.669036563144004e-06, "loss": 0.7884, "step": 4665 }, { "epoch": 0.14300600711045727, "grad_norm": 2.0281529123093938, "learning_rate": 9.668858969366551e-06, "loss": 0.8299, "step": 4666 }, { "epoch": 0.14303665563319848, "grad_norm": 1.9590287018036652, "learning_rate": 9.668681329585637e-06, "loss": 0.8571, "step": 4667 }, { "epoch": 0.14306730415593968, "grad_norm": 1.8575420418934425, "learning_rate": 9.668503643803011e-06, "loss": 0.7368, "step": 4668 }, { "epoch": 0.1430979526786809, "grad_norm": 1.8099258704948955, "learning_rate": 9.668325912020424e-06, "loss": 0.64, "step": 4669 }, { "epoch": 0.1431286012014221, "grad_norm": 1.62294929359585, "learning_rate": 9.668148134239626e-06, "loss": 0.7472, "step": 4670 }, { "epoch": 0.1431592497241633, "grad_norm": 1.6929187633625438, "learning_rate": 9.66797031046237e-06, "loss": 0.5805, "step": 4671 }, { "epoch": 0.1431898982469045, "grad_norm": 1.930209266036854, "learning_rate": 9.667792440690411e-06, "loss": 0.657, "step": 4672 }, { "epoch": 0.1432205467696457, "grad_norm": 1.2239155458863866, "learning_rate": 9.667614524925496e-06, "loss": 0.5101, "step": 4673 }, { "epoch": 0.14325119529238692, "grad_norm": 1.863843828159239, "learning_rate": 9.667436563169383e-06, "loss": 0.7611, "step": 4674 }, { "epoch": 0.14328184381512812, "grad_norm": 1.857161908681275, "learning_rate": 9.667258555423822e-06, "loss": 0.7124, "step": 4675 }, { "epoch": 0.1433124923378693, "grad_norm": 1.7545571025432851, "learning_rate": 9.667080501690569e-06, "loss": 0.773, "step": 4676 }, { "epoch": 0.1433431408606105, "grad_norm": 2.1096632778220705, "learning_rate": 9.666902401971377e-06, "loss": 0.7391, "step": 4677 }, { "epoch": 0.1433737893833517, "grad_norm": 1.7952127664577544, "learning_rate": 9.666724256268001e-06, "loss": 0.8465, "step": 4678 }, { "epoch": 0.14340443790609292, "grad_norm": 0.9771357159568739, "learning_rate": 9.666546064582199e-06, "loss": 0.5052, "step": 4679 }, { "epoch": 0.14343508642883412, "grad_norm": 1.8825856459834605, "learning_rate": 9.666367826915723e-06, "loss": 0.6944, "step": 4680 }, { "epoch": 0.14346573495157533, "grad_norm": 1.7322624001992266, "learning_rate": 9.66618954327033e-06, "loss": 0.7724, "step": 4681 }, { "epoch": 0.14349638347431654, "grad_norm": 1.8422284543238134, "learning_rate": 9.66601121364778e-06, "loss": 0.678, "step": 4682 }, { "epoch": 0.14352703199705774, "grad_norm": 1.8195953736235655, "learning_rate": 9.665832838049826e-06, "loss": 0.7226, "step": 4683 }, { "epoch": 0.14355768051979895, "grad_norm": 2.1116232152922163, "learning_rate": 9.665654416478227e-06, "loss": 0.7684, "step": 4684 }, { "epoch": 0.14358832904254015, "grad_norm": 1.952736553599624, "learning_rate": 9.665475948934742e-06, "loss": 0.7807, "step": 4685 }, { "epoch": 0.14361897756528136, "grad_norm": 2.145494905126868, "learning_rate": 9.665297435421128e-06, "loss": 0.7926, "step": 4686 }, { "epoch": 0.14364962608802256, "grad_norm": 1.8466519959988428, "learning_rate": 9.665118875939145e-06, "loss": 0.7791, "step": 4687 }, { "epoch": 0.14368027461076377, "grad_norm": 1.9683922995655576, "learning_rate": 9.664940270490553e-06, "loss": 0.8328, "step": 4688 }, { "epoch": 0.14371092313350498, "grad_norm": 1.8309870644372435, "learning_rate": 9.66476161907711e-06, "loss": 0.7934, "step": 4689 }, { "epoch": 0.14374157165624618, "grad_norm": 1.7583012718157836, "learning_rate": 9.664582921700578e-06, "loss": 0.6461, "step": 4690 }, { "epoch": 0.1437722201789874, "grad_norm": 1.686586213718502, "learning_rate": 9.664404178362715e-06, "loss": 0.7678, "step": 4691 }, { "epoch": 0.14380286870172856, "grad_norm": 1.9014915101697882, "learning_rate": 9.664225389065287e-06, "loss": 0.744, "step": 4692 }, { "epoch": 0.14383351722446977, "grad_norm": 1.8492374837791392, "learning_rate": 9.664046553810051e-06, "loss": 0.7294, "step": 4693 }, { "epoch": 0.14386416574721098, "grad_norm": 1.8300894762203772, "learning_rate": 9.663867672598772e-06, "loss": 0.7274, "step": 4694 }, { "epoch": 0.14389481426995218, "grad_norm": 1.9175106229090442, "learning_rate": 9.663688745433211e-06, "loss": 0.8758, "step": 4695 }, { "epoch": 0.1439254627926934, "grad_norm": 1.0354430605970493, "learning_rate": 9.663509772315132e-06, "loss": 0.511, "step": 4696 }, { "epoch": 0.1439561113154346, "grad_norm": 2.0524594619405443, "learning_rate": 9.663330753246298e-06, "loss": 0.7958, "step": 4697 }, { "epoch": 0.1439867598381758, "grad_norm": 2.06551121521444, "learning_rate": 9.663151688228473e-06, "loss": 0.7572, "step": 4698 }, { "epoch": 0.144017408360917, "grad_norm": 0.8713116167698426, "learning_rate": 9.66297257726342e-06, "loss": 0.5245, "step": 4699 }, { "epoch": 0.1440480568836582, "grad_norm": 1.6227332237548318, "learning_rate": 9.662793420352906e-06, "loss": 0.7143, "step": 4700 }, { "epoch": 0.14407870540639942, "grad_norm": 1.6829449448299216, "learning_rate": 9.662614217498696e-06, "loss": 0.757, "step": 4701 }, { "epoch": 0.14410935392914062, "grad_norm": 0.8512769550722208, "learning_rate": 9.662434968702554e-06, "loss": 0.5215, "step": 4702 }, { "epoch": 0.14414000245188183, "grad_norm": 1.9150628890880192, "learning_rate": 9.662255673966248e-06, "loss": 0.8362, "step": 4703 }, { "epoch": 0.14417065097462303, "grad_norm": 1.7547084389108967, "learning_rate": 9.662076333291543e-06, "loss": 0.7504, "step": 4704 }, { "epoch": 0.14420129949736424, "grad_norm": 1.925788996459963, "learning_rate": 9.66189694668021e-06, "loss": 0.6677, "step": 4705 }, { "epoch": 0.14423194802010544, "grad_norm": 1.8054288310120827, "learning_rate": 9.66171751413401e-06, "loss": 0.7975, "step": 4706 }, { "epoch": 0.14426259654284662, "grad_norm": 2.0079427746776077, "learning_rate": 9.661538035654716e-06, "loss": 0.7447, "step": 4707 }, { "epoch": 0.14429324506558783, "grad_norm": 1.8451064529634773, "learning_rate": 9.661358511244095e-06, "loss": 0.5374, "step": 4708 }, { "epoch": 0.14432389358832903, "grad_norm": 1.8142984639381763, "learning_rate": 9.661178940903916e-06, "loss": 0.7106, "step": 4709 }, { "epoch": 0.14435454211107024, "grad_norm": 1.0360440383402303, "learning_rate": 9.660999324635948e-06, "loss": 0.5157, "step": 4710 }, { "epoch": 0.14438519063381144, "grad_norm": 1.938630759119246, "learning_rate": 9.660819662441962e-06, "loss": 0.669, "step": 4711 }, { "epoch": 0.14441583915655265, "grad_norm": 1.8076424151260648, "learning_rate": 9.660639954323726e-06, "loss": 0.683, "step": 4712 }, { "epoch": 0.14444648767929386, "grad_norm": 1.636902969227355, "learning_rate": 9.660460200283013e-06, "loss": 0.642, "step": 4713 }, { "epoch": 0.14447713620203506, "grad_norm": 1.852610409515035, "learning_rate": 9.660280400321593e-06, "loss": 0.826, "step": 4714 }, { "epoch": 0.14450778472477627, "grad_norm": 1.6472018521004055, "learning_rate": 9.660100554441237e-06, "loss": 0.6542, "step": 4715 }, { "epoch": 0.14453843324751747, "grad_norm": 1.9575035375650358, "learning_rate": 9.659920662643719e-06, "loss": 0.7379, "step": 4716 }, { "epoch": 0.14456908177025868, "grad_norm": 1.8365431419633476, "learning_rate": 9.659740724930811e-06, "loss": 0.8083, "step": 4717 }, { "epoch": 0.14459973029299988, "grad_norm": 1.8984757058629764, "learning_rate": 9.659560741304284e-06, "loss": 0.7696, "step": 4718 }, { "epoch": 0.1446303788157411, "grad_norm": 1.6557298038948969, "learning_rate": 9.659380711765914e-06, "loss": 0.7059, "step": 4719 }, { "epoch": 0.1446610273384823, "grad_norm": 1.6873061482197285, "learning_rate": 9.659200636317471e-06, "loss": 0.7535, "step": 4720 }, { "epoch": 0.1446916758612235, "grad_norm": 1.886527514261137, "learning_rate": 9.659020514960736e-06, "loss": 0.8157, "step": 4721 }, { "epoch": 0.1447223243839647, "grad_norm": 1.7006888763428387, "learning_rate": 9.658840347697476e-06, "loss": 0.7372, "step": 4722 }, { "epoch": 0.14475297290670588, "grad_norm": 1.9189869538669622, "learning_rate": 9.658660134529473e-06, "loss": 0.7018, "step": 4723 }, { "epoch": 0.1447836214294471, "grad_norm": 1.6846500292125064, "learning_rate": 9.658479875458497e-06, "loss": 0.7356, "step": 4724 }, { "epoch": 0.1448142699521883, "grad_norm": 0.9713962895058946, "learning_rate": 9.658299570486328e-06, "loss": 0.5074, "step": 4725 }, { "epoch": 0.1448449184749295, "grad_norm": 1.705558850176301, "learning_rate": 9.658119219614744e-06, "loss": 0.6341, "step": 4726 }, { "epoch": 0.1448755669976707, "grad_norm": 1.5879906621768416, "learning_rate": 9.657938822845517e-06, "loss": 0.7854, "step": 4727 }, { "epoch": 0.1449062155204119, "grad_norm": 1.6811498844726889, "learning_rate": 9.657758380180426e-06, "loss": 0.7545, "step": 4728 }, { "epoch": 0.14493686404315312, "grad_norm": 1.9450757019303284, "learning_rate": 9.657577891621252e-06, "loss": 0.7854, "step": 4729 }, { "epoch": 0.14496751256589432, "grad_norm": 0.8806277576743071, "learning_rate": 9.65739735716977e-06, "loss": 0.5171, "step": 4730 }, { "epoch": 0.14499816108863553, "grad_norm": 1.6530752629957275, "learning_rate": 9.657216776827763e-06, "loss": 0.6672, "step": 4731 }, { "epoch": 0.14502880961137674, "grad_norm": 1.8600346468249391, "learning_rate": 9.657036150597004e-06, "loss": 0.7, "step": 4732 }, { "epoch": 0.14505945813411794, "grad_norm": 1.6579641057540273, "learning_rate": 9.656855478479279e-06, "loss": 0.7421, "step": 4733 }, { "epoch": 0.14509010665685915, "grad_norm": 0.9436203736355935, "learning_rate": 9.656674760476364e-06, "loss": 0.4881, "step": 4734 }, { "epoch": 0.14512075517960035, "grad_norm": 1.996369739579787, "learning_rate": 9.65649399659004e-06, "loss": 0.8144, "step": 4735 }, { "epoch": 0.14515140370234156, "grad_norm": 1.757501986298747, "learning_rate": 9.65631318682209e-06, "loss": 0.7828, "step": 4736 }, { "epoch": 0.14518205222508276, "grad_norm": 1.6659179669479773, "learning_rate": 9.656132331174297e-06, "loss": 0.7751, "step": 4737 }, { "epoch": 0.14521270074782394, "grad_norm": 1.6455611551022535, "learning_rate": 9.655951429648438e-06, "loss": 0.7048, "step": 4738 }, { "epoch": 0.14524334927056515, "grad_norm": 1.9727936966084625, "learning_rate": 9.655770482246299e-06, "loss": 0.8402, "step": 4739 }, { "epoch": 0.14527399779330635, "grad_norm": 1.6642830974204603, "learning_rate": 9.655589488969663e-06, "loss": 0.6523, "step": 4740 }, { "epoch": 0.14530464631604756, "grad_norm": 1.9298863563697801, "learning_rate": 9.655408449820312e-06, "loss": 0.6724, "step": 4741 }, { "epoch": 0.14533529483878876, "grad_norm": 1.8924275533845918, "learning_rate": 9.65522736480003e-06, "loss": 0.7538, "step": 4742 }, { "epoch": 0.14536594336152997, "grad_norm": 1.9981647043997304, "learning_rate": 9.655046233910604e-06, "loss": 0.697, "step": 4743 }, { "epoch": 0.14539659188427118, "grad_norm": 1.8086142401363892, "learning_rate": 9.654865057153813e-06, "loss": 0.7328, "step": 4744 }, { "epoch": 0.14542724040701238, "grad_norm": 1.7538960902799114, "learning_rate": 9.654683834531447e-06, "loss": 0.6811, "step": 4745 }, { "epoch": 0.1454578889297536, "grad_norm": 1.8714953530448353, "learning_rate": 9.654502566045292e-06, "loss": 0.7491, "step": 4746 }, { "epoch": 0.1454885374524948, "grad_norm": 1.6403333523251495, "learning_rate": 9.65432125169713e-06, "loss": 0.6878, "step": 4747 }, { "epoch": 0.145519185975236, "grad_norm": 1.7724410669594495, "learning_rate": 9.654139891488751e-06, "loss": 0.7164, "step": 4748 }, { "epoch": 0.1455498344979772, "grad_norm": 1.940772975951159, "learning_rate": 9.653958485421939e-06, "loss": 0.7773, "step": 4749 }, { "epoch": 0.1455804830207184, "grad_norm": 0.9923025245179172, "learning_rate": 9.653777033498485e-06, "loss": 0.5296, "step": 4750 }, { "epoch": 0.14561113154345962, "grad_norm": 1.0060793508444512, "learning_rate": 9.653595535720175e-06, "loss": 0.5084, "step": 4751 }, { "epoch": 0.14564178006620082, "grad_norm": 0.8629340322022764, "learning_rate": 9.653413992088798e-06, "loss": 0.5211, "step": 4752 }, { "epoch": 0.14567242858894203, "grad_norm": 1.9201203277663443, "learning_rate": 9.653232402606142e-06, "loss": 0.7527, "step": 4753 }, { "epoch": 0.1457030771116832, "grad_norm": 1.0125772437582239, "learning_rate": 9.653050767273996e-06, "loss": 0.5163, "step": 4754 }, { "epoch": 0.1457337256344244, "grad_norm": 2.0864410677916303, "learning_rate": 9.65286908609415e-06, "loss": 0.7, "step": 4755 }, { "epoch": 0.14576437415716562, "grad_norm": 1.9450347913757888, "learning_rate": 9.652687359068396e-06, "loss": 0.8739, "step": 4756 }, { "epoch": 0.14579502267990682, "grad_norm": 1.7985121007660752, "learning_rate": 9.652505586198523e-06, "loss": 0.7331, "step": 4757 }, { "epoch": 0.14582567120264803, "grad_norm": 1.7866407669566706, "learning_rate": 9.65232376748632e-06, "loss": 0.6951, "step": 4758 }, { "epoch": 0.14585631972538923, "grad_norm": 1.6642236148229765, "learning_rate": 9.65214190293358e-06, "loss": 0.8072, "step": 4759 }, { "epoch": 0.14588696824813044, "grad_norm": 1.8555198903716426, "learning_rate": 9.651959992542097e-06, "loss": 0.748, "step": 4760 }, { "epoch": 0.14591761677087164, "grad_norm": 1.6429112611229075, "learning_rate": 9.651778036313664e-06, "loss": 0.7049, "step": 4761 }, { "epoch": 0.14594826529361285, "grad_norm": 1.7016729950795106, "learning_rate": 9.651596034250069e-06, "loss": 0.8274, "step": 4762 }, { "epoch": 0.14597891381635406, "grad_norm": 1.870714900146296, "learning_rate": 9.651413986353109e-06, "loss": 0.7792, "step": 4763 }, { "epoch": 0.14600956233909526, "grad_norm": 1.1133266016293648, "learning_rate": 9.651231892624577e-06, "loss": 0.5195, "step": 4764 }, { "epoch": 0.14604021086183647, "grad_norm": 1.939600911557928, "learning_rate": 9.651049753066267e-06, "loss": 0.7225, "step": 4765 }, { "epoch": 0.14607085938457767, "grad_norm": 2.0100370421261937, "learning_rate": 9.650867567679973e-06, "loss": 0.7564, "step": 4766 }, { "epoch": 0.14610150790731888, "grad_norm": 1.904026108397537, "learning_rate": 9.650685336467492e-06, "loss": 0.7672, "step": 4767 }, { "epoch": 0.14613215643006008, "grad_norm": 1.8053409879527638, "learning_rate": 9.650503059430618e-06, "loss": 0.7899, "step": 4768 }, { "epoch": 0.14616280495280126, "grad_norm": 1.8206336629293773, "learning_rate": 9.650320736571146e-06, "loss": 0.6994, "step": 4769 }, { "epoch": 0.14619345347554247, "grad_norm": 1.6204515268421693, "learning_rate": 9.650138367890876e-06, "loss": 0.679, "step": 4770 }, { "epoch": 0.14622410199828367, "grad_norm": 1.5516582253265725, "learning_rate": 9.649955953391603e-06, "loss": 0.6537, "step": 4771 }, { "epoch": 0.14625475052102488, "grad_norm": 1.8637705402822673, "learning_rate": 9.649773493075122e-06, "loss": 0.7724, "step": 4772 }, { "epoch": 0.14628539904376608, "grad_norm": 1.7262790174318796, "learning_rate": 9.649590986943236e-06, "loss": 0.7358, "step": 4773 }, { "epoch": 0.1463160475665073, "grad_norm": 1.8572890298572622, "learning_rate": 9.649408434997739e-06, "loss": 0.7091, "step": 4774 }, { "epoch": 0.1463466960892485, "grad_norm": 1.650012978049179, "learning_rate": 9.64922583724043e-06, "loss": 0.7984, "step": 4775 }, { "epoch": 0.1463773446119897, "grad_norm": 1.61542332458243, "learning_rate": 9.64904319367311e-06, "loss": 0.6746, "step": 4776 }, { "epoch": 0.1464079931347309, "grad_norm": 1.9071003911259083, "learning_rate": 9.64886050429758e-06, "loss": 0.7579, "step": 4777 }, { "epoch": 0.1464386416574721, "grad_norm": 1.790161531733155, "learning_rate": 9.648677769115637e-06, "loss": 0.7087, "step": 4778 }, { "epoch": 0.14646929018021332, "grad_norm": 1.8771342324129219, "learning_rate": 9.64849498812908e-06, "loss": 0.7036, "step": 4779 }, { "epoch": 0.14649993870295452, "grad_norm": 2.0335186391408526, "learning_rate": 9.648312161339715e-06, "loss": 0.6817, "step": 4780 }, { "epoch": 0.14653058722569573, "grad_norm": 1.8465907522527274, "learning_rate": 9.64812928874934e-06, "loss": 0.6658, "step": 4781 }, { "epoch": 0.14656123574843694, "grad_norm": 1.8074348909788422, "learning_rate": 9.64794637035976e-06, "loss": 0.7183, "step": 4782 }, { "epoch": 0.14659188427117814, "grad_norm": 1.7786921825652362, "learning_rate": 9.647763406172772e-06, "loss": 0.7619, "step": 4783 }, { "epoch": 0.14662253279391935, "grad_norm": 1.86088531606038, "learning_rate": 9.647580396190184e-06, "loss": 0.7845, "step": 4784 }, { "epoch": 0.14665318131666052, "grad_norm": 1.774833932765458, "learning_rate": 9.647397340413795e-06, "loss": 0.7578, "step": 4785 }, { "epoch": 0.14668382983940173, "grad_norm": 1.829787150709969, "learning_rate": 9.647214238845413e-06, "loss": 0.7354, "step": 4786 }, { "epoch": 0.14671447836214294, "grad_norm": 2.0141013058356565, "learning_rate": 9.647031091486838e-06, "loss": 0.6565, "step": 4787 }, { "epoch": 0.14674512688488414, "grad_norm": 1.8322675147896257, "learning_rate": 9.64684789833988e-06, "loss": 0.8347, "step": 4788 }, { "epoch": 0.14677577540762535, "grad_norm": 1.9235266688855912, "learning_rate": 9.646664659406339e-06, "loss": 0.7691, "step": 4789 }, { "epoch": 0.14680642393036655, "grad_norm": 2.011585946042271, "learning_rate": 9.646481374688022e-06, "loss": 0.7702, "step": 4790 }, { "epoch": 0.14683707245310776, "grad_norm": 1.7625973771478838, "learning_rate": 9.646298044186735e-06, "loss": 0.7906, "step": 4791 }, { "epoch": 0.14686772097584896, "grad_norm": 2.0319328741991467, "learning_rate": 9.646114667904285e-06, "loss": 0.7244, "step": 4792 }, { "epoch": 0.14689836949859017, "grad_norm": 1.7125005520109458, "learning_rate": 9.645931245842477e-06, "loss": 0.6987, "step": 4793 }, { "epoch": 0.14692901802133138, "grad_norm": 1.6870086136998261, "learning_rate": 9.64574777800312e-06, "loss": 0.748, "step": 4794 }, { "epoch": 0.14695966654407258, "grad_norm": 1.7492896628577936, "learning_rate": 9.645564264388024e-06, "loss": 0.6444, "step": 4795 }, { "epoch": 0.1469903150668138, "grad_norm": 1.9831633168544007, "learning_rate": 9.645380704998993e-06, "loss": 0.7371, "step": 4796 }, { "epoch": 0.147020963589555, "grad_norm": 1.9268115976364952, "learning_rate": 9.645197099837838e-06, "loss": 0.7969, "step": 4797 }, { "epoch": 0.1470516121122962, "grad_norm": 1.8844733493472108, "learning_rate": 9.645013448906366e-06, "loss": 0.7296, "step": 4798 }, { "epoch": 0.1470822606350374, "grad_norm": 1.9642720622094652, "learning_rate": 9.644829752206388e-06, "loss": 0.8145, "step": 4799 }, { "epoch": 0.14711290915777858, "grad_norm": 2.0156125832622696, "learning_rate": 9.644646009739715e-06, "loss": 0.888, "step": 4800 }, { "epoch": 0.1471435576805198, "grad_norm": 2.1792958253478076, "learning_rate": 9.644462221508157e-06, "loss": 0.8768, "step": 4801 }, { "epoch": 0.147174206203261, "grad_norm": 1.0702166707081038, "learning_rate": 9.644278387513525e-06, "loss": 0.5347, "step": 4802 }, { "epoch": 0.1472048547260022, "grad_norm": 0.989173366516463, "learning_rate": 9.644094507757627e-06, "loss": 0.5187, "step": 4803 }, { "epoch": 0.1472355032487434, "grad_norm": 1.8289903391509017, "learning_rate": 9.643910582242279e-06, "loss": 0.745, "step": 4804 }, { "epoch": 0.1472661517714846, "grad_norm": 1.950525709750812, "learning_rate": 9.643726610969293e-06, "loss": 0.8165, "step": 4805 }, { "epoch": 0.14729680029422582, "grad_norm": 2.1918974448823296, "learning_rate": 9.643542593940478e-06, "loss": 0.7809, "step": 4806 }, { "epoch": 0.14732744881696702, "grad_norm": 1.864567093465016, "learning_rate": 9.643358531157651e-06, "loss": 0.8164, "step": 4807 }, { "epoch": 0.14735809733970823, "grad_norm": 1.1346430176574782, "learning_rate": 9.643174422622625e-06, "loss": 0.5003, "step": 4808 }, { "epoch": 0.14738874586244943, "grad_norm": 1.9005465752306572, "learning_rate": 9.642990268337214e-06, "loss": 0.8311, "step": 4809 }, { "epoch": 0.14741939438519064, "grad_norm": 1.0949475366454546, "learning_rate": 9.642806068303229e-06, "loss": 0.5242, "step": 4810 }, { "epoch": 0.14745004290793184, "grad_norm": 1.874701892777097, "learning_rate": 9.642621822522491e-06, "loss": 0.7319, "step": 4811 }, { "epoch": 0.14748069143067305, "grad_norm": 1.955354367567878, "learning_rate": 9.642437530996812e-06, "loss": 0.7236, "step": 4812 }, { "epoch": 0.14751133995341426, "grad_norm": 0.8536021513461448, "learning_rate": 9.642253193728006e-06, "loss": 0.4979, "step": 4813 }, { "epoch": 0.14754198847615546, "grad_norm": 0.8616514565354272, "learning_rate": 9.642068810717893e-06, "loss": 0.5042, "step": 4814 }, { "epoch": 0.14757263699889667, "grad_norm": 2.63370978939868, "learning_rate": 9.641884381968289e-06, "loss": 0.782, "step": 4815 }, { "epoch": 0.14760328552163784, "grad_norm": 1.9128056241908307, "learning_rate": 9.641699907481008e-06, "loss": 0.7586, "step": 4816 }, { "epoch": 0.14763393404437905, "grad_norm": 2.2733318738338, "learning_rate": 9.641515387257873e-06, "loss": 0.7373, "step": 4817 }, { "epoch": 0.14766458256712026, "grad_norm": 1.9349762970737299, "learning_rate": 9.641330821300697e-06, "loss": 0.8024, "step": 4818 }, { "epoch": 0.14769523108986146, "grad_norm": 2.0274248744046828, "learning_rate": 9.641146209611302e-06, "loss": 0.8002, "step": 4819 }, { "epoch": 0.14772587961260267, "grad_norm": 1.0027983295738125, "learning_rate": 9.640961552191505e-06, "loss": 0.5075, "step": 4820 }, { "epoch": 0.14775652813534387, "grad_norm": 1.7418068558136603, "learning_rate": 9.640776849043128e-06, "loss": 0.7175, "step": 4821 }, { "epoch": 0.14778717665808508, "grad_norm": 2.1776896254831266, "learning_rate": 9.640592100167989e-06, "loss": 0.6836, "step": 4822 }, { "epoch": 0.14781782518082628, "grad_norm": 1.785648077115751, "learning_rate": 9.640407305567907e-06, "loss": 0.723, "step": 4823 }, { "epoch": 0.1478484737035675, "grad_norm": 2.2510173739730464, "learning_rate": 9.640222465244706e-06, "loss": 0.7535, "step": 4824 }, { "epoch": 0.1478791222263087, "grad_norm": 1.9200111196049368, "learning_rate": 9.640037579200206e-06, "loss": 0.7493, "step": 4825 }, { "epoch": 0.1479097707490499, "grad_norm": 1.7128677446286404, "learning_rate": 9.63985264743623e-06, "loss": 0.7349, "step": 4826 }, { "epoch": 0.1479404192717911, "grad_norm": 1.9102459605328392, "learning_rate": 9.639667669954596e-06, "loss": 0.6674, "step": 4827 }, { "epoch": 0.1479710677945323, "grad_norm": 1.8171706243509302, "learning_rate": 9.63948264675713e-06, "loss": 0.817, "step": 4828 }, { "epoch": 0.14800171631727352, "grad_norm": 1.761660204003078, "learning_rate": 9.639297577845654e-06, "loss": 0.7409, "step": 4829 }, { "epoch": 0.14803236484001472, "grad_norm": 1.8098961361998671, "learning_rate": 9.639112463221994e-06, "loss": 0.6279, "step": 4830 }, { "epoch": 0.1480630133627559, "grad_norm": 1.861801535150556, "learning_rate": 9.638927302887968e-06, "loss": 0.8065, "step": 4831 }, { "epoch": 0.1480936618854971, "grad_norm": 1.9414158411650115, "learning_rate": 9.638742096845408e-06, "loss": 0.7434, "step": 4832 }, { "epoch": 0.1481243104082383, "grad_norm": 1.8357741099781189, "learning_rate": 9.638556845096134e-06, "loss": 0.7347, "step": 4833 }, { "epoch": 0.14815495893097952, "grad_norm": 1.8161813870539019, "learning_rate": 9.638371547641972e-06, "loss": 0.7318, "step": 4834 }, { "epoch": 0.14818560745372072, "grad_norm": 2.9692579072730845, "learning_rate": 9.63818620448475e-06, "loss": 0.8766, "step": 4835 }, { "epoch": 0.14821625597646193, "grad_norm": 1.9598949999615665, "learning_rate": 9.638000815626292e-06, "loss": 0.7765, "step": 4836 }, { "epoch": 0.14824690449920314, "grad_norm": 1.9513263084875467, "learning_rate": 9.637815381068424e-06, "loss": 0.7654, "step": 4837 }, { "epoch": 0.14827755302194434, "grad_norm": 1.908693007539495, "learning_rate": 9.637629900812975e-06, "loss": 0.6912, "step": 4838 }, { "epoch": 0.14830820154468555, "grad_norm": 1.7850047277983903, "learning_rate": 9.637444374861774e-06, "loss": 0.8176, "step": 4839 }, { "epoch": 0.14833885006742675, "grad_norm": 1.8421093523244976, "learning_rate": 9.637258803216643e-06, "loss": 0.7301, "step": 4840 }, { "epoch": 0.14836949859016796, "grad_norm": 0.9980811687051077, "learning_rate": 9.637073185879418e-06, "loss": 0.5358, "step": 4841 }, { "epoch": 0.14840014711290916, "grad_norm": 0.9133807607342997, "learning_rate": 9.636887522851924e-06, "loss": 0.5417, "step": 4842 }, { "epoch": 0.14843079563565037, "grad_norm": 1.9086102856318414, "learning_rate": 9.63670181413599e-06, "loss": 0.7473, "step": 4843 }, { "epoch": 0.14846144415839158, "grad_norm": 1.8576704616565254, "learning_rate": 9.636516059733445e-06, "loss": 0.7279, "step": 4844 }, { "epoch": 0.14849209268113278, "grad_norm": 2.2371026696345773, "learning_rate": 9.636330259646122e-06, "loss": 0.6991, "step": 4845 }, { "epoch": 0.148522741203874, "grad_norm": 2.1074713432357464, "learning_rate": 9.636144413875852e-06, "loss": 0.6844, "step": 4846 }, { "epoch": 0.14855338972661516, "grad_norm": 1.790848593718653, "learning_rate": 9.635958522424464e-06, "loss": 0.7383, "step": 4847 }, { "epoch": 0.14858403824935637, "grad_norm": 1.8249828082557231, "learning_rate": 9.635772585293792e-06, "loss": 0.682, "step": 4848 }, { "epoch": 0.14861468677209758, "grad_norm": 1.092532767176621, "learning_rate": 9.635586602485665e-06, "loss": 0.5199, "step": 4849 }, { "epoch": 0.14864533529483878, "grad_norm": 0.9691615142719298, "learning_rate": 9.635400574001918e-06, "loss": 0.522, "step": 4850 }, { "epoch": 0.14867598381758, "grad_norm": 0.7630978535627854, "learning_rate": 9.635214499844383e-06, "loss": 0.4899, "step": 4851 }, { "epoch": 0.1487066323403212, "grad_norm": 2.321207024941387, "learning_rate": 9.635028380014893e-06, "loss": 0.7923, "step": 4852 }, { "epoch": 0.1487372808630624, "grad_norm": 1.9624361442548472, "learning_rate": 9.634842214515283e-06, "loss": 0.7952, "step": 4853 }, { "epoch": 0.1487679293858036, "grad_norm": 1.7319050453274083, "learning_rate": 9.634656003347387e-06, "loss": 0.6617, "step": 4854 }, { "epoch": 0.1487985779085448, "grad_norm": 1.1661195594335076, "learning_rate": 9.634469746513038e-06, "loss": 0.5199, "step": 4855 }, { "epoch": 0.14882922643128602, "grad_norm": 1.70202382774432, "learning_rate": 9.634283444014075e-06, "loss": 0.6678, "step": 4856 }, { "epoch": 0.14885987495402722, "grad_norm": 1.8517555093824078, "learning_rate": 9.63409709585233e-06, "loss": 0.7249, "step": 4857 }, { "epoch": 0.14889052347676843, "grad_norm": 2.070510369126959, "learning_rate": 9.633910702029641e-06, "loss": 0.837, "step": 4858 }, { "epoch": 0.14892117199950963, "grad_norm": 1.8295925057649591, "learning_rate": 9.633724262547843e-06, "loss": 0.7249, "step": 4859 }, { "epoch": 0.14895182052225084, "grad_norm": 1.9562720566473224, "learning_rate": 9.633537777408777e-06, "loss": 0.6856, "step": 4860 }, { "epoch": 0.14898246904499204, "grad_norm": 0.9674533718111031, "learning_rate": 9.633351246614275e-06, "loss": 0.5123, "step": 4861 }, { "epoch": 0.14901311756773322, "grad_norm": 1.6587814949821522, "learning_rate": 9.633164670166179e-06, "loss": 0.6947, "step": 4862 }, { "epoch": 0.14904376609047443, "grad_norm": 1.68888003351922, "learning_rate": 9.632978048066325e-06, "loss": 0.7087, "step": 4863 }, { "epoch": 0.14907441461321563, "grad_norm": 1.9158510367191572, "learning_rate": 9.632791380316556e-06, "loss": 0.7664, "step": 4864 }, { "epoch": 0.14910506313595684, "grad_norm": 1.7377329270192177, "learning_rate": 9.632604666918705e-06, "loss": 0.8081, "step": 4865 }, { "epoch": 0.14913571165869804, "grad_norm": 1.8156427358413616, "learning_rate": 9.632417907874617e-06, "loss": 0.7368, "step": 4866 }, { "epoch": 0.14916636018143925, "grad_norm": 1.6997369996345522, "learning_rate": 9.632231103186128e-06, "loss": 0.7878, "step": 4867 }, { "epoch": 0.14919700870418046, "grad_norm": 1.6423591608426962, "learning_rate": 9.632044252855082e-06, "loss": 0.7558, "step": 4868 }, { "epoch": 0.14922765722692166, "grad_norm": 1.7874722121528934, "learning_rate": 9.631857356883318e-06, "loss": 0.6612, "step": 4869 }, { "epoch": 0.14925830574966287, "grad_norm": 1.8827345916212987, "learning_rate": 9.631670415272679e-06, "loss": 0.8335, "step": 4870 }, { "epoch": 0.14928895427240407, "grad_norm": 1.676737068670265, "learning_rate": 9.631483428025007e-06, "loss": 0.7127, "step": 4871 }, { "epoch": 0.14931960279514528, "grad_norm": 1.9144567647657256, "learning_rate": 9.631296395142142e-06, "loss": 0.7549, "step": 4872 }, { "epoch": 0.14935025131788648, "grad_norm": 1.685604531783356, "learning_rate": 9.631109316625931e-06, "loss": 0.7644, "step": 4873 }, { "epoch": 0.1493808998406277, "grad_norm": 1.4941990323132015, "learning_rate": 9.630922192478213e-06, "loss": 0.6857, "step": 4874 }, { "epoch": 0.1494115483633689, "grad_norm": 1.960935991860923, "learning_rate": 9.630735022700835e-06, "loss": 0.7767, "step": 4875 }, { "epoch": 0.1494421968861101, "grad_norm": 1.8383144924933796, "learning_rate": 9.630547807295639e-06, "loss": 0.7261, "step": 4876 }, { "epoch": 0.1494728454088513, "grad_norm": 1.8155330907060545, "learning_rate": 9.63036054626447e-06, "loss": 0.7558, "step": 4877 }, { "epoch": 0.14950349393159248, "grad_norm": 2.017123346076423, "learning_rate": 9.630173239609176e-06, "loss": 0.7976, "step": 4878 }, { "epoch": 0.1495341424543337, "grad_norm": 1.9844242511490455, "learning_rate": 9.629985887331598e-06, "loss": 0.7146, "step": 4879 }, { "epoch": 0.1495647909770749, "grad_norm": 1.721521299533863, "learning_rate": 9.629798489433586e-06, "loss": 0.7817, "step": 4880 }, { "epoch": 0.1495954394998161, "grad_norm": 1.8100472700162746, "learning_rate": 9.629611045916985e-06, "loss": 0.701, "step": 4881 }, { "epoch": 0.1496260880225573, "grad_norm": 1.8314306673743885, "learning_rate": 9.629423556783641e-06, "loss": 0.7514, "step": 4882 }, { "epoch": 0.1496567365452985, "grad_norm": 2.055279193121491, "learning_rate": 9.629236022035404e-06, "loss": 0.8103, "step": 4883 }, { "epoch": 0.14968738506803972, "grad_norm": 1.977395604693014, "learning_rate": 9.629048441674117e-06, "loss": 0.8266, "step": 4884 }, { "epoch": 0.14971803359078092, "grad_norm": 1.8036209607269873, "learning_rate": 9.628860815701635e-06, "loss": 0.7066, "step": 4885 }, { "epoch": 0.14974868211352213, "grad_norm": 1.7478338207587072, "learning_rate": 9.6286731441198e-06, "loss": 0.6503, "step": 4886 }, { "epoch": 0.14977933063626334, "grad_norm": 1.8739826243521163, "learning_rate": 9.628485426930465e-06, "loss": 0.6825, "step": 4887 }, { "epoch": 0.14980997915900454, "grad_norm": 2.0355017837395613, "learning_rate": 9.62829766413548e-06, "loss": 0.761, "step": 4888 }, { "epoch": 0.14984062768174575, "grad_norm": 1.8951397969930073, "learning_rate": 9.628109855736692e-06, "loss": 0.7069, "step": 4889 }, { "epoch": 0.14987127620448695, "grad_norm": 1.9294217432988616, "learning_rate": 9.627922001735955e-06, "loss": 0.7811, "step": 4890 }, { "epoch": 0.14990192472722816, "grad_norm": 1.8617332373504707, "learning_rate": 9.627734102135118e-06, "loss": 0.7637, "step": 4891 }, { "epoch": 0.14993257324996936, "grad_norm": 1.83388729154305, "learning_rate": 9.627546156936033e-06, "loss": 0.7728, "step": 4892 }, { "epoch": 0.14996322177271054, "grad_norm": 2.0560835009524845, "learning_rate": 9.627358166140551e-06, "loss": 0.7407, "step": 4893 }, { "epoch": 0.14999387029545175, "grad_norm": 2.038576146385384, "learning_rate": 9.627170129750526e-06, "loss": 0.7549, "step": 4894 }, { "epoch": 0.15002451881819295, "grad_norm": 1.8236780781875452, "learning_rate": 9.626982047767808e-06, "loss": 0.8084, "step": 4895 }, { "epoch": 0.15005516734093416, "grad_norm": 1.6499985019312138, "learning_rate": 9.626793920194254e-06, "loss": 0.6311, "step": 4896 }, { "epoch": 0.15008581586367536, "grad_norm": 1.7855228089360686, "learning_rate": 9.626605747031715e-06, "loss": 0.7185, "step": 4897 }, { "epoch": 0.15011646438641657, "grad_norm": 1.791978070534017, "learning_rate": 9.626417528282047e-06, "loss": 0.7455, "step": 4898 }, { "epoch": 0.15014711290915778, "grad_norm": 1.6695827216113124, "learning_rate": 9.6262292639471e-06, "loss": 0.7015, "step": 4899 }, { "epoch": 0.15017776143189898, "grad_norm": 1.7975564535084028, "learning_rate": 9.626040954028735e-06, "loss": 0.6519, "step": 4900 }, { "epoch": 0.1502084099546402, "grad_norm": 1.9394062485437105, "learning_rate": 9.625852598528804e-06, "loss": 0.7786, "step": 4901 }, { "epoch": 0.1502390584773814, "grad_norm": 1.1238128606665363, "learning_rate": 9.625664197449165e-06, "loss": 0.5016, "step": 4902 }, { "epoch": 0.1502697070001226, "grad_norm": 1.9586184954369323, "learning_rate": 9.625475750791672e-06, "loss": 0.7088, "step": 4903 }, { "epoch": 0.1503003555228638, "grad_norm": 0.8744025433851047, "learning_rate": 9.625287258558183e-06, "loss": 0.4971, "step": 4904 }, { "epoch": 0.150331004045605, "grad_norm": 2.1568483095690594, "learning_rate": 9.625098720750557e-06, "loss": 0.6989, "step": 4905 }, { "epoch": 0.15036165256834622, "grad_norm": 1.8578425456087218, "learning_rate": 9.624910137370647e-06, "loss": 0.7575, "step": 4906 }, { "epoch": 0.15039230109108742, "grad_norm": 1.5424188464438946, "learning_rate": 9.624721508420316e-06, "loss": 0.6929, "step": 4907 }, { "epoch": 0.15042294961382863, "grad_norm": 1.9509540527541973, "learning_rate": 9.624532833901419e-06, "loss": 0.7221, "step": 4908 }, { "epoch": 0.1504535981365698, "grad_norm": 1.9249011243763086, "learning_rate": 9.624344113815818e-06, "loss": 0.7648, "step": 4909 }, { "epoch": 0.150484246659311, "grad_norm": 2.022911545483853, "learning_rate": 9.624155348165372e-06, "loss": 0.7334, "step": 4910 }, { "epoch": 0.15051489518205222, "grad_norm": 1.793305676802103, "learning_rate": 9.623966536951939e-06, "loss": 0.7977, "step": 4911 }, { "epoch": 0.15054554370479342, "grad_norm": 1.861403449538561, "learning_rate": 9.62377768017738e-06, "loss": 0.7475, "step": 4912 }, { "epoch": 0.15057619222753463, "grad_norm": 1.8057001135184148, "learning_rate": 9.623588777843558e-06, "loss": 0.7525, "step": 4913 }, { "epoch": 0.15060684075027583, "grad_norm": 2.043706573037171, "learning_rate": 9.623399829952332e-06, "loss": 0.6812, "step": 4914 }, { "epoch": 0.15063748927301704, "grad_norm": 1.943667825206583, "learning_rate": 9.623210836505565e-06, "loss": 0.783, "step": 4915 }, { "epoch": 0.15066813779575824, "grad_norm": 1.9006193892338594, "learning_rate": 9.623021797505118e-06, "loss": 0.7987, "step": 4916 }, { "epoch": 0.15069878631849945, "grad_norm": 1.902716001891339, "learning_rate": 9.622832712952856e-06, "loss": 0.6842, "step": 4917 }, { "epoch": 0.15072943484124066, "grad_norm": 1.810981515079503, "learning_rate": 9.62264358285064e-06, "loss": 0.8996, "step": 4918 }, { "epoch": 0.15076008336398186, "grad_norm": 1.969168384562869, "learning_rate": 9.622454407200333e-06, "loss": 0.7667, "step": 4919 }, { "epoch": 0.15079073188672307, "grad_norm": 1.8880485803433655, "learning_rate": 9.622265186003799e-06, "loss": 0.7513, "step": 4920 }, { "epoch": 0.15082138040946427, "grad_norm": 2.1642767012186113, "learning_rate": 9.622075919262905e-06, "loss": 0.727, "step": 4921 }, { "epoch": 0.15085202893220548, "grad_norm": 1.8801012400320958, "learning_rate": 9.621886606979514e-06, "loss": 0.7699, "step": 4922 }, { "epoch": 0.15088267745494668, "grad_norm": 1.2651197864852899, "learning_rate": 9.621697249155493e-06, "loss": 0.5023, "step": 4923 }, { "epoch": 0.15091332597768786, "grad_norm": 1.1046027330214496, "learning_rate": 9.621507845792705e-06, "loss": 0.5008, "step": 4924 }, { "epoch": 0.15094397450042907, "grad_norm": 1.9148735565662414, "learning_rate": 9.621318396893018e-06, "loss": 0.778, "step": 4925 }, { "epoch": 0.15097462302317027, "grad_norm": 1.8766240349604522, "learning_rate": 9.621128902458299e-06, "loss": 0.8087, "step": 4926 }, { "epoch": 0.15100527154591148, "grad_norm": 2.0876955097989662, "learning_rate": 9.620939362490414e-06, "loss": 0.8235, "step": 4927 }, { "epoch": 0.15103592006865268, "grad_norm": 1.060933283149895, "learning_rate": 9.62074977699123e-06, "loss": 0.4924, "step": 4928 }, { "epoch": 0.1510665685913939, "grad_norm": 1.1538218052813587, "learning_rate": 9.620560145962618e-06, "loss": 0.5123, "step": 4929 }, { "epoch": 0.1510972171141351, "grad_norm": 1.8562087045901994, "learning_rate": 9.620370469406443e-06, "loss": 0.7233, "step": 4930 }, { "epoch": 0.1511278656368763, "grad_norm": 2.3690880357900905, "learning_rate": 9.620180747324577e-06, "loss": 0.6634, "step": 4931 }, { "epoch": 0.1511585141596175, "grad_norm": 1.8311699943146824, "learning_rate": 9.619990979718889e-06, "loss": 0.736, "step": 4932 }, { "epoch": 0.1511891626823587, "grad_norm": 1.8926710001422808, "learning_rate": 9.619801166591247e-06, "loss": 0.8137, "step": 4933 }, { "epoch": 0.15121981120509992, "grad_norm": 1.5512956037743426, "learning_rate": 9.61961130794352e-06, "loss": 0.6633, "step": 4934 }, { "epoch": 0.15125045972784112, "grad_norm": 2.0010402321968472, "learning_rate": 9.619421403777583e-06, "loss": 0.8445, "step": 4935 }, { "epoch": 0.15128110825058233, "grad_norm": 1.9266469595640987, "learning_rate": 9.619231454095304e-06, "loss": 0.7361, "step": 4936 }, { "epoch": 0.15131175677332354, "grad_norm": 1.6238897006349466, "learning_rate": 9.619041458898557e-06, "loss": 0.6521, "step": 4937 }, { "epoch": 0.15134240529606474, "grad_norm": 1.2626966426136528, "learning_rate": 9.61885141818921e-06, "loss": 0.5123, "step": 4938 }, { "epoch": 0.15137305381880595, "grad_norm": 1.0829647220586824, "learning_rate": 9.61866133196914e-06, "loss": 0.4999, "step": 4939 }, { "epoch": 0.15140370234154713, "grad_norm": 1.8359877407829717, "learning_rate": 9.618471200240219e-06, "loss": 0.7085, "step": 4940 }, { "epoch": 0.15143435086428833, "grad_norm": 1.716696361165358, "learning_rate": 9.618281023004318e-06, "loss": 0.7998, "step": 4941 }, { "epoch": 0.15146499938702954, "grad_norm": 1.820644811791315, "learning_rate": 9.618090800263313e-06, "loss": 0.816, "step": 4942 }, { "epoch": 0.15149564790977074, "grad_norm": 2.05889173450834, "learning_rate": 9.617900532019078e-06, "loss": 0.7662, "step": 4943 }, { "epoch": 0.15152629643251195, "grad_norm": 1.806752422244448, "learning_rate": 9.617710218273486e-06, "loss": 0.766, "step": 4944 }, { "epoch": 0.15155694495525315, "grad_norm": 1.9610818530044487, "learning_rate": 9.617519859028415e-06, "loss": 0.6777, "step": 4945 }, { "epoch": 0.15158759347799436, "grad_norm": 1.9727728561501796, "learning_rate": 9.61732945428574e-06, "loss": 0.7507, "step": 4946 }, { "epoch": 0.15161824200073556, "grad_norm": 2.010934893245397, "learning_rate": 9.617139004047335e-06, "loss": 0.7741, "step": 4947 }, { "epoch": 0.15164889052347677, "grad_norm": 1.8310095389517476, "learning_rate": 9.61694850831508e-06, "loss": 0.8402, "step": 4948 }, { "epoch": 0.15167953904621798, "grad_norm": 1.9514450249192663, "learning_rate": 9.616757967090848e-06, "loss": 0.7399, "step": 4949 }, { "epoch": 0.15171018756895918, "grad_norm": 1.8271939613741288, "learning_rate": 9.61656738037652e-06, "loss": 0.7766, "step": 4950 }, { "epoch": 0.1517408360917004, "grad_norm": 1.6622455548750146, "learning_rate": 9.616376748173973e-06, "loss": 0.5035, "step": 4951 }, { "epoch": 0.1517714846144416, "grad_norm": 1.857851798974954, "learning_rate": 9.616186070485082e-06, "loss": 0.7307, "step": 4952 }, { "epoch": 0.1518021331371828, "grad_norm": 2.358927792029898, "learning_rate": 9.61599534731173e-06, "loss": 0.7406, "step": 4953 }, { "epoch": 0.151832781659924, "grad_norm": 0.9845846523551288, "learning_rate": 9.615804578655796e-06, "loss": 0.511, "step": 4954 }, { "epoch": 0.15186343018266518, "grad_norm": 2.027406899280921, "learning_rate": 9.615613764519155e-06, "loss": 0.7844, "step": 4955 }, { "epoch": 0.1518940787054064, "grad_norm": 1.9620824993404762, "learning_rate": 9.615422904903695e-06, "loss": 0.7796, "step": 4956 }, { "epoch": 0.1519247272281476, "grad_norm": 2.102523991015329, "learning_rate": 9.61523199981129e-06, "loss": 0.8417, "step": 4957 }, { "epoch": 0.1519553757508888, "grad_norm": 1.702552194195034, "learning_rate": 9.615041049243825e-06, "loss": 0.6837, "step": 4958 }, { "epoch": 0.15198602427363, "grad_norm": 2.0003801509956594, "learning_rate": 9.614850053203178e-06, "loss": 0.7611, "step": 4959 }, { "epoch": 0.1520166727963712, "grad_norm": 1.9898306162227992, "learning_rate": 9.614659011691232e-06, "loss": 0.7447, "step": 4960 }, { "epoch": 0.15204732131911242, "grad_norm": 1.885675763553351, "learning_rate": 9.61446792470987e-06, "loss": 0.7258, "step": 4961 }, { "epoch": 0.15207796984185362, "grad_norm": 2.0727705193722317, "learning_rate": 9.614276792260978e-06, "loss": 0.6849, "step": 4962 }, { "epoch": 0.15210861836459483, "grad_norm": 1.6471528035201624, "learning_rate": 9.614085614346433e-06, "loss": 0.7236, "step": 4963 }, { "epoch": 0.15213926688733603, "grad_norm": 1.3254722447343434, "learning_rate": 9.613894390968121e-06, "loss": 0.5226, "step": 4964 }, { "epoch": 0.15216991541007724, "grad_norm": 2.1050857952199915, "learning_rate": 9.61370312212793e-06, "loss": 0.7965, "step": 4965 }, { "epoch": 0.15220056393281844, "grad_norm": 1.8649773394980012, "learning_rate": 9.61351180782774e-06, "loss": 0.8079, "step": 4966 }, { "epoch": 0.15223121245555965, "grad_norm": 0.9215812296264748, "learning_rate": 9.61332044806944e-06, "loss": 0.5065, "step": 4967 }, { "epoch": 0.15226186097830086, "grad_norm": 1.8785560774864332, "learning_rate": 9.61312904285491e-06, "loss": 0.7555, "step": 4968 }, { "epoch": 0.15229250950104206, "grad_norm": 1.7542519065394, "learning_rate": 9.612937592186041e-06, "loss": 0.6362, "step": 4969 }, { "epoch": 0.15232315802378327, "grad_norm": 2.0284587943750148, "learning_rate": 9.612746096064718e-06, "loss": 0.679, "step": 4970 }, { "epoch": 0.15235380654652445, "grad_norm": 0.9435527801406817, "learning_rate": 9.612554554492825e-06, "loss": 0.502, "step": 4971 }, { "epoch": 0.15238445506926565, "grad_norm": 1.7330113220106413, "learning_rate": 9.612362967472254e-06, "loss": 0.7655, "step": 4972 }, { "epoch": 0.15241510359200686, "grad_norm": 0.8750767653185756, "learning_rate": 9.612171335004892e-06, "loss": 0.4878, "step": 4973 }, { "epoch": 0.15244575211474806, "grad_norm": 2.1891459640823676, "learning_rate": 9.611979657092622e-06, "loss": 0.8293, "step": 4974 }, { "epoch": 0.15247640063748927, "grad_norm": 1.8300687620442613, "learning_rate": 9.61178793373734e-06, "loss": 0.7736, "step": 4975 }, { "epoch": 0.15250704916023047, "grad_norm": 0.8540374587367847, "learning_rate": 9.611596164940929e-06, "loss": 0.5196, "step": 4976 }, { "epoch": 0.15253769768297168, "grad_norm": 2.120033881137624, "learning_rate": 9.611404350705283e-06, "loss": 0.7305, "step": 4977 }, { "epoch": 0.15256834620571288, "grad_norm": 1.9359405557341316, "learning_rate": 9.611212491032289e-06, "loss": 0.7215, "step": 4978 }, { "epoch": 0.1525989947284541, "grad_norm": 1.859049912922564, "learning_rate": 9.611020585923838e-06, "loss": 0.7804, "step": 4979 }, { "epoch": 0.1526296432511953, "grad_norm": 1.9416806444536179, "learning_rate": 9.610828635381822e-06, "loss": 0.7971, "step": 4980 }, { "epoch": 0.1526602917739365, "grad_norm": 1.741337793273298, "learning_rate": 9.610636639408132e-06, "loss": 0.7215, "step": 4981 }, { "epoch": 0.1526909402966777, "grad_norm": 1.7724356162366306, "learning_rate": 9.610444598004658e-06, "loss": 0.7426, "step": 4982 }, { "epoch": 0.1527215888194189, "grad_norm": 0.9943146411569626, "learning_rate": 9.610252511173297e-06, "loss": 0.5268, "step": 4983 }, { "epoch": 0.15275223734216012, "grad_norm": 2.0508957218730193, "learning_rate": 9.610060378915935e-06, "loss": 0.6606, "step": 4984 }, { "epoch": 0.15278288586490132, "grad_norm": 2.486130574711528, "learning_rate": 9.60986820123447e-06, "loss": 0.729, "step": 4985 }, { "epoch": 0.1528135343876425, "grad_norm": 1.9631669533824776, "learning_rate": 9.609675978130795e-06, "loss": 0.7487, "step": 4986 }, { "epoch": 0.1528441829103837, "grad_norm": 1.9772670843178737, "learning_rate": 9.609483709606802e-06, "loss": 0.8225, "step": 4987 }, { "epoch": 0.1528748314331249, "grad_norm": 1.9347426073861826, "learning_rate": 9.609291395664387e-06, "loss": 0.731, "step": 4988 }, { "epoch": 0.15290547995586612, "grad_norm": 1.8844134184237011, "learning_rate": 9.609099036305443e-06, "loss": 0.8138, "step": 4989 }, { "epoch": 0.15293612847860732, "grad_norm": 1.911166342382071, "learning_rate": 9.608906631531869e-06, "loss": 0.6511, "step": 4990 }, { "epoch": 0.15296677700134853, "grad_norm": 1.857424655042543, "learning_rate": 9.608714181345558e-06, "loss": 0.779, "step": 4991 }, { "epoch": 0.15299742552408974, "grad_norm": 2.0463717559299424, "learning_rate": 9.608521685748406e-06, "loss": 0.9343, "step": 4992 }, { "epoch": 0.15302807404683094, "grad_norm": 1.748026707961763, "learning_rate": 9.608329144742312e-06, "loss": 0.7167, "step": 4993 }, { "epoch": 0.15305872256957215, "grad_norm": 1.7983359403085313, "learning_rate": 9.608136558329172e-06, "loss": 0.8008, "step": 4994 }, { "epoch": 0.15308937109231335, "grad_norm": 0.9282221979478622, "learning_rate": 9.607943926510883e-06, "loss": 0.5139, "step": 4995 }, { "epoch": 0.15312001961505456, "grad_norm": 1.7064415312822678, "learning_rate": 9.607751249289342e-06, "loss": 0.8497, "step": 4996 }, { "epoch": 0.15315066813779576, "grad_norm": 1.7859972059577651, "learning_rate": 9.607558526666451e-06, "loss": 0.7237, "step": 4997 }, { "epoch": 0.15318131666053697, "grad_norm": 1.747965821508336, "learning_rate": 9.607365758644107e-06, "loss": 0.6625, "step": 4998 }, { "epoch": 0.15321196518327818, "grad_norm": 1.6928933720057302, "learning_rate": 9.607172945224208e-06, "loss": 0.641, "step": 4999 }, { "epoch": 0.15324261370601938, "grad_norm": 2.559162463398214, "learning_rate": 9.606980086408657e-06, "loss": 0.7298, "step": 5000 }, { "epoch": 0.1532732622287606, "grad_norm": 1.9823396237492439, "learning_rate": 9.60678718219935e-06, "loss": 0.7669, "step": 5001 }, { "epoch": 0.15330391075150177, "grad_norm": 1.8028221231128476, "learning_rate": 9.606594232598192e-06, "loss": 0.7954, "step": 5002 }, { "epoch": 0.15333455927424297, "grad_norm": 1.7564974370508564, "learning_rate": 9.606401237607081e-06, "loss": 0.7458, "step": 5003 }, { "epoch": 0.15336520779698418, "grad_norm": 1.8905490084242071, "learning_rate": 9.606208197227922e-06, "loss": 0.8244, "step": 5004 }, { "epoch": 0.15339585631972538, "grad_norm": 0.9778481255490409, "learning_rate": 9.606015111462614e-06, "loss": 0.5137, "step": 5005 }, { "epoch": 0.1534265048424666, "grad_norm": 1.7698628504608398, "learning_rate": 9.605821980313061e-06, "loss": 0.7883, "step": 5006 }, { "epoch": 0.1534571533652078, "grad_norm": 1.9232486742355663, "learning_rate": 9.605628803781165e-06, "loss": 0.796, "step": 5007 }, { "epoch": 0.153487801887949, "grad_norm": 1.9588176604012892, "learning_rate": 9.60543558186883e-06, "loss": 0.6426, "step": 5008 }, { "epoch": 0.1535184504106902, "grad_norm": 2.5059429848920294, "learning_rate": 9.605242314577961e-06, "loss": 0.7323, "step": 5009 }, { "epoch": 0.1535490989334314, "grad_norm": 1.8906135740951564, "learning_rate": 9.605049001910458e-06, "loss": 0.8668, "step": 5010 }, { "epoch": 0.15357974745617262, "grad_norm": 1.8712167907372284, "learning_rate": 9.604855643868231e-06, "loss": 0.7903, "step": 5011 }, { "epoch": 0.15361039597891382, "grad_norm": 1.8795116596153976, "learning_rate": 9.604662240453185e-06, "loss": 0.8012, "step": 5012 }, { "epoch": 0.15364104450165503, "grad_norm": 1.902521943900373, "learning_rate": 9.604468791667221e-06, "loss": 0.7296, "step": 5013 }, { "epoch": 0.15367169302439623, "grad_norm": 1.850874391100053, "learning_rate": 9.60427529751225e-06, "loss": 0.6674, "step": 5014 }, { "epoch": 0.15370234154713744, "grad_norm": 1.6082597273118906, "learning_rate": 9.604081757990175e-06, "loss": 0.773, "step": 5015 }, { "epoch": 0.15373299006987864, "grad_norm": 2.0137017323957265, "learning_rate": 9.603888173102904e-06, "loss": 0.7318, "step": 5016 }, { "epoch": 0.15376363859261982, "grad_norm": 1.9672086178507375, "learning_rate": 9.603694542852346e-06, "loss": 0.809, "step": 5017 }, { "epoch": 0.15379428711536103, "grad_norm": 1.9777378783535216, "learning_rate": 9.603500867240408e-06, "loss": 0.8054, "step": 5018 }, { "epoch": 0.15382493563810223, "grad_norm": 1.7041048745384892, "learning_rate": 9.603307146268998e-06, "loss": 0.7276, "step": 5019 }, { "epoch": 0.15385558416084344, "grad_norm": 2.1913129895473564, "learning_rate": 9.603113379940024e-06, "loss": 0.7248, "step": 5020 }, { "epoch": 0.15388623268358465, "grad_norm": 1.694984739947166, "learning_rate": 9.602919568255396e-06, "loss": 0.6593, "step": 5021 }, { "epoch": 0.15391688120632585, "grad_norm": 1.9553571639928207, "learning_rate": 9.602725711217025e-06, "loss": 0.7268, "step": 5022 }, { "epoch": 0.15394752972906706, "grad_norm": 2.084110222850507, "learning_rate": 9.60253180882682e-06, "loss": 0.9045, "step": 5023 }, { "epoch": 0.15397817825180826, "grad_norm": 1.8205497777438984, "learning_rate": 9.60233786108669e-06, "loss": 0.7484, "step": 5024 }, { "epoch": 0.15400882677454947, "grad_norm": 1.8317679413711365, "learning_rate": 9.602143867998548e-06, "loss": 0.738, "step": 5025 }, { "epoch": 0.15403947529729067, "grad_norm": 2.3360845061608906, "learning_rate": 9.601949829564305e-06, "loss": 0.8491, "step": 5026 }, { "epoch": 0.15407012382003188, "grad_norm": 1.8206608756981533, "learning_rate": 9.601755745785873e-06, "loss": 0.7342, "step": 5027 }, { "epoch": 0.15410077234277308, "grad_norm": 1.7960065328139503, "learning_rate": 9.601561616665164e-06, "loss": 0.7772, "step": 5028 }, { "epoch": 0.1541314208655143, "grad_norm": 1.9156586129654503, "learning_rate": 9.601367442204093e-06, "loss": 0.7148, "step": 5029 }, { "epoch": 0.1541620693882555, "grad_norm": 1.7955958157434158, "learning_rate": 9.601173222404568e-06, "loss": 0.6924, "step": 5030 }, { "epoch": 0.1541927179109967, "grad_norm": 0.9926660541108828, "learning_rate": 9.600978957268508e-06, "loss": 0.4953, "step": 5031 }, { "epoch": 0.1542233664337379, "grad_norm": 1.869260423948571, "learning_rate": 9.600784646797825e-06, "loss": 0.8095, "step": 5032 }, { "epoch": 0.15425401495647909, "grad_norm": 1.8736199442154888, "learning_rate": 9.600590290994434e-06, "loss": 0.8104, "step": 5033 }, { "epoch": 0.1542846634792203, "grad_norm": 1.8882361163227945, "learning_rate": 9.600395889860248e-06, "loss": 0.7156, "step": 5034 }, { "epoch": 0.1543153120019615, "grad_norm": 1.8564231524361279, "learning_rate": 9.600201443397185e-06, "loss": 0.8155, "step": 5035 }, { "epoch": 0.1543459605247027, "grad_norm": 2.2277225106167675, "learning_rate": 9.600006951607162e-06, "loss": 0.7388, "step": 5036 }, { "epoch": 0.1543766090474439, "grad_norm": 1.8850902787197519, "learning_rate": 9.599812414492092e-06, "loss": 0.7347, "step": 5037 }, { "epoch": 0.1544072575701851, "grad_norm": 2.172666331564381, "learning_rate": 9.599617832053893e-06, "loss": 0.6994, "step": 5038 }, { "epoch": 0.15443790609292632, "grad_norm": 1.7873216627739106, "learning_rate": 9.599423204294484e-06, "loss": 0.7906, "step": 5039 }, { "epoch": 0.15446855461566752, "grad_norm": 1.7649473111261356, "learning_rate": 9.599228531215779e-06, "loss": 0.7365, "step": 5040 }, { "epoch": 0.15449920313840873, "grad_norm": 1.882598349276251, "learning_rate": 9.5990338128197e-06, "loss": 0.6814, "step": 5041 }, { "epoch": 0.15452985166114994, "grad_norm": 1.8005367236312326, "learning_rate": 9.598839049108164e-06, "loss": 0.7766, "step": 5042 }, { "epoch": 0.15456050018389114, "grad_norm": 1.8676300143341749, "learning_rate": 9.59864424008309e-06, "loss": 0.8083, "step": 5043 }, { "epoch": 0.15459114870663235, "grad_norm": 0.8933753121276299, "learning_rate": 9.598449385746399e-06, "loss": 0.5202, "step": 5044 }, { "epoch": 0.15462179722937355, "grad_norm": 1.9288547771502222, "learning_rate": 9.59825448610001e-06, "loss": 0.7729, "step": 5045 }, { "epoch": 0.15465244575211476, "grad_norm": 1.9835557833099156, "learning_rate": 9.598059541145841e-06, "loss": 0.7543, "step": 5046 }, { "epoch": 0.15468309427485596, "grad_norm": 1.6393293340864799, "learning_rate": 9.597864550885816e-06, "loss": 0.7848, "step": 5047 }, { "epoch": 0.15471374279759714, "grad_norm": 1.9581320970023075, "learning_rate": 9.597669515321853e-06, "loss": 0.8559, "step": 5048 }, { "epoch": 0.15474439132033835, "grad_norm": 1.8592104119709134, "learning_rate": 9.597474434455878e-06, "loss": 0.774, "step": 5049 }, { "epoch": 0.15477503984307955, "grad_norm": 0.8450053069918761, "learning_rate": 9.597279308289811e-06, "loss": 0.5318, "step": 5050 }, { "epoch": 0.15480568836582076, "grad_norm": 1.700358065853225, "learning_rate": 9.597084136825573e-06, "loss": 0.7333, "step": 5051 }, { "epoch": 0.15483633688856197, "grad_norm": 1.8457705053108413, "learning_rate": 9.59688892006509e-06, "loss": 0.7991, "step": 5052 }, { "epoch": 0.15486698541130317, "grad_norm": 1.8801634908376847, "learning_rate": 9.596693658010286e-06, "loss": 0.7468, "step": 5053 }, { "epoch": 0.15489763393404438, "grad_norm": 1.9370980308496963, "learning_rate": 9.596498350663082e-06, "loss": 0.7887, "step": 5054 }, { "epoch": 0.15492828245678558, "grad_norm": 2.095288059168411, "learning_rate": 9.596302998025403e-06, "loss": 0.6768, "step": 5055 }, { "epoch": 0.1549589309795268, "grad_norm": 1.9986292262217749, "learning_rate": 9.596107600099176e-06, "loss": 0.7365, "step": 5056 }, { "epoch": 0.154989579502268, "grad_norm": 1.821893691405891, "learning_rate": 9.595912156886323e-06, "loss": 0.8031, "step": 5057 }, { "epoch": 0.1550202280250092, "grad_norm": 1.8349222451562888, "learning_rate": 9.595716668388773e-06, "loss": 0.6913, "step": 5058 }, { "epoch": 0.1550508765477504, "grad_norm": 1.70321420403657, "learning_rate": 9.59552113460845e-06, "loss": 0.7822, "step": 5059 }, { "epoch": 0.1550815250704916, "grad_norm": 1.9096561931442857, "learning_rate": 9.595325555547281e-06, "loss": 0.6968, "step": 5060 }, { "epoch": 0.15511217359323282, "grad_norm": 0.898474363234282, "learning_rate": 9.595129931207194e-06, "loss": 0.5164, "step": 5061 }, { "epoch": 0.15514282211597402, "grad_norm": 1.73024570630405, "learning_rate": 9.594934261590117e-06, "loss": 0.6985, "step": 5062 }, { "epoch": 0.15517347063871523, "grad_norm": 0.8650357337815794, "learning_rate": 9.594738546697977e-06, "loss": 0.5046, "step": 5063 }, { "epoch": 0.1552041191614564, "grad_norm": 0.8277615999566638, "learning_rate": 9.594542786532702e-06, "loss": 0.4978, "step": 5064 }, { "epoch": 0.1552347676841976, "grad_norm": 1.8918084223596983, "learning_rate": 9.594346981096221e-06, "loss": 0.8306, "step": 5065 }, { "epoch": 0.15526541620693882, "grad_norm": 1.9124935864487576, "learning_rate": 9.594151130390463e-06, "loss": 0.7203, "step": 5066 }, { "epoch": 0.15529606472968002, "grad_norm": 0.8783548540608576, "learning_rate": 9.593955234417361e-06, "loss": 0.5293, "step": 5067 }, { "epoch": 0.15532671325242123, "grad_norm": 1.8399394948497365, "learning_rate": 9.593759293178839e-06, "loss": 0.7489, "step": 5068 }, { "epoch": 0.15535736177516243, "grad_norm": 1.7328212241977998, "learning_rate": 9.593563306676835e-06, "loss": 0.6969, "step": 5069 }, { "epoch": 0.15538801029790364, "grad_norm": 2.0916824427488443, "learning_rate": 9.593367274913274e-06, "loss": 0.8496, "step": 5070 }, { "epoch": 0.15541865882064484, "grad_norm": 1.697821301618154, "learning_rate": 9.59317119789009e-06, "loss": 0.7489, "step": 5071 }, { "epoch": 0.15544930734338605, "grad_norm": 1.8111843028782069, "learning_rate": 9.592975075609216e-06, "loss": 0.7393, "step": 5072 }, { "epoch": 0.15547995586612726, "grad_norm": 1.5374956618027378, "learning_rate": 9.592778908072583e-06, "loss": 0.6875, "step": 5073 }, { "epoch": 0.15551060438886846, "grad_norm": 1.8114345962831297, "learning_rate": 9.592582695282124e-06, "loss": 0.8158, "step": 5074 }, { "epoch": 0.15554125291160967, "grad_norm": 1.9911900070046253, "learning_rate": 9.592386437239773e-06, "loss": 0.629, "step": 5075 }, { "epoch": 0.15557190143435087, "grad_norm": 0.9916124610203599, "learning_rate": 9.592190133947465e-06, "loss": 0.5325, "step": 5076 }, { "epoch": 0.15560254995709208, "grad_norm": 2.0988693938367913, "learning_rate": 9.59199378540713e-06, "loss": 0.7624, "step": 5077 }, { "epoch": 0.15563319847983328, "grad_norm": 1.6916139598888045, "learning_rate": 9.591797391620708e-06, "loss": 0.6509, "step": 5078 }, { "epoch": 0.15566384700257446, "grad_norm": 1.7727003229326794, "learning_rate": 9.591600952590129e-06, "loss": 0.6526, "step": 5079 }, { "epoch": 0.15569449552531567, "grad_norm": 1.4229197738967037, "learning_rate": 9.59140446831733e-06, "loss": 0.5015, "step": 5080 }, { "epoch": 0.15572514404805687, "grad_norm": 1.9760648141308779, "learning_rate": 9.591207938804252e-06, "loss": 0.8086, "step": 5081 }, { "epoch": 0.15575579257079808, "grad_norm": 1.6272913497989536, "learning_rate": 9.591011364052825e-06, "loss": 0.7596, "step": 5082 }, { "epoch": 0.15578644109353929, "grad_norm": 1.7987948912162233, "learning_rate": 9.59081474406499e-06, "loss": 0.696, "step": 5083 }, { "epoch": 0.1558170896162805, "grad_norm": 1.827964707615435, "learning_rate": 9.590618078842679e-06, "loss": 0.7857, "step": 5084 }, { "epoch": 0.1558477381390217, "grad_norm": 1.789233427938964, "learning_rate": 9.590421368387837e-06, "loss": 0.7254, "step": 5085 }, { "epoch": 0.1558783866617629, "grad_norm": 1.8169096656651034, "learning_rate": 9.590224612702398e-06, "loss": 0.7358, "step": 5086 }, { "epoch": 0.1559090351845041, "grad_norm": 1.850002736986028, "learning_rate": 9.590027811788301e-06, "loss": 0.8665, "step": 5087 }, { "epoch": 0.1559396837072453, "grad_norm": 1.6841708766387864, "learning_rate": 9.589830965647487e-06, "loss": 0.7547, "step": 5088 }, { "epoch": 0.15597033222998652, "grad_norm": 1.8902917368364884, "learning_rate": 9.589634074281891e-06, "loss": 0.869, "step": 5089 }, { "epoch": 0.15600098075272772, "grad_norm": 1.0317472094022113, "learning_rate": 9.589437137693459e-06, "loss": 0.5157, "step": 5090 }, { "epoch": 0.15603162927546893, "grad_norm": 1.7483247691041017, "learning_rate": 9.589240155884128e-06, "loss": 0.7192, "step": 5091 }, { "epoch": 0.15606227779821014, "grad_norm": 1.7113885344873239, "learning_rate": 9.589043128855838e-06, "loss": 0.7177, "step": 5092 }, { "epoch": 0.15609292632095134, "grad_norm": 1.7187625803699333, "learning_rate": 9.588846056610533e-06, "loss": 0.7844, "step": 5093 }, { "epoch": 0.15612357484369255, "grad_norm": 1.7093701697730832, "learning_rate": 9.588648939150153e-06, "loss": 0.7083, "step": 5094 }, { "epoch": 0.15615422336643373, "grad_norm": 1.9755750104931336, "learning_rate": 9.588451776476643e-06, "loss": 0.8044, "step": 5095 }, { "epoch": 0.15618487188917493, "grad_norm": 0.8831459209650383, "learning_rate": 9.588254568591942e-06, "loss": 0.5172, "step": 5096 }, { "epoch": 0.15621552041191614, "grad_norm": 1.7603305191856293, "learning_rate": 9.588057315497995e-06, "loss": 0.7561, "step": 5097 }, { "epoch": 0.15624616893465734, "grad_norm": 1.7586976049834457, "learning_rate": 9.587860017196747e-06, "loss": 0.7298, "step": 5098 }, { "epoch": 0.15627681745739855, "grad_norm": 1.6965168590618427, "learning_rate": 9.587662673690137e-06, "loss": 0.5977, "step": 5099 }, { "epoch": 0.15630746598013975, "grad_norm": 1.8219887751221908, "learning_rate": 9.587465284980115e-06, "loss": 0.6792, "step": 5100 }, { "epoch": 0.15633811450288096, "grad_norm": 1.7716365035851085, "learning_rate": 9.587267851068624e-06, "loss": 0.6612, "step": 5101 }, { "epoch": 0.15636876302562217, "grad_norm": 2.110526822306012, "learning_rate": 9.587070371957608e-06, "loss": 0.7039, "step": 5102 }, { "epoch": 0.15639941154836337, "grad_norm": 1.9430997188437937, "learning_rate": 9.586872847649016e-06, "loss": 0.754, "step": 5103 }, { "epoch": 0.15643006007110458, "grad_norm": 1.870425736375899, "learning_rate": 9.58667527814479e-06, "loss": 0.8692, "step": 5104 }, { "epoch": 0.15646070859384578, "grad_norm": 1.8401177042762624, "learning_rate": 9.58647766344688e-06, "loss": 0.7296, "step": 5105 }, { "epoch": 0.156491357116587, "grad_norm": 1.9119662168182623, "learning_rate": 9.58628000355723e-06, "loss": 0.7314, "step": 5106 }, { "epoch": 0.1565220056393282, "grad_norm": 1.968428344052431, "learning_rate": 9.586082298477794e-06, "loss": 0.6272, "step": 5107 }, { "epoch": 0.1565526541620694, "grad_norm": 1.8313396531397326, "learning_rate": 9.585884548210513e-06, "loss": 0.6872, "step": 5108 }, { "epoch": 0.1565833026848106, "grad_norm": 1.0181517441881718, "learning_rate": 9.585686752757339e-06, "loss": 0.5116, "step": 5109 }, { "epoch": 0.15661395120755178, "grad_norm": 1.7221674184168783, "learning_rate": 9.58548891212022e-06, "loss": 0.9026, "step": 5110 }, { "epoch": 0.156644599730293, "grad_norm": 1.8111589430743487, "learning_rate": 9.585291026301105e-06, "loss": 0.8136, "step": 5111 }, { "epoch": 0.1566752482530342, "grad_norm": 1.7870403139349413, "learning_rate": 9.585093095301944e-06, "loss": 0.7837, "step": 5112 }, { "epoch": 0.1567058967757754, "grad_norm": 1.4777355090266222, "learning_rate": 9.584895119124688e-06, "loss": 0.6185, "step": 5113 }, { "epoch": 0.1567365452985166, "grad_norm": 1.7329681721827148, "learning_rate": 9.584697097771287e-06, "loss": 0.653, "step": 5114 }, { "epoch": 0.1567671938212578, "grad_norm": 1.8407558476668129, "learning_rate": 9.584499031243693e-06, "loss": 0.7807, "step": 5115 }, { "epoch": 0.15679784234399902, "grad_norm": 1.9698417993495616, "learning_rate": 9.584300919543856e-06, "loss": 0.7281, "step": 5116 }, { "epoch": 0.15682849086674022, "grad_norm": 1.5954792668322741, "learning_rate": 9.58410276267373e-06, "loss": 0.7202, "step": 5117 }, { "epoch": 0.15685913938948143, "grad_norm": 1.9156200788911737, "learning_rate": 9.583904560635267e-06, "loss": 0.8528, "step": 5118 }, { "epoch": 0.15688978791222263, "grad_norm": 0.981594511247581, "learning_rate": 9.583706313430418e-06, "loss": 0.5318, "step": 5119 }, { "epoch": 0.15692043643496384, "grad_norm": 2.5833646235387113, "learning_rate": 9.583508021061141e-06, "loss": 0.6451, "step": 5120 }, { "epoch": 0.15695108495770504, "grad_norm": 2.0317285771869416, "learning_rate": 9.583309683529384e-06, "loss": 0.8006, "step": 5121 }, { "epoch": 0.15698173348044625, "grad_norm": 1.8355131820098078, "learning_rate": 9.583111300837105e-06, "loss": 0.7651, "step": 5122 }, { "epoch": 0.15701238200318746, "grad_norm": 1.885492692384682, "learning_rate": 9.582912872986256e-06, "loss": 0.7983, "step": 5123 }, { "epoch": 0.15704303052592866, "grad_norm": 1.9670629896976872, "learning_rate": 9.582714399978796e-06, "loss": 0.7911, "step": 5124 }, { "epoch": 0.15707367904866987, "grad_norm": 1.7756485982147419, "learning_rate": 9.582515881816678e-06, "loss": 0.7933, "step": 5125 }, { "epoch": 0.15710432757141105, "grad_norm": 0.8623989997286543, "learning_rate": 9.582317318501859e-06, "loss": 0.5388, "step": 5126 }, { "epoch": 0.15713497609415225, "grad_norm": 1.999203164431094, "learning_rate": 9.582118710036293e-06, "loss": 0.6752, "step": 5127 }, { "epoch": 0.15716562461689346, "grad_norm": 1.804816836061846, "learning_rate": 9.58192005642194e-06, "loss": 0.6766, "step": 5128 }, { "epoch": 0.15719627313963466, "grad_norm": 0.81428512691434, "learning_rate": 9.581721357660757e-06, "loss": 0.4879, "step": 5129 }, { "epoch": 0.15722692166237587, "grad_norm": 1.622389324538969, "learning_rate": 9.581522613754702e-06, "loss": 0.5278, "step": 5130 }, { "epoch": 0.15725757018511707, "grad_norm": 0.768464865539869, "learning_rate": 9.58132382470573e-06, "loss": 0.4916, "step": 5131 }, { "epoch": 0.15728821870785828, "grad_norm": 1.7964402035675044, "learning_rate": 9.581124990515805e-06, "loss": 0.7336, "step": 5132 }, { "epoch": 0.15731886723059949, "grad_norm": 1.9380135334950317, "learning_rate": 9.58092611118688e-06, "loss": 0.7479, "step": 5133 }, { "epoch": 0.1573495157533407, "grad_norm": 1.8257303066577855, "learning_rate": 9.580727186720919e-06, "loss": 0.8102, "step": 5134 }, { "epoch": 0.1573801642760819, "grad_norm": 1.9039582670459556, "learning_rate": 9.580528217119882e-06, "loss": 0.7647, "step": 5135 }, { "epoch": 0.1574108127988231, "grad_norm": 1.6411048719196422, "learning_rate": 9.580329202385729e-06, "loss": 0.731, "step": 5136 }, { "epoch": 0.1574414613215643, "grad_norm": 2.1016600343895804, "learning_rate": 9.580130142520419e-06, "loss": 0.7654, "step": 5137 }, { "epoch": 0.1574721098443055, "grad_norm": 1.7432868374248809, "learning_rate": 9.579931037525915e-06, "loss": 0.6748, "step": 5138 }, { "epoch": 0.15750275836704672, "grad_norm": 1.6068917965083642, "learning_rate": 9.57973188740418e-06, "loss": 0.7557, "step": 5139 }, { "epoch": 0.15753340688978792, "grad_norm": 1.9844809145822924, "learning_rate": 9.579532692157174e-06, "loss": 0.7924, "step": 5140 }, { "epoch": 0.1575640554125291, "grad_norm": 1.77067728439259, "learning_rate": 9.57933345178686e-06, "loss": 0.8518, "step": 5141 }, { "epoch": 0.1575947039352703, "grad_norm": 1.816567274494751, "learning_rate": 9.579134166295203e-06, "loss": 0.751, "step": 5142 }, { "epoch": 0.15762535245801151, "grad_norm": 2.048672428683356, "learning_rate": 9.578934835684166e-06, "loss": 0.6409, "step": 5143 }, { "epoch": 0.15765600098075272, "grad_norm": 1.4953270398664988, "learning_rate": 9.57873545995571e-06, "loss": 0.6965, "step": 5144 }, { "epoch": 0.15768664950349393, "grad_norm": 1.7465775697767942, "learning_rate": 9.578536039111806e-06, "loss": 0.6926, "step": 5145 }, { "epoch": 0.15771729802623513, "grad_norm": 1.6845502784245825, "learning_rate": 9.578336573154411e-06, "loss": 0.7228, "step": 5146 }, { "epoch": 0.15774794654897634, "grad_norm": 1.8996823091411785, "learning_rate": 9.578137062085496e-06, "loss": 0.8652, "step": 5147 }, { "epoch": 0.15777859507171754, "grad_norm": 1.7485123849079585, "learning_rate": 9.577937505907024e-06, "loss": 0.6952, "step": 5148 }, { "epoch": 0.15780924359445875, "grad_norm": 1.711725358587465, "learning_rate": 9.577737904620963e-06, "loss": 0.7136, "step": 5149 }, { "epoch": 0.15783989211719995, "grad_norm": 1.7035182918301366, "learning_rate": 9.57753825822928e-06, "loss": 0.7487, "step": 5150 }, { "epoch": 0.15787054063994116, "grad_norm": 1.9589086225210777, "learning_rate": 9.57733856673394e-06, "loss": 0.7343, "step": 5151 }, { "epoch": 0.15790118916268236, "grad_norm": 1.1071186588933413, "learning_rate": 9.577138830136914e-06, "loss": 0.5099, "step": 5152 }, { "epoch": 0.15793183768542357, "grad_norm": 1.6262478571984178, "learning_rate": 9.576939048440167e-06, "loss": 0.7173, "step": 5153 }, { "epoch": 0.15796248620816478, "grad_norm": 1.965753813870681, "learning_rate": 9.576739221645669e-06, "loss": 0.7898, "step": 5154 }, { "epoch": 0.15799313473090598, "grad_norm": 2.0600993815500095, "learning_rate": 9.576539349755387e-06, "loss": 0.8077, "step": 5155 }, { "epoch": 0.1580237832536472, "grad_norm": 1.88933676987593, "learning_rate": 9.576339432771293e-06, "loss": 0.811, "step": 5156 }, { "epoch": 0.15805443177638837, "grad_norm": 1.9845059537048446, "learning_rate": 9.576139470695353e-06, "loss": 0.8165, "step": 5157 }, { "epoch": 0.15808508029912957, "grad_norm": 1.7896327695185645, "learning_rate": 9.575939463529544e-06, "loss": 0.7877, "step": 5158 }, { "epoch": 0.15811572882187078, "grad_norm": 1.8547976083637447, "learning_rate": 9.57573941127583e-06, "loss": 0.7137, "step": 5159 }, { "epoch": 0.15814637734461198, "grad_norm": 1.7705575200042662, "learning_rate": 9.575539313936186e-06, "loss": 0.6463, "step": 5160 }, { "epoch": 0.1581770258673532, "grad_norm": 1.948829150856235, "learning_rate": 9.575339171512582e-06, "loss": 0.7897, "step": 5161 }, { "epoch": 0.1582076743900944, "grad_norm": 1.6938357120528083, "learning_rate": 9.57513898400699e-06, "loss": 0.752, "step": 5162 }, { "epoch": 0.1582383229128356, "grad_norm": 1.884229125820616, "learning_rate": 9.574938751421384e-06, "loss": 0.7773, "step": 5163 }, { "epoch": 0.1582689714355768, "grad_norm": 1.8192303133349992, "learning_rate": 9.574738473757737e-06, "loss": 0.7376, "step": 5164 }, { "epoch": 0.158299619958318, "grad_norm": 0.9791251324683953, "learning_rate": 9.574538151018018e-06, "loss": 0.4905, "step": 5165 }, { "epoch": 0.15833026848105922, "grad_norm": 2.072805768222092, "learning_rate": 9.574337783204206e-06, "loss": 0.82, "step": 5166 }, { "epoch": 0.15836091700380042, "grad_norm": 1.7501174242734596, "learning_rate": 9.574137370318275e-06, "loss": 0.7433, "step": 5167 }, { "epoch": 0.15839156552654163, "grad_norm": 1.6700361975512705, "learning_rate": 9.573936912362195e-06, "loss": 0.7762, "step": 5168 }, { "epoch": 0.15842221404928283, "grad_norm": 1.7551248604073364, "learning_rate": 9.573736409337946e-06, "loss": 0.7034, "step": 5169 }, { "epoch": 0.15845286257202404, "grad_norm": 1.8660811951080485, "learning_rate": 9.573535861247502e-06, "loss": 0.7484, "step": 5170 }, { "epoch": 0.15848351109476524, "grad_norm": 0.8183240121213802, "learning_rate": 9.573335268092839e-06, "loss": 0.5002, "step": 5171 }, { "epoch": 0.15851415961750642, "grad_norm": 2.0223563347108344, "learning_rate": 9.573134629875934e-06, "loss": 0.6269, "step": 5172 }, { "epoch": 0.15854480814024763, "grad_norm": 1.9147797702857003, "learning_rate": 9.572933946598761e-06, "loss": 0.7286, "step": 5173 }, { "epoch": 0.15857545666298883, "grad_norm": 1.7513197868909949, "learning_rate": 9.572733218263302e-06, "loss": 0.7404, "step": 5174 }, { "epoch": 0.15860610518573004, "grad_norm": 1.7710954894262578, "learning_rate": 9.572532444871532e-06, "loss": 0.8412, "step": 5175 }, { "epoch": 0.15863675370847125, "grad_norm": 1.8846914204581158, "learning_rate": 9.57233162642543e-06, "loss": 0.805, "step": 5176 }, { "epoch": 0.15866740223121245, "grad_norm": 2.124526185565614, "learning_rate": 9.572130762926975e-06, "loss": 0.787, "step": 5177 }, { "epoch": 0.15869805075395366, "grad_norm": 1.909714225277626, "learning_rate": 9.571929854378144e-06, "loss": 0.7818, "step": 5178 }, { "epoch": 0.15872869927669486, "grad_norm": 1.8205343620082437, "learning_rate": 9.57172890078092e-06, "loss": 0.7685, "step": 5179 }, { "epoch": 0.15875934779943607, "grad_norm": 1.836033171117063, "learning_rate": 9.57152790213728e-06, "loss": 0.7408, "step": 5180 }, { "epoch": 0.15878999632217727, "grad_norm": 1.85042073208907, "learning_rate": 9.571326858449209e-06, "loss": 0.7443, "step": 5181 }, { "epoch": 0.15882064484491848, "grad_norm": 1.7707381755660112, "learning_rate": 9.57112576971868e-06, "loss": 0.7889, "step": 5182 }, { "epoch": 0.15885129336765968, "grad_norm": 2.103559516110049, "learning_rate": 9.570924635947682e-06, "loss": 0.8502, "step": 5183 }, { "epoch": 0.1588819418904009, "grad_norm": 1.7294096177420013, "learning_rate": 9.570723457138196e-06, "loss": 0.6565, "step": 5184 }, { "epoch": 0.1589125904131421, "grad_norm": 1.7587448966069499, "learning_rate": 9.5705222332922e-06, "loss": 0.791, "step": 5185 }, { "epoch": 0.1589432389358833, "grad_norm": 1.9994427157625665, "learning_rate": 9.570320964411678e-06, "loss": 0.7592, "step": 5186 }, { "epoch": 0.1589738874586245, "grad_norm": 0.9310869003705924, "learning_rate": 9.570119650498617e-06, "loss": 0.5087, "step": 5187 }, { "epoch": 0.15900453598136569, "grad_norm": 1.5695740470254809, "learning_rate": 9.569918291554995e-06, "loss": 0.6543, "step": 5188 }, { "epoch": 0.1590351845041069, "grad_norm": 1.9003394171872867, "learning_rate": 9.569716887582801e-06, "loss": 0.8319, "step": 5189 }, { "epoch": 0.1590658330268481, "grad_norm": 1.6609522054370895, "learning_rate": 9.569515438584016e-06, "loss": 0.7252, "step": 5190 }, { "epoch": 0.1590964815495893, "grad_norm": 2.155328417154746, "learning_rate": 9.569313944560628e-06, "loss": 0.838, "step": 5191 }, { "epoch": 0.1591271300723305, "grad_norm": 0.8054455276553973, "learning_rate": 9.569112405514619e-06, "loss": 0.5043, "step": 5192 }, { "epoch": 0.15915777859507171, "grad_norm": 1.7894937365833252, "learning_rate": 9.568910821447976e-06, "loss": 0.8163, "step": 5193 }, { "epoch": 0.15918842711781292, "grad_norm": 1.866013813406795, "learning_rate": 9.568709192362687e-06, "loss": 0.7581, "step": 5194 }, { "epoch": 0.15921907564055413, "grad_norm": 2.0158349096573014, "learning_rate": 9.568507518260737e-06, "loss": 0.7561, "step": 5195 }, { "epoch": 0.15924972416329533, "grad_norm": 1.710618038787171, "learning_rate": 9.568305799144112e-06, "loss": 0.7409, "step": 5196 }, { "epoch": 0.15928037268603654, "grad_norm": 1.7880544498924413, "learning_rate": 9.568104035014802e-06, "loss": 0.6762, "step": 5197 }, { "epoch": 0.15931102120877774, "grad_norm": 2.1665143902142154, "learning_rate": 9.567902225874794e-06, "loss": 0.7166, "step": 5198 }, { "epoch": 0.15934166973151895, "grad_norm": 1.835338469599353, "learning_rate": 9.567700371726079e-06, "loss": 0.7646, "step": 5199 }, { "epoch": 0.15937231825426015, "grad_norm": 2.1312260505531517, "learning_rate": 9.56749847257064e-06, "loss": 0.7677, "step": 5200 }, { "epoch": 0.15940296677700136, "grad_norm": 1.5916713804527276, "learning_rate": 9.567296528410472e-06, "loss": 0.7065, "step": 5201 }, { "epoch": 0.15943361529974256, "grad_norm": 1.7375223391449093, "learning_rate": 9.567094539247562e-06, "loss": 0.7285, "step": 5202 }, { "epoch": 0.15946426382248374, "grad_norm": 1.794115265733253, "learning_rate": 9.566892505083903e-06, "loss": 0.721, "step": 5203 }, { "epoch": 0.15949491234522495, "grad_norm": 1.7589518597390243, "learning_rate": 9.566690425921482e-06, "loss": 0.6997, "step": 5204 }, { "epoch": 0.15952556086796615, "grad_norm": 1.8854569961967653, "learning_rate": 9.56648830176229e-06, "loss": 0.7514, "step": 5205 }, { "epoch": 0.15955620939070736, "grad_norm": 0.9203731857189505, "learning_rate": 9.566286132608322e-06, "loss": 0.5129, "step": 5206 }, { "epoch": 0.15958685791344857, "grad_norm": 1.7323005662571178, "learning_rate": 9.566083918461569e-06, "loss": 0.7241, "step": 5207 }, { "epoch": 0.15961750643618977, "grad_norm": 2.2089416037188396, "learning_rate": 9.565881659324021e-06, "loss": 0.713, "step": 5208 }, { "epoch": 0.15964815495893098, "grad_norm": 1.800122765823468, "learning_rate": 9.565679355197674e-06, "loss": 0.8144, "step": 5209 }, { "epoch": 0.15967880348167218, "grad_norm": 1.6291878420981833, "learning_rate": 9.565477006084521e-06, "loss": 0.654, "step": 5210 }, { "epoch": 0.1597094520044134, "grad_norm": 1.6131924184349844, "learning_rate": 9.565274611986555e-06, "loss": 0.6229, "step": 5211 }, { "epoch": 0.1597401005271546, "grad_norm": 2.2346537353454434, "learning_rate": 9.565072172905768e-06, "loss": 0.7101, "step": 5212 }, { "epoch": 0.1597707490498958, "grad_norm": 1.648742957468223, "learning_rate": 9.564869688844158e-06, "loss": 0.6656, "step": 5213 }, { "epoch": 0.159801397572637, "grad_norm": 1.4918100496141857, "learning_rate": 9.564667159803719e-06, "loss": 0.6277, "step": 5214 }, { "epoch": 0.1598320460953782, "grad_norm": 1.7675426678855002, "learning_rate": 9.564464585786447e-06, "loss": 0.7479, "step": 5215 }, { "epoch": 0.15986269461811942, "grad_norm": 1.8030841129177897, "learning_rate": 9.564261966794337e-06, "loss": 0.72, "step": 5216 }, { "epoch": 0.15989334314086062, "grad_norm": 1.9414341478178234, "learning_rate": 9.564059302829386e-06, "loss": 0.6939, "step": 5217 }, { "epoch": 0.15992399166360183, "grad_norm": 2.0862410114520746, "learning_rate": 9.563856593893593e-06, "loss": 0.7457, "step": 5218 }, { "epoch": 0.159954640186343, "grad_norm": 2.040763419839761, "learning_rate": 9.563653839988951e-06, "loss": 0.7439, "step": 5219 }, { "epoch": 0.1599852887090842, "grad_norm": 1.6688979415177365, "learning_rate": 9.56345104111746e-06, "loss": 0.7172, "step": 5220 }, { "epoch": 0.16001593723182542, "grad_norm": 1.711306119991752, "learning_rate": 9.563248197281119e-06, "loss": 0.7854, "step": 5221 }, { "epoch": 0.16004658575456662, "grad_norm": 1.9540236678702811, "learning_rate": 9.563045308481926e-06, "loss": 0.7887, "step": 5222 }, { "epoch": 0.16007723427730783, "grad_norm": 1.909866166135983, "learning_rate": 9.56284237472188e-06, "loss": 0.7792, "step": 5223 }, { "epoch": 0.16010788280004903, "grad_norm": 1.7829715536939565, "learning_rate": 9.562639396002979e-06, "loss": 0.7793, "step": 5224 }, { "epoch": 0.16013853132279024, "grad_norm": 1.6754080284388688, "learning_rate": 9.562436372327227e-06, "loss": 0.6567, "step": 5225 }, { "epoch": 0.16016917984553145, "grad_norm": 1.6033056907595429, "learning_rate": 9.562233303696623e-06, "loss": 0.7545, "step": 5226 }, { "epoch": 0.16019982836827265, "grad_norm": 1.8099694583760564, "learning_rate": 9.562030190113163e-06, "loss": 0.7037, "step": 5227 }, { "epoch": 0.16023047689101386, "grad_norm": 1.7911243757971251, "learning_rate": 9.561827031578855e-06, "loss": 0.7615, "step": 5228 }, { "epoch": 0.16026112541375506, "grad_norm": 1.6916240778727367, "learning_rate": 9.561623828095697e-06, "loss": 0.6629, "step": 5229 }, { "epoch": 0.16029177393649627, "grad_norm": 1.8888899785322126, "learning_rate": 9.561420579665692e-06, "loss": 0.7715, "step": 5230 }, { "epoch": 0.16032242245923747, "grad_norm": 0.9990148376521791, "learning_rate": 9.561217286290845e-06, "loss": 0.4896, "step": 5231 }, { "epoch": 0.16035307098197868, "grad_norm": 0.9405603400603475, "learning_rate": 9.561013947973155e-06, "loss": 0.5193, "step": 5232 }, { "epoch": 0.16038371950471988, "grad_norm": 1.9901582571365997, "learning_rate": 9.560810564714629e-06, "loss": 0.6346, "step": 5233 }, { "epoch": 0.16041436802746106, "grad_norm": 1.6048918025009407, "learning_rate": 9.560607136517268e-06, "loss": 0.6451, "step": 5234 }, { "epoch": 0.16044501655020227, "grad_norm": 0.8675820519493376, "learning_rate": 9.56040366338308e-06, "loss": 0.508, "step": 5235 }, { "epoch": 0.16047566507294347, "grad_norm": 2.013517652914433, "learning_rate": 9.560200145314067e-06, "loss": 0.7706, "step": 5236 }, { "epoch": 0.16050631359568468, "grad_norm": 1.887849789632235, "learning_rate": 9.559996582312235e-06, "loss": 0.7882, "step": 5237 }, { "epoch": 0.16053696211842589, "grad_norm": 1.8260237326842437, "learning_rate": 9.55979297437959e-06, "loss": 0.626, "step": 5238 }, { "epoch": 0.1605676106411671, "grad_norm": 1.877788992709773, "learning_rate": 9.559589321518137e-06, "loss": 0.8109, "step": 5239 }, { "epoch": 0.1605982591639083, "grad_norm": 1.0076907375377278, "learning_rate": 9.559385623729886e-06, "loss": 0.4926, "step": 5240 }, { "epoch": 0.1606289076866495, "grad_norm": 2.0976787269824615, "learning_rate": 9.55918188101684e-06, "loss": 0.8437, "step": 5241 }, { "epoch": 0.1606595562093907, "grad_norm": 1.9285394374915235, "learning_rate": 9.558978093381008e-06, "loss": 0.7237, "step": 5242 }, { "epoch": 0.1606902047321319, "grad_norm": 1.6845894797419314, "learning_rate": 9.5587742608244e-06, "loss": 0.7044, "step": 5243 }, { "epoch": 0.16072085325487312, "grad_norm": 1.870202057821533, "learning_rate": 9.558570383349023e-06, "loss": 0.7719, "step": 5244 }, { "epoch": 0.16075150177761433, "grad_norm": 1.7708570966554127, "learning_rate": 9.558366460956885e-06, "loss": 0.7794, "step": 5245 }, { "epoch": 0.16078215030035553, "grad_norm": 2.01710267542916, "learning_rate": 9.558162493649996e-06, "loss": 0.7493, "step": 5246 }, { "epoch": 0.16081279882309674, "grad_norm": 1.528674346399743, "learning_rate": 9.557958481430365e-06, "loss": 0.6496, "step": 5247 }, { "epoch": 0.16084344734583794, "grad_norm": 1.8770314448356344, "learning_rate": 9.557754424300004e-06, "loss": 0.7759, "step": 5248 }, { "epoch": 0.16087409586857915, "grad_norm": 1.8947055304802072, "learning_rate": 9.557550322260921e-06, "loss": 0.6921, "step": 5249 }, { "epoch": 0.16090474439132033, "grad_norm": 1.9139943611329806, "learning_rate": 9.55734617531513e-06, "loss": 0.7334, "step": 5250 }, { "epoch": 0.16093539291406153, "grad_norm": 0.9012654489679223, "learning_rate": 9.557141983464641e-06, "loss": 0.4932, "step": 5251 }, { "epoch": 0.16096604143680274, "grad_norm": 1.7491244811077082, "learning_rate": 9.556937746711466e-06, "loss": 0.5818, "step": 5252 }, { "epoch": 0.16099668995954394, "grad_norm": 1.6267890440439494, "learning_rate": 9.556733465057617e-06, "loss": 0.7609, "step": 5253 }, { "epoch": 0.16102733848228515, "grad_norm": 1.8492437693949122, "learning_rate": 9.556529138505108e-06, "loss": 0.777, "step": 5254 }, { "epoch": 0.16105798700502635, "grad_norm": 1.7336068301117808, "learning_rate": 9.556324767055952e-06, "loss": 0.7078, "step": 5255 }, { "epoch": 0.16108863552776756, "grad_norm": 1.8493106991338062, "learning_rate": 9.556120350712158e-06, "loss": 0.7497, "step": 5256 }, { "epoch": 0.16111928405050877, "grad_norm": 1.7377414262276025, "learning_rate": 9.55591588947575e-06, "loss": 0.8271, "step": 5257 }, { "epoch": 0.16114993257324997, "grad_norm": 1.909030868453334, "learning_rate": 9.555711383348734e-06, "loss": 0.6799, "step": 5258 }, { "epoch": 0.16118058109599118, "grad_norm": 1.9537414208139117, "learning_rate": 9.555506832333131e-06, "loss": 0.62, "step": 5259 }, { "epoch": 0.16121122961873238, "grad_norm": 1.8215003633876372, "learning_rate": 9.55530223643095e-06, "loss": 0.7997, "step": 5260 }, { "epoch": 0.1612418781414736, "grad_norm": 1.7959656529071688, "learning_rate": 9.555097595644212e-06, "loss": 0.7943, "step": 5261 }, { "epoch": 0.1612725266642148, "grad_norm": 1.7495300553826338, "learning_rate": 9.554892909974933e-06, "loss": 0.6771, "step": 5262 }, { "epoch": 0.161303175186956, "grad_norm": 1.8513871734950385, "learning_rate": 9.554688179425126e-06, "loss": 0.8212, "step": 5263 }, { "epoch": 0.1613338237096972, "grad_norm": 1.6312657167575322, "learning_rate": 9.554483403996813e-06, "loss": 0.7688, "step": 5264 }, { "epoch": 0.16136447223243838, "grad_norm": 1.9476432423769248, "learning_rate": 9.554278583692009e-06, "loss": 0.7886, "step": 5265 }, { "epoch": 0.1613951207551796, "grad_norm": 1.7621709884850458, "learning_rate": 9.554073718512735e-06, "loss": 0.8081, "step": 5266 }, { "epoch": 0.1614257692779208, "grad_norm": 1.8020118989106522, "learning_rate": 9.553868808461004e-06, "loss": 0.7099, "step": 5267 }, { "epoch": 0.161456417800662, "grad_norm": 1.9215822553642763, "learning_rate": 9.553663853538841e-06, "loss": 0.6985, "step": 5268 }, { "epoch": 0.1614870663234032, "grad_norm": 1.79424563291065, "learning_rate": 9.553458853748263e-06, "loss": 0.7495, "step": 5269 }, { "epoch": 0.1615177148461444, "grad_norm": 1.928854672483742, "learning_rate": 9.553253809091287e-06, "loss": 0.7569, "step": 5270 }, { "epoch": 0.16154836336888562, "grad_norm": 1.7936185948667627, "learning_rate": 9.55304871956994e-06, "loss": 0.7678, "step": 5271 }, { "epoch": 0.16157901189162682, "grad_norm": 1.6961272604574766, "learning_rate": 9.552843585186237e-06, "loss": 0.6757, "step": 5272 }, { "epoch": 0.16160966041436803, "grad_norm": 1.9568450293962025, "learning_rate": 9.552638405942201e-06, "loss": 0.7213, "step": 5273 }, { "epoch": 0.16164030893710923, "grad_norm": 1.9351943730151921, "learning_rate": 9.552433181839855e-06, "loss": 0.6969, "step": 5274 }, { "epoch": 0.16167095745985044, "grad_norm": 1.873924178892835, "learning_rate": 9.55222791288122e-06, "loss": 0.7259, "step": 5275 }, { "epoch": 0.16170160598259165, "grad_norm": 1.7331795926345352, "learning_rate": 9.552022599068317e-06, "loss": 0.7542, "step": 5276 }, { "epoch": 0.16173225450533285, "grad_norm": 1.8518036267491649, "learning_rate": 9.551817240403172e-06, "loss": 0.7496, "step": 5277 }, { "epoch": 0.16176290302807406, "grad_norm": 1.9438299895837454, "learning_rate": 9.551611836887807e-06, "loss": 0.7131, "step": 5278 }, { "epoch": 0.16179355155081526, "grad_norm": 1.8856120175227336, "learning_rate": 9.551406388524244e-06, "loss": 0.7656, "step": 5279 }, { "epoch": 0.16182420007355647, "grad_norm": 2.034442754612429, "learning_rate": 9.551200895314512e-06, "loss": 0.7944, "step": 5280 }, { "epoch": 0.16185484859629765, "grad_norm": 0.9297178154583118, "learning_rate": 9.550995357260633e-06, "loss": 0.4984, "step": 5281 }, { "epoch": 0.16188549711903885, "grad_norm": 2.0494183702103745, "learning_rate": 9.550789774364632e-06, "loss": 0.7084, "step": 5282 }, { "epoch": 0.16191614564178006, "grad_norm": 1.8618457518295324, "learning_rate": 9.550584146628534e-06, "loss": 0.6661, "step": 5283 }, { "epoch": 0.16194679416452126, "grad_norm": 1.8509036548283657, "learning_rate": 9.550378474054367e-06, "loss": 0.7265, "step": 5284 }, { "epoch": 0.16197744268726247, "grad_norm": 1.8483183148267104, "learning_rate": 9.550172756644156e-06, "loss": 0.7677, "step": 5285 }, { "epoch": 0.16200809121000367, "grad_norm": 1.8163027893828434, "learning_rate": 9.549966994399928e-06, "loss": 0.6851, "step": 5286 }, { "epoch": 0.16203873973274488, "grad_norm": 1.83930571021459, "learning_rate": 9.549761187323714e-06, "loss": 0.8411, "step": 5287 }, { "epoch": 0.16206938825548609, "grad_norm": 1.875423759712754, "learning_rate": 9.549555335417535e-06, "loss": 0.7064, "step": 5288 }, { "epoch": 0.1621000367782273, "grad_norm": 1.751587604378209, "learning_rate": 9.549349438683426e-06, "loss": 0.8131, "step": 5289 }, { "epoch": 0.1621306853009685, "grad_norm": 1.620273724676893, "learning_rate": 9.549143497123412e-06, "loss": 0.71, "step": 5290 }, { "epoch": 0.1621613338237097, "grad_norm": 2.588047749339332, "learning_rate": 9.548937510739524e-06, "loss": 0.7356, "step": 5291 }, { "epoch": 0.1621919823464509, "grad_norm": 1.8299980419678592, "learning_rate": 9.54873147953379e-06, "loss": 0.7552, "step": 5292 }, { "epoch": 0.1622226308691921, "grad_norm": 1.645471716053331, "learning_rate": 9.548525403508241e-06, "loss": 0.6955, "step": 5293 }, { "epoch": 0.16225327939193332, "grad_norm": 0.9589866400447533, "learning_rate": 9.548319282664906e-06, "loss": 0.491, "step": 5294 }, { "epoch": 0.16228392791467453, "grad_norm": 1.9232753365040958, "learning_rate": 9.54811311700582e-06, "loss": 0.7644, "step": 5295 }, { "epoch": 0.1623145764374157, "grad_norm": 1.9682978644966584, "learning_rate": 9.54790690653301e-06, "loss": 0.7886, "step": 5296 }, { "epoch": 0.1623452249601569, "grad_norm": 2.0803951158686544, "learning_rate": 9.54770065124851e-06, "loss": 0.7538, "step": 5297 }, { "epoch": 0.16237587348289811, "grad_norm": 1.7910867449432095, "learning_rate": 9.547494351154352e-06, "loss": 0.8983, "step": 5298 }, { "epoch": 0.16240652200563932, "grad_norm": 1.5685662963932423, "learning_rate": 9.547288006252568e-06, "loss": 0.6282, "step": 5299 }, { "epoch": 0.16243717052838053, "grad_norm": 0.9008621178035504, "learning_rate": 9.547081616545193e-06, "loss": 0.5159, "step": 5300 }, { "epoch": 0.16246781905112173, "grad_norm": 1.6490931584307444, "learning_rate": 9.54687518203426e-06, "loss": 0.701, "step": 5301 }, { "epoch": 0.16249846757386294, "grad_norm": 1.9602271128570157, "learning_rate": 9.546668702721801e-06, "loss": 0.6736, "step": 5302 }, { "epoch": 0.16252911609660414, "grad_norm": 1.9027590345994356, "learning_rate": 9.546462178609852e-06, "loss": 0.6468, "step": 5303 }, { "epoch": 0.16255976461934535, "grad_norm": 1.8209204979817322, "learning_rate": 9.546255609700447e-06, "loss": 0.8081, "step": 5304 }, { "epoch": 0.16259041314208655, "grad_norm": 1.8530160060297516, "learning_rate": 9.546048995995625e-06, "loss": 0.7805, "step": 5305 }, { "epoch": 0.16262106166482776, "grad_norm": 0.8693658102122664, "learning_rate": 9.545842337497417e-06, "loss": 0.5173, "step": 5306 }, { "epoch": 0.16265171018756897, "grad_norm": 1.6642858828961802, "learning_rate": 9.545635634207862e-06, "loss": 0.7811, "step": 5307 }, { "epoch": 0.16268235871031017, "grad_norm": 1.7222848265539283, "learning_rate": 9.545428886128996e-06, "loss": 0.7665, "step": 5308 }, { "epoch": 0.16271300723305138, "grad_norm": 1.9444420802251952, "learning_rate": 9.545222093262856e-06, "loss": 0.6814, "step": 5309 }, { "epoch": 0.16274365575579258, "grad_norm": 1.7570068126849956, "learning_rate": 9.54501525561148e-06, "loss": 0.8085, "step": 5310 }, { "epoch": 0.1627743042785338, "grad_norm": 1.9879347373441765, "learning_rate": 9.544808373176906e-06, "loss": 0.765, "step": 5311 }, { "epoch": 0.16280495280127497, "grad_norm": 0.8570063195517861, "learning_rate": 9.544601445961172e-06, "loss": 0.5139, "step": 5312 }, { "epoch": 0.16283560132401617, "grad_norm": 1.6246220195054208, "learning_rate": 9.544394473966317e-06, "loss": 0.6117, "step": 5313 }, { "epoch": 0.16286624984675738, "grad_norm": 1.9384473484125313, "learning_rate": 9.54418745719438e-06, "loss": 0.7212, "step": 5314 }, { "epoch": 0.16289689836949858, "grad_norm": 2.053315517066517, "learning_rate": 9.543980395647403e-06, "loss": 0.7792, "step": 5315 }, { "epoch": 0.1629275468922398, "grad_norm": 1.707917819399618, "learning_rate": 9.543773289327423e-06, "loss": 0.6523, "step": 5316 }, { "epoch": 0.162958195414981, "grad_norm": 0.8176992323886978, "learning_rate": 9.543566138236483e-06, "loss": 0.5134, "step": 5317 }, { "epoch": 0.1629888439377222, "grad_norm": 2.0152099902354106, "learning_rate": 9.543358942376623e-06, "loss": 0.7492, "step": 5318 }, { "epoch": 0.1630194924604634, "grad_norm": 0.84892937990634, "learning_rate": 9.543151701749885e-06, "loss": 0.5153, "step": 5319 }, { "epoch": 0.1630501409832046, "grad_norm": 2.137225941519722, "learning_rate": 9.54294441635831e-06, "loss": 0.7604, "step": 5320 }, { "epoch": 0.16308078950594582, "grad_norm": 1.9452544202167656, "learning_rate": 9.542737086203943e-06, "loss": 0.7229, "step": 5321 }, { "epoch": 0.16311143802868702, "grad_norm": 1.770620888449625, "learning_rate": 9.542529711288824e-06, "loss": 0.6511, "step": 5322 }, { "epoch": 0.16314208655142823, "grad_norm": 1.7523378280493491, "learning_rate": 9.542322291614999e-06, "loss": 0.7779, "step": 5323 }, { "epoch": 0.16317273507416943, "grad_norm": 2.2062111865061196, "learning_rate": 9.542114827184507e-06, "loss": 0.7993, "step": 5324 }, { "epoch": 0.16320338359691064, "grad_norm": 1.8791163507389126, "learning_rate": 9.541907317999397e-06, "loss": 0.8583, "step": 5325 }, { "epoch": 0.16323403211965185, "grad_norm": 1.9050463317533237, "learning_rate": 9.541699764061714e-06, "loss": 0.7092, "step": 5326 }, { "epoch": 0.16326468064239302, "grad_norm": 1.9998966280709332, "learning_rate": 9.5414921653735e-06, "loss": 0.7846, "step": 5327 }, { "epoch": 0.16329532916513423, "grad_norm": 1.6941256417433437, "learning_rate": 9.5412845219368e-06, "loss": 0.7275, "step": 5328 }, { "epoch": 0.16332597768787543, "grad_norm": 1.8930378049053456, "learning_rate": 9.541076833753665e-06, "loss": 0.761, "step": 5329 }, { "epoch": 0.16335662621061664, "grad_norm": 1.69002932838127, "learning_rate": 9.540869100826136e-06, "loss": 0.7731, "step": 5330 }, { "epoch": 0.16338727473335785, "grad_norm": 2.2724721956087874, "learning_rate": 9.540661323156261e-06, "loss": 0.8033, "step": 5331 }, { "epoch": 0.16341792325609905, "grad_norm": 1.0458573042384547, "learning_rate": 9.54045350074609e-06, "loss": 0.484, "step": 5332 }, { "epoch": 0.16344857177884026, "grad_norm": 2.1268870301014333, "learning_rate": 9.540245633597667e-06, "loss": 0.7564, "step": 5333 }, { "epoch": 0.16347922030158146, "grad_norm": 0.9052896927325926, "learning_rate": 9.540037721713045e-06, "loss": 0.5046, "step": 5334 }, { "epoch": 0.16350986882432267, "grad_norm": 1.979776793619579, "learning_rate": 9.539829765094265e-06, "loss": 0.9123, "step": 5335 }, { "epoch": 0.16354051734706387, "grad_norm": 1.8061850531080035, "learning_rate": 9.539621763743384e-06, "loss": 0.708, "step": 5336 }, { "epoch": 0.16357116586980508, "grad_norm": 1.5944410603029322, "learning_rate": 9.539413717662449e-06, "loss": 0.6935, "step": 5337 }, { "epoch": 0.16360181439254629, "grad_norm": 2.030871447719239, "learning_rate": 9.53920562685351e-06, "loss": 0.7605, "step": 5338 }, { "epoch": 0.1636324629152875, "grad_norm": 1.683267920298032, "learning_rate": 9.538997491318613e-06, "loss": 0.8174, "step": 5339 }, { "epoch": 0.1636631114380287, "grad_norm": 1.9935031704796395, "learning_rate": 9.538789311059815e-06, "loss": 0.7359, "step": 5340 }, { "epoch": 0.1636937599607699, "grad_norm": 2.0821164460220287, "learning_rate": 9.538581086079164e-06, "loss": 0.7044, "step": 5341 }, { "epoch": 0.1637244084835111, "grad_norm": 1.7179318527666925, "learning_rate": 9.538372816378711e-06, "loss": 0.7839, "step": 5342 }, { "epoch": 0.16375505700625229, "grad_norm": 1.8548400661908466, "learning_rate": 9.538164501960511e-06, "loss": 0.7482, "step": 5343 }, { "epoch": 0.1637857055289935, "grad_norm": 1.2061613399708688, "learning_rate": 9.537956142826615e-06, "loss": 0.5047, "step": 5344 }, { "epoch": 0.1638163540517347, "grad_norm": 1.5185011355954516, "learning_rate": 9.537747738979076e-06, "loss": 0.6214, "step": 5345 }, { "epoch": 0.1638470025744759, "grad_norm": 0.9111957060021851, "learning_rate": 9.537539290419945e-06, "loss": 0.4841, "step": 5346 }, { "epoch": 0.1638776510972171, "grad_norm": 1.71010123117021, "learning_rate": 9.537330797151282e-06, "loss": 0.6786, "step": 5347 }, { "epoch": 0.16390829961995831, "grad_norm": 2.0169762006929504, "learning_rate": 9.537122259175135e-06, "loss": 0.8134, "step": 5348 }, { "epoch": 0.16393894814269952, "grad_norm": 1.649879924848106, "learning_rate": 9.536913676493564e-06, "loss": 0.6782, "step": 5349 }, { "epoch": 0.16396959666544073, "grad_norm": 1.066636885640487, "learning_rate": 9.53670504910862e-06, "loss": 0.5255, "step": 5350 }, { "epoch": 0.16400024518818193, "grad_norm": 1.7911230903341813, "learning_rate": 9.536496377022362e-06, "loss": 0.8061, "step": 5351 }, { "epoch": 0.16403089371092314, "grad_norm": 1.915447082221522, "learning_rate": 9.536287660236842e-06, "loss": 0.72, "step": 5352 }, { "epoch": 0.16406154223366434, "grad_norm": 1.9643709975892816, "learning_rate": 9.53607889875412e-06, "loss": 0.7291, "step": 5353 }, { "epoch": 0.16409219075640555, "grad_norm": 1.8769334219580385, "learning_rate": 9.535870092576253e-06, "loss": 0.6626, "step": 5354 }, { "epoch": 0.16412283927914675, "grad_norm": 1.9667035018781915, "learning_rate": 9.535661241705296e-06, "loss": 0.8227, "step": 5355 }, { "epoch": 0.16415348780188796, "grad_norm": 1.856952135208599, "learning_rate": 9.53545234614331e-06, "loss": 0.6573, "step": 5356 }, { "epoch": 0.16418413632462917, "grad_norm": 1.6586555240327931, "learning_rate": 9.53524340589235e-06, "loss": 0.6585, "step": 5357 }, { "epoch": 0.16421478484737034, "grad_norm": 1.7138801543686837, "learning_rate": 9.535034420954476e-06, "loss": 0.7817, "step": 5358 }, { "epoch": 0.16424543337011155, "grad_norm": 1.8152349043323157, "learning_rate": 9.53482539133175e-06, "loss": 0.7412, "step": 5359 }, { "epoch": 0.16427608189285275, "grad_norm": 1.7849101862389976, "learning_rate": 9.534616317026227e-06, "loss": 0.72, "step": 5360 }, { "epoch": 0.16430673041559396, "grad_norm": 1.94729670924048, "learning_rate": 9.53440719803997e-06, "loss": 0.6859, "step": 5361 }, { "epoch": 0.16433737893833517, "grad_norm": 1.7063559055313442, "learning_rate": 9.534198034375039e-06, "loss": 0.6858, "step": 5362 }, { "epoch": 0.16436802746107637, "grad_norm": 2.0191726024434984, "learning_rate": 9.533988826033494e-06, "loss": 0.7094, "step": 5363 }, { "epoch": 0.16439867598381758, "grad_norm": 1.8883539051676899, "learning_rate": 9.533779573017397e-06, "loss": 0.71, "step": 5364 }, { "epoch": 0.16442932450655878, "grad_norm": 1.7354217792649826, "learning_rate": 9.53357027532881e-06, "loss": 0.6854, "step": 5365 }, { "epoch": 0.1644599730293, "grad_norm": 1.898105057711462, "learning_rate": 9.533360932969795e-06, "loss": 0.8048, "step": 5366 }, { "epoch": 0.1644906215520412, "grad_norm": 1.9620184144814825, "learning_rate": 9.533151545942414e-06, "loss": 0.7637, "step": 5367 }, { "epoch": 0.1645212700747824, "grad_norm": 1.9008099967488723, "learning_rate": 9.532942114248734e-06, "loss": 0.7084, "step": 5368 }, { "epoch": 0.1645519185975236, "grad_norm": 0.953344923752192, "learning_rate": 9.532732637890813e-06, "loss": 0.506, "step": 5369 }, { "epoch": 0.1645825671202648, "grad_norm": 1.798536883085912, "learning_rate": 9.532523116870718e-06, "loss": 0.7399, "step": 5370 }, { "epoch": 0.16461321564300602, "grad_norm": 1.7271827342839856, "learning_rate": 9.532313551190513e-06, "loss": 0.666, "step": 5371 }, { "epoch": 0.16464386416574722, "grad_norm": 1.8093364840797412, "learning_rate": 9.532103940852263e-06, "loss": 0.7908, "step": 5372 }, { "epoch": 0.16467451268848843, "grad_norm": 2.7020149803837907, "learning_rate": 9.531894285858032e-06, "loss": 0.7158, "step": 5373 }, { "epoch": 0.1647051612112296, "grad_norm": 2.0181760654581473, "learning_rate": 9.53168458620989e-06, "loss": 0.7718, "step": 5374 }, { "epoch": 0.1647358097339708, "grad_norm": 1.9017574149026013, "learning_rate": 9.531474841909898e-06, "loss": 0.6958, "step": 5375 }, { "epoch": 0.16476645825671202, "grad_norm": 0.9350237192257729, "learning_rate": 9.531265052960126e-06, "loss": 0.485, "step": 5376 }, { "epoch": 0.16479710677945322, "grad_norm": 1.8045201128384252, "learning_rate": 9.531055219362639e-06, "loss": 0.7984, "step": 5377 }, { "epoch": 0.16482775530219443, "grad_norm": 2.1292986082500596, "learning_rate": 9.530845341119506e-06, "loss": 0.8851, "step": 5378 }, { "epoch": 0.16485840382493563, "grad_norm": 0.8121071762702956, "learning_rate": 9.530635418232795e-06, "loss": 0.5057, "step": 5379 }, { "epoch": 0.16488905234767684, "grad_norm": 2.0857651598462055, "learning_rate": 9.530425450704574e-06, "loss": 0.7647, "step": 5380 }, { "epoch": 0.16491970087041805, "grad_norm": 1.8859008494598022, "learning_rate": 9.530215438536912e-06, "loss": 0.7578, "step": 5381 }, { "epoch": 0.16495034939315925, "grad_norm": 1.8656903285413058, "learning_rate": 9.530005381731876e-06, "loss": 0.746, "step": 5382 }, { "epoch": 0.16498099791590046, "grad_norm": 0.8251935214977781, "learning_rate": 9.529795280291542e-06, "loss": 0.4976, "step": 5383 }, { "epoch": 0.16501164643864166, "grad_norm": 1.501805557300514, "learning_rate": 9.529585134217973e-06, "loss": 0.7066, "step": 5384 }, { "epoch": 0.16504229496138287, "grad_norm": 1.8965411056688455, "learning_rate": 9.529374943513244e-06, "loss": 0.7439, "step": 5385 }, { "epoch": 0.16507294348412407, "grad_norm": 1.793999903779015, "learning_rate": 9.529164708179424e-06, "loss": 0.7244, "step": 5386 }, { "epoch": 0.16510359200686528, "grad_norm": 1.7856380055619527, "learning_rate": 9.528954428218586e-06, "loss": 0.7669, "step": 5387 }, { "epoch": 0.16513424052960649, "grad_norm": 1.7794961769016475, "learning_rate": 9.528744103632802e-06, "loss": 0.5376, "step": 5388 }, { "epoch": 0.16516488905234766, "grad_norm": 1.7239791013727623, "learning_rate": 9.52853373442414e-06, "loss": 0.6672, "step": 5389 }, { "epoch": 0.16519553757508887, "grad_norm": 1.8876435871982191, "learning_rate": 9.52832332059468e-06, "loss": 0.7338, "step": 5390 }, { "epoch": 0.16522618609783007, "grad_norm": 1.812318745802571, "learning_rate": 9.528112862146492e-06, "loss": 0.6914, "step": 5391 }, { "epoch": 0.16525683462057128, "grad_norm": 1.7756778855970903, "learning_rate": 9.527902359081649e-06, "loss": 0.8213, "step": 5392 }, { "epoch": 0.16528748314331249, "grad_norm": 1.8585456430337592, "learning_rate": 9.527691811402224e-06, "loss": 0.7148, "step": 5393 }, { "epoch": 0.1653181316660537, "grad_norm": 2.368653444535271, "learning_rate": 9.527481219110293e-06, "loss": 0.6773, "step": 5394 }, { "epoch": 0.1653487801887949, "grad_norm": 1.7180923132691213, "learning_rate": 9.527270582207933e-06, "loss": 0.7252, "step": 5395 }, { "epoch": 0.1653794287115361, "grad_norm": 1.9380765108815994, "learning_rate": 9.527059900697216e-06, "loss": 0.7288, "step": 5396 }, { "epoch": 0.1654100772342773, "grad_norm": 1.68506241363974, "learning_rate": 9.52684917458022e-06, "loss": 0.7191, "step": 5397 }, { "epoch": 0.16544072575701851, "grad_norm": 1.7387529290779076, "learning_rate": 9.526638403859021e-06, "loss": 0.7592, "step": 5398 }, { "epoch": 0.16547137427975972, "grad_norm": 1.9296519643991579, "learning_rate": 9.526427588535696e-06, "loss": 0.7953, "step": 5399 }, { "epoch": 0.16550202280250093, "grad_norm": 2.0758155032619636, "learning_rate": 9.526216728612321e-06, "loss": 0.7043, "step": 5400 }, { "epoch": 0.16553267132524213, "grad_norm": 2.163862544375518, "learning_rate": 9.526005824090975e-06, "loss": 0.8597, "step": 5401 }, { "epoch": 0.16556331984798334, "grad_norm": 1.9796024009648219, "learning_rate": 9.525794874973735e-06, "loss": 0.7186, "step": 5402 }, { "epoch": 0.16559396837072454, "grad_norm": 1.8829065135493939, "learning_rate": 9.525583881262681e-06, "loss": 0.8402, "step": 5403 }, { "epoch": 0.16562461689346575, "grad_norm": 1.7985781527080176, "learning_rate": 9.52537284295989e-06, "loss": 0.7644, "step": 5404 }, { "epoch": 0.16565526541620693, "grad_norm": 1.8707129503605113, "learning_rate": 9.525161760067443e-06, "loss": 0.7662, "step": 5405 }, { "epoch": 0.16568591393894813, "grad_norm": 1.021364357409291, "learning_rate": 9.52495063258742e-06, "loss": 0.5267, "step": 5406 }, { "epoch": 0.16571656246168934, "grad_norm": 2.069298851563156, "learning_rate": 9.5247394605219e-06, "loss": 0.8088, "step": 5407 }, { "epoch": 0.16574721098443054, "grad_norm": 1.710058036864639, "learning_rate": 9.524528243872964e-06, "loss": 0.6215, "step": 5408 }, { "epoch": 0.16577785950717175, "grad_norm": 1.8597544351232445, "learning_rate": 9.524316982642693e-06, "loss": 0.7404, "step": 5409 }, { "epoch": 0.16580850802991295, "grad_norm": 0.9116620395255023, "learning_rate": 9.524105676833172e-06, "loss": 0.4978, "step": 5410 }, { "epoch": 0.16583915655265416, "grad_norm": 1.6391889113624345, "learning_rate": 9.523894326446478e-06, "loss": 0.6883, "step": 5411 }, { "epoch": 0.16586980507539537, "grad_norm": 1.8261936454204857, "learning_rate": 9.523682931484696e-06, "loss": 0.6242, "step": 5412 }, { "epoch": 0.16590045359813657, "grad_norm": 1.8536516225786246, "learning_rate": 9.523471491949909e-06, "loss": 0.6721, "step": 5413 }, { "epoch": 0.16593110212087778, "grad_norm": 1.975390838594922, "learning_rate": 9.5232600078442e-06, "loss": 0.7448, "step": 5414 }, { "epoch": 0.16596175064361898, "grad_norm": 1.591249440101633, "learning_rate": 9.523048479169653e-06, "loss": 0.7634, "step": 5415 }, { "epoch": 0.1659923991663602, "grad_norm": 1.6098119940598528, "learning_rate": 9.522836905928352e-06, "loss": 0.6763, "step": 5416 }, { "epoch": 0.1660230476891014, "grad_norm": 1.8173270714596186, "learning_rate": 9.522625288122381e-06, "loss": 0.732, "step": 5417 }, { "epoch": 0.1660536962118426, "grad_norm": 1.6050977814786098, "learning_rate": 9.522413625753827e-06, "loss": 0.7643, "step": 5418 }, { "epoch": 0.1660843447345838, "grad_norm": 1.9022924326012198, "learning_rate": 9.522201918824774e-06, "loss": 0.7996, "step": 5419 }, { "epoch": 0.16611499325732498, "grad_norm": 1.9228794820754243, "learning_rate": 9.521990167337309e-06, "loss": 0.7895, "step": 5420 }, { "epoch": 0.1661456417800662, "grad_norm": 1.7090650024792267, "learning_rate": 9.521778371293517e-06, "loss": 0.7849, "step": 5421 }, { "epoch": 0.1661762903028074, "grad_norm": 1.6236754787029553, "learning_rate": 9.521566530695485e-06, "loss": 0.746, "step": 5422 }, { "epoch": 0.1662069388255486, "grad_norm": 1.667806214910666, "learning_rate": 9.521354645545303e-06, "loss": 0.7645, "step": 5423 }, { "epoch": 0.1662375873482898, "grad_norm": 1.6371464540046308, "learning_rate": 9.521142715845055e-06, "loss": 0.6435, "step": 5424 }, { "epoch": 0.166268235871031, "grad_norm": 1.7787959596729792, "learning_rate": 9.520930741596831e-06, "loss": 0.7014, "step": 5425 }, { "epoch": 0.16629888439377222, "grad_norm": 0.9253453966690562, "learning_rate": 9.520718722802722e-06, "loss": 0.5019, "step": 5426 }, { "epoch": 0.16632953291651342, "grad_norm": 1.823887564781995, "learning_rate": 9.520506659464812e-06, "loss": 0.7187, "step": 5427 }, { "epoch": 0.16636018143925463, "grad_norm": 1.7637253127235484, "learning_rate": 9.520294551585195e-06, "loss": 0.8109, "step": 5428 }, { "epoch": 0.16639082996199583, "grad_norm": 1.84279847906109, "learning_rate": 9.520082399165958e-06, "loss": 0.6669, "step": 5429 }, { "epoch": 0.16642147848473704, "grad_norm": 1.849534419109039, "learning_rate": 9.519870202209194e-06, "loss": 0.7212, "step": 5430 }, { "epoch": 0.16645212700747825, "grad_norm": 1.6916688720866593, "learning_rate": 9.519657960716992e-06, "loss": 0.7485, "step": 5431 }, { "epoch": 0.16648277553021945, "grad_norm": 2.031513400472209, "learning_rate": 9.519445674691443e-06, "loss": 0.7296, "step": 5432 }, { "epoch": 0.16651342405296066, "grad_norm": 1.6736283714886864, "learning_rate": 9.51923334413464e-06, "loss": 0.7322, "step": 5433 }, { "epoch": 0.16654407257570186, "grad_norm": 1.7323716273924272, "learning_rate": 9.519020969048676e-06, "loss": 0.6994, "step": 5434 }, { "epoch": 0.16657472109844307, "grad_norm": 1.7559784895564017, "learning_rate": 9.518808549435639e-06, "loss": 0.764, "step": 5435 }, { "epoch": 0.16660536962118425, "grad_norm": 1.7600403179648296, "learning_rate": 9.518596085297627e-06, "loss": 0.7129, "step": 5436 }, { "epoch": 0.16663601814392545, "grad_norm": 1.5463253237048211, "learning_rate": 9.518383576636732e-06, "loss": 0.6817, "step": 5437 }, { "epoch": 0.16666666666666666, "grad_norm": 1.8647408814153528, "learning_rate": 9.518171023455047e-06, "loss": 0.7377, "step": 5438 }, { "epoch": 0.16669731518940786, "grad_norm": 1.036902163487068, "learning_rate": 9.517958425754668e-06, "loss": 0.5237, "step": 5439 }, { "epoch": 0.16672796371214907, "grad_norm": 1.807475685154996, "learning_rate": 9.517745783537686e-06, "loss": 0.7814, "step": 5440 }, { "epoch": 0.16675861223489027, "grad_norm": 1.8299387695747615, "learning_rate": 9.517533096806201e-06, "loss": 0.7907, "step": 5441 }, { "epoch": 0.16678926075763148, "grad_norm": 1.726581943955212, "learning_rate": 9.517320365562306e-06, "loss": 0.7402, "step": 5442 }, { "epoch": 0.16681990928037269, "grad_norm": 1.9178962688585173, "learning_rate": 9.517107589808098e-06, "loss": 0.7451, "step": 5443 }, { "epoch": 0.1668505578031139, "grad_norm": 1.6857538914664711, "learning_rate": 9.516894769545672e-06, "loss": 0.7455, "step": 5444 }, { "epoch": 0.1668812063258551, "grad_norm": 1.9930622089253875, "learning_rate": 9.516681904777128e-06, "loss": 0.7531, "step": 5445 }, { "epoch": 0.1669118548485963, "grad_norm": 2.045853546549637, "learning_rate": 9.51646899550456e-06, "loss": 0.7811, "step": 5446 }, { "epoch": 0.1669425033713375, "grad_norm": 2.1555486191284996, "learning_rate": 9.516256041730068e-06, "loss": 0.7472, "step": 5447 }, { "epoch": 0.16697315189407871, "grad_norm": 2.1529474618314532, "learning_rate": 9.516043043455749e-06, "loss": 0.8654, "step": 5448 }, { "epoch": 0.16700380041681992, "grad_norm": 2.0062246048563397, "learning_rate": 9.515830000683703e-06, "loss": 0.7499, "step": 5449 }, { "epoch": 0.16703444893956113, "grad_norm": 1.7021518086794665, "learning_rate": 9.515616913416029e-06, "loss": 0.6847, "step": 5450 }, { "epoch": 0.1670650974623023, "grad_norm": 1.9691448872546435, "learning_rate": 9.515403781654825e-06, "loss": 0.7459, "step": 5451 }, { "epoch": 0.1670957459850435, "grad_norm": 1.7285806452509818, "learning_rate": 9.515190605402194e-06, "loss": 0.7308, "step": 5452 }, { "epoch": 0.16712639450778471, "grad_norm": 1.894898970754207, "learning_rate": 9.514977384660233e-06, "loss": 0.8083, "step": 5453 }, { "epoch": 0.16715704303052592, "grad_norm": 1.9773477554155199, "learning_rate": 9.514764119431047e-06, "loss": 0.8218, "step": 5454 }, { "epoch": 0.16718769155326713, "grad_norm": 1.017370417204914, "learning_rate": 9.514550809716731e-06, "loss": 0.4784, "step": 5455 }, { "epoch": 0.16721834007600833, "grad_norm": 2.010808982024262, "learning_rate": 9.514337455519394e-06, "loss": 0.7719, "step": 5456 }, { "epoch": 0.16724898859874954, "grad_norm": 1.8587077761170436, "learning_rate": 9.514124056841133e-06, "loss": 0.7491, "step": 5457 }, { "epoch": 0.16727963712149074, "grad_norm": 1.6297461030891143, "learning_rate": 9.513910613684054e-06, "loss": 0.7237, "step": 5458 }, { "epoch": 0.16731028564423195, "grad_norm": 2.0571528711353286, "learning_rate": 9.513697126050258e-06, "loss": 0.7581, "step": 5459 }, { "epoch": 0.16734093416697315, "grad_norm": 1.7790772896191094, "learning_rate": 9.51348359394185e-06, "loss": 0.7244, "step": 5460 }, { "epoch": 0.16737158268971436, "grad_norm": 1.9233053539340674, "learning_rate": 9.513270017360933e-06, "loss": 0.6211, "step": 5461 }, { "epoch": 0.16740223121245557, "grad_norm": 1.954495869855543, "learning_rate": 9.513056396309613e-06, "loss": 0.7273, "step": 5462 }, { "epoch": 0.16743287973519677, "grad_norm": 1.7896427975668279, "learning_rate": 9.512842730789992e-06, "loss": 0.7182, "step": 5463 }, { "epoch": 0.16746352825793798, "grad_norm": 1.7008880495539762, "learning_rate": 9.512629020804176e-06, "loss": 0.7672, "step": 5464 }, { "epoch": 0.16749417678067918, "grad_norm": 1.9366389336209664, "learning_rate": 9.512415266354274e-06, "loss": 0.799, "step": 5465 }, { "epoch": 0.1675248253034204, "grad_norm": 1.76034455789019, "learning_rate": 9.512201467442389e-06, "loss": 0.6823, "step": 5466 }, { "epoch": 0.16755547382616157, "grad_norm": 1.7465154229438766, "learning_rate": 9.511987624070629e-06, "loss": 0.7737, "step": 5467 }, { "epoch": 0.16758612234890277, "grad_norm": 2.033438208547295, "learning_rate": 9.5117737362411e-06, "loss": 0.827, "step": 5468 }, { "epoch": 0.16761677087164398, "grad_norm": 1.0457738640128849, "learning_rate": 9.51155980395591e-06, "loss": 0.5098, "step": 5469 }, { "epoch": 0.16764741939438518, "grad_norm": 1.8159343907690362, "learning_rate": 9.511345827217167e-06, "loss": 0.7745, "step": 5470 }, { "epoch": 0.1676780679171264, "grad_norm": 2.066034311050583, "learning_rate": 9.511131806026979e-06, "loss": 0.7197, "step": 5471 }, { "epoch": 0.1677087164398676, "grad_norm": 2.1490379589572504, "learning_rate": 9.510917740387456e-06, "loss": 0.7422, "step": 5472 }, { "epoch": 0.1677393649626088, "grad_norm": 1.9264437812416917, "learning_rate": 9.510703630300704e-06, "loss": 0.8313, "step": 5473 }, { "epoch": 0.16777001348535, "grad_norm": 1.982206300595434, "learning_rate": 9.510489475768836e-06, "loss": 0.7774, "step": 5474 }, { "epoch": 0.1678006620080912, "grad_norm": 1.8426164059806065, "learning_rate": 9.510275276793963e-06, "loss": 0.7441, "step": 5475 }, { "epoch": 0.16783131053083242, "grad_norm": 1.7727790461296162, "learning_rate": 9.510061033378191e-06, "loss": 0.6452, "step": 5476 }, { "epoch": 0.16786195905357362, "grad_norm": 2.24109180328191, "learning_rate": 9.509846745523635e-06, "loss": 0.8155, "step": 5477 }, { "epoch": 0.16789260757631483, "grad_norm": 1.7201340203306235, "learning_rate": 9.509632413232406e-06, "loss": 0.6873, "step": 5478 }, { "epoch": 0.16792325609905603, "grad_norm": 1.9871060744149815, "learning_rate": 9.509418036506614e-06, "loss": 0.7078, "step": 5479 }, { "epoch": 0.16795390462179724, "grad_norm": 1.6597596448368357, "learning_rate": 9.509203615348372e-06, "loss": 0.6501, "step": 5480 }, { "epoch": 0.16798455314453845, "grad_norm": 1.8656893974107611, "learning_rate": 9.508989149759792e-06, "loss": 0.7567, "step": 5481 }, { "epoch": 0.16801520166727962, "grad_norm": 1.7701731621538874, "learning_rate": 9.508774639742992e-06, "loss": 0.7137, "step": 5482 }, { "epoch": 0.16804585019002083, "grad_norm": 1.6930146117983216, "learning_rate": 9.508560085300078e-06, "loss": 0.7326, "step": 5483 }, { "epoch": 0.16807649871276203, "grad_norm": 1.7776202681141993, "learning_rate": 9.508345486433171e-06, "loss": 0.7315, "step": 5484 }, { "epoch": 0.16810714723550324, "grad_norm": 2.124745187624052, "learning_rate": 9.508130843144382e-06, "loss": 0.7473, "step": 5485 }, { "epoch": 0.16813779575824445, "grad_norm": 1.8198404235305183, "learning_rate": 9.507916155435824e-06, "loss": 0.7151, "step": 5486 }, { "epoch": 0.16816844428098565, "grad_norm": 1.8245501008033016, "learning_rate": 9.507701423309616e-06, "loss": 0.7091, "step": 5487 }, { "epoch": 0.16819909280372686, "grad_norm": 1.9010354157192642, "learning_rate": 9.507486646767872e-06, "loss": 0.7806, "step": 5488 }, { "epoch": 0.16822974132646806, "grad_norm": 1.9814750685361169, "learning_rate": 9.507271825812709e-06, "loss": 0.7933, "step": 5489 }, { "epoch": 0.16826038984920927, "grad_norm": 1.7438998344702756, "learning_rate": 9.507056960446243e-06, "loss": 0.6959, "step": 5490 }, { "epoch": 0.16829103837195047, "grad_norm": 1.850882408980375, "learning_rate": 9.506842050670593e-06, "loss": 0.7576, "step": 5491 }, { "epoch": 0.16832168689469168, "grad_norm": 1.7764173683916067, "learning_rate": 9.506627096487875e-06, "loss": 0.7465, "step": 5492 }, { "epoch": 0.16835233541743289, "grad_norm": 2.1229604405082316, "learning_rate": 9.506412097900206e-06, "loss": 0.8174, "step": 5493 }, { "epoch": 0.1683829839401741, "grad_norm": 2.0076607738672543, "learning_rate": 9.506197054909708e-06, "loss": 0.732, "step": 5494 }, { "epoch": 0.1684136324629153, "grad_norm": 1.757287476750036, "learning_rate": 9.505981967518493e-06, "loss": 0.6812, "step": 5495 }, { "epoch": 0.1684442809856565, "grad_norm": 1.7893971600538634, "learning_rate": 9.50576683572869e-06, "loss": 0.7139, "step": 5496 }, { "epoch": 0.1684749295083977, "grad_norm": 1.7174080177307827, "learning_rate": 9.50555165954241e-06, "loss": 0.7454, "step": 5497 }, { "epoch": 0.1685055780311389, "grad_norm": 2.0026269410067625, "learning_rate": 9.505336438961778e-06, "loss": 0.8025, "step": 5498 }, { "epoch": 0.1685362265538801, "grad_norm": 1.8229852335675327, "learning_rate": 9.505121173988913e-06, "loss": 0.7698, "step": 5499 }, { "epoch": 0.1685668750766213, "grad_norm": 1.963248199644064, "learning_rate": 9.504905864625935e-06, "loss": 0.8709, "step": 5500 }, { "epoch": 0.1685975235993625, "grad_norm": 2.0342411519674646, "learning_rate": 9.50469051087497e-06, "loss": 0.7847, "step": 5501 }, { "epoch": 0.1686281721221037, "grad_norm": 1.9171392727648264, "learning_rate": 9.504475112738134e-06, "loss": 0.7535, "step": 5502 }, { "epoch": 0.16865882064484491, "grad_norm": 1.9497524925689589, "learning_rate": 9.504259670217553e-06, "loss": 0.7147, "step": 5503 }, { "epoch": 0.16868946916758612, "grad_norm": 1.014306441099802, "learning_rate": 9.50404418331535e-06, "loss": 0.509, "step": 5504 }, { "epoch": 0.16872011769032733, "grad_norm": 0.9017151082014905, "learning_rate": 9.503828652033647e-06, "loss": 0.5215, "step": 5505 }, { "epoch": 0.16875076621306853, "grad_norm": 2.176527382442755, "learning_rate": 9.503613076374568e-06, "loss": 0.7985, "step": 5506 }, { "epoch": 0.16878141473580974, "grad_norm": 0.8225099076132971, "learning_rate": 9.503397456340235e-06, "loss": 0.5104, "step": 5507 }, { "epoch": 0.16881206325855094, "grad_norm": 1.9182867337045713, "learning_rate": 9.503181791932777e-06, "loss": 0.7006, "step": 5508 }, { "epoch": 0.16884271178129215, "grad_norm": 0.9697791693037344, "learning_rate": 9.502966083154314e-06, "loss": 0.5158, "step": 5509 }, { "epoch": 0.16887336030403335, "grad_norm": 1.864158127601389, "learning_rate": 9.502750330006977e-06, "loss": 0.7072, "step": 5510 }, { "epoch": 0.16890400882677456, "grad_norm": 1.9278805134811938, "learning_rate": 9.502534532492889e-06, "loss": 0.7429, "step": 5511 }, { "epoch": 0.16893465734951577, "grad_norm": 1.9151665438495942, "learning_rate": 9.502318690614175e-06, "loss": 0.7053, "step": 5512 }, { "epoch": 0.16896530587225694, "grad_norm": 0.837025844295299, "learning_rate": 9.502102804372962e-06, "loss": 0.5084, "step": 5513 }, { "epoch": 0.16899595439499815, "grad_norm": 1.94000270621854, "learning_rate": 9.501886873771378e-06, "loss": 0.7184, "step": 5514 }, { "epoch": 0.16902660291773935, "grad_norm": 1.8176772579204366, "learning_rate": 9.501670898811552e-06, "loss": 0.7053, "step": 5515 }, { "epoch": 0.16905725144048056, "grad_norm": 2.3309158674831782, "learning_rate": 9.50145487949561e-06, "loss": 0.7277, "step": 5516 }, { "epoch": 0.16908789996322177, "grad_norm": 1.8520655466656628, "learning_rate": 9.501238815825684e-06, "loss": 0.7541, "step": 5517 }, { "epoch": 0.16911854848596297, "grad_norm": 0.8879078159735497, "learning_rate": 9.501022707803898e-06, "loss": 0.5034, "step": 5518 }, { "epoch": 0.16914919700870418, "grad_norm": 1.6474204648484125, "learning_rate": 9.500806555432384e-06, "loss": 0.7156, "step": 5519 }, { "epoch": 0.16917984553144538, "grad_norm": 1.7630416718405577, "learning_rate": 9.50059035871327e-06, "loss": 0.6972, "step": 5520 }, { "epoch": 0.1692104940541866, "grad_norm": 1.8216747797102113, "learning_rate": 9.500374117648689e-06, "loss": 0.6855, "step": 5521 }, { "epoch": 0.1692411425769278, "grad_norm": 1.8900442752918787, "learning_rate": 9.500157832240772e-06, "loss": 0.6696, "step": 5522 }, { "epoch": 0.169271791099669, "grad_norm": 1.695197679849267, "learning_rate": 9.499941502491646e-06, "loss": 0.7182, "step": 5523 }, { "epoch": 0.1693024396224102, "grad_norm": 1.8285610294456798, "learning_rate": 9.499725128403446e-06, "loss": 0.6968, "step": 5524 }, { "epoch": 0.1693330881451514, "grad_norm": 2.167589468795949, "learning_rate": 9.499508709978303e-06, "loss": 0.7228, "step": 5525 }, { "epoch": 0.16936373666789262, "grad_norm": 1.7471382566500087, "learning_rate": 9.499292247218348e-06, "loss": 0.7641, "step": 5526 }, { "epoch": 0.16939438519063382, "grad_norm": 1.8683291548857413, "learning_rate": 9.499075740125719e-06, "loss": 0.794, "step": 5527 }, { "epoch": 0.16942503371337503, "grad_norm": 1.7403271739818917, "learning_rate": 9.498859188702541e-06, "loss": 0.7272, "step": 5528 }, { "epoch": 0.1694556822361162, "grad_norm": 1.6424977748424467, "learning_rate": 9.498642592950955e-06, "loss": 0.6736, "step": 5529 }, { "epoch": 0.1694863307588574, "grad_norm": 1.9045924335415325, "learning_rate": 9.498425952873092e-06, "loss": 0.666, "step": 5530 }, { "epoch": 0.16951697928159862, "grad_norm": 2.0705648583240945, "learning_rate": 9.498209268471089e-06, "loss": 0.7459, "step": 5531 }, { "epoch": 0.16954762780433982, "grad_norm": 1.9215409874614207, "learning_rate": 9.497992539747076e-06, "loss": 0.7211, "step": 5532 }, { "epoch": 0.16957827632708103, "grad_norm": 2.0207599330039026, "learning_rate": 9.497775766703193e-06, "loss": 0.7519, "step": 5533 }, { "epoch": 0.16960892484982223, "grad_norm": 0.9672937553044787, "learning_rate": 9.497558949341575e-06, "loss": 0.5101, "step": 5534 }, { "epoch": 0.16963957337256344, "grad_norm": 1.9043492890411755, "learning_rate": 9.497342087664355e-06, "loss": 0.779, "step": 5535 }, { "epoch": 0.16967022189530465, "grad_norm": 1.9911951628431142, "learning_rate": 9.497125181673676e-06, "loss": 0.8471, "step": 5536 }, { "epoch": 0.16970087041804585, "grad_norm": 1.8698186494112718, "learning_rate": 9.496908231371672e-06, "loss": 0.774, "step": 5537 }, { "epoch": 0.16973151894078706, "grad_norm": 0.8675126865583478, "learning_rate": 9.49669123676048e-06, "loss": 0.5386, "step": 5538 }, { "epoch": 0.16976216746352826, "grad_norm": 1.696544582245715, "learning_rate": 9.496474197842238e-06, "loss": 0.7006, "step": 5539 }, { "epoch": 0.16979281598626947, "grad_norm": 1.678144135959628, "learning_rate": 9.496257114619085e-06, "loss": 0.7355, "step": 5540 }, { "epoch": 0.16982346450901067, "grad_norm": 0.7959035891663752, "learning_rate": 9.496039987093162e-06, "loss": 0.4955, "step": 5541 }, { "epoch": 0.16985411303175188, "grad_norm": 1.8337893751494705, "learning_rate": 9.495822815266605e-06, "loss": 0.6581, "step": 5542 }, { "epoch": 0.16988476155449309, "grad_norm": 1.744607630730739, "learning_rate": 9.495605599141555e-06, "loss": 0.8055, "step": 5543 }, { "epoch": 0.16991541007723426, "grad_norm": 1.89154177690632, "learning_rate": 9.495388338720155e-06, "loss": 0.6705, "step": 5544 }, { "epoch": 0.16994605859997547, "grad_norm": 1.90296685520996, "learning_rate": 9.495171034004542e-06, "loss": 0.7413, "step": 5545 }, { "epoch": 0.16997670712271667, "grad_norm": 0.8426752267403551, "learning_rate": 9.494953684996859e-06, "loss": 0.4955, "step": 5546 }, { "epoch": 0.17000735564545788, "grad_norm": 2.0434655123596737, "learning_rate": 9.494736291699247e-06, "loss": 0.7425, "step": 5547 }, { "epoch": 0.1700380041681991, "grad_norm": 1.7863756488392646, "learning_rate": 9.49451885411385e-06, "loss": 0.7577, "step": 5548 }, { "epoch": 0.1700686526909403, "grad_norm": 1.7339654887271438, "learning_rate": 9.494301372242807e-06, "loss": 0.78, "step": 5549 }, { "epoch": 0.1700993012136815, "grad_norm": 1.6646715923958633, "learning_rate": 9.494083846088263e-06, "loss": 0.777, "step": 5550 }, { "epoch": 0.1701299497364227, "grad_norm": 1.9488066058810407, "learning_rate": 9.493866275652359e-06, "loss": 0.678, "step": 5551 }, { "epoch": 0.1701605982591639, "grad_norm": 2.2094140399298214, "learning_rate": 9.493648660937244e-06, "loss": 0.7822, "step": 5552 }, { "epoch": 0.17019124678190511, "grad_norm": 1.9288883293173522, "learning_rate": 9.493431001945056e-06, "loss": 0.7217, "step": 5553 }, { "epoch": 0.17022189530464632, "grad_norm": 0.9059222004483666, "learning_rate": 9.493213298677945e-06, "loss": 0.5023, "step": 5554 }, { "epoch": 0.17025254382738753, "grad_norm": 1.9591856020246328, "learning_rate": 9.492995551138054e-06, "loss": 0.7661, "step": 5555 }, { "epoch": 0.17028319235012873, "grad_norm": 1.9901915127765635, "learning_rate": 9.492777759327528e-06, "loss": 0.7789, "step": 5556 }, { "epoch": 0.17031384087286994, "grad_norm": 0.8265248570277627, "learning_rate": 9.492559923248512e-06, "loss": 0.5174, "step": 5557 }, { "epoch": 0.17034448939561114, "grad_norm": 1.8175697528866224, "learning_rate": 9.492342042903153e-06, "loss": 0.7992, "step": 5558 }, { "epoch": 0.17037513791835235, "grad_norm": 1.7026369830287142, "learning_rate": 9.4921241182936e-06, "loss": 0.798, "step": 5559 }, { "epoch": 0.17040578644109353, "grad_norm": 1.8365765355718582, "learning_rate": 9.491906149421998e-06, "loss": 0.8357, "step": 5560 }, { "epoch": 0.17043643496383473, "grad_norm": 1.8689410202208092, "learning_rate": 9.491688136290496e-06, "loss": 0.6387, "step": 5561 }, { "epoch": 0.17046708348657594, "grad_norm": 0.9449054412905855, "learning_rate": 9.491470078901241e-06, "loss": 0.4911, "step": 5562 }, { "epoch": 0.17049773200931714, "grad_norm": 0.8381348412995975, "learning_rate": 9.491251977256383e-06, "loss": 0.5083, "step": 5563 }, { "epoch": 0.17052838053205835, "grad_norm": 1.7810244193928708, "learning_rate": 9.49103383135807e-06, "loss": 0.7586, "step": 5564 }, { "epoch": 0.17055902905479955, "grad_norm": 1.6898728338878204, "learning_rate": 9.49081564120845e-06, "loss": 0.6828, "step": 5565 }, { "epoch": 0.17058967757754076, "grad_norm": 0.8472065990940847, "learning_rate": 9.490597406809676e-06, "loss": 0.4927, "step": 5566 }, { "epoch": 0.17062032610028197, "grad_norm": 1.8219561919154088, "learning_rate": 9.490379128163897e-06, "loss": 0.6322, "step": 5567 }, { "epoch": 0.17065097462302317, "grad_norm": 1.9702984858732568, "learning_rate": 9.490160805273262e-06, "loss": 0.7589, "step": 5568 }, { "epoch": 0.17068162314576438, "grad_norm": 1.8242034188121286, "learning_rate": 9.489942438139925e-06, "loss": 0.7625, "step": 5569 }, { "epoch": 0.17071227166850558, "grad_norm": 2.0241515052260404, "learning_rate": 9.489724026766037e-06, "loss": 0.8416, "step": 5570 }, { "epoch": 0.1707429201912468, "grad_norm": 1.9475342864203056, "learning_rate": 9.489505571153747e-06, "loss": 0.8376, "step": 5571 }, { "epoch": 0.170773568713988, "grad_norm": 1.036601714044455, "learning_rate": 9.489287071305212e-06, "loss": 0.5127, "step": 5572 }, { "epoch": 0.1708042172367292, "grad_norm": 1.7488910147395813, "learning_rate": 9.489068527222583e-06, "loss": 0.7965, "step": 5573 }, { "epoch": 0.1708348657594704, "grad_norm": 1.724361703104298, "learning_rate": 9.488849938908011e-06, "loss": 0.6521, "step": 5574 }, { "epoch": 0.17086551428221158, "grad_norm": 0.8340528362803336, "learning_rate": 9.488631306363654e-06, "loss": 0.5037, "step": 5575 }, { "epoch": 0.1708961628049528, "grad_norm": 1.8754462168799986, "learning_rate": 9.488412629591663e-06, "loss": 0.7815, "step": 5576 }, { "epoch": 0.170926811327694, "grad_norm": 1.728811572456509, "learning_rate": 9.488193908594195e-06, "loss": 0.6607, "step": 5577 }, { "epoch": 0.1709574598504352, "grad_norm": 2.0117586826140665, "learning_rate": 9.487975143373404e-06, "loss": 0.7497, "step": 5578 }, { "epoch": 0.1709881083731764, "grad_norm": 1.7285182378160624, "learning_rate": 9.487756333931446e-06, "loss": 0.7275, "step": 5579 }, { "epoch": 0.1710187568959176, "grad_norm": 1.7674552161953856, "learning_rate": 9.487537480270474e-06, "loss": 0.6322, "step": 5580 }, { "epoch": 0.17104940541865882, "grad_norm": 1.6736177809618296, "learning_rate": 9.48731858239265e-06, "loss": 0.7295, "step": 5581 }, { "epoch": 0.17108005394140002, "grad_norm": 1.822560446510806, "learning_rate": 9.487099640300126e-06, "loss": 0.7553, "step": 5582 }, { "epoch": 0.17111070246414123, "grad_norm": 2.1345117097523443, "learning_rate": 9.486880653995063e-06, "loss": 0.8613, "step": 5583 }, { "epoch": 0.17114135098688243, "grad_norm": 0.8920315851310969, "learning_rate": 9.486661623479616e-06, "loss": 0.5002, "step": 5584 }, { "epoch": 0.17117199950962364, "grad_norm": 2.1187480201999387, "learning_rate": 9.486442548755942e-06, "loss": 0.8078, "step": 5585 }, { "epoch": 0.17120264803236485, "grad_norm": 1.901177766714581, "learning_rate": 9.486223429826205e-06, "loss": 0.6658, "step": 5586 }, { "epoch": 0.17123329655510605, "grad_norm": 1.879590751797583, "learning_rate": 9.486004266692558e-06, "loss": 0.765, "step": 5587 }, { "epoch": 0.17126394507784726, "grad_norm": 2.525390745048308, "learning_rate": 9.485785059357166e-06, "loss": 0.7588, "step": 5588 }, { "epoch": 0.17129459360058846, "grad_norm": 1.7522281874614511, "learning_rate": 9.485565807822183e-06, "loss": 0.6594, "step": 5589 }, { "epoch": 0.17132524212332967, "grad_norm": 1.6764767456852814, "learning_rate": 9.485346512089775e-06, "loss": 0.8068, "step": 5590 }, { "epoch": 0.17135589064607085, "grad_norm": 1.6508874202991037, "learning_rate": 9.485127172162098e-06, "loss": 0.6005, "step": 5591 }, { "epoch": 0.17138653916881205, "grad_norm": 2.8408581208042616, "learning_rate": 9.484907788041318e-06, "loss": 0.8585, "step": 5592 }, { "epoch": 0.17141718769155326, "grad_norm": 1.9637616170384895, "learning_rate": 9.484688359729592e-06, "loss": 0.7038, "step": 5593 }, { "epoch": 0.17144783621429446, "grad_norm": 1.7715594285230847, "learning_rate": 9.484468887229085e-06, "loss": 0.6853, "step": 5594 }, { "epoch": 0.17147848473703567, "grad_norm": 1.7086918340714838, "learning_rate": 9.484249370541958e-06, "loss": 0.7313, "step": 5595 }, { "epoch": 0.17150913325977687, "grad_norm": 1.9223822514755264, "learning_rate": 9.484029809670377e-06, "loss": 0.7324, "step": 5596 }, { "epoch": 0.17153978178251808, "grad_norm": 1.6440911201073685, "learning_rate": 9.483810204616498e-06, "loss": 0.6379, "step": 5597 }, { "epoch": 0.17157043030525929, "grad_norm": 1.7309607334812827, "learning_rate": 9.483590555382493e-06, "loss": 0.7241, "step": 5598 }, { "epoch": 0.1716010788280005, "grad_norm": 1.880777608601573, "learning_rate": 9.483370861970525e-06, "loss": 0.7705, "step": 5599 }, { "epoch": 0.1716317273507417, "grad_norm": 1.5920856674930546, "learning_rate": 9.483151124382755e-06, "loss": 0.7603, "step": 5600 }, { "epoch": 0.1716623758734829, "grad_norm": 3.7281861204042115, "learning_rate": 9.48293134262135e-06, "loss": 0.6483, "step": 5601 }, { "epoch": 0.1716930243962241, "grad_norm": 1.7896933677079843, "learning_rate": 9.482711516688475e-06, "loss": 0.8072, "step": 5602 }, { "epoch": 0.17172367291896531, "grad_norm": 1.7706841892328256, "learning_rate": 9.482491646586297e-06, "loss": 0.7944, "step": 5603 }, { "epoch": 0.17175432144170652, "grad_norm": 2.0831252149922497, "learning_rate": 9.48227173231698e-06, "loss": 0.7471, "step": 5604 }, { "epoch": 0.17178496996444773, "grad_norm": 1.6076261783011339, "learning_rate": 9.482051773882695e-06, "loss": 0.6795, "step": 5605 }, { "epoch": 0.1718156184871889, "grad_norm": 1.951804480975737, "learning_rate": 9.481831771285606e-06, "loss": 0.7253, "step": 5606 }, { "epoch": 0.1718462670099301, "grad_norm": 1.0595576843982157, "learning_rate": 9.48161172452788e-06, "loss": 0.5331, "step": 5607 }, { "epoch": 0.17187691553267131, "grad_norm": 0.8913173549110113, "learning_rate": 9.481391633611689e-06, "loss": 0.4983, "step": 5608 }, { "epoch": 0.17190756405541252, "grad_norm": 2.270999841788014, "learning_rate": 9.4811714985392e-06, "loss": 0.6623, "step": 5609 }, { "epoch": 0.17193821257815373, "grad_norm": 2.7165255723475252, "learning_rate": 9.480951319312582e-06, "loss": 0.7475, "step": 5610 }, { "epoch": 0.17196886110089493, "grad_norm": 2.102811543989467, "learning_rate": 9.480731095934003e-06, "loss": 0.7229, "step": 5611 }, { "epoch": 0.17199950962363614, "grad_norm": 1.8112937830897908, "learning_rate": 9.480510828405636e-06, "loss": 0.7045, "step": 5612 }, { "epoch": 0.17203015814637734, "grad_norm": 1.8803285267077816, "learning_rate": 9.480290516729648e-06, "loss": 0.7522, "step": 5613 }, { "epoch": 0.17206080666911855, "grad_norm": 1.8650194371128652, "learning_rate": 9.480070160908212e-06, "loss": 0.7341, "step": 5614 }, { "epoch": 0.17209145519185975, "grad_norm": 1.9118596016851557, "learning_rate": 9.479849760943498e-06, "loss": 0.7449, "step": 5615 }, { "epoch": 0.17212210371460096, "grad_norm": 2.9802878913310966, "learning_rate": 9.479629316837676e-06, "loss": 0.7256, "step": 5616 }, { "epoch": 0.17215275223734217, "grad_norm": 1.662082224785129, "learning_rate": 9.479408828592923e-06, "loss": 0.7585, "step": 5617 }, { "epoch": 0.17218340076008337, "grad_norm": 1.9160705149652364, "learning_rate": 9.479188296211407e-06, "loss": 0.7936, "step": 5618 }, { "epoch": 0.17221404928282458, "grad_norm": 2.0440206185556535, "learning_rate": 9.478967719695303e-06, "loss": 0.7073, "step": 5619 }, { "epoch": 0.17224469780556578, "grad_norm": 1.9620773911857585, "learning_rate": 9.478747099046786e-06, "loss": 0.6686, "step": 5620 }, { "epoch": 0.172275346328307, "grad_norm": 1.4944159544388, "learning_rate": 9.478526434268026e-06, "loss": 0.5257, "step": 5621 }, { "epoch": 0.17230599485104817, "grad_norm": 1.738394245877667, "learning_rate": 9.478305725361198e-06, "loss": 0.6833, "step": 5622 }, { "epoch": 0.17233664337378937, "grad_norm": 1.0080459329460913, "learning_rate": 9.478084972328481e-06, "loss": 0.5106, "step": 5623 }, { "epoch": 0.17236729189653058, "grad_norm": 2.0001179114587164, "learning_rate": 9.477864175172044e-06, "loss": 0.7608, "step": 5624 }, { "epoch": 0.17239794041927178, "grad_norm": 1.565293584950513, "learning_rate": 9.477643333894067e-06, "loss": 0.7595, "step": 5625 }, { "epoch": 0.172428588942013, "grad_norm": 1.8085211965054093, "learning_rate": 9.477422448496724e-06, "loss": 0.7772, "step": 5626 }, { "epoch": 0.1724592374647542, "grad_norm": 1.219569404242758, "learning_rate": 9.477201518982193e-06, "loss": 0.5177, "step": 5627 }, { "epoch": 0.1724898859874954, "grad_norm": 2.0838874454152725, "learning_rate": 9.47698054535265e-06, "loss": 0.7372, "step": 5628 }, { "epoch": 0.1725205345102366, "grad_norm": 1.6521752745506797, "learning_rate": 9.47675952761027e-06, "loss": 0.7316, "step": 5629 }, { "epoch": 0.1725511830329778, "grad_norm": 2.242054650126467, "learning_rate": 9.476538465757236e-06, "loss": 0.6729, "step": 5630 }, { "epoch": 0.17258183155571902, "grad_norm": 1.795190666818639, "learning_rate": 9.47631735979572e-06, "loss": 0.7842, "step": 5631 }, { "epoch": 0.17261248007846022, "grad_norm": 1.8028045328048803, "learning_rate": 9.476096209727907e-06, "loss": 0.7107, "step": 5632 }, { "epoch": 0.17264312860120143, "grad_norm": 1.2901466031276299, "learning_rate": 9.47587501555597e-06, "loss": 0.5336, "step": 5633 }, { "epoch": 0.17267377712394263, "grad_norm": 1.0552267401706736, "learning_rate": 9.475653777282093e-06, "loss": 0.5046, "step": 5634 }, { "epoch": 0.17270442564668384, "grad_norm": 1.7458911655328584, "learning_rate": 9.475432494908454e-06, "loss": 0.6676, "step": 5635 }, { "epoch": 0.17273507416942505, "grad_norm": 2.2121640158650453, "learning_rate": 9.475211168437234e-06, "loss": 0.7809, "step": 5636 }, { "epoch": 0.17276572269216625, "grad_norm": 1.6785634953832562, "learning_rate": 9.474989797870611e-06, "loss": 0.5551, "step": 5637 }, { "epoch": 0.17279637121490743, "grad_norm": 1.7324797802519132, "learning_rate": 9.47476838321077e-06, "loss": 0.7316, "step": 5638 }, { "epoch": 0.17282701973764864, "grad_norm": 1.7567134972033513, "learning_rate": 9.474546924459892e-06, "loss": 0.7662, "step": 5639 }, { "epoch": 0.17285766826038984, "grad_norm": 2.0125655095687534, "learning_rate": 9.474325421620158e-06, "loss": 0.7997, "step": 5640 }, { "epoch": 0.17288831678313105, "grad_norm": 1.7819064675523584, "learning_rate": 9.47410387469375e-06, "loss": 0.7039, "step": 5641 }, { "epoch": 0.17291896530587225, "grad_norm": 1.754039963689201, "learning_rate": 9.473882283682852e-06, "loss": 0.9138, "step": 5642 }, { "epoch": 0.17294961382861346, "grad_norm": 1.7395684131263631, "learning_rate": 9.473660648589648e-06, "loss": 0.7124, "step": 5643 }, { "epoch": 0.17298026235135466, "grad_norm": 1.7891786688806783, "learning_rate": 9.47343896941632e-06, "loss": 0.7742, "step": 5644 }, { "epoch": 0.17301091087409587, "grad_norm": 1.7872151910760445, "learning_rate": 9.473217246165055e-06, "loss": 0.7795, "step": 5645 }, { "epoch": 0.17304155939683707, "grad_norm": 1.732046919326293, "learning_rate": 9.472995478838034e-06, "loss": 0.7635, "step": 5646 }, { "epoch": 0.17307220791957828, "grad_norm": 1.8313294630648098, "learning_rate": 9.472773667437444e-06, "loss": 0.7383, "step": 5647 }, { "epoch": 0.17310285644231949, "grad_norm": 1.782775267105498, "learning_rate": 9.47255181196547e-06, "loss": 0.7416, "step": 5648 }, { "epoch": 0.1731335049650607, "grad_norm": 2.23810907072418, "learning_rate": 9.4723299124243e-06, "loss": 0.8019, "step": 5649 }, { "epoch": 0.1731641534878019, "grad_norm": 1.7716707917989054, "learning_rate": 9.47210796881612e-06, "loss": 0.7511, "step": 5650 }, { "epoch": 0.1731948020105431, "grad_norm": 1.7913801103709626, "learning_rate": 9.471885981143114e-06, "loss": 0.7474, "step": 5651 }, { "epoch": 0.1732254505332843, "grad_norm": 1.5655199130623774, "learning_rate": 9.471663949407472e-06, "loss": 0.5391, "step": 5652 }, { "epoch": 0.1732560990560255, "grad_norm": 1.6409591034069644, "learning_rate": 9.471441873611382e-06, "loss": 0.6943, "step": 5653 }, { "epoch": 0.1732867475787667, "grad_norm": 2.06048143718854, "learning_rate": 9.47121975375703e-06, "loss": 0.7677, "step": 5654 }, { "epoch": 0.1733173961015079, "grad_norm": 1.7895606366421748, "learning_rate": 9.470997589846607e-06, "loss": 0.8348, "step": 5655 }, { "epoch": 0.1733480446242491, "grad_norm": 1.9782408963539468, "learning_rate": 9.4707753818823e-06, "loss": 0.7167, "step": 5656 }, { "epoch": 0.1733786931469903, "grad_norm": 1.9525568529364614, "learning_rate": 9.470553129866297e-06, "loss": 0.7803, "step": 5657 }, { "epoch": 0.17340934166973151, "grad_norm": 1.6807147446955009, "learning_rate": 9.470330833800794e-06, "loss": 0.7289, "step": 5658 }, { "epoch": 0.17343999019247272, "grad_norm": 1.0078984431385547, "learning_rate": 9.470108493687976e-06, "loss": 0.5127, "step": 5659 }, { "epoch": 0.17347063871521393, "grad_norm": 1.6804530456025093, "learning_rate": 9.469886109530034e-06, "loss": 0.7023, "step": 5660 }, { "epoch": 0.17350128723795513, "grad_norm": 1.923265257945296, "learning_rate": 9.469663681329161e-06, "loss": 0.7189, "step": 5661 }, { "epoch": 0.17353193576069634, "grad_norm": 1.893439206654434, "learning_rate": 9.469441209087549e-06, "loss": 0.7613, "step": 5662 }, { "epoch": 0.17356258428343754, "grad_norm": 1.8087184272340588, "learning_rate": 9.469218692807389e-06, "loss": 0.768, "step": 5663 }, { "epoch": 0.17359323280617875, "grad_norm": 1.782536913687053, "learning_rate": 9.468996132490874e-06, "loss": 0.8007, "step": 5664 }, { "epoch": 0.17362388132891995, "grad_norm": 1.6359487854624317, "learning_rate": 9.468773528140195e-06, "loss": 0.7161, "step": 5665 }, { "epoch": 0.17365452985166116, "grad_norm": 0.8770797796832163, "learning_rate": 9.46855087975755e-06, "loss": 0.504, "step": 5666 }, { "epoch": 0.17368517837440237, "grad_norm": 0.8528935343402324, "learning_rate": 9.468328187345128e-06, "loss": 0.5134, "step": 5667 }, { "epoch": 0.17371582689714357, "grad_norm": 1.907883058439203, "learning_rate": 9.468105450905125e-06, "loss": 0.7295, "step": 5668 }, { "epoch": 0.17374647541988475, "grad_norm": 2.154504080176605, "learning_rate": 9.467882670439736e-06, "loss": 0.7795, "step": 5669 }, { "epoch": 0.17377712394262596, "grad_norm": 1.6860584607169002, "learning_rate": 9.467659845951156e-06, "loss": 0.6398, "step": 5670 }, { "epoch": 0.17380777246536716, "grad_norm": 1.9633194133828853, "learning_rate": 9.46743697744158e-06, "loss": 0.8103, "step": 5671 }, { "epoch": 0.17383842098810837, "grad_norm": 3.310618644639345, "learning_rate": 9.467214064913205e-06, "loss": 0.6911, "step": 5672 }, { "epoch": 0.17386906951084957, "grad_norm": 1.7380345441304608, "learning_rate": 9.466991108368226e-06, "loss": 0.7571, "step": 5673 }, { "epoch": 0.17389971803359078, "grad_norm": 1.8787861412714502, "learning_rate": 9.466768107808842e-06, "loss": 0.7154, "step": 5674 }, { "epoch": 0.17393036655633198, "grad_norm": 2.1090923419103382, "learning_rate": 9.466545063237248e-06, "loss": 0.7576, "step": 5675 }, { "epoch": 0.1739610150790732, "grad_norm": 1.94899055841435, "learning_rate": 9.466321974655644e-06, "loss": 0.7556, "step": 5676 }, { "epoch": 0.1739916636018144, "grad_norm": 2.0312141467882845, "learning_rate": 9.466098842066224e-06, "loss": 0.7731, "step": 5677 }, { "epoch": 0.1740223121245556, "grad_norm": 1.9001155124647058, "learning_rate": 9.465875665471193e-06, "loss": 0.7618, "step": 5678 }, { "epoch": 0.1740529606472968, "grad_norm": 1.9193515474190415, "learning_rate": 9.465652444872744e-06, "loss": 0.6816, "step": 5679 }, { "epoch": 0.174083609170038, "grad_norm": 1.8244027229281947, "learning_rate": 9.46542918027308e-06, "loss": 0.6901, "step": 5680 }, { "epoch": 0.17411425769277922, "grad_norm": 1.7478453482610379, "learning_rate": 9.465205871674399e-06, "loss": 0.6531, "step": 5681 }, { "epoch": 0.17414490621552042, "grad_norm": 1.6271301644102623, "learning_rate": 9.464982519078903e-06, "loss": 0.723, "step": 5682 }, { "epoch": 0.17417555473826163, "grad_norm": 1.1109077111438201, "learning_rate": 9.46475912248879e-06, "loss": 0.4973, "step": 5683 }, { "epoch": 0.1742062032610028, "grad_norm": 2.008110275209393, "learning_rate": 9.464535681906264e-06, "loss": 0.7659, "step": 5684 }, { "epoch": 0.174236851783744, "grad_norm": 2.0178058861438086, "learning_rate": 9.464312197333526e-06, "loss": 0.7204, "step": 5685 }, { "epoch": 0.17426750030648522, "grad_norm": 0.8182564779178952, "learning_rate": 9.464088668772777e-06, "loss": 0.494, "step": 5686 }, { "epoch": 0.17429814882922642, "grad_norm": 1.8634150788638992, "learning_rate": 9.463865096226221e-06, "loss": 0.7544, "step": 5687 }, { "epoch": 0.17432879735196763, "grad_norm": 1.7694653967205287, "learning_rate": 9.46364147969606e-06, "loss": 0.7231, "step": 5688 }, { "epoch": 0.17435944587470883, "grad_norm": 1.8253642866151585, "learning_rate": 9.463417819184498e-06, "loss": 0.7556, "step": 5689 }, { "epoch": 0.17439009439745004, "grad_norm": 1.648285046814354, "learning_rate": 9.463194114693736e-06, "loss": 0.6993, "step": 5690 }, { "epoch": 0.17442074292019125, "grad_norm": 1.0175106398902356, "learning_rate": 9.462970366225983e-06, "loss": 0.5166, "step": 5691 }, { "epoch": 0.17445139144293245, "grad_norm": 1.8447598253121582, "learning_rate": 9.46274657378344e-06, "loss": 0.7492, "step": 5692 }, { "epoch": 0.17448203996567366, "grad_norm": 1.9478960125352531, "learning_rate": 9.462522737368311e-06, "loss": 0.7056, "step": 5693 }, { "epoch": 0.17451268848841486, "grad_norm": 1.8991166421133308, "learning_rate": 9.462298856982808e-06, "loss": 0.6617, "step": 5694 }, { "epoch": 0.17454333701115607, "grad_norm": 1.8237835800549111, "learning_rate": 9.46207493262913e-06, "loss": 0.6844, "step": 5695 }, { "epoch": 0.17457398553389727, "grad_norm": 1.895926334144448, "learning_rate": 9.461850964309485e-06, "loss": 0.8083, "step": 5696 }, { "epoch": 0.17460463405663848, "grad_norm": 1.810099581271447, "learning_rate": 9.461626952026083e-06, "loss": 0.691, "step": 5697 }, { "epoch": 0.17463528257937969, "grad_norm": 1.7164126427140733, "learning_rate": 9.46140289578113e-06, "loss": 0.7643, "step": 5698 }, { "epoch": 0.1746659311021209, "grad_norm": 1.8493599413449915, "learning_rate": 9.461178795576829e-06, "loss": 0.7551, "step": 5699 }, { "epoch": 0.17469657962486207, "grad_norm": 1.9224785992158577, "learning_rate": 9.460954651415395e-06, "loss": 0.6596, "step": 5700 }, { "epoch": 0.17472722814760328, "grad_norm": 1.6739978184329856, "learning_rate": 9.460730463299032e-06, "loss": 0.7488, "step": 5701 }, { "epoch": 0.17475787667034448, "grad_norm": 1.7549680199934088, "learning_rate": 9.46050623122995e-06, "loss": 0.7266, "step": 5702 }, { "epoch": 0.1747885251930857, "grad_norm": 1.8869092030040246, "learning_rate": 9.46028195521036e-06, "loss": 0.8391, "step": 5703 }, { "epoch": 0.1748191737158269, "grad_norm": 1.9365671836017007, "learning_rate": 9.46005763524247e-06, "loss": 0.673, "step": 5704 }, { "epoch": 0.1748498222385681, "grad_norm": 1.7810581639376322, "learning_rate": 9.459833271328491e-06, "loss": 0.7477, "step": 5705 }, { "epoch": 0.1748804707613093, "grad_norm": 1.9651816792962449, "learning_rate": 9.459608863470635e-06, "loss": 0.8218, "step": 5706 }, { "epoch": 0.1749111192840505, "grad_norm": 0.974668901115887, "learning_rate": 9.45938441167111e-06, "loss": 0.5049, "step": 5707 }, { "epoch": 0.17494176780679171, "grad_norm": 1.9261307462517137, "learning_rate": 9.45915991593213e-06, "loss": 0.7834, "step": 5708 }, { "epoch": 0.17497241632953292, "grad_norm": 1.8228576801153493, "learning_rate": 9.458935376255907e-06, "loss": 0.7682, "step": 5709 }, { "epoch": 0.17500306485227413, "grad_norm": 1.6565656558492585, "learning_rate": 9.458710792644652e-06, "loss": 0.7401, "step": 5710 }, { "epoch": 0.17503371337501533, "grad_norm": 1.6861600650614865, "learning_rate": 9.45848616510058e-06, "loss": 0.5776, "step": 5711 }, { "epoch": 0.17506436189775654, "grad_norm": 1.8785862479466169, "learning_rate": 9.458261493625903e-06, "loss": 0.7302, "step": 5712 }, { "epoch": 0.17509501042049774, "grad_norm": 0.8720517184062498, "learning_rate": 9.458036778222833e-06, "loss": 0.5207, "step": 5713 }, { "epoch": 0.17512565894323895, "grad_norm": 1.9471812491046847, "learning_rate": 9.457812018893587e-06, "loss": 0.6655, "step": 5714 }, { "epoch": 0.17515630746598013, "grad_norm": 1.827370846207321, "learning_rate": 9.457587215640379e-06, "loss": 0.7788, "step": 5715 }, { "epoch": 0.17518695598872133, "grad_norm": 1.7859459719251893, "learning_rate": 9.457362368465424e-06, "loss": 0.7595, "step": 5716 }, { "epoch": 0.17521760451146254, "grad_norm": 0.7965323432242155, "learning_rate": 9.457137477370936e-06, "loss": 0.522, "step": 5717 }, { "epoch": 0.17524825303420374, "grad_norm": 1.7491988991682585, "learning_rate": 9.456912542359132e-06, "loss": 0.8001, "step": 5718 }, { "epoch": 0.17527890155694495, "grad_norm": 1.907090838101954, "learning_rate": 9.45668756343223e-06, "loss": 0.7132, "step": 5719 }, { "epoch": 0.17530955007968615, "grad_norm": 1.5985983888101893, "learning_rate": 9.456462540592442e-06, "loss": 0.6302, "step": 5720 }, { "epoch": 0.17534019860242736, "grad_norm": 1.9006113898705714, "learning_rate": 9.456237473841991e-06, "loss": 0.7369, "step": 5721 }, { "epoch": 0.17537084712516857, "grad_norm": 2.217326772664082, "learning_rate": 9.456012363183091e-06, "loss": 0.7961, "step": 5722 }, { "epoch": 0.17540149564790977, "grad_norm": 1.8036619024156408, "learning_rate": 9.455787208617962e-06, "loss": 0.7531, "step": 5723 }, { "epoch": 0.17543214417065098, "grad_norm": 1.9075384458434117, "learning_rate": 9.455562010148821e-06, "loss": 0.7936, "step": 5724 }, { "epoch": 0.17546279269339218, "grad_norm": 1.5886458510557788, "learning_rate": 9.455336767777888e-06, "loss": 0.6519, "step": 5725 }, { "epoch": 0.1754934412161334, "grad_norm": 1.7399761909356275, "learning_rate": 9.45511148150738e-06, "loss": 0.685, "step": 5726 }, { "epoch": 0.1755240897388746, "grad_norm": 1.0901201679087262, "learning_rate": 9.45488615133952e-06, "loss": 0.5066, "step": 5727 }, { "epoch": 0.1755547382616158, "grad_norm": 1.9006868968498747, "learning_rate": 9.454660777276528e-06, "loss": 0.7361, "step": 5728 }, { "epoch": 0.175585386784357, "grad_norm": 1.7845863131590807, "learning_rate": 9.454435359320622e-06, "loss": 0.7225, "step": 5729 }, { "epoch": 0.1756160353070982, "grad_norm": 0.8113712613035167, "learning_rate": 9.454209897474025e-06, "loss": 0.5131, "step": 5730 }, { "epoch": 0.1756466838298394, "grad_norm": 0.827095645517229, "learning_rate": 9.45398439173896e-06, "loss": 0.5036, "step": 5731 }, { "epoch": 0.1756773323525806, "grad_norm": 1.6342563460544417, "learning_rate": 9.453758842117645e-06, "loss": 0.7327, "step": 5732 }, { "epoch": 0.1757079808753218, "grad_norm": 0.8334942014634006, "learning_rate": 9.453533248612305e-06, "loss": 0.4961, "step": 5733 }, { "epoch": 0.175738629398063, "grad_norm": 1.7480244296604095, "learning_rate": 9.453307611225162e-06, "loss": 0.7513, "step": 5734 }, { "epoch": 0.1757692779208042, "grad_norm": 1.5123369590340752, "learning_rate": 9.45308192995844e-06, "loss": 0.7334, "step": 5735 }, { "epoch": 0.17579992644354542, "grad_norm": 2.022638334932778, "learning_rate": 9.452856204814364e-06, "loss": 0.7208, "step": 5736 }, { "epoch": 0.17583057496628662, "grad_norm": 1.84029326619947, "learning_rate": 9.452630435795155e-06, "loss": 0.7079, "step": 5737 }, { "epoch": 0.17586122348902783, "grad_norm": 1.668218577933512, "learning_rate": 9.452404622903039e-06, "loss": 0.6915, "step": 5738 }, { "epoch": 0.17589187201176903, "grad_norm": 0.8890738600091329, "learning_rate": 9.452178766140241e-06, "loss": 0.5195, "step": 5739 }, { "epoch": 0.17592252053451024, "grad_norm": 1.8680109347309453, "learning_rate": 9.451952865508986e-06, "loss": 0.7467, "step": 5740 }, { "epoch": 0.17595316905725145, "grad_norm": 1.7143289575964116, "learning_rate": 9.451726921011501e-06, "loss": 0.7499, "step": 5741 }, { "epoch": 0.17598381757999265, "grad_norm": 2.0626602435004435, "learning_rate": 9.451500932650014e-06, "loss": 0.812, "step": 5742 }, { "epoch": 0.17601446610273386, "grad_norm": 1.9012573876591592, "learning_rate": 9.451274900426746e-06, "loss": 0.7166, "step": 5743 }, { "epoch": 0.17604511462547506, "grad_norm": 1.7035906306306412, "learning_rate": 9.451048824343929e-06, "loss": 0.6817, "step": 5744 }, { "epoch": 0.17607576314821627, "grad_norm": 1.8638621908707322, "learning_rate": 9.450822704403788e-06, "loss": 0.7752, "step": 5745 }, { "epoch": 0.17610641167095745, "grad_norm": 1.6003726877449505, "learning_rate": 9.450596540608553e-06, "loss": 0.6392, "step": 5746 }, { "epoch": 0.17613706019369865, "grad_norm": 1.6529601051680127, "learning_rate": 9.450370332960452e-06, "loss": 0.7044, "step": 5747 }, { "epoch": 0.17616770871643986, "grad_norm": 0.9191211621205951, "learning_rate": 9.450144081461711e-06, "loss": 0.5197, "step": 5748 }, { "epoch": 0.17619835723918106, "grad_norm": 1.8345111203617988, "learning_rate": 9.449917786114564e-06, "loss": 0.6627, "step": 5749 }, { "epoch": 0.17622900576192227, "grad_norm": 1.8837104171824144, "learning_rate": 9.449691446921238e-06, "loss": 0.7072, "step": 5750 }, { "epoch": 0.17625965428466348, "grad_norm": 0.8359224169650957, "learning_rate": 9.449465063883964e-06, "loss": 0.4894, "step": 5751 }, { "epoch": 0.17629030280740468, "grad_norm": 1.8678176680049385, "learning_rate": 9.449238637004973e-06, "loss": 0.7649, "step": 5752 }, { "epoch": 0.1763209513301459, "grad_norm": 1.722350232307049, "learning_rate": 9.449012166286493e-06, "loss": 0.6888, "step": 5753 }, { "epoch": 0.1763515998528871, "grad_norm": 1.8521802576395903, "learning_rate": 9.44878565173076e-06, "loss": 0.7244, "step": 5754 }, { "epoch": 0.1763822483756283, "grad_norm": 1.788802200357734, "learning_rate": 9.448559093340003e-06, "loss": 0.6922, "step": 5755 }, { "epoch": 0.1764128968983695, "grad_norm": 2.026475708765547, "learning_rate": 9.448332491116454e-06, "loss": 0.8257, "step": 5756 }, { "epoch": 0.1764435454211107, "grad_norm": 1.9826421669401368, "learning_rate": 9.448105845062348e-06, "loss": 0.7097, "step": 5757 }, { "epoch": 0.17647419394385191, "grad_norm": 1.6058442454553261, "learning_rate": 9.447879155179916e-06, "loss": 0.7718, "step": 5758 }, { "epoch": 0.17650484246659312, "grad_norm": 2.043485300613341, "learning_rate": 9.447652421471394e-06, "loss": 0.6689, "step": 5759 }, { "epoch": 0.17653549098933433, "grad_norm": 1.7590226259778299, "learning_rate": 9.447425643939014e-06, "loss": 0.793, "step": 5760 }, { "epoch": 0.17656613951207553, "grad_norm": 1.8460788159725612, "learning_rate": 9.447198822585011e-06, "loss": 0.72, "step": 5761 }, { "epoch": 0.1765967880348167, "grad_norm": 1.878590516513209, "learning_rate": 9.44697195741162e-06, "loss": 0.7661, "step": 5762 }, { "epoch": 0.17662743655755792, "grad_norm": 0.8950772274981739, "learning_rate": 9.446745048421077e-06, "loss": 0.4869, "step": 5763 }, { "epoch": 0.17665808508029912, "grad_norm": 1.7756787091676256, "learning_rate": 9.446518095615618e-06, "loss": 0.7665, "step": 5764 }, { "epoch": 0.17668873360304033, "grad_norm": 1.9000531864856771, "learning_rate": 9.446291098997477e-06, "loss": 0.7793, "step": 5765 }, { "epoch": 0.17671938212578153, "grad_norm": 0.8787859751913973, "learning_rate": 9.446064058568894e-06, "loss": 0.4924, "step": 5766 }, { "epoch": 0.17675003064852274, "grad_norm": 1.6707331137080594, "learning_rate": 9.445836974332103e-06, "loss": 0.7315, "step": 5767 }, { "epoch": 0.17678067917126394, "grad_norm": 1.9418733296388548, "learning_rate": 9.445609846289342e-06, "loss": 0.8675, "step": 5768 }, { "epoch": 0.17681132769400515, "grad_norm": 2.287554131954926, "learning_rate": 9.44538267444285e-06, "loss": 0.8014, "step": 5769 }, { "epoch": 0.17684197621674635, "grad_norm": 1.8018527715932224, "learning_rate": 9.445155458794867e-06, "loss": 0.7484, "step": 5770 }, { "epoch": 0.17687262473948756, "grad_norm": 1.637121740953951, "learning_rate": 9.444928199347627e-06, "loss": 0.8071, "step": 5771 }, { "epoch": 0.17690327326222877, "grad_norm": 1.7014331525059323, "learning_rate": 9.444700896103373e-06, "loss": 0.7811, "step": 5772 }, { "epoch": 0.17693392178496997, "grad_norm": 1.9595151400722906, "learning_rate": 9.444473549064346e-06, "loss": 0.703, "step": 5773 }, { "epoch": 0.17696457030771118, "grad_norm": 1.6441382586565167, "learning_rate": 9.444246158232783e-06, "loss": 0.6767, "step": 5774 }, { "epoch": 0.17699521883045238, "grad_norm": 1.7715483824540856, "learning_rate": 9.444018723610925e-06, "loss": 0.7514, "step": 5775 }, { "epoch": 0.1770258673531936, "grad_norm": 1.8113219553116782, "learning_rate": 9.443791245201013e-06, "loss": 0.7127, "step": 5776 }, { "epoch": 0.17705651587593477, "grad_norm": 1.6823826241099253, "learning_rate": 9.443563723005288e-06, "loss": 0.7488, "step": 5777 }, { "epoch": 0.17708716439867597, "grad_norm": 2.1519720361651715, "learning_rate": 9.443336157025995e-06, "loss": 0.6278, "step": 5778 }, { "epoch": 0.17711781292141718, "grad_norm": 1.9314738508750795, "learning_rate": 9.443108547265375e-06, "loss": 0.7274, "step": 5779 }, { "epoch": 0.17714846144415838, "grad_norm": 1.9399494064439793, "learning_rate": 9.442880893725667e-06, "loss": 0.7324, "step": 5780 }, { "epoch": 0.1771791099668996, "grad_norm": 2.081476505273356, "learning_rate": 9.442653196409117e-06, "loss": 0.6347, "step": 5781 }, { "epoch": 0.1772097584896408, "grad_norm": 1.9561710542089297, "learning_rate": 9.44242545531797e-06, "loss": 0.819, "step": 5782 }, { "epoch": 0.177240407012382, "grad_norm": 1.8839511350435951, "learning_rate": 9.442197670454466e-06, "loss": 0.7583, "step": 5783 }, { "epoch": 0.1772710555351232, "grad_norm": 1.7881559514458165, "learning_rate": 9.441969841820853e-06, "loss": 0.6896, "step": 5784 }, { "epoch": 0.1773017040578644, "grad_norm": 1.6921306598464383, "learning_rate": 9.441741969419374e-06, "loss": 0.7519, "step": 5785 }, { "epoch": 0.17733235258060562, "grad_norm": 1.7406216780323696, "learning_rate": 9.441514053252276e-06, "loss": 0.7622, "step": 5786 }, { "epoch": 0.17736300110334682, "grad_norm": 1.84754985439868, "learning_rate": 9.441286093321803e-06, "loss": 0.7304, "step": 5787 }, { "epoch": 0.17739364962608803, "grad_norm": 1.773207354028045, "learning_rate": 9.441058089630201e-06, "loss": 0.7435, "step": 5788 }, { "epoch": 0.17742429814882923, "grad_norm": 1.7348983307309274, "learning_rate": 9.44083004217972e-06, "loss": 0.7167, "step": 5789 }, { "epoch": 0.17745494667157044, "grad_norm": 1.697935832040471, "learning_rate": 9.440601950972603e-06, "loss": 0.7077, "step": 5790 }, { "epoch": 0.17748559519431165, "grad_norm": 1.7519345724779718, "learning_rate": 9.440373816011097e-06, "loss": 0.7111, "step": 5791 }, { "epoch": 0.17751624371705285, "grad_norm": 1.86244096973403, "learning_rate": 9.440145637297453e-06, "loss": 0.7558, "step": 5792 }, { "epoch": 0.17754689223979403, "grad_norm": 1.7381317150683129, "learning_rate": 9.439917414833919e-06, "loss": 0.7619, "step": 5793 }, { "epoch": 0.17757754076253524, "grad_norm": 1.9197654254052978, "learning_rate": 9.43968914862274e-06, "loss": 0.7731, "step": 5794 }, { "epoch": 0.17760818928527644, "grad_norm": 1.6771565914169102, "learning_rate": 9.439460838666172e-06, "loss": 0.732, "step": 5795 }, { "epoch": 0.17763883780801765, "grad_norm": 1.8422831502969956, "learning_rate": 9.439232484966458e-06, "loss": 0.7412, "step": 5796 }, { "epoch": 0.17766948633075885, "grad_norm": 1.8128049759179174, "learning_rate": 9.439004087525849e-06, "loss": 0.7007, "step": 5797 }, { "epoch": 0.17770013485350006, "grad_norm": 1.0279292336439276, "learning_rate": 9.4387756463466e-06, "loss": 0.4943, "step": 5798 }, { "epoch": 0.17773078337624126, "grad_norm": 0.9657390024736109, "learning_rate": 9.438547161430957e-06, "loss": 0.5027, "step": 5799 }, { "epoch": 0.17776143189898247, "grad_norm": 0.8213963403381883, "learning_rate": 9.438318632781174e-06, "loss": 0.5125, "step": 5800 }, { "epoch": 0.17779208042172367, "grad_norm": 1.624050665816552, "learning_rate": 9.4380900603995e-06, "loss": 0.7315, "step": 5801 }, { "epoch": 0.17782272894446488, "grad_norm": 2.146100549297567, "learning_rate": 9.437861444288193e-06, "loss": 0.8071, "step": 5802 }, { "epoch": 0.1778533774672061, "grad_norm": 1.698076272359264, "learning_rate": 9.437632784449498e-06, "loss": 0.7273, "step": 5803 }, { "epoch": 0.1778840259899473, "grad_norm": 1.9517697054730918, "learning_rate": 9.437404080885673e-06, "loss": 0.7291, "step": 5804 }, { "epoch": 0.1779146745126885, "grad_norm": 2.014269851595732, "learning_rate": 9.437175333598971e-06, "loss": 0.7332, "step": 5805 }, { "epoch": 0.1779453230354297, "grad_norm": 1.7206998013096297, "learning_rate": 9.436946542591644e-06, "loss": 0.7391, "step": 5806 }, { "epoch": 0.1779759715581709, "grad_norm": 2.3020720158606767, "learning_rate": 9.436717707865948e-06, "loss": 0.8716, "step": 5807 }, { "epoch": 0.1780066200809121, "grad_norm": 2.0515013316860173, "learning_rate": 9.436488829424138e-06, "loss": 0.6837, "step": 5808 }, { "epoch": 0.1780372686036533, "grad_norm": 1.840107840106971, "learning_rate": 9.436259907268466e-06, "loss": 0.7369, "step": 5809 }, { "epoch": 0.1780679171263945, "grad_norm": 1.9964817958978358, "learning_rate": 9.436030941401192e-06, "loss": 0.6995, "step": 5810 }, { "epoch": 0.1780985656491357, "grad_norm": 1.6486556771487852, "learning_rate": 9.43580193182457e-06, "loss": 0.6285, "step": 5811 }, { "epoch": 0.1781292141718769, "grad_norm": 1.9204768571910427, "learning_rate": 9.435572878540857e-06, "loss": 0.6611, "step": 5812 }, { "epoch": 0.17815986269461812, "grad_norm": 1.80181079983011, "learning_rate": 9.435343781552308e-06, "loss": 0.6332, "step": 5813 }, { "epoch": 0.17819051121735932, "grad_norm": 1.8829445782025904, "learning_rate": 9.435114640861183e-06, "loss": 0.7841, "step": 5814 }, { "epoch": 0.17822115974010053, "grad_norm": 2.0202055203944043, "learning_rate": 9.434885456469739e-06, "loss": 0.6966, "step": 5815 }, { "epoch": 0.17825180826284173, "grad_norm": 1.708249615895977, "learning_rate": 9.434656228380233e-06, "loss": 0.7739, "step": 5816 }, { "epoch": 0.17828245678558294, "grad_norm": 1.8185136130993558, "learning_rate": 9.434426956594926e-06, "loss": 0.734, "step": 5817 }, { "epoch": 0.17831310530832414, "grad_norm": 1.9269190071897233, "learning_rate": 9.434197641116074e-06, "loss": 0.8191, "step": 5818 }, { "epoch": 0.17834375383106535, "grad_norm": 1.6579585711912772, "learning_rate": 9.433968281945939e-06, "loss": 0.8347, "step": 5819 }, { "epoch": 0.17837440235380655, "grad_norm": 1.8842667214275879, "learning_rate": 9.43373887908678e-06, "loss": 0.7757, "step": 5820 }, { "epoch": 0.17840505087654776, "grad_norm": 1.862754653015809, "learning_rate": 9.433509432540856e-06, "loss": 0.8586, "step": 5821 }, { "epoch": 0.17843569939928897, "grad_norm": 1.8249276408953818, "learning_rate": 9.433279942310431e-06, "loss": 0.79, "step": 5822 }, { "epoch": 0.17846634792203017, "grad_norm": 1.6630379813333447, "learning_rate": 9.433050408397763e-06, "loss": 0.596, "step": 5823 }, { "epoch": 0.17849699644477135, "grad_norm": 1.7142087636894647, "learning_rate": 9.432820830805116e-06, "loss": 0.7994, "step": 5824 }, { "epoch": 0.17852764496751256, "grad_norm": 1.3920370920686236, "learning_rate": 9.432591209534752e-06, "loss": 0.5252, "step": 5825 }, { "epoch": 0.17855829349025376, "grad_norm": 1.794202467788712, "learning_rate": 9.43236154458893e-06, "loss": 0.7274, "step": 5826 }, { "epoch": 0.17858894201299497, "grad_norm": 1.7804792596810841, "learning_rate": 9.432131835969918e-06, "loss": 0.7802, "step": 5827 }, { "epoch": 0.17861959053573617, "grad_norm": 1.8532558601856488, "learning_rate": 9.431902083679976e-06, "loss": 0.7878, "step": 5828 }, { "epoch": 0.17865023905847738, "grad_norm": 1.4008980277209808, "learning_rate": 9.43167228772137e-06, "loss": 0.5955, "step": 5829 }, { "epoch": 0.17868088758121858, "grad_norm": 0.8593681639325206, "learning_rate": 9.431442448096363e-06, "loss": 0.5076, "step": 5830 }, { "epoch": 0.1787115361039598, "grad_norm": 0.8499073296302261, "learning_rate": 9.431212564807217e-06, "loss": 0.5078, "step": 5831 }, { "epoch": 0.178742184626701, "grad_norm": 1.8997848063645866, "learning_rate": 9.430982637856202e-06, "loss": 0.8148, "step": 5832 }, { "epoch": 0.1787728331494422, "grad_norm": 1.8708416160741905, "learning_rate": 9.430752667245581e-06, "loss": 0.6323, "step": 5833 }, { "epoch": 0.1788034816721834, "grad_norm": 1.803201825024467, "learning_rate": 9.43052265297762e-06, "loss": 0.6891, "step": 5834 }, { "epoch": 0.1788341301949246, "grad_norm": 1.842901838003687, "learning_rate": 9.430292595054586e-06, "loss": 0.7656, "step": 5835 }, { "epoch": 0.17886477871766582, "grad_norm": 1.6228283274703377, "learning_rate": 9.430062493478746e-06, "loss": 0.7131, "step": 5836 }, { "epoch": 0.17889542724040702, "grad_norm": 1.6818408156257145, "learning_rate": 9.429832348252365e-06, "loss": 0.7484, "step": 5837 }, { "epoch": 0.17892607576314823, "grad_norm": 1.833636850374293, "learning_rate": 9.429602159377715e-06, "loss": 0.7903, "step": 5838 }, { "epoch": 0.1789567242858894, "grad_norm": 1.8002980059140181, "learning_rate": 9.42937192685706e-06, "loss": 0.6904, "step": 5839 }, { "epoch": 0.1789873728086306, "grad_norm": 1.1618374484895906, "learning_rate": 9.42914165069267e-06, "loss": 0.5208, "step": 5840 }, { "epoch": 0.17901802133137182, "grad_norm": 1.7529790766707414, "learning_rate": 9.428911330886816e-06, "loss": 0.7528, "step": 5841 }, { "epoch": 0.17904866985411302, "grad_norm": 1.7631926354186496, "learning_rate": 9.428680967441764e-06, "loss": 0.7405, "step": 5842 }, { "epoch": 0.17907931837685423, "grad_norm": 1.6881117719227696, "learning_rate": 9.428450560359786e-06, "loss": 0.7177, "step": 5843 }, { "epoch": 0.17910996689959544, "grad_norm": 0.8576333463548007, "learning_rate": 9.428220109643149e-06, "loss": 0.4861, "step": 5844 }, { "epoch": 0.17914061542233664, "grad_norm": 1.5855762403069071, "learning_rate": 9.427989615294128e-06, "loss": 0.6883, "step": 5845 }, { "epoch": 0.17917126394507785, "grad_norm": 1.8791246739721876, "learning_rate": 9.427759077314993e-06, "loss": 0.7195, "step": 5846 }, { "epoch": 0.17920191246781905, "grad_norm": 1.6436413413208617, "learning_rate": 9.427528495708015e-06, "loss": 0.6737, "step": 5847 }, { "epoch": 0.17923256099056026, "grad_norm": 2.739527540491803, "learning_rate": 9.427297870475465e-06, "loss": 0.789, "step": 5848 }, { "epoch": 0.17926320951330146, "grad_norm": 1.7539723618598084, "learning_rate": 9.427067201619618e-06, "loss": 0.6749, "step": 5849 }, { "epoch": 0.17929385803604267, "grad_norm": 1.7666798832033093, "learning_rate": 9.426836489142743e-06, "loss": 0.8171, "step": 5850 }, { "epoch": 0.17932450655878387, "grad_norm": 2.287176571195317, "learning_rate": 9.426605733047116e-06, "loss": 0.7108, "step": 5851 }, { "epoch": 0.17935515508152508, "grad_norm": 1.6595303304547382, "learning_rate": 9.426374933335009e-06, "loss": 0.636, "step": 5852 }, { "epoch": 0.1793858036042663, "grad_norm": 1.7489518983703711, "learning_rate": 9.4261440900087e-06, "loss": 0.5999, "step": 5853 }, { "epoch": 0.1794164521270075, "grad_norm": 1.5584982831244998, "learning_rate": 9.42591320307046e-06, "loss": 0.643, "step": 5854 }, { "epoch": 0.17944710064974867, "grad_norm": 1.9325728443637875, "learning_rate": 9.425682272522562e-06, "loss": 0.7839, "step": 5855 }, { "epoch": 0.17947774917248988, "grad_norm": 2.0418863205894695, "learning_rate": 9.425451298367287e-06, "loss": 0.7307, "step": 5856 }, { "epoch": 0.17950839769523108, "grad_norm": 1.0376679606486359, "learning_rate": 9.425220280606908e-06, "loss": 0.5136, "step": 5857 }, { "epoch": 0.1795390462179723, "grad_norm": 1.8445126219758383, "learning_rate": 9.424989219243701e-06, "loss": 0.6974, "step": 5858 }, { "epoch": 0.1795696947407135, "grad_norm": 1.674247926387593, "learning_rate": 9.424758114279942e-06, "loss": 0.7051, "step": 5859 }, { "epoch": 0.1796003432634547, "grad_norm": 1.833805607481279, "learning_rate": 9.42452696571791e-06, "loss": 0.6951, "step": 5860 }, { "epoch": 0.1796309917861959, "grad_norm": 0.8669664978337449, "learning_rate": 9.424295773559882e-06, "loss": 0.4968, "step": 5861 }, { "epoch": 0.1796616403089371, "grad_norm": 1.9772877900384296, "learning_rate": 9.424064537808135e-06, "loss": 0.8199, "step": 5862 }, { "epoch": 0.17969228883167832, "grad_norm": 1.7833461603942777, "learning_rate": 9.42383325846495e-06, "loss": 0.7738, "step": 5863 }, { "epoch": 0.17972293735441952, "grad_norm": 1.9413549355091715, "learning_rate": 9.423601935532603e-06, "loss": 0.7832, "step": 5864 }, { "epoch": 0.17975358587716073, "grad_norm": 1.825302951449379, "learning_rate": 9.423370569013373e-06, "loss": 0.7821, "step": 5865 }, { "epoch": 0.17978423439990193, "grad_norm": 0.9008691673368242, "learning_rate": 9.423139158909542e-06, "loss": 0.5033, "step": 5866 }, { "epoch": 0.17981488292264314, "grad_norm": 1.748489111936005, "learning_rate": 9.42290770522339e-06, "loss": 0.7256, "step": 5867 }, { "epoch": 0.17984553144538434, "grad_norm": 1.8359770806050117, "learning_rate": 9.422676207957195e-06, "loss": 0.7589, "step": 5868 }, { "epoch": 0.17987617996812555, "grad_norm": 1.7336921697256225, "learning_rate": 9.422444667113239e-06, "loss": 0.735, "step": 5869 }, { "epoch": 0.17990682849086673, "grad_norm": 1.8536967967621325, "learning_rate": 9.422213082693807e-06, "loss": 0.7565, "step": 5870 }, { "epoch": 0.17993747701360793, "grad_norm": 0.9111016951447877, "learning_rate": 9.421981454701176e-06, "loss": 0.5203, "step": 5871 }, { "epoch": 0.17996812553634914, "grad_norm": 1.7870567166921574, "learning_rate": 9.421749783137632e-06, "loss": 0.8034, "step": 5872 }, { "epoch": 0.17999877405909034, "grad_norm": 1.5235667873245717, "learning_rate": 9.421518068005455e-06, "loss": 0.6519, "step": 5873 }, { "epoch": 0.18002942258183155, "grad_norm": 0.7877934145340947, "learning_rate": 9.42128630930693e-06, "loss": 0.4949, "step": 5874 }, { "epoch": 0.18006007110457276, "grad_norm": 1.6553215860885608, "learning_rate": 9.421054507044339e-06, "loss": 0.6168, "step": 5875 }, { "epoch": 0.18009071962731396, "grad_norm": 1.6675075946318538, "learning_rate": 9.420822661219966e-06, "loss": 0.8061, "step": 5876 }, { "epoch": 0.18012136815005517, "grad_norm": 2.0763552869000454, "learning_rate": 9.420590771836098e-06, "loss": 0.8351, "step": 5877 }, { "epoch": 0.18015201667279637, "grad_norm": 1.760104476054214, "learning_rate": 9.420358838895016e-06, "loss": 0.7627, "step": 5878 }, { "epoch": 0.18018266519553758, "grad_norm": 1.9044952113476528, "learning_rate": 9.420126862399008e-06, "loss": 0.6426, "step": 5879 }, { "epoch": 0.18021331371827878, "grad_norm": 1.7761179356669967, "learning_rate": 9.41989484235036e-06, "loss": 0.7657, "step": 5880 }, { "epoch": 0.18024396224102, "grad_norm": 1.7682850471853415, "learning_rate": 9.419662778751356e-06, "loss": 0.6504, "step": 5881 }, { "epoch": 0.1802746107637612, "grad_norm": 1.78466159021523, "learning_rate": 9.419430671604286e-06, "loss": 0.746, "step": 5882 }, { "epoch": 0.1803052592865024, "grad_norm": 1.900552099274463, "learning_rate": 9.419198520911433e-06, "loss": 0.7653, "step": 5883 }, { "epoch": 0.1803359078092436, "grad_norm": 2.0220287042761753, "learning_rate": 9.418966326675088e-06, "loss": 0.7916, "step": 5884 }, { "epoch": 0.1803665563319848, "grad_norm": 1.8954621996777128, "learning_rate": 9.418734088897534e-06, "loss": 0.8, "step": 5885 }, { "epoch": 0.180397204854726, "grad_norm": 1.701517641623818, "learning_rate": 9.418501807581065e-06, "loss": 0.7581, "step": 5886 }, { "epoch": 0.1804278533774672, "grad_norm": 0.9723197191331902, "learning_rate": 9.418269482727966e-06, "loss": 0.503, "step": 5887 }, { "epoch": 0.1804585019002084, "grad_norm": 0.8942186675342775, "learning_rate": 9.418037114340528e-06, "loss": 0.4657, "step": 5888 }, { "epoch": 0.1804891504229496, "grad_norm": 1.8216773164793205, "learning_rate": 9.41780470242104e-06, "loss": 0.7336, "step": 5889 }, { "epoch": 0.1805197989456908, "grad_norm": 1.7150182287798925, "learning_rate": 9.417572246971791e-06, "loss": 0.7963, "step": 5890 }, { "epoch": 0.18055044746843202, "grad_norm": 1.8288328899091826, "learning_rate": 9.417339747995074e-06, "loss": 0.7084, "step": 5891 }, { "epoch": 0.18058109599117322, "grad_norm": 1.697856111416525, "learning_rate": 9.417107205493177e-06, "loss": 0.8015, "step": 5892 }, { "epoch": 0.18061174451391443, "grad_norm": 1.7424036440705317, "learning_rate": 9.416874619468393e-06, "loss": 0.6258, "step": 5893 }, { "epoch": 0.18064239303665564, "grad_norm": 1.023700087824917, "learning_rate": 9.416641989923012e-06, "loss": 0.5004, "step": 5894 }, { "epoch": 0.18067304155939684, "grad_norm": 1.6553259592850358, "learning_rate": 9.41640931685933e-06, "loss": 0.7095, "step": 5895 }, { "epoch": 0.18070369008213805, "grad_norm": 1.8054364207843125, "learning_rate": 9.416176600279635e-06, "loss": 0.6849, "step": 5896 }, { "epoch": 0.18073433860487925, "grad_norm": 0.9629790063270299, "learning_rate": 9.41594384018622e-06, "loss": 0.4853, "step": 5897 }, { "epoch": 0.18076498712762046, "grad_norm": 0.820096691234335, "learning_rate": 9.415711036581385e-06, "loss": 0.4669, "step": 5898 }, { "epoch": 0.18079563565036166, "grad_norm": 1.8031337471508013, "learning_rate": 9.415478189467418e-06, "loss": 0.7105, "step": 5899 }, { "epoch": 0.18082628417310287, "grad_norm": 1.9692857915560416, "learning_rate": 9.415245298846614e-06, "loss": 0.7169, "step": 5900 }, { "epoch": 0.18085693269584405, "grad_norm": 2.05253403449661, "learning_rate": 9.415012364721267e-06, "loss": 0.8093, "step": 5901 }, { "epoch": 0.18088758121858525, "grad_norm": 0.9398405252202149, "learning_rate": 9.414779387093675e-06, "loss": 0.4952, "step": 5902 }, { "epoch": 0.18091822974132646, "grad_norm": 1.8780503261581183, "learning_rate": 9.414546365966133e-06, "loss": 0.7068, "step": 5903 }, { "epoch": 0.18094887826406766, "grad_norm": 2.007650527224393, "learning_rate": 9.414313301340936e-06, "loss": 0.8406, "step": 5904 }, { "epoch": 0.18097952678680887, "grad_norm": 0.9265484828219778, "learning_rate": 9.41408019322038e-06, "loss": 0.4999, "step": 5905 }, { "epoch": 0.18101017530955008, "grad_norm": 2.251518868719559, "learning_rate": 9.413847041606761e-06, "loss": 0.7935, "step": 5906 }, { "epoch": 0.18104082383229128, "grad_norm": 1.7515609352776633, "learning_rate": 9.413613846502379e-06, "loss": 0.7437, "step": 5907 }, { "epoch": 0.1810714723550325, "grad_norm": 2.030025331733851, "learning_rate": 9.41338060790953e-06, "loss": 0.7105, "step": 5908 }, { "epoch": 0.1811021208777737, "grad_norm": 1.5624696730941026, "learning_rate": 9.413147325830513e-06, "loss": 0.5705, "step": 5909 }, { "epoch": 0.1811327694005149, "grad_norm": 1.6730331528310387, "learning_rate": 9.412914000267626e-06, "loss": 0.7379, "step": 5910 }, { "epoch": 0.1811634179232561, "grad_norm": 1.924874718474037, "learning_rate": 9.41268063122317e-06, "loss": 0.8655, "step": 5911 }, { "epoch": 0.1811940664459973, "grad_norm": 1.4302058443312613, "learning_rate": 9.412447218699442e-06, "loss": 0.5174, "step": 5912 }, { "epoch": 0.18122471496873852, "grad_norm": 1.7962428938987272, "learning_rate": 9.41221376269874e-06, "loss": 0.8752, "step": 5913 }, { "epoch": 0.18125536349147972, "grad_norm": 1.6351270314175965, "learning_rate": 9.41198026322337e-06, "loss": 0.6943, "step": 5914 }, { "epoch": 0.18128601201422093, "grad_norm": 0.8212248861089475, "learning_rate": 9.411746720275628e-06, "loss": 0.5045, "step": 5915 }, { "epoch": 0.18131666053696213, "grad_norm": 1.9144234508007552, "learning_rate": 9.411513133857816e-06, "loss": 0.6778, "step": 5916 }, { "epoch": 0.1813473090597033, "grad_norm": 1.9787127661364483, "learning_rate": 9.411279503972239e-06, "loss": 0.7675, "step": 5917 }, { "epoch": 0.18137795758244452, "grad_norm": 1.6547472275215418, "learning_rate": 9.411045830621194e-06, "loss": 0.7603, "step": 5918 }, { "epoch": 0.18140860610518572, "grad_norm": 1.728444374853868, "learning_rate": 9.410812113806987e-06, "loss": 0.7526, "step": 5919 }, { "epoch": 0.18143925462792693, "grad_norm": 1.7644044786072728, "learning_rate": 9.41057835353192e-06, "loss": 0.6697, "step": 5920 }, { "epoch": 0.18146990315066813, "grad_norm": 1.9227866471595856, "learning_rate": 9.410344549798296e-06, "loss": 0.7824, "step": 5921 }, { "epoch": 0.18150055167340934, "grad_norm": 1.7397994517582958, "learning_rate": 9.410110702608418e-06, "loss": 0.8022, "step": 5922 }, { "epoch": 0.18153120019615054, "grad_norm": 1.9331807422829224, "learning_rate": 9.409876811964591e-06, "loss": 0.7754, "step": 5923 }, { "epoch": 0.18156184871889175, "grad_norm": 1.966337520318365, "learning_rate": 9.409642877869118e-06, "loss": 0.7455, "step": 5924 }, { "epoch": 0.18159249724163296, "grad_norm": 1.895823409938758, "learning_rate": 9.409408900324308e-06, "loss": 0.7007, "step": 5925 }, { "epoch": 0.18162314576437416, "grad_norm": 2.2630209204578042, "learning_rate": 9.409174879332463e-06, "loss": 0.7465, "step": 5926 }, { "epoch": 0.18165379428711537, "grad_norm": 2.019188641307699, "learning_rate": 9.408940814895889e-06, "loss": 0.7127, "step": 5927 }, { "epoch": 0.18168444280985657, "grad_norm": 1.6470928102020768, "learning_rate": 9.408706707016895e-06, "loss": 0.7403, "step": 5928 }, { "epoch": 0.18171509133259778, "grad_norm": 1.5823122173820414, "learning_rate": 9.408472555697783e-06, "loss": 0.742, "step": 5929 }, { "epoch": 0.18174573985533898, "grad_norm": 1.6627852223745696, "learning_rate": 9.408238360940864e-06, "loss": 0.7567, "step": 5930 }, { "epoch": 0.1817763883780802, "grad_norm": 1.6788507353882842, "learning_rate": 9.408004122748447e-06, "loss": 0.7445, "step": 5931 }, { "epoch": 0.18180703690082137, "grad_norm": 0.942548000525406, "learning_rate": 9.407769841122834e-06, "loss": 0.5153, "step": 5932 }, { "epoch": 0.18183768542356257, "grad_norm": 0.946038584291695, "learning_rate": 9.40753551606634e-06, "loss": 0.5044, "step": 5933 }, { "epoch": 0.18186833394630378, "grad_norm": 0.862710727285362, "learning_rate": 9.40730114758127e-06, "loss": 0.5147, "step": 5934 }, { "epoch": 0.18189898246904498, "grad_norm": 1.7546040312192377, "learning_rate": 9.407066735669931e-06, "loss": 0.7564, "step": 5935 }, { "epoch": 0.1819296309917862, "grad_norm": 1.822255124830463, "learning_rate": 9.40683228033464e-06, "loss": 0.7825, "step": 5936 }, { "epoch": 0.1819602795145274, "grad_norm": 1.929065066111895, "learning_rate": 9.4065977815777e-06, "loss": 0.7109, "step": 5937 }, { "epoch": 0.1819909280372686, "grad_norm": 2.067983857000198, "learning_rate": 9.406363239401427e-06, "loss": 0.7715, "step": 5938 }, { "epoch": 0.1820215765600098, "grad_norm": 1.9774966560245866, "learning_rate": 9.406128653808128e-06, "loss": 0.7473, "step": 5939 }, { "epoch": 0.182052225082751, "grad_norm": 1.8097886069702653, "learning_rate": 9.405894024800118e-06, "loss": 0.7327, "step": 5940 }, { "epoch": 0.18208287360549222, "grad_norm": 1.7713107435889393, "learning_rate": 9.405659352379704e-06, "loss": 0.6606, "step": 5941 }, { "epoch": 0.18211352212823342, "grad_norm": 1.8857476442591896, "learning_rate": 9.405424636549202e-06, "loss": 0.6404, "step": 5942 }, { "epoch": 0.18214417065097463, "grad_norm": 1.033573687794708, "learning_rate": 9.405189877310925e-06, "loss": 0.5085, "step": 5943 }, { "epoch": 0.18217481917371584, "grad_norm": 1.867701621220288, "learning_rate": 9.404955074667185e-06, "loss": 0.8281, "step": 5944 }, { "epoch": 0.18220546769645704, "grad_norm": 0.9193685023382324, "learning_rate": 9.404720228620294e-06, "loss": 0.4896, "step": 5945 }, { "epoch": 0.18223611621919825, "grad_norm": 0.8088833593442949, "learning_rate": 9.404485339172568e-06, "loss": 0.4976, "step": 5946 }, { "epoch": 0.18226676474193945, "grad_norm": 2.3315614967834173, "learning_rate": 9.404250406326323e-06, "loss": 0.6394, "step": 5947 }, { "epoch": 0.18229741326468063, "grad_norm": 2.0095454203586764, "learning_rate": 9.40401543008387e-06, "loss": 0.82, "step": 5948 }, { "epoch": 0.18232806178742184, "grad_norm": 2.0027629773638775, "learning_rate": 9.403780410447528e-06, "loss": 0.7078, "step": 5949 }, { "epoch": 0.18235871031016304, "grad_norm": 1.8490761625467476, "learning_rate": 9.40354534741961e-06, "loss": 0.7464, "step": 5950 }, { "epoch": 0.18238935883290425, "grad_norm": 2.2216842566096964, "learning_rate": 9.403310241002433e-06, "loss": 0.7619, "step": 5951 }, { "epoch": 0.18242000735564545, "grad_norm": 1.731643714918723, "learning_rate": 9.403075091198311e-06, "loss": 0.6995, "step": 5952 }, { "epoch": 0.18245065587838666, "grad_norm": 1.8918727722107291, "learning_rate": 9.402839898009566e-06, "loss": 0.7208, "step": 5953 }, { "epoch": 0.18248130440112786, "grad_norm": 1.8125472683795554, "learning_rate": 9.402604661438513e-06, "loss": 0.6763, "step": 5954 }, { "epoch": 0.18251195292386907, "grad_norm": 1.9885520880427772, "learning_rate": 9.40236938148747e-06, "loss": 0.722, "step": 5955 }, { "epoch": 0.18254260144661028, "grad_norm": 1.9421514615552873, "learning_rate": 9.402134058158753e-06, "loss": 0.7437, "step": 5956 }, { "epoch": 0.18257324996935148, "grad_norm": 1.640275569298694, "learning_rate": 9.401898691454686e-06, "loss": 0.7058, "step": 5957 }, { "epoch": 0.1826038984920927, "grad_norm": 1.863913232832668, "learning_rate": 9.401663281377583e-06, "loss": 0.6951, "step": 5958 }, { "epoch": 0.1826345470148339, "grad_norm": 1.9474825572424828, "learning_rate": 9.401427827929766e-06, "loss": 0.7391, "step": 5959 }, { "epoch": 0.1826651955375751, "grad_norm": 2.1537499225732706, "learning_rate": 9.401192331113553e-06, "loss": 0.8694, "step": 5960 }, { "epoch": 0.1826958440603163, "grad_norm": 1.864231698251316, "learning_rate": 9.400956790931268e-06, "loss": 0.7681, "step": 5961 }, { "epoch": 0.1827264925830575, "grad_norm": 1.7175570744464295, "learning_rate": 9.400721207385228e-06, "loss": 0.7682, "step": 5962 }, { "epoch": 0.1827571411057987, "grad_norm": 2.0077175519463624, "learning_rate": 9.400485580477757e-06, "loss": 0.7913, "step": 5963 }, { "epoch": 0.1827877896285399, "grad_norm": 1.786972682535072, "learning_rate": 9.400249910211176e-06, "loss": 0.7171, "step": 5964 }, { "epoch": 0.1828184381512811, "grad_norm": 1.9735110753304501, "learning_rate": 9.400014196587805e-06, "loss": 0.7776, "step": 5965 }, { "epoch": 0.1828490866740223, "grad_norm": 1.6968834090954898, "learning_rate": 9.39977843960997e-06, "loss": 0.809, "step": 5966 }, { "epoch": 0.1828797351967635, "grad_norm": 1.6375045860285529, "learning_rate": 9.399542639279992e-06, "loss": 0.5735, "step": 5967 }, { "epoch": 0.18291038371950472, "grad_norm": 1.7900332882378185, "learning_rate": 9.399306795600193e-06, "loss": 0.7773, "step": 5968 }, { "epoch": 0.18294103224224592, "grad_norm": 1.8602994650700393, "learning_rate": 9.399070908572902e-06, "loss": 0.7324, "step": 5969 }, { "epoch": 0.18297168076498713, "grad_norm": 1.9856513474175341, "learning_rate": 9.398834978200438e-06, "loss": 0.7566, "step": 5970 }, { "epoch": 0.18300232928772833, "grad_norm": 1.61716327841474, "learning_rate": 9.398599004485127e-06, "loss": 0.5258, "step": 5971 }, { "epoch": 0.18303297781046954, "grad_norm": 1.9883399255322698, "learning_rate": 9.398362987429294e-06, "loss": 0.8403, "step": 5972 }, { "epoch": 0.18306362633321074, "grad_norm": 1.722061875914933, "learning_rate": 9.398126927035267e-06, "loss": 0.6248, "step": 5973 }, { "epoch": 0.18309427485595195, "grad_norm": 1.7183736482247518, "learning_rate": 9.397890823305369e-06, "loss": 0.7619, "step": 5974 }, { "epoch": 0.18312492337869316, "grad_norm": 1.5766243236414756, "learning_rate": 9.397654676241927e-06, "loss": 0.6888, "step": 5975 }, { "epoch": 0.18315557190143436, "grad_norm": 1.8006138727167589, "learning_rate": 9.39741848584727e-06, "loss": 0.7703, "step": 5976 }, { "epoch": 0.18318622042417557, "grad_norm": 1.8917619515906081, "learning_rate": 9.397182252123722e-06, "loss": 0.8246, "step": 5977 }, { "epoch": 0.18321686894691677, "grad_norm": 1.6399151436312283, "learning_rate": 9.396945975073613e-06, "loss": 0.7043, "step": 5978 }, { "epoch": 0.18324751746965795, "grad_norm": 1.5773928795971084, "learning_rate": 9.39670965469927e-06, "loss": 0.6775, "step": 5979 }, { "epoch": 0.18327816599239916, "grad_norm": 1.6735389142931647, "learning_rate": 9.396473291003021e-06, "loss": 0.7435, "step": 5980 }, { "epoch": 0.18330881451514036, "grad_norm": 1.824882336206107, "learning_rate": 9.396236883987196e-06, "loss": 0.886, "step": 5981 }, { "epoch": 0.18333946303788157, "grad_norm": 1.7842283314587843, "learning_rate": 9.396000433654124e-06, "loss": 0.7072, "step": 5982 }, { "epoch": 0.18337011156062277, "grad_norm": 1.6962439181203603, "learning_rate": 9.395763940006136e-06, "loss": 0.7135, "step": 5983 }, { "epoch": 0.18340076008336398, "grad_norm": 1.2221809390410017, "learning_rate": 9.395527403045562e-06, "loss": 0.519, "step": 5984 }, { "epoch": 0.18343140860610518, "grad_norm": 2.195465596858276, "learning_rate": 9.395290822774729e-06, "loss": 0.8412, "step": 5985 }, { "epoch": 0.1834620571288464, "grad_norm": 1.970648775183849, "learning_rate": 9.395054199195974e-06, "loss": 0.768, "step": 5986 }, { "epoch": 0.1834927056515876, "grad_norm": 0.8603254859820966, "learning_rate": 9.394817532311625e-06, "loss": 0.4963, "step": 5987 }, { "epoch": 0.1835233541743288, "grad_norm": 2.0122668599031055, "learning_rate": 9.394580822124012e-06, "loss": 0.8297, "step": 5988 }, { "epoch": 0.18355400269707, "grad_norm": 0.8325015246783091, "learning_rate": 9.39434406863547e-06, "loss": 0.5079, "step": 5989 }, { "epoch": 0.1835846512198112, "grad_norm": 1.701202195799264, "learning_rate": 9.394107271848334e-06, "loss": 0.6605, "step": 5990 }, { "epoch": 0.18361529974255242, "grad_norm": 1.845093790440228, "learning_rate": 9.393870431764933e-06, "loss": 0.7094, "step": 5991 }, { "epoch": 0.18364594826529362, "grad_norm": 0.8540739898338569, "learning_rate": 9.393633548387603e-06, "loss": 0.4977, "step": 5992 }, { "epoch": 0.18367659678803483, "grad_norm": 0.8927996255042399, "learning_rate": 9.393396621718678e-06, "loss": 0.4874, "step": 5993 }, { "epoch": 0.183707245310776, "grad_norm": 1.8393562158022714, "learning_rate": 9.39315965176049e-06, "loss": 0.8619, "step": 5994 }, { "epoch": 0.1837378938335172, "grad_norm": 1.9121435488769403, "learning_rate": 9.392922638515379e-06, "loss": 0.7639, "step": 5995 }, { "epoch": 0.18376854235625842, "grad_norm": 1.7600825987508888, "learning_rate": 9.392685581985674e-06, "loss": 0.7146, "step": 5996 }, { "epoch": 0.18379919087899962, "grad_norm": 2.1332925373677325, "learning_rate": 9.392448482173717e-06, "loss": 0.7871, "step": 5997 }, { "epoch": 0.18382983940174083, "grad_norm": 1.8196935971884771, "learning_rate": 9.392211339081839e-06, "loss": 0.6258, "step": 5998 }, { "epoch": 0.18386048792448204, "grad_norm": 1.9166275866857498, "learning_rate": 9.39197415271238e-06, "loss": 0.7268, "step": 5999 }, { "epoch": 0.18389113644722324, "grad_norm": 2.153482561211565, "learning_rate": 9.391736923067675e-06, "loss": 0.6728, "step": 6000 }, { "epoch": 0.18392178496996445, "grad_norm": 1.8481849400439936, "learning_rate": 9.391499650150065e-06, "loss": 0.7346, "step": 6001 }, { "epoch": 0.18395243349270565, "grad_norm": 0.8947172521856843, "learning_rate": 9.391262333961883e-06, "loss": 0.5177, "step": 6002 }, { "epoch": 0.18398308201544686, "grad_norm": 2.1733519378103745, "learning_rate": 9.39102497450547e-06, "loss": 0.6121, "step": 6003 }, { "epoch": 0.18401373053818806, "grad_norm": 0.8330123935934787, "learning_rate": 9.390787571783165e-06, "loss": 0.4981, "step": 6004 }, { "epoch": 0.18404437906092927, "grad_norm": 1.7395049057079408, "learning_rate": 9.390550125797306e-06, "loss": 0.7018, "step": 6005 }, { "epoch": 0.18407502758367048, "grad_norm": 1.8229574602679626, "learning_rate": 9.390312636550232e-06, "loss": 0.7863, "step": 6006 }, { "epoch": 0.18410567610641168, "grad_norm": 0.8923606622012665, "learning_rate": 9.390075104044286e-06, "loss": 0.4969, "step": 6007 }, { "epoch": 0.1841363246291529, "grad_norm": 1.812353402005203, "learning_rate": 9.389837528281807e-06, "loss": 0.7106, "step": 6008 }, { "epoch": 0.1841669731518941, "grad_norm": 2.5663881819056904, "learning_rate": 9.389599909265135e-06, "loss": 0.7319, "step": 6009 }, { "epoch": 0.18419762167463527, "grad_norm": 1.969191516116712, "learning_rate": 9.389362246996611e-06, "loss": 0.759, "step": 6010 }, { "epoch": 0.18422827019737648, "grad_norm": 1.9052967649504766, "learning_rate": 9.38912454147858e-06, "loss": 0.7453, "step": 6011 }, { "epoch": 0.18425891872011768, "grad_norm": 1.742897634089683, "learning_rate": 9.38888679271338e-06, "loss": 0.7789, "step": 6012 }, { "epoch": 0.1842895672428589, "grad_norm": 1.836263897709223, "learning_rate": 9.388649000703357e-06, "loss": 0.7003, "step": 6013 }, { "epoch": 0.1843202157656001, "grad_norm": 1.982871954481266, "learning_rate": 9.38841116545085e-06, "loss": 0.729, "step": 6014 }, { "epoch": 0.1843508642883413, "grad_norm": 1.848857878751707, "learning_rate": 9.388173286958207e-06, "loss": 0.7119, "step": 6015 }, { "epoch": 0.1843815128110825, "grad_norm": 1.9974769163501627, "learning_rate": 9.387935365227769e-06, "loss": 0.7648, "step": 6016 }, { "epoch": 0.1844121613338237, "grad_norm": 1.5672422741060017, "learning_rate": 9.387697400261882e-06, "loss": 0.6647, "step": 6017 }, { "epoch": 0.18444280985656492, "grad_norm": 1.9365917585550299, "learning_rate": 9.38745939206289e-06, "loss": 0.7442, "step": 6018 }, { "epoch": 0.18447345837930612, "grad_norm": 1.9339704237058664, "learning_rate": 9.387221340633137e-06, "loss": 0.7594, "step": 6019 }, { "epoch": 0.18450410690204733, "grad_norm": 2.063422237191947, "learning_rate": 9.386983245974972e-06, "loss": 0.7134, "step": 6020 }, { "epoch": 0.18453475542478853, "grad_norm": 1.6004546343852848, "learning_rate": 9.386745108090736e-06, "loss": 0.7793, "step": 6021 }, { "epoch": 0.18456540394752974, "grad_norm": 0.9593048769648255, "learning_rate": 9.38650692698278e-06, "loss": 0.4913, "step": 6022 }, { "epoch": 0.18459605247027094, "grad_norm": 1.722149653791059, "learning_rate": 9.386268702653447e-06, "loss": 0.6793, "step": 6023 }, { "epoch": 0.18462670099301215, "grad_norm": 1.8741981610845067, "learning_rate": 9.386030435105085e-06, "loss": 0.7244, "step": 6024 }, { "epoch": 0.18465734951575333, "grad_norm": 1.669154339619704, "learning_rate": 9.385792124340045e-06, "loss": 0.7032, "step": 6025 }, { "epoch": 0.18468799803849453, "grad_norm": 2.0825856098066753, "learning_rate": 9.385553770360674e-06, "loss": 0.7057, "step": 6026 }, { "epoch": 0.18471864656123574, "grad_norm": 1.8955900094195834, "learning_rate": 9.385315373169319e-06, "loss": 0.7381, "step": 6027 }, { "epoch": 0.18474929508397694, "grad_norm": 1.8388494625354044, "learning_rate": 9.385076932768328e-06, "loss": 0.7832, "step": 6028 }, { "epoch": 0.18477994360671815, "grad_norm": 0.8389900675218156, "learning_rate": 9.384838449160055e-06, "loss": 0.4922, "step": 6029 }, { "epoch": 0.18481059212945936, "grad_norm": 1.670381625245891, "learning_rate": 9.384599922346843e-06, "loss": 0.7383, "step": 6030 }, { "epoch": 0.18484124065220056, "grad_norm": 2.156665148794103, "learning_rate": 9.384361352331048e-06, "loss": 0.723, "step": 6031 }, { "epoch": 0.18487188917494177, "grad_norm": 1.7447200961402756, "learning_rate": 9.38412273911502e-06, "loss": 0.7086, "step": 6032 }, { "epoch": 0.18490253769768297, "grad_norm": 1.8069365856982655, "learning_rate": 9.383884082701107e-06, "loss": 0.7797, "step": 6033 }, { "epoch": 0.18493318622042418, "grad_norm": 0.8577202690971957, "learning_rate": 9.383645383091663e-06, "loss": 0.5087, "step": 6034 }, { "epoch": 0.18496383474316538, "grad_norm": 0.821211010963006, "learning_rate": 9.383406640289041e-06, "loss": 0.4995, "step": 6035 }, { "epoch": 0.1849944832659066, "grad_norm": 2.0085648794720385, "learning_rate": 9.383167854295589e-06, "loss": 0.8597, "step": 6036 }, { "epoch": 0.1850251317886478, "grad_norm": 1.5845697014470403, "learning_rate": 9.382929025113665e-06, "loss": 0.7506, "step": 6037 }, { "epoch": 0.185055780311389, "grad_norm": 1.6257969812110467, "learning_rate": 9.38269015274562e-06, "loss": 0.68, "step": 6038 }, { "epoch": 0.1850864288341302, "grad_norm": 1.7937707249320836, "learning_rate": 9.382451237193806e-06, "loss": 0.6587, "step": 6039 }, { "epoch": 0.1851170773568714, "grad_norm": 1.7989994498252715, "learning_rate": 9.382212278460578e-06, "loss": 0.7346, "step": 6040 }, { "epoch": 0.1851477258796126, "grad_norm": 0.8402352495185432, "learning_rate": 9.381973276548292e-06, "loss": 0.5008, "step": 6041 }, { "epoch": 0.1851783744023538, "grad_norm": 1.825902516356122, "learning_rate": 9.381734231459303e-06, "loss": 0.7224, "step": 6042 }, { "epoch": 0.185209022925095, "grad_norm": 1.9263478979417714, "learning_rate": 9.381495143195966e-06, "loss": 0.7253, "step": 6043 }, { "epoch": 0.1852396714478362, "grad_norm": 0.833013181558193, "learning_rate": 9.381256011760635e-06, "loss": 0.4893, "step": 6044 }, { "epoch": 0.1852703199705774, "grad_norm": 1.856660696967754, "learning_rate": 9.381016837155668e-06, "loss": 0.7316, "step": 6045 }, { "epoch": 0.18530096849331862, "grad_norm": 1.8851575038984068, "learning_rate": 9.38077761938342e-06, "loss": 0.7643, "step": 6046 }, { "epoch": 0.18533161701605982, "grad_norm": 1.7398323282502746, "learning_rate": 9.380538358446252e-06, "loss": 0.7566, "step": 6047 }, { "epoch": 0.18536226553880103, "grad_norm": 1.6181466671112867, "learning_rate": 9.380299054346516e-06, "loss": 0.6961, "step": 6048 }, { "epoch": 0.18539291406154224, "grad_norm": 1.841349009315116, "learning_rate": 9.380059707086573e-06, "loss": 0.7834, "step": 6049 }, { "epoch": 0.18542356258428344, "grad_norm": 1.576530756529921, "learning_rate": 9.379820316668782e-06, "loss": 0.7523, "step": 6050 }, { "epoch": 0.18545421110702465, "grad_norm": 1.7352382740084888, "learning_rate": 9.379580883095501e-06, "loss": 0.6686, "step": 6051 }, { "epoch": 0.18548485962976585, "grad_norm": 1.8530065473565982, "learning_rate": 9.379341406369088e-06, "loss": 0.7085, "step": 6052 }, { "epoch": 0.18551550815250706, "grad_norm": 2.0386255814421075, "learning_rate": 9.379101886491907e-06, "loss": 0.6919, "step": 6053 }, { "epoch": 0.18554615667524826, "grad_norm": 1.7016237163685035, "learning_rate": 9.37886232346631e-06, "loss": 0.6356, "step": 6054 }, { "epoch": 0.18557680519798947, "grad_norm": 2.1893077156220984, "learning_rate": 9.378622717294665e-06, "loss": 0.7963, "step": 6055 }, { "epoch": 0.18560745372073065, "grad_norm": 2.0147253924027395, "learning_rate": 9.378383067979329e-06, "loss": 0.7637, "step": 6056 }, { "epoch": 0.18563810224347185, "grad_norm": 1.7836107723133456, "learning_rate": 9.378143375522664e-06, "loss": 0.7047, "step": 6057 }, { "epoch": 0.18566875076621306, "grad_norm": 1.7101469590991971, "learning_rate": 9.377903639927032e-06, "loss": 0.7188, "step": 6058 }, { "epoch": 0.18569939928895426, "grad_norm": 1.8014107871629048, "learning_rate": 9.377663861194795e-06, "loss": 0.8301, "step": 6059 }, { "epoch": 0.18573004781169547, "grad_norm": 1.8458966794731964, "learning_rate": 9.377424039328317e-06, "loss": 0.6922, "step": 6060 }, { "epoch": 0.18576069633443668, "grad_norm": 1.5422658453443157, "learning_rate": 9.37718417432996e-06, "loss": 0.7465, "step": 6061 }, { "epoch": 0.18579134485717788, "grad_norm": 1.6674315345610147, "learning_rate": 9.376944266202088e-06, "loss": 0.7306, "step": 6062 }, { "epoch": 0.1858219933799191, "grad_norm": 1.691178208778852, "learning_rate": 9.376704314947062e-06, "loss": 0.708, "step": 6063 }, { "epoch": 0.1858526419026603, "grad_norm": 1.788966744306337, "learning_rate": 9.376464320567251e-06, "loss": 0.7999, "step": 6064 }, { "epoch": 0.1858832904254015, "grad_norm": 1.9784248545658467, "learning_rate": 9.376224283065017e-06, "loss": 0.708, "step": 6065 }, { "epoch": 0.1859139389481427, "grad_norm": 1.7292984697107316, "learning_rate": 9.375984202442724e-06, "loss": 0.7885, "step": 6066 }, { "epoch": 0.1859445874708839, "grad_norm": 1.8290085882474019, "learning_rate": 9.37574407870274e-06, "loss": 0.7551, "step": 6067 }, { "epoch": 0.18597523599362512, "grad_norm": 0.9897211602697353, "learning_rate": 9.375503911847427e-06, "loss": 0.5038, "step": 6068 }, { "epoch": 0.18600588451636632, "grad_norm": 1.6120182012328828, "learning_rate": 9.375263701879158e-06, "loss": 0.7132, "step": 6069 }, { "epoch": 0.18603653303910753, "grad_norm": 1.9654345369053197, "learning_rate": 9.375023448800296e-06, "loss": 0.7666, "step": 6070 }, { "epoch": 0.18606718156184873, "grad_norm": 1.6762043152478683, "learning_rate": 9.374783152613206e-06, "loss": 0.7754, "step": 6071 }, { "epoch": 0.1860978300845899, "grad_norm": 1.548859820105742, "learning_rate": 9.374542813320261e-06, "loss": 0.6588, "step": 6072 }, { "epoch": 0.18612847860733112, "grad_norm": 1.6359812886937461, "learning_rate": 9.374302430923827e-06, "loss": 0.7415, "step": 6073 }, { "epoch": 0.18615912713007232, "grad_norm": 1.7689550867590582, "learning_rate": 9.37406200542627e-06, "loss": 0.7962, "step": 6074 }, { "epoch": 0.18618977565281353, "grad_norm": 1.7736617580175926, "learning_rate": 9.373821536829962e-06, "loss": 0.8057, "step": 6075 }, { "epoch": 0.18622042417555473, "grad_norm": 1.755750652376938, "learning_rate": 9.37358102513727e-06, "loss": 0.7844, "step": 6076 }, { "epoch": 0.18625107269829594, "grad_norm": 1.8544846709544396, "learning_rate": 9.373340470350567e-06, "loss": 0.7564, "step": 6077 }, { "epoch": 0.18628172122103714, "grad_norm": 1.9488230475650627, "learning_rate": 9.373099872472219e-06, "loss": 0.8228, "step": 6078 }, { "epoch": 0.18631236974377835, "grad_norm": 1.7492441910677607, "learning_rate": 9.3728592315046e-06, "loss": 0.7258, "step": 6079 }, { "epoch": 0.18634301826651956, "grad_norm": 0.8856880783147925, "learning_rate": 9.37261854745008e-06, "loss": 0.4849, "step": 6080 }, { "epoch": 0.18637366678926076, "grad_norm": 1.8066370642828071, "learning_rate": 9.372377820311032e-06, "loss": 0.7952, "step": 6081 }, { "epoch": 0.18640431531200197, "grad_norm": 2.0020768887726836, "learning_rate": 9.372137050089826e-06, "loss": 0.8071, "step": 6082 }, { "epoch": 0.18643496383474317, "grad_norm": 1.721043845979057, "learning_rate": 9.371896236788834e-06, "loss": 0.7752, "step": 6083 }, { "epoch": 0.18646561235748438, "grad_norm": 1.7902813011835992, "learning_rate": 9.37165538041043e-06, "loss": 0.6857, "step": 6084 }, { "epoch": 0.18649626088022558, "grad_norm": 1.70382728469193, "learning_rate": 9.371414480956988e-06, "loss": 0.7435, "step": 6085 }, { "epoch": 0.1865269094029668, "grad_norm": 1.6890227259203374, "learning_rate": 9.37117353843088e-06, "loss": 0.7258, "step": 6086 }, { "epoch": 0.18655755792570797, "grad_norm": 1.8834756565230149, "learning_rate": 9.37093255283448e-06, "loss": 0.6676, "step": 6087 }, { "epoch": 0.18658820644844917, "grad_norm": 1.994028523692978, "learning_rate": 9.370691524170166e-06, "loss": 0.7946, "step": 6088 }, { "epoch": 0.18661885497119038, "grad_norm": 1.8888748671077595, "learning_rate": 9.370450452440307e-06, "loss": 0.687, "step": 6089 }, { "epoch": 0.18664950349393158, "grad_norm": 1.622536653862141, "learning_rate": 9.370209337647282e-06, "loss": 0.6612, "step": 6090 }, { "epoch": 0.1866801520166728, "grad_norm": 1.9415217779691185, "learning_rate": 9.369968179793467e-06, "loss": 0.8009, "step": 6091 }, { "epoch": 0.186710800539414, "grad_norm": 1.599342104516634, "learning_rate": 9.369726978881237e-06, "loss": 0.6958, "step": 6092 }, { "epoch": 0.1867414490621552, "grad_norm": 2.0044331543305653, "learning_rate": 9.369485734912971e-06, "loss": 0.8118, "step": 6093 }, { "epoch": 0.1867720975848964, "grad_norm": 1.8486833206719242, "learning_rate": 9.369244447891041e-06, "loss": 0.6606, "step": 6094 }, { "epoch": 0.1868027461076376, "grad_norm": 1.8827300610216104, "learning_rate": 9.36900311781783e-06, "loss": 0.7611, "step": 6095 }, { "epoch": 0.18683339463037882, "grad_norm": 1.7335954522335493, "learning_rate": 9.368761744695711e-06, "loss": 0.6498, "step": 6096 }, { "epoch": 0.18686404315312002, "grad_norm": 1.9038276072333022, "learning_rate": 9.368520328527066e-06, "loss": 0.6848, "step": 6097 }, { "epoch": 0.18689469167586123, "grad_norm": 1.6820185075170795, "learning_rate": 9.368278869314274e-06, "loss": 0.7581, "step": 6098 }, { "epoch": 0.18692534019860244, "grad_norm": 2.0570538614580998, "learning_rate": 9.36803736705971e-06, "loss": 0.8138, "step": 6099 }, { "epoch": 0.18695598872134364, "grad_norm": 1.798685742305569, "learning_rate": 9.367795821765758e-06, "loss": 0.7717, "step": 6100 }, { "epoch": 0.18698663724408485, "grad_norm": 2.2954320611767707, "learning_rate": 9.367554233434795e-06, "loss": 0.7601, "step": 6101 }, { "epoch": 0.18701728576682605, "grad_norm": 1.7689933351695182, "learning_rate": 9.367312602069203e-06, "loss": 0.7895, "step": 6102 }, { "epoch": 0.18704793428956723, "grad_norm": 1.8000485858312056, "learning_rate": 9.367070927671361e-06, "loss": 0.7309, "step": 6103 }, { "epoch": 0.18707858281230844, "grad_norm": 1.7709806810000908, "learning_rate": 9.366829210243655e-06, "loss": 0.7808, "step": 6104 }, { "epoch": 0.18710923133504964, "grad_norm": 0.9310030585571947, "learning_rate": 9.366587449788463e-06, "loss": 0.5183, "step": 6105 }, { "epoch": 0.18713987985779085, "grad_norm": 1.848203804644696, "learning_rate": 9.366345646308165e-06, "loss": 0.7746, "step": 6106 }, { "epoch": 0.18717052838053205, "grad_norm": 1.798279340381654, "learning_rate": 9.366103799805148e-06, "loss": 0.8005, "step": 6107 }, { "epoch": 0.18720117690327326, "grad_norm": 1.7328179438743792, "learning_rate": 9.365861910281795e-06, "loss": 0.6666, "step": 6108 }, { "epoch": 0.18723182542601446, "grad_norm": 1.7324310063564776, "learning_rate": 9.365619977740484e-06, "loss": 0.7349, "step": 6109 }, { "epoch": 0.18726247394875567, "grad_norm": 1.9439283414558126, "learning_rate": 9.365378002183605e-06, "loss": 0.7251, "step": 6110 }, { "epoch": 0.18729312247149688, "grad_norm": 1.7222199916072451, "learning_rate": 9.365135983613537e-06, "loss": 0.7686, "step": 6111 }, { "epoch": 0.18732377099423808, "grad_norm": 0.969122359930575, "learning_rate": 9.36489392203267e-06, "loss": 0.5151, "step": 6112 }, { "epoch": 0.1873544195169793, "grad_norm": 1.8966023792653937, "learning_rate": 9.364651817443384e-06, "loss": 0.8182, "step": 6113 }, { "epoch": 0.1873850680397205, "grad_norm": 1.8373051616548042, "learning_rate": 9.364409669848069e-06, "loss": 0.687, "step": 6114 }, { "epoch": 0.1874157165624617, "grad_norm": 1.9391163905166362, "learning_rate": 9.364167479249108e-06, "loss": 0.7115, "step": 6115 }, { "epoch": 0.1874463650852029, "grad_norm": 0.8195443425408866, "learning_rate": 9.363925245648888e-06, "loss": 0.4889, "step": 6116 }, { "epoch": 0.1874770136079441, "grad_norm": 0.8384404345906605, "learning_rate": 9.363682969049797e-06, "loss": 0.5239, "step": 6117 }, { "epoch": 0.1875076621306853, "grad_norm": 1.8935740750223533, "learning_rate": 9.363440649454218e-06, "loss": 0.7449, "step": 6118 }, { "epoch": 0.1875383106534265, "grad_norm": 1.6960198792873649, "learning_rate": 9.363198286864545e-06, "loss": 0.7247, "step": 6119 }, { "epoch": 0.1875689591761677, "grad_norm": 0.8084156424576531, "learning_rate": 9.362955881283162e-06, "loss": 0.4831, "step": 6120 }, { "epoch": 0.1875996076989089, "grad_norm": 0.82052421049134, "learning_rate": 9.36271343271246e-06, "loss": 0.5038, "step": 6121 }, { "epoch": 0.1876302562216501, "grad_norm": 1.6423208088198396, "learning_rate": 9.362470941154825e-06, "loss": 0.7883, "step": 6122 }, { "epoch": 0.18766090474439132, "grad_norm": 1.7262668208335594, "learning_rate": 9.362228406612645e-06, "loss": 0.8193, "step": 6123 }, { "epoch": 0.18769155326713252, "grad_norm": 2.038993727489435, "learning_rate": 9.361985829088316e-06, "loss": 0.8212, "step": 6124 }, { "epoch": 0.18772220178987373, "grad_norm": 1.6777009561119822, "learning_rate": 9.361743208584223e-06, "loss": 0.6883, "step": 6125 }, { "epoch": 0.18775285031261493, "grad_norm": 1.785008210008594, "learning_rate": 9.36150054510276e-06, "loss": 0.7119, "step": 6126 }, { "epoch": 0.18778349883535614, "grad_norm": 1.7197690315895087, "learning_rate": 9.361257838646313e-06, "loss": 0.6982, "step": 6127 }, { "epoch": 0.18781414735809734, "grad_norm": 1.9213770060977193, "learning_rate": 9.361015089217277e-06, "loss": 0.8237, "step": 6128 }, { "epoch": 0.18784479588083855, "grad_norm": 1.676657033580465, "learning_rate": 9.360772296818046e-06, "loss": 0.7073, "step": 6129 }, { "epoch": 0.18787544440357976, "grad_norm": 1.687821946614801, "learning_rate": 9.360529461451009e-06, "loss": 0.7341, "step": 6130 }, { "epoch": 0.18790609292632096, "grad_norm": 1.7858395163733738, "learning_rate": 9.36028658311856e-06, "loss": 0.6904, "step": 6131 }, { "epoch": 0.18793674144906217, "grad_norm": 2.1194127646186605, "learning_rate": 9.360043661823089e-06, "loss": 0.7806, "step": 6132 }, { "epoch": 0.18796738997180337, "grad_norm": 1.829260013788456, "learning_rate": 9.359800697566994e-06, "loss": 0.7754, "step": 6133 }, { "epoch": 0.18799803849454455, "grad_norm": 1.9102897715286655, "learning_rate": 9.359557690352667e-06, "loss": 0.7567, "step": 6134 }, { "epoch": 0.18802868701728576, "grad_norm": 2.0319670364950895, "learning_rate": 9.359314640182504e-06, "loss": 0.7084, "step": 6135 }, { "epoch": 0.18805933554002696, "grad_norm": 1.6321504320814058, "learning_rate": 9.359071547058898e-06, "loss": 0.7208, "step": 6136 }, { "epoch": 0.18808998406276817, "grad_norm": 2.1877596171036355, "learning_rate": 9.358828410984244e-06, "loss": 0.6722, "step": 6137 }, { "epoch": 0.18812063258550937, "grad_norm": 1.5486497356568654, "learning_rate": 9.358585231960938e-06, "loss": 0.6409, "step": 6138 }, { "epoch": 0.18815128110825058, "grad_norm": 1.9047311224034482, "learning_rate": 9.358342009991377e-06, "loss": 0.753, "step": 6139 }, { "epoch": 0.18818192963099178, "grad_norm": 1.8046635471113661, "learning_rate": 9.358098745077957e-06, "loss": 0.6912, "step": 6140 }, { "epoch": 0.188212578153733, "grad_norm": 1.7540363410683188, "learning_rate": 9.357855437223075e-06, "loss": 0.7383, "step": 6141 }, { "epoch": 0.1882432266764742, "grad_norm": 1.7627870802166785, "learning_rate": 9.357612086429129e-06, "loss": 0.6523, "step": 6142 }, { "epoch": 0.1882738751992154, "grad_norm": 1.8886847121643149, "learning_rate": 9.357368692698515e-06, "loss": 0.7444, "step": 6143 }, { "epoch": 0.1883045237219566, "grad_norm": 1.7088892468576784, "learning_rate": 9.357125256033634e-06, "loss": 0.6785, "step": 6144 }, { "epoch": 0.1883351722446978, "grad_norm": 1.718408522359068, "learning_rate": 9.356881776436881e-06, "loss": 0.7541, "step": 6145 }, { "epoch": 0.18836582076743902, "grad_norm": 1.9688460726780437, "learning_rate": 9.356638253910659e-06, "loss": 0.7443, "step": 6146 }, { "epoch": 0.18839646929018022, "grad_norm": 1.7967535800114953, "learning_rate": 9.356394688457364e-06, "loss": 0.8145, "step": 6147 }, { "epoch": 0.18842711781292143, "grad_norm": 1.6436884151941373, "learning_rate": 9.356151080079399e-06, "loss": 0.6868, "step": 6148 }, { "epoch": 0.1884577663356626, "grad_norm": 1.7316689241750143, "learning_rate": 9.355907428779163e-06, "loss": 0.8046, "step": 6149 }, { "epoch": 0.1884884148584038, "grad_norm": 0.9439582089663613, "learning_rate": 9.355663734559055e-06, "loss": 0.4984, "step": 6150 }, { "epoch": 0.18851906338114502, "grad_norm": 1.7469018651059167, "learning_rate": 9.355419997421478e-06, "loss": 0.6527, "step": 6151 }, { "epoch": 0.18854971190388622, "grad_norm": 0.8912834046802529, "learning_rate": 9.355176217368833e-06, "loss": 0.4912, "step": 6152 }, { "epoch": 0.18858036042662743, "grad_norm": 0.8426714705971972, "learning_rate": 9.354932394403524e-06, "loss": 0.5008, "step": 6153 }, { "epoch": 0.18861100894936864, "grad_norm": 1.9443073634136752, "learning_rate": 9.354688528527952e-06, "loss": 0.6769, "step": 6154 }, { "epoch": 0.18864165747210984, "grad_norm": 2.0449156432800444, "learning_rate": 9.354444619744519e-06, "loss": 0.7719, "step": 6155 }, { "epoch": 0.18867230599485105, "grad_norm": 1.876589031273853, "learning_rate": 9.354200668055629e-06, "loss": 0.7807, "step": 6156 }, { "epoch": 0.18870295451759225, "grad_norm": 1.7195601401050298, "learning_rate": 9.353956673463684e-06, "loss": 0.6668, "step": 6157 }, { "epoch": 0.18873360304033346, "grad_norm": 0.9668967691941002, "learning_rate": 9.353712635971093e-06, "loss": 0.4906, "step": 6158 }, { "epoch": 0.18876425156307466, "grad_norm": 1.8820092509510584, "learning_rate": 9.353468555580256e-06, "loss": 0.6636, "step": 6159 }, { "epoch": 0.18879490008581587, "grad_norm": 2.601355231563424, "learning_rate": 9.353224432293578e-06, "loss": 0.6785, "step": 6160 }, { "epoch": 0.18882554860855708, "grad_norm": 0.8429937029479676, "learning_rate": 9.352980266113468e-06, "loss": 0.5069, "step": 6161 }, { "epoch": 0.18885619713129828, "grad_norm": 1.8389105157254575, "learning_rate": 9.352736057042329e-06, "loss": 0.7641, "step": 6162 }, { "epoch": 0.1888868456540395, "grad_norm": 1.8538665876946128, "learning_rate": 9.352491805082568e-06, "loss": 0.7278, "step": 6163 }, { "epoch": 0.1889174941767807, "grad_norm": 2.194001904812742, "learning_rate": 9.352247510236591e-06, "loss": 0.7429, "step": 6164 }, { "epoch": 0.18894814269952187, "grad_norm": 1.7558059018969299, "learning_rate": 9.352003172506807e-06, "loss": 0.7544, "step": 6165 }, { "epoch": 0.18897879122226308, "grad_norm": 1.7197599158390502, "learning_rate": 9.351758791895621e-06, "loss": 0.7103, "step": 6166 }, { "epoch": 0.18900943974500428, "grad_norm": 1.9246468788254343, "learning_rate": 9.351514368405442e-06, "loss": 0.7761, "step": 6167 }, { "epoch": 0.1890400882677455, "grad_norm": 1.628341487261947, "learning_rate": 9.35126990203868e-06, "loss": 0.7267, "step": 6168 }, { "epoch": 0.1890707367904867, "grad_norm": 1.7065184035349217, "learning_rate": 9.35102539279774e-06, "loss": 0.6718, "step": 6169 }, { "epoch": 0.1891013853132279, "grad_norm": 1.6568021133601194, "learning_rate": 9.350780840685036e-06, "loss": 0.7457, "step": 6170 }, { "epoch": 0.1891320338359691, "grad_norm": 1.8194901673301802, "learning_rate": 9.350536245702975e-06, "loss": 0.7875, "step": 6171 }, { "epoch": 0.1891626823587103, "grad_norm": 1.828642653900024, "learning_rate": 9.350291607853965e-06, "loss": 0.8707, "step": 6172 }, { "epoch": 0.18919333088145152, "grad_norm": 1.8628553152723695, "learning_rate": 9.350046927140422e-06, "loss": 0.7656, "step": 6173 }, { "epoch": 0.18922397940419272, "grad_norm": 1.7752964929036508, "learning_rate": 9.34980220356475e-06, "loss": 0.7352, "step": 6174 }, { "epoch": 0.18925462792693393, "grad_norm": 1.9859700783384453, "learning_rate": 9.349557437129366e-06, "loss": 0.739, "step": 6175 }, { "epoch": 0.18928527644967513, "grad_norm": 1.3855412841243013, "learning_rate": 9.34931262783668e-06, "loss": 0.5031, "step": 6176 }, { "epoch": 0.18931592497241634, "grad_norm": 1.6086710973147613, "learning_rate": 9.349067775689102e-06, "loss": 0.7677, "step": 6177 }, { "epoch": 0.18934657349515754, "grad_norm": 1.718972112039466, "learning_rate": 9.348822880689049e-06, "loss": 0.7431, "step": 6178 }, { "epoch": 0.18937722201789875, "grad_norm": 1.6106950612528455, "learning_rate": 9.34857794283893e-06, "loss": 0.6953, "step": 6179 }, { "epoch": 0.18940787054063993, "grad_norm": 1.7659008378294443, "learning_rate": 9.34833296214116e-06, "loss": 0.8316, "step": 6180 }, { "epoch": 0.18943851906338113, "grad_norm": 1.7018005444621682, "learning_rate": 9.348087938598153e-06, "loss": 0.7641, "step": 6181 }, { "epoch": 0.18946916758612234, "grad_norm": 1.1643154422012874, "learning_rate": 9.347842872212323e-06, "loss": 0.5009, "step": 6182 }, { "epoch": 0.18949981610886354, "grad_norm": 1.7396969621866485, "learning_rate": 9.347597762986085e-06, "loss": 0.7323, "step": 6183 }, { "epoch": 0.18953046463160475, "grad_norm": 1.646568612226513, "learning_rate": 9.347352610921853e-06, "loss": 0.6545, "step": 6184 }, { "epoch": 0.18956111315434596, "grad_norm": 0.8808753538992078, "learning_rate": 9.347107416022043e-06, "loss": 0.4903, "step": 6185 }, { "epoch": 0.18959176167708716, "grad_norm": 1.6814908905661965, "learning_rate": 9.346862178289073e-06, "loss": 0.7255, "step": 6186 }, { "epoch": 0.18962241019982837, "grad_norm": 1.8926781324860884, "learning_rate": 9.346616897725357e-06, "loss": 0.7358, "step": 6187 }, { "epoch": 0.18965305872256957, "grad_norm": 1.9972575738341245, "learning_rate": 9.346371574333312e-06, "loss": 0.851, "step": 6188 }, { "epoch": 0.18968370724531078, "grad_norm": 1.9953282136975272, "learning_rate": 9.346126208115358e-06, "loss": 0.7875, "step": 6189 }, { "epoch": 0.18971435576805198, "grad_norm": 1.9136719678036673, "learning_rate": 9.345880799073908e-06, "loss": 0.7145, "step": 6190 }, { "epoch": 0.1897450042907932, "grad_norm": 0.931861939477264, "learning_rate": 9.345635347211383e-06, "loss": 0.5106, "step": 6191 }, { "epoch": 0.1897756528135344, "grad_norm": 1.5917792329578744, "learning_rate": 9.345389852530201e-06, "loss": 0.7632, "step": 6192 }, { "epoch": 0.1898063013362756, "grad_norm": 1.7472817583206681, "learning_rate": 9.345144315032783e-06, "loss": 0.8247, "step": 6193 }, { "epoch": 0.1898369498590168, "grad_norm": 1.8251363474125493, "learning_rate": 9.344898734721544e-06, "loss": 0.7404, "step": 6194 }, { "epoch": 0.189867598381758, "grad_norm": 1.696202691107418, "learning_rate": 9.344653111598907e-06, "loss": 0.7319, "step": 6195 }, { "epoch": 0.1898982469044992, "grad_norm": 2.010549900096976, "learning_rate": 9.344407445667292e-06, "loss": 0.8267, "step": 6196 }, { "epoch": 0.1899288954272404, "grad_norm": 0.8192870111231415, "learning_rate": 9.344161736929116e-06, "loss": 0.5017, "step": 6197 }, { "epoch": 0.1899595439499816, "grad_norm": 0.9107771458572474, "learning_rate": 9.343915985386806e-06, "loss": 0.5153, "step": 6198 }, { "epoch": 0.1899901924727228, "grad_norm": 1.7677295137466806, "learning_rate": 9.34367019104278e-06, "loss": 0.7234, "step": 6199 }, { "epoch": 0.190020840995464, "grad_norm": 0.779491021979682, "learning_rate": 9.343424353899459e-06, "loss": 0.4934, "step": 6200 }, { "epoch": 0.19005148951820522, "grad_norm": 1.9187869647911355, "learning_rate": 9.343178473959266e-06, "loss": 0.7205, "step": 6201 }, { "epoch": 0.19008213804094642, "grad_norm": 1.6765333938715308, "learning_rate": 9.342932551224626e-06, "loss": 0.7143, "step": 6202 }, { "epoch": 0.19011278656368763, "grad_norm": 2.054705058560816, "learning_rate": 9.34268658569796e-06, "loss": 0.7427, "step": 6203 }, { "epoch": 0.19014343508642884, "grad_norm": 0.8680590196715495, "learning_rate": 9.34244057738169e-06, "loss": 0.4977, "step": 6204 }, { "epoch": 0.19017408360917004, "grad_norm": 1.708274171053454, "learning_rate": 9.342194526278243e-06, "loss": 0.7417, "step": 6205 }, { "epoch": 0.19020473213191125, "grad_norm": 1.799838635469959, "learning_rate": 9.341948432390044e-06, "loss": 0.6736, "step": 6206 }, { "epoch": 0.19023538065465245, "grad_norm": 1.6717666277967047, "learning_rate": 9.341702295719515e-06, "loss": 0.7409, "step": 6207 }, { "epoch": 0.19026602917739366, "grad_norm": 1.8382838771964893, "learning_rate": 9.341456116269084e-06, "loss": 0.7663, "step": 6208 }, { "epoch": 0.19029667770013486, "grad_norm": 1.816920907232303, "learning_rate": 9.341209894041173e-06, "loss": 0.7654, "step": 6209 }, { "epoch": 0.19032732622287607, "grad_norm": 1.7277701092692686, "learning_rate": 9.340963629038208e-06, "loss": 0.6896, "step": 6210 }, { "epoch": 0.19035797474561725, "grad_norm": 1.8391430685658927, "learning_rate": 9.340717321262622e-06, "loss": 0.8165, "step": 6211 }, { "epoch": 0.19038862326835845, "grad_norm": 1.9397876781098211, "learning_rate": 9.340470970716836e-06, "loss": 0.6658, "step": 6212 }, { "epoch": 0.19041927179109966, "grad_norm": 1.8282494856823142, "learning_rate": 9.340224577403278e-06, "loss": 0.7429, "step": 6213 }, { "epoch": 0.19044992031384086, "grad_norm": 1.7105608602451499, "learning_rate": 9.339978141324378e-06, "loss": 0.6955, "step": 6214 }, { "epoch": 0.19048056883658207, "grad_norm": 1.7738826287896317, "learning_rate": 9.339731662482564e-06, "loss": 0.6999, "step": 6215 }, { "epoch": 0.19051121735932328, "grad_norm": 2.5007929335233903, "learning_rate": 9.339485140880261e-06, "loss": 0.829, "step": 6216 }, { "epoch": 0.19054186588206448, "grad_norm": 1.9367657217238041, "learning_rate": 9.339238576519902e-06, "loss": 0.6985, "step": 6217 }, { "epoch": 0.1905725144048057, "grad_norm": 1.7265391949864866, "learning_rate": 9.338991969403914e-06, "loss": 0.7922, "step": 6218 }, { "epoch": 0.1906031629275469, "grad_norm": 1.5946656781701134, "learning_rate": 9.33874531953473e-06, "loss": 0.6496, "step": 6219 }, { "epoch": 0.1906338114502881, "grad_norm": 1.7730188029781184, "learning_rate": 9.338498626914776e-06, "loss": 0.7919, "step": 6220 }, { "epoch": 0.1906644599730293, "grad_norm": 1.9870790181781746, "learning_rate": 9.338251891546486e-06, "loss": 0.7969, "step": 6221 }, { "epoch": 0.1906951084957705, "grad_norm": 1.5785812173225584, "learning_rate": 9.33800511343229e-06, "loss": 0.6983, "step": 6222 }, { "epoch": 0.19072575701851172, "grad_norm": 1.6829722048023845, "learning_rate": 9.337758292574622e-06, "loss": 0.7752, "step": 6223 }, { "epoch": 0.19075640554125292, "grad_norm": 1.7125624991615183, "learning_rate": 9.337511428975908e-06, "loss": 0.7846, "step": 6224 }, { "epoch": 0.19078705406399413, "grad_norm": 1.7609618034108836, "learning_rate": 9.337264522638584e-06, "loss": 0.8537, "step": 6225 }, { "epoch": 0.19081770258673533, "grad_norm": 1.8368499801733593, "learning_rate": 9.337017573565086e-06, "loss": 0.768, "step": 6226 }, { "epoch": 0.1908483511094765, "grad_norm": 1.9432676881332314, "learning_rate": 9.336770581757844e-06, "loss": 0.7559, "step": 6227 }, { "epoch": 0.19087899963221772, "grad_norm": 1.9776956816408695, "learning_rate": 9.336523547219289e-06, "loss": 0.7813, "step": 6228 }, { "epoch": 0.19090964815495892, "grad_norm": 1.8699445138466688, "learning_rate": 9.33627646995186e-06, "loss": 0.7922, "step": 6229 }, { "epoch": 0.19094029667770013, "grad_norm": 1.1056146436261347, "learning_rate": 9.336029349957989e-06, "loss": 0.5206, "step": 6230 }, { "epoch": 0.19097094520044133, "grad_norm": 1.8579580393337352, "learning_rate": 9.335782187240111e-06, "loss": 0.6694, "step": 6231 }, { "epoch": 0.19100159372318254, "grad_norm": 1.737371313969081, "learning_rate": 9.335534981800662e-06, "loss": 0.7502, "step": 6232 }, { "epoch": 0.19103224224592374, "grad_norm": 1.7728162818200195, "learning_rate": 9.335287733642078e-06, "loss": 0.7715, "step": 6233 }, { "epoch": 0.19106289076866495, "grad_norm": 2.145221847890105, "learning_rate": 9.335040442766794e-06, "loss": 0.6896, "step": 6234 }, { "epoch": 0.19109353929140616, "grad_norm": 1.8236999649652217, "learning_rate": 9.334793109177248e-06, "loss": 0.7181, "step": 6235 }, { "epoch": 0.19112418781414736, "grad_norm": 1.8832138990125484, "learning_rate": 9.334545732875876e-06, "loss": 0.702, "step": 6236 }, { "epoch": 0.19115483633688857, "grad_norm": 1.8244180710085083, "learning_rate": 9.334298313865115e-06, "loss": 0.7763, "step": 6237 }, { "epoch": 0.19118548485962977, "grad_norm": 1.7633631005118275, "learning_rate": 9.334050852147404e-06, "loss": 0.7092, "step": 6238 }, { "epoch": 0.19121613338237098, "grad_norm": 1.7577861780001114, "learning_rate": 9.333803347725184e-06, "loss": 0.6978, "step": 6239 }, { "epoch": 0.19124678190511218, "grad_norm": 2.1044025619447795, "learning_rate": 9.333555800600888e-06, "loss": 0.7637, "step": 6240 }, { "epoch": 0.1912774304278534, "grad_norm": 1.648943182493622, "learning_rate": 9.333308210776959e-06, "loss": 0.8788, "step": 6241 }, { "epoch": 0.19130807895059457, "grad_norm": 1.7245882618941462, "learning_rate": 9.333060578255833e-06, "loss": 0.7327, "step": 6242 }, { "epoch": 0.19133872747333577, "grad_norm": 1.8344441855829603, "learning_rate": 9.332812903039954e-06, "loss": 0.7506, "step": 6243 }, { "epoch": 0.19136937599607698, "grad_norm": 1.747890701144689, "learning_rate": 9.332565185131762e-06, "loss": 0.7112, "step": 6244 }, { "epoch": 0.19140002451881818, "grad_norm": 1.7439334527558499, "learning_rate": 9.332317424533696e-06, "loss": 0.6214, "step": 6245 }, { "epoch": 0.1914306730415594, "grad_norm": 2.054532761916262, "learning_rate": 9.332069621248199e-06, "loss": 0.6803, "step": 6246 }, { "epoch": 0.1914613215643006, "grad_norm": 1.7382818191818101, "learning_rate": 9.33182177527771e-06, "loss": 0.7851, "step": 6247 }, { "epoch": 0.1914919700870418, "grad_norm": 1.665879293483115, "learning_rate": 9.331573886624672e-06, "loss": 0.626, "step": 6248 }, { "epoch": 0.191522618609783, "grad_norm": 1.9225199334434353, "learning_rate": 9.33132595529153e-06, "loss": 0.6566, "step": 6249 }, { "epoch": 0.1915532671325242, "grad_norm": 1.8328710915903794, "learning_rate": 9.331077981280724e-06, "loss": 0.7981, "step": 6250 }, { "epoch": 0.19158391565526542, "grad_norm": 1.8536834779868059, "learning_rate": 9.330829964594698e-06, "loss": 0.7098, "step": 6251 }, { "epoch": 0.19161456417800662, "grad_norm": 1.7684617232667736, "learning_rate": 9.330581905235898e-06, "loss": 0.6886, "step": 6252 }, { "epoch": 0.19164521270074783, "grad_norm": 1.1294117032601214, "learning_rate": 9.330333803206766e-06, "loss": 0.5055, "step": 6253 }, { "epoch": 0.19167586122348904, "grad_norm": 1.9150655787853286, "learning_rate": 9.330085658509747e-06, "loss": 0.7152, "step": 6254 }, { "epoch": 0.19170650974623024, "grad_norm": 1.9314951354709982, "learning_rate": 9.329837471147286e-06, "loss": 0.7192, "step": 6255 }, { "epoch": 0.19173715826897145, "grad_norm": 1.6137482424685203, "learning_rate": 9.329589241121828e-06, "loss": 0.7262, "step": 6256 }, { "epoch": 0.19176780679171265, "grad_norm": 1.7977042906576934, "learning_rate": 9.32934096843582e-06, "loss": 0.7805, "step": 6257 }, { "epoch": 0.19179845531445383, "grad_norm": 1.7006130479280352, "learning_rate": 9.329092653091708e-06, "loss": 0.7324, "step": 6258 }, { "epoch": 0.19182910383719504, "grad_norm": 1.6501029637133173, "learning_rate": 9.328844295091938e-06, "loss": 0.7011, "step": 6259 }, { "epoch": 0.19185975235993624, "grad_norm": 0.9525789244555942, "learning_rate": 9.328595894438958e-06, "loss": 0.4772, "step": 6260 }, { "epoch": 0.19189040088267745, "grad_norm": 1.7469168955446288, "learning_rate": 9.328347451135213e-06, "loss": 0.6175, "step": 6261 }, { "epoch": 0.19192104940541865, "grad_norm": 1.8783148213829208, "learning_rate": 9.328098965183157e-06, "loss": 0.7581, "step": 6262 }, { "epoch": 0.19195169792815986, "grad_norm": 1.4962088986137452, "learning_rate": 9.32785043658523e-06, "loss": 0.6554, "step": 6263 }, { "epoch": 0.19198234645090106, "grad_norm": 1.705812615511825, "learning_rate": 9.32760186534389e-06, "loss": 0.8057, "step": 6264 }, { "epoch": 0.19201299497364227, "grad_norm": 0.8953881854013317, "learning_rate": 9.327353251461578e-06, "loss": 0.4894, "step": 6265 }, { "epoch": 0.19204364349638348, "grad_norm": 1.7144414491838276, "learning_rate": 9.327104594940748e-06, "loss": 0.7204, "step": 6266 }, { "epoch": 0.19207429201912468, "grad_norm": 1.7900711956192217, "learning_rate": 9.326855895783851e-06, "loss": 0.7301, "step": 6267 }, { "epoch": 0.1921049405418659, "grad_norm": 1.6788678569983464, "learning_rate": 9.326607153993335e-06, "loss": 0.6953, "step": 6268 }, { "epoch": 0.1921355890646071, "grad_norm": 1.7271933743521204, "learning_rate": 9.32635836957165e-06, "loss": 0.6494, "step": 6269 }, { "epoch": 0.1921662375873483, "grad_norm": 1.7188696526839624, "learning_rate": 9.326109542521252e-06, "loss": 0.6635, "step": 6270 }, { "epoch": 0.1921968861100895, "grad_norm": 1.5848764788653016, "learning_rate": 9.325860672844586e-06, "loss": 0.6957, "step": 6271 }, { "epoch": 0.1922275346328307, "grad_norm": 1.7381068122718748, "learning_rate": 9.325611760544112e-06, "loss": 0.7456, "step": 6272 }, { "epoch": 0.1922581831555719, "grad_norm": 1.812134388646277, "learning_rate": 9.325362805622275e-06, "loss": 0.7479, "step": 6273 }, { "epoch": 0.1922888316783131, "grad_norm": 1.7933366588880884, "learning_rate": 9.325113808081535e-06, "loss": 0.7643, "step": 6274 }, { "epoch": 0.1923194802010543, "grad_norm": 1.6217649310534876, "learning_rate": 9.32486476792434e-06, "loss": 0.6646, "step": 6275 }, { "epoch": 0.1923501287237955, "grad_norm": 1.862208358016709, "learning_rate": 9.324615685153145e-06, "loss": 0.7349, "step": 6276 }, { "epoch": 0.1923807772465367, "grad_norm": 1.5827303289399204, "learning_rate": 9.324366559770406e-06, "loss": 0.6717, "step": 6277 }, { "epoch": 0.19241142576927792, "grad_norm": 1.766435905716033, "learning_rate": 9.324117391778577e-06, "loss": 0.7954, "step": 6278 }, { "epoch": 0.19244207429201912, "grad_norm": 1.7798089985592347, "learning_rate": 9.323868181180113e-06, "loss": 0.7182, "step": 6279 }, { "epoch": 0.19247272281476033, "grad_norm": 1.6911886346051506, "learning_rate": 9.32361892797747e-06, "loss": 0.8037, "step": 6280 }, { "epoch": 0.19250337133750153, "grad_norm": 1.8319638746275988, "learning_rate": 9.323369632173103e-06, "loss": 0.6758, "step": 6281 }, { "epoch": 0.19253401986024274, "grad_norm": 1.6615377700976435, "learning_rate": 9.323120293769468e-06, "loss": 0.7131, "step": 6282 }, { "epoch": 0.19256466838298394, "grad_norm": 1.760448321913327, "learning_rate": 9.322870912769024e-06, "loss": 0.7314, "step": 6283 }, { "epoch": 0.19259531690572515, "grad_norm": 1.7560068684139238, "learning_rate": 9.322621489174226e-06, "loss": 0.8262, "step": 6284 }, { "epoch": 0.19262596542846636, "grad_norm": 1.7132032143565334, "learning_rate": 9.322372022987533e-06, "loss": 0.7342, "step": 6285 }, { "epoch": 0.19265661395120756, "grad_norm": 1.661153784972312, "learning_rate": 9.322122514211402e-06, "loss": 0.6669, "step": 6286 }, { "epoch": 0.19268726247394877, "grad_norm": 1.569661900256363, "learning_rate": 9.321872962848292e-06, "loss": 0.6453, "step": 6287 }, { "epoch": 0.19271791099668997, "grad_norm": 1.7824208214441415, "learning_rate": 9.321623368900664e-06, "loss": 0.7833, "step": 6288 }, { "epoch": 0.19274855951943115, "grad_norm": 1.7166577478727896, "learning_rate": 9.321373732370973e-06, "loss": 0.7551, "step": 6289 }, { "epoch": 0.19277920804217236, "grad_norm": 1.786585375159658, "learning_rate": 9.321124053261681e-06, "loss": 0.68, "step": 6290 }, { "epoch": 0.19280985656491356, "grad_norm": 1.6997165846699454, "learning_rate": 9.32087433157525e-06, "loss": 0.7169, "step": 6291 }, { "epoch": 0.19284050508765477, "grad_norm": 1.7555270906221352, "learning_rate": 9.320624567314136e-06, "loss": 0.7426, "step": 6292 }, { "epoch": 0.19287115361039597, "grad_norm": 1.6336224759170959, "learning_rate": 9.320374760480804e-06, "loss": 0.6859, "step": 6293 }, { "epoch": 0.19290180213313718, "grad_norm": 1.6416470622934667, "learning_rate": 9.320124911077713e-06, "loss": 0.6494, "step": 6294 }, { "epoch": 0.19293245065587838, "grad_norm": 1.7185834304585055, "learning_rate": 9.319875019107327e-06, "loss": 0.6622, "step": 6295 }, { "epoch": 0.1929630991786196, "grad_norm": 1.7500922169202577, "learning_rate": 9.319625084572108e-06, "loss": 0.7817, "step": 6296 }, { "epoch": 0.1929937477013608, "grad_norm": 1.8158441629306417, "learning_rate": 9.319375107474516e-06, "loss": 0.6961, "step": 6297 }, { "epoch": 0.193024396224102, "grad_norm": 1.6320033603963462, "learning_rate": 9.319125087817017e-06, "loss": 0.7567, "step": 6298 }, { "epoch": 0.1930550447468432, "grad_norm": 1.7988427247454395, "learning_rate": 9.318875025602072e-06, "loss": 0.7351, "step": 6299 }, { "epoch": 0.1930856932695844, "grad_norm": 1.83273153551287, "learning_rate": 9.31862492083215e-06, "loss": 0.7853, "step": 6300 }, { "epoch": 0.19311634179232562, "grad_norm": 1.915129323595258, "learning_rate": 9.318374773509707e-06, "loss": 0.8119, "step": 6301 }, { "epoch": 0.19314699031506682, "grad_norm": 1.7276921069280324, "learning_rate": 9.318124583637216e-06, "loss": 0.6791, "step": 6302 }, { "epoch": 0.19317763883780803, "grad_norm": 1.8237742593968385, "learning_rate": 9.317874351217136e-06, "loss": 0.7344, "step": 6303 }, { "epoch": 0.1932082873605492, "grad_norm": 1.7998569739206967, "learning_rate": 9.317624076251936e-06, "loss": 0.7352, "step": 6304 }, { "epoch": 0.1932389358832904, "grad_norm": 1.9396508923388711, "learning_rate": 9.317373758744082e-06, "loss": 0.7645, "step": 6305 }, { "epoch": 0.19326958440603162, "grad_norm": 1.7923651388599366, "learning_rate": 9.317123398696039e-06, "loss": 0.7259, "step": 6306 }, { "epoch": 0.19330023292877282, "grad_norm": 1.6143834512400248, "learning_rate": 9.316872996110276e-06, "loss": 0.6244, "step": 6307 }, { "epoch": 0.19333088145151403, "grad_norm": 1.8740352304881194, "learning_rate": 9.316622550989259e-06, "loss": 0.7431, "step": 6308 }, { "epoch": 0.19336152997425524, "grad_norm": 1.8253333729673848, "learning_rate": 9.316372063335453e-06, "loss": 0.7509, "step": 6309 }, { "epoch": 0.19339217849699644, "grad_norm": 0.9660179897407231, "learning_rate": 9.31612153315133e-06, "loss": 0.5067, "step": 6310 }, { "epoch": 0.19342282701973765, "grad_norm": 1.7930911218649523, "learning_rate": 9.315870960439357e-06, "loss": 0.7397, "step": 6311 }, { "epoch": 0.19345347554247885, "grad_norm": 1.9523516457412753, "learning_rate": 9.315620345202004e-06, "loss": 0.7882, "step": 6312 }, { "epoch": 0.19348412406522006, "grad_norm": 1.722995707757921, "learning_rate": 9.31536968744174e-06, "loss": 0.7041, "step": 6313 }, { "epoch": 0.19351477258796126, "grad_norm": 2.019993213539517, "learning_rate": 9.31511898716103e-06, "loss": 0.7814, "step": 6314 }, { "epoch": 0.19354542111070247, "grad_norm": 1.5702834994200388, "learning_rate": 9.314868244362355e-06, "loss": 0.7051, "step": 6315 }, { "epoch": 0.19357606963344368, "grad_norm": 1.895886215825941, "learning_rate": 9.314617459048175e-06, "loss": 0.6503, "step": 6316 }, { "epoch": 0.19360671815618488, "grad_norm": 1.6849583181843695, "learning_rate": 9.314366631220965e-06, "loss": 0.6545, "step": 6317 }, { "epoch": 0.1936373666789261, "grad_norm": 2.0319296133536096, "learning_rate": 9.314115760883199e-06, "loss": 0.683, "step": 6318 }, { "epoch": 0.1936680152016673, "grad_norm": 1.9524610493306447, "learning_rate": 9.313864848037346e-06, "loss": 0.8466, "step": 6319 }, { "epoch": 0.19369866372440847, "grad_norm": 0.9971938440378584, "learning_rate": 9.313613892685877e-06, "loss": 0.4896, "step": 6320 }, { "epoch": 0.19372931224714968, "grad_norm": 0.9323517396508137, "learning_rate": 9.31336289483127e-06, "loss": 0.4924, "step": 6321 }, { "epoch": 0.19375996076989088, "grad_norm": 2.2239064346476956, "learning_rate": 9.313111854475991e-06, "loss": 0.7686, "step": 6322 }, { "epoch": 0.1937906092926321, "grad_norm": 0.8475196897378168, "learning_rate": 9.312860771622521e-06, "loss": 0.4723, "step": 6323 }, { "epoch": 0.1938212578153733, "grad_norm": 1.8295248905228436, "learning_rate": 9.312609646273327e-06, "loss": 0.7725, "step": 6324 }, { "epoch": 0.1938519063381145, "grad_norm": 0.9545950929384336, "learning_rate": 9.31235847843089e-06, "loss": 0.4909, "step": 6325 }, { "epoch": 0.1938825548608557, "grad_norm": 2.381387366439298, "learning_rate": 9.312107268097679e-06, "loss": 0.7182, "step": 6326 }, { "epoch": 0.1939132033835969, "grad_norm": 1.8789745317230204, "learning_rate": 9.311856015276172e-06, "loss": 0.6829, "step": 6327 }, { "epoch": 0.19394385190633812, "grad_norm": 2.0220694693123824, "learning_rate": 9.311604719968845e-06, "loss": 0.7009, "step": 6328 }, { "epoch": 0.19397450042907932, "grad_norm": 1.717527390697982, "learning_rate": 9.311353382178174e-06, "loss": 0.8135, "step": 6329 }, { "epoch": 0.19400514895182053, "grad_norm": 1.764276676715296, "learning_rate": 9.311102001906634e-06, "loss": 0.7703, "step": 6330 }, { "epoch": 0.19403579747456173, "grad_norm": 1.094451719680199, "learning_rate": 9.310850579156703e-06, "loss": 0.5007, "step": 6331 }, { "epoch": 0.19406644599730294, "grad_norm": 1.9282270431134914, "learning_rate": 9.31059911393086e-06, "loss": 0.7714, "step": 6332 }, { "epoch": 0.19409709452004414, "grad_norm": 1.7552488715503436, "learning_rate": 9.31034760623158e-06, "loss": 0.7471, "step": 6333 }, { "epoch": 0.19412774304278535, "grad_norm": 1.6706135007535394, "learning_rate": 9.310096056061341e-06, "loss": 0.6818, "step": 6334 }, { "epoch": 0.19415839156552653, "grad_norm": 1.781943392709719, "learning_rate": 9.309844463422624e-06, "loss": 0.6843, "step": 6335 }, { "epoch": 0.19418904008826773, "grad_norm": 2.125189756977096, "learning_rate": 9.309592828317906e-06, "loss": 0.822, "step": 6336 }, { "epoch": 0.19421968861100894, "grad_norm": 1.6472293499476545, "learning_rate": 9.309341150749669e-06, "loss": 0.7113, "step": 6337 }, { "epoch": 0.19425033713375014, "grad_norm": 1.8533282213036208, "learning_rate": 9.30908943072039e-06, "loss": 0.7778, "step": 6338 }, { "epoch": 0.19428098565649135, "grad_norm": 0.8752585498548058, "learning_rate": 9.308837668232548e-06, "loss": 0.4759, "step": 6339 }, { "epoch": 0.19431163417923256, "grad_norm": 1.5872553832185043, "learning_rate": 9.30858586328863e-06, "loss": 0.6494, "step": 6340 }, { "epoch": 0.19434228270197376, "grad_norm": 0.8636186270484072, "learning_rate": 9.30833401589111e-06, "loss": 0.5086, "step": 6341 }, { "epoch": 0.19437293122471497, "grad_norm": 1.4748506715829157, "learning_rate": 9.308082126042474e-06, "loss": 0.712, "step": 6342 }, { "epoch": 0.19440357974745617, "grad_norm": 1.5639284169797143, "learning_rate": 9.307830193745203e-06, "loss": 0.7216, "step": 6343 }, { "epoch": 0.19443422827019738, "grad_norm": 1.8158354173917373, "learning_rate": 9.307578219001778e-06, "loss": 0.6831, "step": 6344 }, { "epoch": 0.19446487679293858, "grad_norm": 0.8941628428615838, "learning_rate": 9.307326201814684e-06, "loss": 0.5059, "step": 6345 }, { "epoch": 0.1944955253156798, "grad_norm": 2.006643857250631, "learning_rate": 9.307074142186401e-06, "loss": 0.7456, "step": 6346 }, { "epoch": 0.194526173838421, "grad_norm": 0.9022065299978345, "learning_rate": 9.306822040119415e-06, "loss": 0.4915, "step": 6347 }, { "epoch": 0.1945568223611622, "grad_norm": 1.8938974524748047, "learning_rate": 9.30656989561621e-06, "loss": 0.6817, "step": 6348 }, { "epoch": 0.1945874708839034, "grad_norm": 1.9062982734651666, "learning_rate": 9.30631770867927e-06, "loss": 0.6814, "step": 6349 }, { "epoch": 0.1946181194066446, "grad_norm": 1.6384872785278868, "learning_rate": 9.30606547931108e-06, "loss": 0.6755, "step": 6350 }, { "epoch": 0.1946487679293858, "grad_norm": 1.8456460694321861, "learning_rate": 9.305813207514123e-06, "loss": 0.7495, "step": 6351 }, { "epoch": 0.194679416452127, "grad_norm": 1.6466918891428377, "learning_rate": 9.305560893290889e-06, "loss": 0.6413, "step": 6352 }, { "epoch": 0.1947100649748682, "grad_norm": 1.9621879104994708, "learning_rate": 9.30530853664386e-06, "loss": 0.7568, "step": 6353 }, { "epoch": 0.1947407134976094, "grad_norm": 1.8528752650001197, "learning_rate": 9.305056137575526e-06, "loss": 0.77, "step": 6354 }, { "epoch": 0.1947713620203506, "grad_norm": 1.8075175491493223, "learning_rate": 9.304803696088372e-06, "loss": 0.666, "step": 6355 }, { "epoch": 0.19480201054309182, "grad_norm": 0.9862874689424715, "learning_rate": 9.304551212184887e-06, "loss": 0.4938, "step": 6356 }, { "epoch": 0.19483265906583302, "grad_norm": 2.04730105850082, "learning_rate": 9.304298685867556e-06, "loss": 0.7185, "step": 6357 }, { "epoch": 0.19486330758857423, "grad_norm": 1.6339444630867515, "learning_rate": 9.304046117138868e-06, "loss": 0.7361, "step": 6358 }, { "epoch": 0.19489395611131544, "grad_norm": 1.7424169545694963, "learning_rate": 9.303793506001314e-06, "loss": 0.7292, "step": 6359 }, { "epoch": 0.19492460463405664, "grad_norm": 1.760443906398041, "learning_rate": 9.30354085245738e-06, "loss": 0.6577, "step": 6360 }, { "epoch": 0.19495525315679785, "grad_norm": 0.8202698673575726, "learning_rate": 9.303288156509557e-06, "loss": 0.5026, "step": 6361 }, { "epoch": 0.19498590167953905, "grad_norm": 1.7549243052749943, "learning_rate": 9.303035418160337e-06, "loss": 0.726, "step": 6362 }, { "epoch": 0.19501655020228026, "grad_norm": 1.810569480771087, "learning_rate": 9.302782637412206e-06, "loss": 0.7336, "step": 6363 }, { "epoch": 0.19504719872502146, "grad_norm": 1.7124099203345902, "learning_rate": 9.302529814267658e-06, "loss": 0.8383, "step": 6364 }, { "epoch": 0.19507784724776267, "grad_norm": 1.6077787784457611, "learning_rate": 9.302276948729182e-06, "loss": 0.7487, "step": 6365 }, { "epoch": 0.19510849577050385, "grad_norm": 0.8401264426170517, "learning_rate": 9.30202404079927e-06, "loss": 0.5006, "step": 6366 }, { "epoch": 0.19513914429324505, "grad_norm": 0.8589730147887038, "learning_rate": 9.301771090480415e-06, "loss": 0.5236, "step": 6367 }, { "epoch": 0.19516979281598626, "grad_norm": 1.7765930487921207, "learning_rate": 9.301518097775109e-06, "loss": 0.6955, "step": 6368 }, { "epoch": 0.19520044133872747, "grad_norm": 1.735534110391356, "learning_rate": 9.301265062685845e-06, "loss": 0.758, "step": 6369 }, { "epoch": 0.19523108986146867, "grad_norm": 0.8248828072309699, "learning_rate": 9.301011985215113e-06, "loss": 0.512, "step": 6370 }, { "epoch": 0.19526173838420988, "grad_norm": 1.9672361560899982, "learning_rate": 9.300758865365413e-06, "loss": 0.7277, "step": 6371 }, { "epoch": 0.19529238690695108, "grad_norm": 1.6306779234589075, "learning_rate": 9.300505703139235e-06, "loss": 0.6772, "step": 6372 }, { "epoch": 0.1953230354296923, "grad_norm": 1.864762734901615, "learning_rate": 9.300252498539073e-06, "loss": 0.756, "step": 6373 }, { "epoch": 0.1953536839524335, "grad_norm": 1.7501456543154288, "learning_rate": 9.299999251567421e-06, "loss": 0.6703, "step": 6374 }, { "epoch": 0.1953843324751747, "grad_norm": 1.6597740885423367, "learning_rate": 9.29974596222678e-06, "loss": 0.6579, "step": 6375 }, { "epoch": 0.1954149809979159, "grad_norm": 1.5655747154471178, "learning_rate": 9.29949263051964e-06, "loss": 0.7811, "step": 6376 }, { "epoch": 0.1954456295206571, "grad_norm": 1.9854671262289538, "learning_rate": 9.299239256448497e-06, "loss": 0.7522, "step": 6377 }, { "epoch": 0.19547627804339832, "grad_norm": 1.800175743115053, "learning_rate": 9.298985840015853e-06, "loss": 0.7121, "step": 6378 }, { "epoch": 0.19550692656613952, "grad_norm": 1.822820974433255, "learning_rate": 9.2987323812242e-06, "loss": 0.7287, "step": 6379 }, { "epoch": 0.19553757508888073, "grad_norm": 1.9808135704798946, "learning_rate": 9.298478880076037e-06, "loss": 0.704, "step": 6380 }, { "epoch": 0.19556822361162193, "grad_norm": 1.0236616881091647, "learning_rate": 9.298225336573863e-06, "loss": 0.495, "step": 6381 }, { "epoch": 0.1955988721343631, "grad_norm": 1.5381678196814188, "learning_rate": 9.297971750720174e-06, "loss": 0.6772, "step": 6382 }, { "epoch": 0.19562952065710432, "grad_norm": 1.7581341324581536, "learning_rate": 9.29771812251747e-06, "loss": 0.7861, "step": 6383 }, { "epoch": 0.19566016917984552, "grad_norm": 2.1126053427813383, "learning_rate": 9.297464451968248e-06, "loss": 0.8563, "step": 6384 }, { "epoch": 0.19569081770258673, "grad_norm": 1.6793710502578625, "learning_rate": 9.29721073907501e-06, "loss": 0.7334, "step": 6385 }, { "epoch": 0.19572146622532793, "grad_norm": 1.7045491768911236, "learning_rate": 9.296956983840258e-06, "loss": 0.7186, "step": 6386 }, { "epoch": 0.19575211474806914, "grad_norm": 1.8124514009361392, "learning_rate": 9.296703186266486e-06, "loss": 0.6232, "step": 6387 }, { "epoch": 0.19578276327081034, "grad_norm": 1.8011281516120192, "learning_rate": 9.296449346356199e-06, "loss": 0.7463, "step": 6388 }, { "epoch": 0.19581341179355155, "grad_norm": 1.6539545261154758, "learning_rate": 9.296195464111899e-06, "loss": 0.793, "step": 6389 }, { "epoch": 0.19584406031629276, "grad_norm": 2.0388111166744554, "learning_rate": 9.295941539536083e-06, "loss": 0.8293, "step": 6390 }, { "epoch": 0.19587470883903396, "grad_norm": 1.7159187476382656, "learning_rate": 9.295687572631258e-06, "loss": 0.7513, "step": 6391 }, { "epoch": 0.19590535736177517, "grad_norm": 1.128926076394993, "learning_rate": 9.295433563399922e-06, "loss": 0.4775, "step": 6392 }, { "epoch": 0.19593600588451637, "grad_norm": 1.9295357422202875, "learning_rate": 9.295179511844583e-06, "loss": 0.753, "step": 6393 }, { "epoch": 0.19596665440725758, "grad_norm": 0.8558218812595351, "learning_rate": 9.29492541796774e-06, "loss": 0.5247, "step": 6394 }, { "epoch": 0.19599730292999878, "grad_norm": 1.6606672202250554, "learning_rate": 9.294671281771897e-06, "loss": 0.6711, "step": 6395 }, { "epoch": 0.19602795145274, "grad_norm": 0.8322099642734455, "learning_rate": 9.29441710325956e-06, "loss": 0.4845, "step": 6396 }, { "epoch": 0.19605859997548117, "grad_norm": 1.7118254061877063, "learning_rate": 9.294162882433233e-06, "loss": 0.7163, "step": 6397 }, { "epoch": 0.19608924849822237, "grad_norm": 0.8909681006805223, "learning_rate": 9.29390861929542e-06, "loss": 0.4928, "step": 6398 }, { "epoch": 0.19611989702096358, "grad_norm": 0.8714199357061401, "learning_rate": 9.293654313848626e-06, "loss": 0.4961, "step": 6399 }, { "epoch": 0.19615054554370479, "grad_norm": 0.8280571721808039, "learning_rate": 9.293399966095358e-06, "loss": 0.478, "step": 6400 }, { "epoch": 0.196181194066446, "grad_norm": 2.0965348394933256, "learning_rate": 9.293145576038121e-06, "loss": 0.8056, "step": 6401 }, { "epoch": 0.1962118425891872, "grad_norm": 1.7533662415415059, "learning_rate": 9.292891143679423e-06, "loss": 0.6583, "step": 6402 }, { "epoch": 0.1962424911119284, "grad_norm": 0.9211623166435621, "learning_rate": 9.29263666902177e-06, "loss": 0.4939, "step": 6403 }, { "epoch": 0.1962731396346696, "grad_norm": 1.7501530267300203, "learning_rate": 9.29238215206767e-06, "loss": 0.7559, "step": 6404 }, { "epoch": 0.1963037881574108, "grad_norm": 1.9179057291881894, "learning_rate": 9.29212759281963e-06, "loss": 0.7998, "step": 6405 }, { "epoch": 0.19633443668015202, "grad_norm": 1.616222499695529, "learning_rate": 9.291872991280158e-06, "loss": 0.7096, "step": 6406 }, { "epoch": 0.19636508520289322, "grad_norm": 1.8004864605700883, "learning_rate": 9.291618347451763e-06, "loss": 0.76, "step": 6407 }, { "epoch": 0.19639573372563443, "grad_norm": 1.6765962404597687, "learning_rate": 9.291363661336956e-06, "loss": 0.7311, "step": 6408 }, { "epoch": 0.19642638224837564, "grad_norm": 1.930103969912639, "learning_rate": 9.291108932938244e-06, "loss": 0.807, "step": 6409 }, { "epoch": 0.19645703077111684, "grad_norm": 0.9458555507543898, "learning_rate": 9.290854162258138e-06, "loss": 0.501, "step": 6410 }, { "epoch": 0.19648767929385805, "grad_norm": 1.565464912051991, "learning_rate": 9.290599349299148e-06, "loss": 0.7449, "step": 6411 }, { "epoch": 0.19651832781659925, "grad_norm": 2.7590689188083153, "learning_rate": 9.290344494063785e-06, "loss": 0.6159, "step": 6412 }, { "epoch": 0.19654897633934043, "grad_norm": 1.6690925000148615, "learning_rate": 9.290089596554559e-06, "loss": 0.7026, "step": 6413 }, { "epoch": 0.19657962486208164, "grad_norm": 1.6858566873408638, "learning_rate": 9.289834656773984e-06, "loss": 0.7569, "step": 6414 }, { "epoch": 0.19661027338482284, "grad_norm": 1.882801497813053, "learning_rate": 9.28957967472457e-06, "loss": 0.7175, "step": 6415 }, { "epoch": 0.19664092190756405, "grad_norm": 1.6654009535420446, "learning_rate": 9.28932465040883e-06, "loss": 0.643, "step": 6416 }, { "epoch": 0.19667157043030525, "grad_norm": 1.7361756430868456, "learning_rate": 9.289069583829276e-06, "loss": 0.7121, "step": 6417 }, { "epoch": 0.19670221895304646, "grad_norm": 1.8567293161288434, "learning_rate": 9.288814474988421e-06, "loss": 0.7932, "step": 6418 }, { "epoch": 0.19673286747578766, "grad_norm": 2.0461178881905546, "learning_rate": 9.288559323888781e-06, "loss": 0.7981, "step": 6419 }, { "epoch": 0.19676351599852887, "grad_norm": 1.6713250546360372, "learning_rate": 9.28830413053287e-06, "loss": 0.75, "step": 6420 }, { "epoch": 0.19679416452127008, "grad_norm": 1.8069226254718131, "learning_rate": 9.2880488949232e-06, "loss": 0.7023, "step": 6421 }, { "epoch": 0.19682481304401128, "grad_norm": 1.8718064009620607, "learning_rate": 9.287793617062286e-06, "loss": 0.7201, "step": 6422 }, { "epoch": 0.1968554615667525, "grad_norm": 1.754981439627596, "learning_rate": 9.287538296952646e-06, "loss": 0.7276, "step": 6423 }, { "epoch": 0.1968861100894937, "grad_norm": 1.8171629933807478, "learning_rate": 9.287282934596793e-06, "loss": 0.6745, "step": 6424 }, { "epoch": 0.1969167586122349, "grad_norm": 1.7529090265945033, "learning_rate": 9.287027529997246e-06, "loss": 0.6668, "step": 6425 }, { "epoch": 0.1969474071349761, "grad_norm": 1.8570710919882267, "learning_rate": 9.286772083156518e-06, "loss": 0.7804, "step": 6426 }, { "epoch": 0.1969780556577173, "grad_norm": 1.061519683897881, "learning_rate": 9.286516594077129e-06, "loss": 0.4917, "step": 6427 }, { "epoch": 0.1970087041804585, "grad_norm": 1.7247887543652956, "learning_rate": 9.286261062761595e-06, "loss": 0.745, "step": 6428 }, { "epoch": 0.1970393527031997, "grad_norm": 1.739831383640912, "learning_rate": 9.286005489212433e-06, "loss": 0.6357, "step": 6429 }, { "epoch": 0.1970700012259409, "grad_norm": 0.8189964223256679, "learning_rate": 9.285749873432165e-06, "loss": 0.4826, "step": 6430 }, { "epoch": 0.1971006497486821, "grad_norm": 0.8154709813212014, "learning_rate": 9.285494215423304e-06, "loss": 0.4774, "step": 6431 }, { "epoch": 0.1971312982714233, "grad_norm": 1.9070571085184571, "learning_rate": 9.285238515188372e-06, "loss": 0.7752, "step": 6432 }, { "epoch": 0.19716194679416452, "grad_norm": 1.8487594876082918, "learning_rate": 9.284982772729891e-06, "loss": 0.7229, "step": 6433 }, { "epoch": 0.19719259531690572, "grad_norm": 1.8481317683344325, "learning_rate": 9.284726988050376e-06, "loss": 0.7219, "step": 6434 }, { "epoch": 0.19722324383964693, "grad_norm": 1.7779272593587068, "learning_rate": 9.284471161152351e-06, "loss": 0.7436, "step": 6435 }, { "epoch": 0.19725389236238813, "grad_norm": 2.0505315459375706, "learning_rate": 9.284215292038335e-06, "loss": 0.8079, "step": 6436 }, { "epoch": 0.19728454088512934, "grad_norm": 1.7429384144497582, "learning_rate": 9.28395938071085e-06, "loss": 0.7048, "step": 6437 }, { "epoch": 0.19731518940787054, "grad_norm": 1.624647163011786, "learning_rate": 9.283703427172417e-06, "loss": 0.7496, "step": 6438 }, { "epoch": 0.19734583793061175, "grad_norm": 1.7439620519808146, "learning_rate": 9.28344743142556e-06, "loss": 0.7428, "step": 6439 }, { "epoch": 0.19737648645335296, "grad_norm": 1.7852972437407661, "learning_rate": 9.283191393472796e-06, "loss": 0.6996, "step": 6440 }, { "epoch": 0.19740713497609416, "grad_norm": 1.9744656220333134, "learning_rate": 9.282935313316652e-06, "loss": 0.7742, "step": 6441 }, { "epoch": 0.19743778349883537, "grad_norm": 1.483247229928701, "learning_rate": 9.282679190959652e-06, "loss": 0.612, "step": 6442 }, { "epoch": 0.19746843202157657, "grad_norm": 1.6675391119778633, "learning_rate": 9.282423026404317e-06, "loss": 0.7495, "step": 6443 }, { "epoch": 0.19749908054431775, "grad_norm": 1.8135359713562058, "learning_rate": 9.282166819653172e-06, "loss": 0.754, "step": 6444 }, { "epoch": 0.19752972906705896, "grad_norm": 1.928439739319495, "learning_rate": 9.281910570708744e-06, "loss": 0.8254, "step": 6445 }, { "epoch": 0.19756037758980016, "grad_norm": 2.0237653856231517, "learning_rate": 9.281654279573553e-06, "loss": 0.7563, "step": 6446 }, { "epoch": 0.19759102611254137, "grad_norm": 1.6244021995768858, "learning_rate": 9.281397946250129e-06, "loss": 0.6221, "step": 6447 }, { "epoch": 0.19762167463528257, "grad_norm": 1.7302599867396855, "learning_rate": 9.281141570740992e-06, "loss": 0.6719, "step": 6448 }, { "epoch": 0.19765232315802378, "grad_norm": 1.665264491967291, "learning_rate": 9.280885153048676e-06, "loss": 0.6818, "step": 6449 }, { "epoch": 0.19768297168076499, "grad_norm": 1.6526686595066393, "learning_rate": 9.2806286931757e-06, "loss": 0.6394, "step": 6450 }, { "epoch": 0.1977136202035062, "grad_norm": 1.1901279074542426, "learning_rate": 9.280372191124596e-06, "loss": 0.4839, "step": 6451 }, { "epoch": 0.1977442687262474, "grad_norm": 1.6398349974418978, "learning_rate": 9.280115646897888e-06, "loss": 0.6664, "step": 6452 }, { "epoch": 0.1977749172489886, "grad_norm": 1.8621854432078448, "learning_rate": 9.279859060498107e-06, "loss": 0.7503, "step": 6453 }, { "epoch": 0.1978055657717298, "grad_norm": 1.8853494737509842, "learning_rate": 9.27960243192778e-06, "loss": 0.8112, "step": 6454 }, { "epoch": 0.197836214294471, "grad_norm": 0.9226379468292342, "learning_rate": 9.279345761189435e-06, "loss": 0.5057, "step": 6455 }, { "epoch": 0.19786686281721222, "grad_norm": 2.079842136933081, "learning_rate": 9.2790890482856e-06, "loss": 0.6976, "step": 6456 }, { "epoch": 0.19789751133995342, "grad_norm": 1.857664675351297, "learning_rate": 9.278832293218807e-06, "loss": 0.6906, "step": 6457 }, { "epoch": 0.19792815986269463, "grad_norm": 1.6184504087951503, "learning_rate": 9.278575495991583e-06, "loss": 0.6351, "step": 6458 }, { "epoch": 0.1979588083854358, "grad_norm": 1.8201151159168276, "learning_rate": 9.278318656606463e-06, "loss": 0.6742, "step": 6459 }, { "epoch": 0.19798945690817701, "grad_norm": 0.9587105377194193, "learning_rate": 9.278061775065972e-06, "loss": 0.5019, "step": 6460 }, { "epoch": 0.19802010543091822, "grad_norm": 1.8631748149411076, "learning_rate": 9.277804851372643e-06, "loss": 0.8024, "step": 6461 }, { "epoch": 0.19805075395365943, "grad_norm": 1.920193821528314, "learning_rate": 9.27754788552901e-06, "loss": 0.6363, "step": 6462 }, { "epoch": 0.19808140247640063, "grad_norm": 1.747334374295573, "learning_rate": 9.277290877537603e-06, "loss": 0.6503, "step": 6463 }, { "epoch": 0.19811205099914184, "grad_norm": 1.6313169339604778, "learning_rate": 9.277033827400956e-06, "loss": 0.7799, "step": 6464 }, { "epoch": 0.19814269952188304, "grad_norm": 1.5099765896744672, "learning_rate": 9.276776735121597e-06, "loss": 0.6853, "step": 6465 }, { "epoch": 0.19817334804462425, "grad_norm": 0.9253003484355331, "learning_rate": 9.276519600702065e-06, "loss": 0.5119, "step": 6466 }, { "epoch": 0.19820399656736545, "grad_norm": 1.7441891825750584, "learning_rate": 9.276262424144891e-06, "loss": 0.6892, "step": 6467 }, { "epoch": 0.19823464509010666, "grad_norm": 2.0063960022236578, "learning_rate": 9.27600520545261e-06, "loss": 0.7979, "step": 6468 }, { "epoch": 0.19826529361284786, "grad_norm": 2.042939965439035, "learning_rate": 9.275747944627753e-06, "loss": 0.7269, "step": 6469 }, { "epoch": 0.19829594213558907, "grad_norm": 1.623277014783498, "learning_rate": 9.275490641672859e-06, "loss": 0.6874, "step": 6470 }, { "epoch": 0.19832659065833028, "grad_norm": 1.7028776537398052, "learning_rate": 9.275233296590463e-06, "loss": 0.6713, "step": 6471 }, { "epoch": 0.19835723918107148, "grad_norm": 1.7519365216726948, "learning_rate": 9.274975909383097e-06, "loss": 0.6892, "step": 6472 }, { "epoch": 0.1983878877038127, "grad_norm": 1.7439645745464571, "learning_rate": 9.274718480053303e-06, "loss": 0.6694, "step": 6473 }, { "epoch": 0.1984185362265539, "grad_norm": 1.6437913530841028, "learning_rate": 9.27446100860361e-06, "loss": 0.7306, "step": 6474 }, { "epoch": 0.19844918474929507, "grad_norm": 1.5397948793699208, "learning_rate": 9.274203495036563e-06, "loss": 0.6551, "step": 6475 }, { "epoch": 0.19847983327203628, "grad_norm": 1.6552413665171661, "learning_rate": 9.273945939354691e-06, "loss": 0.6808, "step": 6476 }, { "epoch": 0.19851048179477748, "grad_norm": 1.613710982392899, "learning_rate": 9.27368834156054e-06, "loss": 0.7577, "step": 6477 }, { "epoch": 0.1985411303175187, "grad_norm": 1.7828817985915686, "learning_rate": 9.273430701656642e-06, "loss": 0.7232, "step": 6478 }, { "epoch": 0.1985717788402599, "grad_norm": 1.8919765536997555, "learning_rate": 9.273173019645539e-06, "loss": 0.7567, "step": 6479 }, { "epoch": 0.1986024273630011, "grad_norm": 1.8547110024103097, "learning_rate": 9.272915295529768e-06, "loss": 0.7192, "step": 6480 }, { "epoch": 0.1986330758857423, "grad_norm": 1.5853654005797944, "learning_rate": 9.27265752931187e-06, "loss": 0.6893, "step": 6481 }, { "epoch": 0.1986637244084835, "grad_norm": 0.8978727915525913, "learning_rate": 9.272399720994384e-06, "loss": 0.4844, "step": 6482 }, { "epoch": 0.19869437293122472, "grad_norm": 1.677358570111396, "learning_rate": 9.272141870579851e-06, "loss": 0.7201, "step": 6483 }, { "epoch": 0.19872502145396592, "grad_norm": 0.802322392916381, "learning_rate": 9.27188397807081e-06, "loss": 0.5101, "step": 6484 }, { "epoch": 0.19875566997670713, "grad_norm": 1.6648513207676447, "learning_rate": 9.271626043469804e-06, "loss": 0.7304, "step": 6485 }, { "epoch": 0.19878631849944833, "grad_norm": 1.897100051267405, "learning_rate": 9.271368066779373e-06, "loss": 0.7306, "step": 6486 }, { "epoch": 0.19881696702218954, "grad_norm": 1.6580236369402588, "learning_rate": 9.27111004800206e-06, "loss": 0.7083, "step": 6487 }, { "epoch": 0.19884761554493074, "grad_norm": 1.8969675328327709, "learning_rate": 9.270851987140405e-06, "loss": 0.7397, "step": 6488 }, { "epoch": 0.19887826406767195, "grad_norm": 1.0039515977968339, "learning_rate": 9.270593884196956e-06, "loss": 0.5099, "step": 6489 }, { "epoch": 0.19890891259041313, "grad_norm": 1.7616438487630224, "learning_rate": 9.270335739174251e-06, "loss": 0.6775, "step": 6490 }, { "epoch": 0.19893956111315433, "grad_norm": 1.618981591569874, "learning_rate": 9.270077552074835e-06, "loss": 0.6672, "step": 6491 }, { "epoch": 0.19897020963589554, "grad_norm": 1.9305968818678658, "learning_rate": 9.269819322901254e-06, "loss": 0.7416, "step": 6492 }, { "epoch": 0.19900085815863675, "grad_norm": 1.7394713210718828, "learning_rate": 9.269561051656049e-06, "loss": 0.7041, "step": 6493 }, { "epoch": 0.19903150668137795, "grad_norm": 1.8124450454898908, "learning_rate": 9.269302738341766e-06, "loss": 0.7073, "step": 6494 }, { "epoch": 0.19906215520411916, "grad_norm": 1.6607977556917173, "learning_rate": 9.269044382960952e-06, "loss": 0.6831, "step": 6495 }, { "epoch": 0.19909280372686036, "grad_norm": 1.967393109529356, "learning_rate": 9.268785985516152e-06, "loss": 0.6859, "step": 6496 }, { "epoch": 0.19912345224960157, "grad_norm": 1.9391364210005757, "learning_rate": 9.268527546009911e-06, "loss": 0.7974, "step": 6497 }, { "epoch": 0.19915410077234277, "grad_norm": 1.9084987444008883, "learning_rate": 9.268269064444775e-06, "loss": 0.8073, "step": 6498 }, { "epoch": 0.19918474929508398, "grad_norm": 1.9457612101765733, "learning_rate": 9.268010540823294e-06, "loss": 0.7, "step": 6499 }, { "epoch": 0.19921539781782518, "grad_norm": 1.8974887247429777, "learning_rate": 9.267751975148011e-06, "loss": 0.7678, "step": 6500 }, { "epoch": 0.1992460463405664, "grad_norm": 1.6771723321719774, "learning_rate": 9.267493367421476e-06, "loss": 0.7398, "step": 6501 }, { "epoch": 0.1992766948633076, "grad_norm": 2.098008878595679, "learning_rate": 9.267234717646237e-06, "loss": 0.7484, "step": 6502 }, { "epoch": 0.1993073433860488, "grad_norm": 1.8639659613913155, "learning_rate": 9.266976025824843e-06, "loss": 0.6622, "step": 6503 }, { "epoch": 0.19933799190879, "grad_norm": 1.9640347642358713, "learning_rate": 9.266717291959843e-06, "loss": 0.7614, "step": 6504 }, { "epoch": 0.1993686404315312, "grad_norm": 1.8265190477469109, "learning_rate": 9.266458516053785e-06, "loss": 0.7425, "step": 6505 }, { "epoch": 0.1993992889542724, "grad_norm": 2.005666962810202, "learning_rate": 9.266199698109219e-06, "loss": 0.7609, "step": 6506 }, { "epoch": 0.1994299374770136, "grad_norm": 1.8381972242067026, "learning_rate": 9.265940838128698e-06, "loss": 0.7611, "step": 6507 }, { "epoch": 0.1994605859997548, "grad_norm": 1.7901035774589307, "learning_rate": 9.265681936114768e-06, "loss": 0.7272, "step": 6508 }, { "epoch": 0.199491234522496, "grad_norm": 1.8544623277281302, "learning_rate": 9.265422992069985e-06, "loss": 0.7246, "step": 6509 }, { "epoch": 0.1995218830452372, "grad_norm": 1.7111752379536462, "learning_rate": 9.265164005996896e-06, "loss": 0.6636, "step": 6510 }, { "epoch": 0.19955253156797842, "grad_norm": 1.6597713140654384, "learning_rate": 9.264904977898055e-06, "loss": 0.6615, "step": 6511 }, { "epoch": 0.19958318009071963, "grad_norm": 1.548054026993805, "learning_rate": 9.264645907776013e-06, "loss": 0.7752, "step": 6512 }, { "epoch": 0.19961382861346083, "grad_norm": 1.0261546688141485, "learning_rate": 9.264386795633327e-06, "loss": 0.5064, "step": 6513 }, { "epoch": 0.19964447713620204, "grad_norm": 1.6736102080162423, "learning_rate": 9.264127641472544e-06, "loss": 0.727, "step": 6514 }, { "epoch": 0.19967512565894324, "grad_norm": 0.8479667221483225, "learning_rate": 9.263868445296222e-06, "loss": 0.5076, "step": 6515 }, { "epoch": 0.19970577418168445, "grad_norm": 1.9582333728968309, "learning_rate": 9.263609207106911e-06, "loss": 0.7367, "step": 6516 }, { "epoch": 0.19973642270442565, "grad_norm": 1.73662935537689, "learning_rate": 9.26334992690717e-06, "loss": 0.669, "step": 6517 }, { "epoch": 0.19976707122716686, "grad_norm": 1.7535092090312512, "learning_rate": 9.263090604699549e-06, "loss": 0.7228, "step": 6518 }, { "epoch": 0.19979771974990806, "grad_norm": 1.6988943099374463, "learning_rate": 9.262831240486608e-06, "loss": 0.6388, "step": 6519 }, { "epoch": 0.19982836827264927, "grad_norm": 1.8594355390930355, "learning_rate": 9.262571834270899e-06, "loss": 0.6673, "step": 6520 }, { "epoch": 0.19985901679539045, "grad_norm": 1.1310620286315272, "learning_rate": 9.262312386054978e-06, "loss": 0.516, "step": 6521 }, { "epoch": 0.19988966531813165, "grad_norm": 1.8670707272267915, "learning_rate": 9.262052895841404e-06, "loss": 0.7726, "step": 6522 }, { "epoch": 0.19992031384087286, "grad_norm": 0.8602198891209846, "learning_rate": 9.261793363632733e-06, "loss": 0.4943, "step": 6523 }, { "epoch": 0.19995096236361407, "grad_norm": 2.0118785024782215, "learning_rate": 9.26153378943152e-06, "loss": 0.6839, "step": 6524 }, { "epoch": 0.19998161088635527, "grad_norm": 2.1565823948729927, "learning_rate": 9.261274173240324e-06, "loss": 0.7537, "step": 6525 }, { "epoch": 0.20001225940909648, "grad_norm": 2.1874435413626747, "learning_rate": 9.261014515061704e-06, "loss": 0.7277, "step": 6526 }, { "epoch": 0.20004290793183768, "grad_norm": 1.7435204967234357, "learning_rate": 9.260754814898217e-06, "loss": 0.6857, "step": 6527 }, { "epoch": 0.2000735564545789, "grad_norm": 2.1095300163762816, "learning_rate": 9.260495072752424e-06, "loss": 0.8037, "step": 6528 }, { "epoch": 0.2001042049773201, "grad_norm": 1.6427355688273844, "learning_rate": 9.260235288626882e-06, "loss": 0.6177, "step": 6529 }, { "epoch": 0.2001348535000613, "grad_norm": 1.8494462473031086, "learning_rate": 9.259975462524151e-06, "loss": 0.755, "step": 6530 }, { "epoch": 0.2001655020228025, "grad_norm": 2.1838163809606694, "learning_rate": 9.259715594446794e-06, "loss": 0.8206, "step": 6531 }, { "epoch": 0.2001961505455437, "grad_norm": 1.7601551847355834, "learning_rate": 9.259455684397367e-06, "loss": 0.7058, "step": 6532 }, { "epoch": 0.20022679906828492, "grad_norm": 1.9787862729957775, "learning_rate": 9.259195732378436e-06, "loss": 0.7577, "step": 6533 }, { "epoch": 0.20025744759102612, "grad_norm": 1.326563794882844, "learning_rate": 9.258935738392557e-06, "loss": 0.4942, "step": 6534 }, { "epoch": 0.20028809611376733, "grad_norm": 1.8447456883663989, "learning_rate": 9.258675702442295e-06, "loss": 0.686, "step": 6535 }, { "epoch": 0.20031874463650853, "grad_norm": 2.019769155067558, "learning_rate": 9.258415624530213e-06, "loss": 0.7846, "step": 6536 }, { "epoch": 0.2003493931592497, "grad_norm": 1.7716291588635975, "learning_rate": 9.258155504658871e-06, "loss": 0.7755, "step": 6537 }, { "epoch": 0.20038004168199092, "grad_norm": 1.7013285779860021, "learning_rate": 9.257895342830834e-06, "loss": 0.6499, "step": 6538 }, { "epoch": 0.20041069020473212, "grad_norm": 1.8281825628933759, "learning_rate": 9.257635139048664e-06, "loss": 0.652, "step": 6539 }, { "epoch": 0.20044133872747333, "grad_norm": 1.681680862198622, "learning_rate": 9.257374893314929e-06, "loss": 0.745, "step": 6540 }, { "epoch": 0.20047198725021453, "grad_norm": 1.750748344736613, "learning_rate": 9.257114605632184e-06, "loss": 0.8037, "step": 6541 }, { "epoch": 0.20050263577295574, "grad_norm": 2.0194441639472918, "learning_rate": 9.256854276003004e-06, "loss": 0.7671, "step": 6542 }, { "epoch": 0.20053328429569695, "grad_norm": 1.5491189310099804, "learning_rate": 9.256593904429948e-06, "loss": 0.7313, "step": 6543 }, { "epoch": 0.20056393281843815, "grad_norm": 2.0387317544915673, "learning_rate": 9.256333490915583e-06, "loss": 0.8265, "step": 6544 }, { "epoch": 0.20059458134117936, "grad_norm": 1.8472783140684421, "learning_rate": 9.256073035462476e-06, "loss": 0.7112, "step": 6545 }, { "epoch": 0.20062522986392056, "grad_norm": 1.9559997476873752, "learning_rate": 9.255812538073192e-06, "loss": 0.8023, "step": 6546 }, { "epoch": 0.20065587838666177, "grad_norm": 1.7751963151330756, "learning_rate": 9.255551998750298e-06, "loss": 0.7465, "step": 6547 }, { "epoch": 0.20068652690940297, "grad_norm": 1.6285089872398384, "learning_rate": 9.255291417496361e-06, "loss": 0.6707, "step": 6548 }, { "epoch": 0.20071717543214418, "grad_norm": 2.066887021409831, "learning_rate": 9.255030794313951e-06, "loss": 0.7359, "step": 6549 }, { "epoch": 0.20074782395488538, "grad_norm": 1.810026201322013, "learning_rate": 9.254770129205631e-06, "loss": 0.8098, "step": 6550 }, { "epoch": 0.2007784724776266, "grad_norm": 1.32192609427557, "learning_rate": 9.254509422173974e-06, "loss": 0.5172, "step": 6551 }, { "epoch": 0.20080912100036777, "grad_norm": 1.8083278493132338, "learning_rate": 9.254248673221546e-06, "loss": 0.7667, "step": 6552 }, { "epoch": 0.20083976952310897, "grad_norm": 2.3592698823056404, "learning_rate": 9.253987882350919e-06, "loss": 0.7318, "step": 6553 }, { "epoch": 0.20087041804585018, "grad_norm": 1.8364253041244465, "learning_rate": 9.25372704956466e-06, "loss": 0.7282, "step": 6554 }, { "epoch": 0.20090106656859139, "grad_norm": 1.916035390958038, "learning_rate": 9.25346617486534e-06, "loss": 0.7891, "step": 6555 }, { "epoch": 0.2009317150913326, "grad_norm": 0.9096382859377576, "learning_rate": 9.25320525825553e-06, "loss": 0.5002, "step": 6556 }, { "epoch": 0.2009623636140738, "grad_norm": 1.7671200267688203, "learning_rate": 9.252944299737799e-06, "loss": 0.6929, "step": 6557 }, { "epoch": 0.200993012136815, "grad_norm": 1.8343039517851902, "learning_rate": 9.25268329931472e-06, "loss": 0.731, "step": 6558 }, { "epoch": 0.2010236606595562, "grad_norm": 1.6883517637639183, "learning_rate": 9.252422256988864e-06, "loss": 0.6939, "step": 6559 }, { "epoch": 0.2010543091822974, "grad_norm": 1.805050150843471, "learning_rate": 9.252161172762806e-06, "loss": 0.6901, "step": 6560 }, { "epoch": 0.20108495770503862, "grad_norm": 1.6380221853747419, "learning_rate": 9.251900046639113e-06, "loss": 0.679, "step": 6561 }, { "epoch": 0.20111560622777983, "grad_norm": 1.8102916102828397, "learning_rate": 9.251638878620363e-06, "loss": 0.7885, "step": 6562 }, { "epoch": 0.20114625475052103, "grad_norm": 1.8241294067775564, "learning_rate": 9.251377668709127e-06, "loss": 0.7478, "step": 6563 }, { "epoch": 0.20117690327326224, "grad_norm": 1.6842233813816216, "learning_rate": 9.251116416907977e-06, "loss": 0.7498, "step": 6564 }, { "epoch": 0.20120755179600344, "grad_norm": 1.8100293863430048, "learning_rate": 9.25085512321949e-06, "loss": 0.6872, "step": 6565 }, { "epoch": 0.20123820031874465, "grad_norm": 1.6472730466486907, "learning_rate": 9.250593787646243e-06, "loss": 0.6643, "step": 6566 }, { "epoch": 0.20126884884148585, "grad_norm": 1.8889242535858384, "learning_rate": 9.250332410190805e-06, "loss": 0.6635, "step": 6567 }, { "epoch": 0.20129949736422703, "grad_norm": 1.8184661052451312, "learning_rate": 9.250070990855755e-06, "loss": 0.6848, "step": 6568 }, { "epoch": 0.20133014588696824, "grad_norm": 1.9989499771384995, "learning_rate": 9.249809529643668e-06, "loss": 0.7583, "step": 6569 }, { "epoch": 0.20136079440970944, "grad_norm": 1.657809201819308, "learning_rate": 9.24954802655712e-06, "loss": 0.7436, "step": 6570 }, { "epoch": 0.20139144293245065, "grad_norm": 1.621128316557437, "learning_rate": 9.249286481598686e-06, "loss": 0.6899, "step": 6571 }, { "epoch": 0.20142209145519185, "grad_norm": 1.7249272348809492, "learning_rate": 9.24902489477095e-06, "loss": 0.7572, "step": 6572 }, { "epoch": 0.20145273997793306, "grad_norm": 1.7968036147698503, "learning_rate": 9.248763266076482e-06, "loss": 0.748, "step": 6573 }, { "epoch": 0.20148338850067427, "grad_norm": 1.768502716217813, "learning_rate": 9.248501595517861e-06, "loss": 0.6725, "step": 6574 }, { "epoch": 0.20151403702341547, "grad_norm": 1.656566743879616, "learning_rate": 9.248239883097668e-06, "loss": 0.6612, "step": 6575 }, { "epoch": 0.20154468554615668, "grad_norm": 1.6832491164024348, "learning_rate": 9.247978128818482e-06, "loss": 0.6928, "step": 6576 }, { "epoch": 0.20157533406889788, "grad_norm": 1.6744588640819036, "learning_rate": 9.24771633268288e-06, "loss": 0.7005, "step": 6577 }, { "epoch": 0.2016059825916391, "grad_norm": 1.8339430330868887, "learning_rate": 9.24745449469344e-06, "loss": 0.7301, "step": 6578 }, { "epoch": 0.2016366311143803, "grad_norm": 1.7663051297227084, "learning_rate": 9.247192614852744e-06, "loss": 0.6911, "step": 6579 }, { "epoch": 0.2016672796371215, "grad_norm": 1.6095730052810935, "learning_rate": 9.246930693163375e-06, "loss": 0.7305, "step": 6580 }, { "epoch": 0.2016979281598627, "grad_norm": 1.0059369530694335, "learning_rate": 9.246668729627911e-06, "loss": 0.4961, "step": 6581 }, { "epoch": 0.2017285766826039, "grad_norm": 0.9410611562624123, "learning_rate": 9.246406724248931e-06, "loss": 0.5049, "step": 6582 }, { "epoch": 0.2017592252053451, "grad_norm": 1.6671549903166316, "learning_rate": 9.246144677029022e-06, "loss": 0.6655, "step": 6583 }, { "epoch": 0.2017898737280863, "grad_norm": 2.0677759648419567, "learning_rate": 9.245882587970761e-06, "loss": 0.7799, "step": 6584 }, { "epoch": 0.2018205222508275, "grad_norm": 1.8156971920890892, "learning_rate": 9.245620457076732e-06, "loss": 0.735, "step": 6585 }, { "epoch": 0.2018511707735687, "grad_norm": 1.8282996842307349, "learning_rate": 9.24535828434952e-06, "loss": 0.7732, "step": 6586 }, { "epoch": 0.2018818192963099, "grad_norm": 1.0399718028761362, "learning_rate": 9.245096069791706e-06, "loss": 0.502, "step": 6587 }, { "epoch": 0.20191246781905112, "grad_norm": 1.7518183566681909, "learning_rate": 9.244833813405875e-06, "loss": 0.6816, "step": 6588 }, { "epoch": 0.20194311634179232, "grad_norm": 1.9230261471242749, "learning_rate": 9.244571515194609e-06, "loss": 0.7296, "step": 6589 }, { "epoch": 0.20197376486453353, "grad_norm": 1.9921698624294162, "learning_rate": 9.244309175160495e-06, "loss": 0.7735, "step": 6590 }, { "epoch": 0.20200441338727473, "grad_norm": 1.7383508820920837, "learning_rate": 9.244046793306116e-06, "loss": 0.7678, "step": 6591 }, { "epoch": 0.20203506191001594, "grad_norm": 0.9111723580628409, "learning_rate": 9.243784369634059e-06, "loss": 0.4642, "step": 6592 }, { "epoch": 0.20206571043275715, "grad_norm": 1.8201120144928187, "learning_rate": 9.243521904146908e-06, "loss": 0.8044, "step": 6593 }, { "epoch": 0.20209635895549835, "grad_norm": 1.7402936848523232, "learning_rate": 9.243259396847251e-06, "loss": 0.7929, "step": 6594 }, { "epoch": 0.20212700747823956, "grad_norm": 1.8216661972513957, "learning_rate": 9.242996847737672e-06, "loss": 0.7196, "step": 6595 }, { "epoch": 0.20215765600098076, "grad_norm": 1.8601013072774117, "learning_rate": 9.24273425682076e-06, "loss": 0.7483, "step": 6596 }, { "epoch": 0.20218830452372197, "grad_norm": 1.758711750290856, "learning_rate": 9.242471624099102e-06, "loss": 0.7508, "step": 6597 }, { "epoch": 0.20221895304646317, "grad_norm": 1.6594963955388595, "learning_rate": 9.242208949575286e-06, "loss": 0.6746, "step": 6598 }, { "epoch": 0.20224960156920435, "grad_norm": 1.6858120551761677, "learning_rate": 9.241946233251899e-06, "loss": 0.6328, "step": 6599 }, { "epoch": 0.20228025009194556, "grad_norm": 1.619262767924616, "learning_rate": 9.241683475131531e-06, "loss": 0.6804, "step": 6600 }, { "epoch": 0.20231089861468676, "grad_norm": 2.067417912480514, "learning_rate": 9.24142067521677e-06, "loss": 0.7521, "step": 6601 }, { "epoch": 0.20234154713742797, "grad_norm": 1.6015944220427185, "learning_rate": 9.241157833510206e-06, "loss": 0.7698, "step": 6602 }, { "epoch": 0.20237219566016917, "grad_norm": 1.7107399644773316, "learning_rate": 9.240894950014429e-06, "loss": 0.6641, "step": 6603 }, { "epoch": 0.20240284418291038, "grad_norm": 1.7653458788290497, "learning_rate": 9.240632024732027e-06, "loss": 0.7876, "step": 6604 }, { "epoch": 0.20243349270565159, "grad_norm": 1.9352651356098804, "learning_rate": 9.240369057665595e-06, "loss": 0.7347, "step": 6605 }, { "epoch": 0.2024641412283928, "grad_norm": 1.841141778677358, "learning_rate": 9.24010604881772e-06, "loss": 0.6776, "step": 6606 }, { "epoch": 0.202494789751134, "grad_norm": 1.9876588152466874, "learning_rate": 9.239842998190997e-06, "loss": 0.8427, "step": 6607 }, { "epoch": 0.2025254382738752, "grad_norm": 1.7679773685434141, "learning_rate": 9.239579905788016e-06, "loss": 0.7741, "step": 6608 }, { "epoch": 0.2025560867966164, "grad_norm": 1.2065562400598246, "learning_rate": 9.239316771611369e-06, "loss": 0.4845, "step": 6609 }, { "epoch": 0.2025867353193576, "grad_norm": 1.7616902080727936, "learning_rate": 9.239053595663649e-06, "loss": 0.8193, "step": 6610 }, { "epoch": 0.20261738384209882, "grad_norm": 1.8872163038897531, "learning_rate": 9.23879037794745e-06, "loss": 0.8159, "step": 6611 }, { "epoch": 0.20264803236484003, "grad_norm": 0.7982441913334285, "learning_rate": 9.238527118465364e-06, "loss": 0.4809, "step": 6612 }, { "epoch": 0.20267868088758123, "grad_norm": 1.7478954917779395, "learning_rate": 9.238263817219986e-06, "loss": 0.808, "step": 6613 }, { "epoch": 0.2027093294103224, "grad_norm": 1.770424173465027, "learning_rate": 9.23800047421391e-06, "loss": 0.7679, "step": 6614 }, { "epoch": 0.20273997793306361, "grad_norm": 1.9557891975531323, "learning_rate": 9.237737089449731e-06, "loss": 0.776, "step": 6615 }, { "epoch": 0.20277062645580482, "grad_norm": 1.8451328079787055, "learning_rate": 9.237473662930045e-06, "loss": 0.7737, "step": 6616 }, { "epoch": 0.20280127497854603, "grad_norm": 1.7156679632689815, "learning_rate": 9.237210194657447e-06, "loss": 0.6564, "step": 6617 }, { "epoch": 0.20283192350128723, "grad_norm": 1.6644964464131988, "learning_rate": 9.236946684634531e-06, "loss": 0.7806, "step": 6618 }, { "epoch": 0.20286257202402844, "grad_norm": 1.70294977689974, "learning_rate": 9.236683132863897e-06, "loss": 0.6964, "step": 6619 }, { "epoch": 0.20289322054676964, "grad_norm": 1.6561775822855274, "learning_rate": 9.236419539348138e-06, "loss": 0.6342, "step": 6620 }, { "epoch": 0.20292386906951085, "grad_norm": 1.566969488571515, "learning_rate": 9.236155904089856e-06, "loss": 0.7468, "step": 6621 }, { "epoch": 0.20295451759225205, "grad_norm": 1.8224557885067216, "learning_rate": 9.235892227091645e-06, "loss": 0.7336, "step": 6622 }, { "epoch": 0.20298516611499326, "grad_norm": 1.5420780528287912, "learning_rate": 9.235628508356102e-06, "loss": 0.6374, "step": 6623 }, { "epoch": 0.20301581463773447, "grad_norm": 1.5599458786406164, "learning_rate": 9.235364747885831e-06, "loss": 0.6835, "step": 6624 }, { "epoch": 0.20304646316047567, "grad_norm": 1.6870489407930334, "learning_rate": 9.235100945683425e-06, "loss": 0.7792, "step": 6625 }, { "epoch": 0.20307711168321688, "grad_norm": 1.7135441503591025, "learning_rate": 9.234837101751486e-06, "loss": 0.7138, "step": 6626 }, { "epoch": 0.20310776020595808, "grad_norm": 1.652015049017691, "learning_rate": 9.234573216092615e-06, "loss": 0.6777, "step": 6627 }, { "epoch": 0.2031384087286993, "grad_norm": 1.5962697925543086, "learning_rate": 9.23430928870941e-06, "loss": 0.6849, "step": 6628 }, { "epoch": 0.2031690572514405, "grad_norm": 1.696705233588403, "learning_rate": 9.234045319604472e-06, "loss": 0.6438, "step": 6629 }, { "epoch": 0.20319970577418167, "grad_norm": 1.7254547769411297, "learning_rate": 9.233781308780402e-06, "loss": 0.5049, "step": 6630 }, { "epoch": 0.20323035429692288, "grad_norm": 1.8106271581921312, "learning_rate": 9.233517256239801e-06, "loss": 0.7528, "step": 6631 }, { "epoch": 0.20326100281966408, "grad_norm": 1.770654445739423, "learning_rate": 9.233253161985271e-06, "loss": 0.7694, "step": 6632 }, { "epoch": 0.2032916513424053, "grad_norm": 1.908244034338254, "learning_rate": 9.232989026019417e-06, "loss": 0.7493, "step": 6633 }, { "epoch": 0.2033222998651465, "grad_norm": 2.0669613320381792, "learning_rate": 9.232724848344836e-06, "loss": 0.7955, "step": 6634 }, { "epoch": 0.2033529483878877, "grad_norm": 1.9512955535582817, "learning_rate": 9.232460628964138e-06, "loss": 0.7618, "step": 6635 }, { "epoch": 0.2033835969106289, "grad_norm": 1.66731987792686, "learning_rate": 9.23219636787992e-06, "loss": 0.7795, "step": 6636 }, { "epoch": 0.2034142454333701, "grad_norm": 1.9752484349690727, "learning_rate": 9.231932065094787e-06, "loss": 0.6804, "step": 6637 }, { "epoch": 0.20344489395611132, "grad_norm": 1.7984650492034293, "learning_rate": 9.231667720611345e-06, "loss": 0.8317, "step": 6638 }, { "epoch": 0.20347554247885252, "grad_norm": 1.6866074085358782, "learning_rate": 9.2314033344322e-06, "loss": 0.7022, "step": 6639 }, { "epoch": 0.20350619100159373, "grad_norm": 1.9260443436024903, "learning_rate": 9.231138906559956e-06, "loss": 0.7899, "step": 6640 }, { "epoch": 0.20353683952433493, "grad_norm": 1.8732996327300622, "learning_rate": 9.230874436997215e-06, "loss": 0.7327, "step": 6641 }, { "epoch": 0.20356748804707614, "grad_norm": 1.872417946494757, "learning_rate": 9.230609925746588e-06, "loss": 0.7626, "step": 6642 }, { "epoch": 0.20359813656981735, "grad_norm": 1.5911728500186832, "learning_rate": 9.230345372810676e-06, "loss": 0.5149, "step": 6643 }, { "epoch": 0.20362878509255855, "grad_norm": 1.2947621163368583, "learning_rate": 9.230080778192091e-06, "loss": 0.4869, "step": 6644 }, { "epoch": 0.20365943361529976, "grad_norm": 2.099586371873431, "learning_rate": 9.229816141893438e-06, "loss": 0.7174, "step": 6645 }, { "epoch": 0.20369008213804093, "grad_norm": 1.9319761996023133, "learning_rate": 9.229551463917323e-06, "loss": 0.7798, "step": 6646 }, { "epoch": 0.20372073066078214, "grad_norm": 1.912523783487992, "learning_rate": 9.229286744266356e-06, "loss": 0.7719, "step": 6647 }, { "epoch": 0.20375137918352335, "grad_norm": 2.0253023980915192, "learning_rate": 9.229021982943144e-06, "loss": 0.7497, "step": 6648 }, { "epoch": 0.20378202770626455, "grad_norm": 1.8800375719252924, "learning_rate": 9.228757179950298e-06, "loss": 0.7786, "step": 6649 }, { "epoch": 0.20381267622900576, "grad_norm": 1.9702133858665636, "learning_rate": 9.228492335290423e-06, "loss": 0.6569, "step": 6650 }, { "epoch": 0.20384332475174696, "grad_norm": 1.3578457624577465, "learning_rate": 9.228227448966132e-06, "loss": 0.505, "step": 6651 }, { "epoch": 0.20387397327448817, "grad_norm": 1.2011443754920823, "learning_rate": 9.227962520980037e-06, "loss": 0.5069, "step": 6652 }, { "epoch": 0.20390462179722937, "grad_norm": 2.106345887566035, "learning_rate": 9.227697551334741e-06, "loss": 0.7053, "step": 6653 }, { "epoch": 0.20393527031997058, "grad_norm": 2.114125189602315, "learning_rate": 9.227432540032862e-06, "loss": 0.6981, "step": 6654 }, { "epoch": 0.20396591884271179, "grad_norm": 1.849583004384926, "learning_rate": 9.227167487077008e-06, "loss": 0.7422, "step": 6655 }, { "epoch": 0.203996567365453, "grad_norm": 2.0034395382260817, "learning_rate": 9.226902392469792e-06, "loss": 0.8278, "step": 6656 }, { "epoch": 0.2040272158881942, "grad_norm": 1.0590754775607494, "learning_rate": 9.226637256213825e-06, "loss": 0.5105, "step": 6657 }, { "epoch": 0.2040578644109354, "grad_norm": 1.8496570912603294, "learning_rate": 9.226372078311718e-06, "loss": 0.8226, "step": 6658 }, { "epoch": 0.2040885129336766, "grad_norm": 1.7876544555006488, "learning_rate": 9.226106858766086e-06, "loss": 0.6342, "step": 6659 }, { "epoch": 0.2041191614564178, "grad_norm": 1.7938157363536023, "learning_rate": 9.225841597579543e-06, "loss": 0.6896, "step": 6660 }, { "epoch": 0.204149809979159, "grad_norm": 1.9565169667769466, "learning_rate": 9.2255762947547e-06, "loss": 0.7452, "step": 6661 }, { "epoch": 0.2041804585019002, "grad_norm": 1.8758852222531457, "learning_rate": 9.225310950294173e-06, "loss": 0.6646, "step": 6662 }, { "epoch": 0.2042111070246414, "grad_norm": 1.7023424269775942, "learning_rate": 9.225045564200578e-06, "loss": 0.7268, "step": 6663 }, { "epoch": 0.2042417555473826, "grad_norm": 1.8212406994461379, "learning_rate": 9.224780136476526e-06, "loss": 0.6483, "step": 6664 }, { "epoch": 0.20427240407012381, "grad_norm": 1.9114192350711876, "learning_rate": 9.224514667124636e-06, "loss": 0.7738, "step": 6665 }, { "epoch": 0.20430305259286502, "grad_norm": 0.9464330160168086, "learning_rate": 9.22424915614752e-06, "loss": 0.4988, "step": 6666 }, { "epoch": 0.20433370111560623, "grad_norm": 1.6774413809895843, "learning_rate": 9.223983603547797e-06, "loss": 0.76, "step": 6667 }, { "epoch": 0.20436434963834743, "grad_norm": 2.1297242980020585, "learning_rate": 9.223718009328085e-06, "loss": 0.7458, "step": 6668 }, { "epoch": 0.20439499816108864, "grad_norm": 1.8391746031578589, "learning_rate": 9.223452373490995e-06, "loss": 0.7715, "step": 6669 }, { "epoch": 0.20442564668382984, "grad_norm": 1.7969514684466465, "learning_rate": 9.22318669603915e-06, "loss": 0.7615, "step": 6670 }, { "epoch": 0.20445629520657105, "grad_norm": 1.8464545630006532, "learning_rate": 9.222920976975166e-06, "loss": 0.7668, "step": 6671 }, { "epoch": 0.20448694372931225, "grad_norm": 2.0173507697568365, "learning_rate": 9.222655216301663e-06, "loss": 0.7951, "step": 6672 }, { "epoch": 0.20451759225205346, "grad_norm": 1.8196246664633693, "learning_rate": 9.222389414021256e-06, "loss": 0.7519, "step": 6673 }, { "epoch": 0.20454824077479467, "grad_norm": 1.9183998986632764, "learning_rate": 9.222123570136565e-06, "loss": 0.7243, "step": 6674 }, { "epoch": 0.20457888929753587, "grad_norm": 1.7768919606366251, "learning_rate": 9.22185768465021e-06, "loss": 0.7392, "step": 6675 }, { "epoch": 0.20460953782027708, "grad_norm": 1.7240818509903748, "learning_rate": 9.221591757564811e-06, "loss": 0.7269, "step": 6676 }, { "epoch": 0.20464018634301825, "grad_norm": 1.8175113927921152, "learning_rate": 9.221325788882991e-06, "loss": 0.7112, "step": 6677 }, { "epoch": 0.20467083486575946, "grad_norm": 1.8001141674447956, "learning_rate": 9.221059778607366e-06, "loss": 0.6908, "step": 6678 }, { "epoch": 0.20470148338850067, "grad_norm": 1.721961686047634, "learning_rate": 9.220793726740558e-06, "loss": 0.7957, "step": 6679 }, { "epoch": 0.20473213191124187, "grad_norm": 1.6468989326694612, "learning_rate": 9.22052763328519e-06, "loss": 0.6497, "step": 6680 }, { "epoch": 0.20476278043398308, "grad_norm": 0.9140398497616437, "learning_rate": 9.220261498243886e-06, "loss": 0.5198, "step": 6681 }, { "epoch": 0.20479342895672428, "grad_norm": 1.5069759168311372, "learning_rate": 9.219995321619263e-06, "loss": 0.5768, "step": 6682 }, { "epoch": 0.2048240774794655, "grad_norm": 1.6584602009983955, "learning_rate": 9.219729103413946e-06, "loss": 0.6692, "step": 6683 }, { "epoch": 0.2048547260022067, "grad_norm": 1.9468948683189988, "learning_rate": 9.21946284363056e-06, "loss": 0.7032, "step": 6684 }, { "epoch": 0.2048853745249479, "grad_norm": 1.7505433745981749, "learning_rate": 9.219196542271727e-06, "loss": 0.6931, "step": 6685 }, { "epoch": 0.2049160230476891, "grad_norm": 2.0072518513043325, "learning_rate": 9.218930199340071e-06, "loss": 0.606, "step": 6686 }, { "epoch": 0.2049466715704303, "grad_norm": 1.7422720055249175, "learning_rate": 9.218663814838215e-06, "loss": 0.7615, "step": 6687 }, { "epoch": 0.20497732009317152, "grad_norm": 1.9220098459847357, "learning_rate": 9.218397388768785e-06, "loss": 0.7736, "step": 6688 }, { "epoch": 0.20500796861591272, "grad_norm": 1.6868481486025837, "learning_rate": 9.218130921134408e-06, "loss": 0.7595, "step": 6689 }, { "epoch": 0.20503861713865393, "grad_norm": 1.8689688465186625, "learning_rate": 9.217864411937704e-06, "loss": 0.7504, "step": 6690 }, { "epoch": 0.20506926566139513, "grad_norm": 1.4545475313142977, "learning_rate": 9.217597861181306e-06, "loss": 0.6051, "step": 6691 }, { "epoch": 0.2050999141841363, "grad_norm": 1.6395750549237205, "learning_rate": 9.217331268867838e-06, "loss": 0.6541, "step": 6692 }, { "epoch": 0.20513056270687752, "grad_norm": 0.8965563947513557, "learning_rate": 9.217064634999925e-06, "loss": 0.492, "step": 6693 }, { "epoch": 0.20516121122961872, "grad_norm": 1.6538847526067177, "learning_rate": 9.216797959580193e-06, "loss": 0.7286, "step": 6694 }, { "epoch": 0.20519185975235993, "grad_norm": 1.8670750818848154, "learning_rate": 9.216531242611275e-06, "loss": 0.8586, "step": 6695 }, { "epoch": 0.20522250827510113, "grad_norm": 1.5827274861026595, "learning_rate": 9.216264484095794e-06, "loss": 0.5766, "step": 6696 }, { "epoch": 0.20525315679784234, "grad_norm": 2.1214094957196954, "learning_rate": 9.21599768403638e-06, "loss": 0.7259, "step": 6697 }, { "epoch": 0.20528380532058355, "grad_norm": 1.8194552350215416, "learning_rate": 9.215730842435663e-06, "loss": 0.8057, "step": 6698 }, { "epoch": 0.20531445384332475, "grad_norm": 2.2368767750227447, "learning_rate": 9.215463959296272e-06, "loss": 0.7998, "step": 6699 }, { "epoch": 0.20534510236606596, "grad_norm": 1.8227237030594687, "learning_rate": 9.215197034620835e-06, "loss": 0.7978, "step": 6700 }, { "epoch": 0.20537575088880716, "grad_norm": 1.7993357358745083, "learning_rate": 9.214930068411982e-06, "loss": 0.808, "step": 6701 }, { "epoch": 0.20540639941154837, "grad_norm": 0.8086682758621407, "learning_rate": 9.214663060672347e-06, "loss": 0.4703, "step": 6702 }, { "epoch": 0.20543704793428957, "grad_norm": 1.6677295138826123, "learning_rate": 9.214396011404557e-06, "loss": 0.711, "step": 6703 }, { "epoch": 0.20546769645703078, "grad_norm": 1.6527854358083434, "learning_rate": 9.214128920611246e-06, "loss": 0.6581, "step": 6704 }, { "epoch": 0.20549834497977199, "grad_norm": 1.6138597213212753, "learning_rate": 9.213861788295043e-06, "loss": 0.7504, "step": 6705 }, { "epoch": 0.2055289935025132, "grad_norm": 1.9000633820630646, "learning_rate": 9.21359461445858e-06, "loss": 0.8029, "step": 6706 }, { "epoch": 0.2055596420252544, "grad_norm": 0.8079371026457818, "learning_rate": 9.213327399104495e-06, "loss": 0.497, "step": 6707 }, { "epoch": 0.20559029054799557, "grad_norm": 1.8018031587273264, "learning_rate": 9.213060142235415e-06, "loss": 0.8084, "step": 6708 }, { "epoch": 0.20562093907073678, "grad_norm": 1.8431215762671034, "learning_rate": 9.212792843853976e-06, "loss": 0.7071, "step": 6709 }, { "epoch": 0.20565158759347799, "grad_norm": 1.7808608251179903, "learning_rate": 9.212525503962812e-06, "loss": 0.7322, "step": 6710 }, { "epoch": 0.2056822361162192, "grad_norm": 1.7596583454879335, "learning_rate": 9.212258122564554e-06, "loss": 0.7171, "step": 6711 }, { "epoch": 0.2057128846389604, "grad_norm": 1.848475545135932, "learning_rate": 9.21199069966184e-06, "loss": 0.656, "step": 6712 }, { "epoch": 0.2057435331617016, "grad_norm": 1.7322849877973003, "learning_rate": 9.211723235257306e-06, "loss": 0.6613, "step": 6713 }, { "epoch": 0.2057741816844428, "grad_norm": 1.726106141228765, "learning_rate": 9.211455729353584e-06, "loss": 0.5959, "step": 6714 }, { "epoch": 0.20580483020718401, "grad_norm": 0.8449438454935616, "learning_rate": 9.21118818195331e-06, "loss": 0.4771, "step": 6715 }, { "epoch": 0.20583547872992522, "grad_norm": 1.8006532677505842, "learning_rate": 9.210920593059124e-06, "loss": 0.7319, "step": 6716 }, { "epoch": 0.20586612725266643, "grad_norm": 1.818219088097725, "learning_rate": 9.210652962673658e-06, "loss": 0.7147, "step": 6717 }, { "epoch": 0.20589677577540763, "grad_norm": 1.769715759308144, "learning_rate": 9.210385290799551e-06, "loss": 0.6483, "step": 6718 }, { "epoch": 0.20592742429814884, "grad_norm": 1.99237866432386, "learning_rate": 9.210117577439441e-06, "loss": 0.8504, "step": 6719 }, { "epoch": 0.20595807282089004, "grad_norm": 0.8648996471638114, "learning_rate": 9.209849822595964e-06, "loss": 0.5102, "step": 6720 }, { "epoch": 0.20598872134363125, "grad_norm": 1.8001993989486489, "learning_rate": 9.209582026271762e-06, "loss": 0.7263, "step": 6721 }, { "epoch": 0.20601936986637245, "grad_norm": 2.2853154744169792, "learning_rate": 9.209314188469469e-06, "loss": 0.7677, "step": 6722 }, { "epoch": 0.20605001838911363, "grad_norm": 1.8936703711050131, "learning_rate": 9.209046309191727e-06, "loss": 0.8022, "step": 6723 }, { "epoch": 0.20608066691185484, "grad_norm": 1.655890791861509, "learning_rate": 9.208778388441175e-06, "loss": 0.7126, "step": 6724 }, { "epoch": 0.20611131543459604, "grad_norm": 1.6660759053116119, "learning_rate": 9.208510426220454e-06, "loss": 0.6042, "step": 6725 }, { "epoch": 0.20614196395733725, "grad_norm": 1.7908682415571213, "learning_rate": 9.2082424225322e-06, "loss": 0.662, "step": 6726 }, { "epoch": 0.20617261248007845, "grad_norm": 0.8416356398587774, "learning_rate": 9.20797437737906e-06, "loss": 0.4858, "step": 6727 }, { "epoch": 0.20620326100281966, "grad_norm": 1.8428161681178898, "learning_rate": 9.20770629076367e-06, "loss": 0.788, "step": 6728 }, { "epoch": 0.20623390952556087, "grad_norm": 1.8046199830959653, "learning_rate": 9.207438162688673e-06, "loss": 0.7917, "step": 6729 }, { "epoch": 0.20626455804830207, "grad_norm": 1.635368796239476, "learning_rate": 9.207169993156713e-06, "loss": 0.8005, "step": 6730 }, { "epoch": 0.20629520657104328, "grad_norm": 1.8843761641425258, "learning_rate": 9.206901782170428e-06, "loss": 0.7298, "step": 6731 }, { "epoch": 0.20632585509378448, "grad_norm": 1.894063755421474, "learning_rate": 9.206633529732465e-06, "loss": 0.7891, "step": 6732 }, { "epoch": 0.2063565036165257, "grad_norm": 1.7218024300435457, "learning_rate": 9.206365235845465e-06, "loss": 0.7171, "step": 6733 }, { "epoch": 0.2063871521392669, "grad_norm": 1.7324222501452822, "learning_rate": 9.206096900512072e-06, "loss": 0.662, "step": 6734 }, { "epoch": 0.2064178006620081, "grad_norm": 1.7759893358881007, "learning_rate": 9.205828523734931e-06, "loss": 0.6528, "step": 6735 }, { "epoch": 0.2064484491847493, "grad_norm": 1.5790169613134686, "learning_rate": 9.205560105516684e-06, "loss": 0.7323, "step": 6736 }, { "epoch": 0.2064790977074905, "grad_norm": 1.689437440433285, "learning_rate": 9.20529164585998e-06, "loss": 0.7641, "step": 6737 }, { "epoch": 0.20650974623023172, "grad_norm": 1.7850496429094946, "learning_rate": 9.205023144767457e-06, "loss": 0.7447, "step": 6738 }, { "epoch": 0.2065403947529729, "grad_norm": 1.5837632832801825, "learning_rate": 9.20475460224177e-06, "loss": 0.7499, "step": 6739 }, { "epoch": 0.2065710432757141, "grad_norm": 2.496330439983192, "learning_rate": 9.204486018285557e-06, "loss": 0.7428, "step": 6740 }, { "epoch": 0.2066016917984553, "grad_norm": 1.6272459903651788, "learning_rate": 9.204217392901468e-06, "loss": 0.6738, "step": 6741 }, { "epoch": 0.2066323403211965, "grad_norm": 0.91348703353134, "learning_rate": 9.20394872609215e-06, "loss": 0.489, "step": 6742 }, { "epoch": 0.20666298884393772, "grad_norm": 1.8931154371226169, "learning_rate": 9.203680017860249e-06, "loss": 0.7694, "step": 6743 }, { "epoch": 0.20669363736667892, "grad_norm": 1.9432522425411105, "learning_rate": 9.203411268208413e-06, "loss": 0.6489, "step": 6744 }, { "epoch": 0.20672428588942013, "grad_norm": 1.6733472115195271, "learning_rate": 9.20314247713929e-06, "loss": 0.6972, "step": 6745 }, { "epoch": 0.20675493441216133, "grad_norm": 1.7142606105069549, "learning_rate": 9.20287364465553e-06, "loss": 0.7421, "step": 6746 }, { "epoch": 0.20678558293490254, "grad_norm": 2.0087471357459714, "learning_rate": 9.20260477075978e-06, "loss": 0.7201, "step": 6747 }, { "epoch": 0.20681623145764375, "grad_norm": 0.8458847455538521, "learning_rate": 9.20233585545469e-06, "loss": 0.4966, "step": 6748 }, { "epoch": 0.20684687998038495, "grad_norm": 2.049286359960871, "learning_rate": 9.20206689874291e-06, "loss": 0.6957, "step": 6749 }, { "epoch": 0.20687752850312616, "grad_norm": 0.7826940363582866, "learning_rate": 9.201797900627087e-06, "loss": 0.4764, "step": 6750 }, { "epoch": 0.20690817702586736, "grad_norm": 1.6912603616861772, "learning_rate": 9.201528861109877e-06, "loss": 0.7015, "step": 6751 }, { "epoch": 0.20693882554860857, "grad_norm": 0.8170849642207012, "learning_rate": 9.201259780193927e-06, "loss": 0.4846, "step": 6752 }, { "epoch": 0.20696947407134977, "grad_norm": 1.827240031504076, "learning_rate": 9.200990657881891e-06, "loss": 0.6992, "step": 6753 }, { "epoch": 0.20700012259409095, "grad_norm": 1.8259011429069365, "learning_rate": 9.200721494176418e-06, "loss": 0.7607, "step": 6754 }, { "epoch": 0.20703077111683216, "grad_norm": 1.9065495493963849, "learning_rate": 9.200452289080161e-06, "loss": 0.7467, "step": 6755 }, { "epoch": 0.20706141963957336, "grad_norm": 1.7927005656618156, "learning_rate": 9.200183042595775e-06, "loss": 0.7667, "step": 6756 }, { "epoch": 0.20709206816231457, "grad_norm": 1.8258823910021642, "learning_rate": 9.199913754725908e-06, "loss": 0.7707, "step": 6757 }, { "epoch": 0.20712271668505577, "grad_norm": 1.5340429475217137, "learning_rate": 9.199644425473217e-06, "loss": 0.6963, "step": 6758 }, { "epoch": 0.20715336520779698, "grad_norm": 1.8299104565238002, "learning_rate": 9.199375054840356e-06, "loss": 0.6688, "step": 6759 }, { "epoch": 0.20718401373053819, "grad_norm": 1.8802104902477303, "learning_rate": 9.199105642829977e-06, "loss": 0.7738, "step": 6760 }, { "epoch": 0.2072146622532794, "grad_norm": 1.7598982859339458, "learning_rate": 9.198836189444735e-06, "loss": 0.6863, "step": 6761 }, { "epoch": 0.2072453107760206, "grad_norm": 1.8367670019135447, "learning_rate": 9.198566694687288e-06, "loss": 0.6863, "step": 6762 }, { "epoch": 0.2072759592987618, "grad_norm": 1.9563584116316692, "learning_rate": 9.198297158560288e-06, "loss": 0.7123, "step": 6763 }, { "epoch": 0.207306607821503, "grad_norm": 1.8992862192274782, "learning_rate": 9.19802758106639e-06, "loss": 0.8067, "step": 6764 }, { "epoch": 0.20733725634424421, "grad_norm": 1.519697320007608, "learning_rate": 9.197757962208257e-06, "loss": 0.6362, "step": 6765 }, { "epoch": 0.20736790486698542, "grad_norm": 1.6775450215848355, "learning_rate": 9.197488301988535e-06, "loss": 0.7423, "step": 6766 }, { "epoch": 0.20739855338972663, "grad_norm": 1.9043297210704055, "learning_rate": 9.197218600409889e-06, "loss": 0.7888, "step": 6767 }, { "epoch": 0.20742920191246783, "grad_norm": 1.7084663081533433, "learning_rate": 9.196948857474976e-06, "loss": 0.6655, "step": 6768 }, { "epoch": 0.20745985043520904, "grad_norm": 1.7864470187024433, "learning_rate": 9.19667907318645e-06, "loss": 0.7384, "step": 6769 }, { "epoch": 0.20749049895795021, "grad_norm": 1.9146610171939578, "learning_rate": 9.196409247546973e-06, "loss": 0.7373, "step": 6770 }, { "epoch": 0.20752114748069142, "grad_norm": 1.5098805827472352, "learning_rate": 9.196139380559201e-06, "loss": 0.6808, "step": 6771 }, { "epoch": 0.20755179600343263, "grad_norm": 1.1884687593674135, "learning_rate": 9.195869472225794e-06, "loss": 0.5041, "step": 6772 }, { "epoch": 0.20758244452617383, "grad_norm": 1.8479144097021498, "learning_rate": 9.195599522549412e-06, "loss": 0.7799, "step": 6773 }, { "epoch": 0.20761309304891504, "grad_norm": 1.9262281230425602, "learning_rate": 9.195329531532715e-06, "loss": 0.8028, "step": 6774 }, { "epoch": 0.20764374157165624, "grad_norm": 1.8343948572505522, "learning_rate": 9.19505949917836e-06, "loss": 0.8153, "step": 6775 }, { "epoch": 0.20767439009439745, "grad_norm": 1.5880062968952529, "learning_rate": 9.194789425489012e-06, "loss": 0.6009, "step": 6776 }, { "epoch": 0.20770503861713865, "grad_norm": 1.4759332949912205, "learning_rate": 9.194519310467332e-06, "loss": 0.6627, "step": 6777 }, { "epoch": 0.20773568713987986, "grad_norm": 2.2566249198500032, "learning_rate": 9.194249154115978e-06, "loss": 0.8453, "step": 6778 }, { "epoch": 0.20776633566262107, "grad_norm": 1.7110157569887257, "learning_rate": 9.193978956437615e-06, "loss": 0.7175, "step": 6779 }, { "epoch": 0.20779698418536227, "grad_norm": 1.7255247337861706, "learning_rate": 9.193708717434904e-06, "loss": 0.6665, "step": 6780 }, { "epoch": 0.20782763270810348, "grad_norm": 1.7307950166748334, "learning_rate": 9.193438437110508e-06, "loss": 0.7388, "step": 6781 }, { "epoch": 0.20785828123084468, "grad_norm": 1.7659489345844288, "learning_rate": 9.19316811546709e-06, "loss": 0.7473, "step": 6782 }, { "epoch": 0.2078889297535859, "grad_norm": 2.31007115288661, "learning_rate": 9.192897752507314e-06, "loss": 0.7416, "step": 6783 }, { "epoch": 0.2079195782763271, "grad_norm": 1.9916791692524527, "learning_rate": 9.192627348233842e-06, "loss": 0.7949, "step": 6784 }, { "epoch": 0.20795022679906827, "grad_norm": 1.7704382326300976, "learning_rate": 9.192356902649342e-06, "loss": 0.696, "step": 6785 }, { "epoch": 0.20798087532180948, "grad_norm": 1.7451824999617491, "learning_rate": 9.192086415756476e-06, "loss": 0.7713, "step": 6786 }, { "epoch": 0.20801152384455068, "grad_norm": 1.6031705686453945, "learning_rate": 9.19181588755791e-06, "loss": 0.591, "step": 6787 }, { "epoch": 0.2080421723672919, "grad_norm": 1.5919476095006226, "learning_rate": 9.19154531805631e-06, "loss": 0.7394, "step": 6788 }, { "epoch": 0.2080728208900331, "grad_norm": 1.7753501111179872, "learning_rate": 9.191274707254342e-06, "loss": 0.6309, "step": 6789 }, { "epoch": 0.2081034694127743, "grad_norm": 1.0507846759425072, "learning_rate": 9.19100405515467e-06, "loss": 0.4797, "step": 6790 }, { "epoch": 0.2081341179355155, "grad_norm": 1.9062070865198815, "learning_rate": 9.190733361759964e-06, "loss": 0.8396, "step": 6791 }, { "epoch": 0.2081647664582567, "grad_norm": 1.782948521099564, "learning_rate": 9.190462627072891e-06, "loss": 0.6718, "step": 6792 }, { "epoch": 0.20819541498099792, "grad_norm": 1.5291121421295177, "learning_rate": 9.190191851096116e-06, "loss": 0.628, "step": 6793 }, { "epoch": 0.20822606350373912, "grad_norm": 1.784674869657271, "learning_rate": 9.18992103383231e-06, "loss": 0.6685, "step": 6794 }, { "epoch": 0.20825671202648033, "grad_norm": 1.9825531035681496, "learning_rate": 9.189650175284139e-06, "loss": 0.756, "step": 6795 }, { "epoch": 0.20828736054922153, "grad_norm": 1.6512585115184524, "learning_rate": 9.18937927545427e-06, "loss": 0.6894, "step": 6796 }, { "epoch": 0.20831800907196274, "grad_norm": 1.7291805660032682, "learning_rate": 9.18910833434538e-06, "loss": 0.7714, "step": 6797 }, { "epoch": 0.20834865759470395, "grad_norm": 0.9659235120587193, "learning_rate": 9.188837351960132e-06, "loss": 0.5062, "step": 6798 }, { "epoch": 0.20837930611744515, "grad_norm": 1.5450964518569246, "learning_rate": 9.188566328301196e-06, "loss": 0.6389, "step": 6799 }, { "epoch": 0.20840995464018636, "grad_norm": 2.1041982554955165, "learning_rate": 9.188295263371247e-06, "loss": 0.8099, "step": 6800 }, { "epoch": 0.20844060316292753, "grad_norm": 0.7944831695327185, "learning_rate": 9.188024157172952e-06, "loss": 0.4939, "step": 6801 }, { "epoch": 0.20847125168566874, "grad_norm": 1.9803244997547809, "learning_rate": 9.187753009708983e-06, "loss": 0.6371, "step": 6802 }, { "epoch": 0.20850190020840995, "grad_norm": 1.8660461576652703, "learning_rate": 9.187481820982011e-06, "loss": 0.7462, "step": 6803 }, { "epoch": 0.20853254873115115, "grad_norm": 1.579493382579708, "learning_rate": 9.187210590994711e-06, "loss": 0.7788, "step": 6804 }, { "epoch": 0.20856319725389236, "grad_norm": 1.7960810579646789, "learning_rate": 9.186939319749756e-06, "loss": 0.7927, "step": 6805 }, { "epoch": 0.20859384577663356, "grad_norm": 1.8314649508709209, "learning_rate": 9.186668007249813e-06, "loss": 0.7748, "step": 6806 }, { "epoch": 0.20862449429937477, "grad_norm": 1.8205891989113856, "learning_rate": 9.18639665349756e-06, "loss": 0.6856, "step": 6807 }, { "epoch": 0.20865514282211597, "grad_norm": 1.957675173616404, "learning_rate": 9.186125258495668e-06, "loss": 0.7182, "step": 6808 }, { "epoch": 0.20868579134485718, "grad_norm": 0.9703100665088302, "learning_rate": 9.185853822246814e-06, "loss": 0.4858, "step": 6809 }, { "epoch": 0.20871643986759839, "grad_norm": 0.9158888763485403, "learning_rate": 9.185582344753673e-06, "loss": 0.494, "step": 6810 }, { "epoch": 0.2087470883903396, "grad_norm": 2.051007840441848, "learning_rate": 9.185310826018916e-06, "loss": 0.8195, "step": 6811 }, { "epoch": 0.2087777369130808, "grad_norm": 1.7732338693936338, "learning_rate": 9.185039266045221e-06, "loss": 0.6585, "step": 6812 }, { "epoch": 0.208808385435822, "grad_norm": 1.579575628333002, "learning_rate": 9.184767664835264e-06, "loss": 0.7207, "step": 6813 }, { "epoch": 0.2088390339585632, "grad_norm": 1.7727019748813604, "learning_rate": 9.18449602239172e-06, "loss": 0.697, "step": 6814 }, { "epoch": 0.20886968248130441, "grad_norm": 1.7808187399192292, "learning_rate": 9.184224338717268e-06, "loss": 0.7662, "step": 6815 }, { "epoch": 0.2089003310040456, "grad_norm": 1.6726219273615999, "learning_rate": 9.18395261381458e-06, "loss": 0.6156, "step": 6816 }, { "epoch": 0.2089309795267868, "grad_norm": 1.8326021329469375, "learning_rate": 9.183680847686338e-06, "loss": 0.8114, "step": 6817 }, { "epoch": 0.208961628049528, "grad_norm": 1.9288626618344638, "learning_rate": 9.183409040335218e-06, "loss": 0.7644, "step": 6818 }, { "epoch": 0.2089922765722692, "grad_norm": 1.672073496473314, "learning_rate": 9.183137191763898e-06, "loss": 0.6419, "step": 6819 }, { "epoch": 0.20902292509501041, "grad_norm": 1.691755527101769, "learning_rate": 9.182865301975056e-06, "loss": 0.7427, "step": 6820 }, { "epoch": 0.20905357361775162, "grad_norm": 1.765372803180703, "learning_rate": 9.182593370971373e-06, "loss": 0.7697, "step": 6821 }, { "epoch": 0.20908422214049283, "grad_norm": 2.1117715895021796, "learning_rate": 9.18232139875553e-06, "loss": 0.801, "step": 6822 }, { "epoch": 0.20911487066323403, "grad_norm": 1.7747514290506967, "learning_rate": 9.1820493853302e-06, "loss": 0.6449, "step": 6823 }, { "epoch": 0.20914551918597524, "grad_norm": 1.6660686883071856, "learning_rate": 9.181777330698069e-06, "loss": 0.713, "step": 6824 }, { "epoch": 0.20917616770871644, "grad_norm": 1.8518806501079634, "learning_rate": 9.181505234861816e-06, "loss": 0.7505, "step": 6825 }, { "epoch": 0.20920681623145765, "grad_norm": 1.640453133222903, "learning_rate": 9.181233097824123e-06, "loss": 0.6494, "step": 6826 }, { "epoch": 0.20923746475419885, "grad_norm": 1.7537630114903089, "learning_rate": 9.18096091958767e-06, "loss": 0.764, "step": 6827 }, { "epoch": 0.20926811327694006, "grad_norm": 1.999009199335041, "learning_rate": 9.180688700155138e-06, "loss": 0.7229, "step": 6828 }, { "epoch": 0.20929876179968127, "grad_norm": 1.7170214898379217, "learning_rate": 9.180416439529211e-06, "loss": 0.7814, "step": 6829 }, { "epoch": 0.20932941032242247, "grad_norm": 1.8149656575857185, "learning_rate": 9.18014413771257e-06, "loss": 0.7234, "step": 6830 }, { "epoch": 0.20936005884516368, "grad_norm": 1.7920228999272243, "learning_rate": 9.179871794707903e-06, "loss": 0.826, "step": 6831 }, { "epoch": 0.20939070736790485, "grad_norm": 1.944797410749394, "learning_rate": 9.179599410517887e-06, "loss": 0.74, "step": 6832 }, { "epoch": 0.20942135589064606, "grad_norm": 1.9416265894432077, "learning_rate": 9.17932698514521e-06, "loss": 0.8058, "step": 6833 }, { "epoch": 0.20945200441338727, "grad_norm": 1.8778135412842543, "learning_rate": 9.179054518592553e-06, "loss": 0.7686, "step": 6834 }, { "epoch": 0.20948265293612847, "grad_norm": 1.64474573013903, "learning_rate": 9.178782010862603e-06, "loss": 0.7435, "step": 6835 }, { "epoch": 0.20951330145886968, "grad_norm": 1.8319759640079765, "learning_rate": 9.178509461958046e-06, "loss": 0.7605, "step": 6836 }, { "epoch": 0.20954394998161088, "grad_norm": 1.7855648579986367, "learning_rate": 9.178236871881565e-06, "loss": 0.7925, "step": 6837 }, { "epoch": 0.2095745985043521, "grad_norm": 1.6585637434699263, "learning_rate": 9.177964240635849e-06, "loss": 0.7004, "step": 6838 }, { "epoch": 0.2096052470270933, "grad_norm": 1.86410404390193, "learning_rate": 9.17769156822358e-06, "loss": 0.7072, "step": 6839 }, { "epoch": 0.2096358955498345, "grad_norm": 1.8530621563971725, "learning_rate": 9.177418854647447e-06, "loss": 0.6818, "step": 6840 }, { "epoch": 0.2096665440725757, "grad_norm": 1.125894768126668, "learning_rate": 9.17714609991014e-06, "loss": 0.5028, "step": 6841 }, { "epoch": 0.2096971925953169, "grad_norm": 0.9734690070478185, "learning_rate": 9.17687330401434e-06, "loss": 0.4998, "step": 6842 }, { "epoch": 0.20972784111805812, "grad_norm": 1.8644130238033845, "learning_rate": 9.176600466962741e-06, "loss": 0.8043, "step": 6843 }, { "epoch": 0.20975848964079932, "grad_norm": 1.883425353539842, "learning_rate": 9.176327588758029e-06, "loss": 0.8088, "step": 6844 }, { "epoch": 0.20978913816354053, "grad_norm": 1.656372899019515, "learning_rate": 9.176054669402892e-06, "loss": 0.7002, "step": 6845 }, { "epoch": 0.20981978668628173, "grad_norm": 1.7263265397225005, "learning_rate": 9.17578170890002e-06, "loss": 0.8127, "step": 6846 }, { "epoch": 0.2098504352090229, "grad_norm": 1.7121882198460352, "learning_rate": 9.175508707252102e-06, "loss": 0.8137, "step": 6847 }, { "epoch": 0.20988108373176412, "grad_norm": 1.8794673935085513, "learning_rate": 9.175235664461828e-06, "loss": 0.7443, "step": 6848 }, { "epoch": 0.20991173225450532, "grad_norm": 2.0044817246563564, "learning_rate": 9.17496258053189e-06, "loss": 0.7848, "step": 6849 }, { "epoch": 0.20994238077724653, "grad_norm": 1.744483937240471, "learning_rate": 9.174689455464978e-06, "loss": 0.8035, "step": 6850 }, { "epoch": 0.20997302929998773, "grad_norm": 2.1194148158500026, "learning_rate": 9.174416289263781e-06, "loss": 0.8102, "step": 6851 }, { "epoch": 0.21000367782272894, "grad_norm": 1.936136297460602, "learning_rate": 9.174143081930993e-06, "loss": 0.7016, "step": 6852 }, { "epoch": 0.21003432634547015, "grad_norm": 1.7685777371932359, "learning_rate": 9.173869833469304e-06, "loss": 0.6208, "step": 6853 }, { "epoch": 0.21006497486821135, "grad_norm": 1.7870901576654972, "learning_rate": 9.17359654388141e-06, "loss": 0.7219, "step": 6854 }, { "epoch": 0.21009562339095256, "grad_norm": 1.7709667104532059, "learning_rate": 9.17332321317e-06, "loss": 0.7301, "step": 6855 }, { "epoch": 0.21012627191369376, "grad_norm": 1.748446950352049, "learning_rate": 9.173049841337768e-06, "loss": 0.7382, "step": 6856 }, { "epoch": 0.21015692043643497, "grad_norm": 1.8398553345711108, "learning_rate": 9.172776428387408e-06, "loss": 0.7559, "step": 6857 }, { "epoch": 0.21018756895917617, "grad_norm": 1.7009313786225615, "learning_rate": 9.172502974321616e-06, "loss": 0.7497, "step": 6858 }, { "epoch": 0.21021821748191738, "grad_norm": 1.8026413552088343, "learning_rate": 9.172229479143085e-06, "loss": 0.5226, "step": 6859 }, { "epoch": 0.21024886600465859, "grad_norm": 1.9363748529202822, "learning_rate": 9.171955942854506e-06, "loss": 0.7579, "step": 6860 }, { "epoch": 0.2102795145273998, "grad_norm": 1.8033573832254106, "learning_rate": 9.17168236545858e-06, "loss": 0.7517, "step": 6861 }, { "epoch": 0.210310163050141, "grad_norm": 1.748957965893536, "learning_rate": 9.171408746958e-06, "loss": 0.6844, "step": 6862 }, { "epoch": 0.21034081157288217, "grad_norm": 1.8161299308525722, "learning_rate": 9.171135087355463e-06, "loss": 0.7819, "step": 6863 }, { "epoch": 0.21037146009562338, "grad_norm": 1.7878548993667627, "learning_rate": 9.170861386653665e-06, "loss": 0.756, "step": 6864 }, { "epoch": 0.21040210861836459, "grad_norm": 1.792135781338691, "learning_rate": 9.1705876448553e-06, "loss": 0.7271, "step": 6865 }, { "epoch": 0.2104327571411058, "grad_norm": 0.9734903945267865, "learning_rate": 9.17031386196307e-06, "loss": 0.5096, "step": 6866 }, { "epoch": 0.210463405663847, "grad_norm": 1.9028634184432638, "learning_rate": 9.170040037979671e-06, "loss": 0.7614, "step": 6867 }, { "epoch": 0.2104940541865882, "grad_norm": 1.694211012977835, "learning_rate": 9.169766172907799e-06, "loss": 0.7729, "step": 6868 }, { "epoch": 0.2105247027093294, "grad_norm": 2.0870773322495904, "learning_rate": 9.169492266750154e-06, "loss": 0.6842, "step": 6869 }, { "epoch": 0.21055535123207061, "grad_norm": 1.792154594032373, "learning_rate": 9.169218319509436e-06, "loss": 0.722, "step": 6870 }, { "epoch": 0.21058599975481182, "grad_norm": 0.9154273263535209, "learning_rate": 9.168944331188342e-06, "loss": 0.5163, "step": 6871 }, { "epoch": 0.21061664827755303, "grad_norm": 1.9743349525453757, "learning_rate": 9.168670301789574e-06, "loss": 0.792, "step": 6872 }, { "epoch": 0.21064729680029423, "grad_norm": 1.8693132833812818, "learning_rate": 9.16839623131583e-06, "loss": 0.7489, "step": 6873 }, { "epoch": 0.21067794532303544, "grad_norm": 1.7813273678304533, "learning_rate": 9.168122119769813e-06, "loss": 0.8148, "step": 6874 }, { "epoch": 0.21070859384577664, "grad_norm": 2.792978920969533, "learning_rate": 9.167847967154219e-06, "loss": 0.7076, "step": 6875 }, { "epoch": 0.21073924236851785, "grad_norm": 1.5876035303021407, "learning_rate": 9.167573773471756e-06, "loss": 0.624, "step": 6876 }, { "epoch": 0.21076989089125905, "grad_norm": 1.6921049429493127, "learning_rate": 9.167299538725121e-06, "loss": 0.6789, "step": 6877 }, { "epoch": 0.21080053941400023, "grad_norm": 1.7170578211337775, "learning_rate": 9.167025262917018e-06, "loss": 0.6965, "step": 6878 }, { "epoch": 0.21083118793674144, "grad_norm": 1.7578112592531, "learning_rate": 9.166750946050147e-06, "loss": 0.6755, "step": 6879 }, { "epoch": 0.21086183645948264, "grad_norm": 1.7834347659126346, "learning_rate": 9.166476588127215e-06, "loss": 0.7212, "step": 6880 }, { "epoch": 0.21089248498222385, "grad_norm": 1.714172527356643, "learning_rate": 9.166202189150922e-06, "loss": 0.7632, "step": 6881 }, { "epoch": 0.21092313350496505, "grad_norm": 1.8270774505399052, "learning_rate": 9.165927749123972e-06, "loss": 0.8293, "step": 6882 }, { "epoch": 0.21095378202770626, "grad_norm": 1.7095949547928186, "learning_rate": 9.165653268049072e-06, "loss": 0.6976, "step": 6883 }, { "epoch": 0.21098443055044747, "grad_norm": 1.6144527886490627, "learning_rate": 9.165378745928923e-06, "loss": 0.7105, "step": 6884 }, { "epoch": 0.21101507907318867, "grad_norm": 1.788243461813118, "learning_rate": 9.165104182766233e-06, "loss": 0.7202, "step": 6885 }, { "epoch": 0.21104572759592988, "grad_norm": 1.6479423097165988, "learning_rate": 9.164829578563705e-06, "loss": 0.641, "step": 6886 }, { "epoch": 0.21107637611867108, "grad_norm": 0.9544702610902126, "learning_rate": 9.164554933324045e-06, "loss": 0.5021, "step": 6887 }, { "epoch": 0.2111070246414123, "grad_norm": 1.5791800181820352, "learning_rate": 9.164280247049961e-06, "loss": 0.6838, "step": 6888 }, { "epoch": 0.2111376731641535, "grad_norm": 1.674161460783684, "learning_rate": 9.164005519744157e-06, "loss": 0.6221, "step": 6889 }, { "epoch": 0.2111683216868947, "grad_norm": 0.7711683694305198, "learning_rate": 9.163730751409342e-06, "loss": 0.475, "step": 6890 }, { "epoch": 0.2111989702096359, "grad_norm": 1.7167220706392847, "learning_rate": 9.16345594204822e-06, "loss": 0.7976, "step": 6891 }, { "epoch": 0.2112296187323771, "grad_norm": 1.833129240930157, "learning_rate": 9.163181091663504e-06, "loss": 0.7704, "step": 6892 }, { "epoch": 0.21126026725511832, "grad_norm": 1.6989457380427906, "learning_rate": 9.162906200257899e-06, "loss": 0.6912, "step": 6893 }, { "epoch": 0.2112909157778595, "grad_norm": 0.8303846636551513, "learning_rate": 9.162631267834114e-06, "loss": 0.4808, "step": 6894 }, { "epoch": 0.2113215643006007, "grad_norm": 1.6514146018567908, "learning_rate": 9.162356294394858e-06, "loss": 0.7695, "step": 6895 }, { "epoch": 0.2113522128233419, "grad_norm": 1.7210836422496705, "learning_rate": 9.16208127994284e-06, "loss": 0.7056, "step": 6896 }, { "epoch": 0.2113828613460831, "grad_norm": 1.8070236791121794, "learning_rate": 9.161806224480772e-06, "loss": 0.6796, "step": 6897 }, { "epoch": 0.21141350986882432, "grad_norm": 1.881684405334881, "learning_rate": 9.161531128011361e-06, "loss": 0.7223, "step": 6898 }, { "epoch": 0.21144415839156552, "grad_norm": 1.932841210681594, "learning_rate": 9.161255990537318e-06, "loss": 0.7558, "step": 6899 }, { "epoch": 0.21147480691430673, "grad_norm": 1.909009164221902, "learning_rate": 9.160980812061357e-06, "loss": 0.7713, "step": 6900 }, { "epoch": 0.21150545543704793, "grad_norm": 1.8644356886033961, "learning_rate": 9.160705592586187e-06, "loss": 0.7333, "step": 6901 }, { "epoch": 0.21153610395978914, "grad_norm": 1.6675138254542254, "learning_rate": 9.16043033211452e-06, "loss": 0.6969, "step": 6902 }, { "epoch": 0.21156675248253035, "grad_norm": 1.6892572185055745, "learning_rate": 9.16015503064907e-06, "loss": 0.7077, "step": 6903 }, { "epoch": 0.21159740100527155, "grad_norm": 2.1049162270148996, "learning_rate": 9.159879688192546e-06, "loss": 0.8021, "step": 6904 }, { "epoch": 0.21162804952801276, "grad_norm": 0.9823090997930126, "learning_rate": 9.159604304747662e-06, "loss": 0.5073, "step": 6905 }, { "epoch": 0.21165869805075396, "grad_norm": 1.8230607892727704, "learning_rate": 9.159328880317136e-06, "loss": 0.6742, "step": 6906 }, { "epoch": 0.21168934657349517, "grad_norm": 0.8581277215546509, "learning_rate": 9.159053414903676e-06, "loss": 0.5198, "step": 6907 }, { "epoch": 0.21171999509623637, "grad_norm": 1.7963722666636788, "learning_rate": 9.158777908509999e-06, "loss": 0.7073, "step": 6908 }, { "epoch": 0.21175064361897755, "grad_norm": 1.788095152589813, "learning_rate": 9.158502361138819e-06, "loss": 0.7205, "step": 6909 }, { "epoch": 0.21178129214171876, "grad_norm": 0.8710616070081827, "learning_rate": 9.158226772792852e-06, "loss": 0.4857, "step": 6910 }, { "epoch": 0.21181194066445996, "grad_norm": 2.0178951303650083, "learning_rate": 9.15795114347481e-06, "loss": 0.7412, "step": 6911 }, { "epoch": 0.21184258918720117, "grad_norm": 1.64157528679434, "learning_rate": 9.157675473187414e-06, "loss": 0.7283, "step": 6912 }, { "epoch": 0.21187323770994237, "grad_norm": 1.7019332214032006, "learning_rate": 9.157399761933377e-06, "loss": 0.7259, "step": 6913 }, { "epoch": 0.21190388623268358, "grad_norm": 1.6060917912374433, "learning_rate": 9.157124009715417e-06, "loss": 0.7573, "step": 6914 }, { "epoch": 0.21193453475542479, "grad_norm": 1.9453801078820514, "learning_rate": 9.156848216536251e-06, "loss": 0.6643, "step": 6915 }, { "epoch": 0.211965183278166, "grad_norm": 1.658835094098403, "learning_rate": 9.156572382398594e-06, "loss": 0.7044, "step": 6916 }, { "epoch": 0.2119958318009072, "grad_norm": 1.7395985814295267, "learning_rate": 9.156296507305167e-06, "loss": 0.6636, "step": 6917 }, { "epoch": 0.2120264803236484, "grad_norm": 1.8210205225629972, "learning_rate": 9.156020591258687e-06, "loss": 0.7422, "step": 6918 }, { "epoch": 0.2120571288463896, "grad_norm": 0.8594983365179799, "learning_rate": 9.155744634261874e-06, "loss": 0.4893, "step": 6919 }, { "epoch": 0.21208777736913081, "grad_norm": 1.8212139680934627, "learning_rate": 9.155468636317443e-06, "loss": 0.8511, "step": 6920 }, { "epoch": 0.21211842589187202, "grad_norm": 1.6324249670599813, "learning_rate": 9.15519259742812e-06, "loss": 0.7161, "step": 6921 }, { "epoch": 0.21214907441461323, "grad_norm": 1.8316990862187694, "learning_rate": 9.15491651759662e-06, "loss": 0.7616, "step": 6922 }, { "epoch": 0.21217972293735443, "grad_norm": 1.7195018363195347, "learning_rate": 9.154640396825662e-06, "loss": 0.7164, "step": 6923 }, { "epoch": 0.21221037146009564, "grad_norm": 0.8347845723763856, "learning_rate": 9.15436423511797e-06, "loss": 0.4913, "step": 6924 }, { "epoch": 0.21224101998283681, "grad_norm": 1.6691154470476273, "learning_rate": 9.154088032476266e-06, "loss": 0.7715, "step": 6925 }, { "epoch": 0.21227166850557802, "grad_norm": 1.8244152941922673, "learning_rate": 9.153811788903269e-06, "loss": 0.681, "step": 6926 }, { "epoch": 0.21230231702831923, "grad_norm": 2.0724667101050183, "learning_rate": 9.1535355044017e-06, "loss": 0.7293, "step": 6927 }, { "epoch": 0.21233296555106043, "grad_norm": 2.182190078939684, "learning_rate": 9.153259178974286e-06, "loss": 0.6537, "step": 6928 }, { "epoch": 0.21236361407380164, "grad_norm": 1.567885108712536, "learning_rate": 9.152982812623746e-06, "loss": 0.6684, "step": 6929 }, { "epoch": 0.21239426259654284, "grad_norm": 1.8021626820561905, "learning_rate": 9.152706405352802e-06, "loss": 0.7286, "step": 6930 }, { "epoch": 0.21242491111928405, "grad_norm": 1.6268337808558793, "learning_rate": 9.15242995716418e-06, "loss": 0.6745, "step": 6931 }, { "epoch": 0.21245555964202525, "grad_norm": 0.8203364787433969, "learning_rate": 9.152153468060603e-06, "loss": 0.49, "step": 6932 }, { "epoch": 0.21248620816476646, "grad_norm": 1.6681435869876402, "learning_rate": 9.151876938044795e-06, "loss": 0.6759, "step": 6933 }, { "epoch": 0.21251685668750767, "grad_norm": 1.8097469010537726, "learning_rate": 9.151600367119482e-06, "loss": 0.7607, "step": 6934 }, { "epoch": 0.21254750521024887, "grad_norm": 1.8253314774917206, "learning_rate": 9.15132375528739e-06, "loss": 0.7188, "step": 6935 }, { "epoch": 0.21257815373299008, "grad_norm": 0.8256443023293238, "learning_rate": 9.15104710255124e-06, "loss": 0.482, "step": 6936 }, { "epoch": 0.21260880225573128, "grad_norm": 1.7466118191256845, "learning_rate": 9.150770408913763e-06, "loss": 0.7173, "step": 6937 }, { "epoch": 0.2126394507784725, "grad_norm": 1.7395428791537741, "learning_rate": 9.15049367437768e-06, "loss": 0.7283, "step": 6938 }, { "epoch": 0.2126700993012137, "grad_norm": 2.3786985446323947, "learning_rate": 9.150216898945724e-06, "loss": 0.7599, "step": 6939 }, { "epoch": 0.21270074782395487, "grad_norm": 1.801658470555801, "learning_rate": 9.149940082620618e-06, "loss": 0.7653, "step": 6940 }, { "epoch": 0.21273139634669608, "grad_norm": 1.9168776006052357, "learning_rate": 9.149663225405092e-06, "loss": 0.7769, "step": 6941 }, { "epoch": 0.21276204486943728, "grad_norm": 1.6620627990228012, "learning_rate": 9.14938632730187e-06, "loss": 0.6881, "step": 6942 }, { "epoch": 0.2127926933921785, "grad_norm": 1.5318401627367622, "learning_rate": 9.149109388313684e-06, "loss": 0.719, "step": 6943 }, { "epoch": 0.2128233419149197, "grad_norm": 1.7383589017233156, "learning_rate": 9.148832408443262e-06, "loss": 0.7464, "step": 6944 }, { "epoch": 0.2128539904376609, "grad_norm": 0.8443623031484068, "learning_rate": 9.148555387693332e-06, "loss": 0.4988, "step": 6945 }, { "epoch": 0.2128846389604021, "grad_norm": 1.924540246479336, "learning_rate": 9.148278326066624e-06, "loss": 0.6775, "step": 6946 }, { "epoch": 0.2129152874831433, "grad_norm": 1.9723913781697802, "learning_rate": 9.14800122356587e-06, "loss": 0.753, "step": 6947 }, { "epoch": 0.21294593600588452, "grad_norm": 0.8551429205466912, "learning_rate": 9.147724080193798e-06, "loss": 0.4968, "step": 6948 }, { "epoch": 0.21297658452862572, "grad_norm": 1.8131640879754274, "learning_rate": 9.147446895953138e-06, "loss": 0.6264, "step": 6949 }, { "epoch": 0.21300723305136693, "grad_norm": 0.7812227737499419, "learning_rate": 9.147169670846623e-06, "loss": 0.5006, "step": 6950 }, { "epoch": 0.21303788157410813, "grad_norm": 1.659525111079974, "learning_rate": 9.146892404876985e-06, "loss": 0.7178, "step": 6951 }, { "epoch": 0.21306853009684934, "grad_norm": 0.7894846818895113, "learning_rate": 9.146615098046953e-06, "loss": 0.4764, "step": 6952 }, { "epoch": 0.21309917861959055, "grad_norm": 2.0668705931444187, "learning_rate": 9.146337750359265e-06, "loss": 0.7409, "step": 6953 }, { "epoch": 0.21312982714233175, "grad_norm": 1.9334955030924266, "learning_rate": 9.146060361816648e-06, "loss": 0.7766, "step": 6954 }, { "epoch": 0.21316047566507296, "grad_norm": 1.8374477868586896, "learning_rate": 9.145782932421838e-06, "loss": 0.775, "step": 6955 }, { "epoch": 0.21319112418781413, "grad_norm": 1.9337956402301197, "learning_rate": 9.145505462177569e-06, "loss": 0.8049, "step": 6956 }, { "epoch": 0.21322177271055534, "grad_norm": 1.9745067624578612, "learning_rate": 9.145227951086573e-06, "loss": 0.7013, "step": 6957 }, { "epoch": 0.21325242123329655, "grad_norm": 1.8575003603461222, "learning_rate": 9.144950399151584e-06, "loss": 0.8499, "step": 6958 }, { "epoch": 0.21328306975603775, "grad_norm": 1.753764247161566, "learning_rate": 9.144672806375341e-06, "loss": 0.7443, "step": 6959 }, { "epoch": 0.21331371827877896, "grad_norm": 2.06861984509508, "learning_rate": 9.144395172760574e-06, "loss": 0.7021, "step": 6960 }, { "epoch": 0.21334436680152016, "grad_norm": 1.9298406316946488, "learning_rate": 9.144117498310023e-06, "loss": 0.8098, "step": 6961 }, { "epoch": 0.21337501532426137, "grad_norm": 1.8290123630639186, "learning_rate": 9.14383978302642e-06, "loss": 0.697, "step": 6962 }, { "epoch": 0.21340566384700257, "grad_norm": 2.0792271236254636, "learning_rate": 9.143562026912504e-06, "loss": 0.7984, "step": 6963 }, { "epoch": 0.21343631236974378, "grad_norm": 1.7128611880415845, "learning_rate": 9.143284229971011e-06, "loss": 0.7134, "step": 6964 }, { "epoch": 0.21346696089248499, "grad_norm": 1.9479319944555873, "learning_rate": 9.143006392204678e-06, "loss": 0.7019, "step": 6965 }, { "epoch": 0.2134976094152262, "grad_norm": 1.7778286482379715, "learning_rate": 9.142728513616245e-06, "loss": 0.6988, "step": 6966 }, { "epoch": 0.2135282579379674, "grad_norm": 1.7407668433087269, "learning_rate": 9.142450594208447e-06, "loss": 0.7783, "step": 6967 }, { "epoch": 0.2135589064607086, "grad_norm": 1.8564017891483227, "learning_rate": 9.142172633984024e-06, "loss": 0.7184, "step": 6968 }, { "epoch": 0.2135895549834498, "grad_norm": 2.117485928426193, "learning_rate": 9.141894632945712e-06, "loss": 0.779, "step": 6969 }, { "epoch": 0.21362020350619101, "grad_norm": 1.671249899640579, "learning_rate": 9.141616591096255e-06, "loss": 0.7192, "step": 6970 }, { "epoch": 0.2136508520289322, "grad_norm": 1.6754820019153722, "learning_rate": 9.141338508438388e-06, "loss": 0.6422, "step": 6971 }, { "epoch": 0.2136815005516734, "grad_norm": 2.0037980016868118, "learning_rate": 9.141060384974854e-06, "loss": 0.7256, "step": 6972 }, { "epoch": 0.2137121490744146, "grad_norm": 1.770454684529185, "learning_rate": 9.140782220708393e-06, "loss": 0.7524, "step": 6973 }, { "epoch": 0.2137427975971558, "grad_norm": 0.9847829232137651, "learning_rate": 9.140504015641745e-06, "loss": 0.498, "step": 6974 }, { "epoch": 0.21377344611989701, "grad_norm": 1.8447134369591225, "learning_rate": 9.140225769777652e-06, "loss": 0.6444, "step": 6975 }, { "epoch": 0.21380409464263822, "grad_norm": 1.7538201932715816, "learning_rate": 9.139947483118852e-06, "loss": 0.7449, "step": 6976 }, { "epoch": 0.21383474316537943, "grad_norm": 1.5951626062886828, "learning_rate": 9.139669155668095e-06, "loss": 0.7437, "step": 6977 }, { "epoch": 0.21386539168812063, "grad_norm": 1.858833259075066, "learning_rate": 9.139390787428115e-06, "loss": 0.6933, "step": 6978 }, { "epoch": 0.21389604021086184, "grad_norm": 1.7212704740693199, "learning_rate": 9.139112378401659e-06, "loss": 0.6782, "step": 6979 }, { "epoch": 0.21392668873360304, "grad_norm": 1.6663084474576322, "learning_rate": 9.138833928591471e-06, "loss": 0.5944, "step": 6980 }, { "epoch": 0.21395733725634425, "grad_norm": 0.8699602529563103, "learning_rate": 9.138555438000291e-06, "loss": 0.4832, "step": 6981 }, { "epoch": 0.21398798577908545, "grad_norm": 1.9034420691868836, "learning_rate": 9.138276906630868e-06, "loss": 0.7864, "step": 6982 }, { "epoch": 0.21401863430182666, "grad_norm": 1.6268549539385289, "learning_rate": 9.137998334485944e-06, "loss": 0.7298, "step": 6983 }, { "epoch": 0.21404928282456787, "grad_norm": 1.79039929114554, "learning_rate": 9.137719721568263e-06, "loss": 0.7484, "step": 6984 }, { "epoch": 0.21407993134730907, "grad_norm": 1.8175667957601191, "learning_rate": 9.13744106788057e-06, "loss": 0.7228, "step": 6985 }, { "epoch": 0.21411057987005028, "grad_norm": 1.8909405959757197, "learning_rate": 9.137162373425612e-06, "loss": 0.7549, "step": 6986 }, { "epoch": 0.21414122839279146, "grad_norm": 1.6357334683518054, "learning_rate": 9.136883638206135e-06, "loss": 0.6965, "step": 6987 }, { "epoch": 0.21417187691553266, "grad_norm": 1.8584534396803898, "learning_rate": 9.136604862224886e-06, "loss": 0.7455, "step": 6988 }, { "epoch": 0.21420252543827387, "grad_norm": 1.923274238546006, "learning_rate": 9.136326045484607e-06, "loss": 0.8737, "step": 6989 }, { "epoch": 0.21423317396101507, "grad_norm": 0.8596550191543165, "learning_rate": 9.136047187988053e-06, "loss": 0.5077, "step": 6990 }, { "epoch": 0.21426382248375628, "grad_norm": 1.9603129315645629, "learning_rate": 9.135768289737966e-06, "loss": 0.7598, "step": 6991 }, { "epoch": 0.21429447100649748, "grad_norm": 1.595049213894692, "learning_rate": 9.135489350737096e-06, "loss": 0.788, "step": 6992 }, { "epoch": 0.2143251195292387, "grad_norm": 1.8386993735326143, "learning_rate": 9.135210370988192e-06, "loss": 0.7566, "step": 6993 }, { "epoch": 0.2143557680519799, "grad_norm": 1.7735742927597118, "learning_rate": 9.134931350494001e-06, "loss": 0.7659, "step": 6994 }, { "epoch": 0.2143864165747211, "grad_norm": 1.9014561275349697, "learning_rate": 9.134652289257275e-06, "loss": 0.6432, "step": 6995 }, { "epoch": 0.2144170650974623, "grad_norm": 1.7715084501512703, "learning_rate": 9.134373187280761e-06, "loss": 0.662, "step": 6996 }, { "epoch": 0.2144477136202035, "grad_norm": 1.805479575609256, "learning_rate": 9.134094044567213e-06, "loss": 0.6815, "step": 6997 }, { "epoch": 0.21447836214294472, "grad_norm": 0.8043156297456138, "learning_rate": 9.133814861119375e-06, "loss": 0.4906, "step": 6998 }, { "epoch": 0.21450901066568592, "grad_norm": 1.6647302086917195, "learning_rate": 9.133535636940003e-06, "loss": 0.7153, "step": 6999 }, { "epoch": 0.21453965918842713, "grad_norm": 1.7887073532106652, "learning_rate": 9.133256372031845e-06, "loss": 0.7204, "step": 7000 }, { "epoch": 0.21457030771116833, "grad_norm": 0.8385304043339927, "learning_rate": 9.132977066397657e-06, "loss": 0.4754, "step": 7001 }, { "epoch": 0.2146009562339095, "grad_norm": 0.8255902657217924, "learning_rate": 9.132697720040187e-06, "loss": 0.506, "step": 7002 }, { "epoch": 0.21463160475665072, "grad_norm": 1.9232229840657447, "learning_rate": 9.132418332962189e-06, "loss": 0.6644, "step": 7003 }, { "epoch": 0.21466225327939192, "grad_norm": 1.7802428702456325, "learning_rate": 9.132138905166417e-06, "loss": 0.6896, "step": 7004 }, { "epoch": 0.21469290180213313, "grad_norm": 1.81263224791839, "learning_rate": 9.131859436655621e-06, "loss": 0.7206, "step": 7005 }, { "epoch": 0.21472355032487433, "grad_norm": 1.5436475469645543, "learning_rate": 9.131579927432559e-06, "loss": 0.6616, "step": 7006 }, { "epoch": 0.21475419884761554, "grad_norm": 1.6450575312369302, "learning_rate": 9.131300377499983e-06, "loss": 0.6359, "step": 7007 }, { "epoch": 0.21478484737035675, "grad_norm": 1.597360617481595, "learning_rate": 9.131020786860644e-06, "loss": 0.6772, "step": 7008 }, { "epoch": 0.21481549589309795, "grad_norm": 1.8022081875444382, "learning_rate": 9.130741155517305e-06, "loss": 0.7597, "step": 7009 }, { "epoch": 0.21484614441583916, "grad_norm": 0.8725924806950642, "learning_rate": 9.130461483472712e-06, "loss": 0.4877, "step": 7010 }, { "epoch": 0.21487679293858036, "grad_norm": 1.8063927046529586, "learning_rate": 9.130181770729628e-06, "loss": 0.6423, "step": 7011 }, { "epoch": 0.21490744146132157, "grad_norm": 1.827249535151622, "learning_rate": 9.129902017290806e-06, "loss": 0.7238, "step": 7012 }, { "epoch": 0.21493808998406277, "grad_norm": 2.098724405863711, "learning_rate": 9.129622223159002e-06, "loss": 0.7468, "step": 7013 }, { "epoch": 0.21496873850680398, "grad_norm": 1.748393074990259, "learning_rate": 9.129342388336973e-06, "loss": 0.7185, "step": 7014 }, { "epoch": 0.21499938702954519, "grad_norm": 1.7112139822646133, "learning_rate": 9.129062512827478e-06, "loss": 0.6547, "step": 7015 }, { "epoch": 0.2150300355522864, "grad_norm": 3.18292279914859, "learning_rate": 9.128782596633275e-06, "loss": 0.7058, "step": 7016 }, { "epoch": 0.2150606840750276, "grad_norm": 0.8129329295633559, "learning_rate": 9.128502639757117e-06, "loss": 0.4898, "step": 7017 }, { "epoch": 0.21509133259776878, "grad_norm": 1.6599690675297474, "learning_rate": 9.12822264220177e-06, "loss": 0.6836, "step": 7018 }, { "epoch": 0.21512198112050998, "grad_norm": 1.7744747341751226, "learning_rate": 9.127942603969987e-06, "loss": 0.7636, "step": 7019 }, { "epoch": 0.2151526296432512, "grad_norm": 1.6705404481426673, "learning_rate": 9.12766252506453e-06, "loss": 0.7048, "step": 7020 }, { "epoch": 0.2151832781659924, "grad_norm": 1.784120696397485, "learning_rate": 9.127382405488156e-06, "loss": 0.7101, "step": 7021 }, { "epoch": 0.2152139266887336, "grad_norm": 1.7993686503370603, "learning_rate": 9.12710224524363e-06, "loss": 0.6915, "step": 7022 }, { "epoch": 0.2152445752114748, "grad_norm": 1.764336306913328, "learning_rate": 9.126822044333707e-06, "loss": 0.6505, "step": 7023 }, { "epoch": 0.215275223734216, "grad_norm": 1.6860632942826916, "learning_rate": 9.126541802761153e-06, "loss": 0.6615, "step": 7024 }, { "epoch": 0.21530587225695721, "grad_norm": 1.8189986520425985, "learning_rate": 9.126261520528725e-06, "loss": 0.776, "step": 7025 }, { "epoch": 0.21533652077969842, "grad_norm": 1.6442997428061474, "learning_rate": 9.125981197639187e-06, "loss": 0.7303, "step": 7026 }, { "epoch": 0.21536716930243963, "grad_norm": 1.6999955981610342, "learning_rate": 9.125700834095301e-06, "loss": 0.8109, "step": 7027 }, { "epoch": 0.21539781782518083, "grad_norm": 1.8677500168970826, "learning_rate": 9.12542042989983e-06, "loss": 0.7518, "step": 7028 }, { "epoch": 0.21542846634792204, "grad_norm": 1.872191154374633, "learning_rate": 9.125139985055533e-06, "loss": 0.6508, "step": 7029 }, { "epoch": 0.21545911487066324, "grad_norm": 1.6762230645933132, "learning_rate": 9.124859499565178e-06, "loss": 0.7309, "step": 7030 }, { "epoch": 0.21548976339340445, "grad_norm": 0.8506978456735566, "learning_rate": 9.124578973431527e-06, "loss": 0.4638, "step": 7031 }, { "epoch": 0.21552041191614565, "grad_norm": 1.7361269736824667, "learning_rate": 9.124298406657345e-06, "loss": 0.7812, "step": 7032 }, { "epoch": 0.21555106043888683, "grad_norm": 1.634276017816826, "learning_rate": 9.124017799245396e-06, "loss": 0.74, "step": 7033 }, { "epoch": 0.21558170896162804, "grad_norm": 1.8648427853950407, "learning_rate": 9.123737151198442e-06, "loss": 0.6514, "step": 7034 }, { "epoch": 0.21561235748436924, "grad_norm": 1.722044296469583, "learning_rate": 9.12345646251925e-06, "loss": 0.621, "step": 7035 }, { "epoch": 0.21564300600711045, "grad_norm": 1.6884829917505568, "learning_rate": 9.12317573321059e-06, "loss": 0.6873, "step": 7036 }, { "epoch": 0.21567365452985165, "grad_norm": 1.9897700076369427, "learning_rate": 9.122894963275221e-06, "loss": 0.8689, "step": 7037 }, { "epoch": 0.21570430305259286, "grad_norm": 1.8172517524263982, "learning_rate": 9.122614152715917e-06, "loss": 0.7379, "step": 7038 }, { "epoch": 0.21573495157533407, "grad_norm": 1.8900692846755418, "learning_rate": 9.122333301535438e-06, "loss": 0.8025, "step": 7039 }, { "epoch": 0.21576560009807527, "grad_norm": 0.8624868582150736, "learning_rate": 9.122052409736554e-06, "loss": 0.488, "step": 7040 }, { "epoch": 0.21579624862081648, "grad_norm": 1.738261637091519, "learning_rate": 9.121771477322035e-06, "loss": 0.7093, "step": 7041 }, { "epoch": 0.21582689714355768, "grad_norm": 1.8656479654735891, "learning_rate": 9.121490504294645e-06, "loss": 0.8324, "step": 7042 }, { "epoch": 0.2158575456662989, "grad_norm": 2.1669598402821184, "learning_rate": 9.121209490657156e-06, "loss": 0.7224, "step": 7043 }, { "epoch": 0.2158881941890401, "grad_norm": 2.0877385774822286, "learning_rate": 9.120928436412334e-06, "loss": 0.7274, "step": 7044 }, { "epoch": 0.2159188427117813, "grad_norm": 1.7418619965732447, "learning_rate": 9.120647341562952e-06, "loss": 0.6997, "step": 7045 }, { "epoch": 0.2159494912345225, "grad_norm": 0.8188971671813499, "learning_rate": 9.120366206111777e-06, "loss": 0.4785, "step": 7046 }, { "epoch": 0.2159801397572637, "grad_norm": 1.4934270705168542, "learning_rate": 9.120085030061577e-06, "loss": 0.6084, "step": 7047 }, { "epoch": 0.21601078828000492, "grad_norm": 1.5601515778332495, "learning_rate": 9.119803813415126e-06, "loss": 0.7146, "step": 7048 }, { "epoch": 0.2160414368027461, "grad_norm": 1.708619498438601, "learning_rate": 9.119522556175196e-06, "loss": 0.7821, "step": 7049 }, { "epoch": 0.2160720853254873, "grad_norm": 0.8219705883500764, "learning_rate": 9.119241258344554e-06, "loss": 0.5102, "step": 7050 }, { "epoch": 0.2161027338482285, "grad_norm": 1.7481020773937648, "learning_rate": 9.118959919925977e-06, "loss": 0.687, "step": 7051 }, { "epoch": 0.2161333823709697, "grad_norm": 1.8334622480544054, "learning_rate": 9.11867854092223e-06, "loss": 0.8046, "step": 7052 }, { "epoch": 0.21616403089371092, "grad_norm": 1.6530030925327817, "learning_rate": 9.118397121336092e-06, "loss": 0.6857, "step": 7053 }, { "epoch": 0.21619467941645212, "grad_norm": 1.7008933610733916, "learning_rate": 9.118115661170333e-06, "loss": 0.7062, "step": 7054 }, { "epoch": 0.21622532793919333, "grad_norm": 0.7967563821403555, "learning_rate": 9.117834160427726e-06, "loss": 0.4804, "step": 7055 }, { "epoch": 0.21625597646193453, "grad_norm": 1.7910497957750997, "learning_rate": 9.117552619111046e-06, "loss": 0.8931, "step": 7056 }, { "epoch": 0.21628662498467574, "grad_norm": 1.5147829238799952, "learning_rate": 9.117271037223065e-06, "loss": 0.6528, "step": 7057 }, { "epoch": 0.21631727350741695, "grad_norm": 1.728459307467497, "learning_rate": 9.116989414766561e-06, "loss": 0.709, "step": 7058 }, { "epoch": 0.21634792203015815, "grad_norm": 1.79336557276472, "learning_rate": 9.116707751744307e-06, "loss": 0.7659, "step": 7059 }, { "epoch": 0.21637857055289936, "grad_norm": 1.9691897384826764, "learning_rate": 9.116426048159077e-06, "loss": 0.713, "step": 7060 }, { "epoch": 0.21640921907564056, "grad_norm": 1.5997935082473445, "learning_rate": 9.116144304013648e-06, "loss": 0.6656, "step": 7061 }, { "epoch": 0.21643986759838177, "grad_norm": 1.7042856765830088, "learning_rate": 9.115862519310797e-06, "loss": 0.7825, "step": 7062 }, { "epoch": 0.21647051612112297, "grad_norm": 1.8974012093032582, "learning_rate": 9.115580694053298e-06, "loss": 0.729, "step": 7063 }, { "epoch": 0.21650116464386415, "grad_norm": 1.5229856527185082, "learning_rate": 9.11529882824393e-06, "loss": 0.7275, "step": 7064 }, { "epoch": 0.21653181316660536, "grad_norm": 1.8244652489474025, "learning_rate": 9.115016921885471e-06, "loss": 0.7218, "step": 7065 }, { "epoch": 0.21656246168934656, "grad_norm": 1.5023927868695335, "learning_rate": 9.114734974980697e-06, "loss": 0.6806, "step": 7066 }, { "epoch": 0.21659311021208777, "grad_norm": 1.7266523186297662, "learning_rate": 9.114452987532387e-06, "loss": 0.6992, "step": 7067 }, { "epoch": 0.21662375873482898, "grad_norm": 1.9458815186348506, "learning_rate": 9.114170959543317e-06, "loss": 0.7006, "step": 7068 }, { "epoch": 0.21665440725757018, "grad_norm": 0.850397067808177, "learning_rate": 9.11388889101627e-06, "loss": 0.4816, "step": 7069 }, { "epoch": 0.2166850557803114, "grad_norm": 1.8129581366849368, "learning_rate": 9.11360678195402e-06, "loss": 0.7751, "step": 7070 }, { "epoch": 0.2167157043030526, "grad_norm": 1.7804917087480037, "learning_rate": 9.113324632359352e-06, "loss": 0.6106, "step": 7071 }, { "epoch": 0.2167463528257938, "grad_norm": 1.7868080034841054, "learning_rate": 9.113042442235045e-06, "loss": 0.7226, "step": 7072 }, { "epoch": 0.216777001348535, "grad_norm": 1.6521230715269768, "learning_rate": 9.112760211583878e-06, "loss": 0.6204, "step": 7073 }, { "epoch": 0.2168076498712762, "grad_norm": 0.8209854826762336, "learning_rate": 9.112477940408631e-06, "loss": 0.5079, "step": 7074 }, { "epoch": 0.21683829839401741, "grad_norm": 1.7349357934519176, "learning_rate": 9.112195628712086e-06, "loss": 0.7549, "step": 7075 }, { "epoch": 0.21686894691675862, "grad_norm": 1.733846921448947, "learning_rate": 9.111913276497026e-06, "loss": 0.7849, "step": 7076 }, { "epoch": 0.21689959543949983, "grad_norm": 1.6691899940676953, "learning_rate": 9.111630883766233e-06, "loss": 0.6442, "step": 7077 }, { "epoch": 0.21693024396224103, "grad_norm": 1.7138772532214286, "learning_rate": 9.111348450522491e-06, "loss": 0.6531, "step": 7078 }, { "epoch": 0.21696089248498224, "grad_norm": 2.1407485039041636, "learning_rate": 9.111065976768578e-06, "loss": 0.7851, "step": 7079 }, { "epoch": 0.21699154100772342, "grad_norm": 1.857524654077512, "learning_rate": 9.11078346250728e-06, "loss": 0.7032, "step": 7080 }, { "epoch": 0.21702218953046462, "grad_norm": 1.8001895156653005, "learning_rate": 9.110500907741383e-06, "loss": 0.6799, "step": 7081 }, { "epoch": 0.21705283805320583, "grad_norm": 0.8314110419405524, "learning_rate": 9.110218312473667e-06, "loss": 0.4768, "step": 7082 }, { "epoch": 0.21708348657594703, "grad_norm": 2.451298643712965, "learning_rate": 9.109935676706918e-06, "loss": 0.7256, "step": 7083 }, { "epoch": 0.21711413509868824, "grad_norm": 1.8993601257018564, "learning_rate": 9.109653000443921e-06, "loss": 0.7641, "step": 7084 }, { "epoch": 0.21714478362142944, "grad_norm": 1.7145148245858406, "learning_rate": 9.109370283687462e-06, "loss": 0.6789, "step": 7085 }, { "epoch": 0.21717543214417065, "grad_norm": 1.9836327630937687, "learning_rate": 9.109087526440328e-06, "loss": 0.6377, "step": 7086 }, { "epoch": 0.21720608066691185, "grad_norm": 1.8761217470782527, "learning_rate": 9.108804728705302e-06, "loss": 0.833, "step": 7087 }, { "epoch": 0.21723672918965306, "grad_norm": 1.8424136422740092, "learning_rate": 9.108521890485172e-06, "loss": 0.7587, "step": 7088 }, { "epoch": 0.21726737771239427, "grad_norm": 1.666166729724519, "learning_rate": 9.108239011782722e-06, "loss": 0.7655, "step": 7089 }, { "epoch": 0.21729802623513547, "grad_norm": 1.839023541702506, "learning_rate": 9.107956092600745e-06, "loss": 0.6438, "step": 7090 }, { "epoch": 0.21732867475787668, "grad_norm": 0.8504605381907147, "learning_rate": 9.107673132942025e-06, "loss": 0.4951, "step": 7091 }, { "epoch": 0.21735932328061788, "grad_norm": 0.9066900424999271, "learning_rate": 9.10739013280935e-06, "loss": 0.5163, "step": 7092 }, { "epoch": 0.2173899718033591, "grad_norm": 1.9552861736759144, "learning_rate": 9.10710709220551e-06, "loss": 0.8272, "step": 7093 }, { "epoch": 0.2174206203261003, "grad_norm": 0.8191989015384193, "learning_rate": 9.10682401113329e-06, "loss": 0.4971, "step": 7094 }, { "epoch": 0.21745126884884147, "grad_norm": 1.6699209127753465, "learning_rate": 9.106540889595485e-06, "loss": 0.6154, "step": 7095 }, { "epoch": 0.21748191737158268, "grad_norm": 1.8522933356871243, "learning_rate": 9.106257727594883e-06, "loss": 0.7287, "step": 7096 }, { "epoch": 0.21751256589432388, "grad_norm": 1.7324377290465198, "learning_rate": 9.105974525134272e-06, "loss": 0.7382, "step": 7097 }, { "epoch": 0.2175432144170651, "grad_norm": 1.985336576738761, "learning_rate": 9.105691282216442e-06, "loss": 0.7701, "step": 7098 }, { "epoch": 0.2175738629398063, "grad_norm": 1.7359304731852356, "learning_rate": 9.105407998844186e-06, "loss": 0.7413, "step": 7099 }, { "epoch": 0.2176045114625475, "grad_norm": 1.5447268371008172, "learning_rate": 9.105124675020294e-06, "loss": 0.6839, "step": 7100 }, { "epoch": 0.2176351599852887, "grad_norm": 1.8473447659477975, "learning_rate": 9.104841310747559e-06, "loss": 0.7534, "step": 7101 }, { "epoch": 0.2176658085080299, "grad_norm": 0.9785913230250081, "learning_rate": 9.104557906028773e-06, "loss": 0.4797, "step": 7102 }, { "epoch": 0.21769645703077112, "grad_norm": 1.7789952146970998, "learning_rate": 9.104274460866726e-06, "loss": 0.6797, "step": 7103 }, { "epoch": 0.21772710555351232, "grad_norm": 0.8878850837012774, "learning_rate": 9.103990975264214e-06, "loss": 0.5123, "step": 7104 }, { "epoch": 0.21775775407625353, "grad_norm": 1.6209520818787677, "learning_rate": 9.103707449224028e-06, "loss": 0.6494, "step": 7105 }, { "epoch": 0.21778840259899473, "grad_norm": 1.7358920015548536, "learning_rate": 9.103423882748963e-06, "loss": 0.74, "step": 7106 }, { "epoch": 0.21781905112173594, "grad_norm": 1.6912836227239112, "learning_rate": 9.103140275841812e-06, "loss": 0.6389, "step": 7107 }, { "epoch": 0.21784969964447715, "grad_norm": 1.8788804583675693, "learning_rate": 9.10285662850537e-06, "loss": 0.7635, "step": 7108 }, { "epoch": 0.21788034816721835, "grad_norm": 1.6454567445055497, "learning_rate": 9.102572940742433e-06, "loss": 0.6553, "step": 7109 }, { "epoch": 0.21791099668995956, "grad_norm": 1.8076331137805495, "learning_rate": 9.102289212555795e-06, "loss": 0.7861, "step": 7110 }, { "epoch": 0.21794164521270074, "grad_norm": 1.7422786965801842, "learning_rate": 9.102005443948252e-06, "loss": 0.7382, "step": 7111 }, { "epoch": 0.21797229373544194, "grad_norm": 1.076544475564577, "learning_rate": 9.1017216349226e-06, "loss": 0.4892, "step": 7112 }, { "epoch": 0.21800294225818315, "grad_norm": 1.6538280281415974, "learning_rate": 9.101437785481633e-06, "loss": 0.6501, "step": 7113 }, { "epoch": 0.21803359078092435, "grad_norm": 2.1269975455692993, "learning_rate": 9.101153895628152e-06, "loss": 0.8212, "step": 7114 }, { "epoch": 0.21806423930366556, "grad_norm": 1.5462189900265397, "learning_rate": 9.10086996536495e-06, "loss": 0.7037, "step": 7115 }, { "epoch": 0.21809488782640676, "grad_norm": 1.8154367217653815, "learning_rate": 9.10058599469483e-06, "loss": 0.7465, "step": 7116 }, { "epoch": 0.21812553634914797, "grad_norm": 1.7653320686205385, "learning_rate": 9.100301983620587e-06, "loss": 0.766, "step": 7117 }, { "epoch": 0.21815618487188917, "grad_norm": 1.6784367383421954, "learning_rate": 9.100017932145017e-06, "loss": 0.7138, "step": 7118 }, { "epoch": 0.21818683339463038, "grad_norm": 1.8617341804643892, "learning_rate": 9.099733840270923e-06, "loss": 0.6513, "step": 7119 }, { "epoch": 0.2182174819173716, "grad_norm": 1.7198980581363201, "learning_rate": 9.099449708001102e-06, "loss": 0.6759, "step": 7120 }, { "epoch": 0.2182481304401128, "grad_norm": 1.5751393625097927, "learning_rate": 9.099165535338355e-06, "loss": 0.7908, "step": 7121 }, { "epoch": 0.218278778962854, "grad_norm": 1.751582865864355, "learning_rate": 9.09888132228548e-06, "loss": 0.6562, "step": 7122 }, { "epoch": 0.2183094274855952, "grad_norm": 1.62409236227919, "learning_rate": 9.098597068845279e-06, "loss": 0.7783, "step": 7123 }, { "epoch": 0.2183400760083364, "grad_norm": 1.5965814800743048, "learning_rate": 9.098312775020552e-06, "loss": 0.6204, "step": 7124 }, { "epoch": 0.21837072453107761, "grad_norm": 1.6214402375000039, "learning_rate": 9.098028440814101e-06, "loss": 0.6427, "step": 7125 }, { "epoch": 0.2184013730538188, "grad_norm": 1.6471174298633096, "learning_rate": 9.097744066228728e-06, "loss": 0.597, "step": 7126 }, { "epoch": 0.21843202157656, "grad_norm": 1.7051990411057367, "learning_rate": 9.097459651267233e-06, "loss": 0.7877, "step": 7127 }, { "epoch": 0.2184626700993012, "grad_norm": 2.008263002779389, "learning_rate": 9.09717519593242e-06, "loss": 0.7684, "step": 7128 }, { "epoch": 0.2184933186220424, "grad_norm": 2.101900627539187, "learning_rate": 9.096890700227093e-06, "loss": 0.787, "step": 7129 }, { "epoch": 0.21852396714478362, "grad_norm": 1.8417664227620556, "learning_rate": 9.096606164154052e-06, "loss": 0.6023, "step": 7130 }, { "epoch": 0.21855461566752482, "grad_norm": 1.9804543929299612, "learning_rate": 9.096321587716101e-06, "loss": 0.7846, "step": 7131 }, { "epoch": 0.21858526419026603, "grad_norm": 1.9966369782621776, "learning_rate": 9.096036970916048e-06, "loss": 0.7447, "step": 7132 }, { "epoch": 0.21861591271300723, "grad_norm": 1.9167500033888802, "learning_rate": 9.095752313756695e-06, "loss": 0.7933, "step": 7133 }, { "epoch": 0.21864656123574844, "grad_norm": 1.5971198593046203, "learning_rate": 9.095467616240844e-06, "loss": 0.7363, "step": 7134 }, { "epoch": 0.21867720975848964, "grad_norm": 1.8182875227065476, "learning_rate": 9.095182878371304e-06, "loss": 0.6705, "step": 7135 }, { "epoch": 0.21870785828123085, "grad_norm": 2.032758601275622, "learning_rate": 9.09489810015088e-06, "loss": 0.788, "step": 7136 }, { "epoch": 0.21873850680397205, "grad_norm": 1.800561875782836, "learning_rate": 9.094613281582376e-06, "loss": 0.7474, "step": 7137 }, { "epoch": 0.21876915532671326, "grad_norm": 1.726116553720526, "learning_rate": 9.0943284226686e-06, "loss": 0.7179, "step": 7138 }, { "epoch": 0.21879980384945447, "grad_norm": 1.7429955680533564, "learning_rate": 9.094043523412359e-06, "loss": 0.695, "step": 7139 }, { "epoch": 0.21883045237219567, "grad_norm": 2.235209876078084, "learning_rate": 9.093758583816459e-06, "loss": 0.6729, "step": 7140 }, { "epoch": 0.21886110089493688, "grad_norm": 1.7081785188254075, "learning_rate": 9.09347360388371e-06, "loss": 0.7927, "step": 7141 }, { "epoch": 0.21889174941767806, "grad_norm": 1.685805615242057, "learning_rate": 9.093188583616917e-06, "loss": 0.7145, "step": 7142 }, { "epoch": 0.21892239794041926, "grad_norm": 1.8288839656481648, "learning_rate": 9.092903523018888e-06, "loss": 0.7068, "step": 7143 }, { "epoch": 0.21895304646316047, "grad_norm": 0.9958586841237852, "learning_rate": 9.092618422092434e-06, "loss": 0.4855, "step": 7144 }, { "epoch": 0.21898369498590167, "grad_norm": 1.6517927178381622, "learning_rate": 9.092333280840365e-06, "loss": 0.7049, "step": 7145 }, { "epoch": 0.21901434350864288, "grad_norm": 1.899153315400226, "learning_rate": 9.092048099265489e-06, "loss": 0.7608, "step": 7146 }, { "epoch": 0.21904499203138408, "grad_norm": 1.9422444919516944, "learning_rate": 9.091762877370616e-06, "loss": 0.7484, "step": 7147 }, { "epoch": 0.2190756405541253, "grad_norm": 1.9905349317510126, "learning_rate": 9.091477615158555e-06, "loss": 0.7789, "step": 7148 }, { "epoch": 0.2191062890768665, "grad_norm": 1.4513958240139753, "learning_rate": 9.09119231263212e-06, "loss": 0.6376, "step": 7149 }, { "epoch": 0.2191369375996077, "grad_norm": 1.8774005521308978, "learning_rate": 9.09090696979412e-06, "loss": 0.7463, "step": 7150 }, { "epoch": 0.2191675861223489, "grad_norm": 1.9056489875393248, "learning_rate": 9.090621586647365e-06, "loss": 0.7718, "step": 7151 }, { "epoch": 0.2191982346450901, "grad_norm": 1.9179650116424083, "learning_rate": 9.09033616319467e-06, "loss": 0.7196, "step": 7152 }, { "epoch": 0.21922888316783132, "grad_norm": 1.8266027743953452, "learning_rate": 9.090050699438848e-06, "loss": 0.7897, "step": 7153 }, { "epoch": 0.21925953169057252, "grad_norm": 1.711201598071196, "learning_rate": 9.089765195382708e-06, "loss": 0.6653, "step": 7154 }, { "epoch": 0.21929018021331373, "grad_norm": 1.770993527223573, "learning_rate": 9.089479651029065e-06, "loss": 0.7374, "step": 7155 }, { "epoch": 0.21932082873605493, "grad_norm": 1.7401481026511076, "learning_rate": 9.089194066380735e-06, "loss": 0.6571, "step": 7156 }, { "epoch": 0.2193514772587961, "grad_norm": 1.6986952667070658, "learning_rate": 9.088908441440527e-06, "loss": 0.6751, "step": 7157 }, { "epoch": 0.21938212578153732, "grad_norm": 1.9708440101748483, "learning_rate": 9.088622776211257e-06, "loss": 0.6809, "step": 7158 }, { "epoch": 0.21941277430427852, "grad_norm": 1.909115477459308, "learning_rate": 9.088337070695743e-06, "loss": 0.791, "step": 7159 }, { "epoch": 0.21944342282701973, "grad_norm": 1.8287194599251584, "learning_rate": 9.088051324896798e-06, "loss": 0.7394, "step": 7160 }, { "epoch": 0.21947407134976094, "grad_norm": 1.6171295116437245, "learning_rate": 9.087765538817237e-06, "loss": 0.6184, "step": 7161 }, { "epoch": 0.21950471987250214, "grad_norm": 1.662653801356007, "learning_rate": 9.087479712459876e-06, "loss": 0.7169, "step": 7162 }, { "epoch": 0.21953536839524335, "grad_norm": 1.084999562415439, "learning_rate": 9.08719384582753e-06, "loss": 0.5126, "step": 7163 }, { "epoch": 0.21956601691798455, "grad_norm": 1.3373762577302382, "learning_rate": 9.08690793892302e-06, "loss": 0.4807, "step": 7164 }, { "epoch": 0.21959666544072576, "grad_norm": 1.727804775885603, "learning_rate": 9.08662199174916e-06, "loss": 0.7004, "step": 7165 }, { "epoch": 0.21962731396346696, "grad_norm": 2.786002722833153, "learning_rate": 9.086336004308767e-06, "loss": 0.8123, "step": 7166 }, { "epoch": 0.21965796248620817, "grad_norm": 1.9817181146050258, "learning_rate": 9.08604997660466e-06, "loss": 0.7922, "step": 7167 }, { "epoch": 0.21968861100894937, "grad_norm": 1.6964585462054103, "learning_rate": 9.085763908639657e-06, "loss": 0.6965, "step": 7168 }, { "epoch": 0.21971925953169058, "grad_norm": 1.8904201213455456, "learning_rate": 9.085477800416575e-06, "loss": 0.7365, "step": 7169 }, { "epoch": 0.2197499080544318, "grad_norm": 1.6859618539335506, "learning_rate": 9.085191651938238e-06, "loss": 0.6414, "step": 7170 }, { "epoch": 0.219780556577173, "grad_norm": 1.6895164518384773, "learning_rate": 9.08490546320746e-06, "loss": 0.6566, "step": 7171 }, { "epoch": 0.2198112050999142, "grad_norm": 1.8492999181450736, "learning_rate": 9.084619234227066e-06, "loss": 0.7021, "step": 7172 }, { "epoch": 0.21984185362265538, "grad_norm": 2.1256985186760797, "learning_rate": 9.084332964999871e-06, "loss": 0.6651, "step": 7173 }, { "epoch": 0.21987250214539658, "grad_norm": 1.9187385668497148, "learning_rate": 9.0840466555287e-06, "loss": 0.7223, "step": 7174 }, { "epoch": 0.2199031506681378, "grad_norm": 1.7608639076065302, "learning_rate": 9.083760305816372e-06, "loss": 0.6647, "step": 7175 }, { "epoch": 0.219933799190879, "grad_norm": 1.676761760457416, "learning_rate": 9.083473915865707e-06, "loss": 0.7336, "step": 7176 }, { "epoch": 0.2199644477136202, "grad_norm": 1.7133592716971309, "learning_rate": 9.083187485679531e-06, "loss": 0.7099, "step": 7177 }, { "epoch": 0.2199950962363614, "grad_norm": 1.870922696327476, "learning_rate": 9.082901015260664e-06, "loss": 0.6777, "step": 7178 }, { "epoch": 0.2200257447591026, "grad_norm": 1.5546845266727736, "learning_rate": 9.082614504611928e-06, "loss": 0.668, "step": 7179 }, { "epoch": 0.22005639328184382, "grad_norm": 1.713011783793158, "learning_rate": 9.082327953736146e-06, "loss": 0.7066, "step": 7180 }, { "epoch": 0.22008704180458502, "grad_norm": 1.6066551286274326, "learning_rate": 9.082041362636142e-06, "loss": 0.5397, "step": 7181 }, { "epoch": 0.22011769032732623, "grad_norm": 1.5453266571898603, "learning_rate": 9.08175473131474e-06, "loss": 0.6993, "step": 7182 }, { "epoch": 0.22014833885006743, "grad_norm": 1.71013112284106, "learning_rate": 9.081468059774766e-06, "loss": 0.6777, "step": 7183 }, { "epoch": 0.22017898737280864, "grad_norm": 1.6482812045842195, "learning_rate": 9.081181348019041e-06, "loss": 0.5292, "step": 7184 }, { "epoch": 0.22020963589554984, "grad_norm": 1.904771103490963, "learning_rate": 9.080894596050393e-06, "loss": 0.6782, "step": 7185 }, { "epoch": 0.22024028441829105, "grad_norm": 1.627708923244157, "learning_rate": 9.080607803871646e-06, "loss": 0.7441, "step": 7186 }, { "epoch": 0.22027093294103225, "grad_norm": 1.7762294669889718, "learning_rate": 9.080320971485628e-06, "loss": 0.7483, "step": 7187 }, { "epoch": 0.22030158146377343, "grad_norm": 1.507482443965449, "learning_rate": 9.080034098895161e-06, "loss": 0.7359, "step": 7188 }, { "epoch": 0.22033222998651464, "grad_norm": 2.0317459520731287, "learning_rate": 9.079747186103077e-06, "loss": 0.748, "step": 7189 }, { "epoch": 0.22036287850925584, "grad_norm": 0.9792320388775644, "learning_rate": 9.079460233112196e-06, "loss": 0.508, "step": 7190 }, { "epoch": 0.22039352703199705, "grad_norm": 1.7278607940431043, "learning_rate": 9.079173239925352e-06, "loss": 0.5999, "step": 7191 }, { "epoch": 0.22042417555473826, "grad_norm": 1.835907348189121, "learning_rate": 9.078886206545371e-06, "loss": 0.7881, "step": 7192 }, { "epoch": 0.22045482407747946, "grad_norm": 1.6527005580010437, "learning_rate": 9.07859913297508e-06, "loss": 0.5725, "step": 7193 }, { "epoch": 0.22048547260022067, "grad_norm": 1.699597321390202, "learning_rate": 9.078312019217308e-06, "loss": 0.7704, "step": 7194 }, { "epoch": 0.22051612112296187, "grad_norm": 1.6779269244522512, "learning_rate": 9.078024865274883e-06, "loss": 0.6607, "step": 7195 }, { "epoch": 0.22054676964570308, "grad_norm": 2.028041284409162, "learning_rate": 9.077737671150637e-06, "loss": 0.7893, "step": 7196 }, { "epoch": 0.22057741816844428, "grad_norm": 1.94729211953956, "learning_rate": 9.077450436847397e-06, "loss": 0.7558, "step": 7197 }, { "epoch": 0.2206080666911855, "grad_norm": 1.0180382802404044, "learning_rate": 9.077163162367997e-06, "loss": 0.5135, "step": 7198 }, { "epoch": 0.2206387152139267, "grad_norm": 0.913026134720954, "learning_rate": 9.076875847715262e-06, "loss": 0.4957, "step": 7199 }, { "epoch": 0.2206693637366679, "grad_norm": 1.760425771437767, "learning_rate": 9.076588492892029e-06, "loss": 0.6693, "step": 7200 }, { "epoch": 0.2207000122594091, "grad_norm": 0.8013422642168897, "learning_rate": 9.076301097901126e-06, "loss": 0.4988, "step": 7201 }, { "epoch": 0.2207306607821503, "grad_norm": 1.6329299314610557, "learning_rate": 9.076013662745385e-06, "loss": 0.7046, "step": 7202 }, { "epoch": 0.22076130930489152, "grad_norm": 1.7879319706179868, "learning_rate": 9.075726187427639e-06, "loss": 0.6744, "step": 7203 }, { "epoch": 0.2207919578276327, "grad_norm": 1.7132276097212145, "learning_rate": 9.075438671950719e-06, "loss": 0.6471, "step": 7204 }, { "epoch": 0.2208226063503739, "grad_norm": 1.7184992747332986, "learning_rate": 9.07515111631746e-06, "loss": 0.7373, "step": 7205 }, { "epoch": 0.2208532548731151, "grad_norm": 0.969503889052093, "learning_rate": 9.074863520530691e-06, "loss": 0.5158, "step": 7206 }, { "epoch": 0.2208839033958563, "grad_norm": 0.9398062549341248, "learning_rate": 9.074575884593252e-06, "loss": 0.477, "step": 7207 }, { "epoch": 0.22091455191859752, "grad_norm": 1.5958201379735826, "learning_rate": 9.074288208507976e-06, "loss": 0.6675, "step": 7208 }, { "epoch": 0.22094520044133872, "grad_norm": 1.7966319584668742, "learning_rate": 9.074000492277695e-06, "loss": 0.7831, "step": 7209 }, { "epoch": 0.22097584896407993, "grad_norm": 0.8206600430566764, "learning_rate": 9.073712735905242e-06, "loss": 0.5053, "step": 7210 }, { "epoch": 0.22100649748682114, "grad_norm": 0.8930602852992586, "learning_rate": 9.073424939393458e-06, "loss": 0.4875, "step": 7211 }, { "epoch": 0.22103714600956234, "grad_norm": 1.723512288130864, "learning_rate": 9.073137102745174e-06, "loss": 0.7268, "step": 7212 }, { "epoch": 0.22106779453230355, "grad_norm": 1.8763337498577557, "learning_rate": 9.07284922596323e-06, "loss": 0.7159, "step": 7213 }, { "epoch": 0.22109844305504475, "grad_norm": 0.86149875332806, "learning_rate": 9.072561309050459e-06, "loss": 0.4959, "step": 7214 }, { "epoch": 0.22112909157778596, "grad_norm": 1.8653608484237292, "learning_rate": 9.072273352009699e-06, "loss": 0.6249, "step": 7215 }, { "epoch": 0.22115974010052716, "grad_norm": 1.6854613793544393, "learning_rate": 9.071985354843789e-06, "loss": 0.6698, "step": 7216 }, { "epoch": 0.22119038862326837, "grad_norm": 2.0369295662963127, "learning_rate": 9.071697317555564e-06, "loss": 0.7596, "step": 7217 }, { "epoch": 0.22122103714600957, "grad_norm": 1.8714142746098434, "learning_rate": 9.071409240147865e-06, "loss": 0.6767, "step": 7218 }, { "epoch": 0.22125168566875075, "grad_norm": 1.6978276700971047, "learning_rate": 9.071121122623528e-06, "loss": 0.6708, "step": 7219 }, { "epoch": 0.22128233419149196, "grad_norm": 1.825811090882698, "learning_rate": 9.070832964985393e-06, "loss": 0.8332, "step": 7220 }, { "epoch": 0.22131298271423316, "grad_norm": 2.073185129672899, "learning_rate": 9.0705447672363e-06, "loss": 0.6504, "step": 7221 }, { "epoch": 0.22134363123697437, "grad_norm": 1.6584040856201727, "learning_rate": 9.070256529379087e-06, "loss": 0.7122, "step": 7222 }, { "epoch": 0.22137427975971558, "grad_norm": 1.646583631819351, "learning_rate": 9.069968251416596e-06, "loss": 0.7556, "step": 7223 }, { "epoch": 0.22140492828245678, "grad_norm": 1.6415409284982136, "learning_rate": 9.069679933351666e-06, "loss": 0.7305, "step": 7224 }, { "epoch": 0.221435576805198, "grad_norm": 1.9851587703552713, "learning_rate": 9.069391575187137e-06, "loss": 0.7011, "step": 7225 }, { "epoch": 0.2214662253279392, "grad_norm": 1.8465499341180633, "learning_rate": 9.069103176925853e-06, "loss": 0.6967, "step": 7226 }, { "epoch": 0.2214968738506804, "grad_norm": 1.6861340738754969, "learning_rate": 9.068814738570655e-06, "loss": 0.8042, "step": 7227 }, { "epoch": 0.2215275223734216, "grad_norm": 0.920022790084979, "learning_rate": 9.068526260124383e-06, "loss": 0.5039, "step": 7228 }, { "epoch": 0.2215581708961628, "grad_norm": 1.6930808249761977, "learning_rate": 9.06823774158988e-06, "loss": 0.7368, "step": 7229 }, { "epoch": 0.22158881941890402, "grad_norm": 1.7099261459256894, "learning_rate": 9.067949182969993e-06, "loss": 0.8041, "step": 7230 }, { "epoch": 0.22161946794164522, "grad_norm": 1.6232175587205757, "learning_rate": 9.067660584267559e-06, "loss": 0.6318, "step": 7231 }, { "epoch": 0.22165011646438643, "grad_norm": 1.6960210191641032, "learning_rate": 9.067371945485426e-06, "loss": 0.8125, "step": 7232 }, { "epoch": 0.22168076498712763, "grad_norm": 0.7805752960541723, "learning_rate": 9.067083266626436e-06, "loss": 0.4608, "step": 7233 }, { "epoch": 0.22171141350986884, "grad_norm": 1.8640614787455065, "learning_rate": 9.066794547693433e-06, "loss": 0.7562, "step": 7234 }, { "epoch": 0.22174206203261002, "grad_norm": 1.695900219607567, "learning_rate": 9.066505788689264e-06, "loss": 0.6987, "step": 7235 }, { "epoch": 0.22177271055535122, "grad_norm": 1.5979812633744346, "learning_rate": 9.066216989616772e-06, "loss": 0.6635, "step": 7236 }, { "epoch": 0.22180335907809243, "grad_norm": 2.1146072001813874, "learning_rate": 9.065928150478804e-06, "loss": 0.7272, "step": 7237 }, { "epoch": 0.22183400760083363, "grad_norm": 1.6866397295740716, "learning_rate": 9.065639271278205e-06, "loss": 0.7063, "step": 7238 }, { "epoch": 0.22186465612357484, "grad_norm": 1.73297591248751, "learning_rate": 9.065350352017822e-06, "loss": 0.8331, "step": 7239 }, { "epoch": 0.22189530464631604, "grad_norm": 0.7848873585771222, "learning_rate": 9.065061392700504e-06, "loss": 0.4836, "step": 7240 }, { "epoch": 0.22192595316905725, "grad_norm": 1.7234138887695196, "learning_rate": 9.064772393329094e-06, "loss": 0.7538, "step": 7241 }, { "epoch": 0.22195660169179846, "grad_norm": 1.6892772306129742, "learning_rate": 9.064483353906443e-06, "loss": 0.7004, "step": 7242 }, { "epoch": 0.22198725021453966, "grad_norm": 1.9072274486215854, "learning_rate": 9.064194274435396e-06, "loss": 0.7109, "step": 7243 }, { "epoch": 0.22201789873728087, "grad_norm": 1.8406695342281725, "learning_rate": 9.063905154918804e-06, "loss": 0.7185, "step": 7244 }, { "epoch": 0.22204854726002207, "grad_norm": 0.8284376035524147, "learning_rate": 9.063615995359514e-06, "loss": 0.4964, "step": 7245 }, { "epoch": 0.22207919578276328, "grad_norm": 1.790291978774024, "learning_rate": 9.063326795760377e-06, "loss": 0.7357, "step": 7246 }, { "epoch": 0.22210984430550448, "grad_norm": 1.860853383299195, "learning_rate": 9.06303755612424e-06, "loss": 0.6909, "step": 7247 }, { "epoch": 0.2221404928282457, "grad_norm": 2.0761117729083844, "learning_rate": 9.062748276453956e-06, "loss": 0.7171, "step": 7248 }, { "epoch": 0.2221711413509869, "grad_norm": 1.6656554006915596, "learning_rate": 9.062458956752374e-06, "loss": 0.694, "step": 7249 }, { "epoch": 0.22220178987372807, "grad_norm": 1.8381050145345985, "learning_rate": 9.062169597022343e-06, "loss": 0.8065, "step": 7250 }, { "epoch": 0.22223243839646928, "grad_norm": 1.6774585281370433, "learning_rate": 9.061880197266715e-06, "loss": 0.6964, "step": 7251 }, { "epoch": 0.22226308691921048, "grad_norm": 1.941513060290912, "learning_rate": 9.061590757488343e-06, "loss": 0.7205, "step": 7252 }, { "epoch": 0.2222937354419517, "grad_norm": 1.6065259079363639, "learning_rate": 9.061301277690079e-06, "loss": 0.6258, "step": 7253 }, { "epoch": 0.2223243839646929, "grad_norm": 1.811983273380415, "learning_rate": 9.061011757874773e-06, "loss": 0.8115, "step": 7254 }, { "epoch": 0.2223550324874341, "grad_norm": 1.8618329061269305, "learning_rate": 9.06072219804528e-06, "loss": 0.6889, "step": 7255 }, { "epoch": 0.2223856810101753, "grad_norm": 1.6307342060928272, "learning_rate": 9.060432598204452e-06, "loss": 0.6928, "step": 7256 }, { "epoch": 0.2224163295329165, "grad_norm": 1.682667239113293, "learning_rate": 9.060142958355143e-06, "loss": 0.7426, "step": 7257 }, { "epoch": 0.22244697805565772, "grad_norm": 1.7203704057104983, "learning_rate": 9.059853278500206e-06, "loss": 0.6835, "step": 7258 }, { "epoch": 0.22247762657839892, "grad_norm": 1.8120804140796578, "learning_rate": 9.059563558642495e-06, "loss": 0.7622, "step": 7259 }, { "epoch": 0.22250827510114013, "grad_norm": 1.8514417277572586, "learning_rate": 9.059273798784867e-06, "loss": 0.7653, "step": 7260 }, { "epoch": 0.22253892362388134, "grad_norm": 1.8013851247477497, "learning_rate": 9.058983998930176e-06, "loss": 0.727, "step": 7261 }, { "epoch": 0.22256957214662254, "grad_norm": 1.5693566106691212, "learning_rate": 9.058694159081275e-06, "loss": 0.6653, "step": 7262 }, { "epoch": 0.22260022066936375, "grad_norm": 1.8292601227007605, "learning_rate": 9.058404279241024e-06, "loss": 0.8285, "step": 7263 }, { "epoch": 0.22263086919210495, "grad_norm": 1.8295029240946261, "learning_rate": 9.058114359412277e-06, "loss": 0.7143, "step": 7264 }, { "epoch": 0.22266151771484616, "grad_norm": 1.6594400053516851, "learning_rate": 9.057824399597892e-06, "loss": 0.7377, "step": 7265 }, { "epoch": 0.22269216623758734, "grad_norm": 1.7200358872977202, "learning_rate": 9.057534399800722e-06, "loss": 0.762, "step": 7266 }, { "epoch": 0.22272281476032854, "grad_norm": 1.9787723609032866, "learning_rate": 9.05724436002363e-06, "loss": 0.7061, "step": 7267 }, { "epoch": 0.22275346328306975, "grad_norm": 1.6440623020067011, "learning_rate": 9.05695428026947e-06, "loss": 0.6879, "step": 7268 }, { "epoch": 0.22278411180581095, "grad_norm": 1.8384179820800728, "learning_rate": 9.0566641605411e-06, "loss": 0.7911, "step": 7269 }, { "epoch": 0.22281476032855216, "grad_norm": 1.5847521152861799, "learning_rate": 9.05637400084138e-06, "loss": 0.7171, "step": 7270 }, { "epoch": 0.22284540885129336, "grad_norm": 1.6926432437048657, "learning_rate": 9.056083801173172e-06, "loss": 0.7123, "step": 7271 }, { "epoch": 0.22287605737403457, "grad_norm": 1.5667543768580676, "learning_rate": 9.055793561539332e-06, "loss": 0.6684, "step": 7272 }, { "epoch": 0.22290670589677578, "grad_norm": 1.712712810243882, "learning_rate": 9.05550328194272e-06, "loss": 0.7779, "step": 7273 }, { "epoch": 0.22293735441951698, "grad_norm": 1.8953906881466893, "learning_rate": 9.055212962386196e-06, "loss": 0.8389, "step": 7274 }, { "epoch": 0.2229680029422582, "grad_norm": 1.7091018896868844, "learning_rate": 9.054922602872621e-06, "loss": 0.8096, "step": 7275 }, { "epoch": 0.2229986514649994, "grad_norm": 1.843104408403717, "learning_rate": 9.054632203404856e-06, "loss": 0.7079, "step": 7276 }, { "epoch": 0.2230292999877406, "grad_norm": 1.9492363288829004, "learning_rate": 9.054341763985764e-06, "loss": 0.6595, "step": 7277 }, { "epoch": 0.2230599485104818, "grad_norm": 0.8839794415471485, "learning_rate": 9.054051284618205e-06, "loss": 0.4715, "step": 7278 }, { "epoch": 0.223090597033223, "grad_norm": 1.9182775873334696, "learning_rate": 9.053760765305039e-06, "loss": 0.8098, "step": 7279 }, { "epoch": 0.22312124555596421, "grad_norm": 1.744755994479875, "learning_rate": 9.053470206049133e-06, "loss": 0.7154, "step": 7280 }, { "epoch": 0.2231518940787054, "grad_norm": 1.674311190560118, "learning_rate": 9.053179606853346e-06, "loss": 0.61, "step": 7281 }, { "epoch": 0.2231825426014466, "grad_norm": 0.7752217457177947, "learning_rate": 9.052888967720546e-06, "loss": 0.4793, "step": 7282 }, { "epoch": 0.2232131911241878, "grad_norm": 1.692762397168351, "learning_rate": 9.052598288653592e-06, "loss": 0.7314, "step": 7283 }, { "epoch": 0.223243839646929, "grad_norm": 1.94534398039471, "learning_rate": 9.052307569655351e-06, "loss": 0.6985, "step": 7284 }, { "epoch": 0.22327448816967022, "grad_norm": 1.8133342534150314, "learning_rate": 9.052016810728686e-06, "loss": 0.7568, "step": 7285 }, { "epoch": 0.22330513669241142, "grad_norm": 1.6322749827820264, "learning_rate": 9.051726011876464e-06, "loss": 0.6493, "step": 7286 }, { "epoch": 0.22333578521515263, "grad_norm": 1.6971951179425881, "learning_rate": 9.051435173101549e-06, "loss": 0.5797, "step": 7287 }, { "epoch": 0.22336643373789383, "grad_norm": 1.750093496055058, "learning_rate": 9.051144294406803e-06, "loss": 0.687, "step": 7288 }, { "epoch": 0.22339708226063504, "grad_norm": 2.0514547466399335, "learning_rate": 9.0508533757951e-06, "loss": 0.801, "step": 7289 }, { "epoch": 0.22342773078337624, "grad_norm": 1.9362584166232653, "learning_rate": 9.050562417269301e-06, "loss": 0.7886, "step": 7290 }, { "epoch": 0.22345837930611745, "grad_norm": 0.9641870557316453, "learning_rate": 9.050271418832272e-06, "loss": 0.4719, "step": 7291 }, { "epoch": 0.22348902782885866, "grad_norm": 1.7908365366408745, "learning_rate": 9.049980380486887e-06, "loss": 0.6364, "step": 7292 }, { "epoch": 0.22351967635159986, "grad_norm": 2.309166068038384, "learning_rate": 9.049689302236005e-06, "loss": 0.838, "step": 7293 }, { "epoch": 0.22355032487434107, "grad_norm": 1.7595374235578616, "learning_rate": 9.049398184082499e-06, "loss": 0.7315, "step": 7294 }, { "epoch": 0.22358097339708227, "grad_norm": 0.7909986982947056, "learning_rate": 9.049107026029236e-06, "loss": 0.4785, "step": 7295 }, { "epoch": 0.22361162191982348, "grad_norm": 1.9542372159774366, "learning_rate": 9.048815828079087e-06, "loss": 0.7365, "step": 7296 }, { "epoch": 0.22364227044256466, "grad_norm": 1.769232272303367, "learning_rate": 9.04852459023492e-06, "loss": 0.7041, "step": 7297 }, { "epoch": 0.22367291896530586, "grad_norm": 1.9858459225642504, "learning_rate": 9.048233312499604e-06, "loss": 0.7583, "step": 7298 }, { "epoch": 0.22370356748804707, "grad_norm": 1.855506082236666, "learning_rate": 9.047941994876008e-06, "loss": 0.7912, "step": 7299 }, { "epoch": 0.22373421601078827, "grad_norm": 0.8135987569853617, "learning_rate": 9.047650637367005e-06, "loss": 0.4939, "step": 7300 }, { "epoch": 0.22376486453352948, "grad_norm": 1.854998989699253, "learning_rate": 9.047359239975464e-06, "loss": 0.6514, "step": 7301 }, { "epoch": 0.22379551305627068, "grad_norm": 1.9938552471042168, "learning_rate": 9.047067802704259e-06, "loss": 0.783, "step": 7302 }, { "epoch": 0.2238261615790119, "grad_norm": 1.8556695419391374, "learning_rate": 9.046776325556257e-06, "loss": 0.8172, "step": 7303 }, { "epoch": 0.2238568101017531, "grad_norm": 1.576687555616452, "learning_rate": 9.046484808534333e-06, "loss": 0.6548, "step": 7304 }, { "epoch": 0.2238874586244943, "grad_norm": 1.725530857246149, "learning_rate": 9.04619325164136e-06, "loss": 0.7868, "step": 7305 }, { "epoch": 0.2239181071472355, "grad_norm": 1.721247726874564, "learning_rate": 9.045901654880207e-06, "loss": 0.642, "step": 7306 }, { "epoch": 0.2239487556699767, "grad_norm": 1.7356774106853285, "learning_rate": 9.045610018253752e-06, "loss": 0.744, "step": 7307 }, { "epoch": 0.22397940419271792, "grad_norm": 0.9350285207837065, "learning_rate": 9.045318341764866e-06, "loss": 0.498, "step": 7308 }, { "epoch": 0.22401005271545912, "grad_norm": 0.8978290677889298, "learning_rate": 9.045026625416423e-06, "loss": 0.4991, "step": 7309 }, { "epoch": 0.22404070123820033, "grad_norm": 0.7697166999009274, "learning_rate": 9.044734869211298e-06, "loss": 0.4766, "step": 7310 }, { "epoch": 0.22407134976094154, "grad_norm": 1.8507168670535858, "learning_rate": 9.044443073152364e-06, "loss": 0.5882, "step": 7311 }, { "epoch": 0.2241019982836827, "grad_norm": 1.6977939144912715, "learning_rate": 9.0441512372425e-06, "loss": 0.772, "step": 7312 }, { "epoch": 0.22413264680642392, "grad_norm": 1.8434480849639878, "learning_rate": 9.043859361484578e-06, "loss": 0.7969, "step": 7313 }, { "epoch": 0.22416329532916512, "grad_norm": 1.6835559164067668, "learning_rate": 9.043567445881475e-06, "loss": 0.7406, "step": 7314 }, { "epoch": 0.22419394385190633, "grad_norm": 1.8271958142514924, "learning_rate": 9.043275490436068e-06, "loss": 0.6769, "step": 7315 }, { "epoch": 0.22422459237464754, "grad_norm": 1.8695538517436159, "learning_rate": 9.042983495151232e-06, "loss": 0.7777, "step": 7316 }, { "epoch": 0.22425524089738874, "grad_norm": 1.3777433253693159, "learning_rate": 9.042691460029846e-06, "loss": 0.4916, "step": 7317 }, { "epoch": 0.22428588942012995, "grad_norm": 1.07567908918064, "learning_rate": 9.042399385074785e-06, "loss": 0.4892, "step": 7318 }, { "epoch": 0.22431653794287115, "grad_norm": 1.747189176709027, "learning_rate": 9.042107270288932e-06, "loss": 0.7829, "step": 7319 }, { "epoch": 0.22434718646561236, "grad_norm": 1.8890013866472137, "learning_rate": 9.04181511567516e-06, "loss": 0.732, "step": 7320 }, { "epoch": 0.22437783498835356, "grad_norm": 1.1313582660041663, "learning_rate": 9.041522921236347e-06, "loss": 0.4958, "step": 7321 }, { "epoch": 0.22440848351109477, "grad_norm": 1.7950409630297854, "learning_rate": 9.041230686975377e-06, "loss": 0.7532, "step": 7322 }, { "epoch": 0.22443913203383598, "grad_norm": 2.092643128188025, "learning_rate": 9.040938412895127e-06, "loss": 0.7904, "step": 7323 }, { "epoch": 0.22446978055657718, "grad_norm": 1.9710779262755875, "learning_rate": 9.040646098998477e-06, "loss": 0.7464, "step": 7324 }, { "epoch": 0.2245004290793184, "grad_norm": 1.611054358256088, "learning_rate": 9.040353745288307e-06, "loss": 0.6535, "step": 7325 }, { "epoch": 0.2245310776020596, "grad_norm": 1.792171168963958, "learning_rate": 9.040061351767498e-06, "loss": 0.7703, "step": 7326 }, { "epoch": 0.2245617261248008, "grad_norm": 1.9831924321629775, "learning_rate": 9.039768918438931e-06, "loss": 0.8186, "step": 7327 }, { "epoch": 0.22459237464754198, "grad_norm": 1.7673798241862637, "learning_rate": 9.039476445305486e-06, "loss": 0.7066, "step": 7328 }, { "epoch": 0.22462302317028318, "grad_norm": 1.9960963603509412, "learning_rate": 9.039183932370046e-06, "loss": 0.9158, "step": 7329 }, { "epoch": 0.2246536716930244, "grad_norm": 1.8738636121410894, "learning_rate": 9.038891379635494e-06, "loss": 0.7513, "step": 7330 }, { "epoch": 0.2246843202157656, "grad_norm": 2.566215284877947, "learning_rate": 9.038598787104714e-06, "loss": 0.6784, "step": 7331 }, { "epoch": 0.2247149687385068, "grad_norm": 1.1123051777531872, "learning_rate": 9.038306154780585e-06, "loss": 0.5105, "step": 7332 }, { "epoch": 0.224745617261248, "grad_norm": 2.031834277190326, "learning_rate": 9.03801348266599e-06, "loss": 0.7472, "step": 7333 }, { "epoch": 0.2247762657839892, "grad_norm": 2.0722590369538665, "learning_rate": 9.037720770763818e-06, "loss": 0.822, "step": 7334 }, { "epoch": 0.22480691430673042, "grad_norm": 0.8583338555831637, "learning_rate": 9.037428019076948e-06, "loss": 0.4817, "step": 7335 }, { "epoch": 0.22483756282947162, "grad_norm": 1.7474456867133734, "learning_rate": 9.037135227608269e-06, "loss": 0.6867, "step": 7336 }, { "epoch": 0.22486821135221283, "grad_norm": 1.8147732856327528, "learning_rate": 9.036842396360661e-06, "loss": 0.7229, "step": 7337 }, { "epoch": 0.22489885987495403, "grad_norm": 1.6997150323684835, "learning_rate": 9.036549525337015e-06, "loss": 0.7886, "step": 7338 }, { "epoch": 0.22492950839769524, "grad_norm": 0.8310282445576195, "learning_rate": 9.036256614540211e-06, "loss": 0.4874, "step": 7339 }, { "epoch": 0.22496015692043644, "grad_norm": 1.7843050198678156, "learning_rate": 9.03596366397314e-06, "loss": 0.7817, "step": 7340 }, { "epoch": 0.22499080544317765, "grad_norm": 1.7150573420508477, "learning_rate": 9.035670673638684e-06, "loss": 0.6944, "step": 7341 }, { "epoch": 0.22502145396591886, "grad_norm": 1.832059321671727, "learning_rate": 9.035377643539735e-06, "loss": 0.6548, "step": 7342 }, { "epoch": 0.22505210248866003, "grad_norm": 0.8830273463733054, "learning_rate": 9.035084573679176e-06, "loss": 0.5083, "step": 7343 }, { "epoch": 0.22508275101140124, "grad_norm": 1.8531448266959554, "learning_rate": 9.034791464059896e-06, "loss": 0.69, "step": 7344 }, { "epoch": 0.22511339953414244, "grad_norm": 1.510718998315085, "learning_rate": 9.034498314684784e-06, "loss": 0.7419, "step": 7345 }, { "epoch": 0.22514404805688365, "grad_norm": 1.3946141886417553, "learning_rate": 9.034205125556728e-06, "loss": 0.5987, "step": 7346 }, { "epoch": 0.22517469657962486, "grad_norm": 1.9465184293911604, "learning_rate": 9.033911896678617e-06, "loss": 0.7444, "step": 7347 }, { "epoch": 0.22520534510236606, "grad_norm": 1.7593388433218584, "learning_rate": 9.033618628053338e-06, "loss": 0.714, "step": 7348 }, { "epoch": 0.22523599362510727, "grad_norm": 1.8702907577029988, "learning_rate": 9.033325319683786e-06, "loss": 0.6347, "step": 7349 }, { "epoch": 0.22526664214784847, "grad_norm": 1.6817512704654256, "learning_rate": 9.033031971572845e-06, "loss": 0.688, "step": 7350 }, { "epoch": 0.22529729067058968, "grad_norm": 1.902151016438782, "learning_rate": 9.032738583723407e-06, "loss": 0.7309, "step": 7351 }, { "epoch": 0.22532793919333088, "grad_norm": 1.6661332109399751, "learning_rate": 9.032445156138367e-06, "loss": 0.6891, "step": 7352 }, { "epoch": 0.2253585877160721, "grad_norm": 1.6184671368626498, "learning_rate": 9.032151688820612e-06, "loss": 0.7682, "step": 7353 }, { "epoch": 0.2253892362388133, "grad_norm": 1.9749647320498323, "learning_rate": 9.031858181773034e-06, "loss": 0.706, "step": 7354 }, { "epoch": 0.2254198847615545, "grad_norm": 1.9275089190934822, "learning_rate": 9.031564634998527e-06, "loss": 0.7226, "step": 7355 }, { "epoch": 0.2254505332842957, "grad_norm": 1.9793737959946218, "learning_rate": 9.031271048499982e-06, "loss": 0.5854, "step": 7356 }, { "epoch": 0.2254811818070369, "grad_norm": 0.9576483630309244, "learning_rate": 9.030977422280291e-06, "loss": 0.4736, "step": 7357 }, { "epoch": 0.22551183032977812, "grad_norm": 1.668753277176103, "learning_rate": 9.030683756342348e-06, "loss": 0.6467, "step": 7358 }, { "epoch": 0.2255424788525193, "grad_norm": 1.5900423856917376, "learning_rate": 9.030390050689047e-06, "loss": 0.6059, "step": 7359 }, { "epoch": 0.2255731273752605, "grad_norm": 1.8217902539437194, "learning_rate": 9.030096305323281e-06, "loss": 0.7737, "step": 7360 }, { "epoch": 0.2256037758980017, "grad_norm": 1.828066462481493, "learning_rate": 9.029802520247946e-06, "loss": 0.7215, "step": 7361 }, { "epoch": 0.2256344244207429, "grad_norm": 2.0514331879784007, "learning_rate": 9.029508695465935e-06, "loss": 0.7073, "step": 7362 }, { "epoch": 0.22566507294348412, "grad_norm": 1.5675773699407725, "learning_rate": 9.029214830980145e-06, "loss": 0.6185, "step": 7363 }, { "epoch": 0.22569572146622532, "grad_norm": 1.997871703883092, "learning_rate": 9.028920926793468e-06, "loss": 0.7496, "step": 7364 }, { "epoch": 0.22572636998896653, "grad_norm": 2.0435382235068094, "learning_rate": 9.028626982908805e-06, "loss": 0.7148, "step": 7365 }, { "epoch": 0.22575701851170774, "grad_norm": 0.8501281561335114, "learning_rate": 9.028332999329048e-06, "loss": 0.5057, "step": 7366 }, { "epoch": 0.22578766703444894, "grad_norm": 1.817556536236826, "learning_rate": 9.028038976057097e-06, "loss": 0.697, "step": 7367 }, { "epoch": 0.22581831555719015, "grad_norm": 2.0380122607537836, "learning_rate": 9.027744913095844e-06, "loss": 0.8457, "step": 7368 }, { "epoch": 0.22584896407993135, "grad_norm": 1.8283837409249746, "learning_rate": 9.027450810448193e-06, "loss": 0.7151, "step": 7369 }, { "epoch": 0.22587961260267256, "grad_norm": 0.7917315226602317, "learning_rate": 9.027156668117036e-06, "loss": 0.486, "step": 7370 }, { "epoch": 0.22591026112541376, "grad_norm": 1.6645484543561386, "learning_rate": 9.026862486105277e-06, "loss": 0.625, "step": 7371 }, { "epoch": 0.22594090964815497, "grad_norm": 1.6687522843917661, "learning_rate": 9.026568264415809e-06, "loss": 0.7009, "step": 7372 }, { "epoch": 0.22597155817089618, "grad_norm": 1.6409136856848014, "learning_rate": 9.026274003051535e-06, "loss": 0.5715, "step": 7373 }, { "epoch": 0.22600220669363735, "grad_norm": 0.777723288139691, "learning_rate": 9.025979702015352e-06, "loss": 0.4977, "step": 7374 }, { "epoch": 0.22603285521637856, "grad_norm": 1.9571108868832459, "learning_rate": 9.025685361310162e-06, "loss": 0.6856, "step": 7375 }, { "epoch": 0.22606350373911976, "grad_norm": 0.7927736366851938, "learning_rate": 9.025390980938864e-06, "loss": 0.4967, "step": 7376 }, { "epoch": 0.22609415226186097, "grad_norm": 1.794310704206597, "learning_rate": 9.025096560904359e-06, "loss": 0.6879, "step": 7377 }, { "epoch": 0.22612480078460218, "grad_norm": 1.7813731692340908, "learning_rate": 9.024802101209547e-06, "loss": 0.718, "step": 7378 }, { "epoch": 0.22615544930734338, "grad_norm": 1.5826376723103295, "learning_rate": 9.02450760185733e-06, "loss": 0.675, "step": 7379 }, { "epoch": 0.2261860978300846, "grad_norm": 1.8878406309992657, "learning_rate": 9.02421306285061e-06, "loss": 0.8221, "step": 7380 }, { "epoch": 0.2262167463528258, "grad_norm": 1.7495265822532284, "learning_rate": 9.023918484192289e-06, "loss": 0.7302, "step": 7381 }, { "epoch": 0.226247394875567, "grad_norm": 0.7624243008558022, "learning_rate": 9.023623865885272e-06, "loss": 0.4642, "step": 7382 }, { "epoch": 0.2262780433983082, "grad_norm": 1.871343029624403, "learning_rate": 9.023329207932456e-06, "loss": 0.8199, "step": 7383 }, { "epoch": 0.2263086919210494, "grad_norm": 1.6834516697789979, "learning_rate": 9.02303451033675e-06, "loss": 0.7489, "step": 7384 }, { "epoch": 0.22633934044379062, "grad_norm": 1.6333539874420007, "learning_rate": 9.022739773101055e-06, "loss": 0.685, "step": 7385 }, { "epoch": 0.22636998896653182, "grad_norm": 1.7539694441849316, "learning_rate": 9.022444996228276e-06, "loss": 0.7607, "step": 7386 }, { "epoch": 0.22640063748927303, "grad_norm": 0.7961571609692681, "learning_rate": 9.022150179721316e-06, "loss": 0.4352, "step": 7387 }, { "epoch": 0.22643128601201423, "grad_norm": 0.802640093236222, "learning_rate": 9.021855323583082e-06, "loss": 0.4839, "step": 7388 }, { "epoch": 0.22646193453475544, "grad_norm": 1.7105033586217593, "learning_rate": 9.02156042781648e-06, "loss": 0.7056, "step": 7389 }, { "epoch": 0.22649258305749662, "grad_norm": 1.9136666270502205, "learning_rate": 9.021265492424412e-06, "loss": 0.7275, "step": 7390 }, { "epoch": 0.22652323158023782, "grad_norm": 1.7041922550928055, "learning_rate": 9.020970517409786e-06, "loss": 0.7735, "step": 7391 }, { "epoch": 0.22655388010297903, "grad_norm": 0.8485304101015206, "learning_rate": 9.020675502775511e-06, "loss": 0.5029, "step": 7392 }, { "epoch": 0.22658452862572023, "grad_norm": 0.8088453756981732, "learning_rate": 9.020380448524489e-06, "loss": 0.4776, "step": 7393 }, { "epoch": 0.22661517714846144, "grad_norm": 0.8024676258309783, "learning_rate": 9.020085354659631e-06, "loss": 0.4712, "step": 7394 }, { "epoch": 0.22664582567120264, "grad_norm": 1.4995694471123937, "learning_rate": 9.019790221183844e-06, "loss": 0.5836, "step": 7395 }, { "epoch": 0.22667647419394385, "grad_norm": 1.6948246389061454, "learning_rate": 9.019495048100035e-06, "loss": 0.7179, "step": 7396 }, { "epoch": 0.22670712271668506, "grad_norm": 1.9245791337139233, "learning_rate": 9.019199835411112e-06, "loss": 0.7472, "step": 7397 }, { "epoch": 0.22673777123942626, "grad_norm": 3.5822502543234176, "learning_rate": 9.018904583119987e-06, "loss": 0.8344, "step": 7398 }, { "epoch": 0.22676841976216747, "grad_norm": 1.7580857804421093, "learning_rate": 9.018609291229565e-06, "loss": 0.8005, "step": 7399 }, { "epoch": 0.22679906828490867, "grad_norm": 1.6496621593178746, "learning_rate": 9.018313959742756e-06, "loss": 0.7068, "step": 7400 }, { "epoch": 0.22682971680764988, "grad_norm": 2.075945366568093, "learning_rate": 9.018018588662474e-06, "loss": 0.7193, "step": 7401 }, { "epoch": 0.22686036533039108, "grad_norm": 1.623383656882588, "learning_rate": 9.017723177991627e-06, "loss": 0.66, "step": 7402 }, { "epoch": 0.2268910138531323, "grad_norm": 1.747881194577664, "learning_rate": 9.017427727733124e-06, "loss": 0.8069, "step": 7403 }, { "epoch": 0.2269216623758735, "grad_norm": 1.8240325967915605, "learning_rate": 9.017132237889877e-06, "loss": 0.7584, "step": 7404 }, { "epoch": 0.22695231089861467, "grad_norm": 1.9222153165228208, "learning_rate": 9.0168367084648e-06, "loss": 0.5589, "step": 7405 }, { "epoch": 0.22698295942135588, "grad_norm": 1.8479223361928179, "learning_rate": 9.016541139460803e-06, "loss": 0.7356, "step": 7406 }, { "epoch": 0.22701360794409708, "grad_norm": 1.8507739521306228, "learning_rate": 9.016245530880798e-06, "loss": 0.7899, "step": 7407 }, { "epoch": 0.2270442564668383, "grad_norm": 1.8247152266754691, "learning_rate": 9.015949882727697e-06, "loss": 0.7397, "step": 7408 }, { "epoch": 0.2270749049895795, "grad_norm": 1.6347478447719714, "learning_rate": 9.015654195004416e-06, "loss": 0.6529, "step": 7409 }, { "epoch": 0.2271055535123207, "grad_norm": 1.8143628064711597, "learning_rate": 9.015358467713865e-06, "loss": 0.7606, "step": 7410 }, { "epoch": 0.2271362020350619, "grad_norm": 1.9008817899677717, "learning_rate": 9.015062700858963e-06, "loss": 0.7889, "step": 7411 }, { "epoch": 0.2271668505578031, "grad_norm": 1.9551459968481706, "learning_rate": 9.014766894442619e-06, "loss": 0.7561, "step": 7412 }, { "epoch": 0.22719749908054432, "grad_norm": 8.280010845250105, "learning_rate": 9.01447104846775e-06, "loss": 0.7261, "step": 7413 }, { "epoch": 0.22722814760328552, "grad_norm": 1.666390036090097, "learning_rate": 9.01417516293727e-06, "loss": 0.6849, "step": 7414 }, { "epoch": 0.22725879612602673, "grad_norm": 1.7069703800267022, "learning_rate": 9.013879237854095e-06, "loss": 0.7156, "step": 7415 }, { "epoch": 0.22728944464876794, "grad_norm": 1.0276684048388376, "learning_rate": 9.013583273221141e-06, "loss": 0.4881, "step": 7416 }, { "epoch": 0.22732009317150914, "grad_norm": 1.9532530118977538, "learning_rate": 9.013287269041322e-06, "loss": 0.7511, "step": 7417 }, { "epoch": 0.22735074169425035, "grad_norm": 0.8767870989202853, "learning_rate": 9.01299122531756e-06, "loss": 0.4998, "step": 7418 }, { "epoch": 0.22738139021699155, "grad_norm": 1.5413280291963574, "learning_rate": 9.012695142052767e-06, "loss": 0.714, "step": 7419 }, { "epoch": 0.22741203873973276, "grad_norm": 1.9515530230582279, "learning_rate": 9.012399019249863e-06, "loss": 0.8569, "step": 7420 }, { "epoch": 0.22744268726247394, "grad_norm": 1.9637964653811855, "learning_rate": 9.012102856911764e-06, "loss": 0.7652, "step": 7421 }, { "epoch": 0.22747333578521514, "grad_norm": 1.6251994772882068, "learning_rate": 9.011806655041389e-06, "loss": 0.6493, "step": 7422 }, { "epoch": 0.22750398430795635, "grad_norm": 1.7584835449139846, "learning_rate": 9.011510413641658e-06, "loss": 0.7391, "step": 7423 }, { "epoch": 0.22753463283069755, "grad_norm": 1.193536966249129, "learning_rate": 9.011214132715486e-06, "loss": 0.5114, "step": 7424 }, { "epoch": 0.22756528135343876, "grad_norm": 1.8433746696958389, "learning_rate": 9.010917812265796e-06, "loss": 0.777, "step": 7425 }, { "epoch": 0.22759592987617996, "grad_norm": 0.8952373482459111, "learning_rate": 9.010621452295508e-06, "loss": 0.4856, "step": 7426 }, { "epoch": 0.22762657839892117, "grad_norm": 1.9047347509121975, "learning_rate": 9.010325052807538e-06, "loss": 0.7104, "step": 7427 }, { "epoch": 0.22765722692166238, "grad_norm": 1.8036684180237428, "learning_rate": 9.01002861380481e-06, "loss": 0.6428, "step": 7428 }, { "epoch": 0.22768787544440358, "grad_norm": 1.8736651102821884, "learning_rate": 9.009732135290246e-06, "loss": 0.8524, "step": 7429 }, { "epoch": 0.2277185239671448, "grad_norm": 1.7388362790713772, "learning_rate": 9.009435617266764e-06, "loss": 0.7415, "step": 7430 }, { "epoch": 0.227749172489886, "grad_norm": 0.8877390085200039, "learning_rate": 9.009139059737286e-06, "loss": 0.4759, "step": 7431 }, { "epoch": 0.2277798210126272, "grad_norm": 14.954790981825134, "learning_rate": 9.008842462704737e-06, "loss": 0.7493, "step": 7432 }, { "epoch": 0.2278104695353684, "grad_norm": 2.0943493907391035, "learning_rate": 9.008545826172037e-06, "loss": 0.7329, "step": 7433 }, { "epoch": 0.2278411180581096, "grad_norm": 1.4954035016810665, "learning_rate": 9.00824915014211e-06, "loss": 0.7357, "step": 7434 }, { "epoch": 0.22787176658085082, "grad_norm": 1.7214370374846975, "learning_rate": 9.007952434617877e-06, "loss": 0.7177, "step": 7435 }, { "epoch": 0.227902415103592, "grad_norm": 1.7574668705275223, "learning_rate": 9.007655679602262e-06, "loss": 0.6682, "step": 7436 }, { "epoch": 0.2279330636263332, "grad_norm": 1.7612996818782602, "learning_rate": 9.007358885098192e-06, "loss": 0.7437, "step": 7437 }, { "epoch": 0.2279637121490744, "grad_norm": 2.4597486346858846, "learning_rate": 9.00706205110859e-06, "loss": 0.8342, "step": 7438 }, { "epoch": 0.2279943606718156, "grad_norm": 1.8370819024449374, "learning_rate": 9.00676517763638e-06, "loss": 0.7323, "step": 7439 }, { "epoch": 0.22802500919455682, "grad_norm": 2.178312696415652, "learning_rate": 9.006468264684487e-06, "loss": 0.7408, "step": 7440 }, { "epoch": 0.22805565771729802, "grad_norm": 5.212624397393382, "learning_rate": 9.006171312255837e-06, "loss": 0.472, "step": 7441 }, { "epoch": 0.22808630624003923, "grad_norm": 2.0145862253622995, "learning_rate": 9.005874320353356e-06, "loss": 0.7408, "step": 7442 }, { "epoch": 0.22811695476278043, "grad_norm": 1.8770751094409104, "learning_rate": 9.005577288979972e-06, "loss": 0.7109, "step": 7443 }, { "epoch": 0.22814760328552164, "grad_norm": 2.0961657821552815, "learning_rate": 9.00528021813861e-06, "loss": 0.7234, "step": 7444 }, { "epoch": 0.22817825180826284, "grad_norm": 1.7507077764924595, "learning_rate": 9.004983107832195e-06, "loss": 0.7509, "step": 7445 }, { "epoch": 0.22820890033100405, "grad_norm": 0.8613965285498534, "learning_rate": 9.004685958063657e-06, "loss": 0.4956, "step": 7446 }, { "epoch": 0.22823954885374526, "grad_norm": 1.9155073400943763, "learning_rate": 9.004388768835926e-06, "loss": 0.735, "step": 7447 }, { "epoch": 0.22827019737648646, "grad_norm": 1.6998283076178544, "learning_rate": 9.004091540151926e-06, "loss": 0.8113, "step": 7448 }, { "epoch": 0.22830084589922767, "grad_norm": 1.8834941399780225, "learning_rate": 9.003794272014587e-06, "loss": 0.7887, "step": 7449 }, { "epoch": 0.22833149442196887, "grad_norm": 1.507108782329804, "learning_rate": 9.003496964426842e-06, "loss": 0.748, "step": 7450 }, { "epoch": 0.22836214294471008, "grad_norm": 0.9364477450422084, "learning_rate": 9.003199617391613e-06, "loss": 0.4992, "step": 7451 }, { "epoch": 0.22839279146745126, "grad_norm": 1.7000564450478652, "learning_rate": 9.002902230911836e-06, "loss": 0.7032, "step": 7452 }, { "epoch": 0.22842343999019246, "grad_norm": 1.7884451837221182, "learning_rate": 9.002604804990438e-06, "loss": 0.6919, "step": 7453 }, { "epoch": 0.22845408851293367, "grad_norm": 1.6042107853892984, "learning_rate": 9.002307339630352e-06, "loss": 0.6095, "step": 7454 }, { "epoch": 0.22848473703567487, "grad_norm": 1.8887033623732594, "learning_rate": 9.002009834834506e-06, "loss": 0.7595, "step": 7455 }, { "epoch": 0.22851538555841608, "grad_norm": 1.655610188022896, "learning_rate": 9.001712290605835e-06, "loss": 0.7315, "step": 7456 }, { "epoch": 0.22854603408115728, "grad_norm": 1.8086791405464122, "learning_rate": 9.001414706947269e-06, "loss": 0.7216, "step": 7457 }, { "epoch": 0.2285766826038985, "grad_norm": 1.5450312558067283, "learning_rate": 9.00111708386174e-06, "loss": 0.6683, "step": 7458 }, { "epoch": 0.2286073311266397, "grad_norm": 1.7208542260947313, "learning_rate": 9.000819421352178e-06, "loss": 0.7694, "step": 7459 }, { "epoch": 0.2286379796493809, "grad_norm": 1.8644302399254309, "learning_rate": 9.000521719421522e-06, "loss": 0.8704, "step": 7460 }, { "epoch": 0.2286686281721221, "grad_norm": 1.5038753697887308, "learning_rate": 9.0002239780727e-06, "loss": 0.7747, "step": 7461 }, { "epoch": 0.2286992766948633, "grad_norm": 1.0666509893018903, "learning_rate": 8.999926197308649e-06, "loss": 0.4868, "step": 7462 }, { "epoch": 0.22872992521760452, "grad_norm": 1.0247455413604094, "learning_rate": 8.999628377132298e-06, "loss": 0.4924, "step": 7463 }, { "epoch": 0.22876057374034572, "grad_norm": 1.843593633534192, "learning_rate": 8.99933051754659e-06, "loss": 0.6761, "step": 7464 }, { "epoch": 0.22879122226308693, "grad_norm": 1.7182781691720073, "learning_rate": 8.999032618554453e-06, "loss": 0.7166, "step": 7465 }, { "epoch": 0.22882187078582814, "grad_norm": 1.787841100919115, "learning_rate": 8.998734680158824e-06, "loss": 0.7153, "step": 7466 }, { "epoch": 0.2288525193085693, "grad_norm": 1.7082010428636345, "learning_rate": 8.99843670236264e-06, "loss": 0.772, "step": 7467 }, { "epoch": 0.22888316783131052, "grad_norm": 0.8244874567176131, "learning_rate": 8.998138685168836e-06, "loss": 0.4714, "step": 7468 }, { "epoch": 0.22891381635405172, "grad_norm": 1.8263201508846025, "learning_rate": 8.997840628580348e-06, "loss": 0.7136, "step": 7469 }, { "epoch": 0.22894446487679293, "grad_norm": 1.7596123318926005, "learning_rate": 8.997542532600114e-06, "loss": 0.7617, "step": 7470 }, { "epoch": 0.22897511339953414, "grad_norm": 1.7551753521978672, "learning_rate": 8.99724439723107e-06, "loss": 0.692, "step": 7471 }, { "epoch": 0.22900576192227534, "grad_norm": 1.694845811996046, "learning_rate": 8.996946222476156e-06, "loss": 0.6949, "step": 7472 }, { "epoch": 0.22903641044501655, "grad_norm": 0.8467714911133734, "learning_rate": 8.996648008338307e-06, "loss": 0.4856, "step": 7473 }, { "epoch": 0.22906705896775775, "grad_norm": 1.7778127535596824, "learning_rate": 8.996349754820461e-06, "loss": 0.7064, "step": 7474 }, { "epoch": 0.22909770749049896, "grad_norm": 2.0033991635493917, "learning_rate": 8.996051461925562e-06, "loss": 0.7542, "step": 7475 }, { "epoch": 0.22912835601324016, "grad_norm": 1.9709314328654575, "learning_rate": 8.995753129656542e-06, "loss": 0.6982, "step": 7476 }, { "epoch": 0.22915900453598137, "grad_norm": 0.8252666583493803, "learning_rate": 8.995454758016345e-06, "loss": 0.4723, "step": 7477 }, { "epoch": 0.22918965305872258, "grad_norm": 1.9813354997784174, "learning_rate": 8.99515634700791e-06, "loss": 0.7071, "step": 7478 }, { "epoch": 0.22922030158146378, "grad_norm": 1.6849049077805172, "learning_rate": 8.994857896634178e-06, "loss": 0.6074, "step": 7479 }, { "epoch": 0.229250950104205, "grad_norm": 2.196193512333205, "learning_rate": 8.994559406898088e-06, "loss": 0.7266, "step": 7480 }, { "epoch": 0.2292815986269462, "grad_norm": 1.8813318480097532, "learning_rate": 8.994260877802585e-06, "loss": 0.7322, "step": 7481 }, { "epoch": 0.2293122471496874, "grad_norm": 1.7712517010034177, "learning_rate": 8.993962309350605e-06, "loss": 0.7146, "step": 7482 }, { "epoch": 0.22934289567242858, "grad_norm": 1.7289664484823624, "learning_rate": 8.993663701545091e-06, "loss": 0.6691, "step": 7483 }, { "epoch": 0.22937354419516978, "grad_norm": 1.7906940687381714, "learning_rate": 8.993365054388989e-06, "loss": 0.7471, "step": 7484 }, { "epoch": 0.229404192717911, "grad_norm": 1.9006139544616678, "learning_rate": 8.99306636788524e-06, "loss": 0.7302, "step": 7485 }, { "epoch": 0.2294348412406522, "grad_norm": 1.6955503786258137, "learning_rate": 8.992767642036786e-06, "loss": 0.7446, "step": 7486 }, { "epoch": 0.2294654897633934, "grad_norm": 2.0155398193817193, "learning_rate": 8.992468876846569e-06, "loss": 0.7509, "step": 7487 }, { "epoch": 0.2294961382861346, "grad_norm": 1.8141700135574812, "learning_rate": 8.992170072317536e-06, "loss": 0.7919, "step": 7488 }, { "epoch": 0.2295267868088758, "grad_norm": 0.8954147384386071, "learning_rate": 8.99187122845263e-06, "loss": 0.4872, "step": 7489 }, { "epoch": 0.22955743533161702, "grad_norm": 3.0591132656343683, "learning_rate": 8.991572345254796e-06, "loss": 0.7492, "step": 7490 }, { "epoch": 0.22958808385435822, "grad_norm": 1.75964852023931, "learning_rate": 8.991273422726975e-06, "loss": 0.7529, "step": 7491 }, { "epoch": 0.22961873237709943, "grad_norm": 1.6415432372541978, "learning_rate": 8.990974460872119e-06, "loss": 0.6724, "step": 7492 }, { "epoch": 0.22964938089984063, "grad_norm": 1.8217824858949387, "learning_rate": 8.99067545969317e-06, "loss": 0.6932, "step": 7493 }, { "epoch": 0.22968002942258184, "grad_norm": 1.921852064878497, "learning_rate": 8.990376419193074e-06, "loss": 0.7288, "step": 7494 }, { "epoch": 0.22971067794532304, "grad_norm": 2.841297330763125, "learning_rate": 8.990077339374778e-06, "loss": 0.7059, "step": 7495 }, { "epoch": 0.22974132646806425, "grad_norm": 1.6546936695217285, "learning_rate": 8.98977822024123e-06, "loss": 0.7801, "step": 7496 }, { "epoch": 0.22977197499080546, "grad_norm": 1.7606417330754707, "learning_rate": 8.989479061795377e-06, "loss": 0.7375, "step": 7497 }, { "epoch": 0.22980262351354663, "grad_norm": 1.7194699053042206, "learning_rate": 8.989179864040166e-06, "loss": 0.7622, "step": 7498 }, { "epoch": 0.22983327203628784, "grad_norm": 1.6245526873394294, "learning_rate": 8.988880626978543e-06, "loss": 0.6872, "step": 7499 }, { "epoch": 0.22986392055902904, "grad_norm": 1.6924912966326289, "learning_rate": 8.98858135061346e-06, "loss": 0.692, "step": 7500 }, { "epoch": 0.22989456908177025, "grad_norm": 1.9957486329048024, "learning_rate": 8.988282034947864e-06, "loss": 0.7681, "step": 7501 }, { "epoch": 0.22992521760451146, "grad_norm": 0.9252861823909375, "learning_rate": 8.987982679984704e-06, "loss": 0.4994, "step": 7502 }, { "epoch": 0.22995586612725266, "grad_norm": 0.8678482992723914, "learning_rate": 8.987683285726931e-06, "loss": 0.4711, "step": 7503 }, { "epoch": 0.22998651464999387, "grad_norm": 1.7501575892422234, "learning_rate": 8.987383852177497e-06, "loss": 0.716, "step": 7504 }, { "epoch": 0.23001716317273507, "grad_norm": 1.8016928085381196, "learning_rate": 8.987084379339345e-06, "loss": 0.6688, "step": 7505 }, { "epoch": 0.23004781169547628, "grad_norm": 1.6985989350950663, "learning_rate": 8.986784867215433e-06, "loss": 0.7438, "step": 7506 }, { "epoch": 0.23007846021821748, "grad_norm": 2.127681637577671, "learning_rate": 8.98648531580871e-06, "loss": 0.7182, "step": 7507 }, { "epoch": 0.2301091087409587, "grad_norm": 2.18185517017369, "learning_rate": 8.986185725122125e-06, "loss": 0.7612, "step": 7508 }, { "epoch": 0.2301397572636999, "grad_norm": 1.5793089803452287, "learning_rate": 8.985886095158634e-06, "loss": 0.7266, "step": 7509 }, { "epoch": 0.2301704057864411, "grad_norm": 1.8095469874716552, "learning_rate": 8.985586425921187e-06, "loss": 0.7354, "step": 7510 }, { "epoch": 0.2302010543091823, "grad_norm": 1.6706981755411754, "learning_rate": 8.985286717412737e-06, "loss": 0.6761, "step": 7511 }, { "epoch": 0.2302317028319235, "grad_norm": 1.8890402789646192, "learning_rate": 8.984986969636238e-06, "loss": 0.6421, "step": 7512 }, { "epoch": 0.23026235135466472, "grad_norm": 2.0693819838685994, "learning_rate": 8.984687182594642e-06, "loss": 0.7658, "step": 7513 }, { "epoch": 0.2302929998774059, "grad_norm": 1.625527153752677, "learning_rate": 8.984387356290905e-06, "loss": 0.7613, "step": 7514 }, { "epoch": 0.2303236484001471, "grad_norm": 2.6043289378331544, "learning_rate": 8.984087490727978e-06, "loss": 0.6605, "step": 7515 }, { "epoch": 0.2303542969228883, "grad_norm": 6.298558726223969, "learning_rate": 8.983787585908819e-06, "loss": 0.6626, "step": 7516 }, { "epoch": 0.2303849454456295, "grad_norm": 1.8515298411973133, "learning_rate": 8.98348764183638e-06, "loss": 0.8638, "step": 7517 }, { "epoch": 0.23041559396837072, "grad_norm": 1.774102655813211, "learning_rate": 8.983187658513618e-06, "loss": 0.7633, "step": 7518 }, { "epoch": 0.23044624249111192, "grad_norm": 1.669396571255921, "learning_rate": 8.982887635943492e-06, "loss": 0.7042, "step": 7519 }, { "epoch": 0.23047689101385313, "grad_norm": 2.069236874875307, "learning_rate": 8.982587574128953e-06, "loss": 0.6617, "step": 7520 }, { "epoch": 0.23050753953659434, "grad_norm": 2.410635943727434, "learning_rate": 8.98228747307296e-06, "loss": 0.6365, "step": 7521 }, { "epoch": 0.23053818805933554, "grad_norm": 1.3873627979353875, "learning_rate": 8.981987332778468e-06, "loss": 0.4951, "step": 7522 }, { "epoch": 0.23056883658207675, "grad_norm": 1.9003040406383271, "learning_rate": 8.981687153248438e-06, "loss": 0.7235, "step": 7523 }, { "epoch": 0.23059948510481795, "grad_norm": 1.6373793818074986, "learning_rate": 8.981386934485825e-06, "loss": 0.7211, "step": 7524 }, { "epoch": 0.23063013362755916, "grad_norm": 1.923457128328767, "learning_rate": 8.98108667649359e-06, "loss": 0.782, "step": 7525 }, { "epoch": 0.23066078215030036, "grad_norm": 1.7933320142471163, "learning_rate": 8.980786379274685e-06, "loss": 0.7545, "step": 7526 }, { "epoch": 0.23069143067304157, "grad_norm": 1.6792982021292944, "learning_rate": 8.980486042832076e-06, "loss": 0.6556, "step": 7527 }, { "epoch": 0.23072207919578278, "grad_norm": 1.7858221799060996, "learning_rate": 8.98018566716872e-06, "loss": 0.6768, "step": 7528 }, { "epoch": 0.23075272771852395, "grad_norm": 1.6938421614527146, "learning_rate": 8.979885252287575e-06, "loss": 0.6814, "step": 7529 }, { "epoch": 0.23078337624126516, "grad_norm": 1.829965827366096, "learning_rate": 8.9795847981916e-06, "loss": 0.7425, "step": 7530 }, { "epoch": 0.23081402476400636, "grad_norm": 2.0432045282159956, "learning_rate": 8.979284304883762e-06, "loss": 0.7242, "step": 7531 }, { "epoch": 0.23084467328674757, "grad_norm": 1.1797534635867108, "learning_rate": 8.978983772367015e-06, "loss": 0.4544, "step": 7532 }, { "epoch": 0.23087532180948878, "grad_norm": 1.8884169626491174, "learning_rate": 8.978683200644325e-06, "loss": 0.6565, "step": 7533 }, { "epoch": 0.23090597033222998, "grad_norm": 2.110229918195631, "learning_rate": 8.97838258971865e-06, "loss": 0.7244, "step": 7534 }, { "epoch": 0.2309366188549712, "grad_norm": 1.939517964162014, "learning_rate": 8.978081939592953e-06, "loss": 0.7806, "step": 7535 }, { "epoch": 0.2309672673777124, "grad_norm": 1.7969708224235488, "learning_rate": 8.9777812502702e-06, "loss": 0.7306, "step": 7536 }, { "epoch": 0.2309979159004536, "grad_norm": 1.5289061207756527, "learning_rate": 8.977480521753346e-06, "loss": 0.7225, "step": 7537 }, { "epoch": 0.2310285644231948, "grad_norm": 1.6995852082185836, "learning_rate": 8.977179754045362e-06, "loss": 0.756, "step": 7538 }, { "epoch": 0.231059212945936, "grad_norm": 1.939708011984613, "learning_rate": 8.976878947149206e-06, "loss": 0.7069, "step": 7539 }, { "epoch": 0.23108986146867722, "grad_norm": 1.6838916396280597, "learning_rate": 8.976578101067845e-06, "loss": 0.6767, "step": 7540 }, { "epoch": 0.23112050999141842, "grad_norm": 1.9164396589816814, "learning_rate": 8.976277215804243e-06, "loss": 0.6278, "step": 7541 }, { "epoch": 0.23115115851415963, "grad_norm": 1.6401061252744498, "learning_rate": 8.975976291361364e-06, "loss": 0.7089, "step": 7542 }, { "epoch": 0.23118180703690083, "grad_norm": 1.8646374504445185, "learning_rate": 8.975675327742173e-06, "loss": 0.6912, "step": 7543 }, { "epoch": 0.23121245555964204, "grad_norm": 1.7705732353886598, "learning_rate": 8.975374324949638e-06, "loss": 0.6047, "step": 7544 }, { "epoch": 0.23124310408238322, "grad_norm": 1.7862955786341959, "learning_rate": 8.975073282986719e-06, "loss": 0.7255, "step": 7545 }, { "epoch": 0.23127375260512442, "grad_norm": 1.9322278533110826, "learning_rate": 8.974772201856387e-06, "loss": 0.7503, "step": 7546 }, { "epoch": 0.23130440112786563, "grad_norm": 2.1054682081265206, "learning_rate": 8.974471081561608e-06, "loss": 0.7686, "step": 7547 }, { "epoch": 0.23133504965060683, "grad_norm": 1.7701146853511955, "learning_rate": 8.97416992210535e-06, "loss": 0.7805, "step": 7548 }, { "epoch": 0.23136569817334804, "grad_norm": 1.0096927310578072, "learning_rate": 8.973868723490578e-06, "loss": 0.4792, "step": 7549 }, { "epoch": 0.23139634669608924, "grad_norm": 1.876229081204876, "learning_rate": 8.97356748572026e-06, "loss": 0.7572, "step": 7550 }, { "epoch": 0.23142699521883045, "grad_norm": 0.8353699041934703, "learning_rate": 8.973266208797365e-06, "loss": 0.4711, "step": 7551 }, { "epoch": 0.23145764374157166, "grad_norm": 1.7067112297601839, "learning_rate": 8.972964892724862e-06, "loss": 0.7048, "step": 7552 }, { "epoch": 0.23148829226431286, "grad_norm": 1.896070265325058, "learning_rate": 8.97266353750572e-06, "loss": 0.7838, "step": 7553 }, { "epoch": 0.23151894078705407, "grad_norm": 1.7488896196308947, "learning_rate": 8.972362143142905e-06, "loss": 0.72, "step": 7554 }, { "epoch": 0.23154958930979527, "grad_norm": 0.9340185410490639, "learning_rate": 8.972060709639393e-06, "loss": 0.4886, "step": 7555 }, { "epoch": 0.23158023783253648, "grad_norm": 2.1615749384447205, "learning_rate": 8.971759236998147e-06, "loss": 0.7464, "step": 7556 }, { "epoch": 0.23161088635527768, "grad_norm": 1.6164091281709878, "learning_rate": 8.971457725222143e-06, "loss": 0.6487, "step": 7557 }, { "epoch": 0.2316415348780189, "grad_norm": 1.7045289462627713, "learning_rate": 8.971156174314349e-06, "loss": 0.6863, "step": 7558 }, { "epoch": 0.2316721834007601, "grad_norm": 0.7978432337021354, "learning_rate": 8.970854584277738e-06, "loss": 0.4854, "step": 7559 }, { "epoch": 0.23170283192350127, "grad_norm": 1.9476787890902527, "learning_rate": 8.970552955115282e-06, "loss": 0.6827, "step": 7560 }, { "epoch": 0.23173348044624248, "grad_norm": 2.908039161772329, "learning_rate": 8.970251286829949e-06, "loss": 0.7291, "step": 7561 }, { "epoch": 0.23176412896898368, "grad_norm": 1.8141063983572638, "learning_rate": 8.969949579424715e-06, "loss": 0.6734, "step": 7562 }, { "epoch": 0.2317947774917249, "grad_norm": 1.7020352415385274, "learning_rate": 8.969647832902552e-06, "loss": 0.6375, "step": 7563 }, { "epoch": 0.2318254260144661, "grad_norm": 1.575911656924497, "learning_rate": 8.969346047266436e-06, "loss": 0.7297, "step": 7564 }, { "epoch": 0.2318560745372073, "grad_norm": 1.7736144420679192, "learning_rate": 8.969044222519333e-06, "loss": 0.7047, "step": 7565 }, { "epoch": 0.2318867230599485, "grad_norm": 1.7738402192818903, "learning_rate": 8.968742358664227e-06, "loss": 0.6938, "step": 7566 }, { "epoch": 0.2319173715826897, "grad_norm": 1.6458049912492878, "learning_rate": 8.968440455704085e-06, "loss": 0.7039, "step": 7567 }, { "epoch": 0.23194802010543092, "grad_norm": 1.6370343295205776, "learning_rate": 8.968138513641882e-06, "loss": 0.6626, "step": 7568 }, { "epoch": 0.23197866862817212, "grad_norm": 0.9918232195452558, "learning_rate": 8.967836532480595e-06, "loss": 0.5005, "step": 7569 }, { "epoch": 0.23200931715091333, "grad_norm": 1.662871176285762, "learning_rate": 8.967534512223202e-06, "loss": 0.7317, "step": 7570 }, { "epoch": 0.23203996567365454, "grad_norm": 1.6044991197277594, "learning_rate": 8.967232452872676e-06, "loss": 0.6881, "step": 7571 }, { "epoch": 0.23207061419639574, "grad_norm": 1.836797828022533, "learning_rate": 8.966930354431991e-06, "loss": 0.7897, "step": 7572 }, { "epoch": 0.23210126271913695, "grad_norm": 1.8830910678513548, "learning_rate": 8.966628216904128e-06, "loss": 0.7331, "step": 7573 }, { "epoch": 0.23213191124187815, "grad_norm": 1.9089680201699766, "learning_rate": 8.966326040292062e-06, "loss": 0.6994, "step": 7574 }, { "epoch": 0.23216255976461936, "grad_norm": 0.8583866179976062, "learning_rate": 8.966023824598771e-06, "loss": 0.4979, "step": 7575 }, { "epoch": 0.23219320828736054, "grad_norm": 0.867356929758002, "learning_rate": 8.965721569827233e-06, "loss": 0.4903, "step": 7576 }, { "epoch": 0.23222385681010174, "grad_norm": 1.612645069173317, "learning_rate": 8.965419275980425e-06, "loss": 0.6986, "step": 7577 }, { "epoch": 0.23225450533284295, "grad_norm": 1.628677197750012, "learning_rate": 8.965116943061325e-06, "loss": 0.698, "step": 7578 }, { "epoch": 0.23228515385558415, "grad_norm": 1.7838882034967591, "learning_rate": 8.964814571072916e-06, "loss": 0.7434, "step": 7579 }, { "epoch": 0.23231580237832536, "grad_norm": 1.702504542508258, "learning_rate": 8.964512160018173e-06, "loss": 0.6747, "step": 7580 }, { "epoch": 0.23234645090106656, "grad_norm": 1.6568920766573112, "learning_rate": 8.964209709900078e-06, "loss": 0.7267, "step": 7581 }, { "epoch": 0.23237709942380777, "grad_norm": 1.8092752405549115, "learning_rate": 8.963907220721609e-06, "loss": 0.7332, "step": 7582 }, { "epoch": 0.23240774794654898, "grad_norm": 1.5672172665030553, "learning_rate": 8.963604692485748e-06, "loss": 0.6846, "step": 7583 }, { "epoch": 0.23243839646929018, "grad_norm": 1.7086188888654417, "learning_rate": 8.963302125195476e-06, "loss": 0.6528, "step": 7584 }, { "epoch": 0.2324690449920314, "grad_norm": 1.6079717527876258, "learning_rate": 8.962999518853775e-06, "loss": 0.7355, "step": 7585 }, { "epoch": 0.2324996935147726, "grad_norm": 1.7276753270745715, "learning_rate": 8.962696873463625e-06, "loss": 0.7641, "step": 7586 }, { "epoch": 0.2325303420375138, "grad_norm": 1.6498272681210686, "learning_rate": 8.96239418902801e-06, "loss": 0.7649, "step": 7587 }, { "epoch": 0.232560990560255, "grad_norm": 1.8364928970930117, "learning_rate": 8.962091465549912e-06, "loss": 0.6775, "step": 7588 }, { "epoch": 0.2325916390829962, "grad_norm": 1.1572847845209135, "learning_rate": 8.96178870303231e-06, "loss": 0.5134, "step": 7589 }, { "epoch": 0.23262228760573742, "grad_norm": 2.0189906185947093, "learning_rate": 8.961485901478193e-06, "loss": 0.7617, "step": 7590 }, { "epoch": 0.2326529361284786, "grad_norm": 1.6402398372137055, "learning_rate": 8.96118306089054e-06, "loss": 0.7167, "step": 7591 }, { "epoch": 0.2326835846512198, "grad_norm": 1.8736637123742865, "learning_rate": 8.960880181272338e-06, "loss": 0.7802, "step": 7592 }, { "epoch": 0.232714233173961, "grad_norm": 1.6049395407181197, "learning_rate": 8.960577262626569e-06, "loss": 0.6851, "step": 7593 }, { "epoch": 0.2327448816967022, "grad_norm": 1.6519044109740757, "learning_rate": 8.96027430495622e-06, "loss": 0.7102, "step": 7594 }, { "epoch": 0.23277553021944342, "grad_norm": 1.6639307203723237, "learning_rate": 8.959971308264275e-06, "loss": 0.6726, "step": 7595 }, { "epoch": 0.23280617874218462, "grad_norm": 1.729208943423311, "learning_rate": 8.959668272553717e-06, "loss": 0.6493, "step": 7596 }, { "epoch": 0.23283682726492583, "grad_norm": 1.6406832082277325, "learning_rate": 8.959365197827537e-06, "loss": 0.6626, "step": 7597 }, { "epoch": 0.23286747578766703, "grad_norm": 0.9575963044106042, "learning_rate": 8.95906208408872e-06, "loss": 0.4698, "step": 7598 }, { "epoch": 0.23289812431040824, "grad_norm": 1.7825757963172488, "learning_rate": 8.958758931340247e-06, "loss": 0.7761, "step": 7599 }, { "epoch": 0.23292877283314944, "grad_norm": 1.7391421963944567, "learning_rate": 8.958455739585113e-06, "loss": 0.638, "step": 7600 }, { "epoch": 0.23295942135589065, "grad_norm": 1.781329755595209, "learning_rate": 8.958152508826299e-06, "loss": 0.7412, "step": 7601 }, { "epoch": 0.23299006987863186, "grad_norm": 1.9969283786633611, "learning_rate": 8.957849239066797e-06, "loss": 0.6599, "step": 7602 }, { "epoch": 0.23302071840137306, "grad_norm": 1.6778111382729026, "learning_rate": 8.957545930309595e-06, "loss": 0.7544, "step": 7603 }, { "epoch": 0.23305136692411427, "grad_norm": 1.6836716291115903, "learning_rate": 8.95724258255768e-06, "loss": 0.6952, "step": 7604 }, { "epoch": 0.23308201544685547, "grad_norm": 1.78819494266077, "learning_rate": 8.95693919581404e-06, "loss": 0.7136, "step": 7605 }, { "epoch": 0.23311266396959668, "grad_norm": 1.6522825278435986, "learning_rate": 8.956635770081665e-06, "loss": 0.7087, "step": 7606 }, { "epoch": 0.23314331249233786, "grad_norm": 1.7395466937216928, "learning_rate": 8.956332305363546e-06, "loss": 0.7918, "step": 7607 }, { "epoch": 0.23317396101507906, "grad_norm": 1.8860723232184553, "learning_rate": 8.956028801662675e-06, "loss": 0.8237, "step": 7608 }, { "epoch": 0.23320460953782027, "grad_norm": 1.0214457244449628, "learning_rate": 8.955725258982038e-06, "loss": 0.4872, "step": 7609 }, { "epoch": 0.23323525806056147, "grad_norm": 1.579610240989424, "learning_rate": 8.955421677324628e-06, "loss": 0.7179, "step": 7610 }, { "epoch": 0.23326590658330268, "grad_norm": 1.8802734127542773, "learning_rate": 8.955118056693436e-06, "loss": 0.7391, "step": 7611 }, { "epoch": 0.23329655510604388, "grad_norm": 1.5691456544340552, "learning_rate": 8.954814397091454e-06, "loss": 0.7359, "step": 7612 }, { "epoch": 0.2333272036287851, "grad_norm": 1.6521592874675894, "learning_rate": 8.954510698521674e-06, "loss": 0.6812, "step": 7613 }, { "epoch": 0.2333578521515263, "grad_norm": 2.0053041513757917, "learning_rate": 8.954206960987088e-06, "loss": 0.7228, "step": 7614 }, { "epoch": 0.2333885006742675, "grad_norm": 0.8242552751665423, "learning_rate": 8.953903184490688e-06, "loss": 0.4896, "step": 7615 }, { "epoch": 0.2334191491970087, "grad_norm": 1.9316993776889095, "learning_rate": 8.953599369035471e-06, "loss": 0.8169, "step": 7616 }, { "epoch": 0.2334497977197499, "grad_norm": 1.9330443913863042, "learning_rate": 8.953295514624428e-06, "loss": 0.6484, "step": 7617 }, { "epoch": 0.23348044624249112, "grad_norm": 0.7911193561115963, "learning_rate": 8.95299162126055e-06, "loss": 0.4783, "step": 7618 }, { "epoch": 0.23351109476523232, "grad_norm": 1.8367052117669442, "learning_rate": 8.952687688946836e-06, "loss": 0.7801, "step": 7619 }, { "epoch": 0.23354174328797353, "grad_norm": 1.853510076427441, "learning_rate": 8.952383717686277e-06, "loss": 0.6952, "step": 7620 }, { "epoch": 0.23357239181071474, "grad_norm": 1.7691408780981297, "learning_rate": 8.952079707481872e-06, "loss": 0.6251, "step": 7621 }, { "epoch": 0.2336030403334559, "grad_norm": 1.826736157978531, "learning_rate": 8.951775658336612e-06, "loss": 0.6483, "step": 7622 }, { "epoch": 0.23363368885619712, "grad_norm": 1.7652453373049493, "learning_rate": 8.951471570253498e-06, "loss": 0.6971, "step": 7623 }, { "epoch": 0.23366433737893832, "grad_norm": 1.6667493750662232, "learning_rate": 8.951167443235522e-06, "loss": 0.7103, "step": 7624 }, { "epoch": 0.23369498590167953, "grad_norm": 1.8868819058054231, "learning_rate": 8.950863277285683e-06, "loss": 0.7262, "step": 7625 }, { "epoch": 0.23372563442442074, "grad_norm": 1.611723642105559, "learning_rate": 8.950559072406977e-06, "loss": 0.6967, "step": 7626 }, { "epoch": 0.23375628294716194, "grad_norm": 1.684290012431321, "learning_rate": 8.950254828602402e-06, "loss": 0.6687, "step": 7627 }, { "epoch": 0.23378693146990315, "grad_norm": 1.5924256348355144, "learning_rate": 8.949950545874954e-06, "loss": 0.6492, "step": 7628 }, { "epoch": 0.23381757999264435, "grad_norm": 1.6740443025210414, "learning_rate": 8.949646224227635e-06, "loss": 0.6973, "step": 7629 }, { "epoch": 0.23384822851538556, "grad_norm": 1.578995138800541, "learning_rate": 8.94934186366344e-06, "loss": 0.6999, "step": 7630 }, { "epoch": 0.23387887703812676, "grad_norm": 1.6466023709891493, "learning_rate": 8.94903746418537e-06, "loss": 0.7719, "step": 7631 }, { "epoch": 0.23390952556086797, "grad_norm": 1.9466302874170074, "learning_rate": 8.94873302579642e-06, "loss": 0.719, "step": 7632 }, { "epoch": 0.23394017408360918, "grad_norm": 1.8084180059487753, "learning_rate": 8.948428548499597e-06, "loss": 0.7347, "step": 7633 }, { "epoch": 0.23397082260635038, "grad_norm": 1.6691352786277696, "learning_rate": 8.948124032297897e-06, "loss": 0.7207, "step": 7634 }, { "epoch": 0.2340014711290916, "grad_norm": 1.8770732247099984, "learning_rate": 8.94781947719432e-06, "loss": 0.653, "step": 7635 }, { "epoch": 0.2340321196518328, "grad_norm": 1.8529762926924658, "learning_rate": 8.947514883191868e-06, "loss": 0.6224, "step": 7636 }, { "epoch": 0.234062768174574, "grad_norm": 1.7994089457576308, "learning_rate": 8.94721025029354e-06, "loss": 0.6456, "step": 7637 }, { "epoch": 0.23409341669731518, "grad_norm": 1.8712725308140705, "learning_rate": 8.94690557850234e-06, "loss": 0.8002, "step": 7638 }, { "epoch": 0.23412406522005638, "grad_norm": 1.6856377576080406, "learning_rate": 8.946600867821272e-06, "loss": 0.8136, "step": 7639 }, { "epoch": 0.2341547137427976, "grad_norm": 1.0146678289724234, "learning_rate": 8.946296118253333e-06, "loss": 0.524, "step": 7640 }, { "epoch": 0.2341853622655388, "grad_norm": 1.9968928386960065, "learning_rate": 8.945991329801528e-06, "loss": 0.8366, "step": 7641 }, { "epoch": 0.23421601078828, "grad_norm": 1.9798797179462617, "learning_rate": 8.945686502468865e-06, "loss": 0.7774, "step": 7642 }, { "epoch": 0.2342466593110212, "grad_norm": 1.6199651867519216, "learning_rate": 8.94538163625834e-06, "loss": 0.7109, "step": 7643 }, { "epoch": 0.2342773078337624, "grad_norm": 1.948200069456506, "learning_rate": 8.945076731172961e-06, "loss": 0.6386, "step": 7644 }, { "epoch": 0.23430795635650362, "grad_norm": 1.747499563514388, "learning_rate": 8.944771787215731e-06, "loss": 0.7014, "step": 7645 }, { "epoch": 0.23433860487924482, "grad_norm": 1.590395960708311, "learning_rate": 8.944466804389657e-06, "loss": 0.7619, "step": 7646 }, { "epoch": 0.23436925340198603, "grad_norm": 1.635001843040509, "learning_rate": 8.94416178269774e-06, "loss": 0.7489, "step": 7647 }, { "epoch": 0.23439990192472723, "grad_norm": 1.9074463812519815, "learning_rate": 8.94385672214299e-06, "loss": 0.7481, "step": 7648 }, { "epoch": 0.23443055044746844, "grad_norm": 1.8291644815883157, "learning_rate": 8.94355162272841e-06, "loss": 0.7337, "step": 7649 }, { "epoch": 0.23446119897020964, "grad_norm": 1.7424164376184228, "learning_rate": 8.943246484457006e-06, "loss": 0.7021, "step": 7650 }, { "epoch": 0.23449184749295085, "grad_norm": 1.5969401686526663, "learning_rate": 8.942941307331786e-06, "loss": 0.5715, "step": 7651 }, { "epoch": 0.23452249601569206, "grad_norm": 1.592333479139271, "learning_rate": 8.942636091355756e-06, "loss": 0.69, "step": 7652 }, { "epoch": 0.23455314453843326, "grad_norm": 1.8008072124399817, "learning_rate": 8.942330836531925e-06, "loss": 0.7417, "step": 7653 }, { "epoch": 0.23458379306117444, "grad_norm": 1.779479684626396, "learning_rate": 8.9420255428633e-06, "loss": 0.6338, "step": 7654 }, { "epoch": 0.23461444158391564, "grad_norm": 1.8329589778008166, "learning_rate": 8.941720210352886e-06, "loss": 0.6465, "step": 7655 }, { "epoch": 0.23464509010665685, "grad_norm": 1.8863133849047626, "learning_rate": 8.941414839003695e-06, "loss": 0.7276, "step": 7656 }, { "epoch": 0.23467573862939806, "grad_norm": 1.6675075214387458, "learning_rate": 8.941109428818737e-06, "loss": 0.7628, "step": 7657 }, { "epoch": 0.23470638715213926, "grad_norm": 1.782800168723298, "learning_rate": 8.940803979801019e-06, "loss": 0.6977, "step": 7658 }, { "epoch": 0.23473703567488047, "grad_norm": 1.738538691306624, "learning_rate": 8.940498491953549e-06, "loss": 0.7852, "step": 7659 }, { "epoch": 0.23476768419762167, "grad_norm": 1.8110590899161632, "learning_rate": 8.940192965279342e-06, "loss": 0.6588, "step": 7660 }, { "epoch": 0.23479833272036288, "grad_norm": 2.055493794757792, "learning_rate": 8.939887399781404e-06, "loss": 0.7044, "step": 7661 }, { "epoch": 0.23482898124310408, "grad_norm": 1.6673216614239454, "learning_rate": 8.939581795462747e-06, "loss": 0.7189, "step": 7662 }, { "epoch": 0.2348596297658453, "grad_norm": 1.9265972140294088, "learning_rate": 8.939276152326384e-06, "loss": 0.6983, "step": 7663 }, { "epoch": 0.2348902782885865, "grad_norm": 1.0506054302455627, "learning_rate": 8.938970470375324e-06, "loss": 0.5189, "step": 7664 }, { "epoch": 0.2349209268113277, "grad_norm": 1.8440251483779808, "learning_rate": 8.93866474961258e-06, "loss": 0.7599, "step": 7665 }, { "epoch": 0.2349515753340689, "grad_norm": 1.831378521414513, "learning_rate": 8.938358990041164e-06, "loss": 0.704, "step": 7666 }, { "epoch": 0.2349822238568101, "grad_norm": 1.640873006908131, "learning_rate": 8.938053191664091e-06, "loss": 0.5419, "step": 7667 }, { "epoch": 0.23501287237955132, "grad_norm": 0.7371241951570491, "learning_rate": 8.937747354484372e-06, "loss": 0.4889, "step": 7668 }, { "epoch": 0.2350435209022925, "grad_norm": 1.9684696278966671, "learning_rate": 8.93744147850502e-06, "loss": 0.7216, "step": 7669 }, { "epoch": 0.2350741694250337, "grad_norm": 0.8427777607165567, "learning_rate": 8.93713556372905e-06, "loss": 0.4974, "step": 7670 }, { "epoch": 0.2351048179477749, "grad_norm": 0.8116486844698865, "learning_rate": 8.936829610159477e-06, "loss": 0.5104, "step": 7671 }, { "epoch": 0.2351354664705161, "grad_norm": 0.8206953230417033, "learning_rate": 8.936523617799312e-06, "loss": 0.4874, "step": 7672 }, { "epoch": 0.23516611499325732, "grad_norm": 1.696604298854288, "learning_rate": 8.936217586651574e-06, "loss": 0.7463, "step": 7673 }, { "epoch": 0.23519676351599852, "grad_norm": 1.5066380400410475, "learning_rate": 8.935911516719278e-06, "loss": 0.6646, "step": 7674 }, { "epoch": 0.23522741203873973, "grad_norm": 1.7407584241579148, "learning_rate": 8.935605408005437e-06, "loss": 0.71, "step": 7675 }, { "epoch": 0.23525806056148094, "grad_norm": 1.6184661504130098, "learning_rate": 8.93529926051307e-06, "loss": 0.7367, "step": 7676 }, { "epoch": 0.23528870908422214, "grad_norm": 1.5548904371032741, "learning_rate": 8.934993074245193e-06, "loss": 0.6325, "step": 7677 }, { "epoch": 0.23531935760696335, "grad_norm": 1.9500527842827877, "learning_rate": 8.93468684920482e-06, "loss": 0.6927, "step": 7678 }, { "epoch": 0.23535000612970455, "grad_norm": 1.8328778678153692, "learning_rate": 8.934380585394972e-06, "loss": 0.764, "step": 7679 }, { "epoch": 0.23538065465244576, "grad_norm": 1.9062895052822235, "learning_rate": 8.934074282818667e-06, "loss": 0.7926, "step": 7680 }, { "epoch": 0.23541130317518696, "grad_norm": 1.670828988176945, "learning_rate": 8.93376794147892e-06, "loss": 0.6507, "step": 7681 }, { "epoch": 0.23544195169792817, "grad_norm": 1.732739622716536, "learning_rate": 8.933461561378752e-06, "loss": 0.6146, "step": 7682 }, { "epoch": 0.23547260022066938, "grad_norm": 2.6639783067607334, "learning_rate": 8.933155142521179e-06, "loss": 0.8019, "step": 7683 }, { "epoch": 0.23550324874341058, "grad_norm": 1.7403360157248073, "learning_rate": 8.932848684909223e-06, "loss": 0.593, "step": 7684 }, { "epoch": 0.23553389726615176, "grad_norm": 1.7583003778467219, "learning_rate": 8.932542188545903e-06, "loss": 0.6918, "step": 7685 }, { "epoch": 0.23556454578889297, "grad_norm": 1.5843903324709525, "learning_rate": 8.93223565343424e-06, "loss": 0.6555, "step": 7686 }, { "epoch": 0.23559519431163417, "grad_norm": 1.4120719653405565, "learning_rate": 8.93192907957725e-06, "loss": 0.5926, "step": 7687 }, { "epoch": 0.23562584283437538, "grad_norm": 1.742945045627312, "learning_rate": 8.931622466977959e-06, "loss": 0.7611, "step": 7688 }, { "epoch": 0.23565649135711658, "grad_norm": 1.5584956419584408, "learning_rate": 8.931315815639385e-06, "loss": 0.7149, "step": 7689 }, { "epoch": 0.2356871398798578, "grad_norm": 1.6996532169347929, "learning_rate": 8.93100912556455e-06, "loss": 0.7452, "step": 7690 }, { "epoch": 0.235717788402599, "grad_norm": 0.9642606794397475, "learning_rate": 8.930702396756476e-06, "loss": 0.4779, "step": 7691 }, { "epoch": 0.2357484369253402, "grad_norm": 1.7164147390189795, "learning_rate": 8.930395629218187e-06, "loss": 0.8456, "step": 7692 }, { "epoch": 0.2357790854480814, "grad_norm": 0.8702406767091625, "learning_rate": 8.930088822952703e-06, "loss": 0.5081, "step": 7693 }, { "epoch": 0.2358097339708226, "grad_norm": 2.1646014876968316, "learning_rate": 8.92978197796305e-06, "loss": 0.761, "step": 7694 }, { "epoch": 0.23584038249356382, "grad_norm": 1.8102837803778582, "learning_rate": 8.92947509425225e-06, "loss": 0.6535, "step": 7695 }, { "epoch": 0.23587103101630502, "grad_norm": 0.8538073300852668, "learning_rate": 8.929168171823323e-06, "loss": 0.4668, "step": 7696 }, { "epoch": 0.23590167953904623, "grad_norm": 1.9374820873371938, "learning_rate": 8.928861210679298e-06, "loss": 0.6964, "step": 7697 }, { "epoch": 0.23593232806178743, "grad_norm": 1.5567819546350503, "learning_rate": 8.928554210823201e-06, "loss": 0.7598, "step": 7698 }, { "epoch": 0.23596297658452864, "grad_norm": 1.8562901502638587, "learning_rate": 8.92824717225805e-06, "loss": 0.8606, "step": 7699 }, { "epoch": 0.23599362510726982, "grad_norm": 1.7576003936927231, "learning_rate": 8.927940094986879e-06, "loss": 0.7068, "step": 7700 }, { "epoch": 0.23602427363001102, "grad_norm": 2.0832948440911543, "learning_rate": 8.927632979012707e-06, "loss": 0.737, "step": 7701 }, { "epoch": 0.23605492215275223, "grad_norm": 0.9096206353652991, "learning_rate": 8.927325824338561e-06, "loss": 0.4892, "step": 7702 }, { "epoch": 0.23608557067549343, "grad_norm": 0.8707309748090155, "learning_rate": 8.92701863096747e-06, "loss": 0.4968, "step": 7703 }, { "epoch": 0.23611621919823464, "grad_norm": 1.6815161753100512, "learning_rate": 8.92671139890246e-06, "loss": 0.6785, "step": 7704 }, { "epoch": 0.23614686772097584, "grad_norm": 1.802833484487542, "learning_rate": 8.926404128146558e-06, "loss": 0.6896, "step": 7705 }, { "epoch": 0.23617751624371705, "grad_norm": 1.9411281926845294, "learning_rate": 8.92609681870279e-06, "loss": 0.7166, "step": 7706 }, { "epoch": 0.23620816476645826, "grad_norm": 1.7945542410233093, "learning_rate": 8.925789470574187e-06, "loss": 0.7189, "step": 7707 }, { "epoch": 0.23623881328919946, "grad_norm": 1.858206251178851, "learning_rate": 8.925482083763776e-06, "loss": 0.7617, "step": 7708 }, { "epoch": 0.23626946181194067, "grad_norm": 1.751603668384798, "learning_rate": 8.925174658274585e-06, "loss": 0.6429, "step": 7709 }, { "epoch": 0.23630011033468187, "grad_norm": 1.9114732412774307, "learning_rate": 8.924867194109643e-06, "loss": 0.6649, "step": 7710 }, { "epoch": 0.23633075885742308, "grad_norm": 1.8999283674723222, "learning_rate": 8.924559691271983e-06, "loss": 0.753, "step": 7711 }, { "epoch": 0.23636140738016428, "grad_norm": 1.8963224921579456, "learning_rate": 8.92425214976463e-06, "loss": 0.7483, "step": 7712 }, { "epoch": 0.2363920559029055, "grad_norm": 1.0604017303329731, "learning_rate": 8.923944569590617e-06, "loss": 0.4651, "step": 7713 }, { "epoch": 0.2364227044256467, "grad_norm": 1.8553114568844618, "learning_rate": 8.923636950752974e-06, "loss": 0.6099, "step": 7714 }, { "epoch": 0.2364533529483879, "grad_norm": 1.8783757922649418, "learning_rate": 8.923329293254732e-06, "loss": 0.776, "step": 7715 }, { "epoch": 0.23648400147112908, "grad_norm": 1.6683564069484162, "learning_rate": 8.923021597098924e-06, "loss": 0.7114, "step": 7716 }, { "epoch": 0.23651464999387029, "grad_norm": 1.9318708701491314, "learning_rate": 8.922713862288579e-06, "loss": 0.7827, "step": 7717 }, { "epoch": 0.2365452985166115, "grad_norm": 1.7768425446118754, "learning_rate": 8.922406088826732e-06, "loss": 0.6442, "step": 7718 }, { "epoch": 0.2365759470393527, "grad_norm": 0.812698622686433, "learning_rate": 8.922098276716413e-06, "loss": 0.4758, "step": 7719 }, { "epoch": 0.2366065955620939, "grad_norm": 1.7386721828399025, "learning_rate": 8.921790425960658e-06, "loss": 0.6626, "step": 7720 }, { "epoch": 0.2366372440848351, "grad_norm": 1.6327342077592342, "learning_rate": 8.921482536562495e-06, "loss": 0.6793, "step": 7721 }, { "epoch": 0.2366678926075763, "grad_norm": 0.8289692835134876, "learning_rate": 8.921174608524964e-06, "loss": 0.4881, "step": 7722 }, { "epoch": 0.23669854113031752, "grad_norm": 0.8442198183112527, "learning_rate": 8.920866641851094e-06, "loss": 0.49, "step": 7723 }, { "epoch": 0.23672918965305872, "grad_norm": 1.6471567781074699, "learning_rate": 8.920558636543924e-06, "loss": 0.7015, "step": 7724 }, { "epoch": 0.23675983817579993, "grad_norm": 1.7588154607227846, "learning_rate": 8.920250592606486e-06, "loss": 0.7191, "step": 7725 }, { "epoch": 0.23679048669854114, "grad_norm": 1.8463325520208898, "learning_rate": 8.919942510041817e-06, "loss": 0.6594, "step": 7726 }, { "epoch": 0.23682113522128234, "grad_norm": 1.9203458674140317, "learning_rate": 8.91963438885295e-06, "loss": 0.8239, "step": 7727 }, { "epoch": 0.23685178374402355, "grad_norm": 1.8639526368684949, "learning_rate": 8.919326229042922e-06, "loss": 0.7076, "step": 7728 }, { "epoch": 0.23688243226676475, "grad_norm": 1.683014989982066, "learning_rate": 8.91901803061477e-06, "loss": 0.7377, "step": 7729 }, { "epoch": 0.23691308078950596, "grad_norm": 1.0121222792290445, "learning_rate": 8.918709793571532e-06, "loss": 0.4854, "step": 7730 }, { "epoch": 0.23694372931224714, "grad_norm": 1.5720955434036572, "learning_rate": 8.918401517916243e-06, "loss": 0.6552, "step": 7731 }, { "epoch": 0.23697437783498834, "grad_norm": 1.8923546204338402, "learning_rate": 8.918093203651941e-06, "loss": 0.7846, "step": 7732 }, { "epoch": 0.23700502635772955, "grad_norm": 1.8070491371219708, "learning_rate": 8.917784850781665e-06, "loss": 0.7135, "step": 7733 }, { "epoch": 0.23703567488047075, "grad_norm": 1.9305613993925337, "learning_rate": 8.917476459308452e-06, "loss": 0.738, "step": 7734 }, { "epoch": 0.23706632340321196, "grad_norm": 2.1199751202606927, "learning_rate": 8.917168029235341e-06, "loss": 0.7293, "step": 7735 }, { "epoch": 0.23709697192595316, "grad_norm": 2.009717889052549, "learning_rate": 8.916859560565372e-06, "loss": 0.7346, "step": 7736 }, { "epoch": 0.23712762044869437, "grad_norm": 1.6638217424881092, "learning_rate": 8.916551053301582e-06, "loss": 0.7008, "step": 7737 }, { "epoch": 0.23715826897143558, "grad_norm": 1.8683930786677798, "learning_rate": 8.916242507447013e-06, "loss": 0.6732, "step": 7738 }, { "epoch": 0.23718891749417678, "grad_norm": 1.636206494526885, "learning_rate": 8.915933923004705e-06, "loss": 0.7599, "step": 7739 }, { "epoch": 0.237219566016918, "grad_norm": 1.7704878483930417, "learning_rate": 8.915625299977699e-06, "loss": 0.7773, "step": 7740 }, { "epoch": 0.2372502145396592, "grad_norm": 1.7243760600470035, "learning_rate": 8.915316638369033e-06, "loss": 0.6613, "step": 7741 }, { "epoch": 0.2372808630624004, "grad_norm": 1.0224894869075867, "learning_rate": 8.915007938181752e-06, "loss": 0.4785, "step": 7742 }, { "epoch": 0.2373115115851416, "grad_norm": 1.5473652045769617, "learning_rate": 8.914699199418895e-06, "loss": 0.8099, "step": 7743 }, { "epoch": 0.2373421601078828, "grad_norm": 0.8100567039332447, "learning_rate": 8.914390422083506e-06, "loss": 0.5065, "step": 7744 }, { "epoch": 0.23737280863062402, "grad_norm": 0.7928590739030718, "learning_rate": 8.914081606178627e-06, "loss": 0.5099, "step": 7745 }, { "epoch": 0.23740345715336522, "grad_norm": 1.7817029351787674, "learning_rate": 8.9137727517073e-06, "loss": 0.67, "step": 7746 }, { "epoch": 0.2374341056761064, "grad_norm": 1.7602618477865122, "learning_rate": 8.913463858672566e-06, "loss": 0.8256, "step": 7747 }, { "epoch": 0.2374647541988476, "grad_norm": 1.8261798033353691, "learning_rate": 8.913154927077475e-06, "loss": 0.6459, "step": 7748 }, { "epoch": 0.2374954027215888, "grad_norm": 1.7738577980219237, "learning_rate": 8.912845956925064e-06, "loss": 0.6483, "step": 7749 }, { "epoch": 0.23752605124433002, "grad_norm": 1.7154861831026667, "learning_rate": 8.912536948218385e-06, "loss": 0.732, "step": 7750 }, { "epoch": 0.23755669976707122, "grad_norm": 1.2193151747549482, "learning_rate": 8.912227900960475e-06, "loss": 0.4766, "step": 7751 }, { "epoch": 0.23758734828981243, "grad_norm": 0.9817058219741279, "learning_rate": 8.911918815154384e-06, "loss": 0.4651, "step": 7752 }, { "epoch": 0.23761799681255363, "grad_norm": 1.7756326857794975, "learning_rate": 8.911609690803154e-06, "loss": 0.614, "step": 7753 }, { "epoch": 0.23764864533529484, "grad_norm": 1.9623380806559432, "learning_rate": 8.911300527909836e-06, "loss": 0.8314, "step": 7754 }, { "epoch": 0.23767929385803604, "grad_norm": 1.7489377747401385, "learning_rate": 8.91099132647747e-06, "loss": 0.8006, "step": 7755 }, { "epoch": 0.23770994238077725, "grad_norm": 1.0541862207621748, "learning_rate": 8.910682086509108e-06, "loss": 0.4841, "step": 7756 }, { "epoch": 0.23774059090351846, "grad_norm": 1.814463794138197, "learning_rate": 8.910372808007795e-06, "loss": 0.7458, "step": 7757 }, { "epoch": 0.23777123942625966, "grad_norm": 1.7803199338398377, "learning_rate": 8.910063490976576e-06, "loss": 0.695, "step": 7758 }, { "epoch": 0.23780188794900087, "grad_norm": 1.751552136373409, "learning_rate": 8.909754135418503e-06, "loss": 0.7143, "step": 7759 }, { "epoch": 0.23783253647174207, "grad_norm": 1.8904096540345037, "learning_rate": 8.909444741336622e-06, "loss": 0.6794, "step": 7760 }, { "epoch": 0.23786318499448328, "grad_norm": 1.735454488223871, "learning_rate": 8.90913530873398e-06, "loss": 0.6768, "step": 7761 }, { "epoch": 0.23789383351722446, "grad_norm": 1.6400239327380448, "learning_rate": 8.90882583761363e-06, "loss": 0.6031, "step": 7762 }, { "epoch": 0.23792448203996566, "grad_norm": 1.5818392763277358, "learning_rate": 8.908516327978618e-06, "loss": 0.6604, "step": 7763 }, { "epoch": 0.23795513056270687, "grad_norm": 0.9446032921123202, "learning_rate": 8.908206779831995e-06, "loss": 0.5088, "step": 7764 }, { "epoch": 0.23798577908544807, "grad_norm": 1.7193947748074185, "learning_rate": 8.907897193176809e-06, "loss": 0.747, "step": 7765 }, { "epoch": 0.23801642760818928, "grad_norm": 1.6172587982581252, "learning_rate": 8.907587568016112e-06, "loss": 0.66, "step": 7766 }, { "epoch": 0.23804707613093049, "grad_norm": 1.626763979876953, "learning_rate": 8.907277904352955e-06, "loss": 0.7223, "step": 7767 }, { "epoch": 0.2380777246536717, "grad_norm": 1.564173368246776, "learning_rate": 8.906968202190392e-06, "loss": 0.6429, "step": 7768 }, { "epoch": 0.2381083731764129, "grad_norm": 2.0383714879359314, "learning_rate": 8.906658461531469e-06, "loss": 0.8008, "step": 7769 }, { "epoch": 0.2381390216991541, "grad_norm": 1.7031136454295268, "learning_rate": 8.90634868237924e-06, "loss": 0.6945, "step": 7770 }, { "epoch": 0.2381696702218953, "grad_norm": 1.8424948245155568, "learning_rate": 8.90603886473676e-06, "loss": 0.7812, "step": 7771 }, { "epoch": 0.2382003187446365, "grad_norm": 0.8552369073149164, "learning_rate": 8.905729008607079e-06, "loss": 0.5058, "step": 7772 }, { "epoch": 0.23823096726737772, "grad_norm": 1.5228152888007875, "learning_rate": 8.905419113993252e-06, "loss": 0.7617, "step": 7773 }, { "epoch": 0.23826161579011892, "grad_norm": 1.724880651896165, "learning_rate": 8.905109180898328e-06, "loss": 0.6506, "step": 7774 }, { "epoch": 0.23829226431286013, "grad_norm": 1.6742191512321756, "learning_rate": 8.904799209325367e-06, "loss": 0.7808, "step": 7775 }, { "epoch": 0.23832291283560134, "grad_norm": 1.8843000686428966, "learning_rate": 8.904489199277419e-06, "loss": 0.6877, "step": 7776 }, { "epoch": 0.23835356135834254, "grad_norm": 0.8044872518611815, "learning_rate": 8.904179150757539e-06, "loss": 0.4646, "step": 7777 }, { "epoch": 0.23838420988108372, "grad_norm": 1.6293429417426863, "learning_rate": 8.903869063768784e-06, "loss": 0.7264, "step": 7778 }, { "epoch": 0.23841485840382493, "grad_norm": 1.7203025012583149, "learning_rate": 8.903558938314209e-06, "loss": 0.6805, "step": 7779 }, { "epoch": 0.23844550692656613, "grad_norm": 1.8434264276517203, "learning_rate": 8.90324877439687e-06, "loss": 0.7676, "step": 7780 }, { "epoch": 0.23847615544930734, "grad_norm": 1.7951983597594712, "learning_rate": 8.90293857201982e-06, "loss": 0.6452, "step": 7781 }, { "epoch": 0.23850680397204854, "grad_norm": 0.8617413344309398, "learning_rate": 8.902628331186117e-06, "loss": 0.4758, "step": 7782 }, { "epoch": 0.23853745249478975, "grad_norm": 1.9832389627133489, "learning_rate": 8.902318051898819e-06, "loss": 0.76, "step": 7783 }, { "epoch": 0.23856810101753095, "grad_norm": 0.80067759881273, "learning_rate": 8.902007734160985e-06, "loss": 0.4975, "step": 7784 }, { "epoch": 0.23859874954027216, "grad_norm": 0.8210850861920713, "learning_rate": 8.90169737797567e-06, "loss": 0.506, "step": 7785 }, { "epoch": 0.23862939806301336, "grad_norm": 1.6896925971208805, "learning_rate": 8.90138698334593e-06, "loss": 0.6996, "step": 7786 }, { "epoch": 0.23866004658575457, "grad_norm": 1.8566627747655944, "learning_rate": 8.901076550274827e-06, "loss": 0.6785, "step": 7787 }, { "epoch": 0.23869069510849578, "grad_norm": 1.5646094810561018, "learning_rate": 8.900766078765417e-06, "loss": 0.7604, "step": 7788 }, { "epoch": 0.23872134363123698, "grad_norm": 1.8747368730946852, "learning_rate": 8.900455568820763e-06, "loss": 0.7281, "step": 7789 }, { "epoch": 0.2387519921539782, "grad_norm": 1.7655831894629719, "learning_rate": 8.900145020443922e-06, "loss": 0.7499, "step": 7790 }, { "epoch": 0.2387826406767194, "grad_norm": 1.6500747684941672, "learning_rate": 8.899834433637955e-06, "loss": 0.7344, "step": 7791 }, { "epoch": 0.2388132891994606, "grad_norm": 1.8874999626104683, "learning_rate": 8.89952380840592e-06, "loss": 0.7561, "step": 7792 }, { "epoch": 0.23884393772220178, "grad_norm": 1.690011992263975, "learning_rate": 8.89921314475088e-06, "loss": 0.7485, "step": 7793 }, { "epoch": 0.23887458624494298, "grad_norm": 1.6780046046852015, "learning_rate": 8.898902442675894e-06, "loss": 0.7004, "step": 7794 }, { "epoch": 0.2389052347676842, "grad_norm": 1.5655683228504527, "learning_rate": 8.898591702184027e-06, "loss": 0.7356, "step": 7795 }, { "epoch": 0.2389358832904254, "grad_norm": 1.7234040222531823, "learning_rate": 8.898280923278336e-06, "loss": 0.7335, "step": 7796 }, { "epoch": 0.2389665318131666, "grad_norm": 1.2201884227964348, "learning_rate": 8.897970105961887e-06, "loss": 0.4918, "step": 7797 }, { "epoch": 0.2389971803359078, "grad_norm": 1.8627245429946804, "learning_rate": 8.897659250237742e-06, "loss": 0.7146, "step": 7798 }, { "epoch": 0.239027828858649, "grad_norm": 0.939068980722789, "learning_rate": 8.897348356108961e-06, "loss": 0.4915, "step": 7799 }, { "epoch": 0.23905847738139022, "grad_norm": 0.8450397510716464, "learning_rate": 8.897037423578611e-06, "loss": 0.4839, "step": 7800 }, { "epoch": 0.23908912590413142, "grad_norm": 1.8638648718005149, "learning_rate": 8.896726452649754e-06, "loss": 0.7189, "step": 7801 }, { "epoch": 0.23911977442687263, "grad_norm": 1.855705492825267, "learning_rate": 8.896415443325453e-06, "loss": 0.8377, "step": 7802 }, { "epoch": 0.23915042294961383, "grad_norm": 1.0923725573692054, "learning_rate": 8.896104395608775e-06, "loss": 0.4737, "step": 7803 }, { "epoch": 0.23918107147235504, "grad_norm": 1.8565798840141345, "learning_rate": 8.895793309502782e-06, "loss": 0.7343, "step": 7804 }, { "epoch": 0.23921171999509624, "grad_norm": 1.9133433427813424, "learning_rate": 8.895482185010543e-06, "loss": 0.7276, "step": 7805 }, { "epoch": 0.23924236851783745, "grad_norm": 1.817799765955026, "learning_rate": 8.89517102213512e-06, "loss": 0.6891, "step": 7806 }, { "epoch": 0.23927301704057866, "grad_norm": 1.7168970759075994, "learning_rate": 8.89485982087958e-06, "loss": 0.6142, "step": 7807 }, { "epoch": 0.23930366556331986, "grad_norm": 1.8293020677994256, "learning_rate": 8.89454858124699e-06, "loss": 0.6046, "step": 7808 }, { "epoch": 0.23933431408606104, "grad_norm": 1.9894603287776573, "learning_rate": 8.894237303240417e-06, "loss": 0.6931, "step": 7809 }, { "epoch": 0.23936496260880225, "grad_norm": 1.6300331163375192, "learning_rate": 8.893925986862928e-06, "loss": 0.8164, "step": 7810 }, { "epoch": 0.23939561113154345, "grad_norm": 1.7850451146772353, "learning_rate": 8.893614632117589e-06, "loss": 0.6814, "step": 7811 }, { "epoch": 0.23942625965428466, "grad_norm": 1.695893789697916, "learning_rate": 8.893303239007468e-06, "loss": 0.6797, "step": 7812 }, { "epoch": 0.23945690817702586, "grad_norm": 1.1171840220799176, "learning_rate": 8.892991807535635e-06, "loss": 0.5008, "step": 7813 }, { "epoch": 0.23948755669976707, "grad_norm": 1.8464594406225916, "learning_rate": 8.892680337705157e-06, "loss": 0.6274, "step": 7814 }, { "epoch": 0.23951820522250827, "grad_norm": 1.7892305372291746, "learning_rate": 8.892368829519105e-06, "loss": 0.7202, "step": 7815 }, { "epoch": 0.23954885374524948, "grad_norm": 1.6283624259571976, "learning_rate": 8.892057282980545e-06, "loss": 0.6837, "step": 7816 }, { "epoch": 0.23957950226799068, "grad_norm": 1.817690690752984, "learning_rate": 8.89174569809255e-06, "loss": 0.7503, "step": 7817 }, { "epoch": 0.2396101507907319, "grad_norm": 1.6022716742614256, "learning_rate": 8.891434074858189e-06, "loss": 0.6576, "step": 7818 }, { "epoch": 0.2396407993134731, "grad_norm": 1.5680596041034172, "learning_rate": 8.891122413280533e-06, "loss": 0.6963, "step": 7819 }, { "epoch": 0.2396714478362143, "grad_norm": 1.9980706162956552, "learning_rate": 8.890810713362651e-06, "loss": 0.7831, "step": 7820 }, { "epoch": 0.2397020963589555, "grad_norm": 1.7786711669178648, "learning_rate": 8.890498975107616e-06, "loss": 0.7817, "step": 7821 }, { "epoch": 0.2397327448816967, "grad_norm": 1.6610092867710793, "learning_rate": 8.890187198518498e-06, "loss": 0.7122, "step": 7822 }, { "epoch": 0.23976339340443792, "grad_norm": 1.5506579463527974, "learning_rate": 8.889875383598372e-06, "loss": 0.6899, "step": 7823 }, { "epoch": 0.2397940419271791, "grad_norm": 1.763635427727064, "learning_rate": 8.889563530350307e-06, "loss": 0.7287, "step": 7824 }, { "epoch": 0.2398246904499203, "grad_norm": 1.9187885180923616, "learning_rate": 8.88925163877738e-06, "loss": 0.6304, "step": 7825 }, { "epoch": 0.2398553389726615, "grad_norm": 1.7204413915372483, "learning_rate": 8.888939708882658e-06, "loss": 0.6928, "step": 7826 }, { "epoch": 0.2398859874954027, "grad_norm": 1.5702213218851686, "learning_rate": 8.888627740669221e-06, "loss": 0.7462, "step": 7827 }, { "epoch": 0.23991663601814392, "grad_norm": 1.6509912314653472, "learning_rate": 8.888315734140139e-06, "loss": 0.6602, "step": 7828 }, { "epoch": 0.23994728454088513, "grad_norm": 1.9325560620131828, "learning_rate": 8.888003689298487e-06, "loss": 0.7406, "step": 7829 }, { "epoch": 0.23997793306362633, "grad_norm": 1.8283687714257353, "learning_rate": 8.88769160614734e-06, "loss": 0.7756, "step": 7830 }, { "epoch": 0.24000858158636754, "grad_norm": 1.5516082006176717, "learning_rate": 8.887379484689772e-06, "loss": 0.6712, "step": 7831 }, { "epoch": 0.24003923010910874, "grad_norm": 1.7500147357311229, "learning_rate": 8.88706732492886e-06, "loss": 0.7499, "step": 7832 }, { "epoch": 0.24006987863184995, "grad_norm": 1.71307354829612, "learning_rate": 8.88675512686768e-06, "loss": 0.673, "step": 7833 }, { "epoch": 0.24010052715459115, "grad_norm": 2.277100882436193, "learning_rate": 8.886442890509305e-06, "loss": 0.7328, "step": 7834 }, { "epoch": 0.24013117567733236, "grad_norm": 2.070272278905906, "learning_rate": 8.886130615856815e-06, "loss": 0.7488, "step": 7835 }, { "epoch": 0.24016182420007356, "grad_norm": 1.7164242234766085, "learning_rate": 8.885818302913286e-06, "loss": 0.8083, "step": 7836 }, { "epoch": 0.24019247272281477, "grad_norm": 1.8848594948548638, "learning_rate": 8.885505951681795e-06, "loss": 0.7542, "step": 7837 }, { "epoch": 0.24022312124555598, "grad_norm": 1.769131764010906, "learning_rate": 8.88519356216542e-06, "loss": 0.7526, "step": 7838 }, { "epoch": 0.24025376976829718, "grad_norm": 1.016611439653614, "learning_rate": 8.884881134367239e-06, "loss": 0.5263, "step": 7839 }, { "epoch": 0.24028441829103836, "grad_norm": 0.9023337185112525, "learning_rate": 8.884568668290329e-06, "loss": 0.5105, "step": 7840 }, { "epoch": 0.24031506681377957, "grad_norm": 1.4470845040720175, "learning_rate": 8.88425616393777e-06, "loss": 0.6868, "step": 7841 }, { "epoch": 0.24034571533652077, "grad_norm": 1.7310700664674477, "learning_rate": 8.883943621312644e-06, "loss": 0.6584, "step": 7842 }, { "epoch": 0.24037636385926198, "grad_norm": 0.8474592760561769, "learning_rate": 8.883631040418024e-06, "loss": 0.4957, "step": 7843 }, { "epoch": 0.24040701238200318, "grad_norm": 1.8261389178865841, "learning_rate": 8.883318421256994e-06, "loss": 0.7148, "step": 7844 }, { "epoch": 0.2404376609047444, "grad_norm": 2.0343844127992883, "learning_rate": 8.883005763832636e-06, "loss": 0.7226, "step": 7845 }, { "epoch": 0.2404683094274856, "grad_norm": 1.7029269806639353, "learning_rate": 8.882693068148027e-06, "loss": 0.634, "step": 7846 }, { "epoch": 0.2404989579502268, "grad_norm": 0.997129631383225, "learning_rate": 8.882380334206252e-06, "loss": 0.4902, "step": 7847 }, { "epoch": 0.240529606472968, "grad_norm": 1.9133890535069569, "learning_rate": 8.882067562010388e-06, "loss": 0.7176, "step": 7848 }, { "epoch": 0.2405602549957092, "grad_norm": 1.5777816003382643, "learning_rate": 8.881754751563521e-06, "loss": 0.6384, "step": 7849 }, { "epoch": 0.24059090351845042, "grad_norm": 1.8341848968958792, "learning_rate": 8.88144190286873e-06, "loss": 0.7134, "step": 7850 }, { "epoch": 0.24062155204119162, "grad_norm": 0.8598740767073961, "learning_rate": 8.881129015929098e-06, "loss": 0.5051, "step": 7851 }, { "epoch": 0.24065220056393283, "grad_norm": 0.8387238763836885, "learning_rate": 8.88081609074771e-06, "loss": 0.4981, "step": 7852 }, { "epoch": 0.24068284908667403, "grad_norm": 1.8751068563543256, "learning_rate": 8.880503127327648e-06, "loss": 0.7005, "step": 7853 }, { "epoch": 0.24071349760941524, "grad_norm": 2.2731829507235917, "learning_rate": 8.880190125671998e-06, "loss": 0.6619, "step": 7854 }, { "epoch": 0.24074414613215642, "grad_norm": 2.4172658806212053, "learning_rate": 8.879877085783838e-06, "loss": 0.6399, "step": 7855 }, { "epoch": 0.24077479465489762, "grad_norm": 1.613196206762624, "learning_rate": 8.879564007666257e-06, "loss": 0.6739, "step": 7856 }, { "epoch": 0.24080544317763883, "grad_norm": 1.8298006620795249, "learning_rate": 8.879250891322341e-06, "loss": 0.7757, "step": 7857 }, { "epoch": 0.24083609170038003, "grad_norm": 1.8714250616231585, "learning_rate": 8.878937736755172e-06, "loss": 0.6646, "step": 7858 }, { "epoch": 0.24086674022312124, "grad_norm": 1.9646990401945852, "learning_rate": 8.878624543967837e-06, "loss": 0.6951, "step": 7859 }, { "epoch": 0.24089738874586245, "grad_norm": 1.0158829730380827, "learning_rate": 8.878311312963423e-06, "loss": 0.5027, "step": 7860 }, { "epoch": 0.24092803726860365, "grad_norm": 2.1640075016597184, "learning_rate": 8.877998043745015e-06, "loss": 0.7997, "step": 7861 }, { "epoch": 0.24095868579134486, "grad_norm": 1.8494285883265047, "learning_rate": 8.8776847363157e-06, "loss": 0.7806, "step": 7862 }, { "epoch": 0.24098933431408606, "grad_norm": 1.9014564901310709, "learning_rate": 8.877371390678565e-06, "loss": 0.6619, "step": 7863 }, { "epoch": 0.24101998283682727, "grad_norm": 1.929085063021647, "learning_rate": 8.877058006836698e-06, "loss": 0.7892, "step": 7864 }, { "epoch": 0.24105063135956847, "grad_norm": 0.8348605419152214, "learning_rate": 8.876744584793186e-06, "loss": 0.4929, "step": 7865 }, { "epoch": 0.24108127988230968, "grad_norm": 1.8276383761524555, "learning_rate": 8.876431124551118e-06, "loss": 0.7648, "step": 7866 }, { "epoch": 0.24111192840505088, "grad_norm": 1.6684262531011036, "learning_rate": 8.876117626113583e-06, "loss": 0.7008, "step": 7867 }, { "epoch": 0.2411425769277921, "grad_norm": 1.7786788022804683, "learning_rate": 8.875804089483669e-06, "loss": 0.7525, "step": 7868 }, { "epoch": 0.2411732254505333, "grad_norm": 1.692470469315091, "learning_rate": 8.875490514664464e-06, "loss": 0.7435, "step": 7869 }, { "epoch": 0.2412038739732745, "grad_norm": 1.781572198587456, "learning_rate": 8.875176901659061e-06, "loss": 0.6964, "step": 7870 }, { "epoch": 0.24123452249601568, "grad_norm": 0.8394319839569279, "learning_rate": 8.874863250470547e-06, "loss": 0.4794, "step": 7871 }, { "epoch": 0.24126517101875689, "grad_norm": 0.8326407440573272, "learning_rate": 8.874549561102014e-06, "loss": 0.4883, "step": 7872 }, { "epoch": 0.2412958195414981, "grad_norm": 0.8110578139773392, "learning_rate": 8.874235833556554e-06, "loss": 0.4905, "step": 7873 }, { "epoch": 0.2413264680642393, "grad_norm": 0.7699510437699241, "learning_rate": 8.873922067837258e-06, "loss": 0.4891, "step": 7874 }, { "epoch": 0.2413571165869805, "grad_norm": 1.732705879069986, "learning_rate": 8.873608263947216e-06, "loss": 0.6793, "step": 7875 }, { "epoch": 0.2413877651097217, "grad_norm": 1.7728664317097447, "learning_rate": 8.87329442188952e-06, "loss": 0.7424, "step": 7876 }, { "epoch": 0.2414184136324629, "grad_norm": 1.7974740663131266, "learning_rate": 8.872980541667261e-06, "loss": 0.6243, "step": 7877 }, { "epoch": 0.24144906215520412, "grad_norm": 1.7367487637775703, "learning_rate": 8.872666623283539e-06, "loss": 0.7239, "step": 7878 }, { "epoch": 0.24147971067794533, "grad_norm": 1.9819925940793799, "learning_rate": 8.872352666741438e-06, "loss": 0.7482, "step": 7879 }, { "epoch": 0.24151035920068653, "grad_norm": 1.5995496097065853, "learning_rate": 8.872038672044056e-06, "loss": 0.713, "step": 7880 }, { "epoch": 0.24154100772342774, "grad_norm": 1.697325928587626, "learning_rate": 8.871724639194487e-06, "loss": 0.69, "step": 7881 }, { "epoch": 0.24157165624616894, "grad_norm": 2.8622825655431825, "learning_rate": 8.871410568195824e-06, "loss": 0.7376, "step": 7882 }, { "epoch": 0.24160230476891015, "grad_norm": 1.1143513284773956, "learning_rate": 8.871096459051162e-06, "loss": 0.4792, "step": 7883 }, { "epoch": 0.24163295329165135, "grad_norm": 1.6492496337040676, "learning_rate": 8.870782311763596e-06, "loss": 0.7216, "step": 7884 }, { "epoch": 0.24166360181439256, "grad_norm": 1.9849541790029894, "learning_rate": 8.870468126336221e-06, "loss": 0.8293, "step": 7885 }, { "epoch": 0.24169425033713374, "grad_norm": 1.4657863937573028, "learning_rate": 8.870153902772133e-06, "loss": 0.6287, "step": 7886 }, { "epoch": 0.24172489885987494, "grad_norm": 0.8450692424063045, "learning_rate": 8.86983964107443e-06, "loss": 0.4926, "step": 7887 }, { "epoch": 0.24175554738261615, "grad_norm": 1.9018189929033904, "learning_rate": 8.869525341246209e-06, "loss": 0.7063, "step": 7888 }, { "epoch": 0.24178619590535735, "grad_norm": 1.9016768654526015, "learning_rate": 8.86921100329056e-06, "loss": 0.6228, "step": 7889 }, { "epoch": 0.24181684442809856, "grad_norm": 1.9438583313361206, "learning_rate": 8.868896627210587e-06, "loss": 0.7319, "step": 7890 }, { "epoch": 0.24184749295083977, "grad_norm": 1.7150827718158979, "learning_rate": 8.868582213009387e-06, "loss": 0.7499, "step": 7891 }, { "epoch": 0.24187814147358097, "grad_norm": 1.71619071071877, "learning_rate": 8.868267760690055e-06, "loss": 0.7152, "step": 7892 }, { "epoch": 0.24190878999632218, "grad_norm": 1.582325649848641, "learning_rate": 8.867953270255691e-06, "loss": 0.6989, "step": 7893 }, { "epoch": 0.24193943851906338, "grad_norm": 1.7790915843505803, "learning_rate": 8.867638741709395e-06, "loss": 0.6965, "step": 7894 }, { "epoch": 0.2419700870418046, "grad_norm": 2.0084940052048963, "learning_rate": 8.867324175054264e-06, "loss": 0.8288, "step": 7895 }, { "epoch": 0.2420007355645458, "grad_norm": 1.6150350809914615, "learning_rate": 8.867009570293398e-06, "loss": 0.6682, "step": 7896 }, { "epoch": 0.242031384087287, "grad_norm": 1.5189101090453774, "learning_rate": 8.866694927429897e-06, "loss": 0.6734, "step": 7897 }, { "epoch": 0.2420620326100282, "grad_norm": 1.8662751154272903, "learning_rate": 8.866380246466863e-06, "loss": 0.7168, "step": 7898 }, { "epoch": 0.2420926811327694, "grad_norm": 1.6382462651814118, "learning_rate": 8.866065527407393e-06, "loss": 0.7702, "step": 7899 }, { "epoch": 0.24212332965551062, "grad_norm": 1.7452699154071067, "learning_rate": 8.865750770254593e-06, "loss": 0.7858, "step": 7900 }, { "epoch": 0.24215397817825182, "grad_norm": 1.6812833672820375, "learning_rate": 8.865435975011559e-06, "loss": 0.7259, "step": 7901 }, { "epoch": 0.242184626700993, "grad_norm": 3.0514133224559705, "learning_rate": 8.865121141681397e-06, "loss": 0.6732, "step": 7902 }, { "epoch": 0.2422152752237342, "grad_norm": 1.0512363200192612, "learning_rate": 8.864806270267207e-06, "loss": 0.4818, "step": 7903 }, { "epoch": 0.2422459237464754, "grad_norm": 1.7852234434614633, "learning_rate": 8.864491360772091e-06, "loss": 0.7533, "step": 7904 }, { "epoch": 0.24227657226921662, "grad_norm": 1.7916077888775972, "learning_rate": 8.864176413199155e-06, "loss": 0.6799, "step": 7905 }, { "epoch": 0.24230722079195782, "grad_norm": 2.0300871694580587, "learning_rate": 8.8638614275515e-06, "loss": 0.7369, "step": 7906 }, { "epoch": 0.24233786931469903, "grad_norm": 1.7170755965624647, "learning_rate": 8.863546403832227e-06, "loss": 0.7278, "step": 7907 }, { "epoch": 0.24236851783744023, "grad_norm": 0.819830672078477, "learning_rate": 8.863231342044445e-06, "loss": 0.5003, "step": 7908 }, { "epoch": 0.24239916636018144, "grad_norm": 0.8364532256824666, "learning_rate": 8.862916242191255e-06, "loss": 0.4841, "step": 7909 }, { "epoch": 0.24242981488292265, "grad_norm": 1.7578393747575654, "learning_rate": 8.862601104275763e-06, "loss": 0.6936, "step": 7910 }, { "epoch": 0.24246046340566385, "grad_norm": 1.7762418587193167, "learning_rate": 8.862285928301075e-06, "loss": 0.6933, "step": 7911 }, { "epoch": 0.24249111192840506, "grad_norm": 1.761090166571021, "learning_rate": 8.861970714270294e-06, "loss": 0.7842, "step": 7912 }, { "epoch": 0.24252176045114626, "grad_norm": 1.8159695344446134, "learning_rate": 8.86165546218653e-06, "loss": 0.6971, "step": 7913 }, { "epoch": 0.24255240897388747, "grad_norm": 1.7283024370403324, "learning_rate": 8.861340172052883e-06, "loss": 0.7427, "step": 7914 }, { "epoch": 0.24258305749662867, "grad_norm": 0.9039604794215287, "learning_rate": 8.861024843872465e-06, "loss": 0.4859, "step": 7915 }, { "epoch": 0.24261370601936988, "grad_norm": 1.821807833475637, "learning_rate": 8.860709477648383e-06, "loss": 0.7522, "step": 7916 }, { "epoch": 0.24264435454211106, "grad_norm": 1.7586021314455558, "learning_rate": 8.86039407338374e-06, "loss": 0.7633, "step": 7917 }, { "epoch": 0.24267500306485226, "grad_norm": 1.6895845103492972, "learning_rate": 8.86007863108165e-06, "loss": 0.727, "step": 7918 }, { "epoch": 0.24270565158759347, "grad_norm": 1.6648954930509212, "learning_rate": 8.859763150745215e-06, "loss": 0.6608, "step": 7919 }, { "epoch": 0.24273630011033467, "grad_norm": 1.667247073294282, "learning_rate": 8.859447632377547e-06, "loss": 0.665, "step": 7920 }, { "epoch": 0.24276694863307588, "grad_norm": 1.5882611743008441, "learning_rate": 8.859132075981753e-06, "loss": 0.7691, "step": 7921 }, { "epoch": 0.24279759715581709, "grad_norm": 1.9102321203489965, "learning_rate": 8.858816481560944e-06, "loss": 0.7833, "step": 7922 }, { "epoch": 0.2428282456785583, "grad_norm": 1.6407518556080884, "learning_rate": 8.85850084911823e-06, "loss": 0.6771, "step": 7923 }, { "epoch": 0.2428588942012995, "grad_norm": 0.8688635241076963, "learning_rate": 8.858185178656718e-06, "loss": 0.4833, "step": 7924 }, { "epoch": 0.2428895427240407, "grad_norm": 1.758428013458981, "learning_rate": 8.857869470179521e-06, "loss": 0.735, "step": 7925 }, { "epoch": 0.2429201912467819, "grad_norm": 1.7977999541590386, "learning_rate": 8.857553723689749e-06, "loss": 0.7479, "step": 7926 }, { "epoch": 0.2429508397695231, "grad_norm": 2.016725538020713, "learning_rate": 8.857237939190513e-06, "loss": 0.6758, "step": 7927 }, { "epoch": 0.24298148829226432, "grad_norm": 1.5889254889381998, "learning_rate": 8.856922116684924e-06, "loss": 0.7572, "step": 7928 }, { "epoch": 0.24301213681500552, "grad_norm": 1.8131000625135516, "learning_rate": 8.856606256176096e-06, "loss": 0.7827, "step": 7929 }, { "epoch": 0.24304278533774673, "grad_norm": 1.6569666352402759, "learning_rate": 8.856290357667139e-06, "loss": 0.6865, "step": 7930 }, { "epoch": 0.24307343386048794, "grad_norm": 1.743622062966663, "learning_rate": 8.855974421161167e-06, "loss": 0.7272, "step": 7931 }, { "epoch": 0.24310408238322914, "grad_norm": 1.609455778011754, "learning_rate": 8.855658446661292e-06, "loss": 0.6809, "step": 7932 }, { "epoch": 0.24313473090597032, "grad_norm": 1.6810408254929528, "learning_rate": 8.855342434170628e-06, "loss": 0.7862, "step": 7933 }, { "epoch": 0.24316537942871153, "grad_norm": 1.6012521897452296, "learning_rate": 8.855026383692288e-06, "loss": 0.7082, "step": 7934 }, { "epoch": 0.24319602795145273, "grad_norm": 1.8672998505509404, "learning_rate": 8.854710295229387e-06, "loss": 0.6886, "step": 7935 }, { "epoch": 0.24322667647419394, "grad_norm": 1.8158193456295963, "learning_rate": 8.854394168785038e-06, "loss": 0.6343, "step": 7936 }, { "epoch": 0.24325732499693514, "grad_norm": 1.6972089384070814, "learning_rate": 8.85407800436236e-06, "loss": 0.6361, "step": 7937 }, { "epoch": 0.24328797351967635, "grad_norm": 1.5075051695058392, "learning_rate": 8.853761801964462e-06, "loss": 0.6221, "step": 7938 }, { "epoch": 0.24331862204241755, "grad_norm": 1.7937796998335398, "learning_rate": 8.853445561594466e-06, "loss": 0.8186, "step": 7939 }, { "epoch": 0.24334927056515876, "grad_norm": 1.9903859329842168, "learning_rate": 8.853129283255484e-06, "loss": 0.7557, "step": 7940 }, { "epoch": 0.24337991908789997, "grad_norm": 1.6242730011914777, "learning_rate": 8.852812966950633e-06, "loss": 0.6766, "step": 7941 }, { "epoch": 0.24341056761064117, "grad_norm": 1.7467649291914307, "learning_rate": 8.852496612683028e-06, "loss": 0.7877, "step": 7942 }, { "epoch": 0.24344121613338238, "grad_norm": 1.5682288436298346, "learning_rate": 8.852180220455791e-06, "loss": 0.6587, "step": 7943 }, { "epoch": 0.24347186465612358, "grad_norm": 0.9022637479873721, "learning_rate": 8.851863790272036e-06, "loss": 0.4681, "step": 7944 }, { "epoch": 0.2435025131788648, "grad_norm": 1.6652819201563889, "learning_rate": 8.851547322134882e-06, "loss": 0.7412, "step": 7945 }, { "epoch": 0.243533161701606, "grad_norm": 1.6819204143776718, "learning_rate": 8.851230816047445e-06, "loss": 0.6458, "step": 7946 }, { "epoch": 0.2435638102243472, "grad_norm": 1.6116868381353353, "learning_rate": 8.850914272012846e-06, "loss": 0.6711, "step": 7947 }, { "epoch": 0.24359445874708838, "grad_norm": 1.7549628908498993, "learning_rate": 8.850597690034204e-06, "loss": 0.731, "step": 7948 }, { "epoch": 0.24362510726982958, "grad_norm": 1.930822748038165, "learning_rate": 8.850281070114637e-06, "loss": 0.6981, "step": 7949 }, { "epoch": 0.2436557557925708, "grad_norm": 1.7812360286481155, "learning_rate": 8.849964412257268e-06, "loss": 0.6983, "step": 7950 }, { "epoch": 0.243686404315312, "grad_norm": 1.782003441249021, "learning_rate": 8.84964771646521e-06, "loss": 0.6882, "step": 7951 }, { "epoch": 0.2437170528380532, "grad_norm": 1.8801309821663963, "learning_rate": 8.849330982741594e-06, "loss": 0.7032, "step": 7952 }, { "epoch": 0.2437477013607944, "grad_norm": 1.5651267523258867, "learning_rate": 8.849014211089529e-06, "loss": 0.7275, "step": 7953 }, { "epoch": 0.2437783498835356, "grad_norm": 1.6992505650689416, "learning_rate": 8.848697401512146e-06, "loss": 0.7433, "step": 7954 }, { "epoch": 0.24380899840627682, "grad_norm": 0.9214129986837052, "learning_rate": 8.848380554012562e-06, "loss": 0.4966, "step": 7955 }, { "epoch": 0.24383964692901802, "grad_norm": 1.6905878852720009, "learning_rate": 8.848063668593898e-06, "loss": 0.6133, "step": 7956 }, { "epoch": 0.24387029545175923, "grad_norm": 1.6600213680779792, "learning_rate": 8.84774674525928e-06, "loss": 0.7046, "step": 7957 }, { "epoch": 0.24390094397450043, "grad_norm": 1.6933700875566493, "learning_rate": 8.847429784011828e-06, "loss": 0.644, "step": 7958 }, { "epoch": 0.24393159249724164, "grad_norm": 1.7396747136362185, "learning_rate": 8.847112784854666e-06, "loss": 0.7091, "step": 7959 }, { "epoch": 0.24396224101998285, "grad_norm": 1.8684029324543772, "learning_rate": 8.846795747790918e-06, "loss": 0.8563, "step": 7960 }, { "epoch": 0.24399288954272405, "grad_norm": 1.782190523627196, "learning_rate": 8.846478672823707e-06, "loss": 0.6396, "step": 7961 }, { "epoch": 0.24402353806546526, "grad_norm": 1.88665669311605, "learning_rate": 8.846161559956156e-06, "loss": 0.6891, "step": 7962 }, { "epoch": 0.24405418658820646, "grad_norm": 0.8549543866694338, "learning_rate": 8.845844409191393e-06, "loss": 0.4972, "step": 7963 }, { "epoch": 0.24408483511094764, "grad_norm": 0.8115505413127505, "learning_rate": 8.84552722053254e-06, "loss": 0.4874, "step": 7964 }, { "epoch": 0.24411548363368885, "grad_norm": 1.8782718269229484, "learning_rate": 8.845209993982724e-06, "loss": 0.8132, "step": 7965 }, { "epoch": 0.24414613215643005, "grad_norm": 1.654585731009314, "learning_rate": 8.84489272954507e-06, "loss": 0.7092, "step": 7966 }, { "epoch": 0.24417678067917126, "grad_norm": 0.7681090078381487, "learning_rate": 8.844575427222703e-06, "loss": 0.4723, "step": 7967 }, { "epoch": 0.24420742920191246, "grad_norm": 1.6850436745072663, "learning_rate": 8.844258087018754e-06, "loss": 0.6517, "step": 7968 }, { "epoch": 0.24423807772465367, "grad_norm": 1.6648204215851632, "learning_rate": 8.843940708936342e-06, "loss": 0.7072, "step": 7969 }, { "epoch": 0.24426872624739487, "grad_norm": 2.17486121220585, "learning_rate": 8.843623292978602e-06, "loss": 0.7681, "step": 7970 }, { "epoch": 0.24429937477013608, "grad_norm": 1.850345111509204, "learning_rate": 8.843305839148657e-06, "loss": 0.6737, "step": 7971 }, { "epoch": 0.24433002329287729, "grad_norm": 2.0668904582274354, "learning_rate": 8.842988347449636e-06, "loss": 0.8028, "step": 7972 }, { "epoch": 0.2443606718156185, "grad_norm": 1.796834015294838, "learning_rate": 8.842670817884669e-06, "loss": 0.7576, "step": 7973 }, { "epoch": 0.2443913203383597, "grad_norm": 1.826894411582645, "learning_rate": 8.842353250456882e-06, "loss": 0.7397, "step": 7974 }, { "epoch": 0.2444219688611009, "grad_norm": 1.7887689438772474, "learning_rate": 8.842035645169404e-06, "loss": 0.7118, "step": 7975 }, { "epoch": 0.2444526173838421, "grad_norm": 1.5909419256103294, "learning_rate": 8.841718002025367e-06, "loss": 0.7121, "step": 7976 }, { "epoch": 0.2444832659065833, "grad_norm": 1.6220988984450107, "learning_rate": 8.841400321027899e-06, "loss": 0.7054, "step": 7977 }, { "epoch": 0.24451391442932452, "grad_norm": 1.6853923608883696, "learning_rate": 8.841082602180134e-06, "loss": 0.6402, "step": 7978 }, { "epoch": 0.2445445629520657, "grad_norm": 1.0078356248100935, "learning_rate": 8.840764845485196e-06, "loss": 0.4977, "step": 7979 }, { "epoch": 0.2445752114748069, "grad_norm": 1.7691628351092485, "learning_rate": 8.840447050946219e-06, "loss": 0.7434, "step": 7980 }, { "epoch": 0.2446058599975481, "grad_norm": 1.6779778562897696, "learning_rate": 8.840129218566335e-06, "loss": 0.7703, "step": 7981 }, { "epoch": 0.24463650852028931, "grad_norm": 0.8354567872897779, "learning_rate": 8.839811348348677e-06, "loss": 0.4926, "step": 7982 }, { "epoch": 0.24466715704303052, "grad_norm": 0.8557073698910221, "learning_rate": 8.839493440296376e-06, "loss": 0.4778, "step": 7983 }, { "epoch": 0.24469780556577173, "grad_norm": 1.6078060754676384, "learning_rate": 8.83917549441256e-06, "loss": 0.6879, "step": 7984 }, { "epoch": 0.24472845408851293, "grad_norm": 0.8542456549668138, "learning_rate": 8.838857510700369e-06, "loss": 0.5034, "step": 7985 }, { "epoch": 0.24475910261125414, "grad_norm": 1.7917961988324247, "learning_rate": 8.83853948916293e-06, "loss": 0.6808, "step": 7986 }, { "epoch": 0.24478975113399534, "grad_norm": 1.871489170515039, "learning_rate": 8.838221429803381e-06, "loss": 0.7326, "step": 7987 }, { "epoch": 0.24482039965673655, "grad_norm": 1.6702175590870718, "learning_rate": 8.837903332624855e-06, "loss": 0.6812, "step": 7988 }, { "epoch": 0.24485104817947775, "grad_norm": 1.8134931694739158, "learning_rate": 8.837585197630483e-06, "loss": 0.7379, "step": 7989 }, { "epoch": 0.24488169670221896, "grad_norm": 2.0082388044993724, "learning_rate": 8.837267024823404e-06, "loss": 0.713, "step": 7990 }, { "epoch": 0.24491234522496017, "grad_norm": 1.8410771407823308, "learning_rate": 8.836948814206751e-06, "loss": 0.6607, "step": 7991 }, { "epoch": 0.24494299374770137, "grad_norm": 1.745148739359042, "learning_rate": 8.83663056578366e-06, "loss": 0.7057, "step": 7992 }, { "epoch": 0.24497364227044258, "grad_norm": 1.6580785340925759, "learning_rate": 8.836312279557264e-06, "loss": 0.663, "step": 7993 }, { "epoch": 0.24500429079318378, "grad_norm": 1.976297761336539, "learning_rate": 8.835993955530704e-06, "loss": 0.7288, "step": 7994 }, { "epoch": 0.24503493931592496, "grad_norm": 1.5964860229734923, "learning_rate": 8.835675593707113e-06, "loss": 0.7018, "step": 7995 }, { "epoch": 0.24506558783866617, "grad_norm": 1.7531683886770775, "learning_rate": 8.83535719408963e-06, "loss": 0.7296, "step": 7996 }, { "epoch": 0.24509623636140737, "grad_norm": 1.8877884594947731, "learning_rate": 8.83503875668139e-06, "loss": 0.6852, "step": 7997 }, { "epoch": 0.24512688488414858, "grad_norm": 1.9427233805460684, "learning_rate": 8.834720281485533e-06, "loss": 0.7654, "step": 7998 }, { "epoch": 0.24515753340688978, "grad_norm": 1.7302660609281746, "learning_rate": 8.834401768505194e-06, "loss": 0.7062, "step": 7999 }, { "epoch": 0.245188181929631, "grad_norm": 1.7159207587006038, "learning_rate": 8.834083217743516e-06, "loss": 0.6646, "step": 8000 }, { "epoch": 0.2452188304523722, "grad_norm": 1.828903659312408, "learning_rate": 8.833764629203634e-06, "loss": 0.7411, "step": 8001 }, { "epoch": 0.2452494789751134, "grad_norm": 1.6804320267053996, "learning_rate": 8.833446002888689e-06, "loss": 0.6629, "step": 8002 }, { "epoch": 0.2452801274978546, "grad_norm": 1.8777956177191655, "learning_rate": 8.833127338801818e-06, "loss": 0.7866, "step": 8003 }, { "epoch": 0.2453107760205958, "grad_norm": 1.8918128045691969, "learning_rate": 8.832808636946162e-06, "loss": 0.6645, "step": 8004 }, { "epoch": 0.24534142454333702, "grad_norm": 1.6312506889632612, "learning_rate": 8.832489897324863e-06, "loss": 0.6705, "step": 8005 }, { "epoch": 0.24537207306607822, "grad_norm": 1.7893780704653304, "learning_rate": 8.83217111994106e-06, "loss": 0.7855, "step": 8006 }, { "epoch": 0.24540272158881943, "grad_norm": 1.50274221587021, "learning_rate": 8.831852304797896e-06, "loss": 0.5396, "step": 8007 }, { "epoch": 0.24543337011156063, "grad_norm": 1.6768894084621193, "learning_rate": 8.831533451898508e-06, "loss": 0.7364, "step": 8008 }, { "epoch": 0.24546401863430184, "grad_norm": 1.925880737520859, "learning_rate": 8.831214561246044e-06, "loss": 0.7163, "step": 8009 }, { "epoch": 0.24549466715704302, "grad_norm": 1.7615683259058121, "learning_rate": 8.830895632843641e-06, "loss": 0.6969, "step": 8010 }, { "epoch": 0.24552531567978422, "grad_norm": 1.5489840283278447, "learning_rate": 8.830576666694443e-06, "loss": 0.6547, "step": 8011 }, { "epoch": 0.24555596420252543, "grad_norm": 1.7814653414553923, "learning_rate": 8.830257662801594e-06, "loss": 0.6992, "step": 8012 }, { "epoch": 0.24558661272526663, "grad_norm": 2.154353675445491, "learning_rate": 8.829938621168234e-06, "loss": 0.7685, "step": 8013 }, { "epoch": 0.24561726124800784, "grad_norm": 1.085678926980741, "learning_rate": 8.829619541797511e-06, "loss": 0.4932, "step": 8014 }, { "epoch": 0.24564790977074905, "grad_norm": 1.6221629001502402, "learning_rate": 8.829300424692566e-06, "loss": 0.7352, "step": 8015 }, { "epoch": 0.24567855829349025, "grad_norm": 1.5428521078971296, "learning_rate": 8.828981269856543e-06, "loss": 0.6513, "step": 8016 }, { "epoch": 0.24570920681623146, "grad_norm": 1.715657416815919, "learning_rate": 8.828662077292588e-06, "loss": 0.7309, "step": 8017 }, { "epoch": 0.24573985533897266, "grad_norm": 1.7274204644539477, "learning_rate": 8.828342847003848e-06, "loss": 0.8097, "step": 8018 }, { "epoch": 0.24577050386171387, "grad_norm": 1.7367096721031674, "learning_rate": 8.828023578993466e-06, "loss": 0.7066, "step": 8019 }, { "epoch": 0.24580115238445507, "grad_norm": 2.0295865126118504, "learning_rate": 8.827704273264588e-06, "loss": 0.6796, "step": 8020 }, { "epoch": 0.24583180090719628, "grad_norm": 1.7204259765651797, "learning_rate": 8.82738492982036e-06, "loss": 0.6363, "step": 8021 }, { "epoch": 0.24586244942993749, "grad_norm": 1.6646845919950077, "learning_rate": 8.82706554866393e-06, "loss": 0.7221, "step": 8022 }, { "epoch": 0.2458930979526787, "grad_norm": 1.5413961423025662, "learning_rate": 8.826746129798442e-06, "loss": 0.6591, "step": 8023 }, { "epoch": 0.2459237464754199, "grad_norm": 1.74270464815785, "learning_rate": 8.826426673227047e-06, "loss": 0.7214, "step": 8024 }, { "epoch": 0.2459543949981611, "grad_norm": 1.578001911740241, "learning_rate": 8.826107178952889e-06, "loss": 0.7302, "step": 8025 }, { "epoch": 0.24598504352090228, "grad_norm": 1.8379219412436318, "learning_rate": 8.825787646979119e-06, "loss": 0.7209, "step": 8026 }, { "epoch": 0.24601569204364349, "grad_norm": 1.7168398724106444, "learning_rate": 8.825468077308885e-06, "loss": 0.7645, "step": 8027 }, { "epoch": 0.2460463405663847, "grad_norm": 1.0633223877679365, "learning_rate": 8.825148469945335e-06, "loss": 0.4798, "step": 8028 }, { "epoch": 0.2460769890891259, "grad_norm": 1.908272408255897, "learning_rate": 8.824828824891618e-06, "loss": 0.73, "step": 8029 }, { "epoch": 0.2461076376118671, "grad_norm": 1.9130739822265423, "learning_rate": 8.824509142150885e-06, "loss": 0.7059, "step": 8030 }, { "epoch": 0.2461382861346083, "grad_norm": 1.8946705854652655, "learning_rate": 8.824189421726284e-06, "loss": 0.7046, "step": 8031 }, { "epoch": 0.24616893465734951, "grad_norm": 1.780757457810362, "learning_rate": 8.823869663620967e-06, "loss": 0.7575, "step": 8032 }, { "epoch": 0.24619958318009072, "grad_norm": 1.5833455739258302, "learning_rate": 8.823549867838082e-06, "loss": 0.7024, "step": 8033 }, { "epoch": 0.24623023170283193, "grad_norm": 1.6035741215604673, "learning_rate": 8.823230034380784e-06, "loss": 0.6304, "step": 8034 }, { "epoch": 0.24626088022557313, "grad_norm": 1.8593356877728557, "learning_rate": 8.822910163252221e-06, "loss": 0.7271, "step": 8035 }, { "epoch": 0.24629152874831434, "grad_norm": 1.7501212004565672, "learning_rate": 8.822590254455547e-06, "loss": 0.7989, "step": 8036 }, { "epoch": 0.24632217727105554, "grad_norm": 1.597393767964925, "learning_rate": 8.822270307993912e-06, "loss": 0.5857, "step": 8037 }, { "epoch": 0.24635282579379675, "grad_norm": 1.9412929215282402, "learning_rate": 8.82195032387047e-06, "loss": 0.7697, "step": 8038 }, { "epoch": 0.24638347431653795, "grad_norm": 1.6334101462723745, "learning_rate": 8.821630302088374e-06, "loss": 0.6273, "step": 8039 }, { "epoch": 0.24641412283927916, "grad_norm": 1.6608878080382592, "learning_rate": 8.821310242650776e-06, "loss": 0.6628, "step": 8040 }, { "epoch": 0.24644477136202034, "grad_norm": 1.6253787309443626, "learning_rate": 8.82099014556083e-06, "loss": 0.7116, "step": 8041 }, { "epoch": 0.24647541988476154, "grad_norm": 1.924294584305308, "learning_rate": 8.820670010821693e-06, "loss": 0.7398, "step": 8042 }, { "epoch": 0.24650606840750275, "grad_norm": 1.7848870937413674, "learning_rate": 8.820349838436515e-06, "loss": 0.7588, "step": 8043 }, { "epoch": 0.24653671693024395, "grad_norm": 1.1218684962649488, "learning_rate": 8.820029628408453e-06, "loss": 0.4891, "step": 8044 }, { "epoch": 0.24656736545298516, "grad_norm": 1.9468870280657689, "learning_rate": 8.819709380740662e-06, "loss": 0.7178, "step": 8045 }, { "epoch": 0.24659801397572637, "grad_norm": 2.1336816009410384, "learning_rate": 8.819389095436295e-06, "loss": 0.7512, "step": 8046 }, { "epoch": 0.24662866249846757, "grad_norm": 1.6905705883273028, "learning_rate": 8.819068772498514e-06, "loss": 0.7063, "step": 8047 }, { "epoch": 0.24665931102120878, "grad_norm": 1.6237807074468171, "learning_rate": 8.818748411930468e-06, "loss": 0.704, "step": 8048 }, { "epoch": 0.24668995954394998, "grad_norm": 1.8139199245143738, "learning_rate": 8.818428013735319e-06, "loss": 0.7588, "step": 8049 }, { "epoch": 0.2467206080666912, "grad_norm": 1.6398101270657128, "learning_rate": 8.81810757791622e-06, "loss": 0.7238, "step": 8050 }, { "epoch": 0.2467512565894324, "grad_norm": 1.7200380688321009, "learning_rate": 8.81778710447633e-06, "loss": 0.7312, "step": 8051 }, { "epoch": 0.2467819051121736, "grad_norm": 1.7154601116773878, "learning_rate": 8.817466593418808e-06, "loss": 0.6267, "step": 8052 }, { "epoch": 0.2468125536349148, "grad_norm": 2.020308154116351, "learning_rate": 8.817146044746809e-06, "loss": 0.7205, "step": 8053 }, { "epoch": 0.246843202157656, "grad_norm": 0.9549264731421447, "learning_rate": 8.816825458463496e-06, "loss": 0.4814, "step": 8054 }, { "epoch": 0.24687385068039722, "grad_norm": 1.7975205159350511, "learning_rate": 8.816504834572024e-06, "loss": 0.726, "step": 8055 }, { "epoch": 0.24690449920313842, "grad_norm": 0.824066236344961, "learning_rate": 8.816184173075553e-06, "loss": 0.5032, "step": 8056 }, { "epoch": 0.2469351477258796, "grad_norm": 1.7930192610336615, "learning_rate": 8.815863473977244e-06, "loss": 0.7163, "step": 8057 }, { "epoch": 0.2469657962486208, "grad_norm": 1.9451412750348747, "learning_rate": 8.815542737280254e-06, "loss": 0.842, "step": 8058 }, { "epoch": 0.246996444771362, "grad_norm": 1.906914849331054, "learning_rate": 8.815221962987747e-06, "loss": 0.7484, "step": 8059 }, { "epoch": 0.24702709329410322, "grad_norm": 1.7759992635799693, "learning_rate": 8.814901151102882e-06, "loss": 0.7076, "step": 8060 }, { "epoch": 0.24705774181684442, "grad_norm": 1.7504604877509267, "learning_rate": 8.814580301628818e-06, "loss": 0.7225, "step": 8061 }, { "epoch": 0.24708839033958563, "grad_norm": 1.6880965837909796, "learning_rate": 8.81425941456872e-06, "loss": 0.7485, "step": 8062 }, { "epoch": 0.24711903886232683, "grad_norm": 0.8180802853882532, "learning_rate": 8.813938489925747e-06, "loss": 0.4822, "step": 8063 }, { "epoch": 0.24714968738506804, "grad_norm": 1.8349195427446265, "learning_rate": 8.813617527703062e-06, "loss": 0.6452, "step": 8064 }, { "epoch": 0.24718033590780925, "grad_norm": 2.0834182069815443, "learning_rate": 8.813296527903828e-06, "loss": 0.7657, "step": 8065 }, { "epoch": 0.24721098443055045, "grad_norm": 1.833911492211358, "learning_rate": 8.812975490531208e-06, "loss": 0.6459, "step": 8066 }, { "epoch": 0.24724163295329166, "grad_norm": 1.6210590788830959, "learning_rate": 8.812654415588366e-06, "loss": 0.7199, "step": 8067 }, { "epoch": 0.24727228147603286, "grad_norm": 1.7729579535881799, "learning_rate": 8.812333303078462e-06, "loss": 0.6381, "step": 8068 }, { "epoch": 0.24730292999877407, "grad_norm": 1.787819578109249, "learning_rate": 8.812012153004665e-06, "loss": 0.724, "step": 8069 }, { "epoch": 0.24733357852151527, "grad_norm": 1.8636070855956623, "learning_rate": 8.811690965370135e-06, "loss": 0.8098, "step": 8070 }, { "epoch": 0.24736422704425648, "grad_norm": 0.8757204077897007, "learning_rate": 8.81136974017804e-06, "loss": 0.503, "step": 8071 }, { "epoch": 0.24739487556699766, "grad_norm": 0.8296041602401055, "learning_rate": 8.811048477431543e-06, "loss": 0.4728, "step": 8072 }, { "epoch": 0.24742552408973886, "grad_norm": 1.7371236791374447, "learning_rate": 8.810727177133811e-06, "loss": 0.7976, "step": 8073 }, { "epoch": 0.24745617261248007, "grad_norm": 1.84858500111401, "learning_rate": 8.810405839288008e-06, "loss": 0.6569, "step": 8074 }, { "epoch": 0.24748682113522127, "grad_norm": 1.634642631384614, "learning_rate": 8.810084463897302e-06, "loss": 0.6719, "step": 8075 }, { "epoch": 0.24751746965796248, "grad_norm": 1.7618898404482233, "learning_rate": 8.80976305096486e-06, "loss": 0.6692, "step": 8076 }, { "epoch": 0.24754811818070369, "grad_norm": 1.7048278322314527, "learning_rate": 8.809441600493846e-06, "loss": 0.6902, "step": 8077 }, { "epoch": 0.2475787667034449, "grad_norm": 0.9194366410593979, "learning_rate": 8.80912011248743e-06, "loss": 0.4912, "step": 8078 }, { "epoch": 0.2476094152261861, "grad_norm": 1.6605733817072015, "learning_rate": 8.80879858694878e-06, "loss": 0.6199, "step": 8079 }, { "epoch": 0.2476400637489273, "grad_norm": 1.646999820046466, "learning_rate": 8.808477023881061e-06, "loss": 0.6749, "step": 8080 }, { "epoch": 0.2476707122716685, "grad_norm": 0.8039230149470897, "learning_rate": 8.808155423287444e-06, "loss": 0.4741, "step": 8081 }, { "epoch": 0.24770136079440971, "grad_norm": 1.8606754712434794, "learning_rate": 8.807833785171098e-06, "loss": 0.7595, "step": 8082 }, { "epoch": 0.24773200931715092, "grad_norm": 0.8108455884815702, "learning_rate": 8.807512109535192e-06, "loss": 0.4653, "step": 8083 }, { "epoch": 0.24776265783989213, "grad_norm": 1.6108879801395906, "learning_rate": 8.807190396382893e-06, "loss": 0.667, "step": 8084 }, { "epoch": 0.24779330636263333, "grad_norm": 1.8486720364561011, "learning_rate": 8.806868645717374e-06, "loss": 0.6752, "step": 8085 }, { "epoch": 0.24782395488537454, "grad_norm": 1.7396994392325402, "learning_rate": 8.806546857541804e-06, "loss": 0.7266, "step": 8086 }, { "epoch": 0.24785460340811574, "grad_norm": 1.8732019164657232, "learning_rate": 8.806225031859354e-06, "loss": 0.7456, "step": 8087 }, { "epoch": 0.24788525193085692, "grad_norm": 1.7488859775991634, "learning_rate": 8.805903168673196e-06, "loss": 0.7313, "step": 8088 }, { "epoch": 0.24791590045359813, "grad_norm": 1.6303549400081747, "learning_rate": 8.805581267986499e-06, "loss": 0.6999, "step": 8089 }, { "epoch": 0.24794654897633933, "grad_norm": 1.5845065237565525, "learning_rate": 8.805259329802435e-06, "loss": 0.6666, "step": 8090 }, { "epoch": 0.24797719749908054, "grad_norm": 1.641344033965189, "learning_rate": 8.80493735412418e-06, "loss": 0.7153, "step": 8091 }, { "epoch": 0.24800784602182174, "grad_norm": 1.9342264404231848, "learning_rate": 8.804615340954901e-06, "loss": 0.7707, "step": 8092 }, { "epoch": 0.24803849454456295, "grad_norm": 1.6237903789331474, "learning_rate": 8.804293290297777e-06, "loss": 0.6972, "step": 8093 }, { "epoch": 0.24806914306730415, "grad_norm": 1.7733389906725845, "learning_rate": 8.803971202155975e-06, "loss": 0.6776, "step": 8094 }, { "epoch": 0.24809979159004536, "grad_norm": 1.6779057403607223, "learning_rate": 8.803649076532672e-06, "loss": 0.7671, "step": 8095 }, { "epoch": 0.24813044011278657, "grad_norm": 1.624910838015011, "learning_rate": 8.80332691343104e-06, "loss": 0.7072, "step": 8096 }, { "epoch": 0.24816108863552777, "grad_norm": 1.6593531141744151, "learning_rate": 8.803004712854258e-06, "loss": 0.6506, "step": 8097 }, { "epoch": 0.24819173715826898, "grad_norm": 1.7800167468958394, "learning_rate": 8.802682474805495e-06, "loss": 0.7219, "step": 8098 }, { "epoch": 0.24822238568101018, "grad_norm": 1.6643312557521797, "learning_rate": 8.80236019928793e-06, "loss": 0.7537, "step": 8099 }, { "epoch": 0.2482530342037514, "grad_norm": 1.8112247012058549, "learning_rate": 8.802037886304736e-06, "loss": 0.6496, "step": 8100 }, { "epoch": 0.2482836827264926, "grad_norm": 1.7057185025596442, "learning_rate": 8.80171553585909e-06, "loss": 0.683, "step": 8101 }, { "epoch": 0.2483143312492338, "grad_norm": 1.6293941995008292, "learning_rate": 8.80139314795417e-06, "loss": 0.7514, "step": 8102 }, { "epoch": 0.24834497977197498, "grad_norm": 1.5232707284630618, "learning_rate": 8.801070722593147e-06, "loss": 0.7463, "step": 8103 }, { "epoch": 0.24837562829471618, "grad_norm": 1.5865403100536668, "learning_rate": 8.800748259779206e-06, "loss": 0.6583, "step": 8104 }, { "epoch": 0.2484062768174574, "grad_norm": 1.6668793419158758, "learning_rate": 8.800425759515517e-06, "loss": 0.7488, "step": 8105 }, { "epoch": 0.2484369253401986, "grad_norm": 1.7115498811486738, "learning_rate": 8.800103221805261e-06, "loss": 0.683, "step": 8106 }, { "epoch": 0.2484675738629398, "grad_norm": 1.0063081100720115, "learning_rate": 8.799780646651617e-06, "loss": 0.4699, "step": 8107 }, { "epoch": 0.248498222385681, "grad_norm": 2.1437879455006073, "learning_rate": 8.799458034057761e-06, "loss": 0.8035, "step": 8108 }, { "epoch": 0.2485288709084222, "grad_norm": 1.8654765602390526, "learning_rate": 8.799135384026874e-06, "loss": 0.7081, "step": 8109 }, { "epoch": 0.24855951943116342, "grad_norm": 1.604386897150601, "learning_rate": 8.798812696562132e-06, "loss": 0.7015, "step": 8110 }, { "epoch": 0.24859016795390462, "grad_norm": 1.7495979755025188, "learning_rate": 8.798489971666717e-06, "loss": 0.7362, "step": 8111 }, { "epoch": 0.24862081647664583, "grad_norm": 1.890923968420445, "learning_rate": 8.798167209343811e-06, "loss": 0.7201, "step": 8112 }, { "epoch": 0.24865146499938703, "grad_norm": 1.8272539958772693, "learning_rate": 8.79784440959659e-06, "loss": 0.6812, "step": 8113 }, { "epoch": 0.24868211352212824, "grad_norm": 1.8362982460947905, "learning_rate": 8.797521572428234e-06, "loss": 0.7149, "step": 8114 }, { "epoch": 0.24871276204486945, "grad_norm": 1.5871885591816028, "learning_rate": 8.79719869784193e-06, "loss": 0.6701, "step": 8115 }, { "epoch": 0.24874341056761065, "grad_norm": 1.5177388331232216, "learning_rate": 8.796875785840853e-06, "loss": 0.6792, "step": 8116 }, { "epoch": 0.24877405909035186, "grad_norm": 1.6861062136228848, "learning_rate": 8.796552836428188e-06, "loss": 0.6554, "step": 8117 }, { "epoch": 0.24880470761309306, "grad_norm": 1.486216567048406, "learning_rate": 8.796229849607116e-06, "loss": 0.6737, "step": 8118 }, { "epoch": 0.24883535613583424, "grad_norm": 1.6080475721668437, "learning_rate": 8.795906825380821e-06, "loss": 0.6267, "step": 8119 }, { "epoch": 0.24886600465857545, "grad_norm": 1.7221002485390313, "learning_rate": 8.795583763752486e-06, "loss": 0.7256, "step": 8120 }, { "epoch": 0.24889665318131665, "grad_norm": 1.6005269719873279, "learning_rate": 8.795260664725291e-06, "loss": 0.6601, "step": 8121 }, { "epoch": 0.24892730170405786, "grad_norm": 1.688612334546882, "learning_rate": 8.794937528302422e-06, "loss": 0.6979, "step": 8122 }, { "epoch": 0.24895795022679906, "grad_norm": 1.6995791968154887, "learning_rate": 8.794614354487063e-06, "loss": 0.6512, "step": 8123 }, { "epoch": 0.24898859874954027, "grad_norm": 1.7375649941180298, "learning_rate": 8.794291143282398e-06, "loss": 0.6611, "step": 8124 }, { "epoch": 0.24901924727228147, "grad_norm": 1.818864755727158, "learning_rate": 8.793967894691612e-06, "loss": 0.698, "step": 8125 }, { "epoch": 0.24904989579502268, "grad_norm": 1.7101611371028524, "learning_rate": 8.793644608717888e-06, "loss": 0.6335, "step": 8126 }, { "epoch": 0.24908054431776389, "grad_norm": 1.7119103396908064, "learning_rate": 8.793321285364416e-06, "loss": 0.6923, "step": 8127 }, { "epoch": 0.2491111928405051, "grad_norm": 1.7560670780132992, "learning_rate": 8.792997924634376e-06, "loss": 0.6371, "step": 8128 }, { "epoch": 0.2491418413632463, "grad_norm": 1.6482877191684315, "learning_rate": 8.792674526530957e-06, "loss": 0.637, "step": 8129 }, { "epoch": 0.2491724898859875, "grad_norm": 2.1016266214028443, "learning_rate": 8.792351091057348e-06, "loss": 0.8843, "step": 8130 }, { "epoch": 0.2492031384087287, "grad_norm": 1.6273333789583677, "learning_rate": 8.792027618216731e-06, "loss": 0.6632, "step": 8131 }, { "epoch": 0.24923378693146991, "grad_norm": 1.785935000285777, "learning_rate": 8.791704108012295e-06, "loss": 0.7459, "step": 8132 }, { "epoch": 0.24926443545421112, "grad_norm": 1.8436676070897169, "learning_rate": 8.791380560447231e-06, "loss": 0.7509, "step": 8133 }, { "epoch": 0.2492950839769523, "grad_norm": 2.0695827643833584, "learning_rate": 8.791056975524722e-06, "loss": 0.7703, "step": 8134 }, { "epoch": 0.2493257324996935, "grad_norm": 1.7651411067896026, "learning_rate": 8.79073335324796e-06, "loss": 0.7208, "step": 8135 }, { "epoch": 0.2493563810224347, "grad_norm": 1.0489514162792806, "learning_rate": 8.790409693620132e-06, "loss": 0.4844, "step": 8136 }, { "epoch": 0.24938702954517591, "grad_norm": 1.5950323131073934, "learning_rate": 8.790085996644426e-06, "loss": 0.7027, "step": 8137 }, { "epoch": 0.24941767806791712, "grad_norm": 1.669429751117589, "learning_rate": 8.789762262324035e-06, "loss": 0.7274, "step": 8138 }, { "epoch": 0.24944832659065833, "grad_norm": 1.7399541810651085, "learning_rate": 8.789438490662146e-06, "loss": 0.683, "step": 8139 }, { "epoch": 0.24947897511339953, "grad_norm": 0.8158320755806553, "learning_rate": 8.78911468166195e-06, "loss": 0.4722, "step": 8140 }, { "epoch": 0.24950962363614074, "grad_norm": 0.7660723634566271, "learning_rate": 8.788790835326637e-06, "loss": 0.4821, "step": 8141 }, { "epoch": 0.24954027215888194, "grad_norm": 1.7908154717399816, "learning_rate": 8.7884669516594e-06, "loss": 0.6775, "step": 8142 }, { "epoch": 0.24957092068162315, "grad_norm": 1.6755110832429814, "learning_rate": 8.788143030663427e-06, "loss": 0.7292, "step": 8143 }, { "epoch": 0.24960156920436435, "grad_norm": 1.606974487648688, "learning_rate": 8.787819072341914e-06, "loss": 0.6495, "step": 8144 }, { "epoch": 0.24963221772710556, "grad_norm": 2.226780894722849, "learning_rate": 8.787495076698049e-06, "loss": 0.6802, "step": 8145 }, { "epoch": 0.24966286624984677, "grad_norm": 0.9354319240718054, "learning_rate": 8.787171043735025e-06, "loss": 0.474, "step": 8146 }, { "epoch": 0.24969351477258797, "grad_norm": 1.9008648492028457, "learning_rate": 8.786846973456036e-06, "loss": 0.7644, "step": 8147 }, { "epoch": 0.24972416329532918, "grad_norm": 1.9671117277050176, "learning_rate": 8.786522865864275e-06, "loss": 0.8012, "step": 8148 }, { "epoch": 0.24975481181807038, "grad_norm": 1.8203806563774312, "learning_rate": 8.786198720962937e-06, "loss": 0.7702, "step": 8149 }, { "epoch": 0.24978546034081156, "grad_norm": 1.743996173674167, "learning_rate": 8.785874538755212e-06, "loss": 0.7765, "step": 8150 }, { "epoch": 0.24981610886355277, "grad_norm": 1.5812807604108041, "learning_rate": 8.785550319244298e-06, "loss": 0.64, "step": 8151 }, { "epoch": 0.24984675738629397, "grad_norm": 2.0274197904731794, "learning_rate": 8.785226062433387e-06, "loss": 0.6955, "step": 8152 }, { "epoch": 0.24987740590903518, "grad_norm": 1.7121658790914607, "learning_rate": 8.784901768325676e-06, "loss": 0.7778, "step": 8153 }, { "epoch": 0.24990805443177638, "grad_norm": 0.9731024984712795, "learning_rate": 8.784577436924359e-06, "loss": 0.4603, "step": 8154 }, { "epoch": 0.2499387029545176, "grad_norm": 1.9066827604145415, "learning_rate": 8.784253068232634e-06, "loss": 0.7787, "step": 8155 }, { "epoch": 0.2499693514772588, "grad_norm": 1.6848128605170574, "learning_rate": 8.783928662253693e-06, "loss": 0.7419, "step": 8156 }, { "epoch": 0.25, "grad_norm": 1.7387493538107448, "learning_rate": 8.783604218990735e-06, "loss": 0.7048, "step": 8157 }, { "epoch": 0.2500306485227412, "grad_norm": 1.611657850609936, "learning_rate": 8.783279738446957e-06, "loss": 0.6691, "step": 8158 }, { "epoch": 0.2500612970454824, "grad_norm": 1.7170447168162335, "learning_rate": 8.782955220625556e-06, "loss": 0.7266, "step": 8159 }, { "epoch": 0.2500919455682236, "grad_norm": 1.706139617991794, "learning_rate": 8.78263066552973e-06, "loss": 0.7313, "step": 8160 }, { "epoch": 0.2501225940909648, "grad_norm": 1.7930833030209823, "learning_rate": 8.782306073162674e-06, "loss": 0.6645, "step": 8161 }, { "epoch": 0.25015324261370603, "grad_norm": 1.8582458425705777, "learning_rate": 8.78198144352759e-06, "loss": 0.679, "step": 8162 }, { "epoch": 0.25018389113644723, "grad_norm": 1.888517741309403, "learning_rate": 8.781656776627674e-06, "loss": 0.7583, "step": 8163 }, { "epoch": 0.25021453965918844, "grad_norm": 2.085155758029387, "learning_rate": 8.78133207246613e-06, "loss": 0.8652, "step": 8164 }, { "epoch": 0.25024518818192965, "grad_norm": 1.670592341576558, "learning_rate": 8.78100733104615e-06, "loss": 0.6159, "step": 8165 }, { "epoch": 0.25027583670467085, "grad_norm": 1.6629622438669016, "learning_rate": 8.780682552370937e-06, "loss": 0.7593, "step": 8166 }, { "epoch": 0.25030648522741206, "grad_norm": 1.0174974009887146, "learning_rate": 8.780357736443693e-06, "loss": 0.4958, "step": 8167 }, { "epoch": 0.25033713375015326, "grad_norm": 1.7628717827119185, "learning_rate": 8.780032883267617e-06, "loss": 0.7557, "step": 8168 }, { "epoch": 0.25036778227289447, "grad_norm": 0.8084029416295773, "learning_rate": 8.779707992845909e-06, "loss": 0.4752, "step": 8169 }, { "epoch": 0.2503984307956357, "grad_norm": 0.7979079743431448, "learning_rate": 8.779383065181772e-06, "loss": 0.5014, "step": 8170 }, { "epoch": 0.2504290793183769, "grad_norm": 1.4804405823958047, "learning_rate": 8.779058100278407e-06, "loss": 0.6498, "step": 8171 }, { "epoch": 0.2504597278411181, "grad_norm": 1.6783741844980617, "learning_rate": 8.778733098139014e-06, "loss": 0.7676, "step": 8172 }, { "epoch": 0.25049037636385924, "grad_norm": 1.6078346803198134, "learning_rate": 8.778408058766796e-06, "loss": 0.6572, "step": 8173 }, { "epoch": 0.25052102488660044, "grad_norm": 0.974346512454923, "learning_rate": 8.778082982164959e-06, "loss": 0.4752, "step": 8174 }, { "epoch": 0.25055167340934165, "grad_norm": 1.7907482833604942, "learning_rate": 8.777757868336703e-06, "loss": 0.7784, "step": 8175 }, { "epoch": 0.25058232193208285, "grad_norm": 1.838933913203009, "learning_rate": 8.777432717285232e-06, "loss": 0.6834, "step": 8176 }, { "epoch": 0.25061297045482406, "grad_norm": 1.9728130276071572, "learning_rate": 8.777107529013751e-06, "loss": 0.7776, "step": 8177 }, { "epoch": 0.25064361897756526, "grad_norm": 1.757887785278414, "learning_rate": 8.776782303525462e-06, "loss": 0.667, "step": 8178 }, { "epoch": 0.25067426750030647, "grad_norm": 1.7018872744337081, "learning_rate": 8.776457040823572e-06, "loss": 0.7176, "step": 8179 }, { "epoch": 0.2507049160230477, "grad_norm": 1.8967408895938391, "learning_rate": 8.776131740911283e-06, "loss": 0.7582, "step": 8180 }, { "epoch": 0.2507355645457889, "grad_norm": 1.6178065143960134, "learning_rate": 8.775806403791802e-06, "loss": 0.6689, "step": 8181 }, { "epoch": 0.2507662130685301, "grad_norm": 0.8463481179078944, "learning_rate": 8.775481029468334e-06, "loss": 0.4747, "step": 8182 }, { "epoch": 0.2507968615912713, "grad_norm": 1.805977631354502, "learning_rate": 8.775155617944087e-06, "loss": 0.6889, "step": 8183 }, { "epoch": 0.2508275101140125, "grad_norm": 1.8703418288419706, "learning_rate": 8.774830169222263e-06, "loss": 0.7077, "step": 8184 }, { "epoch": 0.2508581586367537, "grad_norm": 1.7707433979291214, "learning_rate": 8.774504683306076e-06, "loss": 0.7087, "step": 8185 }, { "epoch": 0.2508888071594949, "grad_norm": 2.013909643949386, "learning_rate": 8.774179160198725e-06, "loss": 0.7301, "step": 8186 }, { "epoch": 0.2509194556822361, "grad_norm": 2.1668065318121674, "learning_rate": 8.773853599903422e-06, "loss": 0.7039, "step": 8187 }, { "epoch": 0.2509501042049773, "grad_norm": 0.9274470922401972, "learning_rate": 8.773528002423373e-06, "loss": 0.4854, "step": 8188 }, { "epoch": 0.2509807527277185, "grad_norm": 1.6340687693893292, "learning_rate": 8.773202367761788e-06, "loss": 0.8318, "step": 8189 }, { "epoch": 0.25101140125045973, "grad_norm": 1.6539129392252416, "learning_rate": 8.772876695921874e-06, "loss": 0.813, "step": 8190 }, { "epoch": 0.25104204977320094, "grad_norm": 1.8687013059307644, "learning_rate": 8.772550986906843e-06, "loss": 0.6456, "step": 8191 }, { "epoch": 0.25107269829594214, "grad_norm": 2.192829487211569, "learning_rate": 8.7722252407199e-06, "loss": 0.6596, "step": 8192 }, { "epoch": 0.25110334681868335, "grad_norm": 0.8504424880010482, "learning_rate": 8.771899457364256e-06, "loss": 0.4762, "step": 8193 }, { "epoch": 0.25113399534142455, "grad_norm": 2.037314422551662, "learning_rate": 8.771573636843123e-06, "loss": 0.7456, "step": 8194 }, { "epoch": 0.25116464386416576, "grad_norm": 1.643845185104976, "learning_rate": 8.771247779159708e-06, "loss": 0.7417, "step": 8195 }, { "epoch": 0.25119529238690697, "grad_norm": 1.8064172119136421, "learning_rate": 8.770921884317225e-06, "loss": 0.7511, "step": 8196 }, { "epoch": 0.25122594090964817, "grad_norm": 1.9012556350400156, "learning_rate": 8.770595952318885e-06, "loss": 0.7458, "step": 8197 }, { "epoch": 0.2512565894323894, "grad_norm": 1.812873854619686, "learning_rate": 8.770269983167896e-06, "loss": 0.7851, "step": 8198 }, { "epoch": 0.2512872379551306, "grad_norm": 1.9789564468626701, "learning_rate": 8.769943976867473e-06, "loss": 0.6821, "step": 8199 }, { "epoch": 0.2513178864778718, "grad_norm": 2.069635542925194, "learning_rate": 8.76961793342083e-06, "loss": 0.7192, "step": 8200 }, { "epoch": 0.251348535000613, "grad_norm": 1.921008008944625, "learning_rate": 8.769291852831172e-06, "loss": 0.7794, "step": 8201 }, { "epoch": 0.2513791835233542, "grad_norm": 0.8282896500383892, "learning_rate": 8.76896573510172e-06, "loss": 0.4611, "step": 8202 }, { "epoch": 0.2514098320460954, "grad_norm": 0.8573182664049317, "learning_rate": 8.768639580235685e-06, "loss": 0.4802, "step": 8203 }, { "epoch": 0.25144048056883656, "grad_norm": 0.8091192037748193, "learning_rate": 8.768313388236278e-06, "loss": 0.4983, "step": 8204 }, { "epoch": 0.25147112909157776, "grad_norm": 1.9615884055017265, "learning_rate": 8.767987159106717e-06, "loss": 0.8018, "step": 8205 }, { "epoch": 0.25150177761431897, "grad_norm": 1.7776841390516476, "learning_rate": 8.767660892850214e-06, "loss": 0.7857, "step": 8206 }, { "epoch": 0.25153242613706017, "grad_norm": 1.5838204722958238, "learning_rate": 8.767334589469982e-06, "loss": 0.57, "step": 8207 }, { "epoch": 0.2515630746598014, "grad_norm": 1.48235702700454, "learning_rate": 8.76700824896924e-06, "loss": 0.6656, "step": 8208 }, { "epoch": 0.2515937231825426, "grad_norm": 1.8555142208456803, "learning_rate": 8.766681871351202e-06, "loss": 0.7462, "step": 8209 }, { "epoch": 0.2516243717052838, "grad_norm": 1.6489534878269785, "learning_rate": 8.766355456619085e-06, "loss": 0.7315, "step": 8210 }, { "epoch": 0.251655020228025, "grad_norm": 1.7664394913818218, "learning_rate": 8.766029004776102e-06, "loss": 0.741, "step": 8211 }, { "epoch": 0.2516856687507662, "grad_norm": 2.0911168673781604, "learning_rate": 8.765702515825472e-06, "loss": 0.6413, "step": 8212 }, { "epoch": 0.2517163172735074, "grad_norm": 1.7440782396855483, "learning_rate": 8.765375989770412e-06, "loss": 0.6666, "step": 8213 }, { "epoch": 0.2517469657962486, "grad_norm": 1.1988887003975797, "learning_rate": 8.765049426614138e-06, "loss": 0.4807, "step": 8214 }, { "epoch": 0.2517776143189898, "grad_norm": 1.926882277046341, "learning_rate": 8.764722826359871e-06, "loss": 0.6693, "step": 8215 }, { "epoch": 0.251808262841731, "grad_norm": 1.9948593652432978, "learning_rate": 8.764396189010824e-06, "loss": 0.7532, "step": 8216 }, { "epoch": 0.25183891136447223, "grad_norm": 1.8323598933230683, "learning_rate": 8.76406951457022e-06, "loss": 0.8217, "step": 8217 }, { "epoch": 0.25186955988721343, "grad_norm": 1.5370188421474722, "learning_rate": 8.763742803041275e-06, "loss": 0.694, "step": 8218 }, { "epoch": 0.25190020840995464, "grad_norm": 1.5846346750336235, "learning_rate": 8.76341605442721e-06, "loss": 0.6859, "step": 8219 }, { "epoch": 0.25193085693269585, "grad_norm": 1.6804769798747972, "learning_rate": 8.763089268731244e-06, "loss": 0.7771, "step": 8220 }, { "epoch": 0.25196150545543705, "grad_norm": 1.7029969325987473, "learning_rate": 8.762762445956595e-06, "loss": 0.6863, "step": 8221 }, { "epoch": 0.25199215397817826, "grad_norm": 1.6443582541998143, "learning_rate": 8.762435586106486e-06, "loss": 0.6981, "step": 8222 }, { "epoch": 0.25202280250091946, "grad_norm": 2.1359923616377188, "learning_rate": 8.762108689184136e-06, "loss": 0.7147, "step": 8223 }, { "epoch": 0.25205345102366067, "grad_norm": 1.5194244211318049, "learning_rate": 8.761781755192767e-06, "loss": 0.5966, "step": 8224 }, { "epoch": 0.2520840995464019, "grad_norm": 1.6034737395569219, "learning_rate": 8.7614547841356e-06, "loss": 0.6927, "step": 8225 }, { "epoch": 0.2521147480691431, "grad_norm": 1.8938661517901905, "learning_rate": 8.761127776015857e-06, "loss": 0.6931, "step": 8226 }, { "epoch": 0.2521453965918843, "grad_norm": 1.731872524263541, "learning_rate": 8.760800730836758e-06, "loss": 0.6708, "step": 8227 }, { "epoch": 0.2521760451146255, "grad_norm": 0.976621854872663, "learning_rate": 8.760473648601528e-06, "loss": 0.4888, "step": 8228 }, { "epoch": 0.2522066936373667, "grad_norm": 1.7767663852087168, "learning_rate": 8.76014652931339e-06, "loss": 0.6859, "step": 8229 }, { "epoch": 0.2522373421601079, "grad_norm": 0.913138626010244, "learning_rate": 8.759819372975565e-06, "loss": 0.4955, "step": 8230 }, { "epoch": 0.2522679906828491, "grad_norm": 1.837701732190321, "learning_rate": 8.759492179591278e-06, "loss": 0.7655, "step": 8231 }, { "epoch": 0.2522986392055903, "grad_norm": 1.7501831626599966, "learning_rate": 8.759164949163752e-06, "loss": 0.746, "step": 8232 }, { "epoch": 0.2523292877283315, "grad_norm": 1.6850112439157803, "learning_rate": 8.758837681696213e-06, "loss": 0.7269, "step": 8233 }, { "epoch": 0.2523599362510727, "grad_norm": 1.866159913242569, "learning_rate": 8.758510377191884e-06, "loss": 0.69, "step": 8234 }, { "epoch": 0.2523905847738139, "grad_norm": 1.6624840662951734, "learning_rate": 8.75818303565399e-06, "loss": 0.6927, "step": 8235 }, { "epoch": 0.2524212332965551, "grad_norm": 1.7397181416295182, "learning_rate": 8.757855657085758e-06, "loss": 0.7279, "step": 8236 }, { "epoch": 0.2524518818192963, "grad_norm": 1.5935995844899413, "learning_rate": 8.757528241490413e-06, "loss": 0.6828, "step": 8237 }, { "epoch": 0.2524825303420375, "grad_norm": 1.7677658219263013, "learning_rate": 8.75720078887118e-06, "loss": 0.7094, "step": 8238 }, { "epoch": 0.2525131788647787, "grad_norm": 1.5714384426715315, "learning_rate": 8.756873299231287e-06, "loss": 0.6689, "step": 8239 }, { "epoch": 0.2525438273875199, "grad_norm": 1.900585319850464, "learning_rate": 8.756545772573962e-06, "loss": 0.8531, "step": 8240 }, { "epoch": 0.2525744759102611, "grad_norm": 2.14097767970716, "learning_rate": 8.756218208902426e-06, "loss": 0.69, "step": 8241 }, { "epoch": 0.2526051244330023, "grad_norm": 1.636633298585525, "learning_rate": 8.755890608219914e-06, "loss": 0.7415, "step": 8242 }, { "epoch": 0.2526357729557435, "grad_norm": 1.7753144181186924, "learning_rate": 8.75556297052965e-06, "loss": 0.7016, "step": 8243 }, { "epoch": 0.2526664214784847, "grad_norm": 1.8722813045013813, "learning_rate": 8.755235295834862e-06, "loss": 0.6017, "step": 8244 }, { "epoch": 0.25269707000122593, "grad_norm": 1.7157896936352532, "learning_rate": 8.754907584138781e-06, "loss": 0.6938, "step": 8245 }, { "epoch": 0.25272771852396714, "grad_norm": 1.7328921438625293, "learning_rate": 8.754579835444634e-06, "loss": 0.6427, "step": 8246 }, { "epoch": 0.25275836704670834, "grad_norm": 1.822997615673366, "learning_rate": 8.754252049755654e-06, "loss": 0.6055, "step": 8247 }, { "epoch": 0.25278901556944955, "grad_norm": 1.5288033741886486, "learning_rate": 8.753924227075064e-06, "loss": 0.708, "step": 8248 }, { "epoch": 0.25281966409219075, "grad_norm": 1.7466755472021698, "learning_rate": 8.7535963674061e-06, "loss": 0.6839, "step": 8249 }, { "epoch": 0.25285031261493196, "grad_norm": 1.641636708630159, "learning_rate": 8.753268470751991e-06, "loss": 0.6614, "step": 8250 }, { "epoch": 0.25288096113767317, "grad_norm": 1.7379526611913296, "learning_rate": 8.752940537115969e-06, "loss": 0.7069, "step": 8251 }, { "epoch": 0.25291160966041437, "grad_norm": 1.7091435084163253, "learning_rate": 8.752612566501259e-06, "loss": 0.6555, "step": 8252 }, { "epoch": 0.2529422581831556, "grad_norm": 1.776480587558469, "learning_rate": 8.752284558911101e-06, "loss": 0.78, "step": 8253 }, { "epoch": 0.2529729067058968, "grad_norm": 1.647085658533588, "learning_rate": 8.751956514348722e-06, "loss": 0.6941, "step": 8254 }, { "epoch": 0.253003555228638, "grad_norm": 1.7103061597925557, "learning_rate": 8.751628432817355e-06, "loss": 0.7701, "step": 8255 }, { "epoch": 0.2530342037513792, "grad_norm": 2.265872227185117, "learning_rate": 8.751300314320234e-06, "loss": 0.6755, "step": 8256 }, { "epoch": 0.2530648522741204, "grad_norm": 1.7044844841331945, "learning_rate": 8.750972158860592e-06, "loss": 0.7091, "step": 8257 }, { "epoch": 0.2530955007968616, "grad_norm": 1.722608895529567, "learning_rate": 8.75064396644166e-06, "loss": 0.6429, "step": 8258 }, { "epoch": 0.2531261493196028, "grad_norm": 1.6472284562349635, "learning_rate": 8.750315737066674e-06, "loss": 0.745, "step": 8259 }, { "epoch": 0.253156797842344, "grad_norm": 1.3344208663472525, "learning_rate": 8.749987470738867e-06, "loss": 0.5095, "step": 8260 }, { "epoch": 0.2531874463650852, "grad_norm": 1.794578519586371, "learning_rate": 8.749659167461475e-06, "loss": 0.6777, "step": 8261 }, { "epoch": 0.25321809488782643, "grad_norm": 2.048429465100954, "learning_rate": 8.749330827237731e-06, "loss": 0.681, "step": 8262 }, { "epoch": 0.25324874341056763, "grad_norm": 0.8246517294855131, "learning_rate": 8.749002450070871e-06, "loss": 0.5008, "step": 8263 }, { "epoch": 0.25327939193330884, "grad_norm": 1.7215280094815173, "learning_rate": 8.748674035964132e-06, "loss": 0.7888, "step": 8264 }, { "epoch": 0.25331004045605005, "grad_norm": 1.8364643278916226, "learning_rate": 8.748345584920748e-06, "loss": 0.7279, "step": 8265 }, { "epoch": 0.2533406889787912, "grad_norm": 1.6511052126261205, "learning_rate": 8.748017096943956e-06, "loss": 0.73, "step": 8266 }, { "epoch": 0.2533713375015324, "grad_norm": 1.8686740114582991, "learning_rate": 8.74768857203699e-06, "loss": 0.723, "step": 8267 }, { "epoch": 0.2534019860242736, "grad_norm": 1.7294498260635622, "learning_rate": 8.747360010203092e-06, "loss": 0.7136, "step": 8268 }, { "epoch": 0.2534326345470148, "grad_norm": 1.8350705722785912, "learning_rate": 8.747031411445496e-06, "loss": 0.7161, "step": 8269 }, { "epoch": 0.253463283069756, "grad_norm": 1.6817595411799233, "learning_rate": 8.746702775767442e-06, "loss": 0.6531, "step": 8270 }, { "epoch": 0.2534939315924972, "grad_norm": 1.8885827598490665, "learning_rate": 8.746374103172166e-06, "loss": 0.7254, "step": 8271 }, { "epoch": 0.25352458011523843, "grad_norm": 1.1156748165963646, "learning_rate": 8.746045393662908e-06, "loss": 0.4927, "step": 8272 }, { "epoch": 0.25355522863797963, "grad_norm": 1.6128572040630464, "learning_rate": 8.745716647242905e-06, "loss": 0.6681, "step": 8273 }, { "epoch": 0.25358587716072084, "grad_norm": 1.4940177758009865, "learning_rate": 8.7453878639154e-06, "loss": 0.624, "step": 8274 }, { "epoch": 0.25361652568346205, "grad_norm": 1.7229532937936725, "learning_rate": 8.745059043683629e-06, "loss": 0.7201, "step": 8275 }, { "epoch": 0.25364717420620325, "grad_norm": 1.734836621776242, "learning_rate": 8.744730186550831e-06, "loss": 0.7491, "step": 8276 }, { "epoch": 0.25367782272894446, "grad_norm": 1.7770292859507255, "learning_rate": 8.74440129252025e-06, "loss": 0.6832, "step": 8277 }, { "epoch": 0.25370847125168566, "grad_norm": 1.947432529096878, "learning_rate": 8.744072361595124e-06, "loss": 0.7898, "step": 8278 }, { "epoch": 0.25373911977442687, "grad_norm": 1.8571941385540038, "learning_rate": 8.743743393778697e-06, "loss": 0.7425, "step": 8279 }, { "epoch": 0.2537697682971681, "grad_norm": 1.6851886819254365, "learning_rate": 8.743414389074208e-06, "loss": 0.697, "step": 8280 }, { "epoch": 0.2538004168199093, "grad_norm": 1.8395055556305475, "learning_rate": 8.743085347484899e-06, "loss": 0.6782, "step": 8281 }, { "epoch": 0.2538310653426505, "grad_norm": 1.7953066331554905, "learning_rate": 8.742756269014012e-06, "loss": 0.7665, "step": 8282 }, { "epoch": 0.2538617138653917, "grad_norm": 1.7350825275581094, "learning_rate": 8.74242715366479e-06, "loss": 0.7984, "step": 8283 }, { "epoch": 0.2538923623881329, "grad_norm": 1.9248612823342641, "learning_rate": 8.742098001440474e-06, "loss": 0.8518, "step": 8284 }, { "epoch": 0.2539230109108741, "grad_norm": 1.8742549623983331, "learning_rate": 8.741768812344311e-06, "loss": 0.8276, "step": 8285 }, { "epoch": 0.2539536594336153, "grad_norm": 1.6578467902490612, "learning_rate": 8.741439586379543e-06, "loss": 0.6042, "step": 8286 }, { "epoch": 0.2539843079563565, "grad_norm": 1.7502781604046858, "learning_rate": 8.74111032354941e-06, "loss": 0.7668, "step": 8287 }, { "epoch": 0.2540149564790977, "grad_norm": 1.6029650239218078, "learning_rate": 8.740781023857163e-06, "loss": 0.5953, "step": 8288 }, { "epoch": 0.2540456050018389, "grad_norm": 1.757026709465014, "learning_rate": 8.740451687306043e-06, "loss": 0.6633, "step": 8289 }, { "epoch": 0.25407625352458013, "grad_norm": 1.6723836608121532, "learning_rate": 8.740122313899295e-06, "loss": 0.6962, "step": 8290 }, { "epoch": 0.25410690204732134, "grad_norm": 1.6419029238388363, "learning_rate": 8.739792903640166e-06, "loss": 0.6772, "step": 8291 }, { "epoch": 0.25413755057006254, "grad_norm": 1.7828527402774517, "learning_rate": 8.7394634565319e-06, "loss": 0.6509, "step": 8292 }, { "epoch": 0.25416819909280375, "grad_norm": 1.9158920541299576, "learning_rate": 8.739133972577744e-06, "loss": 0.7664, "step": 8293 }, { "epoch": 0.25419884761554495, "grad_norm": 1.6339000001963637, "learning_rate": 8.738804451780943e-06, "loss": 0.6162, "step": 8294 }, { "epoch": 0.25422949613828616, "grad_norm": 1.8233820564685999, "learning_rate": 8.738474894144747e-06, "loss": 0.7016, "step": 8295 }, { "epoch": 0.25426014466102737, "grad_norm": 1.6539776428525337, "learning_rate": 8.7381452996724e-06, "loss": 0.5503, "step": 8296 }, { "epoch": 0.2542907931837685, "grad_norm": 1.731269014378508, "learning_rate": 8.737815668367152e-06, "loss": 0.7178, "step": 8297 }, { "epoch": 0.2543214417065097, "grad_norm": 1.723799860252847, "learning_rate": 8.737486000232247e-06, "loss": 0.6867, "step": 8298 }, { "epoch": 0.2543520902292509, "grad_norm": 1.8229923255071994, "learning_rate": 8.737156295270938e-06, "loss": 0.6226, "step": 8299 }, { "epoch": 0.25438273875199213, "grad_norm": 2.23370665616045, "learning_rate": 8.736826553486473e-06, "loss": 0.6859, "step": 8300 }, { "epoch": 0.25441338727473334, "grad_norm": 1.0144723484981943, "learning_rate": 8.736496774882099e-06, "loss": 0.4914, "step": 8301 }, { "epoch": 0.25444403579747454, "grad_norm": 1.7596120261317694, "learning_rate": 8.736166959461065e-06, "loss": 0.6908, "step": 8302 }, { "epoch": 0.25447468432021575, "grad_norm": 1.6681673037284852, "learning_rate": 8.735837107226624e-06, "loss": 0.7938, "step": 8303 }, { "epoch": 0.25450533284295696, "grad_norm": 1.9536644188452459, "learning_rate": 8.735507218182023e-06, "loss": 0.8104, "step": 8304 }, { "epoch": 0.25453598136569816, "grad_norm": 2.0851369005629445, "learning_rate": 8.735177292330514e-06, "loss": 0.7633, "step": 8305 }, { "epoch": 0.25456662988843937, "grad_norm": 0.8316338286235772, "learning_rate": 8.734847329675349e-06, "loss": 0.4758, "step": 8306 }, { "epoch": 0.25459727841118057, "grad_norm": 1.9066066874540006, "learning_rate": 8.734517330219775e-06, "loss": 0.7462, "step": 8307 }, { "epoch": 0.2546279269339218, "grad_norm": 1.7128679333296792, "learning_rate": 8.734187293967046e-06, "loss": 0.6289, "step": 8308 }, { "epoch": 0.254658575456663, "grad_norm": 0.8111693474772278, "learning_rate": 8.733857220920416e-06, "loss": 0.4985, "step": 8309 }, { "epoch": 0.2546892239794042, "grad_norm": 1.9152715503715343, "learning_rate": 8.733527111083136e-06, "loss": 0.6873, "step": 8310 }, { "epoch": 0.2547198725021454, "grad_norm": 1.7940728422080654, "learning_rate": 8.733196964458457e-06, "loss": 0.7284, "step": 8311 }, { "epoch": 0.2547505210248866, "grad_norm": 1.8553497153925342, "learning_rate": 8.732866781049632e-06, "loss": 0.7085, "step": 8312 }, { "epoch": 0.2547811695476278, "grad_norm": 1.6516916871140994, "learning_rate": 8.732536560859917e-06, "loss": 0.7796, "step": 8313 }, { "epoch": 0.254811818070369, "grad_norm": 1.646349480443621, "learning_rate": 8.732206303892564e-06, "loss": 0.7516, "step": 8314 }, { "epoch": 0.2548424665931102, "grad_norm": 1.8598157789135503, "learning_rate": 8.731876010150827e-06, "loss": 0.8129, "step": 8315 }, { "epoch": 0.2548731151158514, "grad_norm": 0.7933146506037795, "learning_rate": 8.731545679637962e-06, "loss": 0.4705, "step": 8316 }, { "epoch": 0.25490376363859263, "grad_norm": 2.0118974653611907, "learning_rate": 8.731215312357221e-06, "loss": 0.7507, "step": 8317 }, { "epoch": 0.25493441216133383, "grad_norm": 1.95910520741383, "learning_rate": 8.730884908311862e-06, "loss": 0.8248, "step": 8318 }, { "epoch": 0.25496506068407504, "grad_norm": 1.6848872809108226, "learning_rate": 8.730554467505139e-06, "loss": 0.6736, "step": 8319 }, { "epoch": 0.25499570920681625, "grad_norm": 1.5453417656516217, "learning_rate": 8.730223989940307e-06, "loss": 0.6274, "step": 8320 }, { "epoch": 0.25502635772955745, "grad_norm": 1.5746022282973824, "learning_rate": 8.729893475620626e-06, "loss": 0.7231, "step": 8321 }, { "epoch": 0.25505700625229866, "grad_norm": 1.4355065081186082, "learning_rate": 8.729562924549348e-06, "loss": 0.5973, "step": 8322 }, { "epoch": 0.25508765477503986, "grad_norm": 1.8074943767672662, "learning_rate": 8.729232336729734e-06, "loss": 0.7404, "step": 8323 }, { "epoch": 0.25511830329778107, "grad_norm": 1.7692299665931723, "learning_rate": 8.728901712165039e-06, "loss": 0.7513, "step": 8324 }, { "epoch": 0.2551489518205223, "grad_norm": 1.7560312540894252, "learning_rate": 8.728571050858522e-06, "loss": 0.751, "step": 8325 }, { "epoch": 0.2551796003432635, "grad_norm": 1.6045489504995958, "learning_rate": 8.72824035281344e-06, "loss": 0.712, "step": 8326 }, { "epoch": 0.2552102488660047, "grad_norm": 1.5826984275734102, "learning_rate": 8.727909618033051e-06, "loss": 0.6891, "step": 8327 }, { "epoch": 0.25524089738874584, "grad_norm": 0.9087535493071844, "learning_rate": 8.727578846520615e-06, "loss": 0.5023, "step": 8328 }, { "epoch": 0.25527154591148704, "grad_norm": 0.7977649531790899, "learning_rate": 8.727248038279392e-06, "loss": 0.5029, "step": 8329 }, { "epoch": 0.25530219443422825, "grad_norm": 2.166310332558424, "learning_rate": 8.72691719331264e-06, "loss": 0.8332, "step": 8330 }, { "epoch": 0.25533284295696945, "grad_norm": 1.759692088934856, "learning_rate": 8.72658631162362e-06, "loss": 0.7507, "step": 8331 }, { "epoch": 0.25536349147971066, "grad_norm": 1.6367398163481908, "learning_rate": 8.72625539321559e-06, "loss": 0.7159, "step": 8332 }, { "epoch": 0.25539414000245186, "grad_norm": 1.9151824130894286, "learning_rate": 8.725924438091813e-06, "loss": 0.6953, "step": 8333 }, { "epoch": 0.25542478852519307, "grad_norm": 0.8597640278463627, "learning_rate": 8.72559344625555e-06, "loss": 0.4669, "step": 8334 }, { "epoch": 0.2554554370479343, "grad_norm": 1.6546234705621516, "learning_rate": 8.72526241771006e-06, "loss": 0.7102, "step": 8335 }, { "epoch": 0.2554860855706755, "grad_norm": 1.705046322015247, "learning_rate": 8.724931352458605e-06, "loss": 0.6572, "step": 8336 }, { "epoch": 0.2555167340934167, "grad_norm": 0.8556264499014846, "learning_rate": 8.72460025050445e-06, "loss": 0.4818, "step": 8337 }, { "epoch": 0.2555473826161579, "grad_norm": 1.9989113330714605, "learning_rate": 8.724269111850857e-06, "loss": 0.7341, "step": 8338 }, { "epoch": 0.2555780311388991, "grad_norm": 1.5481197218282226, "learning_rate": 8.723937936501086e-06, "loss": 0.7037, "step": 8339 }, { "epoch": 0.2556086796616403, "grad_norm": 1.8759560569021076, "learning_rate": 8.723606724458402e-06, "loss": 0.6535, "step": 8340 }, { "epoch": 0.2556393281843815, "grad_norm": 0.8425278117290913, "learning_rate": 8.72327547572607e-06, "loss": 0.4829, "step": 8341 }, { "epoch": 0.2556699767071227, "grad_norm": 2.020639733341731, "learning_rate": 8.72294419030735e-06, "loss": 0.7256, "step": 8342 }, { "epoch": 0.2557006252298639, "grad_norm": 1.6610525492974524, "learning_rate": 8.72261286820551e-06, "loss": 0.6732, "step": 8343 }, { "epoch": 0.2557312737526051, "grad_norm": 2.0221452712459276, "learning_rate": 8.72228150942381e-06, "loss": 0.7789, "step": 8344 }, { "epoch": 0.25576192227534633, "grad_norm": 1.7813775382748243, "learning_rate": 8.72195011396552e-06, "loss": 0.7362, "step": 8345 }, { "epoch": 0.25579257079808754, "grad_norm": 1.7663585805781763, "learning_rate": 8.721618681833903e-06, "loss": 0.7699, "step": 8346 }, { "epoch": 0.25582321932082874, "grad_norm": 1.6518378176899262, "learning_rate": 8.721287213032225e-06, "loss": 0.7187, "step": 8347 }, { "epoch": 0.25585386784356995, "grad_norm": 1.9312860893602808, "learning_rate": 8.720955707563752e-06, "loss": 0.7198, "step": 8348 }, { "epoch": 0.25588451636631115, "grad_norm": 1.5491731922888994, "learning_rate": 8.72062416543175e-06, "loss": 0.6844, "step": 8349 }, { "epoch": 0.25591516488905236, "grad_norm": 1.5836333638695335, "learning_rate": 8.720292586639485e-06, "loss": 0.639, "step": 8350 }, { "epoch": 0.25594581341179357, "grad_norm": 1.6161671644672757, "learning_rate": 8.719960971190227e-06, "loss": 0.7637, "step": 8351 }, { "epoch": 0.25597646193453477, "grad_norm": 1.8297514160265647, "learning_rate": 8.719629319087242e-06, "loss": 0.7231, "step": 8352 }, { "epoch": 0.256007110457276, "grad_norm": 1.7585163469256149, "learning_rate": 8.719297630333796e-06, "loss": 0.7422, "step": 8353 }, { "epoch": 0.2560377589800172, "grad_norm": 1.6530969914452474, "learning_rate": 8.71896590493316e-06, "loss": 0.7474, "step": 8354 }, { "epoch": 0.2560684075027584, "grad_norm": 1.6547793943024607, "learning_rate": 8.718634142888601e-06, "loss": 0.7818, "step": 8355 }, { "epoch": 0.2560990560254996, "grad_norm": 1.7148493476266373, "learning_rate": 8.718302344203388e-06, "loss": 0.6701, "step": 8356 }, { "epoch": 0.2561297045482408, "grad_norm": 0.8488275056588236, "learning_rate": 8.717970508880791e-06, "loss": 0.4966, "step": 8357 }, { "epoch": 0.256160353070982, "grad_norm": 1.5752875622996636, "learning_rate": 8.71763863692408e-06, "loss": 0.7094, "step": 8358 }, { "epoch": 0.25619100159372316, "grad_norm": 1.7607109785691897, "learning_rate": 8.717306728336523e-06, "loss": 0.7121, "step": 8359 }, { "epoch": 0.25622165011646436, "grad_norm": 1.8844511059619384, "learning_rate": 8.716974783121393e-06, "loss": 0.7355, "step": 8360 }, { "epoch": 0.25625229863920557, "grad_norm": 1.6737969043364742, "learning_rate": 8.716642801281959e-06, "loss": 0.8603, "step": 8361 }, { "epoch": 0.2562829471619468, "grad_norm": 1.7270632077697883, "learning_rate": 8.716310782821493e-06, "loss": 0.7059, "step": 8362 }, { "epoch": 0.256313595684688, "grad_norm": 1.632963320385186, "learning_rate": 8.715978727743263e-06, "loss": 0.6565, "step": 8363 }, { "epoch": 0.2563442442074292, "grad_norm": 1.8928079839220038, "learning_rate": 8.715646636050548e-06, "loss": 0.7782, "step": 8364 }, { "epoch": 0.2563748927301704, "grad_norm": 1.5248743970718015, "learning_rate": 8.715314507746613e-06, "loss": 0.6248, "step": 8365 }, { "epoch": 0.2564055412529116, "grad_norm": 1.4626851328737283, "learning_rate": 8.714982342834735e-06, "loss": 0.5851, "step": 8366 }, { "epoch": 0.2564361897756528, "grad_norm": 1.899507200576532, "learning_rate": 8.714650141318185e-06, "loss": 0.7447, "step": 8367 }, { "epoch": 0.256466838298394, "grad_norm": 1.595971339481739, "learning_rate": 8.714317903200238e-06, "loss": 0.7067, "step": 8368 }, { "epoch": 0.2564974868211352, "grad_norm": 1.6141286466020368, "learning_rate": 8.713985628484165e-06, "loss": 0.7525, "step": 8369 }, { "epoch": 0.2565281353438764, "grad_norm": 0.8928952972274125, "learning_rate": 8.713653317173241e-06, "loss": 0.4856, "step": 8370 }, { "epoch": 0.2565587838666176, "grad_norm": 1.628387386351008, "learning_rate": 8.713320969270742e-06, "loss": 0.7011, "step": 8371 }, { "epoch": 0.25658943238935883, "grad_norm": 1.7262389381856573, "learning_rate": 8.71298858477994e-06, "loss": 0.6405, "step": 8372 }, { "epoch": 0.25662008091210003, "grad_norm": 0.7475825246800886, "learning_rate": 8.712656163704111e-06, "loss": 0.4744, "step": 8373 }, { "epoch": 0.25665072943484124, "grad_norm": 1.6988921699195267, "learning_rate": 8.712323706046533e-06, "loss": 0.738, "step": 8374 }, { "epoch": 0.25668137795758245, "grad_norm": 1.7186923814668655, "learning_rate": 8.71199121181048e-06, "loss": 0.7245, "step": 8375 }, { "epoch": 0.25671202648032365, "grad_norm": 1.7616682378150483, "learning_rate": 8.711658680999226e-06, "loss": 0.633, "step": 8376 }, { "epoch": 0.25674267500306486, "grad_norm": 1.6646229979507021, "learning_rate": 8.71132611361605e-06, "loss": 0.7871, "step": 8377 }, { "epoch": 0.25677332352580606, "grad_norm": 0.8566501211132116, "learning_rate": 8.710993509664226e-06, "loss": 0.498, "step": 8378 }, { "epoch": 0.25680397204854727, "grad_norm": 1.902028600784724, "learning_rate": 8.710660869147038e-06, "loss": 0.7252, "step": 8379 }, { "epoch": 0.2568346205712885, "grad_norm": 1.628030777225232, "learning_rate": 8.710328192067757e-06, "loss": 0.7226, "step": 8380 }, { "epoch": 0.2568652690940297, "grad_norm": 1.6795883045808453, "learning_rate": 8.709995478429661e-06, "loss": 0.76, "step": 8381 }, { "epoch": 0.2568959176167709, "grad_norm": 1.744381732612409, "learning_rate": 8.709662728236033e-06, "loss": 0.7082, "step": 8382 }, { "epoch": 0.2569265661395121, "grad_norm": 1.7195527509856674, "learning_rate": 8.709329941490147e-06, "loss": 0.7453, "step": 8383 }, { "epoch": 0.2569572146622533, "grad_norm": 1.9695978369579445, "learning_rate": 8.708997118195287e-06, "loss": 0.6672, "step": 8384 }, { "epoch": 0.2569878631849945, "grad_norm": 1.9737477454884977, "learning_rate": 8.708664258354727e-06, "loss": 0.7991, "step": 8385 }, { "epoch": 0.2570185117077357, "grad_norm": 1.7263626474072786, "learning_rate": 8.708331361971748e-06, "loss": 0.7323, "step": 8386 }, { "epoch": 0.2570491602304769, "grad_norm": 1.6415251651808465, "learning_rate": 8.707998429049633e-06, "loss": 0.6733, "step": 8387 }, { "epoch": 0.2570798087532181, "grad_norm": 1.705268797026797, "learning_rate": 8.707665459591662e-06, "loss": 0.71, "step": 8388 }, { "epoch": 0.2571104572759593, "grad_norm": 1.6168413916741675, "learning_rate": 8.707332453601112e-06, "loss": 0.661, "step": 8389 }, { "epoch": 0.2571411057987005, "grad_norm": 0.7957108621982146, "learning_rate": 8.706999411081268e-06, "loss": 0.4685, "step": 8390 }, { "epoch": 0.2571717543214417, "grad_norm": 1.6289105976152514, "learning_rate": 8.706666332035409e-06, "loss": 0.76, "step": 8391 }, { "epoch": 0.2572024028441829, "grad_norm": 1.7697569943672944, "learning_rate": 8.70633321646682e-06, "loss": 0.7895, "step": 8392 }, { "epoch": 0.2572330513669241, "grad_norm": 1.5554597884148278, "learning_rate": 8.70600006437878e-06, "loss": 0.6851, "step": 8393 }, { "epoch": 0.2572636998896653, "grad_norm": 1.492122360799134, "learning_rate": 8.705666875774575e-06, "loss": 0.6699, "step": 8394 }, { "epoch": 0.2572943484124065, "grad_norm": 1.6648421365489796, "learning_rate": 8.705333650657486e-06, "loss": 0.7094, "step": 8395 }, { "epoch": 0.2573249969351477, "grad_norm": 1.8745776757613501, "learning_rate": 8.705000389030795e-06, "loss": 0.6561, "step": 8396 }, { "epoch": 0.2573556454578889, "grad_norm": 1.6478844441586078, "learning_rate": 8.704667090897787e-06, "loss": 0.7007, "step": 8397 }, { "epoch": 0.2573862939806301, "grad_norm": 0.8660134124910732, "learning_rate": 8.704333756261748e-06, "loss": 0.4991, "step": 8398 }, { "epoch": 0.2574169425033713, "grad_norm": 1.7642672283527596, "learning_rate": 8.704000385125959e-06, "loss": 0.7039, "step": 8399 }, { "epoch": 0.25744759102611253, "grad_norm": 1.8390203778092833, "learning_rate": 8.703666977493707e-06, "loss": 0.7797, "step": 8400 }, { "epoch": 0.25747823954885374, "grad_norm": 1.8512791971932985, "learning_rate": 8.703333533368279e-06, "loss": 0.7402, "step": 8401 }, { "epoch": 0.25750888807159494, "grad_norm": 1.6336932925181702, "learning_rate": 8.703000052752954e-06, "loss": 0.7674, "step": 8402 }, { "epoch": 0.25753953659433615, "grad_norm": 1.6435016101171762, "learning_rate": 8.702666535651026e-06, "loss": 0.6615, "step": 8403 }, { "epoch": 0.25757018511707735, "grad_norm": 1.6897857501187918, "learning_rate": 8.702332982065775e-06, "loss": 0.6853, "step": 8404 }, { "epoch": 0.25760083363981856, "grad_norm": 2.044840043285815, "learning_rate": 8.701999392000491e-06, "loss": 0.8007, "step": 8405 }, { "epoch": 0.25763148216255977, "grad_norm": 1.5896832195814568, "learning_rate": 8.701665765458458e-06, "loss": 0.69, "step": 8406 }, { "epoch": 0.25766213068530097, "grad_norm": 1.6560088341296983, "learning_rate": 8.701332102442967e-06, "loss": 0.6806, "step": 8407 }, { "epoch": 0.2576927792080422, "grad_norm": 1.821710846250818, "learning_rate": 8.700998402957303e-06, "loss": 0.7182, "step": 8408 }, { "epoch": 0.2577234277307834, "grad_norm": 1.8763975626845038, "learning_rate": 8.700664667004754e-06, "loss": 0.6723, "step": 8409 }, { "epoch": 0.2577540762535246, "grad_norm": 1.803636368854129, "learning_rate": 8.700330894588612e-06, "loss": 0.6775, "step": 8410 }, { "epoch": 0.2577847247762658, "grad_norm": 1.7692628988179118, "learning_rate": 8.69999708571216e-06, "loss": 0.822, "step": 8411 }, { "epoch": 0.257815373299007, "grad_norm": 1.7870267297821676, "learning_rate": 8.69966324037869e-06, "loss": 0.6762, "step": 8412 }, { "epoch": 0.2578460218217482, "grad_norm": 1.8151291462373427, "learning_rate": 8.699329358591492e-06, "loss": 0.6969, "step": 8413 }, { "epoch": 0.2578766703444894, "grad_norm": 0.894432608241415, "learning_rate": 8.698995440353856e-06, "loss": 0.4893, "step": 8414 }, { "epoch": 0.2579073188672306, "grad_norm": 0.8304190327532127, "learning_rate": 8.698661485669072e-06, "loss": 0.4756, "step": 8415 }, { "epoch": 0.2579379673899718, "grad_norm": 0.7896765247088179, "learning_rate": 8.698327494540428e-06, "loss": 0.4865, "step": 8416 }, { "epoch": 0.25796861591271303, "grad_norm": 0.8026917293556042, "learning_rate": 8.69799346697122e-06, "loss": 0.4861, "step": 8417 }, { "epoch": 0.25799926443545423, "grad_norm": 1.7824994794959894, "learning_rate": 8.697659402964733e-06, "loss": 0.733, "step": 8418 }, { "epoch": 0.25802991295819544, "grad_norm": 1.7203201920795668, "learning_rate": 8.697325302524264e-06, "loss": 0.7811, "step": 8419 }, { "epoch": 0.25806056148093665, "grad_norm": 0.9051596020771672, "learning_rate": 8.696991165653102e-06, "loss": 0.505, "step": 8420 }, { "epoch": 0.2580912100036778, "grad_norm": 2.2793610767950043, "learning_rate": 8.69665699235454e-06, "loss": 0.5741, "step": 8421 }, { "epoch": 0.258121858526419, "grad_norm": 1.6865910765304273, "learning_rate": 8.69632278263187e-06, "loss": 0.7146, "step": 8422 }, { "epoch": 0.2581525070491602, "grad_norm": 1.9445233380063225, "learning_rate": 8.695988536488387e-06, "loss": 0.7548, "step": 8423 }, { "epoch": 0.2581831555719014, "grad_norm": 1.6613248804481688, "learning_rate": 8.695654253927384e-06, "loss": 0.7603, "step": 8424 }, { "epoch": 0.2582138040946426, "grad_norm": 1.5959817071263203, "learning_rate": 8.695319934952152e-06, "loss": 0.5625, "step": 8425 }, { "epoch": 0.2582444526173838, "grad_norm": 1.7448403506541623, "learning_rate": 8.694985579565988e-06, "loss": 0.6718, "step": 8426 }, { "epoch": 0.25827510114012503, "grad_norm": 1.7252749782483305, "learning_rate": 8.694651187772187e-06, "loss": 0.6633, "step": 8427 }, { "epoch": 0.25830574966286624, "grad_norm": 1.8978434705515421, "learning_rate": 8.694316759574042e-06, "loss": 0.6093, "step": 8428 }, { "epoch": 0.25833639818560744, "grad_norm": 1.6446452372650187, "learning_rate": 8.693982294974847e-06, "loss": 0.754, "step": 8429 }, { "epoch": 0.25836704670834865, "grad_norm": 1.8230963016586048, "learning_rate": 8.693647793977901e-06, "loss": 0.6715, "step": 8430 }, { "epoch": 0.25839769523108985, "grad_norm": 1.7912011222275717, "learning_rate": 8.693313256586498e-06, "loss": 0.691, "step": 8431 }, { "epoch": 0.25842834375383106, "grad_norm": 1.9714513978124761, "learning_rate": 8.692978682803934e-06, "loss": 0.6875, "step": 8432 }, { "epoch": 0.25845899227657226, "grad_norm": 1.8259549950306262, "learning_rate": 8.692644072633506e-06, "loss": 0.715, "step": 8433 }, { "epoch": 0.25848964079931347, "grad_norm": 1.5357883934941288, "learning_rate": 8.692309426078514e-06, "loss": 0.7127, "step": 8434 }, { "epoch": 0.2585202893220547, "grad_norm": 1.7450304988694465, "learning_rate": 8.691974743142249e-06, "loss": 0.6854, "step": 8435 }, { "epoch": 0.2585509378447959, "grad_norm": 0.9135339102238139, "learning_rate": 8.691640023828014e-06, "loss": 0.4765, "step": 8436 }, { "epoch": 0.2585815863675371, "grad_norm": 1.8377768733257325, "learning_rate": 8.691305268139104e-06, "loss": 0.7354, "step": 8437 }, { "epoch": 0.2586122348902783, "grad_norm": 1.622286887480452, "learning_rate": 8.69097047607882e-06, "loss": 0.6727, "step": 8438 }, { "epoch": 0.2586428834130195, "grad_norm": 1.6866220790249387, "learning_rate": 8.690635647650458e-06, "loss": 0.7221, "step": 8439 }, { "epoch": 0.2586735319357607, "grad_norm": 1.6313440350686965, "learning_rate": 8.690300782857319e-06, "loss": 0.6423, "step": 8440 }, { "epoch": 0.2587041804585019, "grad_norm": 1.5006000577946714, "learning_rate": 8.689965881702704e-06, "loss": 0.7429, "step": 8441 }, { "epoch": 0.2587348289812431, "grad_norm": 1.8519979415544612, "learning_rate": 8.68963094418991e-06, "loss": 0.7038, "step": 8442 }, { "epoch": 0.2587654775039843, "grad_norm": 1.7410413029992657, "learning_rate": 8.689295970322238e-06, "loss": 0.6721, "step": 8443 }, { "epoch": 0.2587961260267255, "grad_norm": 1.8523111951284714, "learning_rate": 8.688960960102987e-06, "loss": 0.6977, "step": 8444 }, { "epoch": 0.25882677454946673, "grad_norm": 2.1003546702586715, "learning_rate": 8.688625913535464e-06, "loss": 0.6906, "step": 8445 }, { "epoch": 0.25885742307220794, "grad_norm": 1.8044856572840349, "learning_rate": 8.688290830622963e-06, "loss": 0.7599, "step": 8446 }, { "epoch": 0.25888807159494914, "grad_norm": 1.5740630368265167, "learning_rate": 8.68795571136879e-06, "loss": 0.7055, "step": 8447 }, { "epoch": 0.25891872011769035, "grad_norm": 0.8751725550364577, "learning_rate": 8.687620555776244e-06, "loss": 0.4838, "step": 8448 }, { "epoch": 0.25894936864043155, "grad_norm": 0.8331170627296626, "learning_rate": 8.687285363848631e-06, "loss": 0.4739, "step": 8449 }, { "epoch": 0.25898001716317276, "grad_norm": 1.7093781363378315, "learning_rate": 8.686950135589251e-06, "loss": 0.7387, "step": 8450 }, { "epoch": 0.25901066568591397, "grad_norm": 1.948451158796109, "learning_rate": 8.686614871001409e-06, "loss": 0.686, "step": 8451 }, { "epoch": 0.2590413142086551, "grad_norm": 1.8222141979195707, "learning_rate": 8.686279570088408e-06, "loss": 0.7868, "step": 8452 }, { "epoch": 0.2590719627313963, "grad_norm": 0.8271225610604901, "learning_rate": 8.68594423285355e-06, "loss": 0.4708, "step": 8453 }, { "epoch": 0.2591026112541375, "grad_norm": 1.6239412938851956, "learning_rate": 8.68560885930014e-06, "loss": 0.7232, "step": 8454 }, { "epoch": 0.25913325977687873, "grad_norm": 1.617598153864809, "learning_rate": 8.685273449431483e-06, "loss": 0.6308, "step": 8455 }, { "epoch": 0.25916390829961994, "grad_norm": 1.7308594164856388, "learning_rate": 8.684938003250885e-06, "loss": 0.7159, "step": 8456 }, { "epoch": 0.25919455682236114, "grad_norm": 1.5319419193171344, "learning_rate": 8.684602520761648e-06, "loss": 0.82, "step": 8457 }, { "epoch": 0.25922520534510235, "grad_norm": 1.6648188440411562, "learning_rate": 8.684267001967082e-06, "loss": 0.6608, "step": 8458 }, { "epoch": 0.25925585386784356, "grad_norm": 1.7988088646456675, "learning_rate": 8.683931446870488e-06, "loss": 0.6927, "step": 8459 }, { "epoch": 0.25928650239058476, "grad_norm": 1.759916004911238, "learning_rate": 8.683595855475176e-06, "loss": 0.726, "step": 8460 }, { "epoch": 0.25931715091332597, "grad_norm": 1.595522240783577, "learning_rate": 8.683260227784452e-06, "loss": 0.7251, "step": 8461 }, { "epoch": 0.2593477994360672, "grad_norm": 1.0055642973650452, "learning_rate": 8.682924563801622e-06, "loss": 0.4816, "step": 8462 }, { "epoch": 0.2593784479588084, "grad_norm": 0.8592047927951098, "learning_rate": 8.682588863529994e-06, "loss": 0.4957, "step": 8463 }, { "epoch": 0.2594090964815496, "grad_norm": 1.7894870254427424, "learning_rate": 8.682253126972878e-06, "loss": 0.712, "step": 8464 }, { "epoch": 0.2594397450042908, "grad_norm": 1.79757790372583, "learning_rate": 8.681917354133576e-06, "loss": 0.7273, "step": 8465 }, { "epoch": 0.259470393527032, "grad_norm": 0.8527681198276372, "learning_rate": 8.681581545015403e-06, "loss": 0.4798, "step": 8466 }, { "epoch": 0.2595010420497732, "grad_norm": 1.7517069637225229, "learning_rate": 8.681245699621663e-06, "loss": 0.7207, "step": 8467 }, { "epoch": 0.2595316905725144, "grad_norm": 0.9127124687717627, "learning_rate": 8.68090981795567e-06, "loss": 0.4765, "step": 8468 }, { "epoch": 0.2595623390952556, "grad_norm": 1.690348492220137, "learning_rate": 8.680573900020727e-06, "loss": 0.581, "step": 8469 }, { "epoch": 0.2595929876179968, "grad_norm": 1.7062995512510506, "learning_rate": 8.680237945820152e-06, "loss": 0.6375, "step": 8470 }, { "epoch": 0.259623636140738, "grad_norm": 1.7800282257668192, "learning_rate": 8.679901955357247e-06, "loss": 0.6687, "step": 8471 }, { "epoch": 0.25965428466347923, "grad_norm": 1.6635165373857868, "learning_rate": 8.679565928635329e-06, "loss": 0.6632, "step": 8472 }, { "epoch": 0.25968493318622043, "grad_norm": 1.8502104089963, "learning_rate": 8.679229865657705e-06, "loss": 0.7001, "step": 8473 }, { "epoch": 0.25971558170896164, "grad_norm": 1.6566539775371072, "learning_rate": 8.678893766427688e-06, "loss": 0.7919, "step": 8474 }, { "epoch": 0.25974623023170285, "grad_norm": 1.7066634303018888, "learning_rate": 8.67855763094859e-06, "loss": 0.633, "step": 8475 }, { "epoch": 0.25977687875444405, "grad_norm": 1.62568018511291, "learning_rate": 8.678221459223722e-06, "loss": 0.6698, "step": 8476 }, { "epoch": 0.25980752727718526, "grad_norm": 1.7680565318011723, "learning_rate": 8.677885251256397e-06, "loss": 0.6695, "step": 8477 }, { "epoch": 0.25983817579992646, "grad_norm": 1.8216460987875525, "learning_rate": 8.677549007049928e-06, "loss": 0.8264, "step": 8478 }, { "epoch": 0.25986882432266767, "grad_norm": 1.75256850421271, "learning_rate": 8.677212726607627e-06, "loss": 0.6416, "step": 8479 }, { "epoch": 0.2598994728454089, "grad_norm": 1.514472459312885, "learning_rate": 8.676876409932809e-06, "loss": 0.6064, "step": 8480 }, { "epoch": 0.2599301213681501, "grad_norm": 1.5664644566360446, "learning_rate": 8.676540057028785e-06, "loss": 0.6814, "step": 8481 }, { "epoch": 0.2599607698908913, "grad_norm": 1.0933268253050048, "learning_rate": 8.676203667898872e-06, "loss": 0.4924, "step": 8482 }, { "epoch": 0.25999141841363244, "grad_norm": 1.7356497855003268, "learning_rate": 8.675867242546386e-06, "loss": 0.7571, "step": 8483 }, { "epoch": 0.26002206693637364, "grad_norm": 1.8242292356528549, "learning_rate": 8.675530780974637e-06, "loss": 0.7873, "step": 8484 }, { "epoch": 0.26005271545911485, "grad_norm": 0.8260855327504402, "learning_rate": 8.675194283186944e-06, "loss": 0.4662, "step": 8485 }, { "epoch": 0.26008336398185605, "grad_norm": 1.8638840801451486, "learning_rate": 8.67485774918662e-06, "loss": 0.7986, "step": 8486 }, { "epoch": 0.26011401250459726, "grad_norm": 1.726741250961179, "learning_rate": 8.674521178976985e-06, "loss": 0.7422, "step": 8487 }, { "epoch": 0.26014466102733846, "grad_norm": 1.6346236473027775, "learning_rate": 8.67418457256135e-06, "loss": 0.7034, "step": 8488 }, { "epoch": 0.26017530955007967, "grad_norm": 1.8363057196645776, "learning_rate": 8.673847929943036e-06, "loss": 0.713, "step": 8489 }, { "epoch": 0.2602059580728209, "grad_norm": 1.7996197027694458, "learning_rate": 8.673511251125358e-06, "loss": 0.5953, "step": 8490 }, { "epoch": 0.2602366065955621, "grad_norm": 1.6453483331567822, "learning_rate": 8.673174536111632e-06, "loss": 0.7828, "step": 8491 }, { "epoch": 0.2602672551183033, "grad_norm": 1.8672178741293128, "learning_rate": 8.672837784905178e-06, "loss": 0.7803, "step": 8492 }, { "epoch": 0.2602979036410445, "grad_norm": 1.6903591427063458, "learning_rate": 8.672500997509316e-06, "loss": 0.8038, "step": 8493 }, { "epoch": 0.2603285521637857, "grad_norm": 1.8119397707369207, "learning_rate": 8.67216417392736e-06, "loss": 0.7257, "step": 8494 }, { "epoch": 0.2603592006865269, "grad_norm": 1.8177992323664967, "learning_rate": 8.67182731416263e-06, "loss": 0.7118, "step": 8495 }, { "epoch": 0.2603898492092681, "grad_norm": 1.7426083853854482, "learning_rate": 8.67149041821845e-06, "loss": 0.6785, "step": 8496 }, { "epoch": 0.2604204977320093, "grad_norm": 1.7401945845716202, "learning_rate": 8.671153486098131e-06, "loss": 0.7453, "step": 8497 }, { "epoch": 0.2604511462547505, "grad_norm": 1.7720113108696707, "learning_rate": 8.670816517805e-06, "loss": 0.6886, "step": 8498 }, { "epoch": 0.2604817947774917, "grad_norm": 1.7189114295673438, "learning_rate": 8.670479513342373e-06, "loss": 0.5577, "step": 8499 }, { "epoch": 0.26051244330023293, "grad_norm": 1.1107746470197215, "learning_rate": 8.670142472713574e-06, "loss": 0.5102, "step": 8500 }, { "epoch": 0.26054309182297414, "grad_norm": 1.5491410151471403, "learning_rate": 8.66980539592192e-06, "loss": 0.7974, "step": 8501 }, { "epoch": 0.26057374034571534, "grad_norm": 0.8122563623783458, "learning_rate": 8.669468282970736e-06, "loss": 0.4664, "step": 8502 }, { "epoch": 0.26060438886845655, "grad_norm": 1.6000290432235387, "learning_rate": 8.669131133863342e-06, "loss": 0.7335, "step": 8503 }, { "epoch": 0.26063503739119775, "grad_norm": 1.8387822653884545, "learning_rate": 8.66879394860306e-06, "loss": 0.799, "step": 8504 }, { "epoch": 0.26066568591393896, "grad_norm": 0.8299206116641515, "learning_rate": 8.668456727193213e-06, "loss": 0.4603, "step": 8505 }, { "epoch": 0.26069633443668017, "grad_norm": 1.7249628434043698, "learning_rate": 8.668119469637122e-06, "loss": 0.6504, "step": 8506 }, { "epoch": 0.26072698295942137, "grad_norm": 1.722989239020549, "learning_rate": 8.667782175938112e-06, "loss": 0.6796, "step": 8507 }, { "epoch": 0.2607576314821626, "grad_norm": 1.8454712008497138, "learning_rate": 8.667444846099507e-06, "loss": 0.7077, "step": 8508 }, { "epoch": 0.2607882800049038, "grad_norm": 0.9013193555328425, "learning_rate": 8.667107480124629e-06, "loss": 0.4833, "step": 8509 }, { "epoch": 0.260818928527645, "grad_norm": 0.8324404332624555, "learning_rate": 8.6667700780168e-06, "loss": 0.4912, "step": 8510 }, { "epoch": 0.2608495770503862, "grad_norm": 1.694189811864415, "learning_rate": 8.66643263977935e-06, "loss": 0.6765, "step": 8511 }, { "epoch": 0.2608802255731274, "grad_norm": 1.604830093395559, "learning_rate": 8.666095165415602e-06, "loss": 0.6262, "step": 8512 }, { "epoch": 0.2609108740958686, "grad_norm": 1.839268102898304, "learning_rate": 8.665757654928878e-06, "loss": 0.7466, "step": 8513 }, { "epoch": 0.26094152261860976, "grad_norm": 1.534490826301804, "learning_rate": 8.665420108322507e-06, "loss": 0.6226, "step": 8514 }, { "epoch": 0.26097217114135096, "grad_norm": 0.908406257161292, "learning_rate": 8.665082525599812e-06, "loss": 0.4735, "step": 8515 }, { "epoch": 0.26100281966409217, "grad_norm": 1.8385329962762944, "learning_rate": 8.664744906764124e-06, "loss": 0.7819, "step": 8516 }, { "epoch": 0.2610334681868334, "grad_norm": 1.6275693648951424, "learning_rate": 8.664407251818765e-06, "loss": 0.6575, "step": 8517 }, { "epoch": 0.2610641167095746, "grad_norm": 2.099361579100184, "learning_rate": 8.664069560767064e-06, "loss": 0.7355, "step": 8518 }, { "epoch": 0.2610947652323158, "grad_norm": 1.7870135966817828, "learning_rate": 8.663731833612348e-06, "loss": 0.6632, "step": 8519 }, { "epoch": 0.261125413755057, "grad_norm": 1.6096203375943425, "learning_rate": 8.663394070357947e-06, "loss": 0.6, "step": 8520 }, { "epoch": 0.2611560622777982, "grad_norm": 1.8411834766061315, "learning_rate": 8.663056271007185e-06, "loss": 0.6181, "step": 8521 }, { "epoch": 0.2611867108005394, "grad_norm": 1.6427902720707404, "learning_rate": 8.662718435563391e-06, "loss": 0.6642, "step": 8522 }, { "epoch": 0.2612173593232806, "grad_norm": 0.9213677097026624, "learning_rate": 8.662380564029897e-06, "loss": 0.4862, "step": 8523 }, { "epoch": 0.2612480078460218, "grad_norm": 1.711023131456096, "learning_rate": 8.66204265641003e-06, "loss": 0.7111, "step": 8524 }, { "epoch": 0.261278656368763, "grad_norm": 1.6025920534778608, "learning_rate": 8.66170471270712e-06, "loss": 0.6402, "step": 8525 }, { "epoch": 0.2613093048915042, "grad_norm": 1.6342431369052703, "learning_rate": 8.661366732924496e-06, "loss": 0.7141, "step": 8526 }, { "epoch": 0.26133995341424543, "grad_norm": 1.927403591764155, "learning_rate": 8.661028717065488e-06, "loss": 0.7287, "step": 8527 }, { "epoch": 0.26137060193698664, "grad_norm": 2.062127067194271, "learning_rate": 8.660690665133429e-06, "loss": 0.7314, "step": 8528 }, { "epoch": 0.26140125045972784, "grad_norm": 1.6379001472461423, "learning_rate": 8.660352577131646e-06, "loss": 0.6721, "step": 8529 }, { "epoch": 0.26143189898246905, "grad_norm": 1.94022176899794, "learning_rate": 8.660014453063475e-06, "loss": 0.7038, "step": 8530 }, { "epoch": 0.26146254750521025, "grad_norm": 0.8326807005658289, "learning_rate": 8.659676292932244e-06, "loss": 0.4768, "step": 8531 }, { "epoch": 0.26149319602795146, "grad_norm": 1.6408730259113933, "learning_rate": 8.659338096741285e-06, "loss": 0.7016, "step": 8532 }, { "epoch": 0.26152384455069266, "grad_norm": 1.6797111048718598, "learning_rate": 8.658999864493934e-06, "loss": 0.6781, "step": 8533 }, { "epoch": 0.26155449307343387, "grad_norm": 2.034212315972613, "learning_rate": 8.658661596193519e-06, "loss": 0.7308, "step": 8534 }, { "epoch": 0.2615851415961751, "grad_norm": 1.7621814443559025, "learning_rate": 8.658323291843375e-06, "loss": 0.7228, "step": 8535 }, { "epoch": 0.2616157901189163, "grad_norm": 1.7572739032157543, "learning_rate": 8.657984951446838e-06, "loss": 0.703, "step": 8536 }, { "epoch": 0.2616464386416575, "grad_norm": 1.482396555768358, "learning_rate": 8.657646575007238e-06, "loss": 0.7663, "step": 8537 }, { "epoch": 0.2616770871643987, "grad_norm": 1.7208636984490029, "learning_rate": 8.65730816252791e-06, "loss": 0.7347, "step": 8538 }, { "epoch": 0.2617077356871399, "grad_norm": 1.678044618502701, "learning_rate": 8.65696971401219e-06, "loss": 0.7153, "step": 8539 }, { "epoch": 0.2617383842098811, "grad_norm": 1.7667453922876144, "learning_rate": 8.65663122946341e-06, "loss": 0.7967, "step": 8540 }, { "epoch": 0.2617690327326223, "grad_norm": 1.7282491497048267, "learning_rate": 8.656292708884908e-06, "loss": 0.6954, "step": 8541 }, { "epoch": 0.2617996812553635, "grad_norm": 1.8413149775304558, "learning_rate": 8.65595415228002e-06, "loss": 0.6401, "step": 8542 }, { "epoch": 0.2618303297781047, "grad_norm": 0.8403639005222127, "learning_rate": 8.655615559652078e-06, "loss": 0.4927, "step": 8543 }, { "epoch": 0.2618609783008459, "grad_norm": 1.997669017027658, "learning_rate": 8.655276931004422e-06, "loss": 0.6611, "step": 8544 }, { "epoch": 0.2618916268235871, "grad_norm": 1.9033567571680712, "learning_rate": 8.654938266340384e-06, "loss": 0.6252, "step": 8545 }, { "epoch": 0.2619222753463283, "grad_norm": 1.885452020653928, "learning_rate": 8.654599565663307e-06, "loss": 0.7545, "step": 8546 }, { "epoch": 0.2619529238690695, "grad_norm": 1.8275634564576426, "learning_rate": 8.654260828976526e-06, "loss": 0.721, "step": 8547 }, { "epoch": 0.2619835723918107, "grad_norm": 2.0957076077263874, "learning_rate": 8.653922056283376e-06, "loss": 0.7338, "step": 8548 }, { "epoch": 0.2620142209145519, "grad_norm": 1.8628717796433047, "learning_rate": 8.653583247587198e-06, "loss": 0.8019, "step": 8549 }, { "epoch": 0.2620448694372931, "grad_norm": 1.761431507964122, "learning_rate": 8.65324440289133e-06, "loss": 0.6425, "step": 8550 }, { "epoch": 0.2620755179600343, "grad_norm": 1.682791521923497, "learning_rate": 8.65290552219911e-06, "loss": 0.6761, "step": 8551 }, { "epoch": 0.2621061664827755, "grad_norm": 2.0194564047281003, "learning_rate": 8.652566605513877e-06, "loss": 0.7582, "step": 8552 }, { "epoch": 0.2621368150055167, "grad_norm": 1.54296231118038, "learning_rate": 8.65222765283897e-06, "loss": 0.7028, "step": 8553 }, { "epoch": 0.2621674635282579, "grad_norm": 1.685510228705197, "learning_rate": 8.65188866417773e-06, "loss": 0.6523, "step": 8554 }, { "epoch": 0.26219811205099913, "grad_norm": 1.7664886346792887, "learning_rate": 8.651549639533496e-06, "loss": 0.6071, "step": 8555 }, { "epoch": 0.26222876057374034, "grad_norm": 1.6492578739551054, "learning_rate": 8.65121057890961e-06, "loss": 0.6753, "step": 8556 }, { "epoch": 0.26225940909648154, "grad_norm": 1.6949036244396574, "learning_rate": 8.650871482309413e-06, "loss": 0.7086, "step": 8557 }, { "epoch": 0.26229005761922275, "grad_norm": 1.711634096448467, "learning_rate": 8.650532349736244e-06, "loss": 0.7245, "step": 8558 }, { "epoch": 0.26232070614196396, "grad_norm": 0.8392855579142933, "learning_rate": 8.650193181193444e-06, "loss": 0.4923, "step": 8559 }, { "epoch": 0.26235135466470516, "grad_norm": 1.9523378758115282, "learning_rate": 8.649853976684358e-06, "loss": 0.6575, "step": 8560 }, { "epoch": 0.26238200318744637, "grad_norm": 1.628525026802647, "learning_rate": 8.64951473621233e-06, "loss": 0.6502, "step": 8561 }, { "epoch": 0.26241265171018757, "grad_norm": 1.8497232540525717, "learning_rate": 8.649175459780695e-06, "loss": 0.7736, "step": 8562 }, { "epoch": 0.2624433002329288, "grad_norm": 1.7655778924804435, "learning_rate": 8.648836147392802e-06, "loss": 0.7688, "step": 8563 }, { "epoch": 0.26247394875567, "grad_norm": 1.6849766466798262, "learning_rate": 8.648496799051994e-06, "loss": 0.6825, "step": 8564 }, { "epoch": 0.2625045972784112, "grad_norm": 0.8031825683618768, "learning_rate": 8.648157414761613e-06, "loss": 0.4758, "step": 8565 }, { "epoch": 0.2625352458011524, "grad_norm": 1.7601157141246022, "learning_rate": 8.647817994525002e-06, "loss": 0.7093, "step": 8566 }, { "epoch": 0.2625658943238936, "grad_norm": 1.7738343490545299, "learning_rate": 8.647478538345508e-06, "loss": 0.7867, "step": 8567 }, { "epoch": 0.2625965428466348, "grad_norm": 1.628292213397361, "learning_rate": 8.647139046226476e-06, "loss": 0.7503, "step": 8568 }, { "epoch": 0.262627191369376, "grad_norm": 1.726636846675555, "learning_rate": 8.646799518171249e-06, "loss": 0.8421, "step": 8569 }, { "epoch": 0.2626578398921172, "grad_norm": 1.654186495860809, "learning_rate": 8.646459954183173e-06, "loss": 0.6643, "step": 8570 }, { "epoch": 0.2626884884148584, "grad_norm": 1.8030698981206859, "learning_rate": 8.646120354265594e-06, "loss": 0.7981, "step": 8571 }, { "epoch": 0.26271913693759963, "grad_norm": 1.7838814167739383, "learning_rate": 8.645780718421858e-06, "loss": 0.684, "step": 8572 }, { "epoch": 0.26274978546034083, "grad_norm": 0.8868179028867517, "learning_rate": 8.645441046655312e-06, "loss": 0.4742, "step": 8573 }, { "epoch": 0.26278043398308204, "grad_norm": 1.784794249947012, "learning_rate": 8.645101338969303e-06, "loss": 0.6072, "step": 8574 }, { "epoch": 0.26281108250582325, "grad_norm": 1.685205874929892, "learning_rate": 8.644761595367177e-06, "loss": 0.6764, "step": 8575 }, { "epoch": 0.2628417310285644, "grad_norm": 1.6227495739393996, "learning_rate": 8.644421815852284e-06, "loss": 0.6891, "step": 8576 }, { "epoch": 0.2628723795513056, "grad_norm": 1.789999104051144, "learning_rate": 8.644082000427968e-06, "loss": 0.7583, "step": 8577 }, { "epoch": 0.2629030280740468, "grad_norm": 1.7077468717998165, "learning_rate": 8.643742149097582e-06, "loss": 0.7573, "step": 8578 }, { "epoch": 0.262933676596788, "grad_norm": 1.714901955398567, "learning_rate": 8.64340226186447e-06, "loss": 0.6955, "step": 8579 }, { "epoch": 0.2629643251195292, "grad_norm": 1.6791330203891297, "learning_rate": 8.643062338731987e-06, "loss": 0.7014, "step": 8580 }, { "epoch": 0.2629949736422704, "grad_norm": 0.8364056233333773, "learning_rate": 8.642722379703477e-06, "loss": 0.4752, "step": 8581 }, { "epoch": 0.26302562216501163, "grad_norm": 1.7811297737707745, "learning_rate": 8.64238238478229e-06, "loss": 0.8627, "step": 8582 }, { "epoch": 0.26305627068775284, "grad_norm": 1.764310429027212, "learning_rate": 8.642042353971778e-06, "loss": 0.738, "step": 8583 }, { "epoch": 0.26308691921049404, "grad_norm": 1.495815016051855, "learning_rate": 8.641702287275291e-06, "loss": 0.6509, "step": 8584 }, { "epoch": 0.26311756773323525, "grad_norm": 1.6309114014026591, "learning_rate": 8.641362184696179e-06, "loss": 0.6799, "step": 8585 }, { "epoch": 0.26314821625597645, "grad_norm": 2.0501439841746127, "learning_rate": 8.641022046237795e-06, "loss": 0.7873, "step": 8586 }, { "epoch": 0.26317886477871766, "grad_norm": 1.681902654305359, "learning_rate": 8.640681871903488e-06, "loss": 0.7108, "step": 8587 }, { "epoch": 0.26320951330145886, "grad_norm": 1.7452338971270789, "learning_rate": 8.640341661696612e-06, "loss": 0.7295, "step": 8588 }, { "epoch": 0.26324016182420007, "grad_norm": 0.8679376052737935, "learning_rate": 8.640001415620519e-06, "loss": 0.4795, "step": 8589 }, { "epoch": 0.2632708103469413, "grad_norm": 1.863058552689238, "learning_rate": 8.639661133678558e-06, "loss": 0.7001, "step": 8590 }, { "epoch": 0.2633014588696825, "grad_norm": 0.7943582895058307, "learning_rate": 8.639320815874087e-06, "loss": 0.4737, "step": 8591 }, { "epoch": 0.2633321073924237, "grad_norm": 1.7984453385475556, "learning_rate": 8.638980462210455e-06, "loss": 0.751, "step": 8592 }, { "epoch": 0.2633627559151649, "grad_norm": 1.7645056765777016, "learning_rate": 8.638640072691017e-06, "loss": 0.7323, "step": 8593 }, { "epoch": 0.2633934044379061, "grad_norm": 1.8248121795032013, "learning_rate": 8.63829964731913e-06, "loss": 0.6924, "step": 8594 }, { "epoch": 0.2634240529606473, "grad_norm": 1.854497388944195, "learning_rate": 8.637959186098143e-06, "loss": 0.7132, "step": 8595 }, { "epoch": 0.2634547014833885, "grad_norm": 1.8053454266515299, "learning_rate": 8.637618689031415e-06, "loss": 0.6331, "step": 8596 }, { "epoch": 0.2634853500061297, "grad_norm": 1.4643091046989984, "learning_rate": 8.6372781561223e-06, "loss": 0.6022, "step": 8597 }, { "epoch": 0.2635159985288709, "grad_norm": 1.8860193278588577, "learning_rate": 8.636937587374152e-06, "loss": 0.5768, "step": 8598 }, { "epoch": 0.2635466470516121, "grad_norm": 1.610164710622763, "learning_rate": 8.636596982790327e-06, "loss": 0.6116, "step": 8599 }, { "epoch": 0.26357729557435333, "grad_norm": 1.5883194171920014, "learning_rate": 8.63625634237418e-06, "loss": 0.7628, "step": 8600 }, { "epoch": 0.26360794409709454, "grad_norm": 1.5550471287965657, "learning_rate": 8.63591566612907e-06, "loss": 0.6993, "step": 8601 }, { "epoch": 0.26363859261983574, "grad_norm": 0.9002496987725015, "learning_rate": 8.635574954058355e-06, "loss": 0.4787, "step": 8602 }, { "epoch": 0.26366924114257695, "grad_norm": 1.7666624435044733, "learning_rate": 8.635234206165386e-06, "loss": 0.646, "step": 8603 }, { "epoch": 0.26369988966531815, "grad_norm": 1.6486263387070694, "learning_rate": 8.634893422453527e-06, "loss": 0.6098, "step": 8604 }, { "epoch": 0.26373053818805936, "grad_norm": 1.5237634289365314, "learning_rate": 8.634552602926133e-06, "loss": 0.6123, "step": 8605 }, { "epoch": 0.26376118671080057, "grad_norm": 1.6809699800926852, "learning_rate": 8.63421174758656e-06, "loss": 0.6127, "step": 8606 }, { "epoch": 0.2637918352335417, "grad_norm": 0.8420852166321106, "learning_rate": 8.63387085643817e-06, "loss": 0.4783, "step": 8607 }, { "epoch": 0.2638224837562829, "grad_norm": 1.6911037663486888, "learning_rate": 8.633529929484322e-06, "loss": 0.72, "step": 8608 }, { "epoch": 0.2638531322790241, "grad_norm": 1.6048069906501674, "learning_rate": 8.633188966728374e-06, "loss": 0.7162, "step": 8609 }, { "epoch": 0.26388378080176533, "grad_norm": 1.824023260155792, "learning_rate": 8.632847968173683e-06, "loss": 0.7697, "step": 8610 }, { "epoch": 0.26391442932450654, "grad_norm": 0.7836945420190689, "learning_rate": 8.632506933823613e-06, "loss": 0.4799, "step": 8611 }, { "epoch": 0.26394507784724774, "grad_norm": 1.839743867898232, "learning_rate": 8.632165863681523e-06, "loss": 0.6635, "step": 8612 }, { "epoch": 0.26397572636998895, "grad_norm": 1.7887146727677963, "learning_rate": 8.631824757750774e-06, "loss": 0.7065, "step": 8613 }, { "epoch": 0.26400637489273016, "grad_norm": 1.8462201646165741, "learning_rate": 8.631483616034725e-06, "loss": 0.6937, "step": 8614 }, { "epoch": 0.26403702341547136, "grad_norm": 1.6119864217768574, "learning_rate": 8.631142438536739e-06, "loss": 0.6935, "step": 8615 }, { "epoch": 0.26406767193821257, "grad_norm": 1.824995459264544, "learning_rate": 8.630801225260177e-06, "loss": 0.7682, "step": 8616 }, { "epoch": 0.2640983204609538, "grad_norm": 1.786365469953861, "learning_rate": 8.630459976208403e-06, "loss": 0.651, "step": 8617 }, { "epoch": 0.264128968983695, "grad_norm": 1.735308089589714, "learning_rate": 8.630118691384776e-06, "loss": 0.6465, "step": 8618 }, { "epoch": 0.2641596175064362, "grad_norm": 1.48656396757209, "learning_rate": 8.629777370792663e-06, "loss": 0.6203, "step": 8619 }, { "epoch": 0.2641902660291774, "grad_norm": 1.6771877068475634, "learning_rate": 8.629436014435424e-06, "loss": 0.6814, "step": 8620 }, { "epoch": 0.2642209145519186, "grad_norm": 1.7840792189861252, "learning_rate": 8.629094622316423e-06, "loss": 0.6765, "step": 8621 }, { "epoch": 0.2642515630746598, "grad_norm": 1.6050164592935219, "learning_rate": 8.628753194439024e-06, "loss": 0.7729, "step": 8622 }, { "epoch": 0.264282211597401, "grad_norm": 1.8120205614343547, "learning_rate": 8.628411730806592e-06, "loss": 0.6431, "step": 8623 }, { "epoch": 0.2643128601201422, "grad_norm": 0.9387749541688563, "learning_rate": 8.62807023142249e-06, "loss": 0.4728, "step": 8624 }, { "epoch": 0.2643435086428834, "grad_norm": 1.666973974957228, "learning_rate": 8.627728696290084e-06, "loss": 0.7703, "step": 8625 }, { "epoch": 0.2643741571656246, "grad_norm": 1.7656225959196084, "learning_rate": 8.62738712541274e-06, "loss": 0.6743, "step": 8626 }, { "epoch": 0.26440480568836583, "grad_norm": 1.9218171913473998, "learning_rate": 8.627045518793821e-06, "loss": 0.6326, "step": 8627 }, { "epoch": 0.26443545421110703, "grad_norm": 1.9594603430845936, "learning_rate": 8.626703876436695e-06, "loss": 0.7734, "step": 8628 }, { "epoch": 0.26446610273384824, "grad_norm": 1.7508222900223998, "learning_rate": 8.626362198344728e-06, "loss": 0.6377, "step": 8629 }, { "epoch": 0.26449675125658945, "grad_norm": 1.8652785855234169, "learning_rate": 8.626020484521287e-06, "loss": 0.6522, "step": 8630 }, { "epoch": 0.26452739977933065, "grad_norm": 0.8755726370176181, "learning_rate": 8.625678734969737e-06, "loss": 0.4713, "step": 8631 }, { "epoch": 0.26455804830207186, "grad_norm": 1.7384300646264683, "learning_rate": 8.625336949693448e-06, "loss": 0.7074, "step": 8632 }, { "epoch": 0.26458869682481306, "grad_norm": 1.7286367658662656, "learning_rate": 8.624995128695785e-06, "loss": 0.6834, "step": 8633 }, { "epoch": 0.26461934534755427, "grad_norm": 1.6380641649722913, "learning_rate": 8.62465327198012e-06, "loss": 0.7307, "step": 8634 }, { "epoch": 0.2646499938702955, "grad_norm": 0.8445714116036024, "learning_rate": 8.624311379549817e-06, "loss": 0.4842, "step": 8635 }, { "epoch": 0.2646806423930367, "grad_norm": 1.9160925207542323, "learning_rate": 8.623969451408248e-06, "loss": 0.7201, "step": 8636 }, { "epoch": 0.2647112909157779, "grad_norm": 1.589384189534836, "learning_rate": 8.623627487558779e-06, "loss": 0.6915, "step": 8637 }, { "epoch": 0.26474193943851904, "grad_norm": 1.6238517053010504, "learning_rate": 8.623285488004781e-06, "loss": 0.7698, "step": 8638 }, { "epoch": 0.26477258796126024, "grad_norm": 2.1348458174239124, "learning_rate": 8.622943452749626e-06, "loss": 0.7226, "step": 8639 }, { "epoch": 0.26480323648400145, "grad_norm": 1.4963624399170505, "learning_rate": 8.62260138179668e-06, "loss": 0.672, "step": 8640 }, { "epoch": 0.26483388500674265, "grad_norm": 1.7625629679741361, "learning_rate": 8.622259275149317e-06, "loss": 0.7312, "step": 8641 }, { "epoch": 0.26486453352948386, "grad_norm": 1.8470722944856384, "learning_rate": 8.621917132810906e-06, "loss": 0.8351, "step": 8642 }, { "epoch": 0.26489518205222506, "grad_norm": 1.525427813980741, "learning_rate": 8.621574954784821e-06, "loss": 0.6672, "step": 8643 }, { "epoch": 0.26492583057496627, "grad_norm": 1.9494884455747512, "learning_rate": 8.621232741074429e-06, "loss": 0.6446, "step": 8644 }, { "epoch": 0.2649564790977075, "grad_norm": 0.9152131283971016, "learning_rate": 8.620890491683105e-06, "loss": 0.484, "step": 8645 }, { "epoch": 0.2649871276204487, "grad_norm": 1.8846997660854605, "learning_rate": 8.62054820661422e-06, "loss": 0.5751, "step": 8646 }, { "epoch": 0.2650177761431899, "grad_norm": 2.0195597364020754, "learning_rate": 8.620205885871147e-06, "loss": 0.6746, "step": 8647 }, { "epoch": 0.2650484246659311, "grad_norm": 1.851283331256732, "learning_rate": 8.61986352945726e-06, "loss": 0.6935, "step": 8648 }, { "epoch": 0.2650790731886723, "grad_norm": 1.5992714088645092, "learning_rate": 8.619521137375932e-06, "loss": 0.655, "step": 8649 }, { "epoch": 0.2651097217114135, "grad_norm": 0.8293089559324867, "learning_rate": 8.619178709630536e-06, "loss": 0.4664, "step": 8650 }, { "epoch": 0.2651403702341547, "grad_norm": 1.8060118161618566, "learning_rate": 8.618836246224444e-06, "loss": 0.7753, "step": 8651 }, { "epoch": 0.2651710187568959, "grad_norm": 1.551496028137282, "learning_rate": 8.618493747161034e-06, "loss": 0.6937, "step": 8652 }, { "epoch": 0.2652016672796371, "grad_norm": 1.633968610650129, "learning_rate": 8.618151212443679e-06, "loss": 0.725, "step": 8653 }, { "epoch": 0.2652323158023783, "grad_norm": 1.6434824649280186, "learning_rate": 8.617808642075756e-06, "loss": 0.662, "step": 8654 }, { "epoch": 0.26526296432511953, "grad_norm": 1.6216568402192524, "learning_rate": 8.617466036060638e-06, "loss": 0.7523, "step": 8655 }, { "epoch": 0.26529361284786074, "grad_norm": 1.8879795318547283, "learning_rate": 8.6171233944017e-06, "loss": 0.7796, "step": 8656 }, { "epoch": 0.26532426137060194, "grad_norm": 1.9645896997018875, "learning_rate": 8.61678071710232e-06, "loss": 0.7197, "step": 8657 }, { "epoch": 0.26535490989334315, "grad_norm": 1.5115200021928943, "learning_rate": 8.616438004165876e-06, "loss": 0.6382, "step": 8658 }, { "epoch": 0.26538555841608436, "grad_norm": 0.8165407007079416, "learning_rate": 8.616095255595743e-06, "loss": 0.4917, "step": 8659 }, { "epoch": 0.26541620693882556, "grad_norm": 1.7510209015491358, "learning_rate": 8.615752471395296e-06, "loss": 0.7871, "step": 8660 }, { "epoch": 0.26544685546156677, "grad_norm": 1.6895158689342604, "learning_rate": 8.615409651567916e-06, "loss": 0.6701, "step": 8661 }, { "epoch": 0.26547750398430797, "grad_norm": 1.7983567401700589, "learning_rate": 8.61506679611698e-06, "loss": 0.7456, "step": 8662 }, { "epoch": 0.2655081525070492, "grad_norm": 2.1176556929100383, "learning_rate": 8.614723905045865e-06, "loss": 0.7372, "step": 8663 }, { "epoch": 0.2655388010297904, "grad_norm": 1.746450376565469, "learning_rate": 8.61438097835795e-06, "loss": 0.6258, "step": 8664 }, { "epoch": 0.2655694495525316, "grad_norm": 1.644780870971775, "learning_rate": 8.614038016056617e-06, "loss": 0.6865, "step": 8665 }, { "epoch": 0.2656000980752728, "grad_norm": 1.7643082602720246, "learning_rate": 8.613695018145241e-06, "loss": 0.7539, "step": 8666 }, { "epoch": 0.265630746598014, "grad_norm": 1.6386354344840777, "learning_rate": 8.613351984627204e-06, "loss": 0.7383, "step": 8667 }, { "epoch": 0.2656613951207552, "grad_norm": 1.8890830335630293, "learning_rate": 8.613008915505885e-06, "loss": 0.6986, "step": 8668 }, { "epoch": 0.26569204364349636, "grad_norm": 2.0893913870738667, "learning_rate": 8.612665810784664e-06, "loss": 0.8127, "step": 8669 }, { "epoch": 0.26572269216623756, "grad_norm": 1.7366845220843774, "learning_rate": 8.612322670466924e-06, "loss": 0.6914, "step": 8670 }, { "epoch": 0.26575334068897877, "grad_norm": 1.9786824715859375, "learning_rate": 8.611979494556043e-06, "loss": 0.7321, "step": 8671 }, { "epoch": 0.26578398921172, "grad_norm": 1.7312214232406788, "learning_rate": 8.611636283055405e-06, "loss": 0.7301, "step": 8672 }, { "epoch": 0.2658146377344612, "grad_norm": 1.774504457984491, "learning_rate": 8.61129303596839e-06, "loss": 0.6713, "step": 8673 }, { "epoch": 0.2658452862572024, "grad_norm": 1.5773100521454266, "learning_rate": 8.61094975329838e-06, "loss": 0.6966, "step": 8674 }, { "epoch": 0.2658759347799436, "grad_norm": 1.6135717605976552, "learning_rate": 8.610606435048761e-06, "loss": 0.6777, "step": 8675 }, { "epoch": 0.2659065833026848, "grad_norm": 1.789124008849361, "learning_rate": 8.61026308122291e-06, "loss": 0.677, "step": 8676 }, { "epoch": 0.265937231825426, "grad_norm": 0.8274027847988817, "learning_rate": 8.609919691824213e-06, "loss": 0.4794, "step": 8677 }, { "epoch": 0.2659678803481672, "grad_norm": 1.595278970636013, "learning_rate": 8.609576266856057e-06, "loss": 0.6604, "step": 8678 }, { "epoch": 0.2659985288709084, "grad_norm": 0.8169521104477734, "learning_rate": 8.60923280632182e-06, "loss": 0.5046, "step": 8679 }, { "epoch": 0.2660291773936496, "grad_norm": 1.7354473141717477, "learning_rate": 8.608889310224888e-06, "loss": 0.728, "step": 8680 }, { "epoch": 0.2660598259163908, "grad_norm": 1.7522321130809937, "learning_rate": 8.608545778568648e-06, "loss": 0.6977, "step": 8681 }, { "epoch": 0.26609047443913203, "grad_norm": 1.6454857112745986, "learning_rate": 8.608202211356483e-06, "loss": 0.7116, "step": 8682 }, { "epoch": 0.26612112296187324, "grad_norm": 1.8657117583084988, "learning_rate": 8.607858608591778e-06, "loss": 0.6976, "step": 8683 }, { "epoch": 0.26615177148461444, "grad_norm": 0.7938193911656098, "learning_rate": 8.607514970277917e-06, "loss": 0.4527, "step": 8684 }, { "epoch": 0.26618242000735565, "grad_norm": 1.6742904102412521, "learning_rate": 8.60717129641829e-06, "loss": 0.6772, "step": 8685 }, { "epoch": 0.26621306853009685, "grad_norm": 1.911009310593633, "learning_rate": 8.606827587016281e-06, "loss": 0.6843, "step": 8686 }, { "epoch": 0.26624371705283806, "grad_norm": 0.7739527578099358, "learning_rate": 8.606483842075277e-06, "loss": 0.4756, "step": 8687 }, { "epoch": 0.26627436557557926, "grad_norm": 1.9527738788009537, "learning_rate": 8.606140061598665e-06, "loss": 0.6981, "step": 8688 }, { "epoch": 0.26630501409832047, "grad_norm": 1.858579591416523, "learning_rate": 8.605796245589833e-06, "loss": 0.8354, "step": 8689 }, { "epoch": 0.2663356626210617, "grad_norm": 1.6071797202241935, "learning_rate": 8.605452394052168e-06, "loss": 0.6035, "step": 8690 }, { "epoch": 0.2663663111438029, "grad_norm": 1.6179835993909029, "learning_rate": 8.605108506989057e-06, "loss": 0.7575, "step": 8691 }, { "epoch": 0.2663969596665441, "grad_norm": 1.6124506026901566, "learning_rate": 8.604764584403888e-06, "loss": 0.7217, "step": 8692 }, { "epoch": 0.2664276081892853, "grad_norm": 1.6454464940620979, "learning_rate": 8.604420626300054e-06, "loss": 0.6769, "step": 8693 }, { "epoch": 0.2664582567120265, "grad_norm": 1.766566533379448, "learning_rate": 8.60407663268094e-06, "loss": 0.7142, "step": 8694 }, { "epoch": 0.2664889052347677, "grad_norm": 0.8822864591025693, "learning_rate": 8.603732603549938e-06, "loss": 0.4881, "step": 8695 }, { "epoch": 0.2665195537575089, "grad_norm": 0.8697917007295235, "learning_rate": 8.603388538910435e-06, "loss": 0.4939, "step": 8696 }, { "epoch": 0.2665502022802501, "grad_norm": 1.9066963908619967, "learning_rate": 8.603044438765824e-06, "loss": 0.6668, "step": 8697 }, { "epoch": 0.2665808508029913, "grad_norm": 1.8432523776331224, "learning_rate": 8.602700303119493e-06, "loss": 0.8027, "step": 8698 }, { "epoch": 0.2666114993257325, "grad_norm": 2.15680258943209, "learning_rate": 8.602356131974837e-06, "loss": 0.8376, "step": 8699 }, { "epoch": 0.2666421478484737, "grad_norm": 1.5455186019684908, "learning_rate": 8.602011925335241e-06, "loss": 0.5805, "step": 8700 }, { "epoch": 0.2666727963712149, "grad_norm": 1.523026259566737, "learning_rate": 8.601667683204101e-06, "loss": 0.6664, "step": 8701 }, { "epoch": 0.2667034448939561, "grad_norm": 1.7805022320547734, "learning_rate": 8.601323405584808e-06, "loss": 0.7712, "step": 8702 }, { "epoch": 0.2667340934166973, "grad_norm": 1.9055084070712798, "learning_rate": 8.600979092480755e-06, "loss": 0.7448, "step": 8703 }, { "epoch": 0.2667647419394385, "grad_norm": 1.6975289547039651, "learning_rate": 8.600634743895332e-06, "loss": 0.6765, "step": 8704 }, { "epoch": 0.2667953904621797, "grad_norm": 1.969923984091332, "learning_rate": 8.600290359831935e-06, "loss": 0.7922, "step": 8705 }, { "epoch": 0.2668260389849209, "grad_norm": 1.5861299063925833, "learning_rate": 8.599945940293955e-06, "loss": 0.6673, "step": 8706 }, { "epoch": 0.2668566875076621, "grad_norm": 3.713289160877192, "learning_rate": 8.599601485284787e-06, "loss": 0.6828, "step": 8707 }, { "epoch": 0.2668873360304033, "grad_norm": 1.668275100983845, "learning_rate": 8.599256994807823e-06, "loss": 0.6466, "step": 8708 }, { "epoch": 0.2669179845531445, "grad_norm": 1.6924908916843473, "learning_rate": 8.598912468866461e-06, "loss": 0.7305, "step": 8709 }, { "epoch": 0.26694863307588573, "grad_norm": 1.8782245391592602, "learning_rate": 8.598567907464093e-06, "loss": 0.6944, "step": 8710 }, { "epoch": 0.26697928159862694, "grad_norm": 1.7095011256837775, "learning_rate": 8.598223310604115e-06, "loss": 0.6719, "step": 8711 }, { "epoch": 0.26700993012136814, "grad_norm": 1.505073260728854, "learning_rate": 8.597878678289921e-06, "loss": 0.7596, "step": 8712 }, { "epoch": 0.26704057864410935, "grad_norm": 1.8102268402957717, "learning_rate": 8.597534010524908e-06, "loss": 0.7688, "step": 8713 }, { "epoch": 0.26707122716685056, "grad_norm": 2.06934160318398, "learning_rate": 8.597189307312472e-06, "loss": 0.7341, "step": 8714 }, { "epoch": 0.26710187568959176, "grad_norm": 1.6940441697868125, "learning_rate": 8.59684456865601e-06, "loss": 0.6939, "step": 8715 }, { "epoch": 0.26713252421233297, "grad_norm": 1.6175300217058737, "learning_rate": 8.596499794558918e-06, "loss": 0.6305, "step": 8716 }, { "epoch": 0.2671631727350742, "grad_norm": 2.5128432435213712, "learning_rate": 8.596154985024594e-06, "loss": 0.6908, "step": 8717 }, { "epoch": 0.2671938212578154, "grad_norm": 1.858473104339601, "learning_rate": 8.595810140056433e-06, "loss": 0.7902, "step": 8718 }, { "epoch": 0.2672244697805566, "grad_norm": 1.8217942345950442, "learning_rate": 8.595465259657837e-06, "loss": 0.7146, "step": 8719 }, { "epoch": 0.2672551183032978, "grad_norm": 1.8170056331621591, "learning_rate": 8.5951203438322e-06, "loss": 0.7026, "step": 8720 }, { "epoch": 0.267285766826039, "grad_norm": 1.7971161855864795, "learning_rate": 8.594775392582923e-06, "loss": 0.7112, "step": 8721 }, { "epoch": 0.2673164153487802, "grad_norm": 1.6973349930262709, "learning_rate": 8.594430405913403e-06, "loss": 0.7462, "step": 8722 }, { "epoch": 0.2673470638715214, "grad_norm": 1.7162409879669307, "learning_rate": 8.594085383827043e-06, "loss": 0.6506, "step": 8723 }, { "epoch": 0.2673777123942626, "grad_norm": 1.7690449570594942, "learning_rate": 8.593740326327237e-06, "loss": 0.6539, "step": 8724 }, { "epoch": 0.2674083609170038, "grad_norm": 1.6480247295831991, "learning_rate": 8.59339523341739e-06, "loss": 0.7212, "step": 8725 }, { "epoch": 0.267439009439745, "grad_norm": 1.4888856026171802, "learning_rate": 8.593050105100902e-06, "loss": 0.7486, "step": 8726 }, { "epoch": 0.26746965796248623, "grad_norm": 1.6292571996708247, "learning_rate": 8.59270494138117e-06, "loss": 0.7156, "step": 8727 }, { "epoch": 0.26750030648522743, "grad_norm": 1.7199360692089334, "learning_rate": 8.592359742261598e-06, "loss": 0.7048, "step": 8728 }, { "epoch": 0.26753095500796864, "grad_norm": 1.6018973288835465, "learning_rate": 8.592014507745586e-06, "loss": 0.6541, "step": 8729 }, { "epoch": 0.26756160353070985, "grad_norm": 1.4253419780208052, "learning_rate": 8.591669237836534e-06, "loss": 0.6688, "step": 8730 }, { "epoch": 0.267592252053451, "grad_norm": 1.7014681861946501, "learning_rate": 8.591323932537847e-06, "loss": 0.7267, "step": 8731 }, { "epoch": 0.2676229005761922, "grad_norm": 1.8358003887215353, "learning_rate": 8.590978591852928e-06, "loss": 0.7471, "step": 8732 }, { "epoch": 0.2676535490989334, "grad_norm": 1.320721826278528, "learning_rate": 8.590633215785178e-06, "loss": 0.4969, "step": 8733 }, { "epoch": 0.2676841976216746, "grad_norm": 1.1232089987861138, "learning_rate": 8.590287804337998e-06, "loss": 0.4961, "step": 8734 }, { "epoch": 0.2677148461444158, "grad_norm": 1.7618460501468032, "learning_rate": 8.589942357514796e-06, "loss": 0.6115, "step": 8735 }, { "epoch": 0.267745494667157, "grad_norm": 1.4924463804702695, "learning_rate": 8.589596875318973e-06, "loss": 0.6566, "step": 8736 }, { "epoch": 0.26777614318989823, "grad_norm": 1.7374634892364293, "learning_rate": 8.589251357753932e-06, "loss": 0.6601, "step": 8737 }, { "epoch": 0.26780679171263944, "grad_norm": 1.0740270858689396, "learning_rate": 8.58890580482308e-06, "loss": 0.4631, "step": 8738 }, { "epoch": 0.26783744023538064, "grad_norm": 1.6032944269768523, "learning_rate": 8.58856021652982e-06, "loss": 0.7588, "step": 8739 }, { "epoch": 0.26786808875812185, "grad_norm": 1.7734405442977217, "learning_rate": 8.588214592877559e-06, "loss": 0.6658, "step": 8740 }, { "epoch": 0.26789873728086305, "grad_norm": 1.572963683669443, "learning_rate": 8.587868933869703e-06, "loss": 0.7255, "step": 8741 }, { "epoch": 0.26792938580360426, "grad_norm": 1.9516243882655355, "learning_rate": 8.587523239509653e-06, "loss": 0.7084, "step": 8742 }, { "epoch": 0.26796003432634546, "grad_norm": 1.6532335727556586, "learning_rate": 8.587177509800823e-06, "loss": 0.6793, "step": 8743 }, { "epoch": 0.26799068284908667, "grad_norm": 1.8750026471864316, "learning_rate": 8.586831744746611e-06, "loss": 0.7189, "step": 8744 }, { "epoch": 0.2680213313718279, "grad_norm": 1.7206122433783182, "learning_rate": 8.586485944350432e-06, "loss": 0.6665, "step": 8745 }, { "epoch": 0.2680519798945691, "grad_norm": 1.8370731596159444, "learning_rate": 8.586140108615685e-06, "loss": 0.7208, "step": 8746 }, { "epoch": 0.2680826284173103, "grad_norm": 1.6153792561042657, "learning_rate": 8.585794237545784e-06, "loss": 0.6697, "step": 8747 }, { "epoch": 0.2681132769400515, "grad_norm": 1.8400279429119635, "learning_rate": 8.585448331144135e-06, "loss": 0.682, "step": 8748 }, { "epoch": 0.2681439254627927, "grad_norm": 1.8547867946154062, "learning_rate": 8.585102389414147e-06, "loss": 0.7899, "step": 8749 }, { "epoch": 0.2681745739855339, "grad_norm": 1.4871101763981815, "learning_rate": 8.584756412359228e-06, "loss": 0.5469, "step": 8750 }, { "epoch": 0.2682052225082751, "grad_norm": 1.6563655681084601, "learning_rate": 8.584410399982786e-06, "loss": 0.7681, "step": 8751 }, { "epoch": 0.2682358710310163, "grad_norm": 1.86225570089154, "learning_rate": 8.58406435228823e-06, "loss": 0.7852, "step": 8752 }, { "epoch": 0.2682665195537575, "grad_norm": 1.9806064866236888, "learning_rate": 8.583718269278972e-06, "loss": 0.7421, "step": 8753 }, { "epoch": 0.2682971680764987, "grad_norm": 1.644826635402087, "learning_rate": 8.58337215095842e-06, "loss": 0.6329, "step": 8754 }, { "epoch": 0.26832781659923993, "grad_norm": 1.141603180759785, "learning_rate": 8.583025997329988e-06, "loss": 0.4794, "step": 8755 }, { "epoch": 0.26835846512198114, "grad_norm": 1.8326835326512092, "learning_rate": 8.58267980839708e-06, "loss": 0.645, "step": 8756 }, { "epoch": 0.26838911364472234, "grad_norm": 1.0332687619404066, "learning_rate": 8.582333584163116e-06, "loss": 0.493, "step": 8757 }, { "epoch": 0.26841976216746355, "grad_norm": 1.6123099677840278, "learning_rate": 8.5819873246315e-06, "loss": 0.6666, "step": 8758 }, { "epoch": 0.26845041069020475, "grad_norm": 2.436429695680043, "learning_rate": 8.581641029805646e-06, "loss": 0.7028, "step": 8759 }, { "epoch": 0.26848105921294596, "grad_norm": 1.8376748287466984, "learning_rate": 8.581294699688966e-06, "loss": 0.7168, "step": 8760 }, { "epoch": 0.26851170773568717, "grad_norm": 1.8180764342602542, "learning_rate": 8.580948334284875e-06, "loss": 0.7731, "step": 8761 }, { "epoch": 0.2685423562584283, "grad_norm": 1.7561515532006529, "learning_rate": 8.580601933596784e-06, "loss": 0.8169, "step": 8762 }, { "epoch": 0.2685730047811695, "grad_norm": 0.9652373599390358, "learning_rate": 8.580255497628104e-06, "loss": 0.496, "step": 8763 }, { "epoch": 0.2686036533039107, "grad_norm": 1.7487489295866459, "learning_rate": 8.579909026382251e-06, "loss": 0.7424, "step": 8764 }, { "epoch": 0.26863430182665193, "grad_norm": 2.0382255839352483, "learning_rate": 8.57956251986264e-06, "loss": 0.7454, "step": 8765 }, { "epoch": 0.26866495034939314, "grad_norm": 1.6346101204825385, "learning_rate": 8.579215978072683e-06, "loss": 0.6789, "step": 8766 }, { "epoch": 0.26869559887213434, "grad_norm": 1.6751747204179404, "learning_rate": 8.578869401015794e-06, "loss": 0.7012, "step": 8767 }, { "epoch": 0.26872624739487555, "grad_norm": 1.5956455891830115, "learning_rate": 8.57852278869539e-06, "loss": 0.6622, "step": 8768 }, { "epoch": 0.26875689591761676, "grad_norm": 1.8650906155635447, "learning_rate": 8.578176141114886e-06, "loss": 0.749, "step": 8769 }, { "epoch": 0.26878754444035796, "grad_norm": 1.6435889894837963, "learning_rate": 8.577829458277695e-06, "loss": 0.7131, "step": 8770 }, { "epoch": 0.26881819296309917, "grad_norm": 1.7883792680597725, "learning_rate": 8.577482740187237e-06, "loss": 0.6572, "step": 8771 }, { "epoch": 0.2688488414858404, "grad_norm": 2.020531464049706, "learning_rate": 8.577135986846925e-06, "loss": 0.8391, "step": 8772 }, { "epoch": 0.2688794900085816, "grad_norm": 1.7276512402995603, "learning_rate": 8.576789198260178e-06, "loss": 0.6852, "step": 8773 }, { "epoch": 0.2689101385313228, "grad_norm": 1.838240869661493, "learning_rate": 8.57644237443041e-06, "loss": 0.7762, "step": 8774 }, { "epoch": 0.268940787054064, "grad_norm": 1.7559888301722884, "learning_rate": 8.576095515361043e-06, "loss": 0.6904, "step": 8775 }, { "epoch": 0.2689714355768052, "grad_norm": 0.8852552122393474, "learning_rate": 8.575748621055488e-06, "loss": 0.4567, "step": 8776 }, { "epoch": 0.2690020840995464, "grad_norm": 1.6791018310998183, "learning_rate": 8.57540169151717e-06, "loss": 0.7316, "step": 8777 }, { "epoch": 0.2690327326222876, "grad_norm": 1.7977691022361117, "learning_rate": 8.575054726749503e-06, "loss": 0.7474, "step": 8778 }, { "epoch": 0.2690633811450288, "grad_norm": 1.8227825511027813, "learning_rate": 8.574707726755909e-06, "loss": 0.6689, "step": 8779 }, { "epoch": 0.26909402966777, "grad_norm": 0.8026576162447174, "learning_rate": 8.574360691539803e-06, "loss": 0.4886, "step": 8780 }, { "epoch": 0.2691246781905112, "grad_norm": 1.9800526044573052, "learning_rate": 8.574013621104607e-06, "loss": 0.8086, "step": 8781 }, { "epoch": 0.26915532671325243, "grad_norm": 1.5727481261544738, "learning_rate": 8.57366651545374e-06, "loss": 0.6319, "step": 8782 }, { "epoch": 0.26918597523599364, "grad_norm": 1.5475157836237405, "learning_rate": 8.573319374590622e-06, "loss": 0.6563, "step": 8783 }, { "epoch": 0.26921662375873484, "grad_norm": 1.8757964433062266, "learning_rate": 8.572972198518676e-06, "loss": 0.7387, "step": 8784 }, { "epoch": 0.26924727228147605, "grad_norm": 1.6888847295916019, "learning_rate": 8.57262498724132e-06, "loss": 0.6385, "step": 8785 }, { "epoch": 0.26927792080421725, "grad_norm": 1.6035222690608861, "learning_rate": 8.572277740761976e-06, "loss": 0.7591, "step": 8786 }, { "epoch": 0.26930856932695846, "grad_norm": 0.8806949347125431, "learning_rate": 8.571930459084065e-06, "loss": 0.4625, "step": 8787 }, { "epoch": 0.26933921784969966, "grad_norm": 0.8571734589138414, "learning_rate": 8.571583142211009e-06, "loss": 0.4713, "step": 8788 }, { "epoch": 0.26936986637244087, "grad_norm": 0.7799710080341319, "learning_rate": 8.57123579014623e-06, "loss": 0.4911, "step": 8789 }, { "epoch": 0.2694005148951821, "grad_norm": 1.6173918448308329, "learning_rate": 8.570888402893154e-06, "loss": 0.7059, "step": 8790 }, { "epoch": 0.2694311634179233, "grad_norm": 1.681891930972527, "learning_rate": 8.570540980455197e-06, "loss": 0.7984, "step": 8791 }, { "epoch": 0.2694618119406645, "grad_norm": 1.723225237658459, "learning_rate": 8.570193522835788e-06, "loss": 0.7825, "step": 8792 }, { "epoch": 0.26949246046340564, "grad_norm": 1.8184191601826825, "learning_rate": 8.56984603003835e-06, "loss": 0.7387, "step": 8793 }, { "epoch": 0.26952310898614684, "grad_norm": 1.571642952603127, "learning_rate": 8.569498502066302e-06, "loss": 0.6356, "step": 8794 }, { "epoch": 0.26955375750888805, "grad_norm": 1.9892305421920649, "learning_rate": 8.569150938923077e-06, "loss": 0.7463, "step": 8795 }, { "epoch": 0.26958440603162925, "grad_norm": 1.0545336677901505, "learning_rate": 8.56880334061209e-06, "loss": 0.4869, "step": 8796 }, { "epoch": 0.26961505455437046, "grad_norm": 1.8461747928576469, "learning_rate": 8.568455707136774e-06, "loss": 0.7302, "step": 8797 }, { "epoch": 0.26964570307711166, "grad_norm": 1.5264187085312373, "learning_rate": 8.568108038500548e-06, "loss": 0.5361, "step": 8798 }, { "epoch": 0.26967635159985287, "grad_norm": 0.9080871180551153, "learning_rate": 8.567760334706843e-06, "loss": 0.4659, "step": 8799 }, { "epoch": 0.2697070001225941, "grad_norm": 1.742546395798348, "learning_rate": 8.56741259575908e-06, "loss": 0.651, "step": 8800 }, { "epoch": 0.2697376486453353, "grad_norm": 1.6841933877132682, "learning_rate": 8.56706482166069e-06, "loss": 0.75, "step": 8801 }, { "epoch": 0.2697682971680765, "grad_norm": 2.1895079318585897, "learning_rate": 8.566717012415096e-06, "loss": 0.7222, "step": 8802 }, { "epoch": 0.2697989456908177, "grad_norm": 1.8003434421449969, "learning_rate": 8.56636916802573e-06, "loss": 0.6835, "step": 8803 }, { "epoch": 0.2698295942135589, "grad_norm": 2.2701199603501294, "learning_rate": 8.566021288496013e-06, "loss": 0.7551, "step": 8804 }, { "epoch": 0.2698602427363001, "grad_norm": 1.509687586507239, "learning_rate": 8.565673373829375e-06, "loss": 0.6454, "step": 8805 }, { "epoch": 0.2698908912590413, "grad_norm": 1.6228588798691859, "learning_rate": 8.565325424029248e-06, "loss": 0.7562, "step": 8806 }, { "epoch": 0.2699215397817825, "grad_norm": 1.8771108175093567, "learning_rate": 8.564977439099056e-06, "loss": 0.6755, "step": 8807 }, { "epoch": 0.2699521883045237, "grad_norm": 1.8049972962525018, "learning_rate": 8.564629419042227e-06, "loss": 0.6845, "step": 8808 }, { "epoch": 0.2699828368272649, "grad_norm": 1.752035913880027, "learning_rate": 8.564281363862196e-06, "loss": 0.6821, "step": 8809 }, { "epoch": 0.27001348535000613, "grad_norm": 1.7504595618974585, "learning_rate": 8.563933273562387e-06, "loss": 0.7217, "step": 8810 }, { "epoch": 0.27004413387274734, "grad_norm": 1.6389920254523476, "learning_rate": 8.563585148146231e-06, "loss": 0.6802, "step": 8811 }, { "epoch": 0.27007478239548854, "grad_norm": 1.5437333963427688, "learning_rate": 8.56323698761716e-06, "loss": 0.6462, "step": 8812 }, { "epoch": 0.27010543091822975, "grad_norm": 1.8152390740546618, "learning_rate": 8.562888791978604e-06, "loss": 0.6856, "step": 8813 }, { "epoch": 0.27013607944097096, "grad_norm": 2.417189416125779, "learning_rate": 8.562540561233991e-06, "loss": 0.6076, "step": 8814 }, { "epoch": 0.27016672796371216, "grad_norm": 1.6823462805788256, "learning_rate": 8.562192295386756e-06, "loss": 0.7032, "step": 8815 }, { "epoch": 0.27019737648645337, "grad_norm": 1.2739800664773668, "learning_rate": 8.561843994440327e-06, "loss": 0.505, "step": 8816 }, { "epoch": 0.2702280250091946, "grad_norm": 1.6269708306402013, "learning_rate": 8.56149565839814e-06, "loss": 0.6913, "step": 8817 }, { "epoch": 0.2702586735319358, "grad_norm": 1.7961490270275995, "learning_rate": 8.561147287263623e-06, "loss": 0.7438, "step": 8818 }, { "epoch": 0.270289322054677, "grad_norm": 1.9297664765704352, "learning_rate": 8.560798881040211e-06, "loss": 0.7373, "step": 8819 }, { "epoch": 0.2703199705774182, "grad_norm": 0.797588350021819, "learning_rate": 8.560450439731337e-06, "loss": 0.4941, "step": 8820 }, { "epoch": 0.2703506191001594, "grad_norm": 1.702505793834867, "learning_rate": 8.560101963340434e-06, "loss": 0.6233, "step": 8821 }, { "epoch": 0.2703812676229006, "grad_norm": 0.8239591946116277, "learning_rate": 8.559753451870936e-06, "loss": 0.4798, "step": 8822 }, { "epoch": 0.2704119161456418, "grad_norm": 1.6828080354864063, "learning_rate": 8.559404905326275e-06, "loss": 0.8077, "step": 8823 }, { "epoch": 0.27044256466838296, "grad_norm": 1.708635310700765, "learning_rate": 8.559056323709889e-06, "loss": 0.6692, "step": 8824 }, { "epoch": 0.27047321319112416, "grad_norm": 1.8926821279849504, "learning_rate": 8.558707707025209e-06, "loss": 0.6339, "step": 8825 }, { "epoch": 0.27050386171386537, "grad_norm": 1.537903565782643, "learning_rate": 8.558359055275671e-06, "loss": 0.6099, "step": 8826 }, { "epoch": 0.2705345102366066, "grad_norm": 1.8023575685731763, "learning_rate": 8.558010368464711e-06, "loss": 0.7335, "step": 8827 }, { "epoch": 0.2705651587593478, "grad_norm": 1.6308445552780313, "learning_rate": 8.557661646595766e-06, "loss": 0.6307, "step": 8828 }, { "epoch": 0.270595807282089, "grad_norm": 1.7514276679073437, "learning_rate": 8.557312889672267e-06, "loss": 0.766, "step": 8829 }, { "epoch": 0.2706264558048302, "grad_norm": 1.836596959831608, "learning_rate": 8.556964097697657e-06, "loss": 0.8112, "step": 8830 }, { "epoch": 0.2706571043275714, "grad_norm": 1.5905413361891771, "learning_rate": 8.556615270675368e-06, "loss": 0.7363, "step": 8831 }, { "epoch": 0.2706877528503126, "grad_norm": 0.8215948076323152, "learning_rate": 8.55626640860884e-06, "loss": 0.4553, "step": 8832 }, { "epoch": 0.2707184013730538, "grad_norm": 0.8256053687122133, "learning_rate": 8.555917511501508e-06, "loss": 0.4495, "step": 8833 }, { "epoch": 0.270749049895795, "grad_norm": 1.7845342372534814, "learning_rate": 8.555568579356813e-06, "loss": 0.7316, "step": 8834 }, { "epoch": 0.2707796984185362, "grad_norm": 1.7255267727495989, "learning_rate": 8.55521961217819e-06, "loss": 0.6984, "step": 8835 }, { "epoch": 0.2708103469412774, "grad_norm": 1.9782240811135685, "learning_rate": 8.554870609969077e-06, "loss": 0.8353, "step": 8836 }, { "epoch": 0.27084099546401863, "grad_norm": 0.8377351461492188, "learning_rate": 8.554521572732916e-06, "loss": 0.4777, "step": 8837 }, { "epoch": 0.27087164398675984, "grad_norm": 1.732214562776955, "learning_rate": 8.554172500473144e-06, "loss": 0.7866, "step": 8838 }, { "epoch": 0.27090229250950104, "grad_norm": 1.5656821695344603, "learning_rate": 8.553823393193201e-06, "loss": 0.7611, "step": 8839 }, { "epoch": 0.27093294103224225, "grad_norm": 1.7137042851275721, "learning_rate": 8.553474250896527e-06, "loss": 0.8427, "step": 8840 }, { "epoch": 0.27096358955498345, "grad_norm": 1.5833062539422573, "learning_rate": 8.553125073586561e-06, "loss": 0.7037, "step": 8841 }, { "epoch": 0.27099423807772466, "grad_norm": 1.6003667913392228, "learning_rate": 8.552775861266745e-06, "loss": 0.6149, "step": 8842 }, { "epoch": 0.27102488660046586, "grad_norm": 1.6526231020190096, "learning_rate": 8.552426613940521e-06, "loss": 0.5874, "step": 8843 }, { "epoch": 0.27105553512320707, "grad_norm": 2.0122599096017995, "learning_rate": 8.552077331611326e-06, "loss": 0.7873, "step": 8844 }, { "epoch": 0.2710861836459483, "grad_norm": 1.6327683844718004, "learning_rate": 8.551728014282607e-06, "loss": 0.7197, "step": 8845 }, { "epoch": 0.2711168321686895, "grad_norm": 2.0151087052282954, "learning_rate": 8.5513786619578e-06, "loss": 0.5964, "step": 8846 }, { "epoch": 0.2711474806914307, "grad_norm": 1.581167504382569, "learning_rate": 8.551029274640353e-06, "loss": 0.6691, "step": 8847 }, { "epoch": 0.2711781292141719, "grad_norm": 1.9235916280997536, "learning_rate": 8.550679852333705e-06, "loss": 0.7557, "step": 8848 }, { "epoch": 0.2712087777369131, "grad_norm": 1.7804378036148576, "learning_rate": 8.5503303950413e-06, "loss": 0.686, "step": 8849 }, { "epoch": 0.2712394262596543, "grad_norm": 1.5750640113672645, "learning_rate": 8.549980902766582e-06, "loss": 0.6396, "step": 8850 }, { "epoch": 0.2712700747823955, "grad_norm": 1.6340863689551708, "learning_rate": 8.549631375512994e-06, "loss": 0.6892, "step": 8851 }, { "epoch": 0.2713007233051367, "grad_norm": 1.587924546939549, "learning_rate": 8.549281813283978e-06, "loss": 0.7135, "step": 8852 }, { "epoch": 0.2713313718278779, "grad_norm": 1.7522095417573245, "learning_rate": 8.548932216082982e-06, "loss": 0.6564, "step": 8853 }, { "epoch": 0.2713620203506191, "grad_norm": 1.6114102508716326, "learning_rate": 8.548582583913447e-06, "loss": 0.6672, "step": 8854 }, { "epoch": 0.2713926688733603, "grad_norm": 1.6142742732045783, "learning_rate": 8.54823291677882e-06, "loss": 0.7644, "step": 8855 }, { "epoch": 0.2714233173961015, "grad_norm": 1.8080107685066178, "learning_rate": 8.547883214682549e-06, "loss": 0.7918, "step": 8856 }, { "epoch": 0.2714539659188427, "grad_norm": 2.3016588914605642, "learning_rate": 8.547533477628073e-06, "loss": 0.7264, "step": 8857 }, { "epoch": 0.2714846144415839, "grad_norm": 1.7968703671633262, "learning_rate": 8.547183705618845e-06, "loss": 0.6844, "step": 8858 }, { "epoch": 0.2715152629643251, "grad_norm": 0.9502573799364203, "learning_rate": 8.546833898658309e-06, "loss": 0.5001, "step": 8859 }, { "epoch": 0.2715459114870663, "grad_norm": 1.747553127168434, "learning_rate": 8.546484056749908e-06, "loss": 0.669, "step": 8860 }, { "epoch": 0.2715765600098075, "grad_norm": 1.878491219669752, "learning_rate": 8.546134179897095e-06, "loss": 0.7148, "step": 8861 }, { "epoch": 0.2716072085325487, "grad_norm": 1.839996355946716, "learning_rate": 8.545784268103312e-06, "loss": 0.8137, "step": 8862 }, { "epoch": 0.2716378570552899, "grad_norm": 1.5920525220525372, "learning_rate": 8.54543432137201e-06, "loss": 0.6533, "step": 8863 }, { "epoch": 0.2716685055780311, "grad_norm": 1.960347053118997, "learning_rate": 8.545084339706638e-06, "loss": 0.6712, "step": 8864 }, { "epoch": 0.27169915410077233, "grad_norm": 1.8208342203903862, "learning_rate": 8.544734323110641e-06, "loss": 0.7195, "step": 8865 }, { "epoch": 0.27172980262351354, "grad_norm": 1.8260524804415383, "learning_rate": 8.54438427158747e-06, "loss": 0.6593, "step": 8866 }, { "epoch": 0.27176045114625474, "grad_norm": 1.865184219657092, "learning_rate": 8.544034185140577e-06, "loss": 0.6733, "step": 8867 }, { "epoch": 0.27179109966899595, "grad_norm": 1.4938528022711894, "learning_rate": 8.543684063773406e-06, "loss": 0.6733, "step": 8868 }, { "epoch": 0.27182174819173716, "grad_norm": 1.7253669793915463, "learning_rate": 8.54333390748941e-06, "loss": 0.6589, "step": 8869 }, { "epoch": 0.27185239671447836, "grad_norm": 1.9071890434874221, "learning_rate": 8.542983716292037e-06, "loss": 0.6431, "step": 8870 }, { "epoch": 0.27188304523721957, "grad_norm": 1.5529377671858078, "learning_rate": 8.54263349018474e-06, "loss": 0.7034, "step": 8871 }, { "epoch": 0.2719136937599608, "grad_norm": 1.6976990889160106, "learning_rate": 8.542283229170967e-06, "loss": 0.7746, "step": 8872 }, { "epoch": 0.271944342282702, "grad_norm": 1.6340679277043382, "learning_rate": 8.541932933254174e-06, "loss": 0.7613, "step": 8873 }, { "epoch": 0.2719749908054432, "grad_norm": 1.8749306047483634, "learning_rate": 8.541582602437808e-06, "loss": 0.6513, "step": 8874 }, { "epoch": 0.2720056393281844, "grad_norm": 1.828331294959217, "learning_rate": 8.54123223672532e-06, "loss": 0.6765, "step": 8875 }, { "epoch": 0.2720362878509256, "grad_norm": 1.5732171066747733, "learning_rate": 8.540881836120169e-06, "loss": 0.7324, "step": 8876 }, { "epoch": 0.2720669363736668, "grad_norm": 1.8469949575776654, "learning_rate": 8.540531400625802e-06, "loss": 0.7053, "step": 8877 }, { "epoch": 0.272097584896408, "grad_norm": 1.693699467740712, "learning_rate": 8.540180930245671e-06, "loss": 0.7701, "step": 8878 }, { "epoch": 0.2721282334191492, "grad_norm": 1.9770049568913408, "learning_rate": 8.539830424983236e-06, "loss": 0.7546, "step": 8879 }, { "epoch": 0.2721588819418904, "grad_norm": 0.9851344379416642, "learning_rate": 8.539479884841941e-06, "loss": 0.4883, "step": 8880 }, { "epoch": 0.2721895304646316, "grad_norm": 1.7765378692131473, "learning_rate": 8.539129309825249e-06, "loss": 0.6598, "step": 8881 }, { "epoch": 0.27222017898737283, "grad_norm": 1.8034482779842749, "learning_rate": 8.538778699936608e-06, "loss": 0.777, "step": 8882 }, { "epoch": 0.27225082751011404, "grad_norm": 1.8205186745584194, "learning_rate": 8.538428055179476e-06, "loss": 0.6712, "step": 8883 }, { "epoch": 0.27228147603285524, "grad_norm": 0.7655008369368597, "learning_rate": 8.538077375557308e-06, "loss": 0.4892, "step": 8884 }, { "epoch": 0.27231212455559645, "grad_norm": 1.7341538461113861, "learning_rate": 8.537726661073556e-06, "loss": 0.7282, "step": 8885 }, { "epoch": 0.2723427730783376, "grad_norm": 1.6531500423403311, "learning_rate": 8.53737591173168e-06, "loss": 0.6194, "step": 8886 }, { "epoch": 0.2723734216010788, "grad_norm": 1.7530550224811159, "learning_rate": 8.537025127535132e-06, "loss": 0.7143, "step": 8887 }, { "epoch": 0.27240407012382, "grad_norm": 1.6613968329748647, "learning_rate": 8.536674308487373e-06, "loss": 0.6828, "step": 8888 }, { "epoch": 0.2724347186465612, "grad_norm": 1.5816561529195963, "learning_rate": 8.536323454591855e-06, "loss": 0.6963, "step": 8889 }, { "epoch": 0.2724653671693024, "grad_norm": 0.845816475943142, "learning_rate": 8.535972565852038e-06, "loss": 0.4704, "step": 8890 }, { "epoch": 0.2724960156920436, "grad_norm": 1.655668603845714, "learning_rate": 8.53562164227138e-06, "loss": 0.6416, "step": 8891 }, { "epoch": 0.27252666421478483, "grad_norm": 1.9536905223372159, "learning_rate": 8.535270683853336e-06, "loss": 0.8388, "step": 8892 }, { "epoch": 0.27255731273752604, "grad_norm": 0.819287426146296, "learning_rate": 8.534919690601365e-06, "loss": 0.4648, "step": 8893 }, { "epoch": 0.27258796126026724, "grad_norm": 0.7943848154411493, "learning_rate": 8.534568662518926e-06, "loss": 0.4819, "step": 8894 }, { "epoch": 0.27261860978300845, "grad_norm": 1.5684964159105819, "learning_rate": 8.534217599609479e-06, "loss": 0.7038, "step": 8895 }, { "epoch": 0.27264925830574965, "grad_norm": 1.6433777458260526, "learning_rate": 8.53386650187648e-06, "loss": 0.8252, "step": 8896 }, { "epoch": 0.27267990682849086, "grad_norm": 1.6870324069431173, "learning_rate": 8.53351536932339e-06, "loss": 0.7796, "step": 8897 }, { "epoch": 0.27271055535123206, "grad_norm": 0.8222315621039507, "learning_rate": 8.533164201953673e-06, "loss": 0.4893, "step": 8898 }, { "epoch": 0.27274120387397327, "grad_norm": 1.557609537116572, "learning_rate": 8.53281299977078e-06, "loss": 0.6769, "step": 8899 }, { "epoch": 0.2727718523967145, "grad_norm": 1.7644252961963303, "learning_rate": 8.532461762778179e-06, "loss": 0.6731, "step": 8900 }, { "epoch": 0.2728025009194557, "grad_norm": 1.8459054845479064, "learning_rate": 8.532110490979327e-06, "loss": 0.6781, "step": 8901 }, { "epoch": 0.2728331494421969, "grad_norm": 1.6439616329408109, "learning_rate": 8.531759184377688e-06, "loss": 0.7281, "step": 8902 }, { "epoch": 0.2728637979649381, "grad_norm": 1.6248513536698928, "learning_rate": 8.531407842976722e-06, "loss": 0.7429, "step": 8903 }, { "epoch": 0.2728944464876793, "grad_norm": 1.650080998850224, "learning_rate": 8.53105646677989e-06, "loss": 0.6032, "step": 8904 }, { "epoch": 0.2729250950104205, "grad_norm": 1.7884122795745152, "learning_rate": 8.530705055790655e-06, "loss": 0.7006, "step": 8905 }, { "epoch": 0.2729557435331617, "grad_norm": 1.7222333266935599, "learning_rate": 8.530353610012482e-06, "loss": 0.5788, "step": 8906 }, { "epoch": 0.2729863920559029, "grad_norm": 1.7274635626853594, "learning_rate": 8.530002129448828e-06, "loss": 0.7433, "step": 8907 }, { "epoch": 0.2730170405786441, "grad_norm": 0.9174566285577179, "learning_rate": 8.529650614103163e-06, "loss": 0.4814, "step": 8908 }, { "epoch": 0.2730476891013853, "grad_norm": 1.7612035838864213, "learning_rate": 8.529299063978947e-06, "loss": 0.7262, "step": 8909 }, { "epoch": 0.27307833762412653, "grad_norm": 1.6961099201796734, "learning_rate": 8.528947479079644e-06, "loss": 0.7318, "step": 8910 }, { "epoch": 0.27310898614686774, "grad_norm": 1.7834245854657134, "learning_rate": 8.528595859408718e-06, "loss": 0.7455, "step": 8911 }, { "epoch": 0.27313963466960894, "grad_norm": 2.1512413150559073, "learning_rate": 8.528244204969633e-06, "loss": 0.6922, "step": 8912 }, { "epoch": 0.27317028319235015, "grad_norm": 2.127428462245573, "learning_rate": 8.527892515765858e-06, "loss": 0.732, "step": 8913 }, { "epoch": 0.27320093171509136, "grad_norm": 1.8548303292643433, "learning_rate": 8.527540791800853e-06, "loss": 0.6978, "step": 8914 }, { "epoch": 0.27323158023783256, "grad_norm": 0.8204328525789767, "learning_rate": 8.527189033078087e-06, "loss": 0.4836, "step": 8915 }, { "epoch": 0.27326222876057377, "grad_norm": 1.820817757620297, "learning_rate": 8.526837239601025e-06, "loss": 0.8371, "step": 8916 }, { "epoch": 0.2732928772833149, "grad_norm": 1.8019718578149675, "learning_rate": 8.526485411373133e-06, "loss": 0.6926, "step": 8917 }, { "epoch": 0.2733235258060561, "grad_norm": 2.3784679658157786, "learning_rate": 8.52613354839788e-06, "loss": 0.6576, "step": 8918 }, { "epoch": 0.27335417432879733, "grad_norm": 1.9373370350258148, "learning_rate": 8.525781650678728e-06, "loss": 0.8002, "step": 8919 }, { "epoch": 0.27338482285153853, "grad_norm": 1.921617007625586, "learning_rate": 8.525429718219149e-06, "loss": 0.7306, "step": 8920 }, { "epoch": 0.27341547137427974, "grad_norm": 0.7994798709610614, "learning_rate": 8.525077751022608e-06, "loss": 0.4801, "step": 8921 }, { "epoch": 0.27344611989702095, "grad_norm": 1.7509254672734307, "learning_rate": 8.524725749092576e-06, "loss": 0.7677, "step": 8922 }, { "epoch": 0.27347676841976215, "grad_norm": 1.6811401727977147, "learning_rate": 8.524373712432516e-06, "loss": 0.6405, "step": 8923 }, { "epoch": 0.27350741694250336, "grad_norm": 1.4584699144324276, "learning_rate": 8.5240216410459e-06, "loss": 0.6635, "step": 8924 }, { "epoch": 0.27353806546524456, "grad_norm": 1.8019179614522824, "learning_rate": 8.5236695349362e-06, "loss": 0.7513, "step": 8925 }, { "epoch": 0.27356871398798577, "grad_norm": 1.5695373516084747, "learning_rate": 8.523317394106883e-06, "loss": 0.6253, "step": 8926 }, { "epoch": 0.273599362510727, "grad_norm": 2.155425098662471, "learning_rate": 8.522965218561416e-06, "loss": 0.5809, "step": 8927 }, { "epoch": 0.2736300110334682, "grad_norm": 0.8081088717412701, "learning_rate": 8.522613008303272e-06, "loss": 0.4873, "step": 8928 }, { "epoch": 0.2736606595562094, "grad_norm": 1.6895120114169198, "learning_rate": 8.522260763335921e-06, "loss": 0.6866, "step": 8929 }, { "epoch": 0.2736913080789506, "grad_norm": 1.6264514369482441, "learning_rate": 8.521908483662832e-06, "loss": 0.7698, "step": 8930 }, { "epoch": 0.2737219566016918, "grad_norm": 0.7902895570415639, "learning_rate": 8.52155616928748e-06, "loss": 0.4879, "step": 8931 }, { "epoch": 0.273752605124433, "grad_norm": 1.7762001771047475, "learning_rate": 8.52120382021333e-06, "loss": 0.6542, "step": 8932 }, { "epoch": 0.2737832536471742, "grad_norm": 1.7557327949000252, "learning_rate": 8.520851436443863e-06, "loss": 0.7257, "step": 8933 }, { "epoch": 0.2738139021699154, "grad_norm": 1.7687220370658254, "learning_rate": 8.520499017982543e-06, "loss": 0.6754, "step": 8934 }, { "epoch": 0.2738445506926566, "grad_norm": 1.483019749862212, "learning_rate": 8.520146564832846e-06, "loss": 0.6156, "step": 8935 }, { "epoch": 0.2738751992153978, "grad_norm": 1.7981788144519402, "learning_rate": 8.519794076998244e-06, "loss": 0.8156, "step": 8936 }, { "epoch": 0.27390584773813903, "grad_norm": 1.6267236341131455, "learning_rate": 8.519441554482211e-06, "loss": 0.6319, "step": 8937 }, { "epoch": 0.27393649626088024, "grad_norm": 1.6051061053232467, "learning_rate": 8.51908899728822e-06, "loss": 0.6618, "step": 8938 }, { "epoch": 0.27396714478362144, "grad_norm": 1.7011268778953512, "learning_rate": 8.518736405419742e-06, "loss": 0.6642, "step": 8939 }, { "epoch": 0.27399779330636265, "grad_norm": 1.4798392818504345, "learning_rate": 8.518383778880258e-06, "loss": 0.6417, "step": 8940 }, { "epoch": 0.27402844182910385, "grad_norm": 1.768032613524761, "learning_rate": 8.518031117673236e-06, "loss": 0.6395, "step": 8941 }, { "epoch": 0.27405909035184506, "grad_norm": 0.9237556735023857, "learning_rate": 8.517678421802153e-06, "loss": 0.4872, "step": 8942 }, { "epoch": 0.27408973887458626, "grad_norm": 1.7171307666490894, "learning_rate": 8.517325691270485e-06, "loss": 0.6917, "step": 8943 }, { "epoch": 0.27412038739732747, "grad_norm": 1.68318631367534, "learning_rate": 8.516972926081708e-06, "loss": 0.7705, "step": 8944 }, { "epoch": 0.2741510359200687, "grad_norm": 1.518627134125846, "learning_rate": 8.516620126239297e-06, "loss": 0.6904, "step": 8945 }, { "epoch": 0.2741816844428099, "grad_norm": 1.7948271330717895, "learning_rate": 8.516267291746727e-06, "loss": 0.6212, "step": 8946 }, { "epoch": 0.2742123329655511, "grad_norm": 1.5842818686450302, "learning_rate": 8.515914422607476e-06, "loss": 0.6174, "step": 8947 }, { "epoch": 0.27424298148829224, "grad_norm": 1.5997366628355438, "learning_rate": 8.51556151882502e-06, "loss": 0.696, "step": 8948 }, { "epoch": 0.27427363001103344, "grad_norm": 0.83702692048584, "learning_rate": 8.515208580402838e-06, "loss": 0.4717, "step": 8949 }, { "epoch": 0.27430427853377465, "grad_norm": 1.671521389339352, "learning_rate": 8.514855607344406e-06, "loss": 0.6954, "step": 8950 }, { "epoch": 0.27433492705651585, "grad_norm": 1.642686804106307, "learning_rate": 8.514502599653202e-06, "loss": 0.6524, "step": 8951 }, { "epoch": 0.27436557557925706, "grad_norm": 1.930443683722303, "learning_rate": 8.514149557332705e-06, "loss": 0.5977, "step": 8952 }, { "epoch": 0.27439622410199827, "grad_norm": 1.8588711483918727, "learning_rate": 8.513796480386393e-06, "loss": 0.691, "step": 8953 }, { "epoch": 0.27442687262473947, "grad_norm": 1.52118169150677, "learning_rate": 8.513443368817745e-06, "loss": 0.5949, "step": 8954 }, { "epoch": 0.2744575211474807, "grad_norm": 2.0404655192331855, "learning_rate": 8.513090222630241e-06, "loss": 0.6848, "step": 8955 }, { "epoch": 0.2744881696702219, "grad_norm": 1.6432140297581899, "learning_rate": 8.51273704182736e-06, "loss": 0.715, "step": 8956 }, { "epoch": 0.2745188181929631, "grad_norm": 1.8016682472158931, "learning_rate": 8.512383826412583e-06, "loss": 0.7759, "step": 8957 }, { "epoch": 0.2745494667157043, "grad_norm": 1.683598982792562, "learning_rate": 8.512030576389388e-06, "loss": 0.6959, "step": 8958 }, { "epoch": 0.2745801152384455, "grad_norm": 1.7648246329330908, "learning_rate": 8.511677291761258e-06, "loss": 0.6839, "step": 8959 }, { "epoch": 0.2746107637611867, "grad_norm": 1.884651434108522, "learning_rate": 8.511323972531674e-06, "loss": 0.6445, "step": 8960 }, { "epoch": 0.2746414122839279, "grad_norm": 1.717263793383077, "learning_rate": 8.510970618704115e-06, "loss": 0.6467, "step": 8961 }, { "epoch": 0.2746720608066691, "grad_norm": 1.957770385886508, "learning_rate": 8.510617230282064e-06, "loss": 0.6876, "step": 8962 }, { "epoch": 0.2747027093294103, "grad_norm": 1.6186129764501287, "learning_rate": 8.510263807269003e-06, "loss": 0.673, "step": 8963 }, { "epoch": 0.2747333578521515, "grad_norm": 1.6583991235931448, "learning_rate": 8.509910349668418e-06, "loss": 0.7013, "step": 8964 }, { "epoch": 0.27476400637489273, "grad_norm": 1.794606508038073, "learning_rate": 8.509556857483786e-06, "loss": 0.7349, "step": 8965 }, { "epoch": 0.27479465489763394, "grad_norm": 1.6199631123851008, "learning_rate": 8.509203330718591e-06, "loss": 0.6888, "step": 8966 }, { "epoch": 0.27482530342037514, "grad_norm": 1.7577080716912492, "learning_rate": 8.508849769376319e-06, "loss": 0.6529, "step": 8967 }, { "epoch": 0.27485595194311635, "grad_norm": 0.841711810591831, "learning_rate": 8.508496173460453e-06, "loss": 0.4772, "step": 8968 }, { "epoch": 0.27488660046585756, "grad_norm": 1.8617301905271653, "learning_rate": 8.508142542974476e-06, "loss": 0.8209, "step": 8969 }, { "epoch": 0.27491724898859876, "grad_norm": 1.584263187698999, "learning_rate": 8.507788877921873e-06, "loss": 0.6315, "step": 8970 }, { "epoch": 0.27494789751133997, "grad_norm": 1.684792796094721, "learning_rate": 8.507435178306127e-06, "loss": 0.6973, "step": 8971 }, { "epoch": 0.2749785460340812, "grad_norm": 1.8314017902475492, "learning_rate": 8.507081444130726e-06, "loss": 0.6656, "step": 8972 }, { "epoch": 0.2750091945568224, "grad_norm": 1.7181922690851663, "learning_rate": 8.506727675399154e-06, "loss": 0.7271, "step": 8973 }, { "epoch": 0.2750398430795636, "grad_norm": 0.7852113438286004, "learning_rate": 8.506373872114897e-06, "loss": 0.4746, "step": 8974 }, { "epoch": 0.2750704916023048, "grad_norm": 1.69665659570189, "learning_rate": 8.506020034281442e-06, "loss": 0.8079, "step": 8975 }, { "epoch": 0.275101140125046, "grad_norm": 1.776923016283744, "learning_rate": 8.505666161902275e-06, "loss": 0.6934, "step": 8976 }, { "epoch": 0.2751317886477872, "grad_norm": 1.8995394673882617, "learning_rate": 8.50531225498088e-06, "loss": 0.8192, "step": 8977 }, { "epoch": 0.2751624371705284, "grad_norm": 1.7685772333504686, "learning_rate": 8.504958313520749e-06, "loss": 0.5661, "step": 8978 }, { "epoch": 0.27519308569326956, "grad_norm": 1.910908891269093, "learning_rate": 8.504604337525366e-06, "loss": 0.7812, "step": 8979 }, { "epoch": 0.27522373421601076, "grad_norm": 1.7350372505074705, "learning_rate": 8.504250326998219e-06, "loss": 0.6859, "step": 8980 }, { "epoch": 0.27525438273875197, "grad_norm": 2.2127788108982975, "learning_rate": 8.503896281942798e-06, "loss": 0.7284, "step": 8981 }, { "epoch": 0.2752850312614932, "grad_norm": 1.7108325166222864, "learning_rate": 8.503542202362589e-06, "loss": 0.7069, "step": 8982 }, { "epoch": 0.2753156797842344, "grad_norm": 0.8533062819018975, "learning_rate": 8.503188088261083e-06, "loss": 0.4687, "step": 8983 }, { "epoch": 0.2753463283069756, "grad_norm": 1.5284421157648374, "learning_rate": 8.502833939641768e-06, "loss": 0.6006, "step": 8984 }, { "epoch": 0.2753769768297168, "grad_norm": 1.685049130655252, "learning_rate": 8.502479756508135e-06, "loss": 0.6967, "step": 8985 }, { "epoch": 0.275407625352458, "grad_norm": 3.56842439211834, "learning_rate": 8.502125538863673e-06, "loss": 0.7228, "step": 8986 }, { "epoch": 0.2754382738751992, "grad_norm": 1.802476308013116, "learning_rate": 8.50177128671187e-06, "loss": 0.8071, "step": 8987 }, { "epoch": 0.2754689223979404, "grad_norm": 1.8464821282430939, "learning_rate": 8.50141700005622e-06, "loss": 0.6597, "step": 8988 }, { "epoch": 0.2754995709206816, "grad_norm": 1.8414605579780177, "learning_rate": 8.501062678900212e-06, "loss": 0.7516, "step": 8989 }, { "epoch": 0.2755302194434228, "grad_norm": 1.794673669146081, "learning_rate": 8.500708323247339e-06, "loss": 0.7016, "step": 8990 }, { "epoch": 0.275560867966164, "grad_norm": 0.9141931288407747, "learning_rate": 8.50035393310109e-06, "loss": 0.4665, "step": 8991 }, { "epoch": 0.27559151648890523, "grad_norm": 0.980848349551805, "learning_rate": 8.499999508464958e-06, "loss": 0.4693, "step": 8992 }, { "epoch": 0.27562216501164644, "grad_norm": 1.7802828188720556, "learning_rate": 8.499645049342436e-06, "loss": 0.7065, "step": 8993 }, { "epoch": 0.27565281353438764, "grad_norm": 5.67243509382753, "learning_rate": 8.499290555737015e-06, "loss": 0.6994, "step": 8994 }, { "epoch": 0.27568346205712885, "grad_norm": 1.9910605182757009, "learning_rate": 8.49893602765219e-06, "loss": 0.7711, "step": 8995 }, { "epoch": 0.27571411057987005, "grad_norm": 1.6960190461397977, "learning_rate": 8.498581465091453e-06, "loss": 0.6193, "step": 8996 }, { "epoch": 0.27574475910261126, "grad_norm": 1.8750294748665164, "learning_rate": 8.498226868058296e-06, "loss": 0.7498, "step": 8997 }, { "epoch": 0.27577540762535246, "grad_norm": 1.834164860180798, "learning_rate": 8.497872236556214e-06, "loss": 0.6531, "step": 8998 }, { "epoch": 0.27580605614809367, "grad_norm": 1.5337940865499657, "learning_rate": 8.497517570588704e-06, "loss": 0.6306, "step": 8999 }, { "epoch": 0.2758367046708349, "grad_norm": 1.763417355481271, "learning_rate": 8.497162870159259e-06, "loss": 0.704, "step": 9000 }, { "epoch": 0.2758673531935761, "grad_norm": 1.8947257749798507, "learning_rate": 8.496808135271373e-06, "loss": 0.8322, "step": 9001 }, { "epoch": 0.2758980017163173, "grad_norm": 2.0289053606828964, "learning_rate": 8.49645336592854e-06, "loss": 0.7709, "step": 9002 }, { "epoch": 0.2759286502390585, "grad_norm": 1.6711791389160924, "learning_rate": 8.49609856213426e-06, "loss": 0.6883, "step": 9003 }, { "epoch": 0.2759592987617997, "grad_norm": 1.9206815944648743, "learning_rate": 8.495743723892024e-06, "loss": 0.7094, "step": 9004 }, { "epoch": 0.2759899472845409, "grad_norm": 2.003831752920464, "learning_rate": 8.495388851205332e-06, "loss": 0.7246, "step": 9005 }, { "epoch": 0.2760205958072821, "grad_norm": 1.8690851301465488, "learning_rate": 8.495033944077679e-06, "loss": 0.6933, "step": 9006 }, { "epoch": 0.2760512443300233, "grad_norm": 1.7331749634060647, "learning_rate": 8.494679002512562e-06, "loss": 0.5882, "step": 9007 }, { "epoch": 0.2760818928527645, "grad_norm": 1.7986876409866694, "learning_rate": 8.49432402651348e-06, "loss": 0.7105, "step": 9008 }, { "epoch": 0.2761125413755057, "grad_norm": 1.8479089599857523, "learning_rate": 8.493969016083928e-06, "loss": 0.7937, "step": 9009 }, { "epoch": 0.2761431898982469, "grad_norm": 2.185286117411614, "learning_rate": 8.493613971227405e-06, "loss": 0.6251, "step": 9010 }, { "epoch": 0.2761738384209881, "grad_norm": 1.6777117541562163, "learning_rate": 8.49325889194741e-06, "loss": 0.6983, "step": 9011 }, { "epoch": 0.2762044869437293, "grad_norm": 1.7012244062141935, "learning_rate": 8.49290377824744e-06, "loss": 0.7239, "step": 9012 }, { "epoch": 0.2762351354664705, "grad_norm": 1.454284130606198, "learning_rate": 8.492548630131e-06, "loss": 0.4865, "step": 9013 }, { "epoch": 0.2762657839892117, "grad_norm": 1.5819977203683984, "learning_rate": 8.49219344760158e-06, "loss": 0.7938, "step": 9014 }, { "epoch": 0.2762964325119529, "grad_norm": 1.7094470561246786, "learning_rate": 8.491838230662685e-06, "loss": 0.6448, "step": 9015 }, { "epoch": 0.2763270810346941, "grad_norm": 0.820485634625417, "learning_rate": 8.491482979317817e-06, "loss": 0.4885, "step": 9016 }, { "epoch": 0.2763577295574353, "grad_norm": 1.6846669479265801, "learning_rate": 8.491127693570472e-06, "loss": 0.6808, "step": 9017 }, { "epoch": 0.2763883780801765, "grad_norm": 1.9079865278679335, "learning_rate": 8.490772373424155e-06, "loss": 0.7879, "step": 9018 }, { "epoch": 0.27641902660291773, "grad_norm": 1.4880458962550738, "learning_rate": 8.49041701888236e-06, "loss": 0.6039, "step": 9019 }, { "epoch": 0.27644967512565893, "grad_norm": 1.5121278939778633, "learning_rate": 8.490061629948596e-06, "loss": 0.6186, "step": 9020 }, { "epoch": 0.27648032364840014, "grad_norm": 1.2371307453531069, "learning_rate": 8.489706206626363e-06, "loss": 0.4997, "step": 9021 }, { "epoch": 0.27651097217114134, "grad_norm": 1.992204941981258, "learning_rate": 8.489350748919161e-06, "loss": 0.7718, "step": 9022 }, { "epoch": 0.27654162069388255, "grad_norm": 1.6229657915464484, "learning_rate": 8.488995256830492e-06, "loss": 0.6702, "step": 9023 }, { "epoch": 0.27657226921662376, "grad_norm": 1.6179138130397475, "learning_rate": 8.48863973036386e-06, "loss": 0.5626, "step": 9024 }, { "epoch": 0.27660291773936496, "grad_norm": 1.8252008425519288, "learning_rate": 8.48828416952277e-06, "loss": 0.7251, "step": 9025 }, { "epoch": 0.27663356626210617, "grad_norm": 0.8597329940767525, "learning_rate": 8.487928574310722e-06, "loss": 0.502, "step": 9026 }, { "epoch": 0.2766642147848474, "grad_norm": 1.5209132704134694, "learning_rate": 8.487572944731221e-06, "loss": 0.6503, "step": 9027 }, { "epoch": 0.2766948633075886, "grad_norm": 0.8026947619609113, "learning_rate": 8.487217280787772e-06, "loss": 0.4683, "step": 9028 }, { "epoch": 0.2767255118303298, "grad_norm": 0.839903653538065, "learning_rate": 8.48686158248388e-06, "loss": 0.4864, "step": 9029 }, { "epoch": 0.276756160353071, "grad_norm": 1.952597699774007, "learning_rate": 8.486505849823048e-06, "loss": 0.7223, "step": 9030 }, { "epoch": 0.2767868088758122, "grad_norm": 1.74804474536977, "learning_rate": 8.48615008280878e-06, "loss": 0.6875, "step": 9031 }, { "epoch": 0.2768174573985534, "grad_norm": 1.7920652696548212, "learning_rate": 8.485794281444586e-06, "loss": 0.6861, "step": 9032 }, { "epoch": 0.2768481059212946, "grad_norm": 1.8232663343648345, "learning_rate": 8.485438445733967e-06, "loss": 0.6893, "step": 9033 }, { "epoch": 0.2768787544440358, "grad_norm": 1.5782898950046054, "learning_rate": 8.485082575680432e-06, "loss": 0.7499, "step": 9034 }, { "epoch": 0.276909402966777, "grad_norm": 1.8315874920084587, "learning_rate": 8.484726671287487e-06, "loss": 0.7021, "step": 9035 }, { "epoch": 0.2769400514895182, "grad_norm": 1.8230435482313863, "learning_rate": 8.484370732558637e-06, "loss": 0.6525, "step": 9036 }, { "epoch": 0.27697070001225943, "grad_norm": 1.5800653087417114, "learning_rate": 8.484014759497393e-06, "loss": 0.6476, "step": 9037 }, { "epoch": 0.27700134853500064, "grad_norm": 0.9596555023178303, "learning_rate": 8.48365875210726e-06, "loss": 0.4791, "step": 9038 }, { "epoch": 0.27703199705774184, "grad_norm": 1.7320594913008303, "learning_rate": 8.483302710391745e-06, "loss": 0.6637, "step": 9039 }, { "epoch": 0.27706264558048305, "grad_norm": 1.9132927697103987, "learning_rate": 8.482946634354357e-06, "loss": 0.6716, "step": 9040 }, { "epoch": 0.2770932941032242, "grad_norm": 1.7114365186988227, "learning_rate": 8.482590523998605e-06, "loss": 0.8054, "step": 9041 }, { "epoch": 0.2771239426259654, "grad_norm": 1.9457632727399263, "learning_rate": 8.482234379327998e-06, "loss": 0.7892, "step": 9042 }, { "epoch": 0.2771545911487066, "grad_norm": 0.8056859466014379, "learning_rate": 8.481878200346042e-06, "loss": 0.5033, "step": 9043 }, { "epoch": 0.2771852396714478, "grad_norm": 1.797620052324035, "learning_rate": 8.481521987056251e-06, "loss": 0.8186, "step": 9044 }, { "epoch": 0.277215888194189, "grad_norm": 1.6208492575340088, "learning_rate": 8.481165739462135e-06, "loss": 0.6201, "step": 9045 }, { "epoch": 0.2772465367169302, "grad_norm": 1.7090892492203087, "learning_rate": 8.480809457567201e-06, "loss": 0.6974, "step": 9046 }, { "epoch": 0.27727718523967143, "grad_norm": 1.80065226816876, "learning_rate": 8.48045314137496e-06, "loss": 0.7709, "step": 9047 }, { "epoch": 0.27730783376241264, "grad_norm": 1.7667659611695843, "learning_rate": 8.480096790888923e-06, "loss": 0.7044, "step": 9048 }, { "epoch": 0.27733848228515384, "grad_norm": 1.7801455983966386, "learning_rate": 8.479740406112605e-06, "loss": 0.728, "step": 9049 }, { "epoch": 0.27736913080789505, "grad_norm": 0.8554134006351591, "learning_rate": 8.479383987049512e-06, "loss": 0.4863, "step": 9050 }, { "epoch": 0.27739977933063625, "grad_norm": 1.8515627747452588, "learning_rate": 8.479027533703158e-06, "loss": 0.6811, "step": 9051 }, { "epoch": 0.27743042785337746, "grad_norm": 0.8065091655939846, "learning_rate": 8.478671046077055e-06, "loss": 0.4781, "step": 9052 }, { "epoch": 0.27746107637611866, "grad_norm": 0.8025348142966499, "learning_rate": 8.47831452417472e-06, "loss": 0.4797, "step": 9053 }, { "epoch": 0.27749172489885987, "grad_norm": 2.0185059371846057, "learning_rate": 8.477957967999659e-06, "loss": 0.7417, "step": 9054 }, { "epoch": 0.2775223734216011, "grad_norm": 1.744862420499543, "learning_rate": 8.47760137755539e-06, "loss": 0.5329, "step": 9055 }, { "epoch": 0.2775530219443423, "grad_norm": 0.8059743905331227, "learning_rate": 8.477244752845422e-06, "loss": 0.4739, "step": 9056 }, { "epoch": 0.2775836704670835, "grad_norm": 1.6255448461722164, "learning_rate": 8.476888093873274e-06, "loss": 0.6997, "step": 9057 }, { "epoch": 0.2776143189898247, "grad_norm": 1.6809903580309289, "learning_rate": 8.476531400642456e-06, "loss": 0.6816, "step": 9058 }, { "epoch": 0.2776449675125659, "grad_norm": 0.8549844589953546, "learning_rate": 8.476174673156488e-06, "loss": 0.4841, "step": 9059 }, { "epoch": 0.2776756160353071, "grad_norm": 1.529703202211574, "learning_rate": 8.475817911418879e-06, "loss": 0.6558, "step": 9060 }, { "epoch": 0.2777062645580483, "grad_norm": 1.543625937263075, "learning_rate": 8.475461115433147e-06, "loss": 0.6489, "step": 9061 }, { "epoch": 0.2777369130807895, "grad_norm": 0.8091784224801523, "learning_rate": 8.475104285202807e-06, "loss": 0.4568, "step": 9062 }, { "epoch": 0.2777675616035307, "grad_norm": 1.7937931245826422, "learning_rate": 8.474747420731377e-06, "loss": 0.7734, "step": 9063 }, { "epoch": 0.2777982101262719, "grad_norm": 1.8079692677423855, "learning_rate": 8.47439052202237e-06, "loss": 0.6447, "step": 9064 }, { "epoch": 0.27782885864901313, "grad_norm": 1.602998735812019, "learning_rate": 8.474033589079306e-06, "loss": 0.7661, "step": 9065 }, { "epoch": 0.27785950717175434, "grad_norm": 1.5574291002967964, "learning_rate": 8.473676621905699e-06, "loss": 0.6964, "step": 9066 }, { "epoch": 0.27789015569449554, "grad_norm": 1.7974346835792863, "learning_rate": 8.473319620505067e-06, "loss": 0.7943, "step": 9067 }, { "epoch": 0.27792080421723675, "grad_norm": 1.509401108079263, "learning_rate": 8.472962584880928e-06, "loss": 0.6054, "step": 9068 }, { "epoch": 0.27795145273997796, "grad_norm": 1.8676127254621804, "learning_rate": 8.4726055150368e-06, "loss": 0.6955, "step": 9069 }, { "epoch": 0.27798210126271916, "grad_norm": 1.6620959380880314, "learning_rate": 8.472248410976203e-06, "loss": 0.5907, "step": 9070 }, { "epoch": 0.27801274978546037, "grad_norm": 1.759579580423266, "learning_rate": 8.471891272702652e-06, "loss": 0.7861, "step": 9071 }, { "epoch": 0.2780433983082015, "grad_norm": 0.8765904050937524, "learning_rate": 8.471534100219671e-06, "loss": 0.4782, "step": 9072 }, { "epoch": 0.2780740468309427, "grad_norm": 1.6595056587335084, "learning_rate": 8.471176893530774e-06, "loss": 0.6547, "step": 9073 }, { "epoch": 0.27810469535368393, "grad_norm": 1.9927805933563996, "learning_rate": 8.470819652639483e-06, "loss": 0.6838, "step": 9074 }, { "epoch": 0.27813534387642513, "grad_norm": 1.6969062777499166, "learning_rate": 8.470462377549318e-06, "loss": 0.597, "step": 9075 }, { "epoch": 0.27816599239916634, "grad_norm": 1.7689089867076027, "learning_rate": 8.4701050682638e-06, "loss": 0.7216, "step": 9076 }, { "epoch": 0.27819664092190755, "grad_norm": 1.7535763777218731, "learning_rate": 8.469747724786448e-06, "loss": 0.6479, "step": 9077 }, { "epoch": 0.27822728944464875, "grad_norm": 1.5899275399737332, "learning_rate": 8.469390347120783e-06, "loss": 0.6777, "step": 9078 }, { "epoch": 0.27825793796738996, "grad_norm": 0.8223827951774887, "learning_rate": 8.46903293527033e-06, "loss": 0.4747, "step": 9079 }, { "epoch": 0.27828858649013116, "grad_norm": 1.5411226663313817, "learning_rate": 8.468675489238604e-06, "loss": 0.6194, "step": 9080 }, { "epoch": 0.27831923501287237, "grad_norm": 1.8126218878458713, "learning_rate": 8.468318009029135e-06, "loss": 0.7059, "step": 9081 }, { "epoch": 0.2783498835356136, "grad_norm": 1.8728665208362822, "learning_rate": 8.46796049464544e-06, "loss": 0.752, "step": 9082 }, { "epoch": 0.2783805320583548, "grad_norm": 0.7979028104683148, "learning_rate": 8.467602946091042e-06, "loss": 0.457, "step": 9083 }, { "epoch": 0.278411180581096, "grad_norm": 1.7007016861276996, "learning_rate": 8.467245363369466e-06, "loss": 0.7117, "step": 9084 }, { "epoch": 0.2784418291038372, "grad_norm": 1.778782427551739, "learning_rate": 8.466887746484232e-06, "loss": 0.7367, "step": 9085 }, { "epoch": 0.2784724776265784, "grad_norm": 1.947551974349518, "learning_rate": 8.466530095438868e-06, "loss": 0.6962, "step": 9086 }, { "epoch": 0.2785031261493196, "grad_norm": 0.8213053840103609, "learning_rate": 8.466172410236896e-06, "loss": 0.4766, "step": 9087 }, { "epoch": 0.2785337746720608, "grad_norm": 1.833732366664141, "learning_rate": 8.46581469088184e-06, "loss": 0.6129, "step": 9088 }, { "epoch": 0.278564423194802, "grad_norm": 1.7196732448703727, "learning_rate": 8.465456937377226e-06, "loss": 0.68, "step": 9089 }, { "epoch": 0.2785950717175432, "grad_norm": 1.7361351405065353, "learning_rate": 8.465099149726577e-06, "loss": 0.7816, "step": 9090 }, { "epoch": 0.2786257202402844, "grad_norm": 1.9209311144194043, "learning_rate": 8.46474132793342e-06, "loss": 0.6754, "step": 9091 }, { "epoch": 0.27865636876302563, "grad_norm": 1.6205494260837103, "learning_rate": 8.464383472001278e-06, "loss": 0.6307, "step": 9092 }, { "epoch": 0.27868701728576684, "grad_norm": 1.754292556005674, "learning_rate": 8.464025581933682e-06, "loss": 0.7092, "step": 9093 }, { "epoch": 0.27871766580850804, "grad_norm": 1.5780941690976715, "learning_rate": 8.463667657734155e-06, "loss": 0.7068, "step": 9094 }, { "epoch": 0.27874831433124925, "grad_norm": 1.8412435094982464, "learning_rate": 8.463309699406223e-06, "loss": 0.7323, "step": 9095 }, { "epoch": 0.27877896285399045, "grad_norm": 1.5745559180579491, "learning_rate": 8.462951706953418e-06, "loss": 0.7193, "step": 9096 }, { "epoch": 0.27880961137673166, "grad_norm": 1.7373316228609617, "learning_rate": 8.462593680379259e-06, "loss": 0.6815, "step": 9097 }, { "epoch": 0.27884025989947286, "grad_norm": 1.6511486931946961, "learning_rate": 8.46223561968728e-06, "loss": 0.7181, "step": 9098 }, { "epoch": 0.27887090842221407, "grad_norm": 1.8677460013290668, "learning_rate": 8.46187752488101e-06, "loss": 0.6465, "step": 9099 }, { "epoch": 0.2789015569449553, "grad_norm": 1.6100785946490492, "learning_rate": 8.461519395963973e-06, "loss": 0.6906, "step": 9100 }, { "epoch": 0.2789322054676965, "grad_norm": 1.6301902056656625, "learning_rate": 8.461161232939701e-06, "loss": 0.6866, "step": 9101 }, { "epoch": 0.2789628539904377, "grad_norm": 1.6043882980396227, "learning_rate": 8.46080303581172e-06, "loss": 0.7154, "step": 9102 }, { "epoch": 0.27899350251317884, "grad_norm": 1.640582728771274, "learning_rate": 8.460444804583562e-06, "loss": 0.6891, "step": 9103 }, { "epoch": 0.27902415103592004, "grad_norm": 1.801633340495724, "learning_rate": 8.460086539258758e-06, "loss": 0.7012, "step": 9104 }, { "epoch": 0.27905479955866125, "grad_norm": 2.129223839391183, "learning_rate": 8.459728239840833e-06, "loss": 0.7043, "step": 9105 }, { "epoch": 0.27908544808140245, "grad_norm": 1.762794609754471, "learning_rate": 8.459369906333322e-06, "loss": 0.7614, "step": 9106 }, { "epoch": 0.27911609660414366, "grad_norm": 1.7702795146516195, "learning_rate": 8.459011538739754e-06, "loss": 0.7504, "step": 9107 }, { "epoch": 0.27914674512688487, "grad_norm": 1.589215009566089, "learning_rate": 8.458653137063658e-06, "loss": 0.7275, "step": 9108 }, { "epoch": 0.27917739364962607, "grad_norm": 1.674496533031126, "learning_rate": 8.45829470130857e-06, "loss": 0.7087, "step": 9109 }, { "epoch": 0.2792080421723673, "grad_norm": 1.7445474292508978, "learning_rate": 8.457936231478022e-06, "loss": 0.7409, "step": 9110 }, { "epoch": 0.2792386906951085, "grad_norm": 1.7040863910899064, "learning_rate": 8.45757772757554e-06, "loss": 0.7045, "step": 9111 }, { "epoch": 0.2792693392178497, "grad_norm": 1.0200911398448675, "learning_rate": 8.45721918960466e-06, "loss": 0.4644, "step": 9112 }, { "epoch": 0.2792999877405909, "grad_norm": 1.5078161872895166, "learning_rate": 8.456860617568916e-06, "loss": 0.5949, "step": 9113 }, { "epoch": 0.2793306362633321, "grad_norm": 1.6642337537444154, "learning_rate": 8.45650201147184e-06, "loss": 0.6338, "step": 9114 }, { "epoch": 0.2793612847860733, "grad_norm": 1.4770523598341978, "learning_rate": 8.456143371316965e-06, "loss": 0.6023, "step": 9115 }, { "epoch": 0.2793919333088145, "grad_norm": 1.9488836625327326, "learning_rate": 8.455784697107823e-06, "loss": 0.7819, "step": 9116 }, { "epoch": 0.2794225818315557, "grad_norm": 1.7234310188890511, "learning_rate": 8.455425988847952e-06, "loss": 0.7462, "step": 9117 }, { "epoch": 0.2794532303542969, "grad_norm": 1.7171814680172979, "learning_rate": 8.455067246540887e-06, "loss": 0.6959, "step": 9118 }, { "epoch": 0.2794838788770381, "grad_norm": 1.855862921472609, "learning_rate": 8.454708470190157e-06, "loss": 0.6518, "step": 9119 }, { "epoch": 0.27951452739977933, "grad_norm": 1.6774474081652675, "learning_rate": 8.454349659799301e-06, "loss": 0.7949, "step": 9120 }, { "epoch": 0.27954517592252054, "grad_norm": 1.1026919726368245, "learning_rate": 8.453990815371855e-06, "loss": 0.4922, "step": 9121 }, { "epoch": 0.27957582444526174, "grad_norm": 1.8307481917209036, "learning_rate": 8.453631936911352e-06, "loss": 0.7093, "step": 9122 }, { "epoch": 0.27960647296800295, "grad_norm": 1.5853001074992503, "learning_rate": 8.45327302442133e-06, "loss": 0.637, "step": 9123 }, { "epoch": 0.27963712149074416, "grad_norm": 1.844145920866591, "learning_rate": 8.452914077905328e-06, "loss": 0.6855, "step": 9124 }, { "epoch": 0.27966777001348536, "grad_norm": 0.8230357937913534, "learning_rate": 8.452555097366879e-06, "loss": 0.4844, "step": 9125 }, { "epoch": 0.27969841853622657, "grad_norm": 1.490417126763475, "learning_rate": 8.452196082809521e-06, "loss": 0.5778, "step": 9126 }, { "epoch": 0.2797290670589678, "grad_norm": 1.7131905648866712, "learning_rate": 8.451837034236791e-06, "loss": 0.6289, "step": 9127 }, { "epoch": 0.279759715581709, "grad_norm": 1.7338376643462872, "learning_rate": 8.451477951652229e-06, "loss": 0.6903, "step": 9128 }, { "epoch": 0.2797903641044502, "grad_norm": 1.805253254238031, "learning_rate": 8.45111883505937e-06, "loss": 0.8016, "step": 9129 }, { "epoch": 0.2798210126271914, "grad_norm": 1.7721400235964266, "learning_rate": 8.450759684461756e-06, "loss": 0.6501, "step": 9130 }, { "epoch": 0.2798516611499326, "grad_norm": 1.6874493519947555, "learning_rate": 8.450400499862922e-06, "loss": 0.7136, "step": 9131 }, { "epoch": 0.2798823096726738, "grad_norm": 1.8338909251165465, "learning_rate": 8.45004128126641e-06, "loss": 0.7968, "step": 9132 }, { "epoch": 0.279912958195415, "grad_norm": 1.0010232785165272, "learning_rate": 8.449682028675759e-06, "loss": 0.4807, "step": 9133 }, { "epoch": 0.27994360671815616, "grad_norm": 1.8200071506926387, "learning_rate": 8.449322742094505e-06, "loss": 0.7629, "step": 9134 }, { "epoch": 0.27997425524089736, "grad_norm": 0.8576705266241226, "learning_rate": 8.448963421526196e-06, "loss": 0.4774, "step": 9135 }, { "epoch": 0.28000490376363857, "grad_norm": 1.7997844989140601, "learning_rate": 8.448604066974367e-06, "loss": 0.6701, "step": 9136 }, { "epoch": 0.2800355522863798, "grad_norm": 1.6926245977449688, "learning_rate": 8.448244678442557e-06, "loss": 0.7006, "step": 9137 }, { "epoch": 0.280066200809121, "grad_norm": 1.7161705254646582, "learning_rate": 8.447885255934312e-06, "loss": 0.7067, "step": 9138 }, { "epoch": 0.2800968493318622, "grad_norm": 1.7915764556142197, "learning_rate": 8.44752579945317e-06, "loss": 0.6974, "step": 9139 }, { "epoch": 0.2801274978546034, "grad_norm": 1.7074539351669884, "learning_rate": 8.447166309002677e-06, "loss": 0.711, "step": 9140 }, { "epoch": 0.2801581463773446, "grad_norm": 1.7962471570791716, "learning_rate": 8.44680678458637e-06, "loss": 0.7107, "step": 9141 }, { "epoch": 0.2801887949000858, "grad_norm": 1.6666449986148049, "learning_rate": 8.446447226207795e-06, "loss": 0.6447, "step": 9142 }, { "epoch": 0.280219443422827, "grad_norm": 1.6069710465541762, "learning_rate": 8.446087633870492e-06, "loss": 0.7463, "step": 9143 }, { "epoch": 0.2802500919455682, "grad_norm": 1.7828601637973427, "learning_rate": 8.445728007578008e-06, "loss": 0.6552, "step": 9144 }, { "epoch": 0.2802807404683094, "grad_norm": 1.6783041044946556, "learning_rate": 8.445368347333884e-06, "loss": 0.6715, "step": 9145 }, { "epoch": 0.2803113889910506, "grad_norm": 1.1976429846785563, "learning_rate": 8.445008653141662e-06, "loss": 0.465, "step": 9146 }, { "epoch": 0.28034203751379183, "grad_norm": 1.7228451749868772, "learning_rate": 8.44464892500489e-06, "loss": 0.6491, "step": 9147 }, { "epoch": 0.28037268603653304, "grad_norm": 1.7600537766321354, "learning_rate": 8.44428916292711e-06, "loss": 0.6398, "step": 9148 }, { "epoch": 0.28040333455927424, "grad_norm": 2.0619803306751754, "learning_rate": 8.443929366911869e-06, "loss": 0.7726, "step": 9149 }, { "epoch": 0.28043398308201545, "grad_norm": 1.7056068880987971, "learning_rate": 8.44356953696271e-06, "loss": 0.6686, "step": 9150 }, { "epoch": 0.28046463160475665, "grad_norm": 1.698727277666855, "learning_rate": 8.443209673083178e-06, "loss": 0.8238, "step": 9151 }, { "epoch": 0.28049528012749786, "grad_norm": 1.7435259045986629, "learning_rate": 8.442849775276823e-06, "loss": 0.7137, "step": 9152 }, { "epoch": 0.28052592865023906, "grad_norm": 1.8365437830173774, "learning_rate": 8.442489843547187e-06, "loss": 0.6607, "step": 9153 }, { "epoch": 0.28055657717298027, "grad_norm": 1.6615037477993857, "learning_rate": 8.442129877897818e-06, "loss": 0.7867, "step": 9154 }, { "epoch": 0.2805872256957215, "grad_norm": 1.7866891773800628, "learning_rate": 8.441769878332261e-06, "loss": 0.682, "step": 9155 }, { "epoch": 0.2806178742184627, "grad_norm": 0.94745487551086, "learning_rate": 8.441409844854067e-06, "loss": 0.472, "step": 9156 }, { "epoch": 0.2806485227412039, "grad_norm": 1.7515279632579521, "learning_rate": 8.441049777466778e-06, "loss": 0.5889, "step": 9157 }, { "epoch": 0.2806791712639451, "grad_norm": 1.9974321265790864, "learning_rate": 8.440689676173947e-06, "loss": 0.8024, "step": 9158 }, { "epoch": 0.2807098197866863, "grad_norm": 0.8094749078065999, "learning_rate": 8.440329540979122e-06, "loss": 0.4919, "step": 9159 }, { "epoch": 0.2807404683094275, "grad_norm": 1.653125836706125, "learning_rate": 8.439969371885847e-06, "loss": 0.6179, "step": 9160 }, { "epoch": 0.2807711168321687, "grad_norm": 1.8181871549915047, "learning_rate": 8.439609168897676e-06, "loss": 0.6552, "step": 9161 }, { "epoch": 0.2808017653549099, "grad_norm": 1.9974098370693358, "learning_rate": 8.439248932018153e-06, "loss": 0.7958, "step": 9162 }, { "epoch": 0.2808324138776511, "grad_norm": 1.568401629131645, "learning_rate": 8.438888661250833e-06, "loss": 0.6176, "step": 9163 }, { "epoch": 0.2808630624003923, "grad_norm": 1.7995470968367278, "learning_rate": 8.438528356599262e-06, "loss": 0.7226, "step": 9164 }, { "epoch": 0.2808937109231335, "grad_norm": 1.6033716433059417, "learning_rate": 8.438168018066992e-06, "loss": 0.6796, "step": 9165 }, { "epoch": 0.2809243594458747, "grad_norm": 1.876066950938984, "learning_rate": 8.437807645657572e-06, "loss": 0.7236, "step": 9166 }, { "epoch": 0.2809550079686159, "grad_norm": 1.9969468962381096, "learning_rate": 8.437447239374554e-06, "loss": 0.6852, "step": 9167 }, { "epoch": 0.2809856564913571, "grad_norm": 1.6491899733125368, "learning_rate": 8.437086799221489e-06, "loss": 0.6655, "step": 9168 }, { "epoch": 0.2810163050140983, "grad_norm": 1.6208828073667716, "learning_rate": 8.436726325201926e-06, "loss": 0.6144, "step": 9169 }, { "epoch": 0.2810469535368395, "grad_norm": 1.5746746418109445, "learning_rate": 8.436365817319423e-06, "loss": 0.6293, "step": 9170 }, { "epoch": 0.2810776020595807, "grad_norm": 1.9151478037602558, "learning_rate": 8.436005275577527e-06, "loss": 0.6555, "step": 9171 }, { "epoch": 0.2811082505823219, "grad_norm": 1.0138317354598037, "learning_rate": 8.435644699979792e-06, "loss": 0.4843, "step": 9172 }, { "epoch": 0.2811388991050631, "grad_norm": 1.7572085992799515, "learning_rate": 8.43528409052977e-06, "loss": 0.6568, "step": 9173 }, { "epoch": 0.28116954762780433, "grad_norm": 1.6620946081983958, "learning_rate": 8.434923447231015e-06, "loss": 0.5975, "step": 9174 }, { "epoch": 0.28120019615054553, "grad_norm": 1.7443699824088565, "learning_rate": 8.43456277008708e-06, "loss": 0.6955, "step": 9175 }, { "epoch": 0.28123084467328674, "grad_norm": 1.851676502393262, "learning_rate": 8.43420205910152e-06, "loss": 0.6801, "step": 9176 }, { "epoch": 0.28126149319602795, "grad_norm": 1.0383152892226202, "learning_rate": 8.433841314277888e-06, "loss": 0.4994, "step": 9177 }, { "epoch": 0.28129214171876915, "grad_norm": 0.8151903833045828, "learning_rate": 8.433480535619741e-06, "loss": 0.4493, "step": 9178 }, { "epoch": 0.28132279024151036, "grad_norm": 2.7250941966908027, "learning_rate": 8.433119723130629e-06, "loss": 0.7744, "step": 9179 }, { "epoch": 0.28135343876425156, "grad_norm": 1.8060938213401598, "learning_rate": 8.43275887681411e-06, "loss": 0.7566, "step": 9180 }, { "epoch": 0.28138408728699277, "grad_norm": 1.6647374534065624, "learning_rate": 8.432397996673741e-06, "loss": 0.7029, "step": 9181 }, { "epoch": 0.281414735809734, "grad_norm": 1.7556716505343597, "learning_rate": 8.432037082713077e-06, "loss": 0.739, "step": 9182 }, { "epoch": 0.2814453843324752, "grad_norm": 1.699118352129971, "learning_rate": 8.43167613493567e-06, "loss": 0.7543, "step": 9183 }, { "epoch": 0.2814760328552164, "grad_norm": 1.7184638159653767, "learning_rate": 8.431315153345084e-06, "loss": 0.8153, "step": 9184 }, { "epoch": 0.2815066813779576, "grad_norm": 1.7188926261615816, "learning_rate": 8.430954137944869e-06, "loss": 0.7054, "step": 9185 }, { "epoch": 0.2815373299006988, "grad_norm": 1.7747533315723678, "learning_rate": 8.430593088738586e-06, "loss": 0.7453, "step": 9186 }, { "epoch": 0.28156797842344, "grad_norm": 0.9368591555901344, "learning_rate": 8.430232005729792e-06, "loss": 0.505, "step": 9187 }, { "epoch": 0.2815986269461812, "grad_norm": 1.634594490148967, "learning_rate": 8.429870888922045e-06, "loss": 0.747, "step": 9188 }, { "epoch": 0.2816292754689224, "grad_norm": 1.897578779295267, "learning_rate": 8.429509738318902e-06, "loss": 0.7624, "step": 9189 }, { "epoch": 0.2816599239916636, "grad_norm": 1.8011533509230442, "learning_rate": 8.42914855392392e-06, "loss": 0.7678, "step": 9190 }, { "epoch": 0.2816905725144048, "grad_norm": 1.6719877855580103, "learning_rate": 8.428787335740663e-06, "loss": 0.6948, "step": 9191 }, { "epoch": 0.28172122103714603, "grad_norm": 1.801918095457166, "learning_rate": 8.428426083772685e-06, "loss": 0.6893, "step": 9192 }, { "epoch": 0.28175186955988724, "grad_norm": 1.5336639158194723, "learning_rate": 8.428064798023548e-06, "loss": 0.5794, "step": 9193 }, { "epoch": 0.28178251808262844, "grad_norm": 1.5651407668665802, "learning_rate": 8.427703478496812e-06, "loss": 0.6528, "step": 9194 }, { "epoch": 0.28181316660536965, "grad_norm": 1.6776240728689815, "learning_rate": 8.427342125196038e-06, "loss": 0.6904, "step": 9195 }, { "epoch": 0.28184381512811085, "grad_norm": 1.7330382407406835, "learning_rate": 8.426980738124783e-06, "loss": 0.743, "step": 9196 }, { "epoch": 0.281874463650852, "grad_norm": 1.6994379745618342, "learning_rate": 8.42661931728661e-06, "loss": 0.7067, "step": 9197 }, { "epoch": 0.2819051121735932, "grad_norm": 1.8803528168550474, "learning_rate": 8.42625786268508e-06, "loss": 0.6141, "step": 9198 }, { "epoch": 0.2819357606963344, "grad_norm": 1.6230530262424998, "learning_rate": 8.425896374323757e-06, "loss": 0.5539, "step": 9199 }, { "epoch": 0.2819664092190756, "grad_norm": 1.667361912908936, "learning_rate": 8.425534852206198e-06, "loss": 0.7335, "step": 9200 }, { "epoch": 0.2819970577418168, "grad_norm": 1.6556888885865793, "learning_rate": 8.425173296335967e-06, "loss": 0.719, "step": 9201 }, { "epoch": 0.28202770626455803, "grad_norm": 1.5887690452063918, "learning_rate": 8.42481170671663e-06, "loss": 0.6888, "step": 9202 }, { "epoch": 0.28205835478729924, "grad_norm": 1.8730875693969775, "learning_rate": 8.424450083351746e-06, "loss": 0.6591, "step": 9203 }, { "epoch": 0.28208900331004044, "grad_norm": 2.057804195979405, "learning_rate": 8.424088426244877e-06, "loss": 0.6411, "step": 9204 }, { "epoch": 0.28211965183278165, "grad_norm": 0.8961021121576378, "learning_rate": 8.423726735399592e-06, "loss": 0.4871, "step": 9205 }, { "epoch": 0.28215030035552285, "grad_norm": 1.8793503467314165, "learning_rate": 8.423365010819449e-06, "loss": 0.6952, "step": 9206 }, { "epoch": 0.28218094887826406, "grad_norm": 1.6788656976905554, "learning_rate": 8.423003252508015e-06, "loss": 0.7782, "step": 9207 }, { "epoch": 0.28221159740100527, "grad_norm": 1.729095038503731, "learning_rate": 8.422641460468855e-06, "loss": 0.694, "step": 9208 }, { "epoch": 0.28224224592374647, "grad_norm": 0.7858236603456015, "learning_rate": 8.422279634705531e-06, "loss": 0.495, "step": 9209 }, { "epoch": 0.2822728944464877, "grad_norm": 1.8907441974873123, "learning_rate": 8.421917775221612e-06, "loss": 0.6509, "step": 9210 }, { "epoch": 0.2823035429692289, "grad_norm": 1.6439182046721887, "learning_rate": 8.421555882020662e-06, "loss": 0.6542, "step": 9211 }, { "epoch": 0.2823341914919701, "grad_norm": 1.6504770182527455, "learning_rate": 8.421193955106244e-06, "loss": 0.6835, "step": 9212 }, { "epoch": 0.2823648400147113, "grad_norm": 1.6722760033069317, "learning_rate": 8.420831994481928e-06, "loss": 0.6658, "step": 9213 }, { "epoch": 0.2823954885374525, "grad_norm": 1.6350193041416214, "learning_rate": 8.420470000151281e-06, "loss": 0.6911, "step": 9214 }, { "epoch": 0.2824261370601937, "grad_norm": 1.6030238778448278, "learning_rate": 8.420107972117865e-06, "loss": 0.6677, "step": 9215 }, { "epoch": 0.2824567855829349, "grad_norm": 1.7480286020418125, "learning_rate": 8.419745910385253e-06, "loss": 0.7283, "step": 9216 }, { "epoch": 0.2824874341056761, "grad_norm": 1.9392914733450413, "learning_rate": 8.419383814957007e-06, "loss": 0.7419, "step": 9217 }, { "epoch": 0.2825180826284173, "grad_norm": 1.8985872580008778, "learning_rate": 8.419021685836698e-06, "loss": 0.7054, "step": 9218 }, { "epoch": 0.2825487311511585, "grad_norm": 1.8666202835220558, "learning_rate": 8.418659523027894e-06, "loss": 0.8002, "step": 9219 }, { "epoch": 0.28257937967389973, "grad_norm": 1.6272808783845067, "learning_rate": 8.418297326534165e-06, "loss": 0.6682, "step": 9220 }, { "epoch": 0.28261002819664094, "grad_norm": 1.650814816021029, "learning_rate": 8.417935096359073e-06, "loss": 0.6762, "step": 9221 }, { "epoch": 0.28264067671938214, "grad_norm": 2.022821967735445, "learning_rate": 8.417572832506196e-06, "loss": 0.6848, "step": 9222 }, { "epoch": 0.28267132524212335, "grad_norm": 1.5844903794379073, "learning_rate": 8.417210534979098e-06, "loss": 0.7096, "step": 9223 }, { "epoch": 0.28270197376486456, "grad_norm": 1.7698034123405788, "learning_rate": 8.41684820378135e-06, "loss": 0.7245, "step": 9224 }, { "epoch": 0.28273262228760576, "grad_norm": 1.6443607961255018, "learning_rate": 8.416485838916522e-06, "loss": 0.8036, "step": 9225 }, { "epoch": 0.28276327081034697, "grad_norm": 1.5123029440553037, "learning_rate": 8.416123440388188e-06, "loss": 0.7191, "step": 9226 }, { "epoch": 0.2827939193330882, "grad_norm": 1.6422081683524492, "learning_rate": 8.415761008199912e-06, "loss": 0.7758, "step": 9227 }, { "epoch": 0.2828245678558293, "grad_norm": 1.7951443625487482, "learning_rate": 8.415398542355271e-06, "loss": 0.6244, "step": 9228 }, { "epoch": 0.28285521637857053, "grad_norm": 1.747526181564695, "learning_rate": 8.415036042857834e-06, "loss": 0.7428, "step": 9229 }, { "epoch": 0.28288586490131173, "grad_norm": 0.8354897272630903, "learning_rate": 8.414673509711172e-06, "loss": 0.4766, "step": 9230 }, { "epoch": 0.28291651342405294, "grad_norm": 1.7770260112402072, "learning_rate": 8.41431094291886e-06, "loss": 0.7565, "step": 9231 }, { "epoch": 0.28294716194679415, "grad_norm": 1.5943644779884751, "learning_rate": 8.413948342484466e-06, "loss": 0.6568, "step": 9232 }, { "epoch": 0.28297781046953535, "grad_norm": 1.4911840488117787, "learning_rate": 8.413585708411566e-06, "loss": 0.5862, "step": 9233 }, { "epoch": 0.28300845899227656, "grad_norm": 1.8344042128186329, "learning_rate": 8.413223040703735e-06, "loss": 0.7171, "step": 9234 }, { "epoch": 0.28303910751501776, "grad_norm": 1.5667669783491138, "learning_rate": 8.412860339364542e-06, "loss": 0.655, "step": 9235 }, { "epoch": 0.28306975603775897, "grad_norm": 1.624000987276262, "learning_rate": 8.412497604397564e-06, "loss": 0.7758, "step": 9236 }, { "epoch": 0.2831004045605002, "grad_norm": 1.7533176385418194, "learning_rate": 8.412134835806374e-06, "loss": 0.6517, "step": 9237 }, { "epoch": 0.2831310530832414, "grad_norm": 1.777579593040081, "learning_rate": 8.411772033594544e-06, "loss": 0.7361, "step": 9238 }, { "epoch": 0.2831617016059826, "grad_norm": 1.7268290237004658, "learning_rate": 8.411409197765654e-06, "loss": 0.7499, "step": 9239 }, { "epoch": 0.2831923501287238, "grad_norm": 0.8294639396444521, "learning_rate": 8.411046328323276e-06, "loss": 0.4698, "step": 9240 }, { "epoch": 0.283222998651465, "grad_norm": 1.683928409501036, "learning_rate": 8.410683425270986e-06, "loss": 0.6592, "step": 9241 }, { "epoch": 0.2832536471742062, "grad_norm": 1.8772156208546467, "learning_rate": 8.410320488612358e-06, "loss": 0.7349, "step": 9242 }, { "epoch": 0.2832842956969474, "grad_norm": 1.8352556215097147, "learning_rate": 8.40995751835097e-06, "loss": 0.7396, "step": 9243 }, { "epoch": 0.2833149442196886, "grad_norm": 1.7417997956394524, "learning_rate": 8.409594514490401e-06, "loss": 0.6363, "step": 9244 }, { "epoch": 0.2833455927424298, "grad_norm": 1.5850243866471676, "learning_rate": 8.409231477034221e-06, "loss": 0.6429, "step": 9245 }, { "epoch": 0.283376241265171, "grad_norm": 1.601725962479589, "learning_rate": 8.408868405986013e-06, "loss": 0.6161, "step": 9246 }, { "epoch": 0.28340688978791223, "grad_norm": 0.8229069447880422, "learning_rate": 8.408505301349352e-06, "loss": 0.4943, "step": 9247 }, { "epoch": 0.28343753831065344, "grad_norm": 1.6717597297317224, "learning_rate": 8.408142163127815e-06, "loss": 0.8125, "step": 9248 }, { "epoch": 0.28346818683339464, "grad_norm": 1.6963375218903876, "learning_rate": 8.407778991324984e-06, "loss": 0.7185, "step": 9249 }, { "epoch": 0.28349883535613585, "grad_norm": 1.7509220196092308, "learning_rate": 8.407415785944431e-06, "loss": 0.7687, "step": 9250 }, { "epoch": 0.28352948387887705, "grad_norm": 1.6331394465283475, "learning_rate": 8.40705254698974e-06, "loss": 0.7278, "step": 9251 }, { "epoch": 0.28356013240161826, "grad_norm": 1.585419009155998, "learning_rate": 8.40668927446449e-06, "loss": 0.7267, "step": 9252 }, { "epoch": 0.28359078092435946, "grad_norm": 1.7745052072136223, "learning_rate": 8.406325968372258e-06, "loss": 0.7547, "step": 9253 }, { "epoch": 0.28362142944710067, "grad_norm": 0.8454180797283275, "learning_rate": 8.405962628716624e-06, "loss": 0.4773, "step": 9254 }, { "epoch": 0.2836520779698419, "grad_norm": 0.8038796886778863, "learning_rate": 8.405599255501168e-06, "loss": 0.4693, "step": 9255 }, { "epoch": 0.2836827264925831, "grad_norm": 1.6855263248026535, "learning_rate": 8.405235848729474e-06, "loss": 0.7276, "step": 9256 }, { "epoch": 0.2837133750153243, "grad_norm": 1.8477623403194938, "learning_rate": 8.404872408405118e-06, "loss": 0.7095, "step": 9257 }, { "epoch": 0.2837440235380655, "grad_norm": 1.7084261535124736, "learning_rate": 8.404508934531684e-06, "loss": 0.6146, "step": 9258 }, { "epoch": 0.28377467206080664, "grad_norm": 0.8038491857390029, "learning_rate": 8.404145427112751e-06, "loss": 0.4732, "step": 9259 }, { "epoch": 0.28380532058354785, "grad_norm": 1.6267166389949836, "learning_rate": 8.403781886151902e-06, "loss": 0.6446, "step": 9260 }, { "epoch": 0.28383596910628905, "grad_norm": 1.6314404482328884, "learning_rate": 8.403418311652721e-06, "loss": 0.677, "step": 9261 }, { "epoch": 0.28386661762903026, "grad_norm": 1.6096167921572477, "learning_rate": 8.403054703618787e-06, "loss": 0.748, "step": 9262 }, { "epoch": 0.28389726615177147, "grad_norm": 1.8793192332858617, "learning_rate": 8.402691062053685e-06, "loss": 0.7305, "step": 9263 }, { "epoch": 0.28392791467451267, "grad_norm": 1.8455379142692563, "learning_rate": 8.402327386960998e-06, "loss": 0.7503, "step": 9264 }, { "epoch": 0.2839585631972539, "grad_norm": 1.954596669517302, "learning_rate": 8.401963678344309e-06, "loss": 0.6428, "step": 9265 }, { "epoch": 0.2839892117199951, "grad_norm": 1.6092046066932222, "learning_rate": 8.401599936207199e-06, "loss": 0.6833, "step": 9266 }, { "epoch": 0.2840198602427363, "grad_norm": 1.6869613136296675, "learning_rate": 8.401236160553257e-06, "loss": 0.7596, "step": 9267 }, { "epoch": 0.2840505087654775, "grad_norm": 1.5824902650183614, "learning_rate": 8.400872351386063e-06, "loss": 0.7139, "step": 9268 }, { "epoch": 0.2840811572882187, "grad_norm": 1.7532569449791293, "learning_rate": 8.400508508709205e-06, "loss": 0.6993, "step": 9269 }, { "epoch": 0.2841118058109599, "grad_norm": 0.9537869264821767, "learning_rate": 8.400144632526266e-06, "loss": 0.4821, "step": 9270 }, { "epoch": 0.2841424543337011, "grad_norm": 0.9496255146886461, "learning_rate": 8.399780722840832e-06, "loss": 0.4805, "step": 9271 }, { "epoch": 0.2841731028564423, "grad_norm": 1.6451007182453719, "learning_rate": 8.399416779656489e-06, "loss": 0.6935, "step": 9272 }, { "epoch": 0.2842037513791835, "grad_norm": 1.8571585911258974, "learning_rate": 8.399052802976822e-06, "loss": 0.8631, "step": 9273 }, { "epoch": 0.28423439990192473, "grad_norm": 1.7468287196972723, "learning_rate": 8.398688792805417e-06, "loss": 0.7219, "step": 9274 }, { "epoch": 0.28426504842466593, "grad_norm": 1.7857765028382022, "learning_rate": 8.398324749145864e-06, "loss": 0.6642, "step": 9275 }, { "epoch": 0.28429569694740714, "grad_norm": 0.8707148229039529, "learning_rate": 8.397960672001748e-06, "loss": 0.4831, "step": 9276 }, { "epoch": 0.28432634547014835, "grad_norm": 1.8188124619903563, "learning_rate": 8.397596561376652e-06, "loss": 0.639, "step": 9277 }, { "epoch": 0.28435699399288955, "grad_norm": 1.8245974685729935, "learning_rate": 8.397232417274172e-06, "loss": 0.7174, "step": 9278 }, { "epoch": 0.28438764251563076, "grad_norm": 1.7973190733729985, "learning_rate": 8.396868239697891e-06, "loss": 0.7891, "step": 9279 }, { "epoch": 0.28441829103837196, "grad_norm": 1.9392530056850827, "learning_rate": 8.396504028651397e-06, "loss": 0.8146, "step": 9280 }, { "epoch": 0.28444893956111317, "grad_norm": 1.7589765242280015, "learning_rate": 8.39613978413828e-06, "loss": 0.735, "step": 9281 }, { "epoch": 0.2844795880838544, "grad_norm": 1.6325446019284946, "learning_rate": 8.395775506162129e-06, "loss": 0.7594, "step": 9282 }, { "epoch": 0.2845102366065956, "grad_norm": 2.0111386101013373, "learning_rate": 8.395411194726533e-06, "loss": 0.7431, "step": 9283 }, { "epoch": 0.2845408851293368, "grad_norm": 1.6811089978005567, "learning_rate": 8.395046849835084e-06, "loss": 0.7099, "step": 9284 }, { "epoch": 0.284571533652078, "grad_norm": 0.8414765688717398, "learning_rate": 8.394682471491366e-06, "loss": 0.4745, "step": 9285 }, { "epoch": 0.2846021821748192, "grad_norm": 1.7762352233639356, "learning_rate": 8.394318059698976e-06, "loss": 0.6475, "step": 9286 }, { "epoch": 0.2846328306975604, "grad_norm": 1.6880527102483294, "learning_rate": 8.393953614461501e-06, "loss": 0.6331, "step": 9287 }, { "epoch": 0.2846634792203016, "grad_norm": 1.5325591294298915, "learning_rate": 8.393589135782531e-06, "loss": 0.6781, "step": 9288 }, { "epoch": 0.2846941277430428, "grad_norm": 1.5420205039483885, "learning_rate": 8.393224623665658e-06, "loss": 0.6225, "step": 9289 }, { "epoch": 0.28472477626578396, "grad_norm": 1.6734498667196518, "learning_rate": 8.392860078114477e-06, "loss": 0.7213, "step": 9290 }, { "epoch": 0.28475542478852517, "grad_norm": 2.6675272678185267, "learning_rate": 8.39249549913258e-06, "loss": 0.6268, "step": 9291 }, { "epoch": 0.2847860733112664, "grad_norm": 1.6968872829732933, "learning_rate": 8.392130886723553e-06, "loss": 0.6226, "step": 9292 }, { "epoch": 0.2848167218340076, "grad_norm": 1.666065328679615, "learning_rate": 8.391766240890993e-06, "loss": 0.7603, "step": 9293 }, { "epoch": 0.2848473703567488, "grad_norm": 2.1259996445843847, "learning_rate": 8.391401561638492e-06, "loss": 0.6347, "step": 9294 }, { "epoch": 0.28487801887949, "grad_norm": 1.947498965788976, "learning_rate": 8.391036848969646e-06, "loss": 0.7423, "step": 9295 }, { "epoch": 0.2849086674022312, "grad_norm": 1.7368606413561198, "learning_rate": 8.390672102888044e-06, "loss": 0.7509, "step": 9296 }, { "epoch": 0.2849393159249724, "grad_norm": 1.797156209051628, "learning_rate": 8.390307323397285e-06, "loss": 0.6281, "step": 9297 }, { "epoch": 0.2849699644477136, "grad_norm": 1.9829734002474682, "learning_rate": 8.389942510500957e-06, "loss": 0.738, "step": 9298 }, { "epoch": 0.2850006129704548, "grad_norm": 1.562071717563335, "learning_rate": 8.38957766420266e-06, "loss": 0.6, "step": 9299 }, { "epoch": 0.285031261493196, "grad_norm": 1.8216205573373754, "learning_rate": 8.389212784505987e-06, "loss": 0.7178, "step": 9300 }, { "epoch": 0.2850619100159372, "grad_norm": 2.0652683521325574, "learning_rate": 8.388847871414533e-06, "loss": 0.7628, "step": 9301 }, { "epoch": 0.28509255853867843, "grad_norm": 1.816503140880705, "learning_rate": 8.388482924931893e-06, "loss": 0.7892, "step": 9302 }, { "epoch": 0.28512320706141964, "grad_norm": 1.6365260716691654, "learning_rate": 8.388117945061664e-06, "loss": 0.7096, "step": 9303 }, { "epoch": 0.28515385558416084, "grad_norm": 1.8276801307184884, "learning_rate": 8.387752931807442e-06, "loss": 0.6353, "step": 9304 }, { "epoch": 0.28518450410690205, "grad_norm": 1.9046261408584448, "learning_rate": 8.387387885172825e-06, "loss": 0.719, "step": 9305 }, { "epoch": 0.28521515262964325, "grad_norm": 0.7949902815991364, "learning_rate": 8.387022805161408e-06, "loss": 0.4671, "step": 9306 }, { "epoch": 0.28524580115238446, "grad_norm": 1.777979919786191, "learning_rate": 8.386657691776788e-06, "loss": 0.793, "step": 9307 }, { "epoch": 0.28527644967512567, "grad_norm": 1.7100357723361814, "learning_rate": 8.386292545022563e-06, "loss": 0.7186, "step": 9308 }, { "epoch": 0.28530709819786687, "grad_norm": 1.9544379123154472, "learning_rate": 8.385927364902332e-06, "loss": 0.7228, "step": 9309 }, { "epoch": 0.2853377467206081, "grad_norm": 1.7103041677426232, "learning_rate": 8.385562151419693e-06, "loss": 0.6969, "step": 9310 }, { "epoch": 0.2853683952433493, "grad_norm": 1.727165318656334, "learning_rate": 8.385196904578243e-06, "loss": 0.7912, "step": 9311 }, { "epoch": 0.2853990437660905, "grad_norm": 1.6346097171743443, "learning_rate": 8.384831624381582e-06, "loss": 0.6806, "step": 9312 }, { "epoch": 0.2854296922888317, "grad_norm": 0.8218441419239554, "learning_rate": 8.384466310833308e-06, "loss": 0.5057, "step": 9313 }, { "epoch": 0.2854603408115729, "grad_norm": 1.7013045284824555, "learning_rate": 8.384100963937023e-06, "loss": 0.8138, "step": 9314 }, { "epoch": 0.2854909893343141, "grad_norm": 1.8490753041302619, "learning_rate": 8.383735583696323e-06, "loss": 0.7228, "step": 9315 }, { "epoch": 0.2855216378570553, "grad_norm": 1.7126753419153842, "learning_rate": 8.383370170114812e-06, "loss": 0.6658, "step": 9316 }, { "epoch": 0.2855522863797965, "grad_norm": 1.5924557805641557, "learning_rate": 8.383004723196088e-06, "loss": 0.6855, "step": 9317 }, { "epoch": 0.2855829349025377, "grad_norm": 1.8994844256480663, "learning_rate": 8.382639242943755e-06, "loss": 0.6804, "step": 9318 }, { "epoch": 0.2856135834252789, "grad_norm": 1.7317050908948795, "learning_rate": 8.382273729361411e-06, "loss": 0.6984, "step": 9319 }, { "epoch": 0.28564423194802013, "grad_norm": 1.6618564290267255, "learning_rate": 8.381908182452659e-06, "loss": 0.687, "step": 9320 }, { "epoch": 0.2856748804707613, "grad_norm": 1.7127902792455119, "learning_rate": 8.3815426022211e-06, "loss": 0.6756, "step": 9321 }, { "epoch": 0.2857055289935025, "grad_norm": 1.8213859444919112, "learning_rate": 8.381176988670337e-06, "loss": 0.705, "step": 9322 }, { "epoch": 0.2857361775162437, "grad_norm": 2.283630341803989, "learning_rate": 8.38081134180397e-06, "loss": 0.6653, "step": 9323 }, { "epoch": 0.2857668260389849, "grad_norm": 0.9817962809496489, "learning_rate": 8.380445661625606e-06, "loss": 0.4932, "step": 9324 }, { "epoch": 0.2857974745617261, "grad_norm": 1.8065337866889952, "learning_rate": 8.380079948138844e-06, "loss": 0.7042, "step": 9325 }, { "epoch": 0.2858281230844673, "grad_norm": 1.498521041044799, "learning_rate": 8.379714201347291e-06, "loss": 0.699, "step": 9326 }, { "epoch": 0.2858587716072085, "grad_norm": 1.7041861988544664, "learning_rate": 8.37934842125455e-06, "loss": 0.7349, "step": 9327 }, { "epoch": 0.2858894201299497, "grad_norm": 1.750375305492842, "learning_rate": 8.378982607864224e-06, "loss": 0.6692, "step": 9328 }, { "epoch": 0.28592006865269093, "grad_norm": 1.75073060671992, "learning_rate": 8.378616761179916e-06, "loss": 0.7743, "step": 9329 }, { "epoch": 0.28595071717543213, "grad_norm": 1.820787174826518, "learning_rate": 8.378250881205235e-06, "loss": 0.7393, "step": 9330 }, { "epoch": 0.28598136569817334, "grad_norm": 0.8987334483116998, "learning_rate": 8.377884967943781e-06, "loss": 0.4904, "step": 9331 }, { "epoch": 0.28601201422091455, "grad_norm": 1.6585325390644134, "learning_rate": 8.377519021399164e-06, "loss": 0.7083, "step": 9332 }, { "epoch": 0.28604266274365575, "grad_norm": 1.9536456667814988, "learning_rate": 8.377153041574986e-06, "loss": 0.699, "step": 9333 }, { "epoch": 0.28607331126639696, "grad_norm": 1.8344767460913918, "learning_rate": 8.376787028474858e-06, "loss": 0.7942, "step": 9334 }, { "epoch": 0.28610395978913816, "grad_norm": 1.5674144449399843, "learning_rate": 8.376420982102381e-06, "loss": 0.8346, "step": 9335 }, { "epoch": 0.28613460831187937, "grad_norm": 1.7453112521177891, "learning_rate": 8.376054902461166e-06, "loss": 0.8057, "step": 9336 }, { "epoch": 0.2861652568346206, "grad_norm": 1.7737294872478055, "learning_rate": 8.375688789554817e-06, "loss": 0.691, "step": 9337 }, { "epoch": 0.2861959053573618, "grad_norm": 0.8214735691238204, "learning_rate": 8.375322643386943e-06, "loss": 0.4604, "step": 9338 }, { "epoch": 0.286226553880103, "grad_norm": 1.7353876739133571, "learning_rate": 8.37495646396115e-06, "loss": 0.6747, "step": 9339 }, { "epoch": 0.2862572024028442, "grad_norm": 1.7762163930622397, "learning_rate": 8.37459025128105e-06, "loss": 0.6765, "step": 9340 }, { "epoch": 0.2862878509255854, "grad_norm": 1.6230565795538359, "learning_rate": 8.374224005350247e-06, "loss": 0.7696, "step": 9341 }, { "epoch": 0.2863184994483266, "grad_norm": 1.6852963091934803, "learning_rate": 8.373857726172352e-06, "loss": 0.701, "step": 9342 }, { "epoch": 0.2863491479710678, "grad_norm": 1.5886482414677037, "learning_rate": 8.373491413750974e-06, "loss": 0.6737, "step": 9343 }, { "epoch": 0.286379796493809, "grad_norm": 1.5601822488548913, "learning_rate": 8.373125068089722e-06, "loss": 0.7005, "step": 9344 }, { "epoch": 0.2864104450165502, "grad_norm": 0.8220607610321989, "learning_rate": 8.372758689192205e-06, "loss": 0.4807, "step": 9345 }, { "epoch": 0.2864410935392914, "grad_norm": 0.7997837309580321, "learning_rate": 8.372392277062034e-06, "loss": 0.4824, "step": 9346 }, { "epoch": 0.28647174206203263, "grad_norm": 1.7614765868892515, "learning_rate": 8.372025831702819e-06, "loss": 0.7825, "step": 9347 }, { "epoch": 0.28650239058477384, "grad_norm": 1.553130786279322, "learning_rate": 8.37165935311817e-06, "loss": 0.7614, "step": 9348 }, { "epoch": 0.28653303910751504, "grad_norm": 1.6277798574480868, "learning_rate": 8.371292841311701e-06, "loss": 0.7334, "step": 9349 }, { "epoch": 0.28656368763025625, "grad_norm": 1.7669022025865244, "learning_rate": 8.370926296287018e-06, "loss": 0.7098, "step": 9350 }, { "epoch": 0.28659433615299745, "grad_norm": 1.9196513986673736, "learning_rate": 8.370559718047738e-06, "loss": 0.6962, "step": 9351 }, { "epoch": 0.2866249846757386, "grad_norm": 1.8056141227927966, "learning_rate": 8.37019310659747e-06, "loss": 0.7022, "step": 9352 }, { "epoch": 0.2866556331984798, "grad_norm": 1.533397850816452, "learning_rate": 8.369826461939828e-06, "loss": 0.7134, "step": 9353 }, { "epoch": 0.286686281721221, "grad_norm": 1.7782771472393382, "learning_rate": 8.369459784078422e-06, "loss": 0.6846, "step": 9354 }, { "epoch": 0.2867169302439622, "grad_norm": 1.8051944016913208, "learning_rate": 8.369093073016868e-06, "loss": 0.8257, "step": 9355 }, { "epoch": 0.2867475787667034, "grad_norm": 1.7249159106818375, "learning_rate": 8.368726328758775e-06, "loss": 0.6838, "step": 9356 }, { "epoch": 0.28677822728944463, "grad_norm": 1.8123550907517945, "learning_rate": 8.368359551307762e-06, "loss": 0.6851, "step": 9357 }, { "epoch": 0.28680887581218584, "grad_norm": 1.6279252578544074, "learning_rate": 8.36799274066744e-06, "loss": 0.7234, "step": 9358 }, { "epoch": 0.28683952433492704, "grad_norm": 1.6749702647860294, "learning_rate": 8.367625896841425e-06, "loss": 0.7618, "step": 9359 }, { "epoch": 0.28687017285766825, "grad_norm": 1.7181474573733744, "learning_rate": 8.367259019833329e-06, "loss": 0.7029, "step": 9360 }, { "epoch": 0.28690082138040945, "grad_norm": 0.9256939666267076, "learning_rate": 8.36689210964677e-06, "loss": 0.4743, "step": 9361 }, { "epoch": 0.28693146990315066, "grad_norm": 1.9439929306161683, "learning_rate": 8.36652516628536e-06, "loss": 0.7342, "step": 9362 }, { "epoch": 0.28696211842589187, "grad_norm": 1.6274578098142656, "learning_rate": 8.366158189752715e-06, "loss": 0.6557, "step": 9363 }, { "epoch": 0.28699276694863307, "grad_norm": 1.7424920832876183, "learning_rate": 8.365791180052454e-06, "loss": 0.6927, "step": 9364 }, { "epoch": 0.2870234154713743, "grad_norm": 1.7736739252386606, "learning_rate": 8.365424137188192e-06, "loss": 0.7253, "step": 9365 }, { "epoch": 0.2870540639941155, "grad_norm": 1.5652817964776637, "learning_rate": 8.365057061163544e-06, "loss": 0.7143, "step": 9366 }, { "epoch": 0.2870847125168567, "grad_norm": 1.83033375165565, "learning_rate": 8.364689951982126e-06, "loss": 0.6338, "step": 9367 }, { "epoch": 0.2871153610395979, "grad_norm": 1.6039423481559627, "learning_rate": 8.36432280964756e-06, "loss": 0.7088, "step": 9368 }, { "epoch": 0.2871460095623391, "grad_norm": 1.637665008968857, "learning_rate": 8.36395563416346e-06, "loss": 0.6686, "step": 9369 }, { "epoch": 0.2871766580850803, "grad_norm": 1.9079955072604176, "learning_rate": 8.363588425533442e-06, "loss": 0.8243, "step": 9370 }, { "epoch": 0.2872073066078215, "grad_norm": 0.8188259955296027, "learning_rate": 8.363221183761127e-06, "loss": 0.4279, "step": 9371 }, { "epoch": 0.2872379551305627, "grad_norm": 1.7374495503420364, "learning_rate": 8.362853908850136e-06, "loss": 0.5955, "step": 9372 }, { "epoch": 0.2872686036533039, "grad_norm": 1.9050522469316362, "learning_rate": 8.362486600804083e-06, "loss": 0.7276, "step": 9373 }, { "epoch": 0.28729925217604513, "grad_norm": 1.6628178565543164, "learning_rate": 8.36211925962659e-06, "loss": 0.6464, "step": 9374 }, { "epoch": 0.28732990069878633, "grad_norm": 1.5549037912953483, "learning_rate": 8.361751885321274e-06, "loss": 0.6098, "step": 9375 }, { "epoch": 0.28736054922152754, "grad_norm": 1.878011931467197, "learning_rate": 8.36138447789176e-06, "loss": 0.7957, "step": 9376 }, { "epoch": 0.28739119774426874, "grad_norm": 2.0327520842108258, "learning_rate": 8.36101703734166e-06, "loss": 0.743, "step": 9377 }, { "epoch": 0.28742184626700995, "grad_norm": 1.6702777131093967, "learning_rate": 8.360649563674604e-06, "loss": 0.6832, "step": 9378 }, { "epoch": 0.28745249478975116, "grad_norm": 1.8731003589275743, "learning_rate": 8.360282056894205e-06, "loss": 0.7306, "step": 9379 }, { "epoch": 0.28748314331249236, "grad_norm": 1.6652941394506897, "learning_rate": 8.359914517004089e-06, "loss": 0.7034, "step": 9380 }, { "epoch": 0.28751379183523357, "grad_norm": 1.873310610753865, "learning_rate": 8.359546944007873e-06, "loss": 0.7229, "step": 9381 }, { "epoch": 0.2875444403579748, "grad_norm": 0.831474900894518, "learning_rate": 8.359179337909182e-06, "loss": 0.4764, "step": 9382 }, { "epoch": 0.2875750888807159, "grad_norm": 1.6786105533272206, "learning_rate": 8.35881169871164e-06, "loss": 0.6737, "step": 9383 }, { "epoch": 0.28760573740345713, "grad_norm": 1.7344347416177717, "learning_rate": 8.358444026418864e-06, "loss": 0.7475, "step": 9384 }, { "epoch": 0.28763638592619833, "grad_norm": 1.7247867045715655, "learning_rate": 8.35807632103448e-06, "loss": 0.8551, "step": 9385 }, { "epoch": 0.28766703444893954, "grad_norm": 1.5962330837350034, "learning_rate": 8.357708582562114e-06, "loss": 0.7397, "step": 9386 }, { "epoch": 0.28769768297168075, "grad_norm": 1.7770846409950831, "learning_rate": 8.357340811005383e-06, "loss": 0.7841, "step": 9387 }, { "epoch": 0.28772833149442195, "grad_norm": 1.8257012460797744, "learning_rate": 8.356973006367915e-06, "loss": 0.7156, "step": 9388 }, { "epoch": 0.28775898001716316, "grad_norm": 0.779473185656542, "learning_rate": 8.356605168653334e-06, "loss": 0.4636, "step": 9389 }, { "epoch": 0.28778962853990436, "grad_norm": 1.8026584519439353, "learning_rate": 8.356237297865261e-06, "loss": 0.7323, "step": 9390 }, { "epoch": 0.28782027706264557, "grad_norm": 1.4830898017807632, "learning_rate": 8.355869394007326e-06, "loss": 0.6929, "step": 9391 }, { "epoch": 0.2878509255853868, "grad_norm": 1.6787902427542682, "learning_rate": 8.35550145708315e-06, "loss": 0.7059, "step": 9392 }, { "epoch": 0.287881574108128, "grad_norm": 0.7918914208162977, "learning_rate": 8.355133487096358e-06, "loss": 0.4579, "step": 9393 }, { "epoch": 0.2879122226308692, "grad_norm": 1.7668835585010445, "learning_rate": 8.35476548405058e-06, "loss": 0.6634, "step": 9394 }, { "epoch": 0.2879428711536104, "grad_norm": 0.8373900160097685, "learning_rate": 8.354397447949438e-06, "loss": 0.5031, "step": 9395 }, { "epoch": 0.2879735196763516, "grad_norm": 0.7645955874321492, "learning_rate": 8.35402937879656e-06, "loss": 0.4615, "step": 9396 }, { "epoch": 0.2880041681990928, "grad_norm": 1.492643022630482, "learning_rate": 8.35366127659557e-06, "loss": 0.6819, "step": 9397 }, { "epoch": 0.288034816721834, "grad_norm": 1.7180355280869053, "learning_rate": 8.353293141350101e-06, "loss": 0.6688, "step": 9398 }, { "epoch": 0.2880654652445752, "grad_norm": 1.552306838784709, "learning_rate": 8.352924973063776e-06, "loss": 0.6662, "step": 9399 }, { "epoch": 0.2880961137673164, "grad_norm": 1.6179083160579606, "learning_rate": 8.35255677174022e-06, "loss": 0.7346, "step": 9400 }, { "epoch": 0.2881267622900576, "grad_norm": 0.8183851511320228, "learning_rate": 8.352188537383069e-06, "loss": 0.4805, "step": 9401 }, { "epoch": 0.28815741081279883, "grad_norm": 1.748191106953706, "learning_rate": 8.351820269995945e-06, "loss": 0.7884, "step": 9402 }, { "epoch": 0.28818805933554004, "grad_norm": 1.8377309098113206, "learning_rate": 8.351451969582478e-06, "loss": 0.7067, "step": 9403 }, { "epoch": 0.28821870785828124, "grad_norm": 1.7593593554793403, "learning_rate": 8.351083636146296e-06, "loss": 0.6869, "step": 9404 }, { "epoch": 0.28824935638102245, "grad_norm": 1.573403627449142, "learning_rate": 8.35071526969103e-06, "loss": 0.6684, "step": 9405 }, { "epoch": 0.28828000490376365, "grad_norm": 1.9413469921088011, "learning_rate": 8.350346870220311e-06, "loss": 0.7932, "step": 9406 }, { "epoch": 0.28831065342650486, "grad_norm": 0.7872641010621099, "learning_rate": 8.349978437737765e-06, "loss": 0.4665, "step": 9407 }, { "epoch": 0.28834130194924606, "grad_norm": 1.7747680562540749, "learning_rate": 8.349609972247026e-06, "loss": 0.6858, "step": 9408 }, { "epoch": 0.28837195047198727, "grad_norm": 1.8635696568725424, "learning_rate": 8.349241473751721e-06, "loss": 0.7531, "step": 9409 }, { "epoch": 0.2884025989947285, "grad_norm": 1.603952386687509, "learning_rate": 8.348872942255484e-06, "loss": 0.6505, "step": 9410 }, { "epoch": 0.2884332475174697, "grad_norm": 1.7815121262355096, "learning_rate": 8.348504377761945e-06, "loss": 0.6101, "step": 9411 }, { "epoch": 0.2884638960402109, "grad_norm": 1.8850120839082185, "learning_rate": 8.348135780274735e-06, "loss": 0.7527, "step": 9412 }, { "epoch": 0.2884945445629521, "grad_norm": 1.879046193560528, "learning_rate": 8.347767149797488e-06, "loss": 0.6853, "step": 9413 }, { "epoch": 0.28852519308569324, "grad_norm": 0.7757619127112889, "learning_rate": 8.347398486333835e-06, "loss": 0.4616, "step": 9414 }, { "epoch": 0.28855584160843445, "grad_norm": 2.1326473103521377, "learning_rate": 8.347029789887406e-06, "loss": 0.6176, "step": 9415 }, { "epoch": 0.28858649013117565, "grad_norm": 1.7008664855304338, "learning_rate": 8.346661060461838e-06, "loss": 0.6653, "step": 9416 }, { "epoch": 0.28861713865391686, "grad_norm": 0.7876106402064572, "learning_rate": 8.34629229806076e-06, "loss": 0.4632, "step": 9417 }, { "epoch": 0.28864778717665807, "grad_norm": 1.7466845764210412, "learning_rate": 8.34592350268781e-06, "loss": 0.6182, "step": 9418 }, { "epoch": 0.28867843569939927, "grad_norm": 1.5202405066256166, "learning_rate": 8.345554674346618e-06, "loss": 0.6537, "step": 9419 }, { "epoch": 0.2887090842221405, "grad_norm": 1.8131298756880225, "learning_rate": 8.345185813040822e-06, "loss": 0.6404, "step": 9420 }, { "epoch": 0.2887397327448817, "grad_norm": 0.8045689482674163, "learning_rate": 8.344816918774052e-06, "loss": 0.4827, "step": 9421 }, { "epoch": 0.2887703812676229, "grad_norm": 1.5098460613860174, "learning_rate": 8.344447991549947e-06, "loss": 0.7419, "step": 9422 }, { "epoch": 0.2888010297903641, "grad_norm": 1.8603005467595823, "learning_rate": 8.344079031372138e-06, "loss": 0.6823, "step": 9423 }, { "epoch": 0.2888316783131053, "grad_norm": 1.852801400075066, "learning_rate": 8.343710038244264e-06, "loss": 0.7347, "step": 9424 }, { "epoch": 0.2888623268358465, "grad_norm": 1.7954596113926022, "learning_rate": 8.343341012169958e-06, "loss": 0.6707, "step": 9425 }, { "epoch": 0.2888929753585877, "grad_norm": 1.7960224215982565, "learning_rate": 8.34297195315286e-06, "loss": 0.6574, "step": 9426 }, { "epoch": 0.2889236238813289, "grad_norm": 1.732274580824317, "learning_rate": 8.342602861196603e-06, "loss": 0.6852, "step": 9427 }, { "epoch": 0.2889542724040701, "grad_norm": 1.540423407356877, "learning_rate": 8.342233736304824e-06, "loss": 0.6537, "step": 9428 }, { "epoch": 0.28898492092681133, "grad_norm": 1.6985578323112127, "learning_rate": 8.341864578481162e-06, "loss": 0.6822, "step": 9429 }, { "epoch": 0.28901556944955253, "grad_norm": 0.8500205995321624, "learning_rate": 8.341495387729253e-06, "loss": 0.4642, "step": 9430 }, { "epoch": 0.28904621797229374, "grad_norm": 0.8661546511810417, "learning_rate": 8.341126164052735e-06, "loss": 0.4831, "step": 9431 }, { "epoch": 0.28907686649503495, "grad_norm": 0.7784973377934908, "learning_rate": 8.340756907455246e-06, "loss": 0.472, "step": 9432 }, { "epoch": 0.28910751501777615, "grad_norm": 1.9024267751365465, "learning_rate": 8.340387617940424e-06, "loss": 0.7415, "step": 9433 }, { "epoch": 0.28913816354051736, "grad_norm": 0.8201640726044672, "learning_rate": 8.340018295511908e-06, "loss": 0.4535, "step": 9434 }, { "epoch": 0.28916881206325856, "grad_norm": 0.8100399532711696, "learning_rate": 8.339648940173337e-06, "loss": 0.4736, "step": 9435 }, { "epoch": 0.28919946058599977, "grad_norm": 1.7625298156217255, "learning_rate": 8.339279551928351e-06, "loss": 0.6125, "step": 9436 }, { "epoch": 0.289230109108741, "grad_norm": 1.7056375538815391, "learning_rate": 8.338910130780591e-06, "loss": 0.6761, "step": 9437 }, { "epoch": 0.2892607576314822, "grad_norm": 1.7571615682168633, "learning_rate": 8.338540676733693e-06, "loss": 0.744, "step": 9438 }, { "epoch": 0.2892914061542234, "grad_norm": 1.8720244018958638, "learning_rate": 8.3381711897913e-06, "loss": 0.7199, "step": 9439 }, { "epoch": 0.2893220546769646, "grad_norm": 1.8061964092132987, "learning_rate": 8.337801669957052e-06, "loss": 0.6988, "step": 9440 }, { "epoch": 0.2893527031997058, "grad_norm": 1.7589053430266535, "learning_rate": 8.337432117234591e-06, "loss": 0.7245, "step": 9441 }, { "epoch": 0.289383351722447, "grad_norm": 1.9271075402285258, "learning_rate": 8.337062531627556e-06, "loss": 0.8501, "step": 9442 }, { "epoch": 0.2894140002451882, "grad_norm": 1.930465620973963, "learning_rate": 8.33669291313959e-06, "loss": 0.7751, "step": 9443 }, { "epoch": 0.2894446487679294, "grad_norm": 1.7658179169237362, "learning_rate": 8.336323261774336e-06, "loss": 0.7471, "step": 9444 }, { "epoch": 0.28947529729067056, "grad_norm": 1.6334923391093263, "learning_rate": 8.335953577535437e-06, "loss": 0.6243, "step": 9445 }, { "epoch": 0.28950594581341177, "grad_norm": 1.5620716317628844, "learning_rate": 8.33558386042653e-06, "loss": 0.6371, "step": 9446 }, { "epoch": 0.289536594336153, "grad_norm": 1.7832014567306542, "learning_rate": 8.335214110451264e-06, "loss": 0.7073, "step": 9447 }, { "epoch": 0.2895672428588942, "grad_norm": 1.5361840272700023, "learning_rate": 8.334844327613278e-06, "loss": 0.6664, "step": 9448 }, { "epoch": 0.2895978913816354, "grad_norm": 1.783690308731656, "learning_rate": 8.33447451191622e-06, "loss": 0.7206, "step": 9449 }, { "epoch": 0.2896285399043766, "grad_norm": 1.0010185917377916, "learning_rate": 8.334104663363732e-06, "loss": 0.4823, "step": 9450 }, { "epoch": 0.2896591884271178, "grad_norm": 1.6290337728924806, "learning_rate": 8.333734781959456e-06, "loss": 0.7236, "step": 9451 }, { "epoch": 0.289689836949859, "grad_norm": 0.8411490796757592, "learning_rate": 8.333364867707038e-06, "loss": 0.4678, "step": 9452 }, { "epoch": 0.2897204854726002, "grad_norm": 1.7273968764226562, "learning_rate": 8.332994920610125e-06, "loss": 0.662, "step": 9453 }, { "epoch": 0.2897511339953414, "grad_norm": 1.7469988842468949, "learning_rate": 8.332624940672358e-06, "loss": 0.7207, "step": 9454 }, { "epoch": 0.2897817825180826, "grad_norm": 1.6166085087810176, "learning_rate": 8.332254927897386e-06, "loss": 0.5631, "step": 9455 }, { "epoch": 0.2898124310408238, "grad_norm": 1.9456451599187317, "learning_rate": 8.331884882288852e-06, "loss": 0.6925, "step": 9456 }, { "epoch": 0.28984307956356503, "grad_norm": 1.7707265027287478, "learning_rate": 8.331514803850406e-06, "loss": 0.6999, "step": 9457 }, { "epoch": 0.28987372808630624, "grad_norm": 1.7103824893411192, "learning_rate": 8.33114469258569e-06, "loss": 0.7214, "step": 9458 }, { "epoch": 0.28990437660904744, "grad_norm": 1.9549671122298589, "learning_rate": 8.330774548498356e-06, "loss": 0.6764, "step": 9459 }, { "epoch": 0.28993502513178865, "grad_norm": 0.9989994270180698, "learning_rate": 8.330404371592046e-06, "loss": 0.4545, "step": 9460 }, { "epoch": 0.28996567365452985, "grad_norm": 0.9561704246313809, "learning_rate": 8.33003416187041e-06, "loss": 0.4703, "step": 9461 }, { "epoch": 0.28999632217727106, "grad_norm": 1.8141077828722114, "learning_rate": 8.329663919337096e-06, "loss": 0.6785, "step": 9462 }, { "epoch": 0.29002697070001227, "grad_norm": 1.554029495940326, "learning_rate": 8.32929364399575e-06, "loss": 0.7348, "step": 9463 }, { "epoch": 0.29005761922275347, "grad_norm": 1.716903808493168, "learning_rate": 8.328923335850023e-06, "loss": 0.7092, "step": 9464 }, { "epoch": 0.2900882677454947, "grad_norm": 1.0245547169517795, "learning_rate": 8.328552994903562e-06, "loss": 0.4873, "step": 9465 }, { "epoch": 0.2901189162682359, "grad_norm": 1.7022558118823918, "learning_rate": 8.328182621160018e-06, "loss": 0.684, "step": 9466 }, { "epoch": 0.2901495647909771, "grad_norm": 1.8315384393496157, "learning_rate": 8.327812214623037e-06, "loss": 0.7311, "step": 9467 }, { "epoch": 0.2901802133137183, "grad_norm": 1.8322233197730378, "learning_rate": 8.327441775296273e-06, "loss": 0.7366, "step": 9468 }, { "epoch": 0.2902108618364595, "grad_norm": 1.9153333745605614, "learning_rate": 8.327071303183374e-06, "loss": 0.8235, "step": 9469 }, { "epoch": 0.2902415103592007, "grad_norm": 1.885992141546536, "learning_rate": 8.326700798287988e-06, "loss": 0.723, "step": 9470 }, { "epoch": 0.2902721588819419, "grad_norm": 0.8184267760500369, "learning_rate": 8.326330260613768e-06, "loss": 0.4471, "step": 9471 }, { "epoch": 0.2903028074046831, "grad_norm": 1.644866297816928, "learning_rate": 8.325959690164367e-06, "loss": 0.6294, "step": 9472 }, { "epoch": 0.2903334559274243, "grad_norm": 0.8462473064209126, "learning_rate": 8.325589086943433e-06, "loss": 0.497, "step": 9473 }, { "epoch": 0.2903641044501655, "grad_norm": 1.8785102598412868, "learning_rate": 8.325218450954619e-06, "loss": 0.7101, "step": 9474 }, { "epoch": 0.29039475297290673, "grad_norm": 0.8033676196456374, "learning_rate": 8.324847782201576e-06, "loss": 0.4527, "step": 9475 }, { "epoch": 0.2904254014956479, "grad_norm": 1.849065447181978, "learning_rate": 8.324477080687959e-06, "loss": 0.7465, "step": 9476 }, { "epoch": 0.2904560500183891, "grad_norm": 1.7933733545549817, "learning_rate": 8.324106346417416e-06, "loss": 0.7275, "step": 9477 }, { "epoch": 0.2904866985411303, "grad_norm": 1.9072950683956682, "learning_rate": 8.323735579393604e-06, "loss": 0.7203, "step": 9478 }, { "epoch": 0.2905173470638715, "grad_norm": 1.951661960186648, "learning_rate": 8.323364779620176e-06, "loss": 0.7277, "step": 9479 }, { "epoch": 0.2905479955866127, "grad_norm": 1.761221347585483, "learning_rate": 8.322993947100783e-06, "loss": 0.7248, "step": 9480 }, { "epoch": 0.2905786441093539, "grad_norm": 0.8997221027195564, "learning_rate": 8.32262308183908e-06, "loss": 0.4826, "step": 9481 }, { "epoch": 0.2906092926320951, "grad_norm": 1.8173352936925125, "learning_rate": 8.322252183838723e-06, "loss": 0.7493, "step": 9482 }, { "epoch": 0.2906399411548363, "grad_norm": 1.9368854655840666, "learning_rate": 8.321881253103366e-06, "loss": 0.7468, "step": 9483 }, { "epoch": 0.29067058967757753, "grad_norm": 1.9495074377496067, "learning_rate": 8.32151028963666e-06, "loss": 0.7816, "step": 9484 }, { "epoch": 0.29070123820031873, "grad_norm": 1.701796328171423, "learning_rate": 8.321139293442266e-06, "loss": 0.735, "step": 9485 }, { "epoch": 0.29073188672305994, "grad_norm": 1.9176431303841588, "learning_rate": 8.320768264523835e-06, "loss": 0.8292, "step": 9486 }, { "epoch": 0.29076253524580115, "grad_norm": 1.8786057370514098, "learning_rate": 8.320397202885027e-06, "loss": 0.6923, "step": 9487 }, { "epoch": 0.29079318376854235, "grad_norm": 1.6312181134741708, "learning_rate": 8.320026108529494e-06, "loss": 0.6894, "step": 9488 }, { "epoch": 0.29082383229128356, "grad_norm": 1.7693463633436812, "learning_rate": 8.319654981460895e-06, "loss": 0.6621, "step": 9489 }, { "epoch": 0.29085448081402476, "grad_norm": 1.9551641012801197, "learning_rate": 8.319283821682885e-06, "loss": 0.737, "step": 9490 }, { "epoch": 0.29088512933676597, "grad_norm": 1.707133686967687, "learning_rate": 8.318912629199123e-06, "loss": 0.6302, "step": 9491 }, { "epoch": 0.2909157778595072, "grad_norm": 1.594028835626414, "learning_rate": 8.318541404013264e-06, "loss": 0.6851, "step": 9492 }, { "epoch": 0.2909464263822484, "grad_norm": 1.789330830820213, "learning_rate": 8.31817014612897e-06, "loss": 0.7551, "step": 9493 }, { "epoch": 0.2909770749049896, "grad_norm": 0.8961335532870534, "learning_rate": 8.317798855549897e-06, "loss": 0.4682, "step": 9494 }, { "epoch": 0.2910077234277308, "grad_norm": 0.7846745199185511, "learning_rate": 8.317427532279702e-06, "loss": 0.4732, "step": 9495 }, { "epoch": 0.291038371950472, "grad_norm": 1.954536716126634, "learning_rate": 8.317056176322044e-06, "loss": 0.7392, "step": 9496 }, { "epoch": 0.2910690204732132, "grad_norm": 0.798685125179029, "learning_rate": 8.316684787680582e-06, "loss": 0.4765, "step": 9497 }, { "epoch": 0.2910996689959544, "grad_norm": 1.6254408350127807, "learning_rate": 8.316313366358978e-06, "loss": 0.7789, "step": 9498 }, { "epoch": 0.2911303175186956, "grad_norm": 1.6869622334803056, "learning_rate": 8.31594191236089e-06, "loss": 0.6904, "step": 9499 }, { "epoch": 0.2911609660414368, "grad_norm": 1.910559549945184, "learning_rate": 8.315570425689975e-06, "loss": 0.7752, "step": 9500 }, { "epoch": 0.291191614564178, "grad_norm": 1.8008266076307344, "learning_rate": 8.3151989063499e-06, "loss": 0.7408, "step": 9501 }, { "epoch": 0.29122226308691923, "grad_norm": 1.7832518797831276, "learning_rate": 8.314827354344318e-06, "loss": 0.744, "step": 9502 }, { "epoch": 0.29125291160966044, "grad_norm": 1.8507740696715096, "learning_rate": 8.3144557696769e-06, "loss": 0.8195, "step": 9503 }, { "epoch": 0.29128356013240164, "grad_norm": 1.9672959380628172, "learning_rate": 8.314084152351297e-06, "loss": 0.8299, "step": 9504 }, { "epoch": 0.29131420865514285, "grad_norm": 1.8102805773206159, "learning_rate": 8.313712502371174e-06, "loss": 0.7905, "step": 9505 }, { "epoch": 0.29134485717788405, "grad_norm": 1.8469636902847617, "learning_rate": 8.313340819740195e-06, "loss": 0.7329, "step": 9506 }, { "epoch": 0.2913755057006252, "grad_norm": 1.7331564292418975, "learning_rate": 8.312969104462024e-06, "loss": 0.7151, "step": 9507 }, { "epoch": 0.2914061542233664, "grad_norm": 0.8523882525105428, "learning_rate": 8.312597356540316e-06, "loss": 0.4686, "step": 9508 }, { "epoch": 0.2914368027461076, "grad_norm": 0.8191604749304303, "learning_rate": 8.312225575978741e-06, "loss": 0.5, "step": 9509 }, { "epoch": 0.2914674512688488, "grad_norm": 1.7770143800814282, "learning_rate": 8.311853762780959e-06, "loss": 0.6303, "step": 9510 }, { "epoch": 0.29149809979159, "grad_norm": 1.6796780043204647, "learning_rate": 8.311481916950636e-06, "loss": 0.6209, "step": 9511 }, { "epoch": 0.29152874831433123, "grad_norm": 0.811790271994923, "learning_rate": 8.311110038491435e-06, "loss": 0.4735, "step": 9512 }, { "epoch": 0.29155939683707244, "grad_norm": 1.666410834924763, "learning_rate": 8.310738127407017e-06, "loss": 0.6131, "step": 9513 }, { "epoch": 0.29159004535981364, "grad_norm": 1.6943509169550826, "learning_rate": 8.310366183701051e-06, "loss": 0.654, "step": 9514 }, { "epoch": 0.29162069388255485, "grad_norm": 1.8089950612005363, "learning_rate": 8.3099942073772e-06, "loss": 0.7179, "step": 9515 }, { "epoch": 0.29165134240529605, "grad_norm": 1.6822528206109522, "learning_rate": 8.30962219843913e-06, "loss": 0.6224, "step": 9516 }, { "epoch": 0.29168199092803726, "grad_norm": 0.8267982168329929, "learning_rate": 8.309250156890502e-06, "loss": 0.4843, "step": 9517 }, { "epoch": 0.29171263945077847, "grad_norm": 1.6477471121465013, "learning_rate": 8.308878082734988e-06, "loss": 0.6244, "step": 9518 }, { "epoch": 0.29174328797351967, "grad_norm": 1.533334307983797, "learning_rate": 8.308505975976252e-06, "loss": 0.7093, "step": 9519 }, { "epoch": 0.2917739364962609, "grad_norm": 1.830120773681969, "learning_rate": 8.30813383661796e-06, "loss": 0.737, "step": 9520 }, { "epoch": 0.2918045850190021, "grad_norm": 1.8655641572046773, "learning_rate": 8.307761664663778e-06, "loss": 0.7511, "step": 9521 }, { "epoch": 0.2918352335417433, "grad_norm": 1.5622069996704178, "learning_rate": 8.307389460117375e-06, "loss": 0.6549, "step": 9522 }, { "epoch": 0.2918658820644845, "grad_norm": 1.656292267623811, "learning_rate": 8.307017222982416e-06, "loss": 0.6956, "step": 9523 }, { "epoch": 0.2918965305872257, "grad_norm": 0.8560759267502712, "learning_rate": 8.306644953262571e-06, "loss": 0.4824, "step": 9524 }, { "epoch": 0.2919271791099669, "grad_norm": 1.7701996804925717, "learning_rate": 8.306272650961507e-06, "loss": 0.7519, "step": 9525 }, { "epoch": 0.2919578276327081, "grad_norm": 1.8030047554884332, "learning_rate": 8.305900316082893e-06, "loss": 0.6823, "step": 9526 }, { "epoch": 0.2919884761554493, "grad_norm": 1.6070083362755552, "learning_rate": 8.305527948630398e-06, "loss": 0.6603, "step": 9527 }, { "epoch": 0.2920191246781905, "grad_norm": 1.6482210657530807, "learning_rate": 8.305155548607688e-06, "loss": 0.6728, "step": 9528 }, { "epoch": 0.29204977320093173, "grad_norm": 1.8088759208574212, "learning_rate": 8.304783116018437e-06, "loss": 0.7395, "step": 9529 }, { "epoch": 0.29208042172367293, "grad_norm": 1.6472932850530546, "learning_rate": 8.304410650866312e-06, "loss": 0.7223, "step": 9530 }, { "epoch": 0.29211107024641414, "grad_norm": 1.6167705344827272, "learning_rate": 8.304038153154983e-06, "loss": 0.6262, "step": 9531 }, { "epoch": 0.29214171876915535, "grad_norm": 1.8133451408484975, "learning_rate": 8.303665622888121e-06, "loss": 0.7065, "step": 9532 }, { "epoch": 0.29217236729189655, "grad_norm": 1.7917427384124829, "learning_rate": 8.303293060069394e-06, "loss": 0.6926, "step": 9533 }, { "epoch": 0.29220301581463776, "grad_norm": 1.681103009398211, "learning_rate": 8.30292046470248e-06, "loss": 0.6814, "step": 9534 }, { "epoch": 0.29223366433737896, "grad_norm": 1.8532409215290098, "learning_rate": 8.302547836791042e-06, "loss": 0.6192, "step": 9535 }, { "epoch": 0.29226431286012017, "grad_norm": 0.8428901135715134, "learning_rate": 8.302175176338756e-06, "loss": 0.4745, "step": 9536 }, { "epoch": 0.2922949613828614, "grad_norm": 1.5801971249255995, "learning_rate": 8.301802483349293e-06, "loss": 0.5826, "step": 9537 }, { "epoch": 0.2923256099056025, "grad_norm": 1.818730619392167, "learning_rate": 8.301429757826326e-06, "loss": 0.7378, "step": 9538 }, { "epoch": 0.29235625842834373, "grad_norm": 0.8156706592920703, "learning_rate": 8.301056999773527e-06, "loss": 0.4858, "step": 9539 }, { "epoch": 0.29238690695108494, "grad_norm": 1.5590967794061314, "learning_rate": 8.300684209194567e-06, "loss": 0.6003, "step": 9540 }, { "epoch": 0.29241755547382614, "grad_norm": 1.6383598682133178, "learning_rate": 8.300311386093122e-06, "loss": 0.6203, "step": 9541 }, { "epoch": 0.29244820399656735, "grad_norm": 1.773376675961955, "learning_rate": 8.299938530472866e-06, "loss": 0.6791, "step": 9542 }, { "epoch": 0.29247885251930855, "grad_norm": 0.8851577372718185, "learning_rate": 8.29956564233747e-06, "loss": 0.4836, "step": 9543 }, { "epoch": 0.29250950104204976, "grad_norm": 1.7594099968171764, "learning_rate": 8.299192721690609e-06, "loss": 0.6659, "step": 9544 }, { "epoch": 0.29254014956479096, "grad_norm": 1.7919578688971605, "learning_rate": 8.298819768535959e-06, "loss": 0.7496, "step": 9545 }, { "epoch": 0.29257079808753217, "grad_norm": 1.8522509453191067, "learning_rate": 8.298446782877194e-06, "loss": 0.6657, "step": 9546 }, { "epoch": 0.2926014466102734, "grad_norm": 1.9561276505621437, "learning_rate": 8.298073764717988e-06, "loss": 0.6791, "step": 9547 }, { "epoch": 0.2926320951330146, "grad_norm": 1.6627746683887255, "learning_rate": 8.297700714062017e-06, "loss": 0.6873, "step": 9548 }, { "epoch": 0.2926627436557558, "grad_norm": 1.740622217369237, "learning_rate": 8.297327630912958e-06, "loss": 0.7137, "step": 9549 }, { "epoch": 0.292693392178497, "grad_norm": 0.8264121475064846, "learning_rate": 8.296954515274485e-06, "loss": 0.4686, "step": 9550 }, { "epoch": 0.2927240407012382, "grad_norm": 1.688076153521424, "learning_rate": 8.296581367150277e-06, "loss": 0.7162, "step": 9551 }, { "epoch": 0.2927546892239794, "grad_norm": 1.9184748425928941, "learning_rate": 8.296208186544008e-06, "loss": 0.6707, "step": 9552 }, { "epoch": 0.2927853377467206, "grad_norm": 0.8240812251782681, "learning_rate": 8.295834973459358e-06, "loss": 0.4618, "step": 9553 }, { "epoch": 0.2928159862694618, "grad_norm": 1.6656975196375954, "learning_rate": 8.295461727900003e-06, "loss": 0.7012, "step": 9554 }, { "epoch": 0.292846634792203, "grad_norm": 1.6933451550650578, "learning_rate": 8.295088449869619e-06, "loss": 0.6965, "step": 9555 }, { "epoch": 0.2928772833149442, "grad_norm": 1.8212714359875837, "learning_rate": 8.294715139371885e-06, "loss": 0.6942, "step": 9556 }, { "epoch": 0.29290793183768543, "grad_norm": 1.6709743667686983, "learning_rate": 8.29434179641048e-06, "loss": 0.6729, "step": 9557 }, { "epoch": 0.29293858036042664, "grad_norm": 1.6473471259932184, "learning_rate": 8.293968420989083e-06, "loss": 0.713, "step": 9558 }, { "epoch": 0.29296922888316784, "grad_norm": 1.8449861590402583, "learning_rate": 8.293595013111373e-06, "loss": 0.6936, "step": 9559 }, { "epoch": 0.29299987740590905, "grad_norm": 1.730079668441436, "learning_rate": 8.293221572781027e-06, "loss": 0.6529, "step": 9560 }, { "epoch": 0.29303052592865025, "grad_norm": 1.8329555624082532, "learning_rate": 8.292848100001727e-06, "loss": 0.7235, "step": 9561 }, { "epoch": 0.29306117445139146, "grad_norm": 2.0265392293185718, "learning_rate": 8.292474594777152e-06, "loss": 0.6809, "step": 9562 }, { "epoch": 0.29309182297413267, "grad_norm": 1.60781418710933, "learning_rate": 8.292101057110982e-06, "loss": 0.6043, "step": 9563 }, { "epoch": 0.29312247149687387, "grad_norm": 1.7598212696297255, "learning_rate": 8.2917274870069e-06, "loss": 0.6726, "step": 9564 }, { "epoch": 0.2931531200196151, "grad_norm": 1.676908305567569, "learning_rate": 8.291353884468583e-06, "loss": 0.6621, "step": 9565 }, { "epoch": 0.2931837685423563, "grad_norm": 1.4115253294046564, "learning_rate": 8.290980249499714e-06, "loss": 0.5762, "step": 9566 }, { "epoch": 0.2932144170650975, "grad_norm": 1.5712568842428292, "learning_rate": 8.290606582103975e-06, "loss": 0.7484, "step": 9567 }, { "epoch": 0.2932450655878387, "grad_norm": 1.5883211087757287, "learning_rate": 8.290232882285047e-06, "loss": 0.6349, "step": 9568 }, { "epoch": 0.29327571411057984, "grad_norm": 1.841003940034072, "learning_rate": 8.289859150046614e-06, "loss": 0.723, "step": 9569 }, { "epoch": 0.29330636263332105, "grad_norm": 1.910917775334536, "learning_rate": 8.289485385392356e-06, "loss": 0.7255, "step": 9570 }, { "epoch": 0.29333701115606226, "grad_norm": 1.6834436768901402, "learning_rate": 8.289111588325956e-06, "loss": 0.6629, "step": 9571 }, { "epoch": 0.29336765967880346, "grad_norm": 1.7278417803506956, "learning_rate": 8.2887377588511e-06, "loss": 0.6665, "step": 9572 }, { "epoch": 0.29339830820154467, "grad_norm": 1.0206536210509876, "learning_rate": 8.288363896971468e-06, "loss": 0.4995, "step": 9573 }, { "epoch": 0.29342895672428587, "grad_norm": 1.74830805705043, "learning_rate": 8.287990002690746e-06, "loss": 0.6767, "step": 9574 }, { "epoch": 0.2934596052470271, "grad_norm": 0.857287786119707, "learning_rate": 8.287616076012617e-06, "loss": 0.4998, "step": 9575 }, { "epoch": 0.2934902537697683, "grad_norm": 1.7675407142010058, "learning_rate": 8.287242116940765e-06, "loss": 0.6836, "step": 9576 }, { "epoch": 0.2935209022925095, "grad_norm": 1.91288585086657, "learning_rate": 8.286868125478876e-06, "loss": 0.7548, "step": 9577 }, { "epoch": 0.2935515508152507, "grad_norm": 1.5985969074508026, "learning_rate": 8.286494101630633e-06, "loss": 0.6238, "step": 9578 }, { "epoch": 0.2935821993379919, "grad_norm": 0.8720733932580829, "learning_rate": 8.286120045399724e-06, "loss": 0.4822, "step": 9579 }, { "epoch": 0.2936128478607331, "grad_norm": 1.597337418001928, "learning_rate": 8.285745956789832e-06, "loss": 0.669, "step": 9580 }, { "epoch": 0.2936434963834743, "grad_norm": 1.7202752174613822, "learning_rate": 8.285371835804646e-06, "loss": 0.6711, "step": 9581 }, { "epoch": 0.2936741449062155, "grad_norm": 0.913178313289372, "learning_rate": 8.28499768244785e-06, "loss": 0.4847, "step": 9582 }, { "epoch": 0.2937047934289567, "grad_norm": 1.3675438107101816, "learning_rate": 8.284623496723132e-06, "loss": 0.4514, "step": 9583 }, { "epoch": 0.29373544195169793, "grad_norm": 1.8459873513177227, "learning_rate": 8.284249278634178e-06, "loss": 0.7306, "step": 9584 }, { "epoch": 0.29376609047443913, "grad_norm": 1.4401959080267708, "learning_rate": 8.283875028184676e-06, "loss": 0.7361, "step": 9585 }, { "epoch": 0.29379673899718034, "grad_norm": 1.648156082049406, "learning_rate": 8.283500745378312e-06, "loss": 0.5905, "step": 9586 }, { "epoch": 0.29382738751992155, "grad_norm": 1.7655506865470223, "learning_rate": 8.283126430218776e-06, "loss": 0.6633, "step": 9587 }, { "epoch": 0.29385803604266275, "grad_norm": 1.6378100949963876, "learning_rate": 8.282752082709755e-06, "loss": 0.63, "step": 9588 }, { "epoch": 0.29388868456540396, "grad_norm": 1.5258806815131736, "learning_rate": 8.282377702854937e-06, "loss": 0.627, "step": 9589 }, { "epoch": 0.29391933308814516, "grad_norm": 1.7704499122474897, "learning_rate": 8.282003290658012e-06, "loss": 0.6753, "step": 9590 }, { "epoch": 0.29394998161088637, "grad_norm": 1.837963641793121, "learning_rate": 8.281628846122668e-06, "loss": 0.7507, "step": 9591 }, { "epoch": 0.2939806301336276, "grad_norm": 1.7892303277773156, "learning_rate": 8.281254369252598e-06, "loss": 0.6882, "step": 9592 }, { "epoch": 0.2940112786563688, "grad_norm": 1.2203541969707108, "learning_rate": 8.280879860051488e-06, "loss": 0.4836, "step": 9593 }, { "epoch": 0.29404192717911, "grad_norm": 1.8081877497120336, "learning_rate": 8.280505318523028e-06, "loss": 0.7633, "step": 9594 }, { "epoch": 0.2940725757018512, "grad_norm": 1.635407355938089, "learning_rate": 8.28013074467091e-06, "loss": 0.6057, "step": 9595 }, { "epoch": 0.2941032242245924, "grad_norm": 1.7089131636749733, "learning_rate": 8.279756138498826e-06, "loss": 0.7144, "step": 9596 }, { "epoch": 0.2941338727473336, "grad_norm": 0.7778193624298518, "learning_rate": 8.279381500010466e-06, "loss": 0.4676, "step": 9597 }, { "epoch": 0.2941645212700748, "grad_norm": 1.7069161339861105, "learning_rate": 8.279006829209519e-06, "loss": 0.7831, "step": 9598 }, { "epoch": 0.294195169792816, "grad_norm": 0.8554194408639904, "learning_rate": 8.27863212609968e-06, "loss": 0.508, "step": 9599 }, { "epoch": 0.29422581831555716, "grad_norm": 1.5604844891527805, "learning_rate": 8.278257390684639e-06, "loss": 0.7057, "step": 9600 }, { "epoch": 0.29425646683829837, "grad_norm": 1.6552978489075243, "learning_rate": 8.277882622968089e-06, "loss": 0.6225, "step": 9601 }, { "epoch": 0.2942871153610396, "grad_norm": 1.970049522338308, "learning_rate": 8.277507822953722e-06, "loss": 0.6722, "step": 9602 }, { "epoch": 0.2943177638837808, "grad_norm": 1.6852486273466463, "learning_rate": 8.277132990645235e-06, "loss": 0.6821, "step": 9603 }, { "epoch": 0.294348412406522, "grad_norm": 1.9399925259076678, "learning_rate": 8.276758126046316e-06, "loss": 0.8376, "step": 9604 }, { "epoch": 0.2943790609292632, "grad_norm": 1.6794306761610975, "learning_rate": 8.27638322916066e-06, "loss": 0.6533, "step": 9605 }, { "epoch": 0.2944097094520044, "grad_norm": 1.8970666698777485, "learning_rate": 8.276008299991965e-06, "loss": 0.765, "step": 9606 }, { "epoch": 0.2944403579747456, "grad_norm": 1.716754262172186, "learning_rate": 8.275633338543918e-06, "loss": 0.6791, "step": 9607 }, { "epoch": 0.2944710064974868, "grad_norm": 1.8484353467277177, "learning_rate": 8.27525834482022e-06, "loss": 0.673, "step": 9608 }, { "epoch": 0.294501655020228, "grad_norm": 1.683231124825208, "learning_rate": 8.274883318824563e-06, "loss": 0.7541, "step": 9609 }, { "epoch": 0.2945323035429692, "grad_norm": 1.6002508484152538, "learning_rate": 8.274508260560644e-06, "loss": 0.7084, "step": 9610 }, { "epoch": 0.2945629520657104, "grad_norm": 1.886222974347158, "learning_rate": 8.274133170032155e-06, "loss": 0.6518, "step": 9611 }, { "epoch": 0.29459360058845163, "grad_norm": 1.5852865915055372, "learning_rate": 8.273758047242795e-06, "loss": 0.68, "step": 9612 }, { "epoch": 0.29462424911119284, "grad_norm": 0.9332532370062857, "learning_rate": 8.27338289219626e-06, "loss": 0.4747, "step": 9613 }, { "epoch": 0.29465489763393404, "grad_norm": 1.676745123738552, "learning_rate": 8.273007704896246e-06, "loss": 0.6725, "step": 9614 }, { "epoch": 0.29468554615667525, "grad_norm": 0.8560412203699387, "learning_rate": 8.272632485346449e-06, "loss": 0.4632, "step": 9615 }, { "epoch": 0.29471619467941645, "grad_norm": 1.7538249126668084, "learning_rate": 8.272257233550566e-06, "loss": 0.7553, "step": 9616 }, { "epoch": 0.29474684320215766, "grad_norm": 1.8385204239586357, "learning_rate": 8.271881949512297e-06, "loss": 0.6947, "step": 9617 }, { "epoch": 0.29477749172489887, "grad_norm": 1.5397640139993614, "learning_rate": 8.271506633235335e-06, "loss": 0.6516, "step": 9618 }, { "epoch": 0.29480814024764007, "grad_norm": 0.806916651046564, "learning_rate": 8.271131284723384e-06, "loss": 0.4702, "step": 9619 }, { "epoch": 0.2948387887703813, "grad_norm": 1.8442253575452232, "learning_rate": 8.270755903980139e-06, "loss": 0.6482, "step": 9620 }, { "epoch": 0.2948694372931225, "grad_norm": 1.9103251853887102, "learning_rate": 8.270380491009297e-06, "loss": 0.7383, "step": 9621 }, { "epoch": 0.2949000858158637, "grad_norm": 1.7040684983510048, "learning_rate": 8.270005045814563e-06, "loss": 0.6533, "step": 9622 }, { "epoch": 0.2949307343386049, "grad_norm": 1.9436020419152236, "learning_rate": 8.26962956839963e-06, "loss": 0.7464, "step": 9623 }, { "epoch": 0.2949613828613461, "grad_norm": 0.8373550407993988, "learning_rate": 8.269254058768201e-06, "loss": 0.4892, "step": 9624 }, { "epoch": 0.2949920313840873, "grad_norm": 1.8775635529299197, "learning_rate": 8.268878516923975e-06, "loss": 0.7687, "step": 9625 }, { "epoch": 0.2950226799068285, "grad_norm": 0.789577250090038, "learning_rate": 8.268502942870654e-06, "loss": 0.4646, "step": 9626 }, { "epoch": 0.2950533284295697, "grad_norm": 1.7505695067051608, "learning_rate": 8.268127336611935e-06, "loss": 0.6753, "step": 9627 }, { "epoch": 0.2950839769523109, "grad_norm": 0.7956698514248758, "learning_rate": 8.267751698151523e-06, "loss": 0.4468, "step": 9628 }, { "epoch": 0.29511462547505213, "grad_norm": 1.7134400487271102, "learning_rate": 8.267376027493117e-06, "loss": 0.7118, "step": 9629 }, { "epoch": 0.29514527399779333, "grad_norm": 1.5048845211097277, "learning_rate": 8.267000324640418e-06, "loss": 0.8247, "step": 9630 }, { "epoch": 0.2951759225205345, "grad_norm": 1.4917051892617177, "learning_rate": 8.26662458959713e-06, "loss": 0.6594, "step": 9631 }, { "epoch": 0.2952065710432757, "grad_norm": 0.8210387457319369, "learning_rate": 8.266248822366953e-06, "loss": 0.4595, "step": 9632 }, { "epoch": 0.2952372195660169, "grad_norm": 0.8004633604513065, "learning_rate": 8.265873022953591e-06, "loss": 0.4698, "step": 9633 }, { "epoch": 0.2952678680887581, "grad_norm": 1.8104708331855803, "learning_rate": 8.265497191360747e-06, "loss": 0.6799, "step": 9634 }, { "epoch": 0.2952985166114993, "grad_norm": 1.76758454937122, "learning_rate": 8.265121327592124e-06, "loss": 0.6878, "step": 9635 }, { "epoch": 0.2953291651342405, "grad_norm": 1.688916287594444, "learning_rate": 8.264745431651424e-06, "loss": 0.6748, "step": 9636 }, { "epoch": 0.2953598136569817, "grad_norm": 1.5763220569885716, "learning_rate": 8.264369503542353e-06, "loss": 0.6616, "step": 9637 }, { "epoch": 0.2953904621797229, "grad_norm": 1.7515015333318682, "learning_rate": 8.263993543268613e-06, "loss": 0.7089, "step": 9638 }, { "epoch": 0.29542111070246413, "grad_norm": 1.6095277848666631, "learning_rate": 8.263617550833911e-06, "loss": 0.6938, "step": 9639 }, { "epoch": 0.29545175922520533, "grad_norm": 1.8465736292671566, "learning_rate": 8.263241526241949e-06, "loss": 0.7124, "step": 9640 }, { "epoch": 0.29548240774794654, "grad_norm": 1.6754105030571005, "learning_rate": 8.262865469496433e-06, "loss": 0.69, "step": 9641 }, { "epoch": 0.29551305627068775, "grad_norm": 1.4149141582554803, "learning_rate": 8.26248938060107e-06, "loss": 0.4686, "step": 9642 }, { "epoch": 0.29554370479342895, "grad_norm": 1.9641667682914887, "learning_rate": 8.262113259559564e-06, "loss": 0.7737, "step": 9643 }, { "epoch": 0.29557435331617016, "grad_norm": 1.8034596801475258, "learning_rate": 8.26173710637562e-06, "loss": 0.7167, "step": 9644 }, { "epoch": 0.29560500183891136, "grad_norm": 2.0100069703751595, "learning_rate": 8.261360921052948e-06, "loss": 0.786, "step": 9645 }, { "epoch": 0.29563565036165257, "grad_norm": 1.7503391316073817, "learning_rate": 8.260984703595252e-06, "loss": 0.7433, "step": 9646 }, { "epoch": 0.2956662988843938, "grad_norm": 1.5972840176712322, "learning_rate": 8.260608454006238e-06, "loss": 0.7393, "step": 9647 }, { "epoch": 0.295696947407135, "grad_norm": 1.6393309004866967, "learning_rate": 8.260232172289615e-06, "loss": 0.7228, "step": 9648 }, { "epoch": 0.2957275959298762, "grad_norm": 1.626003181223305, "learning_rate": 8.25985585844909e-06, "loss": 0.6898, "step": 9649 }, { "epoch": 0.2957582444526174, "grad_norm": 0.8372170171335941, "learning_rate": 8.259479512488373e-06, "loss": 0.4782, "step": 9650 }, { "epoch": 0.2957888929753586, "grad_norm": 1.6926477141595875, "learning_rate": 8.259103134411168e-06, "loss": 0.6856, "step": 9651 }, { "epoch": 0.2958195414980998, "grad_norm": 1.9177569455618944, "learning_rate": 8.258726724221187e-06, "loss": 0.7769, "step": 9652 }, { "epoch": 0.295850190020841, "grad_norm": 1.635090019729589, "learning_rate": 8.258350281922138e-06, "loss": 0.6913, "step": 9653 }, { "epoch": 0.2958808385435822, "grad_norm": 1.698307784798279, "learning_rate": 8.25797380751773e-06, "loss": 0.8134, "step": 9654 }, { "epoch": 0.2959114870663234, "grad_norm": 1.7733778123297992, "learning_rate": 8.257597301011673e-06, "loss": 0.6517, "step": 9655 }, { "epoch": 0.2959421355890646, "grad_norm": 1.8814485417540572, "learning_rate": 8.257220762407675e-06, "loss": 0.715, "step": 9656 }, { "epoch": 0.29597278411180583, "grad_norm": 1.8026297907074276, "learning_rate": 8.256844191709447e-06, "loss": 0.7082, "step": 9657 }, { "epoch": 0.29600343263454704, "grad_norm": 1.6495348460404662, "learning_rate": 8.256467588920703e-06, "loss": 0.6928, "step": 9658 }, { "epoch": 0.29603408115728824, "grad_norm": 0.8506310824649878, "learning_rate": 8.256090954045146e-06, "loss": 0.4739, "step": 9659 }, { "epoch": 0.29606472968002945, "grad_norm": 1.6597171481955049, "learning_rate": 8.255714287086496e-06, "loss": 0.6406, "step": 9660 }, { "epoch": 0.29609537820277065, "grad_norm": 1.8014261894225967, "learning_rate": 8.255337588048458e-06, "loss": 0.7753, "step": 9661 }, { "epoch": 0.2961260267255118, "grad_norm": 2.0899386384668404, "learning_rate": 8.254960856934746e-06, "loss": 0.7006, "step": 9662 }, { "epoch": 0.296156675248253, "grad_norm": 1.6035501176979083, "learning_rate": 8.254584093749071e-06, "loss": 0.796, "step": 9663 }, { "epoch": 0.2961873237709942, "grad_norm": 1.7663470197479654, "learning_rate": 8.254207298495148e-06, "loss": 0.5956, "step": 9664 }, { "epoch": 0.2962179722937354, "grad_norm": 0.8404012246234289, "learning_rate": 8.253830471176687e-06, "loss": 0.4499, "step": 9665 }, { "epoch": 0.2962486208164766, "grad_norm": 1.9983628138339702, "learning_rate": 8.253453611797403e-06, "loss": 0.6801, "step": 9666 }, { "epoch": 0.29627926933921783, "grad_norm": 0.8514438470731515, "learning_rate": 8.253076720361006e-06, "loss": 0.4921, "step": 9667 }, { "epoch": 0.29630991786195904, "grad_norm": 1.7671242800149811, "learning_rate": 8.252699796871213e-06, "loss": 0.7719, "step": 9668 }, { "epoch": 0.29634056638470024, "grad_norm": 0.8238830700075251, "learning_rate": 8.252322841331737e-06, "loss": 0.4756, "step": 9669 }, { "epoch": 0.29637121490744145, "grad_norm": 1.7027259796416492, "learning_rate": 8.251945853746293e-06, "loss": 0.716, "step": 9670 }, { "epoch": 0.29640186343018265, "grad_norm": 1.9172021643268748, "learning_rate": 8.251568834118592e-06, "loss": 0.7602, "step": 9671 }, { "epoch": 0.29643251195292386, "grad_norm": 1.6640765730276506, "learning_rate": 8.251191782452352e-06, "loss": 0.7366, "step": 9672 }, { "epoch": 0.29646316047566507, "grad_norm": 1.867771934096366, "learning_rate": 8.250814698751289e-06, "loss": 0.7176, "step": 9673 }, { "epoch": 0.29649380899840627, "grad_norm": 1.574734799414687, "learning_rate": 8.250437583019114e-06, "loss": 0.7227, "step": 9674 }, { "epoch": 0.2965244575211475, "grad_norm": 1.5280611779629039, "learning_rate": 8.250060435259548e-06, "loss": 0.467, "step": 9675 }, { "epoch": 0.2965551060438887, "grad_norm": 1.7518494545202308, "learning_rate": 8.249683255476304e-06, "loss": 0.7169, "step": 9676 }, { "epoch": 0.2965857545666299, "grad_norm": 0.980481763038534, "learning_rate": 8.2493060436731e-06, "loss": 0.4964, "step": 9677 }, { "epoch": 0.2966164030893711, "grad_norm": 2.0298504959900385, "learning_rate": 8.248928799853652e-06, "loss": 0.6978, "step": 9678 }, { "epoch": 0.2966470516121123, "grad_norm": 1.5891416978443396, "learning_rate": 8.248551524021678e-06, "loss": 0.669, "step": 9679 }, { "epoch": 0.2966777001348535, "grad_norm": 1.9668097965180737, "learning_rate": 8.248174216180895e-06, "loss": 0.6536, "step": 9680 }, { "epoch": 0.2967083486575947, "grad_norm": 1.4795630431651414, "learning_rate": 8.247796876335019e-06, "loss": 0.5948, "step": 9681 }, { "epoch": 0.2967389971803359, "grad_norm": 1.4091007373775326, "learning_rate": 8.247419504487769e-06, "loss": 0.5562, "step": 9682 }, { "epoch": 0.2967696457030771, "grad_norm": 0.783244646601774, "learning_rate": 8.247042100642863e-06, "loss": 0.4573, "step": 9683 }, { "epoch": 0.29680029422581833, "grad_norm": 1.8064293703842698, "learning_rate": 8.246664664804024e-06, "loss": 0.7412, "step": 9684 }, { "epoch": 0.29683094274855953, "grad_norm": 1.6330788160471965, "learning_rate": 8.246287196974964e-06, "loss": 0.6634, "step": 9685 }, { "epoch": 0.29686159127130074, "grad_norm": 1.6651039192165038, "learning_rate": 8.245909697159408e-06, "loss": 0.6532, "step": 9686 }, { "epoch": 0.29689223979404195, "grad_norm": 1.8054636167342801, "learning_rate": 8.245532165361072e-06, "loss": 0.7021, "step": 9687 }, { "epoch": 0.29692288831678315, "grad_norm": 1.5727353190495519, "learning_rate": 8.245154601583678e-06, "loss": 0.6984, "step": 9688 }, { "epoch": 0.29695353683952436, "grad_norm": 1.9506304898195317, "learning_rate": 8.244777005830944e-06, "loss": 0.6601, "step": 9689 }, { "epoch": 0.29698418536226556, "grad_norm": 1.6436763510708667, "learning_rate": 8.244399378106593e-06, "loss": 0.7562, "step": 9690 }, { "epoch": 0.29701483388500677, "grad_norm": 1.970773842326678, "learning_rate": 8.244021718414344e-06, "loss": 0.7632, "step": 9691 }, { "epoch": 0.297045482407748, "grad_norm": 2.013345105547174, "learning_rate": 8.24364402675792e-06, "loss": 0.5595, "step": 9692 }, { "epoch": 0.2970761309304891, "grad_norm": 1.4838070362167133, "learning_rate": 8.243266303141042e-06, "loss": 0.6314, "step": 9693 }, { "epoch": 0.29710677945323033, "grad_norm": 1.837021990029946, "learning_rate": 8.24288854756743e-06, "loss": 0.6399, "step": 9694 }, { "epoch": 0.29713742797597154, "grad_norm": 1.7097009215652, "learning_rate": 8.242510760040807e-06, "loss": 0.682, "step": 9695 }, { "epoch": 0.29716807649871274, "grad_norm": 1.6106093485950206, "learning_rate": 8.242132940564898e-06, "loss": 0.6663, "step": 9696 }, { "epoch": 0.29719872502145395, "grad_norm": 0.8645430065888323, "learning_rate": 8.241755089143421e-06, "loss": 0.4554, "step": 9697 }, { "epoch": 0.29722937354419515, "grad_norm": 1.7575898368169318, "learning_rate": 8.241377205780103e-06, "loss": 0.7626, "step": 9698 }, { "epoch": 0.29726002206693636, "grad_norm": 1.5618741598011745, "learning_rate": 8.240999290478667e-06, "loss": 0.6122, "step": 9699 }, { "epoch": 0.29729067058967756, "grad_norm": 1.6317540646998718, "learning_rate": 8.240621343242832e-06, "loss": 0.6997, "step": 9700 }, { "epoch": 0.29732131911241877, "grad_norm": 1.5729959940785105, "learning_rate": 8.240243364076328e-06, "loss": 0.6683, "step": 9701 }, { "epoch": 0.29735196763516, "grad_norm": 1.7380392529970077, "learning_rate": 8.23986535298288e-06, "loss": 0.7415, "step": 9702 }, { "epoch": 0.2973826161579012, "grad_norm": 1.6940234673186312, "learning_rate": 8.239487309966205e-06, "loss": 0.7221, "step": 9703 }, { "epoch": 0.2974132646806424, "grad_norm": 1.6374830014228183, "learning_rate": 8.239109235030037e-06, "loss": 0.6827, "step": 9704 }, { "epoch": 0.2974439132033836, "grad_norm": 1.634512289291701, "learning_rate": 8.238731128178094e-06, "loss": 0.5032, "step": 9705 }, { "epoch": 0.2974745617261248, "grad_norm": 0.8973778299323845, "learning_rate": 8.238352989414104e-06, "loss": 0.4894, "step": 9706 }, { "epoch": 0.297505210248866, "grad_norm": 1.9274153460544987, "learning_rate": 8.237974818741796e-06, "loss": 0.6531, "step": 9707 }, { "epoch": 0.2975358587716072, "grad_norm": 1.7637489215808515, "learning_rate": 8.237596616164893e-06, "loss": 0.5534, "step": 9708 }, { "epoch": 0.2975665072943484, "grad_norm": 1.9232231802682473, "learning_rate": 8.23721838168712e-06, "loss": 0.7982, "step": 9709 }, { "epoch": 0.2975971558170896, "grad_norm": 1.8785882985225217, "learning_rate": 8.236840115312207e-06, "loss": 0.6619, "step": 9710 }, { "epoch": 0.2976278043398308, "grad_norm": 1.9451066323134483, "learning_rate": 8.236461817043881e-06, "loss": 0.6744, "step": 9711 }, { "epoch": 0.29765845286257203, "grad_norm": 1.7099044327930035, "learning_rate": 8.236083486885869e-06, "loss": 0.6657, "step": 9712 }, { "epoch": 0.29768910138531324, "grad_norm": 1.7889568517923506, "learning_rate": 8.235705124841898e-06, "loss": 0.7269, "step": 9713 }, { "epoch": 0.29771974990805444, "grad_norm": 0.8251186952812745, "learning_rate": 8.235326730915696e-06, "loss": 0.4735, "step": 9714 }, { "epoch": 0.29775039843079565, "grad_norm": 1.8442423109298602, "learning_rate": 8.234948305110993e-06, "loss": 0.8088, "step": 9715 }, { "epoch": 0.29778104695353685, "grad_norm": 0.7572219445544273, "learning_rate": 8.234569847431514e-06, "loss": 0.5006, "step": 9716 }, { "epoch": 0.29781169547627806, "grad_norm": 1.8190705297307412, "learning_rate": 8.234191357880994e-06, "loss": 0.7402, "step": 9717 }, { "epoch": 0.29784234399901927, "grad_norm": 2.098820967117495, "learning_rate": 8.233812836463157e-06, "loss": 0.6553, "step": 9718 }, { "epoch": 0.29787299252176047, "grad_norm": 1.8352588470155993, "learning_rate": 8.233434283181737e-06, "loss": 0.6976, "step": 9719 }, { "epoch": 0.2979036410445017, "grad_norm": 1.81722245958645, "learning_rate": 8.23305569804046e-06, "loss": 0.7431, "step": 9720 }, { "epoch": 0.2979342895672429, "grad_norm": 1.8061276762129383, "learning_rate": 8.232677081043057e-06, "loss": 0.7542, "step": 9721 }, { "epoch": 0.2979649380899841, "grad_norm": 1.6556368141415523, "learning_rate": 8.23229843219326e-06, "loss": 0.6744, "step": 9722 }, { "epoch": 0.2979955866127253, "grad_norm": 1.893449009359125, "learning_rate": 8.231919751494802e-06, "loss": 0.7067, "step": 9723 }, { "epoch": 0.29802623513546644, "grad_norm": 1.7216341672724038, "learning_rate": 8.23154103895141e-06, "loss": 0.7084, "step": 9724 }, { "epoch": 0.29805688365820765, "grad_norm": 1.768601454011489, "learning_rate": 8.231162294566817e-06, "loss": 0.7218, "step": 9725 }, { "epoch": 0.29808753218094886, "grad_norm": 1.8385887324375196, "learning_rate": 8.230783518344754e-06, "loss": 0.7331, "step": 9726 }, { "epoch": 0.29811818070369006, "grad_norm": 0.9169992725618527, "learning_rate": 8.230404710288955e-06, "loss": 0.4692, "step": 9727 }, { "epoch": 0.29814882922643127, "grad_norm": 1.6348641210984314, "learning_rate": 8.230025870403153e-06, "loss": 0.6814, "step": 9728 }, { "epoch": 0.2981794777491725, "grad_norm": 1.7934693000311444, "learning_rate": 8.22964699869108e-06, "loss": 0.6706, "step": 9729 }, { "epoch": 0.2982101262719137, "grad_norm": 2.2422683448256944, "learning_rate": 8.229268095156469e-06, "loss": 0.7725, "step": 9730 }, { "epoch": 0.2982407747946549, "grad_norm": 1.8572757455292102, "learning_rate": 8.22888915980305e-06, "loss": 0.7453, "step": 9731 }, { "epoch": 0.2982714233173961, "grad_norm": 1.8468473798118938, "learning_rate": 8.228510192634564e-06, "loss": 0.732, "step": 9732 }, { "epoch": 0.2983020718401373, "grad_norm": 1.712140010990861, "learning_rate": 8.228131193654739e-06, "loss": 0.6954, "step": 9733 }, { "epoch": 0.2983327203628785, "grad_norm": 1.6534407405886837, "learning_rate": 8.227752162867312e-06, "loss": 0.7336, "step": 9734 }, { "epoch": 0.2983633688856197, "grad_norm": 1.5980211499704289, "learning_rate": 8.227373100276017e-06, "loss": 0.7083, "step": 9735 }, { "epoch": 0.2983940174083609, "grad_norm": 1.7547985264533907, "learning_rate": 8.226994005884588e-06, "loss": 0.7299, "step": 9736 }, { "epoch": 0.2984246659311021, "grad_norm": 0.8186630762701427, "learning_rate": 8.226614879696762e-06, "loss": 0.4732, "step": 9737 }, { "epoch": 0.2984553144538433, "grad_norm": 1.841288970252898, "learning_rate": 8.226235721716274e-06, "loss": 0.6474, "step": 9738 }, { "epoch": 0.29848596297658453, "grad_norm": 0.7937612064957111, "learning_rate": 8.22585653194686e-06, "loss": 0.4859, "step": 9739 }, { "epoch": 0.29851661149932573, "grad_norm": 1.6185109963705981, "learning_rate": 8.225477310392259e-06, "loss": 0.6291, "step": 9740 }, { "epoch": 0.29854726002206694, "grad_norm": 1.5052240454954526, "learning_rate": 8.2250980570562e-06, "loss": 0.7603, "step": 9741 }, { "epoch": 0.29857790854480815, "grad_norm": 1.7554872238340349, "learning_rate": 8.224718771942428e-06, "loss": 0.692, "step": 9742 }, { "epoch": 0.29860855706754935, "grad_norm": 2.036766495325701, "learning_rate": 8.224339455054675e-06, "loss": 0.7018, "step": 9743 }, { "epoch": 0.29863920559029056, "grad_norm": 1.7330170266873133, "learning_rate": 8.223960106396681e-06, "loss": 0.7299, "step": 9744 }, { "epoch": 0.29866985411303176, "grad_norm": 1.5897563446719252, "learning_rate": 8.223580725972184e-06, "loss": 0.704, "step": 9745 }, { "epoch": 0.29870050263577297, "grad_norm": 1.6702430656479272, "learning_rate": 8.223201313784921e-06, "loss": 0.6325, "step": 9746 }, { "epoch": 0.2987311511585142, "grad_norm": 1.7143747301084116, "learning_rate": 8.22282186983863e-06, "loss": 0.6434, "step": 9747 }, { "epoch": 0.2987617996812554, "grad_norm": 1.7306542275951986, "learning_rate": 8.22244239413705e-06, "loss": 0.7252, "step": 9748 }, { "epoch": 0.2987924482039966, "grad_norm": 1.686158378899414, "learning_rate": 8.222062886683923e-06, "loss": 0.6726, "step": 9749 }, { "epoch": 0.2988230967267378, "grad_norm": 1.734204517024543, "learning_rate": 8.221683347482984e-06, "loss": 0.6974, "step": 9750 }, { "epoch": 0.298853745249479, "grad_norm": 1.550547672954785, "learning_rate": 8.221303776537975e-06, "loss": 0.5967, "step": 9751 }, { "epoch": 0.2988843937722202, "grad_norm": 1.5308184289454878, "learning_rate": 8.220924173852635e-06, "loss": 0.5571, "step": 9752 }, { "epoch": 0.2989150422949614, "grad_norm": 2.0360339844948525, "learning_rate": 8.220544539430707e-06, "loss": 0.6749, "step": 9753 }, { "epoch": 0.2989456908177026, "grad_norm": 2.001220888019737, "learning_rate": 8.220164873275928e-06, "loss": 0.7377, "step": 9754 }, { "epoch": 0.29897633934044376, "grad_norm": 1.6251228307061243, "learning_rate": 8.21978517539204e-06, "loss": 0.6679, "step": 9755 }, { "epoch": 0.29900698786318497, "grad_norm": 1.7770693767379855, "learning_rate": 8.219405445782786e-06, "loss": 0.7524, "step": 9756 }, { "epoch": 0.2990376363859262, "grad_norm": 1.8727896018869907, "learning_rate": 8.219025684451907e-06, "loss": 0.7535, "step": 9757 }, { "epoch": 0.2990682849086674, "grad_norm": 1.846710161709493, "learning_rate": 8.218645891403145e-06, "loss": 0.7785, "step": 9758 }, { "epoch": 0.2990989334314086, "grad_norm": 1.7669218006291056, "learning_rate": 8.218266066640238e-06, "loss": 0.7566, "step": 9759 }, { "epoch": 0.2991295819541498, "grad_norm": 1.560205194865756, "learning_rate": 8.217886210166936e-06, "loss": 0.6485, "step": 9760 }, { "epoch": 0.299160230476891, "grad_norm": 1.8357621905876655, "learning_rate": 8.217506321986976e-06, "loss": 0.7047, "step": 9761 }, { "epoch": 0.2991908789996322, "grad_norm": 1.726793522844941, "learning_rate": 8.217126402104103e-06, "loss": 0.6345, "step": 9762 }, { "epoch": 0.2992215275223734, "grad_norm": 0.9631960129832212, "learning_rate": 8.216746450522059e-06, "loss": 0.464, "step": 9763 }, { "epoch": 0.2992521760451146, "grad_norm": 1.9904045535472268, "learning_rate": 8.216366467244592e-06, "loss": 0.6749, "step": 9764 }, { "epoch": 0.2992828245678558, "grad_norm": 1.5934980875935696, "learning_rate": 8.215986452275442e-06, "loss": 0.6418, "step": 9765 }, { "epoch": 0.299313473090597, "grad_norm": 1.9075736629591182, "learning_rate": 8.215606405618355e-06, "loss": 0.7753, "step": 9766 }, { "epoch": 0.29934412161333823, "grad_norm": 1.7251350434149355, "learning_rate": 8.215226327277073e-06, "loss": 0.705, "step": 9767 }, { "epoch": 0.29937477013607944, "grad_norm": 1.6208751341387573, "learning_rate": 8.214846217255346e-06, "loss": 0.7032, "step": 9768 }, { "epoch": 0.29940541865882064, "grad_norm": 1.8589963306731325, "learning_rate": 8.214466075556915e-06, "loss": 0.6633, "step": 9769 }, { "epoch": 0.29943606718156185, "grad_norm": 1.831793630295091, "learning_rate": 8.21408590218553e-06, "loss": 0.6963, "step": 9770 }, { "epoch": 0.29946671570430305, "grad_norm": 1.5177011254095907, "learning_rate": 8.213705697144932e-06, "loss": 0.6362, "step": 9771 }, { "epoch": 0.29949736422704426, "grad_norm": 0.9132877910916161, "learning_rate": 8.213325460438868e-06, "loss": 0.4863, "step": 9772 }, { "epoch": 0.29952801274978547, "grad_norm": 1.696593513395055, "learning_rate": 8.212945192071089e-06, "loss": 0.6543, "step": 9773 }, { "epoch": 0.29955866127252667, "grad_norm": 1.8257304080291206, "learning_rate": 8.212564892045338e-06, "loss": 0.7374, "step": 9774 }, { "epoch": 0.2995893097952679, "grad_norm": 1.664490118265574, "learning_rate": 8.212184560365363e-06, "loss": 0.7478, "step": 9775 }, { "epoch": 0.2996199583180091, "grad_norm": 1.7433231671981955, "learning_rate": 8.211804197034913e-06, "loss": 0.6634, "step": 9776 }, { "epoch": 0.2996506068407503, "grad_norm": 1.8517059531086746, "learning_rate": 8.211423802057733e-06, "loss": 0.7271, "step": 9777 }, { "epoch": 0.2996812553634915, "grad_norm": 1.65708663772969, "learning_rate": 8.211043375437573e-06, "loss": 0.7036, "step": 9778 }, { "epoch": 0.2997119038862327, "grad_norm": 1.6050846422180947, "learning_rate": 8.21066291717818e-06, "loss": 0.7586, "step": 9779 }, { "epoch": 0.2997425524089739, "grad_norm": 1.6516592793254457, "learning_rate": 8.210282427283304e-06, "loss": 0.6892, "step": 9780 }, { "epoch": 0.2997732009317151, "grad_norm": 0.917591810177919, "learning_rate": 8.209901905756695e-06, "loss": 0.4895, "step": 9781 }, { "epoch": 0.2998038494544563, "grad_norm": 1.5596721325768008, "learning_rate": 8.209521352602102e-06, "loss": 0.6437, "step": 9782 }, { "epoch": 0.2998344979771975, "grad_norm": 1.7294137759060721, "learning_rate": 8.209140767823271e-06, "loss": 0.7402, "step": 9783 }, { "epoch": 0.29986514649993873, "grad_norm": 1.6690730906580877, "learning_rate": 8.208760151423959e-06, "loss": 0.6862, "step": 9784 }, { "epoch": 0.29989579502267993, "grad_norm": 0.7862832899545711, "learning_rate": 8.208379503407908e-06, "loss": 0.4923, "step": 9785 }, { "epoch": 0.2999264435454211, "grad_norm": 1.8196667931249002, "learning_rate": 8.207998823778874e-06, "loss": 0.7279, "step": 9786 }, { "epoch": 0.2999570920681623, "grad_norm": 1.9713274792716864, "learning_rate": 8.207618112540607e-06, "loss": 0.7335, "step": 9787 }, { "epoch": 0.2999877405909035, "grad_norm": 1.7465329729482255, "learning_rate": 8.20723736969686e-06, "loss": 0.7197, "step": 9788 }, { "epoch": 0.3000183891136447, "grad_norm": 1.7924899095313618, "learning_rate": 8.20685659525138e-06, "loss": 0.7861, "step": 9789 }, { "epoch": 0.3000490376363859, "grad_norm": 1.5621555250922843, "learning_rate": 8.206475789207924e-06, "loss": 0.6331, "step": 9790 }, { "epoch": 0.3000796861591271, "grad_norm": 1.7165005701194882, "learning_rate": 8.20609495157024e-06, "loss": 0.7059, "step": 9791 }, { "epoch": 0.3001103346818683, "grad_norm": 1.719183038988036, "learning_rate": 8.205714082342082e-06, "loss": 0.705, "step": 9792 }, { "epoch": 0.3001409832046095, "grad_norm": 0.8773624055363176, "learning_rate": 8.205333181527203e-06, "loss": 0.4766, "step": 9793 }, { "epoch": 0.30017163172735073, "grad_norm": 1.871340407241197, "learning_rate": 8.204952249129356e-06, "loss": 0.6899, "step": 9794 }, { "epoch": 0.30020228025009194, "grad_norm": 1.6972426776662202, "learning_rate": 8.204571285152293e-06, "loss": 0.6914, "step": 9795 }, { "epoch": 0.30023292877283314, "grad_norm": 1.5848935352040927, "learning_rate": 8.204190289599773e-06, "loss": 0.6666, "step": 9796 }, { "epoch": 0.30026357729557435, "grad_norm": 1.9148881595242568, "learning_rate": 8.203809262475545e-06, "loss": 0.5837, "step": 9797 }, { "epoch": 0.30029422581831555, "grad_norm": 1.7102832624708861, "learning_rate": 8.203428203783362e-06, "loss": 0.6184, "step": 9798 }, { "epoch": 0.30032487434105676, "grad_norm": 1.6261576463379663, "learning_rate": 8.203047113526983e-06, "loss": 0.7262, "step": 9799 }, { "epoch": 0.30035552286379796, "grad_norm": 1.5644747637059984, "learning_rate": 8.202665991710162e-06, "loss": 0.6363, "step": 9800 }, { "epoch": 0.30038617138653917, "grad_norm": 1.5406579298101744, "learning_rate": 8.202284838336654e-06, "loss": 0.6491, "step": 9801 }, { "epoch": 0.3004168199092804, "grad_norm": 1.7432271904903802, "learning_rate": 8.201903653410213e-06, "loss": 0.6955, "step": 9802 }, { "epoch": 0.3004474684320216, "grad_norm": 1.55764413757883, "learning_rate": 8.201522436934596e-06, "loss": 0.6656, "step": 9803 }, { "epoch": 0.3004781169547628, "grad_norm": 0.8772009512675442, "learning_rate": 8.201141188913559e-06, "loss": 0.4795, "step": 9804 }, { "epoch": 0.300508765477504, "grad_norm": 1.826451910091905, "learning_rate": 8.20075990935086e-06, "loss": 0.6888, "step": 9805 }, { "epoch": 0.3005394140002452, "grad_norm": 1.5965945169772848, "learning_rate": 8.200378598250253e-06, "loss": 0.6734, "step": 9806 }, { "epoch": 0.3005700625229864, "grad_norm": 1.6004286537818952, "learning_rate": 8.199997255615497e-06, "loss": 0.6912, "step": 9807 }, { "epoch": 0.3006007110457276, "grad_norm": 1.7795739878507093, "learning_rate": 8.19961588145035e-06, "loss": 0.7393, "step": 9808 }, { "epoch": 0.3006313595684688, "grad_norm": 1.7196242354689772, "learning_rate": 8.19923447575857e-06, "loss": 0.6406, "step": 9809 }, { "epoch": 0.30066200809121, "grad_norm": 1.6038834095536776, "learning_rate": 8.198853038543913e-06, "loss": 0.709, "step": 9810 }, { "epoch": 0.3006926566139512, "grad_norm": 1.8452215187318521, "learning_rate": 8.198471569810138e-06, "loss": 0.7857, "step": 9811 }, { "epoch": 0.30072330513669243, "grad_norm": 1.7872481596715228, "learning_rate": 8.198090069561005e-06, "loss": 0.6976, "step": 9812 }, { "epoch": 0.30075395365943364, "grad_norm": 1.6392176317946494, "learning_rate": 8.197708537800271e-06, "loss": 0.6954, "step": 9813 }, { "epoch": 0.30078460218217484, "grad_norm": 2.0585815792120448, "learning_rate": 8.197326974531699e-06, "loss": 0.6025, "step": 9814 }, { "epoch": 0.30081525070491605, "grad_norm": 1.709851327013035, "learning_rate": 8.196945379759045e-06, "loss": 0.7272, "step": 9815 }, { "epoch": 0.30084589922765725, "grad_norm": 1.7901871504715712, "learning_rate": 8.19656375348607e-06, "loss": 0.6866, "step": 9816 }, { "epoch": 0.3008765477503984, "grad_norm": 0.8858953188530209, "learning_rate": 8.196182095716534e-06, "loss": 0.4741, "step": 9817 }, { "epoch": 0.3009071962731396, "grad_norm": 0.857417015339588, "learning_rate": 8.1958004064542e-06, "loss": 0.4562, "step": 9818 }, { "epoch": 0.3009378447958808, "grad_norm": 1.7141902049754434, "learning_rate": 8.195418685702826e-06, "loss": 0.7268, "step": 9819 }, { "epoch": 0.300968493318622, "grad_norm": 1.824758114783751, "learning_rate": 8.195036933466173e-06, "loss": 0.6514, "step": 9820 }, { "epoch": 0.3009991418413632, "grad_norm": 1.8635834131134803, "learning_rate": 8.194655149748005e-06, "loss": 0.6487, "step": 9821 }, { "epoch": 0.30102979036410443, "grad_norm": 1.8191645258902547, "learning_rate": 8.194273334552081e-06, "loss": 0.6768, "step": 9822 }, { "epoch": 0.30106043888684564, "grad_norm": 1.616178047171144, "learning_rate": 8.193891487882167e-06, "loss": 0.6685, "step": 9823 }, { "epoch": 0.30109108740958684, "grad_norm": 1.667238391436406, "learning_rate": 8.19350960974202e-06, "loss": 0.6781, "step": 9824 }, { "epoch": 0.30112173593232805, "grad_norm": 1.7992371674373862, "learning_rate": 8.193127700135408e-06, "loss": 0.782, "step": 9825 }, { "epoch": 0.30115238445506926, "grad_norm": 2.1925446113076785, "learning_rate": 8.192745759066089e-06, "loss": 0.5939, "step": 9826 }, { "epoch": 0.30118303297781046, "grad_norm": 1.813928212552698, "learning_rate": 8.192363786537834e-06, "loss": 0.7691, "step": 9827 }, { "epoch": 0.30121368150055167, "grad_norm": 1.173957176687985, "learning_rate": 8.191981782554397e-06, "loss": 0.4909, "step": 9828 }, { "epoch": 0.30124433002329287, "grad_norm": 1.6951562622815148, "learning_rate": 8.19159974711955e-06, "loss": 0.7817, "step": 9829 }, { "epoch": 0.3012749785460341, "grad_norm": 2.0532113966538277, "learning_rate": 8.191217680237053e-06, "loss": 0.7033, "step": 9830 }, { "epoch": 0.3013056270687753, "grad_norm": 0.8574431089644494, "learning_rate": 8.19083558191067e-06, "loss": 0.4882, "step": 9831 }, { "epoch": 0.3013362755915165, "grad_norm": 1.596746157429657, "learning_rate": 8.19045345214417e-06, "loss": 0.6639, "step": 9832 }, { "epoch": 0.3013669241142577, "grad_norm": 1.8232349204374834, "learning_rate": 8.190071290941313e-06, "loss": 0.6756, "step": 9833 }, { "epoch": 0.3013975726369989, "grad_norm": 1.9454571507418452, "learning_rate": 8.18968909830587e-06, "loss": 0.732, "step": 9834 }, { "epoch": 0.3014282211597401, "grad_norm": 1.902656221657718, "learning_rate": 8.189306874241603e-06, "loss": 0.7227, "step": 9835 }, { "epoch": 0.3014588696824813, "grad_norm": 1.638075849606096, "learning_rate": 8.18892461875228e-06, "loss": 0.6452, "step": 9836 }, { "epoch": 0.3014895182052225, "grad_norm": 1.7005781454193067, "learning_rate": 8.188542331841667e-06, "loss": 0.7634, "step": 9837 }, { "epoch": 0.3015201667279637, "grad_norm": 1.7356482034920155, "learning_rate": 8.188160013513531e-06, "loss": 0.7606, "step": 9838 }, { "epoch": 0.30155081525070493, "grad_norm": 1.08098436481968, "learning_rate": 8.187777663771637e-06, "loss": 0.4706, "step": 9839 }, { "epoch": 0.30158146377344613, "grad_norm": 1.777986099525208, "learning_rate": 8.187395282619755e-06, "loss": 0.6962, "step": 9840 }, { "epoch": 0.30161211229618734, "grad_norm": 1.9413077109882433, "learning_rate": 8.18701287006165e-06, "loss": 0.7641, "step": 9841 }, { "epoch": 0.30164276081892855, "grad_norm": 0.8197590184704292, "learning_rate": 8.186630426101094e-06, "loss": 0.4696, "step": 9842 }, { "epoch": 0.30167340934166975, "grad_norm": 1.747784371834914, "learning_rate": 8.186247950741852e-06, "loss": 0.7623, "step": 9843 }, { "epoch": 0.30170405786441096, "grad_norm": 1.632723872282145, "learning_rate": 8.185865443987695e-06, "loss": 0.535, "step": 9844 }, { "epoch": 0.30173470638715216, "grad_norm": 2.4803443056752412, "learning_rate": 8.18548290584239e-06, "loss": 0.634, "step": 9845 }, { "epoch": 0.30176535490989337, "grad_norm": 2.1493652303481525, "learning_rate": 8.185100336309706e-06, "loss": 0.6615, "step": 9846 }, { "epoch": 0.3017960034326346, "grad_norm": 1.8355950041518931, "learning_rate": 8.184717735393415e-06, "loss": 0.7261, "step": 9847 }, { "epoch": 0.3018266519553757, "grad_norm": 1.9224454230150028, "learning_rate": 8.184335103097284e-06, "loss": 0.7126, "step": 9848 }, { "epoch": 0.30185730047811693, "grad_norm": 1.7186607359903052, "learning_rate": 8.183952439425084e-06, "loss": 0.7257, "step": 9849 }, { "epoch": 0.30188794900085814, "grad_norm": 1.9873391326509187, "learning_rate": 8.183569744380587e-06, "loss": 0.7123, "step": 9850 }, { "epoch": 0.30191859752359934, "grad_norm": 2.523188445839736, "learning_rate": 8.183187017967562e-06, "loss": 0.6984, "step": 9851 }, { "epoch": 0.30194924604634055, "grad_norm": 1.9353284844885743, "learning_rate": 8.182804260189783e-06, "loss": 0.6373, "step": 9852 }, { "epoch": 0.30197989456908175, "grad_norm": 1.7181405452707852, "learning_rate": 8.182421471051018e-06, "loss": 0.6531, "step": 9853 }, { "epoch": 0.30201054309182296, "grad_norm": 1.8137468893414304, "learning_rate": 8.18203865055504e-06, "loss": 0.6632, "step": 9854 }, { "epoch": 0.30204119161456416, "grad_norm": 1.552831613230682, "learning_rate": 8.181655798705618e-06, "loss": 0.6195, "step": 9855 }, { "epoch": 0.30207184013730537, "grad_norm": 1.7232438837692146, "learning_rate": 8.18127291550653e-06, "loss": 0.7051, "step": 9856 }, { "epoch": 0.3021024886600466, "grad_norm": 1.452336799470743, "learning_rate": 8.180890000961548e-06, "loss": 0.6043, "step": 9857 }, { "epoch": 0.3021331371827878, "grad_norm": 1.6812397387009257, "learning_rate": 8.18050705507444e-06, "loss": 0.6753, "step": 9858 }, { "epoch": 0.302163785705529, "grad_norm": 1.680718057847362, "learning_rate": 8.180124077848983e-06, "loss": 0.7264, "step": 9859 }, { "epoch": 0.3021944342282702, "grad_norm": 1.8591978543508518, "learning_rate": 8.179741069288951e-06, "loss": 0.563, "step": 9860 }, { "epoch": 0.3022250827510114, "grad_norm": 1.4644562198834894, "learning_rate": 8.179358029398117e-06, "loss": 0.7128, "step": 9861 }, { "epoch": 0.3022557312737526, "grad_norm": 1.8392739597953172, "learning_rate": 8.178974958180253e-06, "loss": 0.7386, "step": 9862 }, { "epoch": 0.3022863797964938, "grad_norm": 1.9462727571008824, "learning_rate": 8.178591855639136e-06, "loss": 0.7386, "step": 9863 }, { "epoch": 0.302317028319235, "grad_norm": 1.7982685455602054, "learning_rate": 8.17820872177854e-06, "loss": 0.7292, "step": 9864 }, { "epoch": 0.3023476768419762, "grad_norm": 1.8067299141075952, "learning_rate": 8.17782555660224e-06, "loss": 0.7296, "step": 9865 }, { "epoch": 0.3023783253647174, "grad_norm": 1.589005519149208, "learning_rate": 8.177442360114012e-06, "loss": 0.7135, "step": 9866 }, { "epoch": 0.30240897388745863, "grad_norm": 1.2553059757339537, "learning_rate": 8.17705913231763e-06, "loss": 0.4566, "step": 9867 }, { "epoch": 0.30243962241019984, "grad_norm": 1.7417606486914352, "learning_rate": 8.176675873216874e-06, "loss": 0.6328, "step": 9868 }, { "epoch": 0.30247027093294104, "grad_norm": 1.641906339176859, "learning_rate": 8.176292582815517e-06, "loss": 0.5964, "step": 9869 }, { "epoch": 0.30250091945568225, "grad_norm": 1.5943275686821976, "learning_rate": 8.175909261117336e-06, "loss": 0.6243, "step": 9870 }, { "epoch": 0.30253156797842345, "grad_norm": 0.8773831561561108, "learning_rate": 8.17552590812611e-06, "loss": 0.4746, "step": 9871 }, { "epoch": 0.30256221650116466, "grad_norm": 2.0912301562357776, "learning_rate": 8.175142523845613e-06, "loss": 0.6835, "step": 9872 }, { "epoch": 0.30259286502390587, "grad_norm": 1.7563639486459084, "learning_rate": 8.174759108279625e-06, "loss": 0.6796, "step": 9873 }, { "epoch": 0.30262351354664707, "grad_norm": 2.027434262739058, "learning_rate": 8.174375661431924e-06, "loss": 0.8542, "step": 9874 }, { "epoch": 0.3026541620693883, "grad_norm": 1.5530561005999353, "learning_rate": 8.173992183306285e-06, "loss": 0.6029, "step": 9875 }, { "epoch": 0.3026848105921295, "grad_norm": 1.7971591487596734, "learning_rate": 8.17360867390649e-06, "loss": 0.7192, "step": 9876 }, { "epoch": 0.3027154591148707, "grad_norm": 1.6961502798040415, "learning_rate": 8.173225133236317e-06, "loss": 0.6984, "step": 9877 }, { "epoch": 0.3027461076376119, "grad_norm": 1.6991215987081387, "learning_rate": 8.172841561299547e-06, "loss": 0.7623, "step": 9878 }, { "epoch": 0.30277675616035304, "grad_norm": 1.563586060840047, "learning_rate": 8.172457958099954e-06, "loss": 0.7218, "step": 9879 }, { "epoch": 0.30280740468309425, "grad_norm": 1.5514054501126073, "learning_rate": 8.172074323641323e-06, "loss": 0.6464, "step": 9880 }, { "epoch": 0.30283805320583546, "grad_norm": 1.6969770865416205, "learning_rate": 8.171690657927432e-06, "loss": 0.7268, "step": 9881 }, { "epoch": 0.30286870172857666, "grad_norm": 1.7786924764445555, "learning_rate": 8.17130696096206e-06, "loss": 0.7703, "step": 9882 }, { "epoch": 0.30289935025131787, "grad_norm": 1.6598374619959793, "learning_rate": 8.17092323274899e-06, "loss": 0.6361, "step": 9883 }, { "epoch": 0.3029299987740591, "grad_norm": 1.9140113994251817, "learning_rate": 8.170539473292001e-06, "loss": 0.8107, "step": 9884 }, { "epoch": 0.3029606472968003, "grad_norm": 2.039317488664188, "learning_rate": 8.170155682594877e-06, "loss": 0.7514, "step": 9885 }, { "epoch": 0.3029912958195415, "grad_norm": 1.191875830153326, "learning_rate": 8.169771860661397e-06, "loss": 0.496, "step": 9886 }, { "epoch": 0.3030219443422827, "grad_norm": 0.9666976778556446, "learning_rate": 8.169388007495344e-06, "loss": 0.4661, "step": 9887 }, { "epoch": 0.3030525928650239, "grad_norm": 1.8877737134150703, "learning_rate": 8.169004123100501e-06, "loss": 0.6288, "step": 9888 }, { "epoch": 0.3030832413877651, "grad_norm": 1.7125491829748942, "learning_rate": 8.168620207480649e-06, "loss": 0.7024, "step": 9889 }, { "epoch": 0.3031138899105063, "grad_norm": 1.974114420524914, "learning_rate": 8.16823626063957e-06, "loss": 0.7157, "step": 9890 }, { "epoch": 0.3031445384332475, "grad_norm": 1.618431614452004, "learning_rate": 8.16785228258105e-06, "loss": 0.5814, "step": 9891 }, { "epoch": 0.3031751869559887, "grad_norm": 1.8868548836133756, "learning_rate": 8.16746827330887e-06, "loss": 0.6813, "step": 9892 }, { "epoch": 0.3032058354787299, "grad_norm": 1.9710655903766556, "learning_rate": 8.167084232826816e-06, "loss": 0.6122, "step": 9893 }, { "epoch": 0.30323648400147113, "grad_norm": 1.846272028871666, "learning_rate": 8.16670016113867e-06, "loss": 0.7546, "step": 9894 }, { "epoch": 0.30326713252421234, "grad_norm": 1.298714473201504, "learning_rate": 8.166316058248217e-06, "loss": 0.4904, "step": 9895 }, { "epoch": 0.30329778104695354, "grad_norm": 1.8008494300305415, "learning_rate": 8.165931924159242e-06, "loss": 0.6912, "step": 9896 }, { "epoch": 0.30332842956969475, "grad_norm": 1.709031580553762, "learning_rate": 8.165547758875529e-06, "loss": 0.6654, "step": 9897 }, { "epoch": 0.30335907809243595, "grad_norm": 1.9275641763001985, "learning_rate": 8.165163562400864e-06, "loss": 0.6533, "step": 9898 }, { "epoch": 0.30338972661517716, "grad_norm": 1.5714561836978216, "learning_rate": 8.164779334739033e-06, "loss": 0.6357, "step": 9899 }, { "epoch": 0.30342037513791836, "grad_norm": 0.8726593351617854, "learning_rate": 8.164395075893822e-06, "loss": 0.4789, "step": 9900 }, { "epoch": 0.30345102366065957, "grad_norm": 1.7356896479533797, "learning_rate": 8.164010785869016e-06, "loss": 0.6989, "step": 9901 }, { "epoch": 0.3034816721834008, "grad_norm": 1.7328092658827094, "learning_rate": 8.1636264646684e-06, "loss": 0.7114, "step": 9902 }, { "epoch": 0.303512320706142, "grad_norm": 1.6947284924009258, "learning_rate": 8.163242112295767e-06, "loss": 0.6843, "step": 9903 }, { "epoch": 0.3035429692288832, "grad_norm": 0.8508813234910475, "learning_rate": 8.162857728754898e-06, "loss": 0.4831, "step": 9904 }, { "epoch": 0.3035736177516244, "grad_norm": 1.603555600262242, "learning_rate": 8.162473314049584e-06, "loss": 0.662, "step": 9905 }, { "epoch": 0.3036042662743656, "grad_norm": 1.6830599525013532, "learning_rate": 8.16208886818361e-06, "loss": 0.679, "step": 9906 }, { "epoch": 0.3036349147971068, "grad_norm": 0.8277910134603678, "learning_rate": 8.161704391160765e-06, "loss": 0.4768, "step": 9907 }, { "epoch": 0.303665563319848, "grad_norm": 2.052250102683192, "learning_rate": 8.16131988298484e-06, "loss": 0.6879, "step": 9908 }, { "epoch": 0.3036962118425892, "grad_norm": 1.6900147197763065, "learning_rate": 8.160935343659618e-06, "loss": 0.7075, "step": 9909 }, { "epoch": 0.30372686036533036, "grad_norm": 1.6401468178264076, "learning_rate": 8.160550773188894e-06, "loss": 0.6699, "step": 9910 }, { "epoch": 0.30375750888807157, "grad_norm": 1.8051692934197334, "learning_rate": 8.160166171576453e-06, "loss": 0.6709, "step": 9911 }, { "epoch": 0.3037881574108128, "grad_norm": 1.5992883175754284, "learning_rate": 8.159781538826087e-06, "loss": 0.6734, "step": 9912 }, { "epoch": 0.303818805933554, "grad_norm": 1.811116605503594, "learning_rate": 8.159396874941584e-06, "loss": 0.6756, "step": 9913 }, { "epoch": 0.3038494544562952, "grad_norm": 1.78046485409467, "learning_rate": 8.159012179926736e-06, "loss": 0.6511, "step": 9914 }, { "epoch": 0.3038801029790364, "grad_norm": 1.6073455730462005, "learning_rate": 8.158627453785332e-06, "loss": 0.6664, "step": 9915 }, { "epoch": 0.3039107515017776, "grad_norm": 1.7158992697793665, "learning_rate": 8.158242696521165e-06, "loss": 0.7492, "step": 9916 }, { "epoch": 0.3039414000245188, "grad_norm": 1.7208427555972137, "learning_rate": 8.157857908138022e-06, "loss": 0.7126, "step": 9917 }, { "epoch": 0.30397204854726, "grad_norm": 1.887691707306023, "learning_rate": 8.1574730886397e-06, "loss": 0.7393, "step": 9918 }, { "epoch": 0.3040026970700012, "grad_norm": 1.7464737830352413, "learning_rate": 8.157088238029986e-06, "loss": 0.754, "step": 9919 }, { "epoch": 0.3040333455927424, "grad_norm": 1.3734995907547753, "learning_rate": 8.156703356312676e-06, "loss": 0.6167, "step": 9920 }, { "epoch": 0.3040639941154836, "grad_norm": 1.7033627958735853, "learning_rate": 8.156318443491558e-06, "loss": 0.7505, "step": 9921 }, { "epoch": 0.30409464263822483, "grad_norm": 1.5476217717541083, "learning_rate": 8.155933499570428e-06, "loss": 0.6652, "step": 9922 }, { "epoch": 0.30412529116096604, "grad_norm": 1.6884092725233089, "learning_rate": 8.155548524553076e-06, "loss": 0.7309, "step": 9923 }, { "epoch": 0.30415593968370724, "grad_norm": 1.9932172976828817, "learning_rate": 8.155163518443298e-06, "loss": 0.6226, "step": 9924 }, { "epoch": 0.30418658820644845, "grad_norm": 1.8963932365973104, "learning_rate": 8.154778481244888e-06, "loss": 0.6757, "step": 9925 }, { "epoch": 0.30421723672918966, "grad_norm": 1.7993101706063521, "learning_rate": 8.154393412961637e-06, "loss": 0.6703, "step": 9926 }, { "epoch": 0.30424788525193086, "grad_norm": 1.9598947373942466, "learning_rate": 8.15400831359734e-06, "loss": 0.7596, "step": 9927 }, { "epoch": 0.30427853377467207, "grad_norm": 1.9322864827782875, "learning_rate": 8.153623183155793e-06, "loss": 0.7635, "step": 9928 }, { "epoch": 0.30430918229741327, "grad_norm": 1.9099048569933417, "learning_rate": 8.15323802164079e-06, "loss": 0.6212, "step": 9929 }, { "epoch": 0.3043398308201545, "grad_norm": 1.8531741930847716, "learning_rate": 8.152852829056126e-06, "loss": 0.713, "step": 9930 }, { "epoch": 0.3043704793428957, "grad_norm": 1.7158150151798461, "learning_rate": 8.152467605405596e-06, "loss": 0.7085, "step": 9931 }, { "epoch": 0.3044011278656369, "grad_norm": 2.0320555240638045, "learning_rate": 8.152082350692996e-06, "loss": 0.7569, "step": 9932 }, { "epoch": 0.3044317763883781, "grad_norm": 1.5933143619059875, "learning_rate": 8.151697064922121e-06, "loss": 0.6488, "step": 9933 }, { "epoch": 0.3044624249111193, "grad_norm": 1.9563515665374656, "learning_rate": 8.151311748096771e-06, "loss": 0.7836, "step": 9934 }, { "epoch": 0.3044930734338605, "grad_norm": 1.9912710496027035, "learning_rate": 8.150926400220738e-06, "loss": 0.8075, "step": 9935 }, { "epoch": 0.3045237219566017, "grad_norm": 1.807034381511625, "learning_rate": 8.150541021297822e-06, "loss": 0.7382, "step": 9936 }, { "epoch": 0.3045543704793429, "grad_norm": 0.9169265681154324, "learning_rate": 8.150155611331819e-06, "loss": 0.4896, "step": 9937 }, { "epoch": 0.3045850190020841, "grad_norm": 1.6020011501405012, "learning_rate": 8.149770170326527e-06, "loss": 0.627, "step": 9938 }, { "epoch": 0.30461566752482533, "grad_norm": 1.9603291877008628, "learning_rate": 8.149384698285742e-06, "loss": 0.6811, "step": 9939 }, { "epoch": 0.30464631604756653, "grad_norm": 1.7348808983680664, "learning_rate": 8.148999195213266e-06, "loss": 0.6903, "step": 9940 }, { "epoch": 0.3046769645703077, "grad_norm": 1.6489289387271862, "learning_rate": 8.148613661112894e-06, "loss": 0.6119, "step": 9941 }, { "epoch": 0.3047076130930489, "grad_norm": 1.7212498981228779, "learning_rate": 8.148228095988427e-06, "loss": 0.6993, "step": 9942 }, { "epoch": 0.3047382616157901, "grad_norm": 1.903749932943825, "learning_rate": 8.147842499843663e-06, "loss": 0.7016, "step": 9943 }, { "epoch": 0.3047689101385313, "grad_norm": 1.8382436840587968, "learning_rate": 8.1474568726824e-06, "loss": 0.6664, "step": 9944 }, { "epoch": 0.3047995586612725, "grad_norm": 1.804892437799155, "learning_rate": 8.14707121450844e-06, "loss": 0.8187, "step": 9945 }, { "epoch": 0.3048302071840137, "grad_norm": 0.9444770940705277, "learning_rate": 8.146685525325582e-06, "loss": 0.4684, "step": 9946 }, { "epoch": 0.3048608557067549, "grad_norm": 1.810993879330766, "learning_rate": 8.146299805137626e-06, "loss": 0.7139, "step": 9947 }, { "epoch": 0.3048915042294961, "grad_norm": 1.7924840275299379, "learning_rate": 8.145914053948373e-06, "loss": 0.6601, "step": 9948 }, { "epoch": 0.30492215275223733, "grad_norm": 1.8746595470240743, "learning_rate": 8.145528271761624e-06, "loss": 0.7268, "step": 9949 }, { "epoch": 0.30495280127497854, "grad_norm": 1.8542020850166625, "learning_rate": 8.145142458581182e-06, "loss": 0.7072, "step": 9950 }, { "epoch": 0.30498344979771974, "grad_norm": 1.9537156968452036, "learning_rate": 8.144756614410846e-06, "loss": 0.6757, "step": 9951 }, { "epoch": 0.30501409832046095, "grad_norm": 1.6439733688048368, "learning_rate": 8.144370739254418e-06, "loss": 0.6443, "step": 9952 }, { "epoch": 0.30504474684320215, "grad_norm": 1.7840661215186073, "learning_rate": 8.1439848331157e-06, "loss": 0.7143, "step": 9953 }, { "epoch": 0.30507539536594336, "grad_norm": 1.7295314557884864, "learning_rate": 8.143598895998494e-06, "loss": 0.7506, "step": 9954 }, { "epoch": 0.30510604388868456, "grad_norm": 1.9360172695782982, "learning_rate": 8.143212927906606e-06, "loss": 0.7129, "step": 9955 }, { "epoch": 0.30513669241142577, "grad_norm": 1.627703122729354, "learning_rate": 8.142826928843835e-06, "loss": 0.577, "step": 9956 }, { "epoch": 0.305167340934167, "grad_norm": 0.8555671972885345, "learning_rate": 8.142440898813985e-06, "loss": 0.4747, "step": 9957 }, { "epoch": 0.3051979894569082, "grad_norm": 1.8404403478745408, "learning_rate": 8.142054837820865e-06, "loss": 0.6845, "step": 9958 }, { "epoch": 0.3052286379796494, "grad_norm": 1.5707709073512115, "learning_rate": 8.141668745868271e-06, "loss": 0.5722, "step": 9959 }, { "epoch": 0.3052592865023906, "grad_norm": 2.0678515547919813, "learning_rate": 8.141282622960012e-06, "loss": 0.7579, "step": 9960 }, { "epoch": 0.3052899350251318, "grad_norm": 1.6986045326867014, "learning_rate": 8.140896469099893e-06, "loss": 0.7451, "step": 9961 }, { "epoch": 0.305320583547873, "grad_norm": 0.7895058032989531, "learning_rate": 8.140510284291716e-06, "loss": 0.4642, "step": 9962 }, { "epoch": 0.3053512320706142, "grad_norm": 1.898382498758553, "learning_rate": 8.140124068539288e-06, "loss": 0.719, "step": 9963 }, { "epoch": 0.3053818805933554, "grad_norm": 1.9431115272419486, "learning_rate": 8.139737821846415e-06, "loss": 0.5947, "step": 9964 }, { "epoch": 0.3054125291160966, "grad_norm": 1.8533192737968534, "learning_rate": 8.139351544216901e-06, "loss": 0.7443, "step": 9965 }, { "epoch": 0.3054431776388378, "grad_norm": 1.6332134447864906, "learning_rate": 8.138965235654553e-06, "loss": 0.7247, "step": 9966 }, { "epoch": 0.30547382616157903, "grad_norm": 1.768405542590437, "learning_rate": 8.138578896163177e-06, "loss": 0.7457, "step": 9967 }, { "epoch": 0.30550447468432024, "grad_norm": 1.7249737915808079, "learning_rate": 8.138192525746582e-06, "loss": 0.6077, "step": 9968 }, { "epoch": 0.30553512320706144, "grad_norm": 1.9072440169560319, "learning_rate": 8.137806124408572e-06, "loss": 0.6494, "step": 9969 }, { "epoch": 0.30556577172980265, "grad_norm": 1.5127658687799577, "learning_rate": 8.137419692152954e-06, "loss": 0.6838, "step": 9970 }, { "epoch": 0.30559642025254385, "grad_norm": 0.8731842800367429, "learning_rate": 8.137033228983538e-06, "loss": 0.4923, "step": 9971 }, { "epoch": 0.305627068775285, "grad_norm": 1.8398095587863275, "learning_rate": 8.136646734904132e-06, "loss": 0.725, "step": 9972 }, { "epoch": 0.3056577172980262, "grad_norm": 1.6628448636444055, "learning_rate": 8.136260209918541e-06, "loss": 0.7188, "step": 9973 }, { "epoch": 0.3056883658207674, "grad_norm": 1.7181901448574632, "learning_rate": 8.135873654030577e-06, "loss": 0.6889, "step": 9974 }, { "epoch": 0.3057190143435086, "grad_norm": 1.7535843871399124, "learning_rate": 8.135487067244048e-06, "loss": 0.6908, "step": 9975 }, { "epoch": 0.3057496628662498, "grad_norm": 2.115953783533447, "learning_rate": 8.135100449562763e-06, "loss": 0.6348, "step": 9976 }, { "epoch": 0.30578031138899103, "grad_norm": 1.7871888001219765, "learning_rate": 8.13471380099053e-06, "loss": 0.8013, "step": 9977 }, { "epoch": 0.30581095991173224, "grad_norm": 2.053954820573454, "learning_rate": 8.13432712153116e-06, "loss": 0.7435, "step": 9978 }, { "epoch": 0.30584160843447344, "grad_norm": 1.5704363870178852, "learning_rate": 8.133940411188463e-06, "loss": 0.6564, "step": 9979 }, { "epoch": 0.30587225695721465, "grad_norm": 1.7668189188987162, "learning_rate": 8.13355366996625e-06, "loss": 0.6911, "step": 9980 }, { "epoch": 0.30590290547995586, "grad_norm": 1.7207956065284136, "learning_rate": 8.13316689786833e-06, "loss": 0.7244, "step": 9981 }, { "epoch": 0.30593355400269706, "grad_norm": 1.7644331084424802, "learning_rate": 8.132780094898515e-06, "loss": 0.6507, "step": 9982 }, { "epoch": 0.30596420252543827, "grad_norm": 1.658144010000826, "learning_rate": 8.132393261060616e-06, "loss": 0.6153, "step": 9983 }, { "epoch": 0.3059948510481795, "grad_norm": 0.7919473144194484, "learning_rate": 8.132006396358447e-06, "loss": 0.4615, "step": 9984 }, { "epoch": 0.3060254995709207, "grad_norm": 1.5826437031037688, "learning_rate": 8.131619500795815e-06, "loss": 0.6344, "step": 9985 }, { "epoch": 0.3060561480936619, "grad_norm": 1.681913920518749, "learning_rate": 8.131232574376535e-06, "loss": 0.6829, "step": 9986 }, { "epoch": 0.3060867966164031, "grad_norm": 1.9751660140820408, "learning_rate": 8.130845617104419e-06, "loss": 0.6862, "step": 9987 }, { "epoch": 0.3061174451391443, "grad_norm": 1.8097105563665412, "learning_rate": 8.130458628983281e-06, "loss": 0.7382, "step": 9988 }, { "epoch": 0.3061480936618855, "grad_norm": 1.80340401967374, "learning_rate": 8.130071610016934e-06, "loss": 0.6304, "step": 9989 }, { "epoch": 0.3061787421846267, "grad_norm": 1.6925083048958267, "learning_rate": 8.12968456020919e-06, "loss": 0.6444, "step": 9990 }, { "epoch": 0.3062093907073679, "grad_norm": 1.7568352011151858, "learning_rate": 8.129297479563863e-06, "loss": 0.6723, "step": 9991 }, { "epoch": 0.3062400392301091, "grad_norm": 1.9567694809837621, "learning_rate": 8.128910368084767e-06, "loss": 0.8131, "step": 9992 }, { "epoch": 0.3062706877528503, "grad_norm": 1.8578487305077476, "learning_rate": 8.128523225775717e-06, "loss": 0.6981, "step": 9993 }, { "epoch": 0.30630133627559153, "grad_norm": 1.8947991684041496, "learning_rate": 8.128136052640526e-06, "loss": 0.7995, "step": 9994 }, { "epoch": 0.30633198479833273, "grad_norm": 0.8391567221416658, "learning_rate": 8.12774884868301e-06, "loss": 0.4682, "step": 9995 }, { "epoch": 0.30636263332107394, "grad_norm": 2.467869319763568, "learning_rate": 8.127361613906988e-06, "loss": 0.7604, "step": 9996 }, { "epoch": 0.30639328184381515, "grad_norm": 1.976626494914839, "learning_rate": 8.126974348316268e-06, "loss": 0.7883, "step": 9997 }, { "epoch": 0.30642393036655635, "grad_norm": 1.6167741134990343, "learning_rate": 8.12658705191467e-06, "loss": 0.6713, "step": 9998 }, { "epoch": 0.30645457888929756, "grad_norm": 1.719209838254567, "learning_rate": 8.126199724706012e-06, "loss": 0.7432, "step": 9999 }, { "epoch": 0.30648522741203876, "grad_norm": 1.8594416401300824, "learning_rate": 8.125812366694106e-06, "loss": 0.6388, "step": 10000 }, { "epoch": 0.30651587593477997, "grad_norm": 0.8057456987891236, "learning_rate": 8.125424977882772e-06, "loss": 0.4781, "step": 10001 }, { "epoch": 0.3065465244575212, "grad_norm": 0.8007921001244939, "learning_rate": 8.125037558275826e-06, "loss": 0.4594, "step": 10002 }, { "epoch": 0.3065771729802623, "grad_norm": 1.6149823911433334, "learning_rate": 8.124650107877086e-06, "loss": 0.637, "step": 10003 }, { "epoch": 0.30660782150300353, "grad_norm": 1.6716818753052534, "learning_rate": 8.124262626690367e-06, "loss": 0.6767, "step": 10004 }, { "epoch": 0.30663847002574474, "grad_norm": 1.7496885202000658, "learning_rate": 8.123875114719491e-06, "loss": 0.6995, "step": 10005 }, { "epoch": 0.30666911854848594, "grad_norm": 1.5217555948699353, "learning_rate": 8.123487571968273e-06, "loss": 0.6383, "step": 10006 }, { "epoch": 0.30669976707122715, "grad_norm": 1.8785305461567685, "learning_rate": 8.123099998440535e-06, "loss": 0.7628, "step": 10007 }, { "epoch": 0.30673041559396835, "grad_norm": 1.57503084647934, "learning_rate": 8.12271239414009e-06, "loss": 0.6473, "step": 10008 }, { "epoch": 0.30676106411670956, "grad_norm": 1.6227432614540533, "learning_rate": 8.122324759070764e-06, "loss": 0.6935, "step": 10009 }, { "epoch": 0.30679171263945076, "grad_norm": 2.0172003163010794, "learning_rate": 8.121937093236371e-06, "loss": 0.668, "step": 10010 }, { "epoch": 0.30682236116219197, "grad_norm": 1.8384747580710254, "learning_rate": 8.121549396640736e-06, "loss": 0.7072, "step": 10011 }, { "epoch": 0.3068530096849332, "grad_norm": 1.7875535901697375, "learning_rate": 8.121161669287674e-06, "loss": 0.6941, "step": 10012 }, { "epoch": 0.3068836582076744, "grad_norm": 1.5571552976568779, "learning_rate": 8.120773911181005e-06, "loss": 0.6688, "step": 10013 }, { "epoch": 0.3069143067304156, "grad_norm": 1.6202993892688111, "learning_rate": 8.120386122324556e-06, "loss": 0.725, "step": 10014 }, { "epoch": 0.3069449552531568, "grad_norm": 1.709934132815986, "learning_rate": 8.119998302722143e-06, "loss": 0.6062, "step": 10015 }, { "epoch": 0.306975603775898, "grad_norm": 2.8728179331409036, "learning_rate": 8.119610452377588e-06, "loss": 0.6649, "step": 10016 }, { "epoch": 0.3070062522986392, "grad_norm": 1.7407477214928213, "learning_rate": 8.119222571294714e-06, "loss": 0.629, "step": 10017 }, { "epoch": 0.3070369008213804, "grad_norm": 1.683660714957663, "learning_rate": 8.118834659477341e-06, "loss": 0.5762, "step": 10018 }, { "epoch": 0.3070675493441216, "grad_norm": 1.9433140897096906, "learning_rate": 8.118446716929294e-06, "loss": 0.7013, "step": 10019 }, { "epoch": 0.3070981978668628, "grad_norm": 1.772304676162161, "learning_rate": 8.118058743654392e-06, "loss": 0.7152, "step": 10020 }, { "epoch": 0.307128846389604, "grad_norm": 0.9085512718050663, "learning_rate": 8.117670739656457e-06, "loss": 0.464, "step": 10021 }, { "epoch": 0.30715949491234523, "grad_norm": 1.8305972995985287, "learning_rate": 8.117282704939318e-06, "loss": 0.7246, "step": 10022 }, { "epoch": 0.30719014343508644, "grad_norm": 2.5109441044529492, "learning_rate": 8.116894639506794e-06, "loss": 0.6885, "step": 10023 }, { "epoch": 0.30722079195782764, "grad_norm": 1.7759064050396463, "learning_rate": 8.11650654336271e-06, "loss": 0.731, "step": 10024 }, { "epoch": 0.30725144048056885, "grad_norm": 1.8151224660835072, "learning_rate": 8.116118416510889e-06, "loss": 0.7214, "step": 10025 }, { "epoch": 0.30728208900331005, "grad_norm": 1.7565206800826514, "learning_rate": 8.115730258955156e-06, "loss": 0.7146, "step": 10026 }, { "epoch": 0.30731273752605126, "grad_norm": 1.7886762704881047, "learning_rate": 8.115342070699335e-06, "loss": 0.7514, "step": 10027 }, { "epoch": 0.30734338604879247, "grad_norm": 1.7149959207794132, "learning_rate": 8.114953851747252e-06, "loss": 0.7181, "step": 10028 }, { "epoch": 0.30737403457153367, "grad_norm": 1.712162174011383, "learning_rate": 8.114565602102733e-06, "loss": 0.7196, "step": 10029 }, { "epoch": 0.3074046830942749, "grad_norm": 1.681804370524691, "learning_rate": 8.1141773217696e-06, "loss": 0.7095, "step": 10030 }, { "epoch": 0.3074353316170161, "grad_norm": 0.8544495692585663, "learning_rate": 8.113789010751682e-06, "loss": 0.4701, "step": 10031 }, { "epoch": 0.3074659801397573, "grad_norm": 1.9018956476225455, "learning_rate": 8.113400669052805e-06, "loss": 0.6865, "step": 10032 }, { "epoch": 0.3074966286624985, "grad_norm": 0.7652581867266213, "learning_rate": 8.113012296676793e-06, "loss": 0.4899, "step": 10033 }, { "epoch": 0.30752727718523964, "grad_norm": 2.0391166605144875, "learning_rate": 8.112623893627476e-06, "loss": 0.84, "step": 10034 }, { "epoch": 0.30755792570798085, "grad_norm": 1.641176843797518, "learning_rate": 8.11223545990868e-06, "loss": 0.7066, "step": 10035 }, { "epoch": 0.30758857423072206, "grad_norm": 1.619517278336058, "learning_rate": 8.111846995524228e-06, "loss": 0.7085, "step": 10036 }, { "epoch": 0.30761922275346326, "grad_norm": 1.661654853814147, "learning_rate": 8.111458500477955e-06, "loss": 0.6678, "step": 10037 }, { "epoch": 0.30764987127620447, "grad_norm": 1.653663606956275, "learning_rate": 8.111069974773684e-06, "loss": 0.6835, "step": 10038 }, { "epoch": 0.3076805197989457, "grad_norm": 1.8773358052217388, "learning_rate": 8.110681418415245e-06, "loss": 0.7306, "step": 10039 }, { "epoch": 0.3077111683216869, "grad_norm": 1.8375903213656628, "learning_rate": 8.110292831406466e-06, "loss": 0.6962, "step": 10040 }, { "epoch": 0.3077418168444281, "grad_norm": 0.850981080095948, "learning_rate": 8.109904213751174e-06, "loss": 0.497, "step": 10041 }, { "epoch": 0.3077724653671693, "grad_norm": 1.6358576374183647, "learning_rate": 8.109515565453202e-06, "loss": 0.6445, "step": 10042 }, { "epoch": 0.3078031138899105, "grad_norm": 1.4080705451668314, "learning_rate": 8.109126886516376e-06, "loss": 0.6838, "step": 10043 }, { "epoch": 0.3078337624126517, "grad_norm": 1.7277811527335412, "learning_rate": 8.108738176944529e-06, "loss": 0.8038, "step": 10044 }, { "epoch": 0.3078644109353929, "grad_norm": 1.5645724452884309, "learning_rate": 8.10834943674149e-06, "loss": 0.6028, "step": 10045 }, { "epoch": 0.3078950594581341, "grad_norm": 0.8262300089899393, "learning_rate": 8.107960665911087e-06, "loss": 0.4886, "step": 10046 }, { "epoch": 0.3079257079808753, "grad_norm": 1.6493482845272724, "learning_rate": 8.107571864457153e-06, "loss": 0.7037, "step": 10047 }, { "epoch": 0.3079563565036165, "grad_norm": 1.534295277741896, "learning_rate": 8.107183032383517e-06, "loss": 0.698, "step": 10048 }, { "epoch": 0.30798700502635773, "grad_norm": 1.7182703998482822, "learning_rate": 8.106794169694012e-06, "loss": 0.73, "step": 10049 }, { "epoch": 0.30801765354909894, "grad_norm": 1.8142591810251345, "learning_rate": 8.106405276392471e-06, "loss": 0.7316, "step": 10050 }, { "epoch": 0.30804830207184014, "grad_norm": 1.8024801904775647, "learning_rate": 8.106016352482722e-06, "loss": 0.7296, "step": 10051 }, { "epoch": 0.30807895059458135, "grad_norm": 0.8120950284964582, "learning_rate": 8.105627397968601e-06, "loss": 0.4557, "step": 10052 }, { "epoch": 0.30810959911732255, "grad_norm": 0.7866239272681655, "learning_rate": 8.105238412853937e-06, "loss": 0.4638, "step": 10053 }, { "epoch": 0.30814024764006376, "grad_norm": 0.7687675885502991, "learning_rate": 8.104849397142566e-06, "loss": 0.4651, "step": 10054 }, { "epoch": 0.30817089616280496, "grad_norm": 0.7790846006398108, "learning_rate": 8.104460350838318e-06, "loss": 0.4774, "step": 10055 }, { "epoch": 0.30820154468554617, "grad_norm": 0.7785107641014709, "learning_rate": 8.104071273945029e-06, "loss": 0.4603, "step": 10056 }, { "epoch": 0.3082321932082874, "grad_norm": 1.7558124877301418, "learning_rate": 8.10368216646653e-06, "loss": 0.7628, "step": 10057 }, { "epoch": 0.3082628417310286, "grad_norm": 1.6611429778591755, "learning_rate": 8.103293028406658e-06, "loss": 0.5294, "step": 10058 }, { "epoch": 0.3082934902537698, "grad_norm": 2.995172270162869, "learning_rate": 8.102903859769244e-06, "loss": 0.701, "step": 10059 }, { "epoch": 0.308324138776511, "grad_norm": 1.7672260595609497, "learning_rate": 8.102514660558126e-06, "loss": 0.7225, "step": 10060 }, { "epoch": 0.3083547872992522, "grad_norm": 1.6317167671018173, "learning_rate": 8.102125430777138e-06, "loss": 0.7012, "step": 10061 }, { "epoch": 0.3083854358219934, "grad_norm": 1.5631672144926574, "learning_rate": 8.101736170430113e-06, "loss": 0.7275, "step": 10062 }, { "epoch": 0.3084160843447346, "grad_norm": 1.6835390000094919, "learning_rate": 8.101346879520888e-06, "loss": 0.6611, "step": 10063 }, { "epoch": 0.3084467328674758, "grad_norm": 1.7539966116507246, "learning_rate": 8.100957558053298e-06, "loss": 0.7299, "step": 10064 }, { "epoch": 0.30847738139021696, "grad_norm": 1.6819022592822206, "learning_rate": 8.10056820603118e-06, "loss": 0.6439, "step": 10065 }, { "epoch": 0.30850802991295817, "grad_norm": 1.6171432535146752, "learning_rate": 8.100178823458373e-06, "loss": 0.7455, "step": 10066 }, { "epoch": 0.3085386784356994, "grad_norm": 1.7424709695582234, "learning_rate": 8.099789410338708e-06, "loss": 0.8021, "step": 10067 }, { "epoch": 0.3085693269584406, "grad_norm": 1.827678982887259, "learning_rate": 8.099399966676025e-06, "loss": 0.7501, "step": 10068 }, { "epoch": 0.3085999754811818, "grad_norm": 1.8361868308740676, "learning_rate": 8.099010492474162e-06, "loss": 0.6763, "step": 10069 }, { "epoch": 0.308630624003923, "grad_norm": 1.6799639957198556, "learning_rate": 8.098620987736953e-06, "loss": 0.6837, "step": 10070 }, { "epoch": 0.3086612725266642, "grad_norm": 1.1337855844843137, "learning_rate": 8.098231452468242e-06, "loss": 0.4863, "step": 10071 }, { "epoch": 0.3086919210494054, "grad_norm": 1.7514464152043057, "learning_rate": 8.097841886671863e-06, "loss": 0.7111, "step": 10072 }, { "epoch": 0.3087225695721466, "grad_norm": 1.6419533226370162, "learning_rate": 8.097452290351655e-06, "loss": 0.7212, "step": 10073 }, { "epoch": 0.3087532180948878, "grad_norm": 1.785181100353865, "learning_rate": 8.097062663511457e-06, "loss": 0.7169, "step": 10074 }, { "epoch": 0.308783866617629, "grad_norm": 1.7447569295565761, "learning_rate": 8.096673006155107e-06, "loss": 0.6131, "step": 10075 }, { "epoch": 0.3088145151403702, "grad_norm": 1.772845647156414, "learning_rate": 8.096283318286446e-06, "loss": 0.6872, "step": 10076 }, { "epoch": 0.30884516366311143, "grad_norm": 1.8741020591822655, "learning_rate": 8.095893599909315e-06, "loss": 0.6593, "step": 10077 }, { "epoch": 0.30887581218585264, "grad_norm": 1.6849996263283185, "learning_rate": 8.09550385102755e-06, "loss": 0.7636, "step": 10078 }, { "epoch": 0.30890646070859384, "grad_norm": 0.8603937318391685, "learning_rate": 8.095114071644996e-06, "loss": 0.4571, "step": 10079 }, { "epoch": 0.30893710923133505, "grad_norm": 1.780188205668451, "learning_rate": 8.09472426176549e-06, "loss": 0.7142, "step": 10080 }, { "epoch": 0.30896775775407626, "grad_norm": 1.8137556212392536, "learning_rate": 8.094334421392873e-06, "loss": 0.6631, "step": 10081 }, { "epoch": 0.30899840627681746, "grad_norm": 1.6726502656202054, "learning_rate": 8.09394455053099e-06, "loss": 0.5968, "step": 10082 }, { "epoch": 0.30902905479955867, "grad_norm": 1.6998195155977374, "learning_rate": 8.093554649183677e-06, "loss": 0.7204, "step": 10083 }, { "epoch": 0.3090597033222999, "grad_norm": 1.6653314194244349, "learning_rate": 8.093164717354779e-06, "loss": 0.6325, "step": 10084 }, { "epoch": 0.3090903518450411, "grad_norm": 0.788931064419268, "learning_rate": 8.092774755048138e-06, "loss": 0.4633, "step": 10085 }, { "epoch": 0.3091210003677823, "grad_norm": 1.715583706698734, "learning_rate": 8.092384762267596e-06, "loss": 0.654, "step": 10086 }, { "epoch": 0.3091516488905235, "grad_norm": 1.7700461176077817, "learning_rate": 8.091994739016995e-06, "loss": 0.6766, "step": 10087 }, { "epoch": 0.3091822974132647, "grad_norm": 1.6181894056807764, "learning_rate": 8.09160468530018e-06, "loss": 0.6588, "step": 10088 }, { "epoch": 0.3092129459360059, "grad_norm": 1.8831072668402187, "learning_rate": 8.091214601120992e-06, "loss": 0.7604, "step": 10089 }, { "epoch": 0.3092435944587471, "grad_norm": 1.969028599202364, "learning_rate": 8.090824486483274e-06, "loss": 0.7453, "step": 10090 }, { "epoch": 0.3092742429814883, "grad_norm": 0.8889620597845428, "learning_rate": 8.090434341390874e-06, "loss": 0.485, "step": 10091 }, { "epoch": 0.3093048915042295, "grad_norm": 1.8749088670669127, "learning_rate": 8.090044165847634e-06, "loss": 0.72, "step": 10092 }, { "epoch": 0.3093355400269707, "grad_norm": 1.4787354360767133, "learning_rate": 8.089653959857398e-06, "loss": 0.6249, "step": 10093 }, { "epoch": 0.30936618854971193, "grad_norm": 1.6449970558596003, "learning_rate": 8.08926372342401e-06, "loss": 0.6135, "step": 10094 }, { "epoch": 0.30939683707245313, "grad_norm": 1.617305833831159, "learning_rate": 8.088873456551317e-06, "loss": 0.6912, "step": 10095 }, { "epoch": 0.3094274855951943, "grad_norm": 1.7727314507673428, "learning_rate": 8.088483159243164e-06, "loss": 0.6917, "step": 10096 }, { "epoch": 0.3094581341179355, "grad_norm": 1.7116448356930645, "learning_rate": 8.088092831503394e-06, "loss": 0.6296, "step": 10097 }, { "epoch": 0.3094887826406767, "grad_norm": 1.669287814110503, "learning_rate": 8.087702473335858e-06, "loss": 0.6611, "step": 10098 }, { "epoch": 0.3095194311634179, "grad_norm": 1.9615667675469812, "learning_rate": 8.087312084744397e-06, "loss": 0.5688, "step": 10099 }, { "epoch": 0.3095500796861591, "grad_norm": 1.692688172126528, "learning_rate": 8.086921665732861e-06, "loss": 0.5815, "step": 10100 }, { "epoch": 0.3095807282089003, "grad_norm": 1.584373986460534, "learning_rate": 8.086531216305095e-06, "loss": 0.649, "step": 10101 }, { "epoch": 0.3096113767316415, "grad_norm": 1.7291723285890366, "learning_rate": 8.086140736464949e-06, "loss": 0.6909, "step": 10102 }, { "epoch": 0.3096420252543827, "grad_norm": 1.7590291134477556, "learning_rate": 8.085750226216267e-06, "loss": 0.6288, "step": 10103 }, { "epoch": 0.30967267377712393, "grad_norm": 1.5677416329862728, "learning_rate": 8.0853596855629e-06, "loss": 0.6278, "step": 10104 }, { "epoch": 0.30970332229986514, "grad_norm": 1.887187618742984, "learning_rate": 8.084969114508693e-06, "loss": 0.6899, "step": 10105 }, { "epoch": 0.30973397082260634, "grad_norm": 1.81927406485111, "learning_rate": 8.084578513057499e-06, "loss": 0.6911, "step": 10106 }, { "epoch": 0.30976461934534755, "grad_norm": 0.8815177275923194, "learning_rate": 8.084187881213162e-06, "loss": 0.4464, "step": 10107 }, { "epoch": 0.30979526786808875, "grad_norm": 0.831122761464326, "learning_rate": 8.083797218979532e-06, "loss": 0.4652, "step": 10108 }, { "epoch": 0.30982591639082996, "grad_norm": 1.7413585515761807, "learning_rate": 8.083406526360459e-06, "loss": 0.6245, "step": 10109 }, { "epoch": 0.30985656491357116, "grad_norm": 1.9388748730735341, "learning_rate": 8.083015803359793e-06, "loss": 0.6903, "step": 10110 }, { "epoch": 0.30988721343631237, "grad_norm": 0.8417568301404633, "learning_rate": 8.082625049981383e-06, "loss": 0.4577, "step": 10111 }, { "epoch": 0.3099178619590536, "grad_norm": 1.812310893317172, "learning_rate": 8.08223426622908e-06, "loss": 0.6706, "step": 10112 }, { "epoch": 0.3099485104817948, "grad_norm": 1.6747522359081, "learning_rate": 8.081843452106735e-06, "loss": 0.6253, "step": 10113 }, { "epoch": 0.309979159004536, "grad_norm": 1.6017241041383226, "learning_rate": 8.081452607618196e-06, "loss": 0.602, "step": 10114 }, { "epoch": 0.3100098075272772, "grad_norm": 0.8720517148842616, "learning_rate": 8.08106173276732e-06, "loss": 0.4743, "step": 10115 }, { "epoch": 0.3100404560500184, "grad_norm": 1.8072466870877735, "learning_rate": 8.08067082755795e-06, "loss": 0.6359, "step": 10116 }, { "epoch": 0.3100711045727596, "grad_norm": 1.7256905232264013, "learning_rate": 8.080279891993943e-06, "loss": 0.7568, "step": 10117 }, { "epoch": 0.3101017530955008, "grad_norm": 1.8122934792871799, "learning_rate": 8.079888926079152e-06, "loss": 0.7563, "step": 10118 }, { "epoch": 0.310132401618242, "grad_norm": 1.6636519335442888, "learning_rate": 8.079497929817426e-06, "loss": 0.7255, "step": 10119 }, { "epoch": 0.3101630501409832, "grad_norm": 1.6115537185219433, "learning_rate": 8.07910690321262e-06, "loss": 0.5598, "step": 10120 }, { "epoch": 0.3101936986637244, "grad_norm": 0.8504589220174502, "learning_rate": 8.078715846268583e-06, "loss": 0.4705, "step": 10121 }, { "epoch": 0.31022434718646563, "grad_norm": 1.6139950869049242, "learning_rate": 8.078324758989174e-06, "loss": 0.68, "step": 10122 }, { "epoch": 0.31025499570920684, "grad_norm": 0.8156365721494608, "learning_rate": 8.077933641378243e-06, "loss": 0.4642, "step": 10123 }, { "epoch": 0.31028564423194804, "grad_norm": 2.064826217284638, "learning_rate": 8.077542493439643e-06, "loss": 0.6591, "step": 10124 }, { "epoch": 0.31031629275468925, "grad_norm": 1.6153779176496492, "learning_rate": 8.077151315177232e-06, "loss": 0.7171, "step": 10125 }, { "epoch": 0.31034694127743045, "grad_norm": 1.852348751504051, "learning_rate": 8.076760106594859e-06, "loss": 0.6561, "step": 10126 }, { "epoch": 0.3103775898001716, "grad_norm": 1.7898484423871, "learning_rate": 8.076368867696382e-06, "loss": 0.6871, "step": 10127 }, { "epoch": 0.3104082383229128, "grad_norm": 1.7417992910247408, "learning_rate": 8.075977598485656e-06, "loss": 0.6959, "step": 10128 }, { "epoch": 0.310438886845654, "grad_norm": 1.6208193484848572, "learning_rate": 8.075586298966536e-06, "loss": 0.6401, "step": 10129 }, { "epoch": 0.3104695353683952, "grad_norm": 0.8507793142067752, "learning_rate": 8.075194969142876e-06, "loss": 0.4657, "step": 10130 }, { "epoch": 0.3105001838911364, "grad_norm": 1.822669491533285, "learning_rate": 8.074803609018535e-06, "loss": 0.7604, "step": 10131 }, { "epoch": 0.31053083241387763, "grad_norm": 1.6115280936607073, "learning_rate": 8.074412218597367e-06, "loss": 0.7082, "step": 10132 }, { "epoch": 0.31056148093661884, "grad_norm": 1.6361780316393648, "learning_rate": 8.07402079788323e-06, "loss": 0.7618, "step": 10133 }, { "epoch": 0.31059212945936004, "grad_norm": 1.625532682314719, "learning_rate": 8.073629346879976e-06, "loss": 0.7274, "step": 10134 }, { "epoch": 0.31062277798210125, "grad_norm": 1.871841851448694, "learning_rate": 8.073237865591468e-06, "loss": 0.7756, "step": 10135 }, { "epoch": 0.31065342650484246, "grad_norm": 2.8135160580216696, "learning_rate": 8.07284635402156e-06, "loss": 0.6618, "step": 10136 }, { "epoch": 0.31068407502758366, "grad_norm": 1.6916045749916504, "learning_rate": 8.072454812174111e-06, "loss": 0.6981, "step": 10137 }, { "epoch": 0.31071472355032487, "grad_norm": 1.753590758889076, "learning_rate": 8.072063240052978e-06, "loss": 0.6051, "step": 10138 }, { "epoch": 0.3107453720730661, "grad_norm": 1.7280514434205065, "learning_rate": 8.071671637662022e-06, "loss": 0.6133, "step": 10139 }, { "epoch": 0.3107760205958073, "grad_norm": 1.6724925834515456, "learning_rate": 8.071280005005098e-06, "loss": 0.708, "step": 10140 }, { "epoch": 0.3108066691185485, "grad_norm": 1.7655272115963094, "learning_rate": 8.070888342086065e-06, "loss": 0.6642, "step": 10141 }, { "epoch": 0.3108373176412897, "grad_norm": 1.1103673360618431, "learning_rate": 8.070496648908786e-06, "loss": 0.4618, "step": 10142 }, { "epoch": 0.3108679661640309, "grad_norm": 1.821528980446909, "learning_rate": 8.070104925477116e-06, "loss": 0.6014, "step": 10143 }, { "epoch": 0.3108986146867721, "grad_norm": 0.8730600648821262, "learning_rate": 8.069713171794918e-06, "loss": 0.4731, "step": 10144 }, { "epoch": 0.3109292632095133, "grad_norm": 1.983476052522464, "learning_rate": 8.06932138786605e-06, "loss": 0.7167, "step": 10145 }, { "epoch": 0.3109599117322545, "grad_norm": 1.531679379690055, "learning_rate": 8.068929573694373e-06, "loss": 0.5411, "step": 10146 }, { "epoch": 0.3109905602549957, "grad_norm": 1.8443129990879759, "learning_rate": 8.068537729283748e-06, "loss": 0.6819, "step": 10147 }, { "epoch": 0.3110212087777369, "grad_norm": 1.7016957642285062, "learning_rate": 8.068145854638034e-06, "loss": 0.7479, "step": 10148 }, { "epoch": 0.31105185730047813, "grad_norm": 1.6566023174548126, "learning_rate": 8.067753949761095e-06, "loss": 0.6672, "step": 10149 }, { "epoch": 0.31108250582321934, "grad_norm": 1.8770510215302554, "learning_rate": 8.067362014656792e-06, "loss": 0.689, "step": 10150 }, { "epoch": 0.31111315434596054, "grad_norm": 1.7678809407121239, "learning_rate": 8.066970049328985e-06, "loss": 0.555, "step": 10151 }, { "epoch": 0.31114380286870175, "grad_norm": 1.8724335270034573, "learning_rate": 8.06657805378154e-06, "loss": 0.738, "step": 10152 }, { "epoch": 0.31117445139144295, "grad_norm": 1.927024317498484, "learning_rate": 8.066186028018314e-06, "loss": 0.6257, "step": 10153 }, { "epoch": 0.31120509991418416, "grad_norm": 1.8284009511927812, "learning_rate": 8.065793972043175e-06, "loss": 0.7071, "step": 10154 }, { "epoch": 0.31123574843692536, "grad_norm": 1.8104392781704077, "learning_rate": 8.06540188585998e-06, "loss": 0.6594, "step": 10155 }, { "epoch": 0.31126639695966657, "grad_norm": 1.6021870692118136, "learning_rate": 8.065009769472598e-06, "loss": 0.672, "step": 10156 }, { "epoch": 0.3112970454824078, "grad_norm": 1.7037731186854124, "learning_rate": 8.064617622884892e-06, "loss": 0.7125, "step": 10157 }, { "epoch": 0.3113276940051489, "grad_norm": 1.7425480337988668, "learning_rate": 8.064225446100723e-06, "loss": 0.6228, "step": 10158 }, { "epoch": 0.31135834252789013, "grad_norm": 1.6930155125776924, "learning_rate": 8.063833239123958e-06, "loss": 0.7095, "step": 10159 }, { "epoch": 0.31138899105063134, "grad_norm": 1.8041130159801273, "learning_rate": 8.063441001958456e-06, "loss": 0.6813, "step": 10160 }, { "epoch": 0.31141963957337254, "grad_norm": 1.9462973529687955, "learning_rate": 8.06304873460809e-06, "loss": 0.7749, "step": 10161 }, { "epoch": 0.31145028809611375, "grad_norm": 2.249675284095581, "learning_rate": 8.06265643707672e-06, "loss": 0.5814, "step": 10162 }, { "epoch": 0.31148093661885495, "grad_norm": 1.710251388197853, "learning_rate": 8.062264109368214e-06, "loss": 0.6763, "step": 10163 }, { "epoch": 0.31151158514159616, "grad_norm": 1.7145320551080157, "learning_rate": 8.061871751486434e-06, "loss": 0.7106, "step": 10164 }, { "epoch": 0.31154223366433736, "grad_norm": 1.924403899391398, "learning_rate": 8.06147936343525e-06, "loss": 0.6448, "step": 10165 }, { "epoch": 0.31157288218707857, "grad_norm": 1.841172014350238, "learning_rate": 8.061086945218523e-06, "loss": 0.7142, "step": 10166 }, { "epoch": 0.3116035307098198, "grad_norm": 2.0163861831308503, "learning_rate": 8.060694496840127e-06, "loss": 0.7394, "step": 10167 }, { "epoch": 0.311634179232561, "grad_norm": 1.942901485936241, "learning_rate": 8.060302018303923e-06, "loss": 0.6663, "step": 10168 }, { "epoch": 0.3116648277553022, "grad_norm": 1.7345590705464125, "learning_rate": 8.059909509613781e-06, "loss": 0.727, "step": 10169 }, { "epoch": 0.3116954762780434, "grad_norm": 1.5418040243326685, "learning_rate": 8.059516970773566e-06, "loss": 0.7315, "step": 10170 }, { "epoch": 0.3117261248007846, "grad_norm": 1.752457025470043, "learning_rate": 8.05912440178715e-06, "loss": 0.6677, "step": 10171 }, { "epoch": 0.3117567733235258, "grad_norm": 1.5810352316104213, "learning_rate": 8.058731802658397e-06, "loss": 0.5952, "step": 10172 }, { "epoch": 0.311787421846267, "grad_norm": 1.7145015268000687, "learning_rate": 8.058339173391179e-06, "loss": 0.5935, "step": 10173 }, { "epoch": 0.3118180703690082, "grad_norm": 1.9102317297802698, "learning_rate": 8.05794651398936e-06, "loss": 0.7111, "step": 10174 }, { "epoch": 0.3118487188917494, "grad_norm": 1.5423601766217383, "learning_rate": 8.057553824456812e-06, "loss": 0.6758, "step": 10175 }, { "epoch": 0.3118793674144906, "grad_norm": 1.9196314939173196, "learning_rate": 8.057161104797404e-06, "loss": 0.696, "step": 10176 }, { "epoch": 0.31191001593723183, "grad_norm": 1.7002529909467825, "learning_rate": 8.056768355015008e-06, "loss": 0.7178, "step": 10177 }, { "epoch": 0.31194066445997304, "grad_norm": 1.8182810330532906, "learning_rate": 8.056375575113489e-06, "loss": 0.7321, "step": 10178 }, { "epoch": 0.31197131298271424, "grad_norm": 1.8943885599754187, "learning_rate": 8.055982765096719e-06, "loss": 0.6391, "step": 10179 }, { "epoch": 0.31200196150545545, "grad_norm": 1.7204723047765342, "learning_rate": 8.05558992496857e-06, "loss": 0.7171, "step": 10180 }, { "epoch": 0.31203261002819666, "grad_norm": 1.592987311248771, "learning_rate": 8.05519705473291e-06, "loss": 0.665, "step": 10181 }, { "epoch": 0.31206325855093786, "grad_norm": 1.663425809598342, "learning_rate": 8.054804154393614e-06, "loss": 0.6328, "step": 10182 }, { "epoch": 0.31209390707367907, "grad_norm": 1.6846222139570486, "learning_rate": 8.05441122395455e-06, "loss": 0.755, "step": 10183 }, { "epoch": 0.3121245555964203, "grad_norm": 2.0454098602131694, "learning_rate": 8.054018263419591e-06, "loss": 0.6946, "step": 10184 }, { "epoch": 0.3121552041191615, "grad_norm": 1.8169944904018398, "learning_rate": 8.053625272792609e-06, "loss": 0.642, "step": 10185 }, { "epoch": 0.3121858526419027, "grad_norm": 1.6582364815939545, "learning_rate": 8.053232252077475e-06, "loss": 0.649, "step": 10186 }, { "epoch": 0.3122165011646439, "grad_norm": 1.4828999093079442, "learning_rate": 8.052839201278063e-06, "loss": 0.7458, "step": 10187 }, { "epoch": 0.3122471496873851, "grad_norm": 1.4803472649372866, "learning_rate": 8.052446120398246e-06, "loss": 0.4887, "step": 10188 }, { "epoch": 0.31227779821012625, "grad_norm": 1.815739206556174, "learning_rate": 8.052053009441893e-06, "loss": 0.8074, "step": 10189 }, { "epoch": 0.31230844673286745, "grad_norm": 2.3731385626081063, "learning_rate": 8.051659868412885e-06, "loss": 0.779, "step": 10190 }, { "epoch": 0.31233909525560866, "grad_norm": 1.9137756073461307, "learning_rate": 8.051266697315087e-06, "loss": 0.6179, "step": 10191 }, { "epoch": 0.31236974377834986, "grad_norm": 1.6889608036053037, "learning_rate": 8.050873496152382e-06, "loss": 0.7221, "step": 10192 }, { "epoch": 0.31240039230109107, "grad_norm": 1.70177629354553, "learning_rate": 8.050480264928637e-06, "loss": 0.6969, "step": 10193 }, { "epoch": 0.3124310408238323, "grad_norm": 1.772157532373174, "learning_rate": 8.050087003647731e-06, "loss": 0.6797, "step": 10194 }, { "epoch": 0.3124616893465735, "grad_norm": 1.753955730478708, "learning_rate": 8.049693712313537e-06, "loss": 0.6418, "step": 10195 }, { "epoch": 0.3124923378693147, "grad_norm": 1.8012183097492118, "learning_rate": 8.049300390929931e-06, "loss": 0.757, "step": 10196 }, { "epoch": 0.3125229863920559, "grad_norm": 1.7181063732503596, "learning_rate": 8.048907039500786e-06, "loss": 0.7204, "step": 10197 }, { "epoch": 0.3125536349147971, "grad_norm": 1.6438378781397014, "learning_rate": 8.048513658029981e-06, "loss": 0.7182, "step": 10198 }, { "epoch": 0.3125842834375383, "grad_norm": 1.7565382173805995, "learning_rate": 8.048120246521392e-06, "loss": 0.6486, "step": 10199 }, { "epoch": 0.3126149319602795, "grad_norm": 1.6615955051983922, "learning_rate": 8.047726804978893e-06, "loss": 0.6313, "step": 10200 }, { "epoch": 0.3126455804830207, "grad_norm": 1.7309263548472036, "learning_rate": 8.047333333406363e-06, "loss": 0.6723, "step": 10201 }, { "epoch": 0.3126762290057619, "grad_norm": 1.7095933681358442, "learning_rate": 8.046939831807678e-06, "loss": 0.7777, "step": 10202 }, { "epoch": 0.3127068775285031, "grad_norm": 1.6947453711771643, "learning_rate": 8.046546300186714e-06, "loss": 0.6687, "step": 10203 }, { "epoch": 0.31273752605124433, "grad_norm": 1.804425029221585, "learning_rate": 8.04615273854735e-06, "loss": 0.6597, "step": 10204 }, { "epoch": 0.31276817457398554, "grad_norm": 1.9665916720510055, "learning_rate": 8.045759146893465e-06, "loss": 0.686, "step": 10205 }, { "epoch": 0.31279882309672674, "grad_norm": 1.8430605953904968, "learning_rate": 8.045365525228934e-06, "loss": 0.713, "step": 10206 }, { "epoch": 0.31282947161946795, "grad_norm": 1.854644405813173, "learning_rate": 8.044971873557639e-06, "loss": 0.7248, "step": 10207 }, { "epoch": 0.31286012014220915, "grad_norm": 1.6139153414936482, "learning_rate": 8.044578191883456e-06, "loss": 0.6669, "step": 10208 }, { "epoch": 0.31289076866495036, "grad_norm": 1.647428758242388, "learning_rate": 8.044184480210267e-06, "loss": 0.6124, "step": 10209 }, { "epoch": 0.31292141718769156, "grad_norm": 1.609264663840641, "learning_rate": 8.043790738541947e-06, "loss": 0.7095, "step": 10210 }, { "epoch": 0.31295206571043277, "grad_norm": 1.7051927107196754, "learning_rate": 8.043396966882377e-06, "loss": 0.6614, "step": 10211 }, { "epoch": 0.312982714233174, "grad_norm": 1.2740378447380516, "learning_rate": 8.04300316523544e-06, "loss": 0.5108, "step": 10212 }, { "epoch": 0.3130133627559152, "grad_norm": 1.135124129218952, "learning_rate": 8.042609333605017e-06, "loss": 0.4919, "step": 10213 }, { "epoch": 0.3130440112786564, "grad_norm": 1.758425511979037, "learning_rate": 8.042215471994981e-06, "loss": 0.7187, "step": 10214 }, { "epoch": 0.3130746598013976, "grad_norm": 1.731461880898835, "learning_rate": 8.04182158040922e-06, "loss": 0.7101, "step": 10215 }, { "epoch": 0.3131053083241388, "grad_norm": 1.8599544476126204, "learning_rate": 8.041427658851613e-06, "loss": 0.5975, "step": 10216 }, { "epoch": 0.31313595684688, "grad_norm": 1.1957033883085273, "learning_rate": 8.04103370732604e-06, "loss": 0.4892, "step": 10217 }, { "epoch": 0.3131666053696212, "grad_norm": 1.0850601348975035, "learning_rate": 8.040639725836384e-06, "loss": 0.4817, "step": 10218 }, { "epoch": 0.3131972538923624, "grad_norm": 1.6753144386032548, "learning_rate": 8.040245714386528e-06, "loss": 0.7072, "step": 10219 }, { "epoch": 0.31322790241510357, "grad_norm": 1.6922184451439382, "learning_rate": 8.03985167298035e-06, "loss": 0.7178, "step": 10220 }, { "epoch": 0.31325855093784477, "grad_norm": 1.7022490198572477, "learning_rate": 8.039457601621738e-06, "loss": 0.7575, "step": 10221 }, { "epoch": 0.313289199460586, "grad_norm": 1.825165850100471, "learning_rate": 8.039063500314572e-06, "loss": 0.6727, "step": 10222 }, { "epoch": 0.3133198479833272, "grad_norm": 1.6411897710156371, "learning_rate": 8.038669369062736e-06, "loss": 0.759, "step": 10223 }, { "epoch": 0.3133504965060684, "grad_norm": 0.8601215195223313, "learning_rate": 8.038275207870114e-06, "loss": 0.4676, "step": 10224 }, { "epoch": 0.3133811450288096, "grad_norm": 1.9893837147896665, "learning_rate": 8.037881016740587e-06, "loss": 0.7555, "step": 10225 }, { "epoch": 0.3134117935515508, "grad_norm": 1.7840220417464825, "learning_rate": 8.037486795678042e-06, "loss": 0.7659, "step": 10226 }, { "epoch": 0.313442442074292, "grad_norm": 1.8518270333165352, "learning_rate": 8.037092544686364e-06, "loss": 0.7505, "step": 10227 }, { "epoch": 0.3134730905970332, "grad_norm": 1.716344784003412, "learning_rate": 8.036698263769434e-06, "loss": 0.6782, "step": 10228 }, { "epoch": 0.3135037391197744, "grad_norm": 1.6918442237026052, "learning_rate": 8.03630395293114e-06, "loss": 0.706, "step": 10229 }, { "epoch": 0.3135343876425156, "grad_norm": 1.648072183405197, "learning_rate": 8.035909612175366e-06, "loss": 0.6091, "step": 10230 }, { "epoch": 0.3135650361652568, "grad_norm": 1.7763922714045588, "learning_rate": 8.035515241505999e-06, "loss": 0.7361, "step": 10231 }, { "epoch": 0.31359568468799803, "grad_norm": 1.7951363810097214, "learning_rate": 8.035120840926922e-06, "loss": 0.7397, "step": 10232 }, { "epoch": 0.31362633321073924, "grad_norm": 1.583688258663894, "learning_rate": 8.034726410442024e-06, "loss": 0.6153, "step": 10233 }, { "epoch": 0.31365698173348044, "grad_norm": 1.7271782001319476, "learning_rate": 8.034331950055188e-06, "loss": 0.6491, "step": 10234 }, { "epoch": 0.31368763025622165, "grad_norm": 1.8049319782403765, "learning_rate": 8.033937459770306e-06, "loss": 0.6985, "step": 10235 }, { "epoch": 0.31371827877896286, "grad_norm": 1.7393159685426198, "learning_rate": 8.03354293959126e-06, "loss": 0.6293, "step": 10236 }, { "epoch": 0.31374892730170406, "grad_norm": 1.680276420393459, "learning_rate": 8.033148389521939e-06, "loss": 0.7893, "step": 10237 }, { "epoch": 0.31377957582444527, "grad_norm": 1.7490075828242515, "learning_rate": 8.032753809566232e-06, "loss": 0.6722, "step": 10238 }, { "epoch": 0.3138102243471865, "grad_norm": 1.8932299769952265, "learning_rate": 8.032359199728025e-06, "loss": 0.8089, "step": 10239 }, { "epoch": 0.3138408728699277, "grad_norm": 2.0978957714640196, "learning_rate": 8.031964560011207e-06, "loss": 0.6524, "step": 10240 }, { "epoch": 0.3138715213926689, "grad_norm": 1.6665676810907033, "learning_rate": 8.031569890419667e-06, "loss": 0.6609, "step": 10241 }, { "epoch": 0.3139021699154101, "grad_norm": 2.107507380615819, "learning_rate": 8.031175190957295e-06, "loss": 0.8009, "step": 10242 }, { "epoch": 0.3139328184381513, "grad_norm": 1.6106450759129987, "learning_rate": 8.030780461627975e-06, "loss": 0.6325, "step": 10243 }, { "epoch": 0.3139634669608925, "grad_norm": 1.501369590526032, "learning_rate": 8.0303857024356e-06, "loss": 0.6692, "step": 10244 }, { "epoch": 0.3139941154836337, "grad_norm": 1.6579552141662497, "learning_rate": 8.02999091338406e-06, "loss": 0.7428, "step": 10245 }, { "epoch": 0.3140247640063749, "grad_norm": 1.8542923925832295, "learning_rate": 8.029596094477246e-06, "loss": 0.721, "step": 10246 }, { "epoch": 0.3140554125291161, "grad_norm": 1.9637589932681412, "learning_rate": 8.029201245719046e-06, "loss": 0.7278, "step": 10247 }, { "epoch": 0.3140860610518573, "grad_norm": 1.8566052402267323, "learning_rate": 8.028806367113349e-06, "loss": 0.7003, "step": 10248 }, { "epoch": 0.31411670957459853, "grad_norm": 1.9072467322360744, "learning_rate": 8.028411458664047e-06, "loss": 0.7189, "step": 10249 }, { "epoch": 0.31414735809733974, "grad_norm": 2.0038107527631004, "learning_rate": 8.028016520375036e-06, "loss": 0.707, "step": 10250 }, { "epoch": 0.3141780066200809, "grad_norm": 1.042093513231399, "learning_rate": 8.0276215522502e-06, "loss": 0.482, "step": 10251 }, { "epoch": 0.3142086551428221, "grad_norm": 0.8673545953907261, "learning_rate": 8.027226554293435e-06, "loss": 0.4859, "step": 10252 }, { "epoch": 0.3142393036655633, "grad_norm": 1.6132067008062996, "learning_rate": 8.026831526508633e-06, "loss": 0.6826, "step": 10253 }, { "epoch": 0.3142699521883045, "grad_norm": 1.6567529732917545, "learning_rate": 8.026436468899686e-06, "loss": 0.6776, "step": 10254 }, { "epoch": 0.3143006007110457, "grad_norm": 1.5881416411951328, "learning_rate": 8.026041381470486e-06, "loss": 0.6852, "step": 10255 }, { "epoch": 0.3143312492337869, "grad_norm": 1.5820035370831755, "learning_rate": 8.025646264224924e-06, "loss": 0.5877, "step": 10256 }, { "epoch": 0.3143618977565281, "grad_norm": 0.9392477608929719, "learning_rate": 8.025251117166896e-06, "loss": 0.4672, "step": 10257 }, { "epoch": 0.3143925462792693, "grad_norm": 1.6908226821751586, "learning_rate": 8.024855940300298e-06, "loss": 0.7372, "step": 10258 }, { "epoch": 0.31442319480201053, "grad_norm": 1.7509340998450176, "learning_rate": 8.024460733629017e-06, "loss": 0.7125, "step": 10259 }, { "epoch": 0.31445384332475174, "grad_norm": 0.8424303633499397, "learning_rate": 8.024065497156951e-06, "loss": 0.4659, "step": 10260 }, { "epoch": 0.31448449184749294, "grad_norm": 1.735460357417008, "learning_rate": 8.023670230887995e-06, "loss": 0.6743, "step": 10261 }, { "epoch": 0.31451514037023415, "grad_norm": 1.9164321330789187, "learning_rate": 8.023274934826042e-06, "loss": 0.7622, "step": 10262 }, { "epoch": 0.31454578889297535, "grad_norm": 1.5879805020727678, "learning_rate": 8.022879608974988e-06, "loss": 0.6804, "step": 10263 }, { "epoch": 0.31457643741571656, "grad_norm": 0.8445760021786048, "learning_rate": 8.022484253338726e-06, "loss": 0.4784, "step": 10264 }, { "epoch": 0.31460708593845776, "grad_norm": 2.0188878944229827, "learning_rate": 8.022088867921157e-06, "loss": 0.6504, "step": 10265 }, { "epoch": 0.31463773446119897, "grad_norm": 1.6077070683938326, "learning_rate": 8.02169345272617e-06, "loss": 0.7552, "step": 10266 }, { "epoch": 0.3146683829839402, "grad_norm": 1.5747879025383342, "learning_rate": 8.021298007757663e-06, "loss": 0.6245, "step": 10267 }, { "epoch": 0.3146990315066814, "grad_norm": 1.5655018942918035, "learning_rate": 8.020902533019536e-06, "loss": 0.6679, "step": 10268 }, { "epoch": 0.3147296800294226, "grad_norm": 1.8110885762150248, "learning_rate": 8.020507028515684e-06, "loss": 0.8777, "step": 10269 }, { "epoch": 0.3147603285521638, "grad_norm": 1.8313327253917602, "learning_rate": 8.020111494250003e-06, "loss": 0.6926, "step": 10270 }, { "epoch": 0.314790977074905, "grad_norm": 1.735603932701433, "learning_rate": 8.019715930226389e-06, "loss": 0.747, "step": 10271 }, { "epoch": 0.3148216255976462, "grad_norm": 0.8812473739575681, "learning_rate": 8.019320336448743e-06, "loss": 0.4669, "step": 10272 }, { "epoch": 0.3148522741203874, "grad_norm": 1.6966035135027122, "learning_rate": 8.018924712920961e-06, "loss": 0.7476, "step": 10273 }, { "epoch": 0.3148829226431286, "grad_norm": 1.64806554518263, "learning_rate": 8.018529059646941e-06, "loss": 0.6197, "step": 10274 }, { "epoch": 0.3149135711658698, "grad_norm": 0.8736661296314143, "learning_rate": 8.018133376630582e-06, "loss": 0.4847, "step": 10275 }, { "epoch": 0.314944219688611, "grad_norm": 1.7269778890049055, "learning_rate": 8.017737663875782e-06, "loss": 0.7964, "step": 10276 }, { "epoch": 0.31497486821135223, "grad_norm": 1.7455598391709817, "learning_rate": 8.01734192138644e-06, "loss": 0.754, "step": 10277 }, { "epoch": 0.31500551673409344, "grad_norm": 1.4602705113955312, "learning_rate": 8.016946149166458e-06, "loss": 0.5298, "step": 10278 }, { "epoch": 0.31503616525683464, "grad_norm": 1.6005831076021697, "learning_rate": 8.016550347219734e-06, "loss": 0.6703, "step": 10279 }, { "epoch": 0.31506681377957585, "grad_norm": 1.79807250798061, "learning_rate": 8.016154515550165e-06, "loss": 0.6304, "step": 10280 }, { "epoch": 0.31509746230231706, "grad_norm": 1.510247594521405, "learning_rate": 8.015758654161657e-06, "loss": 0.6119, "step": 10281 }, { "epoch": 0.3151281108250582, "grad_norm": 1.6592276927898075, "learning_rate": 8.015362763058105e-06, "loss": 0.7123, "step": 10282 }, { "epoch": 0.3151587593477994, "grad_norm": 1.6983426852616887, "learning_rate": 8.014966842243414e-06, "loss": 0.6863, "step": 10283 }, { "epoch": 0.3151894078705406, "grad_norm": 1.7134905942092016, "learning_rate": 8.014570891721481e-06, "loss": 0.6706, "step": 10284 }, { "epoch": 0.3152200563932818, "grad_norm": 1.7181113823786547, "learning_rate": 8.014174911496213e-06, "loss": 0.732, "step": 10285 }, { "epoch": 0.31525070491602303, "grad_norm": 1.47853210144812, "learning_rate": 8.013778901571506e-06, "loss": 0.6747, "step": 10286 }, { "epoch": 0.31528135343876423, "grad_norm": 1.6848789542051483, "learning_rate": 8.013382861951264e-06, "loss": 0.6685, "step": 10287 }, { "epoch": 0.31531200196150544, "grad_norm": 1.7597538960720949, "learning_rate": 8.01298679263939e-06, "loss": 0.7102, "step": 10288 }, { "epoch": 0.31534265048424664, "grad_norm": 1.6389878587084445, "learning_rate": 8.012590693639786e-06, "loss": 0.654, "step": 10289 }, { "epoch": 0.31537329900698785, "grad_norm": 1.8146355310582671, "learning_rate": 8.012194564956357e-06, "loss": 0.7711, "step": 10290 }, { "epoch": 0.31540394752972906, "grad_norm": 1.7446195287490116, "learning_rate": 8.011798406593004e-06, "loss": 0.7819, "step": 10291 }, { "epoch": 0.31543459605247026, "grad_norm": 1.6249350817266666, "learning_rate": 8.011402218553628e-06, "loss": 0.7249, "step": 10292 }, { "epoch": 0.31546524457521147, "grad_norm": 1.5320020769424063, "learning_rate": 8.011006000842137e-06, "loss": 0.7798, "step": 10293 }, { "epoch": 0.3154958930979527, "grad_norm": 1.6308069610417335, "learning_rate": 8.010609753462433e-06, "loss": 0.6381, "step": 10294 }, { "epoch": 0.3155265416206939, "grad_norm": 1.0508510812441658, "learning_rate": 8.010213476418422e-06, "loss": 0.4771, "step": 10295 }, { "epoch": 0.3155571901434351, "grad_norm": 1.730042798963716, "learning_rate": 8.009817169714007e-06, "loss": 0.6367, "step": 10296 }, { "epoch": 0.3155878386661763, "grad_norm": 1.7125244233428862, "learning_rate": 8.009420833353094e-06, "loss": 0.7311, "step": 10297 }, { "epoch": 0.3156184871889175, "grad_norm": 1.6360262571299982, "learning_rate": 8.009024467339586e-06, "loss": 0.6911, "step": 10298 }, { "epoch": 0.3156491357116587, "grad_norm": 1.6948290811697913, "learning_rate": 8.00862807167739e-06, "loss": 0.6438, "step": 10299 }, { "epoch": 0.3156797842343999, "grad_norm": 0.8014656659391767, "learning_rate": 8.008231646370412e-06, "loss": 0.4861, "step": 10300 }, { "epoch": 0.3157104327571411, "grad_norm": 1.5639154784312095, "learning_rate": 8.007835191422559e-06, "loss": 0.6675, "step": 10301 }, { "epoch": 0.3157410812798823, "grad_norm": 1.561464738497131, "learning_rate": 8.007438706837735e-06, "loss": 0.7194, "step": 10302 }, { "epoch": 0.3157717298026235, "grad_norm": 1.6722182972821509, "learning_rate": 8.007042192619849e-06, "loss": 0.6881, "step": 10303 }, { "epoch": 0.31580237832536473, "grad_norm": 1.7394958405961016, "learning_rate": 8.006645648772806e-06, "loss": 0.7469, "step": 10304 }, { "epoch": 0.31583302684810594, "grad_norm": 1.6549701626955817, "learning_rate": 8.006249075300515e-06, "loss": 0.631, "step": 10305 }, { "epoch": 0.31586367537084714, "grad_norm": 1.8633868254520898, "learning_rate": 8.005852472206883e-06, "loss": 0.6612, "step": 10306 }, { "epoch": 0.31589432389358835, "grad_norm": 1.756358273196184, "learning_rate": 8.005455839495816e-06, "loss": 0.6046, "step": 10307 }, { "epoch": 0.31592497241632955, "grad_norm": 1.6089866986164802, "learning_rate": 8.005059177171225e-06, "loss": 0.6343, "step": 10308 }, { "epoch": 0.31595562093907076, "grad_norm": 1.7625762971469547, "learning_rate": 8.004662485237016e-06, "loss": 0.6196, "step": 10309 }, { "epoch": 0.31598626946181196, "grad_norm": 1.6442661043442701, "learning_rate": 8.004265763697099e-06, "loss": 0.7069, "step": 10310 }, { "epoch": 0.31601691798455317, "grad_norm": 1.9072602202187123, "learning_rate": 8.003869012555383e-06, "loss": 0.6883, "step": 10311 }, { "epoch": 0.3160475665072944, "grad_norm": 1.7303012472711905, "learning_rate": 8.00347223181578e-06, "loss": 0.6936, "step": 10312 }, { "epoch": 0.3160782150300355, "grad_norm": 0.8910514223137476, "learning_rate": 8.003075421482191e-06, "loss": 0.4617, "step": 10313 }, { "epoch": 0.31610886355277673, "grad_norm": 1.8865855124473627, "learning_rate": 8.002678581558534e-06, "loss": 0.7311, "step": 10314 }, { "epoch": 0.31613951207551794, "grad_norm": 1.539098422577212, "learning_rate": 8.002281712048717e-06, "loss": 0.705, "step": 10315 }, { "epoch": 0.31617016059825914, "grad_norm": 1.637688070155546, "learning_rate": 8.00188481295665e-06, "loss": 0.6644, "step": 10316 }, { "epoch": 0.31620080912100035, "grad_norm": 1.9839459488532587, "learning_rate": 8.001487884286245e-06, "loss": 0.6829, "step": 10317 }, { "epoch": 0.31623145764374155, "grad_norm": 1.5454168070043421, "learning_rate": 8.00109092604141e-06, "loss": 0.6127, "step": 10318 }, { "epoch": 0.31626210616648276, "grad_norm": 1.667398142926777, "learning_rate": 8.00069393822606e-06, "loss": 0.6702, "step": 10319 }, { "epoch": 0.31629275468922396, "grad_norm": 0.7665621436141825, "learning_rate": 8.000296920844102e-06, "loss": 0.4674, "step": 10320 }, { "epoch": 0.31632340321196517, "grad_norm": 1.9264790254857893, "learning_rate": 7.999899873899453e-06, "loss": 0.7151, "step": 10321 }, { "epoch": 0.3163540517347064, "grad_norm": 1.7644245232310627, "learning_rate": 7.999502797396024e-06, "loss": 0.7842, "step": 10322 }, { "epoch": 0.3163847002574476, "grad_norm": 1.4552787418939546, "learning_rate": 7.999105691337725e-06, "loss": 0.6255, "step": 10323 }, { "epoch": 0.3164153487801888, "grad_norm": 1.766365582477874, "learning_rate": 7.99870855572847e-06, "loss": 0.7161, "step": 10324 }, { "epoch": 0.31644599730293, "grad_norm": 1.9857084020219777, "learning_rate": 7.998311390572173e-06, "loss": 0.8855, "step": 10325 }, { "epoch": 0.3164766458256712, "grad_norm": 1.9678292031595717, "learning_rate": 7.997914195872746e-06, "loss": 0.7714, "step": 10326 }, { "epoch": 0.3165072943484124, "grad_norm": 1.4408761625639688, "learning_rate": 7.997516971634106e-06, "loss": 0.7052, "step": 10327 }, { "epoch": 0.3165379428711536, "grad_norm": 1.9065130462253064, "learning_rate": 7.99711971786016e-06, "loss": 0.8689, "step": 10328 }, { "epoch": 0.3165685913938948, "grad_norm": 1.6770637179203127, "learning_rate": 7.996722434554828e-06, "loss": 0.6687, "step": 10329 }, { "epoch": 0.316599239916636, "grad_norm": 1.572683229975785, "learning_rate": 7.996325121722024e-06, "loss": 0.611, "step": 10330 }, { "epoch": 0.3166298884393772, "grad_norm": 0.81494970079232, "learning_rate": 7.995927779365662e-06, "loss": 0.4738, "step": 10331 }, { "epoch": 0.31666053696211843, "grad_norm": 1.607302767349199, "learning_rate": 7.995530407489659e-06, "loss": 0.6142, "step": 10332 }, { "epoch": 0.31669118548485964, "grad_norm": 1.8122017921855118, "learning_rate": 7.995133006097923e-06, "loss": 0.7134, "step": 10333 }, { "epoch": 0.31672183400760084, "grad_norm": 1.97824285069799, "learning_rate": 7.99473557519438e-06, "loss": 0.6665, "step": 10334 }, { "epoch": 0.31675248253034205, "grad_norm": 0.8001512384294002, "learning_rate": 7.99433811478294e-06, "loss": 0.4705, "step": 10335 }, { "epoch": 0.31678313105308326, "grad_norm": 1.7195376330147776, "learning_rate": 7.99394062486752e-06, "loss": 0.7274, "step": 10336 }, { "epoch": 0.31681377957582446, "grad_norm": 1.7176875508879557, "learning_rate": 7.993543105452036e-06, "loss": 0.6528, "step": 10337 }, { "epoch": 0.31684442809856567, "grad_norm": 1.8758644278518726, "learning_rate": 7.993145556540407e-06, "loss": 0.639, "step": 10338 }, { "epoch": 0.3168750766213069, "grad_norm": 0.7816363012357664, "learning_rate": 7.99274797813655e-06, "loss": 0.4678, "step": 10339 }, { "epoch": 0.3169057251440481, "grad_norm": 1.7688204912567471, "learning_rate": 7.99235037024438e-06, "loss": 0.7073, "step": 10340 }, { "epoch": 0.3169363736667893, "grad_norm": 2.025790571356503, "learning_rate": 7.991952732867817e-06, "loss": 0.7395, "step": 10341 }, { "epoch": 0.3169670221895305, "grad_norm": 1.740908302002349, "learning_rate": 7.991555066010777e-06, "loss": 0.6905, "step": 10342 }, { "epoch": 0.3169976707122717, "grad_norm": 1.5802691899219619, "learning_rate": 7.99115736967718e-06, "loss": 0.5938, "step": 10343 }, { "epoch": 0.31702831923501285, "grad_norm": 1.553541142680813, "learning_rate": 7.990759643870944e-06, "loss": 0.6133, "step": 10344 }, { "epoch": 0.31705896775775405, "grad_norm": 0.7929985080783944, "learning_rate": 7.990361888595987e-06, "loss": 0.4685, "step": 10345 }, { "epoch": 0.31708961628049526, "grad_norm": 1.6249206911947536, "learning_rate": 7.989964103856232e-06, "loss": 0.6286, "step": 10346 }, { "epoch": 0.31712026480323646, "grad_norm": 2.1182595416196253, "learning_rate": 7.989566289655596e-06, "loss": 0.6905, "step": 10347 }, { "epoch": 0.31715091332597767, "grad_norm": 1.7209542167066818, "learning_rate": 7.989168445997994e-06, "loss": 0.7435, "step": 10348 }, { "epoch": 0.3171815618487189, "grad_norm": 1.7449322349102387, "learning_rate": 7.988770572887353e-06, "loss": 0.6238, "step": 10349 }, { "epoch": 0.3172122103714601, "grad_norm": 1.6817694327653443, "learning_rate": 7.988372670327591e-06, "loss": 0.6454, "step": 10350 }, { "epoch": 0.3172428588942013, "grad_norm": 1.75060870987441, "learning_rate": 7.987974738322629e-06, "loss": 0.6471, "step": 10351 }, { "epoch": 0.3172735074169425, "grad_norm": 1.8727430674448413, "learning_rate": 7.987576776876387e-06, "loss": 0.6687, "step": 10352 }, { "epoch": 0.3173041559396837, "grad_norm": 1.692444332973689, "learning_rate": 7.987178785992787e-06, "loss": 0.7389, "step": 10353 }, { "epoch": 0.3173348044624249, "grad_norm": 1.4498438199781447, "learning_rate": 7.98678076567575e-06, "loss": 0.6406, "step": 10354 }, { "epoch": 0.3173654529851661, "grad_norm": 2.096257773688264, "learning_rate": 7.986382715929196e-06, "loss": 0.853, "step": 10355 }, { "epoch": 0.3173961015079073, "grad_norm": 1.9023827965113733, "learning_rate": 7.985984636757051e-06, "loss": 0.7445, "step": 10356 }, { "epoch": 0.3174267500306485, "grad_norm": 1.657472324493234, "learning_rate": 7.985586528163234e-06, "loss": 0.7044, "step": 10357 }, { "epoch": 0.3174573985533897, "grad_norm": 1.909352852322529, "learning_rate": 7.98518839015167e-06, "loss": 0.8324, "step": 10358 }, { "epoch": 0.31748804707613093, "grad_norm": 1.750786227838019, "learning_rate": 7.984790222726281e-06, "loss": 0.6952, "step": 10359 }, { "epoch": 0.31751869559887214, "grad_norm": 1.9165948937360995, "learning_rate": 7.984392025890991e-06, "loss": 0.7708, "step": 10360 }, { "epoch": 0.31754934412161334, "grad_norm": 1.7652190966018344, "learning_rate": 7.98399379964972e-06, "loss": 0.6464, "step": 10361 }, { "epoch": 0.31757999264435455, "grad_norm": 2.0289464173391956, "learning_rate": 7.983595544006398e-06, "loss": 0.7719, "step": 10362 }, { "epoch": 0.31761064116709575, "grad_norm": 1.5649319923731555, "learning_rate": 7.983197258964943e-06, "loss": 0.6431, "step": 10363 }, { "epoch": 0.31764128968983696, "grad_norm": 2.0059934260132377, "learning_rate": 7.982798944529284e-06, "loss": 0.7133, "step": 10364 }, { "epoch": 0.31767193821257816, "grad_norm": 1.7558964756350466, "learning_rate": 7.982400600703344e-06, "loss": 0.6681, "step": 10365 }, { "epoch": 0.31770258673531937, "grad_norm": 1.5741926718857056, "learning_rate": 7.982002227491045e-06, "loss": 0.6734, "step": 10366 }, { "epoch": 0.3177332352580606, "grad_norm": 1.675904145486813, "learning_rate": 7.981603824896319e-06, "loss": 0.6122, "step": 10367 }, { "epoch": 0.3177638837808018, "grad_norm": 1.4466394031173586, "learning_rate": 7.981205392923085e-06, "loss": 0.6418, "step": 10368 }, { "epoch": 0.317794532303543, "grad_norm": 1.5976783615349264, "learning_rate": 7.980806931575273e-06, "loss": 0.6105, "step": 10369 }, { "epoch": 0.3178251808262842, "grad_norm": 1.6816665402857662, "learning_rate": 7.980408440856806e-06, "loss": 0.6661, "step": 10370 }, { "epoch": 0.3178558293490254, "grad_norm": 1.930723146114888, "learning_rate": 7.980009920771613e-06, "loss": 0.7152, "step": 10371 }, { "epoch": 0.3178864778717666, "grad_norm": 1.7329764634926952, "learning_rate": 7.979611371323619e-06, "loss": 0.6796, "step": 10372 }, { "epoch": 0.3179171263945078, "grad_norm": 1.5055272027045123, "learning_rate": 7.979212792516752e-06, "loss": 0.6399, "step": 10373 }, { "epoch": 0.317947774917249, "grad_norm": 1.8854174790485891, "learning_rate": 7.978814184354941e-06, "loss": 0.7121, "step": 10374 }, { "epoch": 0.31797842343999017, "grad_norm": 1.6088869098782284, "learning_rate": 7.978415546842108e-06, "loss": 0.5532, "step": 10375 }, { "epoch": 0.31800907196273137, "grad_norm": 1.7110140775350307, "learning_rate": 7.978016879982188e-06, "loss": 0.7279, "step": 10376 }, { "epoch": 0.3180397204854726, "grad_norm": 0.8391998503563887, "learning_rate": 7.977618183779106e-06, "loss": 0.4565, "step": 10377 }, { "epoch": 0.3180703690082138, "grad_norm": 1.7287076774564438, "learning_rate": 7.977219458236787e-06, "loss": 0.7426, "step": 10378 }, { "epoch": 0.318101017530955, "grad_norm": 1.9641506192746319, "learning_rate": 7.976820703359166e-06, "loss": 0.6705, "step": 10379 }, { "epoch": 0.3181316660536962, "grad_norm": 1.765195560006129, "learning_rate": 7.976421919150165e-06, "loss": 0.6855, "step": 10380 }, { "epoch": 0.3181623145764374, "grad_norm": 1.6888013702103983, "learning_rate": 7.976023105613722e-06, "loss": 0.6426, "step": 10381 }, { "epoch": 0.3181929630991786, "grad_norm": 1.8830420941802792, "learning_rate": 7.975624262753758e-06, "loss": 0.74, "step": 10382 }, { "epoch": 0.3182236116219198, "grad_norm": 1.6579071500371927, "learning_rate": 7.97522539057421e-06, "loss": 0.6411, "step": 10383 }, { "epoch": 0.318254260144661, "grad_norm": 1.7105203458933271, "learning_rate": 7.974826489079002e-06, "loss": 0.6449, "step": 10384 }, { "epoch": 0.3182849086674022, "grad_norm": 1.9548918632183006, "learning_rate": 7.97442755827207e-06, "loss": 0.819, "step": 10385 }, { "epoch": 0.31831555719014343, "grad_norm": 1.722573251270712, "learning_rate": 7.97402859815734e-06, "loss": 0.7158, "step": 10386 }, { "epoch": 0.31834620571288463, "grad_norm": 1.6584745224505726, "learning_rate": 7.973629608738746e-06, "loss": 0.679, "step": 10387 }, { "epoch": 0.31837685423562584, "grad_norm": 1.6925868637668038, "learning_rate": 7.97323059002022e-06, "loss": 0.6234, "step": 10388 }, { "epoch": 0.31840750275836704, "grad_norm": 1.8014290448725159, "learning_rate": 7.972831542005692e-06, "loss": 0.7222, "step": 10389 }, { "epoch": 0.31843815128110825, "grad_norm": 1.7600257607911083, "learning_rate": 7.972432464699093e-06, "loss": 0.734, "step": 10390 }, { "epoch": 0.31846879980384946, "grad_norm": 1.878985378914163, "learning_rate": 7.972033358104355e-06, "loss": 0.7146, "step": 10391 }, { "epoch": 0.31849944832659066, "grad_norm": 1.9053769143398445, "learning_rate": 7.971634222225416e-06, "loss": 0.6592, "step": 10392 }, { "epoch": 0.31853009684933187, "grad_norm": 1.7099177761531075, "learning_rate": 7.971235057066202e-06, "loss": 0.7441, "step": 10393 }, { "epoch": 0.3185607453720731, "grad_norm": 1.781893970713319, "learning_rate": 7.97083586263065e-06, "loss": 0.6674, "step": 10394 }, { "epoch": 0.3185913938948143, "grad_norm": 0.9161831925100504, "learning_rate": 7.970436638922691e-06, "loss": 0.4768, "step": 10395 }, { "epoch": 0.3186220424175555, "grad_norm": 0.7942368030167392, "learning_rate": 7.97003738594626e-06, "loss": 0.4704, "step": 10396 }, { "epoch": 0.3186526909402967, "grad_norm": 1.827646912577226, "learning_rate": 7.969638103705291e-06, "loss": 0.7191, "step": 10397 }, { "epoch": 0.3186833394630379, "grad_norm": 1.8174170275143124, "learning_rate": 7.969238792203719e-06, "loss": 0.7361, "step": 10398 }, { "epoch": 0.3187139879857791, "grad_norm": 1.8793262639014814, "learning_rate": 7.968839451445477e-06, "loss": 0.627, "step": 10399 }, { "epoch": 0.3187446365085203, "grad_norm": 1.474761778649149, "learning_rate": 7.968440081434499e-06, "loss": 0.6959, "step": 10400 }, { "epoch": 0.3187752850312615, "grad_norm": 1.8172577275389667, "learning_rate": 7.96804068217472e-06, "loss": 0.747, "step": 10401 }, { "epoch": 0.3188059335540027, "grad_norm": 1.803565131551222, "learning_rate": 7.96764125367008e-06, "loss": 0.6914, "step": 10402 }, { "epoch": 0.3188365820767439, "grad_norm": 1.6932729329997813, "learning_rate": 7.96724179592451e-06, "loss": 0.664, "step": 10403 }, { "epoch": 0.31886723059948513, "grad_norm": 1.8786827736491372, "learning_rate": 7.966842308941948e-06, "loss": 0.8192, "step": 10404 }, { "epoch": 0.31889787912222634, "grad_norm": 1.7781214779348806, "learning_rate": 7.966442792726328e-06, "loss": 0.6448, "step": 10405 }, { "epoch": 0.3189285276449675, "grad_norm": 1.2127451663107114, "learning_rate": 7.96604324728159e-06, "loss": 0.5116, "step": 10406 }, { "epoch": 0.3189591761677087, "grad_norm": 1.68303276702153, "learning_rate": 7.965643672611667e-06, "loss": 0.7017, "step": 10407 }, { "epoch": 0.3189898246904499, "grad_norm": 1.9749546631764883, "learning_rate": 7.965244068720501e-06, "loss": 0.6021, "step": 10408 }, { "epoch": 0.3190204732131911, "grad_norm": 1.6507062546492564, "learning_rate": 7.964844435612025e-06, "loss": 0.7056, "step": 10409 }, { "epoch": 0.3190511217359323, "grad_norm": 0.7824406090297301, "learning_rate": 7.964444773290177e-06, "loss": 0.4779, "step": 10410 }, { "epoch": 0.3190817702586735, "grad_norm": 1.561146153268163, "learning_rate": 7.964045081758898e-06, "loss": 0.7149, "step": 10411 }, { "epoch": 0.3191124187814147, "grad_norm": 1.9967931500797373, "learning_rate": 7.963645361022123e-06, "loss": 0.6884, "step": 10412 }, { "epoch": 0.3191430673041559, "grad_norm": 1.135794423036662, "learning_rate": 7.963245611083792e-06, "loss": 0.4695, "step": 10413 }, { "epoch": 0.31917371582689713, "grad_norm": 1.751291872624916, "learning_rate": 7.962845831947845e-06, "loss": 0.6163, "step": 10414 }, { "epoch": 0.31920436434963834, "grad_norm": 1.711653298716096, "learning_rate": 7.96244602361822e-06, "loss": 0.6899, "step": 10415 }, { "epoch": 0.31923501287237954, "grad_norm": 1.4445819207656085, "learning_rate": 7.962046186098854e-06, "loss": 0.6751, "step": 10416 }, { "epoch": 0.31926566139512075, "grad_norm": 0.9122120724231345, "learning_rate": 7.961646319393693e-06, "loss": 0.5009, "step": 10417 }, { "epoch": 0.31929630991786195, "grad_norm": 1.3731632254610044, "learning_rate": 7.96124642350667e-06, "loss": 0.5287, "step": 10418 }, { "epoch": 0.31932695844060316, "grad_norm": 1.754979070671725, "learning_rate": 7.96084649844173e-06, "loss": 0.6988, "step": 10419 }, { "epoch": 0.31935760696334436, "grad_norm": 1.8205965572783558, "learning_rate": 7.96044654420281e-06, "loss": 0.6618, "step": 10420 }, { "epoch": 0.31938825548608557, "grad_norm": 1.5271784237967054, "learning_rate": 7.960046560793854e-06, "loss": 0.6383, "step": 10421 }, { "epoch": 0.3194189040088268, "grad_norm": 1.8245199608531386, "learning_rate": 7.959646548218802e-06, "loss": 0.7767, "step": 10422 }, { "epoch": 0.319449552531568, "grad_norm": 0.8066625697798168, "learning_rate": 7.959246506481595e-06, "loss": 0.4471, "step": 10423 }, { "epoch": 0.3194802010543092, "grad_norm": 1.6064201366596758, "learning_rate": 7.958846435586175e-06, "loss": 0.6442, "step": 10424 }, { "epoch": 0.3195108495770504, "grad_norm": 1.7424639265225494, "learning_rate": 7.958446335536484e-06, "loss": 0.6484, "step": 10425 }, { "epoch": 0.3195414980997916, "grad_norm": 1.6954125362694379, "learning_rate": 7.958046206336463e-06, "loss": 0.6696, "step": 10426 }, { "epoch": 0.3195721466225328, "grad_norm": 1.6588842799903891, "learning_rate": 7.957646047990058e-06, "loss": 0.5764, "step": 10427 }, { "epoch": 0.319602795145274, "grad_norm": 1.6910499390934304, "learning_rate": 7.957245860501209e-06, "loss": 0.6172, "step": 10428 }, { "epoch": 0.3196334436680152, "grad_norm": 1.4456175166223815, "learning_rate": 7.956845643873861e-06, "loss": 0.5768, "step": 10429 }, { "epoch": 0.3196640921907564, "grad_norm": 1.7324244921428842, "learning_rate": 7.956445398111954e-06, "loss": 0.7121, "step": 10430 }, { "epoch": 0.3196947407134976, "grad_norm": 1.7847271718626945, "learning_rate": 7.956045123219436e-06, "loss": 0.7932, "step": 10431 }, { "epoch": 0.31972538923623883, "grad_norm": 1.8740091320782772, "learning_rate": 7.955644819200248e-06, "loss": 0.6637, "step": 10432 }, { "epoch": 0.31975603775898004, "grad_norm": 1.795119777094036, "learning_rate": 7.955244486058335e-06, "loss": 0.6932, "step": 10433 }, { "epoch": 0.31978668628172124, "grad_norm": 1.719661462067658, "learning_rate": 7.954844123797642e-06, "loss": 0.6948, "step": 10434 }, { "epoch": 0.31981733480446245, "grad_norm": 0.8175548277538084, "learning_rate": 7.954443732422116e-06, "loss": 0.4706, "step": 10435 }, { "epoch": 0.31984798332720366, "grad_norm": 0.8349422622404666, "learning_rate": 7.954043311935697e-06, "loss": 0.4571, "step": 10436 }, { "epoch": 0.3198786318499448, "grad_norm": 1.4393255721846918, "learning_rate": 7.953642862342335e-06, "loss": 0.6212, "step": 10437 }, { "epoch": 0.319909280372686, "grad_norm": 1.7694804254106176, "learning_rate": 7.953242383645974e-06, "loss": 0.7501, "step": 10438 }, { "epoch": 0.3199399288954272, "grad_norm": 1.6158835581344888, "learning_rate": 7.952841875850562e-06, "loss": 0.6154, "step": 10439 }, { "epoch": 0.3199705774181684, "grad_norm": 1.7643840187882889, "learning_rate": 7.95244133896004e-06, "loss": 0.5728, "step": 10440 }, { "epoch": 0.32000122594090963, "grad_norm": 1.593839548164367, "learning_rate": 7.95204077297836e-06, "loss": 0.6644, "step": 10441 }, { "epoch": 0.32003187446365083, "grad_norm": 1.54400683494199, "learning_rate": 7.951640177909467e-06, "loss": 0.679, "step": 10442 }, { "epoch": 0.32006252298639204, "grad_norm": 1.941046471023423, "learning_rate": 7.951239553757308e-06, "loss": 0.8019, "step": 10443 }, { "epoch": 0.32009317150913325, "grad_norm": 1.7680740137896622, "learning_rate": 7.95083890052583e-06, "loss": 0.6884, "step": 10444 }, { "epoch": 0.32012382003187445, "grad_norm": 1.666649551709474, "learning_rate": 7.95043821821898e-06, "loss": 0.6344, "step": 10445 }, { "epoch": 0.32015446855461566, "grad_norm": 1.6409508944024496, "learning_rate": 7.95003750684071e-06, "loss": 0.6983, "step": 10446 }, { "epoch": 0.32018511707735686, "grad_norm": 1.7117980785479925, "learning_rate": 7.949636766394966e-06, "loss": 0.6487, "step": 10447 }, { "epoch": 0.32021576560009807, "grad_norm": 1.5620909755144619, "learning_rate": 7.949235996885694e-06, "loss": 0.6003, "step": 10448 }, { "epoch": 0.3202464141228393, "grad_norm": 1.787211488857671, "learning_rate": 7.948835198316845e-06, "loss": 0.797, "step": 10449 }, { "epoch": 0.3202770626455805, "grad_norm": 1.7952876166537886, "learning_rate": 7.94843437069237e-06, "loss": 0.6032, "step": 10450 }, { "epoch": 0.3203077111683217, "grad_norm": 1.6632493005720685, "learning_rate": 7.948033514016216e-06, "loss": 0.5798, "step": 10451 }, { "epoch": 0.3203383596910629, "grad_norm": 1.026938145081259, "learning_rate": 7.947632628292334e-06, "loss": 0.4639, "step": 10452 }, { "epoch": 0.3203690082138041, "grad_norm": 1.7821616464574574, "learning_rate": 7.947231713524672e-06, "loss": 0.6943, "step": 10453 }, { "epoch": 0.3203996567365453, "grad_norm": 0.9005889266741222, "learning_rate": 7.946830769717184e-06, "loss": 0.4592, "step": 10454 }, { "epoch": 0.3204303052592865, "grad_norm": 1.7250096921127456, "learning_rate": 7.946429796873816e-06, "loss": 0.675, "step": 10455 }, { "epoch": 0.3204609537820277, "grad_norm": 1.8028588501687197, "learning_rate": 7.946028794998524e-06, "loss": 0.7631, "step": 10456 }, { "epoch": 0.3204916023047689, "grad_norm": 0.7866349106403938, "learning_rate": 7.945627764095253e-06, "loss": 0.4687, "step": 10457 }, { "epoch": 0.3205222508275101, "grad_norm": 1.7601502557776143, "learning_rate": 7.945226704167963e-06, "loss": 0.6039, "step": 10458 }, { "epoch": 0.32055289935025133, "grad_norm": 1.6334435213453402, "learning_rate": 7.944825615220598e-06, "loss": 0.7146, "step": 10459 }, { "epoch": 0.32058354787299254, "grad_norm": 1.7611949269219629, "learning_rate": 7.944424497257111e-06, "loss": 0.7898, "step": 10460 }, { "epoch": 0.32061419639573374, "grad_norm": 1.5553684513858248, "learning_rate": 7.944023350281458e-06, "loss": 0.6996, "step": 10461 }, { "epoch": 0.32064484491847495, "grad_norm": 1.7987206077536222, "learning_rate": 7.94362217429759e-06, "loss": 0.6734, "step": 10462 }, { "epoch": 0.32067549344121615, "grad_norm": 0.8650215572018628, "learning_rate": 7.943220969309458e-06, "loss": 0.4514, "step": 10463 }, { "epoch": 0.32070614196395736, "grad_norm": 0.848173961624055, "learning_rate": 7.94281973532102e-06, "loss": 0.4947, "step": 10464 }, { "epoch": 0.32073679048669856, "grad_norm": 1.7450820924788808, "learning_rate": 7.942418472336222e-06, "loss": 0.7364, "step": 10465 }, { "epoch": 0.32076743900943977, "grad_norm": 0.9105188676685093, "learning_rate": 7.942017180359025e-06, "loss": 0.4791, "step": 10466 }, { "epoch": 0.320798087532181, "grad_norm": 1.7773478012395583, "learning_rate": 7.941615859393379e-06, "loss": 0.6821, "step": 10467 }, { "epoch": 0.3208287360549221, "grad_norm": 1.7502555381139877, "learning_rate": 7.941214509443237e-06, "loss": 0.6981, "step": 10468 }, { "epoch": 0.32085938457766333, "grad_norm": 1.6827912964176033, "learning_rate": 7.940813130512559e-06, "loss": 0.6108, "step": 10469 }, { "epoch": 0.32089003310040454, "grad_norm": 1.5981152773160454, "learning_rate": 7.940411722605296e-06, "loss": 0.6786, "step": 10470 }, { "epoch": 0.32092068162314574, "grad_norm": 0.8797948268688601, "learning_rate": 7.940010285725403e-06, "loss": 0.4667, "step": 10471 }, { "epoch": 0.32095133014588695, "grad_norm": 1.8470291516579829, "learning_rate": 7.939608819876837e-06, "loss": 0.7585, "step": 10472 }, { "epoch": 0.32098197866862815, "grad_norm": 1.4662966285475036, "learning_rate": 7.939207325063553e-06, "loss": 0.6563, "step": 10473 }, { "epoch": 0.32101262719136936, "grad_norm": 1.6115970168866203, "learning_rate": 7.938805801289509e-06, "loss": 0.7633, "step": 10474 }, { "epoch": 0.32104327571411057, "grad_norm": 0.7989244000618565, "learning_rate": 7.938404248558658e-06, "loss": 0.4661, "step": 10475 }, { "epoch": 0.32107392423685177, "grad_norm": 0.8260250651829746, "learning_rate": 7.938002666874958e-06, "loss": 0.4709, "step": 10476 }, { "epoch": 0.321104572759593, "grad_norm": 1.5952579951055204, "learning_rate": 7.937601056242365e-06, "loss": 0.7048, "step": 10477 }, { "epoch": 0.3211352212823342, "grad_norm": 1.6258268183171436, "learning_rate": 7.937199416664839e-06, "loss": 0.6506, "step": 10478 }, { "epoch": 0.3211658698050754, "grad_norm": 1.727451716890594, "learning_rate": 7.936797748146335e-06, "loss": 0.6718, "step": 10479 }, { "epoch": 0.3211965183278166, "grad_norm": 1.6766948341030603, "learning_rate": 7.936396050690812e-06, "loss": 0.6849, "step": 10480 }, { "epoch": 0.3212271668505578, "grad_norm": 0.8887958110342795, "learning_rate": 7.935994324302226e-06, "loss": 0.4789, "step": 10481 }, { "epoch": 0.321257815373299, "grad_norm": 1.7376707976310524, "learning_rate": 7.935592568984537e-06, "loss": 0.7159, "step": 10482 }, { "epoch": 0.3212884638960402, "grad_norm": 1.6946121549144273, "learning_rate": 7.935190784741705e-06, "loss": 0.7076, "step": 10483 }, { "epoch": 0.3213191124187814, "grad_norm": 1.7742394562445083, "learning_rate": 7.934788971577685e-06, "loss": 0.7015, "step": 10484 }, { "epoch": 0.3213497609415226, "grad_norm": 0.8337629783947982, "learning_rate": 7.93438712949644e-06, "loss": 0.4767, "step": 10485 }, { "epoch": 0.3213804094642638, "grad_norm": 0.8247869377231828, "learning_rate": 7.933985258501926e-06, "loss": 0.4616, "step": 10486 }, { "epoch": 0.32141105798700503, "grad_norm": 1.7963405319436685, "learning_rate": 7.933583358598107e-06, "loss": 0.6975, "step": 10487 }, { "epoch": 0.32144170650974624, "grad_norm": 1.5904218113917061, "learning_rate": 7.933181429788937e-06, "loss": 0.6907, "step": 10488 }, { "epoch": 0.32147235503248744, "grad_norm": 1.7406780893236748, "learning_rate": 7.932779472078384e-06, "loss": 0.6727, "step": 10489 }, { "epoch": 0.32150300355522865, "grad_norm": 1.6567138657907632, "learning_rate": 7.932377485470402e-06, "loss": 0.6878, "step": 10490 }, { "epoch": 0.32153365207796986, "grad_norm": 0.8683767019652492, "learning_rate": 7.931975469968956e-06, "loss": 0.4506, "step": 10491 }, { "epoch": 0.32156430060071106, "grad_norm": 0.8860508621074091, "learning_rate": 7.931573425578003e-06, "loss": 0.4554, "step": 10492 }, { "epoch": 0.32159494912345227, "grad_norm": 1.8133571211166355, "learning_rate": 7.93117135230151e-06, "loss": 0.766, "step": 10493 }, { "epoch": 0.3216255976461935, "grad_norm": 1.7324134457977995, "learning_rate": 7.930769250143433e-06, "loss": 0.6622, "step": 10494 }, { "epoch": 0.3216562461689347, "grad_norm": 1.5062612784553129, "learning_rate": 7.930367119107738e-06, "loss": 0.6444, "step": 10495 }, { "epoch": 0.3216868946916759, "grad_norm": 1.7132534832828912, "learning_rate": 7.929964959198387e-06, "loss": 0.7808, "step": 10496 }, { "epoch": 0.3217175432144171, "grad_norm": 1.805755645974166, "learning_rate": 7.92956277041934e-06, "loss": 0.6743, "step": 10497 }, { "epoch": 0.3217481917371583, "grad_norm": 1.5740298405162674, "learning_rate": 7.929160552774561e-06, "loss": 0.6462, "step": 10498 }, { "epoch": 0.32177884025989945, "grad_norm": 1.8249993500649047, "learning_rate": 7.928758306268014e-06, "loss": 0.7186, "step": 10499 }, { "epoch": 0.32180948878264065, "grad_norm": 1.7200453937163573, "learning_rate": 7.928356030903663e-06, "loss": 0.698, "step": 10500 }, { "epoch": 0.32184013730538186, "grad_norm": 1.3760417442421453, "learning_rate": 7.927953726685472e-06, "loss": 0.6902, "step": 10501 }, { "epoch": 0.32187078582812306, "grad_norm": 0.9783151436255428, "learning_rate": 7.927551393617401e-06, "loss": 0.4775, "step": 10502 }, { "epoch": 0.32190143435086427, "grad_norm": 1.7538152330051662, "learning_rate": 7.927149031703418e-06, "loss": 0.7227, "step": 10503 }, { "epoch": 0.3219320828736055, "grad_norm": 1.694229637707536, "learning_rate": 7.926746640947487e-06, "loss": 0.6565, "step": 10504 }, { "epoch": 0.3219627313963467, "grad_norm": 1.8614976261582172, "learning_rate": 7.926344221353573e-06, "loss": 0.7014, "step": 10505 }, { "epoch": 0.3219933799190879, "grad_norm": 1.6362416116943972, "learning_rate": 7.925941772925639e-06, "loss": 0.7248, "step": 10506 }, { "epoch": 0.3220240284418291, "grad_norm": 1.8227443239608982, "learning_rate": 7.925539295667654e-06, "loss": 0.7745, "step": 10507 }, { "epoch": 0.3220546769645703, "grad_norm": 1.6321817253275885, "learning_rate": 7.925136789583581e-06, "loss": 0.6742, "step": 10508 }, { "epoch": 0.3220853254873115, "grad_norm": 1.7745182678480078, "learning_rate": 7.924734254677386e-06, "loss": 0.7346, "step": 10509 }, { "epoch": 0.3221159740100527, "grad_norm": 1.545140957515503, "learning_rate": 7.924331690953038e-06, "loss": 0.6796, "step": 10510 }, { "epoch": 0.3221466225327939, "grad_norm": 1.6154815211073663, "learning_rate": 7.9239290984145e-06, "loss": 0.6127, "step": 10511 }, { "epoch": 0.3221772710555351, "grad_norm": 1.9328287621204403, "learning_rate": 7.92352647706574e-06, "loss": 0.6867, "step": 10512 }, { "epoch": 0.3222079195782763, "grad_norm": 1.0095214957852596, "learning_rate": 7.923123826910726e-06, "loss": 0.4168, "step": 10513 }, { "epoch": 0.32223856810101753, "grad_norm": 1.9327600236766491, "learning_rate": 7.922721147953425e-06, "loss": 0.7194, "step": 10514 }, { "epoch": 0.32226921662375874, "grad_norm": 1.5933848423835155, "learning_rate": 7.922318440197805e-06, "loss": 0.6722, "step": 10515 }, { "epoch": 0.32229986514649994, "grad_norm": 0.814337774195997, "learning_rate": 7.921915703647836e-06, "loss": 0.4629, "step": 10516 }, { "epoch": 0.32233051366924115, "grad_norm": 1.723607823582721, "learning_rate": 7.921512938307481e-06, "loss": 0.7087, "step": 10517 }, { "epoch": 0.32236116219198235, "grad_norm": 1.4471069345087595, "learning_rate": 7.921110144180712e-06, "loss": 0.6921, "step": 10518 }, { "epoch": 0.32239181071472356, "grad_norm": 1.9365751278921173, "learning_rate": 7.920707321271497e-06, "loss": 0.6949, "step": 10519 }, { "epoch": 0.32242245923746476, "grad_norm": 0.7882557427338817, "learning_rate": 7.920304469583808e-06, "loss": 0.4622, "step": 10520 }, { "epoch": 0.32245310776020597, "grad_norm": 1.5937379960597773, "learning_rate": 7.91990158912161e-06, "loss": 0.6424, "step": 10521 }, { "epoch": 0.3224837562829472, "grad_norm": 0.8297894377988558, "learning_rate": 7.919498679888873e-06, "loss": 0.4851, "step": 10522 }, { "epoch": 0.3225144048056884, "grad_norm": 1.769011558883391, "learning_rate": 7.919095741889572e-06, "loss": 0.7136, "step": 10523 }, { "epoch": 0.3225450533284296, "grad_norm": 1.4661221935581308, "learning_rate": 7.91869277512767e-06, "loss": 0.6734, "step": 10524 }, { "epoch": 0.3225757018511708, "grad_norm": 1.7964002506855257, "learning_rate": 7.918289779607144e-06, "loss": 0.7065, "step": 10525 }, { "epoch": 0.322606350373912, "grad_norm": 2.0009987125895177, "learning_rate": 7.91788675533196e-06, "loss": 0.7164, "step": 10526 }, { "epoch": 0.3226369988966532, "grad_norm": 1.7451958852794098, "learning_rate": 7.917483702306094e-06, "loss": 0.6691, "step": 10527 }, { "epoch": 0.3226676474193944, "grad_norm": 1.7027447555070407, "learning_rate": 7.917080620533513e-06, "loss": 0.7166, "step": 10528 }, { "epoch": 0.3226982959421356, "grad_norm": 1.7284462987509592, "learning_rate": 7.91667751001819e-06, "loss": 0.5541, "step": 10529 }, { "epoch": 0.32272894446487677, "grad_norm": 1.9550730999137034, "learning_rate": 7.916274370764098e-06, "loss": 0.7375, "step": 10530 }, { "epoch": 0.32275959298761797, "grad_norm": 1.6106592039562062, "learning_rate": 7.915871202775209e-06, "loss": 0.7105, "step": 10531 }, { "epoch": 0.3227902415103592, "grad_norm": 0.8933906432831927, "learning_rate": 7.915468006055493e-06, "loss": 0.4556, "step": 10532 }, { "epoch": 0.3228208900331004, "grad_norm": 1.6211491925463324, "learning_rate": 7.915064780608926e-06, "loss": 0.7171, "step": 10533 }, { "epoch": 0.3228515385558416, "grad_norm": 1.877990046293407, "learning_rate": 7.91466152643948e-06, "loss": 0.6923, "step": 10534 }, { "epoch": 0.3228821870785828, "grad_norm": 1.8792296445950234, "learning_rate": 7.914258243551129e-06, "loss": 0.7315, "step": 10535 }, { "epoch": 0.322912835601324, "grad_norm": 1.796370692924275, "learning_rate": 7.913854931947844e-06, "loss": 0.6952, "step": 10536 }, { "epoch": 0.3229434841240652, "grad_norm": 1.7838603045222066, "learning_rate": 7.913451591633602e-06, "loss": 0.6935, "step": 10537 }, { "epoch": 0.3229741326468064, "grad_norm": 1.910907819910301, "learning_rate": 7.913048222612376e-06, "loss": 0.7035, "step": 10538 }, { "epoch": 0.3230047811695476, "grad_norm": 1.5668440386998972, "learning_rate": 7.91264482488814e-06, "loss": 0.6721, "step": 10539 }, { "epoch": 0.3230354296922888, "grad_norm": 1.5755297180262147, "learning_rate": 7.91224139846487e-06, "loss": 0.7252, "step": 10540 }, { "epoch": 0.32306607821503003, "grad_norm": 1.6736357143717928, "learning_rate": 7.911837943346538e-06, "loss": 0.6598, "step": 10541 }, { "epoch": 0.32309672673777123, "grad_norm": 1.733964170229089, "learning_rate": 7.911434459537124e-06, "loss": 0.6178, "step": 10542 }, { "epoch": 0.32312737526051244, "grad_norm": 1.6350882027362474, "learning_rate": 7.911030947040602e-06, "loss": 0.6719, "step": 10543 }, { "epoch": 0.32315802378325365, "grad_norm": 1.9553030769712498, "learning_rate": 7.910627405860947e-06, "loss": 0.6021, "step": 10544 }, { "epoch": 0.32318867230599485, "grad_norm": 1.5922237714236676, "learning_rate": 7.910223836002133e-06, "loss": 0.6421, "step": 10545 }, { "epoch": 0.32321932082873606, "grad_norm": 1.7596932591503576, "learning_rate": 7.909820237468141e-06, "loss": 0.7196, "step": 10546 }, { "epoch": 0.32324996935147726, "grad_norm": 1.668412651929019, "learning_rate": 7.909416610262945e-06, "loss": 0.648, "step": 10547 }, { "epoch": 0.32328061787421847, "grad_norm": 1.728292710439839, "learning_rate": 7.909012954390526e-06, "loss": 0.6812, "step": 10548 }, { "epoch": 0.3233112663969597, "grad_norm": 1.932621346952897, "learning_rate": 7.908609269854852e-06, "loss": 0.6065, "step": 10549 }, { "epoch": 0.3233419149197009, "grad_norm": 1.6437240830987558, "learning_rate": 7.908205556659911e-06, "loss": 0.6795, "step": 10550 }, { "epoch": 0.3233725634424421, "grad_norm": 1.8104409269877326, "learning_rate": 7.907801814809674e-06, "loss": 0.6955, "step": 10551 }, { "epoch": 0.3234032119651833, "grad_norm": 1.5814677490794722, "learning_rate": 7.907398044308123e-06, "loss": 0.7537, "step": 10552 }, { "epoch": 0.3234338604879245, "grad_norm": 1.7249789875428079, "learning_rate": 7.906994245159235e-06, "loss": 0.6477, "step": 10553 }, { "epoch": 0.3234645090106657, "grad_norm": 1.9807177637834343, "learning_rate": 7.90659041736699e-06, "loss": 0.7784, "step": 10554 }, { "epoch": 0.3234951575334069, "grad_norm": 1.7477290329878994, "learning_rate": 7.906186560935366e-06, "loss": 0.6705, "step": 10555 }, { "epoch": 0.3235258060561481, "grad_norm": 1.7098601060373904, "learning_rate": 7.905782675868341e-06, "loss": 0.6369, "step": 10556 }, { "epoch": 0.3235564545788893, "grad_norm": 1.677909555270474, "learning_rate": 7.905378762169896e-06, "loss": 0.7511, "step": 10557 }, { "epoch": 0.3235871031016305, "grad_norm": 1.7099519783488768, "learning_rate": 7.904974819844012e-06, "loss": 0.6667, "step": 10558 }, { "epoch": 0.32361775162437173, "grad_norm": 1.4663802678528661, "learning_rate": 7.904570848894666e-06, "loss": 0.6368, "step": 10559 }, { "epoch": 0.32364840014711294, "grad_norm": 1.5153255875662581, "learning_rate": 7.90416684932584e-06, "loss": 0.7246, "step": 10560 }, { "epoch": 0.3236790486698541, "grad_norm": 0.8426753137649525, "learning_rate": 7.903762821141516e-06, "loss": 0.463, "step": 10561 }, { "epoch": 0.3237096971925953, "grad_norm": 1.6597043605836643, "learning_rate": 7.903358764345674e-06, "loss": 0.6335, "step": 10562 }, { "epoch": 0.3237403457153365, "grad_norm": 1.6370817767993056, "learning_rate": 7.902954678942296e-06, "loss": 0.6249, "step": 10563 }, { "epoch": 0.3237709942380777, "grad_norm": 1.8887208917497007, "learning_rate": 7.902550564935363e-06, "loss": 0.6595, "step": 10564 }, { "epoch": 0.3238016427608189, "grad_norm": 1.7741180214805892, "learning_rate": 7.902146422328853e-06, "loss": 0.6173, "step": 10565 }, { "epoch": 0.3238322912835601, "grad_norm": 1.873277637929503, "learning_rate": 7.901742251126755e-06, "loss": 0.7186, "step": 10566 }, { "epoch": 0.3238629398063013, "grad_norm": 1.6954933932501974, "learning_rate": 7.901338051333047e-06, "loss": 0.659, "step": 10567 }, { "epoch": 0.3238935883290425, "grad_norm": 1.7619300069363986, "learning_rate": 7.900933822951714e-06, "loss": 0.7335, "step": 10568 }, { "epoch": 0.32392423685178373, "grad_norm": 1.7670495184955703, "learning_rate": 7.900529565986737e-06, "loss": 0.7269, "step": 10569 }, { "epoch": 0.32395488537452494, "grad_norm": 1.7120842737614124, "learning_rate": 7.9001252804421e-06, "loss": 0.7422, "step": 10570 }, { "epoch": 0.32398553389726614, "grad_norm": 1.7796646336227524, "learning_rate": 7.899720966321786e-06, "loss": 0.7923, "step": 10571 }, { "epoch": 0.32401618242000735, "grad_norm": 1.73697572187235, "learning_rate": 7.89931662362978e-06, "loss": 0.6601, "step": 10572 }, { "epoch": 0.32404683094274855, "grad_norm": 1.7616612262205873, "learning_rate": 7.898912252370066e-06, "loss": 0.7579, "step": 10573 }, { "epoch": 0.32407747946548976, "grad_norm": 1.8018971764130922, "learning_rate": 7.898507852546628e-06, "loss": 0.7399, "step": 10574 }, { "epoch": 0.32410812798823097, "grad_norm": 1.8314596473220253, "learning_rate": 7.89810342416345e-06, "loss": 0.659, "step": 10575 }, { "epoch": 0.32413877651097217, "grad_norm": 2.017183487736497, "learning_rate": 7.897698967224517e-06, "loss": 0.6765, "step": 10576 }, { "epoch": 0.3241694250337134, "grad_norm": 1.6596252764864718, "learning_rate": 7.897294481733816e-06, "loss": 0.7007, "step": 10577 }, { "epoch": 0.3242000735564546, "grad_norm": 1.717291202784849, "learning_rate": 7.896889967695329e-06, "loss": 0.734, "step": 10578 }, { "epoch": 0.3242307220791958, "grad_norm": 1.5497009700936937, "learning_rate": 7.896485425113045e-06, "loss": 0.6424, "step": 10579 }, { "epoch": 0.324261370601937, "grad_norm": 1.9515864851860685, "learning_rate": 7.896080853990951e-06, "loss": 0.7179, "step": 10580 }, { "epoch": 0.3242920191246782, "grad_norm": 1.9772354844722821, "learning_rate": 7.895676254333029e-06, "loss": 0.7107, "step": 10581 }, { "epoch": 0.3243226676474194, "grad_norm": 1.666594941796978, "learning_rate": 7.895271626143268e-06, "loss": 0.6545, "step": 10582 }, { "epoch": 0.3243533161701606, "grad_norm": 1.6928041125218052, "learning_rate": 7.894866969425656e-06, "loss": 0.7364, "step": 10583 }, { "epoch": 0.3243839646929018, "grad_norm": 1.5989618129511307, "learning_rate": 7.894462284184178e-06, "loss": 0.6985, "step": 10584 }, { "epoch": 0.324414613215643, "grad_norm": 1.6859763766205675, "learning_rate": 7.894057570422824e-06, "loss": 0.6153, "step": 10585 }, { "epoch": 0.3244452617383842, "grad_norm": 1.7906339576841916, "learning_rate": 7.893652828145579e-06, "loss": 0.745, "step": 10586 }, { "epoch": 0.32447591026112543, "grad_norm": 1.7749504456404228, "learning_rate": 7.893248057356433e-06, "loss": 0.7308, "step": 10587 }, { "epoch": 0.32450655878386664, "grad_norm": 1.6872087636369844, "learning_rate": 7.892843258059373e-06, "loss": 0.7264, "step": 10588 }, { "epoch": 0.32453720730660784, "grad_norm": 1.7030761276835953, "learning_rate": 7.892438430258388e-06, "loss": 0.7733, "step": 10589 }, { "epoch": 0.32456785582934905, "grad_norm": 1.7280952775533098, "learning_rate": 7.892033573957467e-06, "loss": 0.6677, "step": 10590 }, { "epoch": 0.32459850435209026, "grad_norm": 1.6867579006269733, "learning_rate": 7.8916286891606e-06, "loss": 0.7478, "step": 10591 }, { "epoch": 0.3246291528748314, "grad_norm": 1.7574233264832644, "learning_rate": 7.891223775871776e-06, "loss": 0.7143, "step": 10592 }, { "epoch": 0.3246598013975726, "grad_norm": 1.4232572653904467, "learning_rate": 7.890818834094985e-06, "loss": 0.6238, "step": 10593 }, { "epoch": 0.3246904499203138, "grad_norm": 1.664396667607645, "learning_rate": 7.890413863834214e-06, "loss": 0.7247, "step": 10594 }, { "epoch": 0.324721098443055, "grad_norm": 0.8256889010143097, "learning_rate": 7.890008865093458e-06, "loss": 0.4842, "step": 10595 }, { "epoch": 0.32475174696579623, "grad_norm": 1.8353445578237568, "learning_rate": 7.889603837876702e-06, "loss": 0.6602, "step": 10596 }, { "epoch": 0.32478239548853743, "grad_norm": 1.6888909890970747, "learning_rate": 7.889198782187944e-06, "loss": 0.6252, "step": 10597 }, { "epoch": 0.32481304401127864, "grad_norm": 1.94037969141328, "learning_rate": 7.888793698031167e-06, "loss": 0.8394, "step": 10598 }, { "epoch": 0.32484369253401985, "grad_norm": 2.066282407031063, "learning_rate": 7.88838858541037e-06, "loss": 0.7427, "step": 10599 }, { "epoch": 0.32487434105676105, "grad_norm": 1.7236183609551192, "learning_rate": 7.88798344432954e-06, "loss": 0.6263, "step": 10600 }, { "epoch": 0.32490498957950226, "grad_norm": 0.7932164238760486, "learning_rate": 7.88757827479267e-06, "loss": 0.4764, "step": 10601 }, { "epoch": 0.32493563810224346, "grad_norm": 1.691158749180594, "learning_rate": 7.887173076803753e-06, "loss": 0.6416, "step": 10602 }, { "epoch": 0.32496628662498467, "grad_norm": 1.7540152958513657, "learning_rate": 7.886767850366781e-06, "loss": 0.6727, "step": 10603 }, { "epoch": 0.3249969351477259, "grad_norm": 0.7931565346171888, "learning_rate": 7.886362595485747e-06, "loss": 0.4792, "step": 10604 }, { "epoch": 0.3250275836704671, "grad_norm": 1.7791629559981539, "learning_rate": 7.885957312164643e-06, "loss": 0.7916, "step": 10605 }, { "epoch": 0.3250582321932083, "grad_norm": 1.485371792377513, "learning_rate": 7.885552000407463e-06, "loss": 0.5995, "step": 10606 }, { "epoch": 0.3250888807159495, "grad_norm": 1.7246419340617458, "learning_rate": 7.885146660218202e-06, "loss": 0.6846, "step": 10607 }, { "epoch": 0.3251195292386907, "grad_norm": 1.7142510226280632, "learning_rate": 7.884741291600853e-06, "loss": 0.7134, "step": 10608 }, { "epoch": 0.3251501777614319, "grad_norm": 1.8916293970780733, "learning_rate": 7.884335894559408e-06, "loss": 0.7742, "step": 10609 }, { "epoch": 0.3251808262841731, "grad_norm": 1.742783518277049, "learning_rate": 7.883930469097864e-06, "loss": 0.7437, "step": 10610 }, { "epoch": 0.3252114748069143, "grad_norm": 1.58688803137667, "learning_rate": 7.883525015220215e-06, "loss": 0.6567, "step": 10611 }, { "epoch": 0.3252421233296555, "grad_norm": 1.4428079628258854, "learning_rate": 7.883119532930458e-06, "loss": 0.6096, "step": 10612 }, { "epoch": 0.3252727718523967, "grad_norm": 1.7375925810461341, "learning_rate": 7.882714022232585e-06, "loss": 0.7059, "step": 10613 }, { "epoch": 0.32530342037513793, "grad_norm": 1.7662767623837654, "learning_rate": 7.882308483130594e-06, "loss": 0.7259, "step": 10614 }, { "epoch": 0.32533406889787914, "grad_norm": 1.7056229212935579, "learning_rate": 7.88190291562848e-06, "loss": 0.6329, "step": 10615 }, { "epoch": 0.32536471742062034, "grad_norm": 0.9266809379982968, "learning_rate": 7.881497319730239e-06, "loss": 0.4862, "step": 10616 }, { "epoch": 0.32539536594336155, "grad_norm": 1.8521498245326322, "learning_rate": 7.881091695439867e-06, "loss": 0.7195, "step": 10617 }, { "epoch": 0.32542601446610275, "grad_norm": 1.5146724622428291, "learning_rate": 7.880686042761363e-06, "loss": 0.6036, "step": 10618 }, { "epoch": 0.32545666298884396, "grad_norm": 1.4164633558622801, "learning_rate": 7.88028036169872e-06, "loss": 0.5957, "step": 10619 }, { "epoch": 0.32548731151158516, "grad_norm": 1.6654636927235429, "learning_rate": 7.879874652255938e-06, "loss": 0.7018, "step": 10620 }, { "epoch": 0.32551796003432637, "grad_norm": 1.5760853248360724, "learning_rate": 7.879468914437016e-06, "loss": 0.6626, "step": 10621 }, { "epoch": 0.3255486085570676, "grad_norm": 1.735923893481031, "learning_rate": 7.879063148245949e-06, "loss": 0.7101, "step": 10622 }, { "epoch": 0.3255792570798087, "grad_norm": 1.8993146735133397, "learning_rate": 7.878657353686736e-06, "loss": 0.6879, "step": 10623 }, { "epoch": 0.32560990560254993, "grad_norm": 1.5900633457238518, "learning_rate": 7.878251530763377e-06, "loss": 0.6272, "step": 10624 }, { "epoch": 0.32564055412529114, "grad_norm": 1.4873016821019376, "learning_rate": 7.877845679479868e-06, "loss": 0.6825, "step": 10625 }, { "epoch": 0.32567120264803234, "grad_norm": 1.9088498938946017, "learning_rate": 7.87743979984021e-06, "loss": 0.7061, "step": 10626 }, { "epoch": 0.32570185117077355, "grad_norm": 1.7568217609394625, "learning_rate": 7.8770338918484e-06, "loss": 0.6761, "step": 10627 }, { "epoch": 0.32573249969351475, "grad_norm": 1.8262232610621933, "learning_rate": 7.87662795550844e-06, "loss": 0.6544, "step": 10628 }, { "epoch": 0.32576314821625596, "grad_norm": 1.6079489833722618, "learning_rate": 7.876221990824329e-06, "loss": 0.6784, "step": 10629 }, { "epoch": 0.32579379673899717, "grad_norm": 0.8685057256846949, "learning_rate": 7.875815997800064e-06, "loss": 0.4858, "step": 10630 }, { "epoch": 0.32582444526173837, "grad_norm": 2.0266060455441086, "learning_rate": 7.875409976439651e-06, "loss": 0.7084, "step": 10631 }, { "epoch": 0.3258550937844796, "grad_norm": 1.6483427745131556, "learning_rate": 7.875003926747087e-06, "loss": 0.6916, "step": 10632 }, { "epoch": 0.3258857423072208, "grad_norm": 1.557022015750213, "learning_rate": 7.874597848726375e-06, "loss": 0.6692, "step": 10633 }, { "epoch": 0.325916390829962, "grad_norm": 1.5424826680378645, "learning_rate": 7.874191742381514e-06, "loss": 0.7206, "step": 10634 }, { "epoch": 0.3259470393527032, "grad_norm": 0.810132921887563, "learning_rate": 7.873785607716507e-06, "loss": 0.4771, "step": 10635 }, { "epoch": 0.3259776878754444, "grad_norm": 1.98120664422848, "learning_rate": 7.873379444735354e-06, "loss": 0.7545, "step": 10636 }, { "epoch": 0.3260083363981856, "grad_norm": 1.7535083488011403, "learning_rate": 7.872973253442058e-06, "loss": 0.7835, "step": 10637 }, { "epoch": 0.3260389849209268, "grad_norm": 0.8053802373521246, "learning_rate": 7.872567033840621e-06, "loss": 0.4805, "step": 10638 }, { "epoch": 0.326069633443668, "grad_norm": 1.8035340595470912, "learning_rate": 7.872160785935047e-06, "loss": 0.6937, "step": 10639 }, { "epoch": 0.3261002819664092, "grad_norm": 1.8305077088321158, "learning_rate": 7.87175450972934e-06, "loss": 0.6993, "step": 10640 }, { "epoch": 0.32613093048915043, "grad_norm": 1.7983770480317374, "learning_rate": 7.871348205227498e-06, "loss": 0.7253, "step": 10641 }, { "epoch": 0.32616157901189163, "grad_norm": 2.0230560313262407, "learning_rate": 7.870941872433527e-06, "loss": 0.6366, "step": 10642 }, { "epoch": 0.32619222753463284, "grad_norm": 1.6398161980793817, "learning_rate": 7.870535511351433e-06, "loss": 0.6767, "step": 10643 }, { "epoch": 0.32622287605737404, "grad_norm": 1.8956972621946158, "learning_rate": 7.870129121985218e-06, "loss": 0.6048, "step": 10644 }, { "epoch": 0.32625352458011525, "grad_norm": 1.7532984808876915, "learning_rate": 7.869722704338887e-06, "loss": 0.6886, "step": 10645 }, { "epoch": 0.32628417310285646, "grad_norm": 1.6066154704983275, "learning_rate": 7.869316258416442e-06, "loss": 0.6318, "step": 10646 }, { "epoch": 0.32631482162559766, "grad_norm": 2.3000152162811496, "learning_rate": 7.868909784221891e-06, "loss": 0.7732, "step": 10647 }, { "epoch": 0.32634547014833887, "grad_norm": 1.844626343729666, "learning_rate": 7.868503281759238e-06, "loss": 0.6493, "step": 10648 }, { "epoch": 0.3263761186710801, "grad_norm": 1.612764441124113, "learning_rate": 7.868096751032489e-06, "loss": 0.7539, "step": 10649 }, { "epoch": 0.3264067671938213, "grad_norm": 1.8652644603182473, "learning_rate": 7.867690192045646e-06, "loss": 0.7691, "step": 10650 }, { "epoch": 0.3264374157165625, "grad_norm": 1.9896323447481237, "learning_rate": 7.86728360480272e-06, "loss": 0.7587, "step": 10651 }, { "epoch": 0.3264680642393037, "grad_norm": 1.8236311337922018, "learning_rate": 7.866876989307715e-06, "loss": 0.6661, "step": 10652 }, { "epoch": 0.3264987127620449, "grad_norm": 1.6869420931497063, "learning_rate": 7.866470345564636e-06, "loss": 0.6768, "step": 10653 }, { "epoch": 0.32652936128478605, "grad_norm": 0.8632947261776728, "learning_rate": 7.866063673577492e-06, "loss": 0.4749, "step": 10654 }, { "epoch": 0.32656000980752725, "grad_norm": 1.786390935458658, "learning_rate": 7.865656973350291e-06, "loss": 0.6834, "step": 10655 }, { "epoch": 0.32659065833026846, "grad_norm": 1.635712502638858, "learning_rate": 7.865250244887038e-06, "loss": 0.7166, "step": 10656 }, { "epoch": 0.32662130685300966, "grad_norm": 1.8604360476646107, "learning_rate": 7.86484348819174e-06, "loss": 0.7501, "step": 10657 }, { "epoch": 0.32665195537575087, "grad_norm": 1.6540858587903897, "learning_rate": 7.864436703268407e-06, "loss": 0.5677, "step": 10658 }, { "epoch": 0.3266826038984921, "grad_norm": 2.054158106714936, "learning_rate": 7.864029890121045e-06, "loss": 0.7032, "step": 10659 }, { "epoch": 0.3267132524212333, "grad_norm": 1.6294122830515068, "learning_rate": 7.863623048753665e-06, "loss": 0.6592, "step": 10660 }, { "epoch": 0.3267439009439745, "grad_norm": 1.5978341402536103, "learning_rate": 7.863216179170274e-06, "loss": 0.7649, "step": 10661 }, { "epoch": 0.3267745494667157, "grad_norm": 1.8170813141261521, "learning_rate": 7.862809281374882e-06, "loss": 0.7027, "step": 10662 }, { "epoch": 0.3268051979894569, "grad_norm": 1.8081617263538798, "learning_rate": 7.862402355371496e-06, "loss": 0.7424, "step": 10663 }, { "epoch": 0.3268358465121981, "grad_norm": 1.848570305148486, "learning_rate": 7.861995401164128e-06, "loss": 0.7378, "step": 10664 }, { "epoch": 0.3268664950349393, "grad_norm": 1.4840015621532276, "learning_rate": 7.861588418756787e-06, "loss": 0.6225, "step": 10665 }, { "epoch": 0.3268971435576805, "grad_norm": 1.5101986416962851, "learning_rate": 7.861181408153485e-06, "loss": 0.6156, "step": 10666 }, { "epoch": 0.3269277920804217, "grad_norm": 0.8230120929676032, "learning_rate": 7.860774369358229e-06, "loss": 0.4553, "step": 10667 }, { "epoch": 0.3269584406031629, "grad_norm": 1.6149980075228056, "learning_rate": 7.86036730237503e-06, "loss": 0.5685, "step": 10668 }, { "epoch": 0.32698908912590413, "grad_norm": 1.721500011868338, "learning_rate": 7.859960207207901e-06, "loss": 0.7483, "step": 10669 }, { "epoch": 0.32701973764864534, "grad_norm": 1.6755873369103829, "learning_rate": 7.859553083860854e-06, "loss": 0.673, "step": 10670 }, { "epoch": 0.32705038617138654, "grad_norm": 0.8009555923188935, "learning_rate": 7.859145932337897e-06, "loss": 0.4742, "step": 10671 }, { "epoch": 0.32708103469412775, "grad_norm": 0.7833950792284816, "learning_rate": 7.858738752643043e-06, "loss": 0.4532, "step": 10672 }, { "epoch": 0.32711168321686895, "grad_norm": 1.6521534716960131, "learning_rate": 7.858331544780306e-06, "loss": 0.6838, "step": 10673 }, { "epoch": 0.32714233173961016, "grad_norm": 1.8667006686951746, "learning_rate": 7.857924308753698e-06, "loss": 0.6395, "step": 10674 }, { "epoch": 0.32717298026235136, "grad_norm": 1.8062571366617624, "learning_rate": 7.857517044567228e-06, "loss": 0.7965, "step": 10675 }, { "epoch": 0.32720362878509257, "grad_norm": 1.5742559420248954, "learning_rate": 7.857109752224911e-06, "loss": 0.5745, "step": 10676 }, { "epoch": 0.3272342773078338, "grad_norm": 0.7983435193762469, "learning_rate": 7.856702431730763e-06, "loss": 0.4676, "step": 10677 }, { "epoch": 0.327264925830575, "grad_norm": 0.8799977126060976, "learning_rate": 7.856295083088793e-06, "loss": 0.4918, "step": 10678 }, { "epoch": 0.3272955743533162, "grad_norm": 1.5775936076670207, "learning_rate": 7.85588770630302e-06, "loss": 0.6698, "step": 10679 }, { "epoch": 0.3273262228760574, "grad_norm": 0.7994887231014898, "learning_rate": 7.855480301377451e-06, "loss": 0.4754, "step": 10680 }, { "epoch": 0.3273568713987986, "grad_norm": 1.7974881914689074, "learning_rate": 7.855072868316107e-06, "loss": 0.6882, "step": 10681 }, { "epoch": 0.3273875199215398, "grad_norm": 1.7385449849561565, "learning_rate": 7.854665407122998e-06, "loss": 0.7296, "step": 10682 }, { "epoch": 0.327418168444281, "grad_norm": 2.101935158857409, "learning_rate": 7.854257917802141e-06, "loss": 0.804, "step": 10683 }, { "epoch": 0.3274488169670222, "grad_norm": 0.8617901661524179, "learning_rate": 7.85385040035755e-06, "loss": 0.4918, "step": 10684 }, { "epoch": 0.32747946548976337, "grad_norm": 1.6676944216672118, "learning_rate": 7.853442854793241e-06, "loss": 0.7594, "step": 10685 }, { "epoch": 0.32751011401250457, "grad_norm": 1.7934347156690373, "learning_rate": 7.853035281113228e-06, "loss": 0.6497, "step": 10686 }, { "epoch": 0.3275407625352458, "grad_norm": 1.5143751271533938, "learning_rate": 7.852627679321529e-06, "loss": 0.6104, "step": 10687 }, { "epoch": 0.327571411057987, "grad_norm": 1.7224569697274215, "learning_rate": 7.85222004942216e-06, "loss": 0.7255, "step": 10688 }, { "epoch": 0.3276020595807282, "grad_norm": 1.5629033643074604, "learning_rate": 7.851812391419139e-06, "loss": 0.6276, "step": 10689 }, { "epoch": 0.3276327081034694, "grad_norm": 1.595422992718776, "learning_rate": 7.851404705316478e-06, "loss": 0.6855, "step": 10690 }, { "epoch": 0.3276633566262106, "grad_norm": 0.8450526305557425, "learning_rate": 7.850996991118199e-06, "loss": 0.4906, "step": 10691 }, { "epoch": 0.3276940051489518, "grad_norm": 1.4606920667828276, "learning_rate": 7.850589248828316e-06, "loss": 0.5871, "step": 10692 }, { "epoch": 0.327724653671693, "grad_norm": 1.5784191657709978, "learning_rate": 7.850181478450847e-06, "loss": 0.7252, "step": 10693 }, { "epoch": 0.3277553021944342, "grad_norm": 1.7567070973758558, "learning_rate": 7.849773679989814e-06, "loss": 0.6945, "step": 10694 }, { "epoch": 0.3277859507171754, "grad_norm": 1.6361604675807215, "learning_rate": 7.849365853449228e-06, "loss": 0.6565, "step": 10695 }, { "epoch": 0.32781659923991663, "grad_norm": 1.7170637135469233, "learning_rate": 7.848957998833113e-06, "loss": 0.7327, "step": 10696 }, { "epoch": 0.32784724776265783, "grad_norm": 1.5831810970721722, "learning_rate": 7.848550116145486e-06, "loss": 0.6797, "step": 10697 }, { "epoch": 0.32787789628539904, "grad_norm": 0.7546303060918953, "learning_rate": 7.848142205390364e-06, "loss": 0.4612, "step": 10698 }, { "epoch": 0.32790854480814025, "grad_norm": 1.7732842204630546, "learning_rate": 7.847734266571769e-06, "loss": 0.6467, "step": 10699 }, { "epoch": 0.32793919333088145, "grad_norm": 1.518696036345308, "learning_rate": 7.847326299693721e-06, "loss": 0.7286, "step": 10700 }, { "epoch": 0.32796984185362266, "grad_norm": 1.522754861570597, "learning_rate": 7.846918304760239e-06, "loss": 0.645, "step": 10701 }, { "epoch": 0.32800049037636386, "grad_norm": 0.7891683682039259, "learning_rate": 7.84651028177534e-06, "loss": 0.4467, "step": 10702 }, { "epoch": 0.32803113889910507, "grad_norm": 1.5868290650504742, "learning_rate": 7.846102230743049e-06, "loss": 0.7282, "step": 10703 }, { "epoch": 0.3280617874218463, "grad_norm": 1.7461637954259355, "learning_rate": 7.845694151667382e-06, "loss": 0.6672, "step": 10704 }, { "epoch": 0.3280924359445875, "grad_norm": 1.7106787522450695, "learning_rate": 7.845286044552365e-06, "loss": 0.6858, "step": 10705 }, { "epoch": 0.3281230844673287, "grad_norm": 1.8494192023503124, "learning_rate": 7.844877909402015e-06, "loss": 0.6311, "step": 10706 }, { "epoch": 0.3281537329900699, "grad_norm": 0.8673732763027145, "learning_rate": 7.844469746220356e-06, "loss": 0.4868, "step": 10707 }, { "epoch": 0.3281843815128111, "grad_norm": 2.0170743480666884, "learning_rate": 7.844061555011408e-06, "loss": 0.7443, "step": 10708 }, { "epoch": 0.3282150300355523, "grad_norm": 1.6957566394500334, "learning_rate": 7.843653335779194e-06, "loss": 0.6291, "step": 10709 }, { "epoch": 0.3282456785582935, "grad_norm": 0.7769784448169675, "learning_rate": 7.843245088527736e-06, "loss": 0.4548, "step": 10710 }, { "epoch": 0.3282763270810347, "grad_norm": 0.7850199323811362, "learning_rate": 7.842836813261057e-06, "loss": 0.4795, "step": 10711 }, { "epoch": 0.3283069756037759, "grad_norm": 1.7302496310272841, "learning_rate": 7.84242850998318e-06, "loss": 0.6699, "step": 10712 }, { "epoch": 0.3283376241265171, "grad_norm": 1.5597510574235858, "learning_rate": 7.842020178698126e-06, "loss": 0.6958, "step": 10713 }, { "epoch": 0.32836827264925833, "grad_norm": 1.7034779915053493, "learning_rate": 7.841611819409922e-06, "loss": 0.6979, "step": 10714 }, { "epoch": 0.32839892117199954, "grad_norm": 1.9225022898856285, "learning_rate": 7.841203432122588e-06, "loss": 0.6553, "step": 10715 }, { "epoch": 0.3284295696947407, "grad_norm": 1.3949760391769574, "learning_rate": 7.840795016840151e-06, "loss": 0.6718, "step": 10716 }, { "epoch": 0.3284602182174819, "grad_norm": 1.7321250739418852, "learning_rate": 7.840386573566634e-06, "loss": 0.7074, "step": 10717 }, { "epoch": 0.3284908667402231, "grad_norm": 1.5342671173083704, "learning_rate": 7.83997810230606e-06, "loss": 0.5798, "step": 10718 }, { "epoch": 0.3285215152629643, "grad_norm": 2.030960178853603, "learning_rate": 7.839569603062456e-06, "loss": 0.6625, "step": 10719 }, { "epoch": 0.3285521637857055, "grad_norm": 0.9230229678166129, "learning_rate": 7.839161075839846e-06, "loss": 0.4943, "step": 10720 }, { "epoch": 0.3285828123084467, "grad_norm": 2.626603648807846, "learning_rate": 7.838752520642256e-06, "loss": 0.754, "step": 10721 }, { "epoch": 0.3286134608311879, "grad_norm": 1.615037983578785, "learning_rate": 7.83834393747371e-06, "loss": 0.6145, "step": 10722 }, { "epoch": 0.3286441093539291, "grad_norm": 2.292572106050651, "learning_rate": 7.837935326338236e-06, "loss": 0.7194, "step": 10723 }, { "epoch": 0.32867475787667033, "grad_norm": 1.8462802120318498, "learning_rate": 7.837526687239858e-06, "loss": 0.6567, "step": 10724 }, { "epoch": 0.32870540639941154, "grad_norm": 1.9565857849376798, "learning_rate": 7.837118020182606e-06, "loss": 0.7986, "step": 10725 }, { "epoch": 0.32873605492215274, "grad_norm": 1.4795938622786047, "learning_rate": 7.8367093251705e-06, "loss": 0.625, "step": 10726 }, { "epoch": 0.32876670344489395, "grad_norm": 0.7935924038322327, "learning_rate": 7.836300602207574e-06, "loss": 0.4673, "step": 10727 }, { "epoch": 0.32879735196763515, "grad_norm": 1.6360171161747659, "learning_rate": 7.835891851297852e-06, "loss": 0.6915, "step": 10728 }, { "epoch": 0.32882800049037636, "grad_norm": 1.7742509363199446, "learning_rate": 7.835483072445363e-06, "loss": 0.6077, "step": 10729 }, { "epoch": 0.32885864901311757, "grad_norm": 0.7997627543606389, "learning_rate": 7.835074265654133e-06, "loss": 0.4861, "step": 10730 }, { "epoch": 0.32888929753585877, "grad_norm": 1.8178498389681375, "learning_rate": 7.83466543092819e-06, "loss": 0.6517, "step": 10731 }, { "epoch": 0.3289199460586, "grad_norm": 1.70936084582804, "learning_rate": 7.834256568271564e-06, "loss": 0.6064, "step": 10732 }, { "epoch": 0.3289505945813412, "grad_norm": 1.904579568828216, "learning_rate": 7.833847677688282e-06, "loss": 0.7793, "step": 10733 }, { "epoch": 0.3289812431040824, "grad_norm": 0.8178715495429909, "learning_rate": 7.833438759182375e-06, "loss": 0.4627, "step": 10734 }, { "epoch": 0.3290118916268236, "grad_norm": 1.5324898237960054, "learning_rate": 7.833029812757871e-06, "loss": 0.5396, "step": 10735 }, { "epoch": 0.3290425401495648, "grad_norm": 1.7086670222826923, "learning_rate": 7.832620838418798e-06, "loss": 0.7407, "step": 10736 }, { "epoch": 0.329073188672306, "grad_norm": 0.8174346883743621, "learning_rate": 7.832211836169188e-06, "loss": 0.4495, "step": 10737 }, { "epoch": 0.3291038371950472, "grad_norm": 1.6695562439465452, "learning_rate": 7.831802806013072e-06, "loss": 0.6488, "step": 10738 }, { "epoch": 0.3291344857177884, "grad_norm": 1.840918331844408, "learning_rate": 7.831393747954477e-06, "loss": 0.7614, "step": 10739 }, { "epoch": 0.3291651342405296, "grad_norm": 1.815630454710627, "learning_rate": 7.830984661997434e-06, "loss": 0.689, "step": 10740 }, { "epoch": 0.32919578276327083, "grad_norm": 1.6762992464722133, "learning_rate": 7.830575548145975e-06, "loss": 0.7255, "step": 10741 }, { "epoch": 0.32922643128601203, "grad_norm": 1.6978442392491055, "learning_rate": 7.83016640640413e-06, "loss": 0.6779, "step": 10742 }, { "epoch": 0.32925707980875324, "grad_norm": 1.7702697354905366, "learning_rate": 7.829757236775934e-06, "loss": 0.7307, "step": 10743 }, { "epoch": 0.32928772833149444, "grad_norm": 0.898816670605621, "learning_rate": 7.829348039265413e-06, "loss": 0.4627, "step": 10744 }, { "epoch": 0.32931837685423565, "grad_norm": 1.5991016622675986, "learning_rate": 7.828938813876603e-06, "loss": 0.6612, "step": 10745 }, { "epoch": 0.32934902537697686, "grad_norm": 1.5416940660724474, "learning_rate": 7.828529560613536e-06, "loss": 0.6291, "step": 10746 }, { "epoch": 0.329379673899718, "grad_norm": 1.8437268769903228, "learning_rate": 7.828120279480242e-06, "loss": 0.6922, "step": 10747 }, { "epoch": 0.3294103224224592, "grad_norm": 1.768952146578513, "learning_rate": 7.827710970480757e-06, "loss": 0.7255, "step": 10748 }, { "epoch": 0.3294409709452004, "grad_norm": 1.8088393500675246, "learning_rate": 7.827301633619112e-06, "loss": 0.6688, "step": 10749 }, { "epoch": 0.3294716194679416, "grad_norm": 1.900625383252148, "learning_rate": 7.826892268899338e-06, "loss": 0.6161, "step": 10750 }, { "epoch": 0.32950226799068283, "grad_norm": 1.7890622311317, "learning_rate": 7.826482876325474e-06, "loss": 0.7104, "step": 10751 }, { "epoch": 0.32953291651342403, "grad_norm": 1.7552890558721348, "learning_rate": 7.82607345590155e-06, "loss": 0.6905, "step": 10752 }, { "epoch": 0.32956356503616524, "grad_norm": 1.3835347957887496, "learning_rate": 7.825664007631601e-06, "loss": 0.6693, "step": 10753 }, { "epoch": 0.32959421355890645, "grad_norm": 1.7456432654592835, "learning_rate": 7.825254531519663e-06, "loss": 0.7313, "step": 10754 }, { "epoch": 0.32962486208164765, "grad_norm": 1.7456557606456036, "learning_rate": 7.824845027569769e-06, "loss": 0.7157, "step": 10755 }, { "epoch": 0.32965551060438886, "grad_norm": 1.6665806866680233, "learning_rate": 7.824435495785953e-06, "loss": 0.671, "step": 10756 }, { "epoch": 0.32968615912713006, "grad_norm": 1.7618361541834688, "learning_rate": 7.82402593617225e-06, "loss": 0.6722, "step": 10757 }, { "epoch": 0.32971680764987127, "grad_norm": 1.610417701650447, "learning_rate": 7.8236163487327e-06, "loss": 0.6846, "step": 10758 }, { "epoch": 0.3297474561726125, "grad_norm": 1.602768546083451, "learning_rate": 7.823206733471333e-06, "loss": 0.5345, "step": 10759 }, { "epoch": 0.3297781046953537, "grad_norm": 1.609484235216472, "learning_rate": 7.82279709039219e-06, "loss": 0.6184, "step": 10760 }, { "epoch": 0.3298087532180949, "grad_norm": 0.8963825564617305, "learning_rate": 7.822387419499304e-06, "loss": 0.4671, "step": 10761 }, { "epoch": 0.3298394017408361, "grad_norm": 1.5171512539184457, "learning_rate": 7.821977720796713e-06, "loss": 0.6025, "step": 10762 }, { "epoch": 0.3298700502635773, "grad_norm": 1.5520833186805805, "learning_rate": 7.821567994288452e-06, "loss": 0.6722, "step": 10763 }, { "epoch": 0.3299006987863185, "grad_norm": 1.6665443947790093, "learning_rate": 7.821158239978561e-06, "loss": 0.8399, "step": 10764 }, { "epoch": 0.3299313473090597, "grad_norm": 1.6792669282710504, "learning_rate": 7.820748457871077e-06, "loss": 0.6175, "step": 10765 }, { "epoch": 0.3299619958318009, "grad_norm": 1.7655121431363394, "learning_rate": 7.820338647970036e-06, "loss": 0.7216, "step": 10766 }, { "epoch": 0.3299926443545421, "grad_norm": 1.7333571915719261, "learning_rate": 7.819928810279476e-06, "loss": 0.6792, "step": 10767 }, { "epoch": 0.3300232928772833, "grad_norm": 0.8643607166681198, "learning_rate": 7.819518944803434e-06, "loss": 0.4871, "step": 10768 }, { "epoch": 0.33005394140002453, "grad_norm": 1.6360211878873925, "learning_rate": 7.819109051545955e-06, "loss": 0.6846, "step": 10769 }, { "epoch": 0.33008458992276574, "grad_norm": 1.7852295633144057, "learning_rate": 7.81869913051107e-06, "loss": 0.709, "step": 10770 }, { "epoch": 0.33011523844550694, "grad_norm": 1.6520308184507677, "learning_rate": 7.818289181702822e-06, "loss": 0.7007, "step": 10771 }, { "epoch": 0.33014588696824815, "grad_norm": 1.5797642367331632, "learning_rate": 7.81787920512525e-06, "loss": 0.6487, "step": 10772 }, { "epoch": 0.33017653549098935, "grad_norm": 0.8113216843763471, "learning_rate": 7.817469200782394e-06, "loss": 0.4728, "step": 10773 }, { "epoch": 0.33020718401373056, "grad_norm": 1.6865345580801423, "learning_rate": 7.81705916867829e-06, "loss": 0.6413, "step": 10774 }, { "epoch": 0.33023783253647176, "grad_norm": 0.808754516348698, "learning_rate": 7.816649108816982e-06, "loss": 0.4706, "step": 10775 }, { "epoch": 0.33026848105921297, "grad_norm": 1.6055463629954465, "learning_rate": 7.816239021202512e-06, "loss": 0.6347, "step": 10776 }, { "epoch": 0.3302991295819542, "grad_norm": 1.715966109104157, "learning_rate": 7.815828905838917e-06, "loss": 0.6291, "step": 10777 }, { "epoch": 0.3303297781046953, "grad_norm": 1.716286517223071, "learning_rate": 7.81541876273024e-06, "loss": 0.7031, "step": 10778 }, { "epoch": 0.33036042662743653, "grad_norm": 1.5693031886135316, "learning_rate": 7.81500859188052e-06, "loss": 0.6389, "step": 10779 }, { "epoch": 0.33039107515017774, "grad_norm": 1.5874791386660778, "learning_rate": 7.814598393293802e-06, "loss": 0.6209, "step": 10780 }, { "epoch": 0.33042172367291894, "grad_norm": 1.6284243850911921, "learning_rate": 7.814188166974125e-06, "loss": 0.6826, "step": 10781 }, { "epoch": 0.33045237219566015, "grad_norm": 1.885963433688321, "learning_rate": 7.813777912925533e-06, "loss": 0.7462, "step": 10782 }, { "epoch": 0.33048302071840135, "grad_norm": 1.7024980451190463, "learning_rate": 7.813367631152066e-06, "loss": 0.7397, "step": 10783 }, { "epoch": 0.33051366924114256, "grad_norm": 1.5756339228981449, "learning_rate": 7.812957321657769e-06, "loss": 0.671, "step": 10784 }, { "epoch": 0.33054431776388377, "grad_norm": 1.8851558441244636, "learning_rate": 7.812546984446681e-06, "loss": 0.6759, "step": 10785 }, { "epoch": 0.33057496628662497, "grad_norm": 1.712961525410508, "learning_rate": 7.81213661952285e-06, "loss": 0.6804, "step": 10786 }, { "epoch": 0.3306056148093662, "grad_norm": 0.9153326041117261, "learning_rate": 7.811726226890317e-06, "loss": 0.4933, "step": 10787 }, { "epoch": 0.3306362633321074, "grad_norm": 1.7183073640772786, "learning_rate": 7.811315806553126e-06, "loss": 0.6454, "step": 10788 }, { "epoch": 0.3306669118548486, "grad_norm": 1.59693523245959, "learning_rate": 7.810905358515323e-06, "loss": 0.6522, "step": 10789 }, { "epoch": 0.3306975603775898, "grad_norm": 1.699518645285084, "learning_rate": 7.810494882780947e-06, "loss": 0.7173, "step": 10790 }, { "epoch": 0.330728208900331, "grad_norm": 0.8125736820008332, "learning_rate": 7.810084379354049e-06, "loss": 0.4702, "step": 10791 }, { "epoch": 0.3307588574230722, "grad_norm": 0.7831151977399571, "learning_rate": 7.809673848238668e-06, "loss": 0.4633, "step": 10792 }, { "epoch": 0.3307895059458134, "grad_norm": 1.682962049140233, "learning_rate": 7.809263289438855e-06, "loss": 0.6792, "step": 10793 }, { "epoch": 0.3308201544685546, "grad_norm": 1.6922495547614493, "learning_rate": 7.80885270295865e-06, "loss": 0.6344, "step": 10794 }, { "epoch": 0.3308508029912958, "grad_norm": 1.6312624685197288, "learning_rate": 7.8084420888021e-06, "loss": 0.7249, "step": 10795 }, { "epoch": 0.33088145151403703, "grad_norm": 1.686518865054059, "learning_rate": 7.80803144697325e-06, "loss": 0.7441, "step": 10796 }, { "epoch": 0.33091210003677823, "grad_norm": 1.658104747241581, "learning_rate": 7.807620777476151e-06, "loss": 0.7642, "step": 10797 }, { "epoch": 0.33094274855951944, "grad_norm": 1.6352774830788923, "learning_rate": 7.807210080314844e-06, "loss": 0.562, "step": 10798 }, { "epoch": 0.33097339708226065, "grad_norm": 1.7293217393541227, "learning_rate": 7.80679935549338e-06, "loss": 0.7062, "step": 10799 }, { "epoch": 0.33100404560500185, "grad_norm": 1.6385445267794327, "learning_rate": 7.806388603015802e-06, "loss": 0.7277, "step": 10800 }, { "epoch": 0.33103469412774306, "grad_norm": 1.6757883936039923, "learning_rate": 7.805977822886159e-06, "loss": 0.7112, "step": 10801 }, { "epoch": 0.33106534265048426, "grad_norm": 1.7543210254706905, "learning_rate": 7.8055670151085e-06, "loss": 0.6492, "step": 10802 }, { "epoch": 0.33109599117322547, "grad_norm": 1.5830046814399525, "learning_rate": 7.80515617968687e-06, "loss": 0.645, "step": 10803 }, { "epoch": 0.3311266396959667, "grad_norm": 1.9988879052433655, "learning_rate": 7.80474531662532e-06, "loss": 0.7889, "step": 10804 }, { "epoch": 0.3311572882187079, "grad_norm": 1.9128683500181312, "learning_rate": 7.804334425927896e-06, "loss": 0.6593, "step": 10805 }, { "epoch": 0.3311879367414491, "grad_norm": 0.956446397976111, "learning_rate": 7.803923507598645e-06, "loss": 0.4428, "step": 10806 }, { "epoch": 0.3312185852641903, "grad_norm": 1.8118649135603646, "learning_rate": 7.803512561641622e-06, "loss": 0.6863, "step": 10807 }, { "epoch": 0.3312492337869315, "grad_norm": 1.6355627831539894, "learning_rate": 7.803101588060871e-06, "loss": 0.6448, "step": 10808 }, { "epoch": 0.33127988230967265, "grad_norm": 1.8023790686285537, "learning_rate": 7.802690586860442e-06, "loss": 0.7922, "step": 10809 }, { "epoch": 0.33131053083241385, "grad_norm": 1.784482480543444, "learning_rate": 7.802279558044385e-06, "loss": 0.6793, "step": 10810 }, { "epoch": 0.33134117935515506, "grad_norm": 1.6393006363149374, "learning_rate": 7.801868501616752e-06, "loss": 0.684, "step": 10811 }, { "epoch": 0.33137182787789626, "grad_norm": 1.6315036683175432, "learning_rate": 7.801457417581592e-06, "loss": 0.7096, "step": 10812 }, { "epoch": 0.33140247640063747, "grad_norm": 0.8795008757986977, "learning_rate": 7.801046305942954e-06, "loss": 0.4693, "step": 10813 }, { "epoch": 0.3314331249233787, "grad_norm": 1.7337071924008811, "learning_rate": 7.80063516670489e-06, "loss": 0.6743, "step": 10814 }, { "epoch": 0.3314637734461199, "grad_norm": 0.9233340778419447, "learning_rate": 7.800223999871452e-06, "loss": 0.4711, "step": 10815 }, { "epoch": 0.3314944219688611, "grad_norm": 2.0603592006001548, "learning_rate": 7.799812805446691e-06, "loss": 0.7226, "step": 10816 }, { "epoch": 0.3315250704916023, "grad_norm": 1.7519422069130788, "learning_rate": 7.799401583434659e-06, "loss": 0.6115, "step": 10817 }, { "epoch": 0.3315557190143435, "grad_norm": 1.92849057974925, "learning_rate": 7.798990333839405e-06, "loss": 0.7957, "step": 10818 }, { "epoch": 0.3315863675370847, "grad_norm": 1.5777103048868117, "learning_rate": 7.798579056664984e-06, "loss": 0.7252, "step": 10819 }, { "epoch": 0.3316170160598259, "grad_norm": 1.6988768143496504, "learning_rate": 7.798167751915446e-06, "loss": 0.7166, "step": 10820 }, { "epoch": 0.3316476645825671, "grad_norm": 1.59121088740816, "learning_rate": 7.797756419594846e-06, "loss": 0.7605, "step": 10821 }, { "epoch": 0.3316783131053083, "grad_norm": 1.7844987119800422, "learning_rate": 7.797345059707236e-06, "loss": 0.6496, "step": 10822 }, { "epoch": 0.3317089616280495, "grad_norm": 1.6322233491500384, "learning_rate": 7.79693367225667e-06, "loss": 0.699, "step": 10823 }, { "epoch": 0.33173961015079073, "grad_norm": 1.8417436223300827, "learning_rate": 7.7965222572472e-06, "loss": 0.6848, "step": 10824 }, { "epoch": 0.33177025867353194, "grad_norm": 0.9644361149887974, "learning_rate": 7.796110814682882e-06, "loss": 0.4466, "step": 10825 }, { "epoch": 0.33180090719627314, "grad_norm": 1.8316885030502028, "learning_rate": 7.79569934456777e-06, "loss": 0.6804, "step": 10826 }, { "epoch": 0.33183155571901435, "grad_norm": 1.7288339323383461, "learning_rate": 7.795287846905912e-06, "loss": 0.6982, "step": 10827 }, { "epoch": 0.33186220424175555, "grad_norm": 1.5970136689294068, "learning_rate": 7.794876321701372e-06, "loss": 0.6901, "step": 10828 }, { "epoch": 0.33189285276449676, "grad_norm": 1.788471475920761, "learning_rate": 7.794464768958198e-06, "loss": 0.6594, "step": 10829 }, { "epoch": 0.33192350128723797, "grad_norm": 1.750918463066456, "learning_rate": 7.79405318868045e-06, "loss": 0.6557, "step": 10830 }, { "epoch": 0.33195414980997917, "grad_norm": 1.7465440395489507, "learning_rate": 7.79364158087218e-06, "loss": 0.7124, "step": 10831 }, { "epoch": 0.3319847983327204, "grad_norm": 0.8570623663366038, "learning_rate": 7.793229945537444e-06, "loss": 0.472, "step": 10832 }, { "epoch": 0.3320154468554616, "grad_norm": 2.1080828836292933, "learning_rate": 7.792818282680299e-06, "loss": 0.574, "step": 10833 }, { "epoch": 0.3320460953782028, "grad_norm": 1.8254800002871936, "learning_rate": 7.792406592304802e-06, "loss": 0.6627, "step": 10834 }, { "epoch": 0.332076743900944, "grad_norm": 1.8233336106058264, "learning_rate": 7.791994874415008e-06, "loss": 0.7338, "step": 10835 }, { "epoch": 0.3321073924236852, "grad_norm": 1.841148925222183, "learning_rate": 7.791583129014973e-06, "loss": 0.6568, "step": 10836 }, { "epoch": 0.3321380409464264, "grad_norm": 1.6854452201636336, "learning_rate": 7.791171356108755e-06, "loss": 0.6718, "step": 10837 }, { "epoch": 0.3321686894691676, "grad_norm": 1.4614002519379636, "learning_rate": 7.790759555700413e-06, "loss": 0.6545, "step": 10838 }, { "epoch": 0.3321993379919088, "grad_norm": 1.942330187865892, "learning_rate": 7.790347727794003e-06, "loss": 0.7535, "step": 10839 }, { "epoch": 0.33222998651464997, "grad_norm": 1.5337739191187747, "learning_rate": 7.789935872393582e-06, "loss": 0.5665, "step": 10840 }, { "epoch": 0.33226063503739117, "grad_norm": 1.6999077177758506, "learning_rate": 7.78952398950321e-06, "loss": 0.6837, "step": 10841 }, { "epoch": 0.3322912835601324, "grad_norm": 0.8535434968021792, "learning_rate": 7.789112079126942e-06, "loss": 0.4612, "step": 10842 }, { "epoch": 0.3323219320828736, "grad_norm": 0.8131861980717159, "learning_rate": 7.788700141268842e-06, "loss": 0.4835, "step": 10843 }, { "epoch": 0.3323525806056148, "grad_norm": 1.9684529723501423, "learning_rate": 7.788288175932965e-06, "loss": 0.7195, "step": 10844 }, { "epoch": 0.332383229128356, "grad_norm": 0.741449891897247, "learning_rate": 7.787876183123371e-06, "loss": 0.4636, "step": 10845 }, { "epoch": 0.3324138776510972, "grad_norm": 1.5805558017172556, "learning_rate": 7.787464162844118e-06, "loss": 0.5704, "step": 10846 }, { "epoch": 0.3324445261738384, "grad_norm": 1.68232440697366, "learning_rate": 7.78705211509927e-06, "loss": 0.7415, "step": 10847 }, { "epoch": 0.3324751746965796, "grad_norm": 1.8638737856616725, "learning_rate": 7.786640039892884e-06, "loss": 0.6572, "step": 10848 }, { "epoch": 0.3325058232193208, "grad_norm": 0.9828325243330144, "learning_rate": 7.78622793722902e-06, "loss": 0.4859, "step": 10849 }, { "epoch": 0.332536471742062, "grad_norm": 1.7111769884135397, "learning_rate": 7.78581580711174e-06, "loss": 0.729, "step": 10850 }, { "epoch": 0.33256712026480323, "grad_norm": 1.6448911771996848, "learning_rate": 7.785403649545103e-06, "loss": 0.6856, "step": 10851 }, { "epoch": 0.33259776878754443, "grad_norm": 0.8577214121564866, "learning_rate": 7.784991464533171e-06, "loss": 0.4708, "step": 10852 }, { "epoch": 0.33262841731028564, "grad_norm": 1.4558842225175084, "learning_rate": 7.784579252080006e-06, "loss": 0.6782, "step": 10853 }, { "epoch": 0.33265906583302685, "grad_norm": 1.6305610248224705, "learning_rate": 7.78416701218967e-06, "loss": 0.6385, "step": 10854 }, { "epoch": 0.33268971435576805, "grad_norm": 1.7913519527263135, "learning_rate": 7.783754744866223e-06, "loss": 0.6488, "step": 10855 }, { "epoch": 0.33272036287850926, "grad_norm": 0.8691700991713442, "learning_rate": 7.783342450113727e-06, "loss": 0.4646, "step": 10856 }, { "epoch": 0.33275101140125046, "grad_norm": 1.7977700092878854, "learning_rate": 7.782930127936248e-06, "loss": 0.7207, "step": 10857 }, { "epoch": 0.33278165992399167, "grad_norm": 1.7656617175734328, "learning_rate": 7.782517778337845e-06, "loss": 0.6233, "step": 10858 }, { "epoch": 0.3328123084467329, "grad_norm": 1.8388440369796986, "learning_rate": 7.782105401322584e-06, "loss": 0.6789, "step": 10859 }, { "epoch": 0.3328429569694741, "grad_norm": 1.753502241023095, "learning_rate": 7.781692996894526e-06, "loss": 0.6644, "step": 10860 }, { "epoch": 0.3328736054922153, "grad_norm": 1.9209499567283088, "learning_rate": 7.781280565057734e-06, "loss": 0.7775, "step": 10861 }, { "epoch": 0.3329042540149565, "grad_norm": 0.8176288738181923, "learning_rate": 7.780868105816275e-06, "loss": 0.4879, "step": 10862 }, { "epoch": 0.3329349025376977, "grad_norm": 1.6165594526194709, "learning_rate": 7.78045561917421e-06, "loss": 0.6726, "step": 10863 }, { "epoch": 0.3329655510604389, "grad_norm": 0.8281412352788333, "learning_rate": 7.780043105135604e-06, "loss": 0.4938, "step": 10864 }, { "epoch": 0.3329961995831801, "grad_norm": 0.8048500310349952, "learning_rate": 7.779630563704522e-06, "loss": 0.4636, "step": 10865 }, { "epoch": 0.3330268481059213, "grad_norm": 1.7313143524778636, "learning_rate": 7.779217994885028e-06, "loss": 0.783, "step": 10866 }, { "epoch": 0.3330574966286625, "grad_norm": 1.679506867404854, "learning_rate": 7.778805398681191e-06, "loss": 0.5732, "step": 10867 }, { "epoch": 0.3330881451514037, "grad_norm": 1.6964946316022418, "learning_rate": 7.77839277509707e-06, "loss": 0.7161, "step": 10868 }, { "epoch": 0.33311879367414493, "grad_norm": 1.884332891749786, "learning_rate": 7.777980124136735e-06, "loss": 0.7037, "step": 10869 }, { "epoch": 0.33314944219688614, "grad_norm": 0.8108372390640648, "learning_rate": 7.777567445804253e-06, "loss": 0.4647, "step": 10870 }, { "epoch": 0.3331800907196273, "grad_norm": 0.8158215868245611, "learning_rate": 7.777154740103687e-06, "loss": 0.4643, "step": 10871 }, { "epoch": 0.3332107392423685, "grad_norm": 1.758041546237296, "learning_rate": 7.776742007039104e-06, "loss": 0.6633, "step": 10872 }, { "epoch": 0.3332413877651097, "grad_norm": 1.9986467329319553, "learning_rate": 7.776329246614574e-06, "loss": 0.6959, "step": 10873 }, { "epoch": 0.3332720362878509, "grad_norm": 1.955197644556663, "learning_rate": 7.77591645883416e-06, "loss": 0.6209, "step": 10874 }, { "epoch": 0.3333026848105921, "grad_norm": 1.7975277666932727, "learning_rate": 7.77550364370193e-06, "loss": 0.7114, "step": 10875 }, { "epoch": 0.3333333333333333, "grad_norm": 1.7496133182326778, "learning_rate": 7.775090801221953e-06, "loss": 0.6735, "step": 10876 }, { "epoch": 0.3333639818560745, "grad_norm": 1.6937430724210358, "learning_rate": 7.774677931398295e-06, "loss": 0.6483, "step": 10877 }, { "epoch": 0.3333946303788157, "grad_norm": 1.7995485737282695, "learning_rate": 7.774265034235029e-06, "loss": 0.7144, "step": 10878 }, { "epoch": 0.33342527890155693, "grad_norm": 1.743875589825426, "learning_rate": 7.773852109736217e-06, "loss": 0.6702, "step": 10879 }, { "epoch": 0.33345592742429814, "grad_norm": 0.9862730298883094, "learning_rate": 7.773439157905931e-06, "loss": 0.4842, "step": 10880 }, { "epoch": 0.33348657594703934, "grad_norm": 1.8472121524513088, "learning_rate": 7.773026178748239e-06, "loss": 0.6491, "step": 10881 }, { "epoch": 0.33351722446978055, "grad_norm": 1.9177441814368281, "learning_rate": 7.77261317226721e-06, "loss": 0.6962, "step": 10882 }, { "epoch": 0.33354787299252175, "grad_norm": 0.8256344558178554, "learning_rate": 7.772200138466917e-06, "loss": 0.454, "step": 10883 }, { "epoch": 0.33357852151526296, "grad_norm": 1.8984619810217114, "learning_rate": 7.771787077351425e-06, "loss": 0.6896, "step": 10884 }, { "epoch": 0.33360917003800417, "grad_norm": 0.7635994430691071, "learning_rate": 7.771373988924806e-06, "loss": 0.4511, "step": 10885 }, { "epoch": 0.33363981856074537, "grad_norm": 1.7427057590119213, "learning_rate": 7.770960873191128e-06, "loss": 0.7347, "step": 10886 }, { "epoch": 0.3336704670834866, "grad_norm": 1.641029976037963, "learning_rate": 7.770547730154465e-06, "loss": 0.6176, "step": 10887 }, { "epoch": 0.3337011156062278, "grad_norm": 1.8932246219464808, "learning_rate": 7.770134559818888e-06, "loss": 0.7712, "step": 10888 }, { "epoch": 0.333731764128969, "grad_norm": 1.6038001587609358, "learning_rate": 7.769721362188465e-06, "loss": 0.6326, "step": 10889 }, { "epoch": 0.3337624126517102, "grad_norm": 1.6129382095340061, "learning_rate": 7.769308137267268e-06, "loss": 0.6593, "step": 10890 }, { "epoch": 0.3337930611744514, "grad_norm": 1.8901645133839133, "learning_rate": 7.76889488505937e-06, "loss": 0.6747, "step": 10891 }, { "epoch": 0.3338237096971926, "grad_norm": 1.6381058177535728, "learning_rate": 7.768481605568843e-06, "loss": 0.6331, "step": 10892 }, { "epoch": 0.3338543582199338, "grad_norm": 1.617825748526461, "learning_rate": 7.768068298799758e-06, "loss": 0.6516, "step": 10893 }, { "epoch": 0.333885006742675, "grad_norm": 0.8909340316837673, "learning_rate": 7.767654964756186e-06, "loss": 0.4473, "step": 10894 }, { "epoch": 0.3339156552654162, "grad_norm": 1.6163821197571941, "learning_rate": 7.767241603442204e-06, "loss": 0.6341, "step": 10895 }, { "epoch": 0.33394630378815743, "grad_norm": 1.8211165242108138, "learning_rate": 7.76682821486188e-06, "loss": 0.6398, "step": 10896 }, { "epoch": 0.33397695231089863, "grad_norm": 1.9646577075097886, "learning_rate": 7.766414799019294e-06, "loss": 0.7093, "step": 10897 }, { "epoch": 0.33400760083363984, "grad_norm": 1.532538246446913, "learning_rate": 7.76600135591851e-06, "loss": 0.698, "step": 10898 }, { "epoch": 0.33403824935638105, "grad_norm": 0.8387091273100931, "learning_rate": 7.765587885563609e-06, "loss": 0.4827, "step": 10899 }, { "epoch": 0.33406889787912225, "grad_norm": 1.6480811658474148, "learning_rate": 7.765174387958663e-06, "loss": 0.662, "step": 10900 }, { "epoch": 0.33409954640186346, "grad_norm": 1.6863044321302127, "learning_rate": 7.764760863107748e-06, "loss": 0.6489, "step": 10901 }, { "epoch": 0.3341301949246046, "grad_norm": 1.5865839516727875, "learning_rate": 7.764347311014935e-06, "loss": 0.6727, "step": 10902 }, { "epoch": 0.3341608434473458, "grad_norm": 1.613243353535291, "learning_rate": 7.7639337316843e-06, "loss": 0.5715, "step": 10903 }, { "epoch": 0.334191491970087, "grad_norm": 1.9902832533596833, "learning_rate": 7.763520125119918e-06, "loss": 0.6792, "step": 10904 }, { "epoch": 0.3342221404928282, "grad_norm": 1.7884607936709833, "learning_rate": 7.763106491325869e-06, "loss": 0.773, "step": 10905 }, { "epoch": 0.33425278901556943, "grad_norm": 1.5753543269847812, "learning_rate": 7.762692830306223e-06, "loss": 0.7473, "step": 10906 }, { "epoch": 0.33428343753831063, "grad_norm": 1.8267753418119774, "learning_rate": 7.762279142065055e-06, "loss": 0.7408, "step": 10907 }, { "epoch": 0.33431408606105184, "grad_norm": 1.6546753768534148, "learning_rate": 7.761865426606447e-06, "loss": 0.6431, "step": 10908 }, { "epoch": 0.33434473458379305, "grad_norm": 1.5440676523601446, "learning_rate": 7.76145168393447e-06, "loss": 0.6329, "step": 10909 }, { "epoch": 0.33437538310653425, "grad_norm": 0.8975185983374674, "learning_rate": 7.761037914053205e-06, "loss": 0.4568, "step": 10910 }, { "epoch": 0.33440603162927546, "grad_norm": 1.8688083913080407, "learning_rate": 7.760624116966726e-06, "loss": 0.6881, "step": 10911 }, { "epoch": 0.33443668015201666, "grad_norm": 1.9881751201118874, "learning_rate": 7.760210292679114e-06, "loss": 0.6645, "step": 10912 }, { "epoch": 0.33446732867475787, "grad_norm": 1.86707109063973, "learning_rate": 7.75979644119444e-06, "loss": 0.6989, "step": 10913 }, { "epoch": 0.3344979771974991, "grad_norm": 1.883440896141748, "learning_rate": 7.759382562516786e-06, "loss": 0.7667, "step": 10914 }, { "epoch": 0.3345286257202403, "grad_norm": 1.8713099063478615, "learning_rate": 7.758968656650231e-06, "loss": 0.6718, "step": 10915 }, { "epoch": 0.3345592742429815, "grad_norm": 1.6278434288028791, "learning_rate": 7.758554723598852e-06, "loss": 0.5853, "step": 10916 }, { "epoch": 0.3345899227657227, "grad_norm": 2.031608194662077, "learning_rate": 7.758140763366726e-06, "loss": 0.7006, "step": 10917 }, { "epoch": 0.3346205712884639, "grad_norm": 1.9105289306741662, "learning_rate": 7.757726775957935e-06, "loss": 0.7698, "step": 10918 }, { "epoch": 0.3346512198112051, "grad_norm": 1.7347654207658414, "learning_rate": 7.757312761376555e-06, "loss": 0.6247, "step": 10919 }, { "epoch": 0.3346818683339463, "grad_norm": 1.439609946130256, "learning_rate": 7.75689871962667e-06, "loss": 0.575, "step": 10920 }, { "epoch": 0.3347125168566875, "grad_norm": 1.7153123151556833, "learning_rate": 7.756484650712352e-06, "loss": 0.7062, "step": 10921 }, { "epoch": 0.3347431653794287, "grad_norm": 1.6409904349741433, "learning_rate": 7.756070554637689e-06, "loss": 0.7328, "step": 10922 }, { "epoch": 0.3347738139021699, "grad_norm": 1.6960533717797601, "learning_rate": 7.755656431406756e-06, "loss": 0.6824, "step": 10923 }, { "epoch": 0.33480446242491113, "grad_norm": 1.7686178013397997, "learning_rate": 7.755242281023634e-06, "loss": 0.6542, "step": 10924 }, { "epoch": 0.33483511094765234, "grad_norm": 1.0275306786889833, "learning_rate": 7.754828103492407e-06, "loss": 0.4736, "step": 10925 }, { "epoch": 0.33486575947039354, "grad_norm": 1.8209572075398885, "learning_rate": 7.754413898817152e-06, "loss": 0.5273, "step": 10926 }, { "epoch": 0.33489640799313475, "grad_norm": 1.7258951413880346, "learning_rate": 7.753999667001952e-06, "loss": 0.7046, "step": 10927 }, { "epoch": 0.33492705651587595, "grad_norm": 1.6446343947868578, "learning_rate": 7.753585408050892e-06, "loss": 0.7477, "step": 10928 }, { "epoch": 0.33495770503861716, "grad_norm": 1.6535760841368063, "learning_rate": 7.753171121968048e-06, "loss": 0.7178, "step": 10929 }, { "epoch": 0.33498835356135837, "grad_norm": 2.038544310504032, "learning_rate": 7.752756808757505e-06, "loss": 0.7822, "step": 10930 }, { "epoch": 0.33501900208409957, "grad_norm": 1.5092899192400135, "learning_rate": 7.752342468423343e-06, "loss": 0.649, "step": 10931 }, { "epoch": 0.3350496506068408, "grad_norm": 1.8183383688151733, "learning_rate": 7.75192810096965e-06, "loss": 0.6608, "step": 10932 }, { "epoch": 0.3350802991295819, "grad_norm": 1.648421354758214, "learning_rate": 7.751513706400502e-06, "loss": 0.6328, "step": 10933 }, { "epoch": 0.33511094765232313, "grad_norm": 2.1344300965463643, "learning_rate": 7.751099284719988e-06, "loss": 0.6868, "step": 10934 }, { "epoch": 0.33514159617506434, "grad_norm": 1.6058824363352486, "learning_rate": 7.750684835932185e-06, "loss": 0.6341, "step": 10935 }, { "epoch": 0.33517224469780554, "grad_norm": 1.7727002492922619, "learning_rate": 7.750270360041185e-06, "loss": 0.6122, "step": 10936 }, { "epoch": 0.33520289322054675, "grad_norm": 1.5565980449874548, "learning_rate": 7.749855857051065e-06, "loss": 0.6822, "step": 10937 }, { "epoch": 0.33523354174328795, "grad_norm": 1.7435065688251399, "learning_rate": 7.749441326965912e-06, "loss": 0.6678, "step": 10938 }, { "epoch": 0.33526419026602916, "grad_norm": 0.9746180590864405, "learning_rate": 7.749026769789811e-06, "loss": 0.4686, "step": 10939 }, { "epoch": 0.33529483878877037, "grad_norm": 1.738111524688391, "learning_rate": 7.748612185526845e-06, "loss": 0.6342, "step": 10940 }, { "epoch": 0.33532548731151157, "grad_norm": 1.7061369092020893, "learning_rate": 7.7481975741811e-06, "loss": 0.6703, "step": 10941 }, { "epoch": 0.3353561358342528, "grad_norm": 1.7767395608885637, "learning_rate": 7.747782935756662e-06, "loss": 0.6463, "step": 10942 }, { "epoch": 0.335386784356994, "grad_norm": 1.834001896040395, "learning_rate": 7.747368270257616e-06, "loss": 0.6662, "step": 10943 }, { "epoch": 0.3354174328797352, "grad_norm": 1.6157993080567832, "learning_rate": 7.746953577688046e-06, "loss": 0.6762, "step": 10944 }, { "epoch": 0.3354480814024764, "grad_norm": 1.8065752316810495, "learning_rate": 7.74653885805204e-06, "loss": 0.7468, "step": 10945 }, { "epoch": 0.3354787299252176, "grad_norm": 1.8486408983216676, "learning_rate": 7.746124111353683e-06, "loss": 0.6726, "step": 10946 }, { "epoch": 0.3355093784479588, "grad_norm": 0.8792692287390222, "learning_rate": 7.745709337597062e-06, "loss": 0.4491, "step": 10947 }, { "epoch": 0.3355400269707, "grad_norm": 1.4785387420788447, "learning_rate": 7.745294536786268e-06, "loss": 0.6781, "step": 10948 }, { "epoch": 0.3355706754934412, "grad_norm": 1.7066405729497383, "learning_rate": 7.74487970892538e-06, "loss": 0.7141, "step": 10949 }, { "epoch": 0.3356013240161824, "grad_norm": 1.5735801991715028, "learning_rate": 7.744464854018494e-06, "loss": 0.6414, "step": 10950 }, { "epoch": 0.33563197253892363, "grad_norm": 1.6826023115001074, "learning_rate": 7.74404997206969e-06, "loss": 0.6736, "step": 10951 }, { "epoch": 0.33566262106166483, "grad_norm": 1.5121879546526935, "learning_rate": 7.743635063083062e-06, "loss": 0.584, "step": 10952 }, { "epoch": 0.33569326958440604, "grad_norm": 1.9379336915370693, "learning_rate": 7.743220127062696e-06, "loss": 0.683, "step": 10953 }, { "epoch": 0.33572391810714725, "grad_norm": 1.7752305375086408, "learning_rate": 7.742805164012679e-06, "loss": 0.7634, "step": 10954 }, { "epoch": 0.33575456662988845, "grad_norm": 1.6279976280114699, "learning_rate": 7.742390173937103e-06, "loss": 0.6578, "step": 10955 }, { "epoch": 0.33578521515262966, "grad_norm": 1.763769711973038, "learning_rate": 7.74197515684005e-06, "loss": 0.6102, "step": 10956 }, { "epoch": 0.33581586367537086, "grad_norm": 1.6800109219999089, "learning_rate": 7.741560112725619e-06, "loss": 0.6716, "step": 10957 }, { "epoch": 0.33584651219811207, "grad_norm": 1.6487920433720602, "learning_rate": 7.741145041597892e-06, "loss": 0.7033, "step": 10958 }, { "epoch": 0.3358771607208533, "grad_norm": 1.7390370368239552, "learning_rate": 7.740729943460965e-06, "loss": 0.706, "step": 10959 }, { "epoch": 0.3359078092435945, "grad_norm": 2.0117618085062023, "learning_rate": 7.740314818318921e-06, "loss": 0.7748, "step": 10960 }, { "epoch": 0.3359384577663357, "grad_norm": 1.7657654198384296, "learning_rate": 7.739899666175856e-06, "loss": 0.6817, "step": 10961 }, { "epoch": 0.3359691062890769, "grad_norm": 1.6461232251672306, "learning_rate": 7.739484487035858e-06, "loss": 0.6296, "step": 10962 }, { "epoch": 0.3359997548118181, "grad_norm": 0.8670168386986385, "learning_rate": 7.739069280903017e-06, "loss": 0.4692, "step": 10963 }, { "epoch": 0.33603040333455925, "grad_norm": 1.5652553567808123, "learning_rate": 7.738654047781427e-06, "loss": 0.7223, "step": 10964 }, { "epoch": 0.33606105185730045, "grad_norm": 1.6685533745784744, "learning_rate": 7.738238787675178e-06, "loss": 0.5735, "step": 10965 }, { "epoch": 0.33609170038004166, "grad_norm": 1.8919097876098556, "learning_rate": 7.737823500588361e-06, "loss": 0.7244, "step": 10966 }, { "epoch": 0.33612234890278286, "grad_norm": 1.9867346149636458, "learning_rate": 7.73740818652507e-06, "loss": 0.7326, "step": 10967 }, { "epoch": 0.33615299742552407, "grad_norm": 1.848982969049535, "learning_rate": 7.736992845489394e-06, "loss": 0.7339, "step": 10968 }, { "epoch": 0.3361836459482653, "grad_norm": 0.7601828968784949, "learning_rate": 7.736577477485427e-06, "loss": 0.4571, "step": 10969 }, { "epoch": 0.3362142944710065, "grad_norm": 1.8555430144888612, "learning_rate": 7.736162082517265e-06, "loss": 0.7981, "step": 10970 }, { "epoch": 0.3362449429937477, "grad_norm": 1.734137444995043, "learning_rate": 7.735746660588993e-06, "loss": 0.6915, "step": 10971 }, { "epoch": 0.3362755915164889, "grad_norm": 1.671056154142007, "learning_rate": 7.735331211704713e-06, "loss": 0.6309, "step": 10972 }, { "epoch": 0.3363062400392301, "grad_norm": 1.7918010590463551, "learning_rate": 7.734915735868513e-06, "loss": 0.8007, "step": 10973 }, { "epoch": 0.3363368885619713, "grad_norm": 1.5798167877631646, "learning_rate": 7.73450023308449e-06, "loss": 0.6422, "step": 10974 }, { "epoch": 0.3363675370847125, "grad_norm": 1.766204381619496, "learning_rate": 7.734084703356736e-06, "loss": 0.6644, "step": 10975 }, { "epoch": 0.3363981856074537, "grad_norm": 1.776023593383118, "learning_rate": 7.733669146689344e-06, "loss": 0.7516, "step": 10976 }, { "epoch": 0.3364288341301949, "grad_norm": 1.617170423163549, "learning_rate": 7.733253563086413e-06, "loss": 0.6748, "step": 10977 }, { "epoch": 0.3364594826529361, "grad_norm": 1.715685553849979, "learning_rate": 7.732837952552035e-06, "loss": 0.679, "step": 10978 }, { "epoch": 0.33649013117567733, "grad_norm": 1.7793926658049914, "learning_rate": 7.732422315090304e-06, "loss": 0.7143, "step": 10979 }, { "epoch": 0.33652077969841854, "grad_norm": 1.714160793327565, "learning_rate": 7.732006650705318e-06, "loss": 0.7242, "step": 10980 }, { "epoch": 0.33655142822115974, "grad_norm": 0.8899186812368616, "learning_rate": 7.73159095940117e-06, "loss": 0.4964, "step": 10981 }, { "epoch": 0.33658207674390095, "grad_norm": 1.5567876412543642, "learning_rate": 7.731175241181959e-06, "loss": 0.6966, "step": 10982 }, { "epoch": 0.33661272526664215, "grad_norm": 1.5840807080131103, "learning_rate": 7.730759496051778e-06, "loss": 0.7478, "step": 10983 }, { "epoch": 0.33664337378938336, "grad_norm": 1.9355213742033692, "learning_rate": 7.730343724014726e-06, "loss": 0.6362, "step": 10984 }, { "epoch": 0.33667402231212457, "grad_norm": 1.9182465684387333, "learning_rate": 7.729927925074898e-06, "loss": 0.7318, "step": 10985 }, { "epoch": 0.33670467083486577, "grad_norm": 1.7416826988311738, "learning_rate": 7.729512099236394e-06, "loss": 0.689, "step": 10986 }, { "epoch": 0.336735319357607, "grad_norm": 1.707331863493097, "learning_rate": 7.729096246503307e-06, "loss": 0.7489, "step": 10987 }, { "epoch": 0.3367659678803482, "grad_norm": 1.6930498281366002, "learning_rate": 7.728680366879736e-06, "loss": 0.719, "step": 10988 }, { "epoch": 0.3367966164030894, "grad_norm": 1.7209197523118147, "learning_rate": 7.728264460369781e-06, "loss": 0.6594, "step": 10989 }, { "epoch": 0.3368272649258306, "grad_norm": 2.0440295573543406, "learning_rate": 7.727848526977535e-06, "loss": 0.6255, "step": 10990 }, { "epoch": 0.3368579134485718, "grad_norm": 1.752732314902263, "learning_rate": 7.727432566707103e-06, "loss": 0.7174, "step": 10991 }, { "epoch": 0.336888561971313, "grad_norm": 1.3650484019262699, "learning_rate": 7.727016579562578e-06, "loss": 0.539, "step": 10992 }, { "epoch": 0.3369192104940542, "grad_norm": 1.8058078397056763, "learning_rate": 7.726600565548061e-06, "loss": 0.6943, "step": 10993 }, { "epoch": 0.3369498590167954, "grad_norm": 1.893373512565859, "learning_rate": 7.726184524667653e-06, "loss": 0.7434, "step": 10994 }, { "epoch": 0.33698050753953657, "grad_norm": 1.8502221360264792, "learning_rate": 7.72576845692545e-06, "loss": 0.6991, "step": 10995 }, { "epoch": 0.3370111560622778, "grad_norm": 1.7951504494381816, "learning_rate": 7.72535236232555e-06, "loss": 0.6792, "step": 10996 }, { "epoch": 0.337041804585019, "grad_norm": 1.7625909636560468, "learning_rate": 7.72493624087206e-06, "loss": 0.7337, "step": 10997 }, { "epoch": 0.3370724531077602, "grad_norm": 1.9076571190756888, "learning_rate": 7.724520092569075e-06, "loss": 0.7096, "step": 10998 }, { "epoch": 0.3371031016305014, "grad_norm": 1.8354928484395332, "learning_rate": 7.724103917420695e-06, "loss": 0.7619, "step": 10999 }, { "epoch": 0.3371337501532426, "grad_norm": 0.9016623310230905, "learning_rate": 7.723687715431024e-06, "loss": 0.452, "step": 11000 }, { "epoch": 0.3371643986759838, "grad_norm": 2.094862923254399, "learning_rate": 7.723271486604162e-06, "loss": 0.7422, "step": 11001 }, { "epoch": 0.337195047198725, "grad_norm": 1.8805310876820327, "learning_rate": 7.722855230944206e-06, "loss": 0.755, "step": 11002 }, { "epoch": 0.3372256957214662, "grad_norm": 1.8464353112020808, "learning_rate": 7.722438948455263e-06, "loss": 0.7344, "step": 11003 }, { "epoch": 0.3372563442442074, "grad_norm": 1.5727105098338905, "learning_rate": 7.722022639141431e-06, "loss": 0.6856, "step": 11004 }, { "epoch": 0.3372869927669486, "grad_norm": 1.7740467376000142, "learning_rate": 7.721606303006815e-06, "loss": 0.6482, "step": 11005 }, { "epoch": 0.33731764128968983, "grad_norm": 1.6555577179192742, "learning_rate": 7.721189940055513e-06, "loss": 0.6846, "step": 11006 }, { "epoch": 0.33734828981243103, "grad_norm": 1.7321202396945274, "learning_rate": 7.720773550291634e-06, "loss": 0.6565, "step": 11007 }, { "epoch": 0.33737893833517224, "grad_norm": 1.6858563011936964, "learning_rate": 7.720357133719274e-06, "loss": 0.6837, "step": 11008 }, { "epoch": 0.33740958685791345, "grad_norm": 1.6961520672515618, "learning_rate": 7.719940690342543e-06, "loss": 0.6667, "step": 11009 }, { "epoch": 0.33744023538065465, "grad_norm": 1.8767931797993807, "learning_rate": 7.719524220165537e-06, "loss": 0.7602, "step": 11010 }, { "epoch": 0.33747088390339586, "grad_norm": 1.7421473813224697, "learning_rate": 7.719107723192363e-06, "loss": 0.7749, "step": 11011 }, { "epoch": 0.33750153242613706, "grad_norm": 2.277282347050048, "learning_rate": 7.718691199427126e-06, "loss": 0.6758, "step": 11012 }, { "epoch": 0.33753218094887827, "grad_norm": 1.804225301160795, "learning_rate": 7.718274648873929e-06, "loss": 0.6163, "step": 11013 }, { "epoch": 0.3375628294716195, "grad_norm": 1.7696558247882639, "learning_rate": 7.717858071536877e-06, "loss": 0.7507, "step": 11014 }, { "epoch": 0.3375934779943607, "grad_norm": 1.5456393913638553, "learning_rate": 7.717441467420072e-06, "loss": 0.682, "step": 11015 }, { "epoch": 0.3376241265171019, "grad_norm": 1.7806658696012567, "learning_rate": 7.717024836527623e-06, "loss": 0.7236, "step": 11016 }, { "epoch": 0.3376547750398431, "grad_norm": 1.5196701690756782, "learning_rate": 7.716608178863631e-06, "loss": 0.6333, "step": 11017 }, { "epoch": 0.3376854235625843, "grad_norm": 1.607312554583663, "learning_rate": 7.716191494432206e-06, "loss": 0.5797, "step": 11018 }, { "epoch": 0.3377160720853255, "grad_norm": 1.977496474792238, "learning_rate": 7.71577478323745e-06, "loss": 0.7383, "step": 11019 }, { "epoch": 0.3377467206080667, "grad_norm": 1.5567240691371758, "learning_rate": 7.71535804528347e-06, "loss": 0.6786, "step": 11020 }, { "epoch": 0.3377773691308079, "grad_norm": 1.7721908112358167, "learning_rate": 7.714941280574373e-06, "loss": 0.7433, "step": 11021 }, { "epoch": 0.3378080176535491, "grad_norm": 0.9559427934977114, "learning_rate": 7.714524489114264e-06, "loss": 0.4894, "step": 11022 }, { "epoch": 0.3378386661762903, "grad_norm": 1.4805626210932694, "learning_rate": 7.714107670907252e-06, "loss": 0.6864, "step": 11023 }, { "epoch": 0.33786931469903153, "grad_norm": 1.5858642824032005, "learning_rate": 7.713690825957442e-06, "loss": 0.6393, "step": 11024 }, { "epoch": 0.33789996322177274, "grad_norm": 1.718311598447449, "learning_rate": 7.713273954268942e-06, "loss": 0.7706, "step": 11025 }, { "epoch": 0.3379306117445139, "grad_norm": 1.6441479001930965, "learning_rate": 7.712857055845859e-06, "loss": 0.6018, "step": 11026 }, { "epoch": 0.3379612602672551, "grad_norm": 1.7361805158640664, "learning_rate": 7.712440130692302e-06, "loss": 0.6404, "step": 11027 }, { "epoch": 0.3379919087899963, "grad_norm": 1.7877804322531965, "learning_rate": 7.712023178812378e-06, "loss": 0.7774, "step": 11028 }, { "epoch": 0.3380225573127375, "grad_norm": 1.6376563594853826, "learning_rate": 7.711606200210195e-06, "loss": 0.7215, "step": 11029 }, { "epoch": 0.3380532058354787, "grad_norm": 1.6988899259490702, "learning_rate": 7.711189194889864e-06, "loss": 0.7661, "step": 11030 }, { "epoch": 0.3380838543582199, "grad_norm": 1.7644828683689306, "learning_rate": 7.710772162855492e-06, "loss": 0.7376, "step": 11031 }, { "epoch": 0.3381145028809611, "grad_norm": 0.8419935647386401, "learning_rate": 7.710355104111186e-06, "loss": 0.4692, "step": 11032 }, { "epoch": 0.3381451514037023, "grad_norm": 1.6132978869617975, "learning_rate": 7.70993801866106e-06, "loss": 0.6292, "step": 11033 }, { "epoch": 0.33817579992644353, "grad_norm": 1.6390930171757456, "learning_rate": 7.70952090650922e-06, "loss": 0.6712, "step": 11034 }, { "epoch": 0.33820644844918474, "grad_norm": 0.8154244943966991, "learning_rate": 7.709103767659779e-06, "loss": 0.4753, "step": 11035 }, { "epoch": 0.33823709697192594, "grad_norm": 1.6196467357886648, "learning_rate": 7.708686602116843e-06, "loss": 0.5876, "step": 11036 }, { "epoch": 0.33826774549466715, "grad_norm": 1.5381530156330816, "learning_rate": 7.708269409884528e-06, "loss": 0.6913, "step": 11037 }, { "epoch": 0.33829839401740835, "grad_norm": 1.510492703029193, "learning_rate": 7.707852190966937e-06, "loss": 0.6072, "step": 11038 }, { "epoch": 0.33832904254014956, "grad_norm": 1.5977341859570446, "learning_rate": 7.70743494536819e-06, "loss": 0.7592, "step": 11039 }, { "epoch": 0.33835969106289077, "grad_norm": 1.5546612995407705, "learning_rate": 7.707017673092391e-06, "loss": 0.6574, "step": 11040 }, { "epoch": 0.33839033958563197, "grad_norm": 1.566241967302383, "learning_rate": 7.706600374143655e-06, "loss": 0.7081, "step": 11041 }, { "epoch": 0.3384209881083732, "grad_norm": 1.654449709060952, "learning_rate": 7.706183048526095e-06, "loss": 0.6643, "step": 11042 }, { "epoch": 0.3384516366311144, "grad_norm": 1.49736175090248, "learning_rate": 7.70576569624382e-06, "loss": 0.6272, "step": 11043 }, { "epoch": 0.3384822851538556, "grad_norm": 1.5931519609235905, "learning_rate": 7.705348317300943e-06, "loss": 0.7134, "step": 11044 }, { "epoch": 0.3385129336765968, "grad_norm": 1.7503511559647202, "learning_rate": 7.704930911701575e-06, "loss": 0.7236, "step": 11045 }, { "epoch": 0.338543582199338, "grad_norm": 1.5410717910259126, "learning_rate": 7.704513479449831e-06, "loss": 0.6582, "step": 11046 }, { "epoch": 0.3385742307220792, "grad_norm": 1.7046430508239525, "learning_rate": 7.704096020549824e-06, "loss": 0.6094, "step": 11047 }, { "epoch": 0.3386048792448204, "grad_norm": 1.597214158918231, "learning_rate": 7.70367853500567e-06, "loss": 0.7152, "step": 11048 }, { "epoch": 0.3386355277675616, "grad_norm": 1.6317158580226483, "learning_rate": 7.703261022821476e-06, "loss": 0.6754, "step": 11049 }, { "epoch": 0.3386661762903028, "grad_norm": 1.9832781307753906, "learning_rate": 7.702843484001361e-06, "loss": 0.7315, "step": 11050 }, { "epoch": 0.33869682481304403, "grad_norm": 1.7960847315636919, "learning_rate": 7.70242591854944e-06, "loss": 0.6877, "step": 11051 }, { "epoch": 0.33872747333578523, "grad_norm": 1.798248574361676, "learning_rate": 7.70200832646982e-06, "loss": 0.7239, "step": 11052 }, { "epoch": 0.33875812185852644, "grad_norm": 1.652087430527781, "learning_rate": 7.701590707766624e-06, "loss": 0.6748, "step": 11053 }, { "epoch": 0.33878877038126765, "grad_norm": 1.7718670998059474, "learning_rate": 7.701173062443963e-06, "loss": 0.7451, "step": 11054 }, { "epoch": 0.33881941890400885, "grad_norm": 1.7769994121341401, "learning_rate": 7.700755390505952e-06, "loss": 0.7562, "step": 11055 }, { "epoch": 0.33885006742675006, "grad_norm": 1.0413975618918845, "learning_rate": 7.700337691956708e-06, "loss": 0.4872, "step": 11056 }, { "epoch": 0.3388807159494912, "grad_norm": 1.7275965007142258, "learning_rate": 7.699919966800344e-06, "loss": 0.6473, "step": 11057 }, { "epoch": 0.3389113644722324, "grad_norm": 1.9173837288941435, "learning_rate": 7.69950221504098e-06, "loss": 0.7289, "step": 11058 }, { "epoch": 0.3389420129949736, "grad_norm": 1.5509788113437981, "learning_rate": 7.699084436682728e-06, "loss": 0.5286, "step": 11059 }, { "epoch": 0.3389726615177148, "grad_norm": 1.8717564832484836, "learning_rate": 7.698666631729708e-06, "loss": 0.645, "step": 11060 }, { "epoch": 0.33900331004045603, "grad_norm": 1.5589179375258824, "learning_rate": 7.698248800186035e-06, "loss": 0.6628, "step": 11061 }, { "epoch": 0.33903395856319724, "grad_norm": 1.6379186547883615, "learning_rate": 7.697830942055825e-06, "loss": 0.7039, "step": 11062 }, { "epoch": 0.33906460708593844, "grad_norm": 0.8980505944664496, "learning_rate": 7.697413057343198e-06, "loss": 0.4572, "step": 11063 }, { "epoch": 0.33909525560867965, "grad_norm": 1.7288046415661964, "learning_rate": 7.696995146052269e-06, "loss": 0.7546, "step": 11064 }, { "epoch": 0.33912590413142085, "grad_norm": 0.8287500649516086, "learning_rate": 7.696577208187157e-06, "loss": 0.4734, "step": 11065 }, { "epoch": 0.33915655265416206, "grad_norm": 1.8500103701340007, "learning_rate": 7.69615924375198e-06, "loss": 0.6916, "step": 11066 }, { "epoch": 0.33918720117690326, "grad_norm": 1.5453043697976385, "learning_rate": 7.695741252750857e-06, "loss": 0.5988, "step": 11067 }, { "epoch": 0.33921784969964447, "grad_norm": 1.6674979284513078, "learning_rate": 7.695323235187904e-06, "loss": 0.6456, "step": 11068 }, { "epoch": 0.3392484982223857, "grad_norm": 1.6911613605931692, "learning_rate": 7.694905191067241e-06, "loss": 0.7142, "step": 11069 }, { "epoch": 0.3392791467451269, "grad_norm": 1.8373536323731763, "learning_rate": 7.69448712039299e-06, "loss": 0.6763, "step": 11070 }, { "epoch": 0.3393097952678681, "grad_norm": 0.8682374886031297, "learning_rate": 7.694069023169267e-06, "loss": 0.4753, "step": 11071 }, { "epoch": 0.3393404437906093, "grad_norm": 1.723995562866244, "learning_rate": 7.69365089940019e-06, "loss": 0.7736, "step": 11072 }, { "epoch": 0.3393710923133505, "grad_norm": 1.7321326793716603, "learning_rate": 7.693232749089886e-06, "loss": 0.6855, "step": 11073 }, { "epoch": 0.3394017408360917, "grad_norm": 1.8587273848367198, "learning_rate": 7.69281457224247e-06, "loss": 0.7583, "step": 11074 }, { "epoch": 0.3394323893588329, "grad_norm": 1.6061001111812272, "learning_rate": 7.69239636886206e-06, "loss": 0.6873, "step": 11075 }, { "epoch": 0.3394630378815741, "grad_norm": 1.6870930768687569, "learning_rate": 7.691978138952782e-06, "loss": 0.7387, "step": 11076 }, { "epoch": 0.3394936864043153, "grad_norm": 1.5764344446205443, "learning_rate": 7.691559882518753e-06, "loss": 0.7067, "step": 11077 }, { "epoch": 0.3395243349270565, "grad_norm": 1.530060905716234, "learning_rate": 7.691141599564098e-06, "loss": 0.6764, "step": 11078 }, { "epoch": 0.33955498344979773, "grad_norm": 1.5447600505559165, "learning_rate": 7.690723290092933e-06, "loss": 0.695, "step": 11079 }, { "epoch": 0.33958563197253894, "grad_norm": 1.554631193495637, "learning_rate": 7.690304954109387e-06, "loss": 0.6585, "step": 11080 }, { "epoch": 0.33961628049528014, "grad_norm": 1.8486543574832632, "learning_rate": 7.689886591617574e-06, "loss": 0.7227, "step": 11081 }, { "epoch": 0.33964692901802135, "grad_norm": 1.5081010537793134, "learning_rate": 7.689468202621623e-06, "loss": 0.632, "step": 11082 }, { "epoch": 0.33967757754076255, "grad_norm": 1.5630538128417175, "learning_rate": 7.689049787125654e-06, "loss": 0.6473, "step": 11083 }, { "epoch": 0.33970822606350376, "grad_norm": 1.5638371908583333, "learning_rate": 7.688631345133787e-06, "loss": 0.6622, "step": 11084 }, { "epoch": 0.33973887458624497, "grad_norm": 1.7375581523178951, "learning_rate": 7.688212876650149e-06, "loss": 0.6923, "step": 11085 }, { "epoch": 0.33976952310898617, "grad_norm": 1.670385915483066, "learning_rate": 7.687794381678861e-06, "loss": 0.6044, "step": 11086 }, { "epoch": 0.3398001716317274, "grad_norm": 1.6806788261095233, "learning_rate": 7.687375860224047e-06, "loss": 0.6928, "step": 11087 }, { "epoch": 0.3398308201544685, "grad_norm": 1.6920839315588807, "learning_rate": 7.686957312289833e-06, "loss": 0.7236, "step": 11088 }, { "epoch": 0.33986146867720973, "grad_norm": 1.740509596828907, "learning_rate": 7.686538737880339e-06, "loss": 0.6714, "step": 11089 }, { "epoch": 0.33989211719995094, "grad_norm": 1.5824933987289274, "learning_rate": 7.686120136999692e-06, "loss": 0.6785, "step": 11090 }, { "epoch": 0.33992276572269214, "grad_norm": 1.6664154507297788, "learning_rate": 7.685701509652017e-06, "loss": 0.671, "step": 11091 }, { "epoch": 0.33995341424543335, "grad_norm": 1.74542695466802, "learning_rate": 7.685282855841438e-06, "loss": 0.771, "step": 11092 }, { "epoch": 0.33998406276817456, "grad_norm": 1.5899692702800852, "learning_rate": 7.684864175572078e-06, "loss": 0.6624, "step": 11093 }, { "epoch": 0.34001471129091576, "grad_norm": 1.6621410236092826, "learning_rate": 7.684445468848064e-06, "loss": 0.6856, "step": 11094 }, { "epoch": 0.34004535981365697, "grad_norm": 1.6006377463941728, "learning_rate": 7.684026735673525e-06, "loss": 0.5953, "step": 11095 }, { "epoch": 0.3400760083363982, "grad_norm": 1.6821620702049884, "learning_rate": 7.68360797605258e-06, "loss": 0.7056, "step": 11096 }, { "epoch": 0.3401066568591394, "grad_norm": 1.6027185380059208, "learning_rate": 7.683189189989364e-06, "loss": 0.6624, "step": 11097 }, { "epoch": 0.3401373053818806, "grad_norm": 1.9763245156848697, "learning_rate": 7.682770377487995e-06, "loss": 0.8134, "step": 11098 }, { "epoch": 0.3401679539046218, "grad_norm": 1.4652055005147422, "learning_rate": 7.682351538552603e-06, "loss": 0.6422, "step": 11099 }, { "epoch": 0.340198602427363, "grad_norm": 1.7646402224971085, "learning_rate": 7.681932673187315e-06, "loss": 0.7096, "step": 11100 }, { "epoch": 0.3402292509501042, "grad_norm": 1.5436284653648507, "learning_rate": 7.68151378139626e-06, "loss": 0.6933, "step": 11101 }, { "epoch": 0.3402598994728454, "grad_norm": 1.608012936590729, "learning_rate": 7.681094863183562e-06, "loss": 0.6762, "step": 11102 }, { "epoch": 0.3402905479955866, "grad_norm": 1.0338711693257274, "learning_rate": 7.68067591855335e-06, "loss": 0.501, "step": 11103 }, { "epoch": 0.3403211965183278, "grad_norm": 1.671225466903337, "learning_rate": 7.680256947509754e-06, "loss": 0.7255, "step": 11104 }, { "epoch": 0.340351845041069, "grad_norm": 1.8356457416550225, "learning_rate": 7.679837950056899e-06, "loss": 0.8258, "step": 11105 }, { "epoch": 0.34038249356381023, "grad_norm": 1.599606885480282, "learning_rate": 7.679418926198915e-06, "loss": 0.6606, "step": 11106 }, { "epoch": 0.34041314208655143, "grad_norm": 1.8983377904660461, "learning_rate": 7.678999875939931e-06, "loss": 0.7418, "step": 11107 }, { "epoch": 0.34044379060929264, "grad_norm": 1.725760680720646, "learning_rate": 7.678580799284077e-06, "loss": 0.6567, "step": 11108 }, { "epoch": 0.34047443913203385, "grad_norm": 1.52407638238097, "learning_rate": 7.67816169623548e-06, "loss": 0.6137, "step": 11109 }, { "epoch": 0.34050508765477505, "grad_norm": 0.8538324024888082, "learning_rate": 7.67774256679827e-06, "loss": 0.4676, "step": 11110 }, { "epoch": 0.34053573617751626, "grad_norm": 1.4905283417880677, "learning_rate": 7.677323410976577e-06, "loss": 0.6484, "step": 11111 }, { "epoch": 0.34056638470025746, "grad_norm": 1.722948494786767, "learning_rate": 7.676904228774533e-06, "loss": 0.649, "step": 11112 }, { "epoch": 0.34059703322299867, "grad_norm": 1.6941780734064091, "learning_rate": 7.676485020196266e-06, "loss": 0.79, "step": 11113 }, { "epoch": 0.3406276817457399, "grad_norm": 1.6920734799373895, "learning_rate": 7.676065785245909e-06, "loss": 0.7558, "step": 11114 }, { "epoch": 0.3406583302684811, "grad_norm": 2.0035044886509157, "learning_rate": 7.675646523927588e-06, "loss": 0.7107, "step": 11115 }, { "epoch": 0.3406889787912223, "grad_norm": 1.5349587768039148, "learning_rate": 7.67522723624544e-06, "loss": 0.6487, "step": 11116 }, { "epoch": 0.3407196273139635, "grad_norm": 1.6855081359376913, "learning_rate": 7.674807922203593e-06, "loss": 0.7259, "step": 11117 }, { "epoch": 0.3407502758367047, "grad_norm": 1.8363487549238153, "learning_rate": 7.674388581806179e-06, "loss": 0.6156, "step": 11118 }, { "epoch": 0.34078092435944585, "grad_norm": 1.7374937476784598, "learning_rate": 7.673969215057328e-06, "loss": 0.6018, "step": 11119 }, { "epoch": 0.34081157288218705, "grad_norm": 1.5774245689742366, "learning_rate": 7.673549821961176e-06, "loss": 0.6551, "step": 11120 }, { "epoch": 0.34084222140492826, "grad_norm": 1.8924494323856604, "learning_rate": 7.673130402521854e-06, "loss": 0.7299, "step": 11121 }, { "epoch": 0.34087286992766946, "grad_norm": 1.6960609696518707, "learning_rate": 7.672710956743491e-06, "loss": 0.6322, "step": 11122 }, { "epoch": 0.34090351845041067, "grad_norm": 0.8282379259645308, "learning_rate": 7.672291484630226e-06, "loss": 0.4875, "step": 11123 }, { "epoch": 0.3409341669731519, "grad_norm": 1.6872949410244151, "learning_rate": 7.67187198618619e-06, "loss": 0.6881, "step": 11124 }, { "epoch": 0.3409648154958931, "grad_norm": 1.8598970851796894, "learning_rate": 7.671452461415514e-06, "loss": 0.684, "step": 11125 }, { "epoch": 0.3409954640186343, "grad_norm": 1.628384062017471, "learning_rate": 7.671032910322333e-06, "loss": 0.6683, "step": 11126 }, { "epoch": 0.3410261125413755, "grad_norm": 1.892427540088753, "learning_rate": 7.670613332910784e-06, "loss": 0.9176, "step": 11127 }, { "epoch": 0.3410567610641167, "grad_norm": 1.6460629146465666, "learning_rate": 7.670193729184997e-06, "loss": 0.7361, "step": 11128 }, { "epoch": 0.3410874095868579, "grad_norm": 1.782371871643998, "learning_rate": 7.669774099149107e-06, "loss": 0.6957, "step": 11129 }, { "epoch": 0.3411180581095991, "grad_norm": 0.8004039608944472, "learning_rate": 7.66935444280725e-06, "loss": 0.4684, "step": 11130 }, { "epoch": 0.3411487066323403, "grad_norm": 1.590199108029428, "learning_rate": 7.66893476016356e-06, "loss": 0.6149, "step": 11131 }, { "epoch": 0.3411793551550815, "grad_norm": 1.5758789102892312, "learning_rate": 7.668515051222175e-06, "loss": 0.6003, "step": 11132 }, { "epoch": 0.3412100036778227, "grad_norm": 1.8270477586136262, "learning_rate": 7.668095315987227e-06, "loss": 0.7854, "step": 11133 }, { "epoch": 0.34124065220056393, "grad_norm": 1.9925345128258245, "learning_rate": 7.667675554462854e-06, "loss": 0.6697, "step": 11134 }, { "epoch": 0.34127130072330514, "grad_norm": 1.5154261549402217, "learning_rate": 7.66725576665319e-06, "loss": 0.6959, "step": 11135 }, { "epoch": 0.34130194924604634, "grad_norm": 1.434709907530207, "learning_rate": 7.666835952562375e-06, "loss": 0.5269, "step": 11136 }, { "epoch": 0.34133259776878755, "grad_norm": 1.8021842078742836, "learning_rate": 7.66641611219454e-06, "loss": 0.684, "step": 11137 }, { "epoch": 0.34136324629152875, "grad_norm": 1.7375953566362994, "learning_rate": 7.665996245553826e-06, "loss": 0.705, "step": 11138 }, { "epoch": 0.34139389481426996, "grad_norm": 0.8189114207890699, "learning_rate": 7.665576352644369e-06, "loss": 0.4685, "step": 11139 }, { "epoch": 0.34142454333701117, "grad_norm": 1.905248513244333, "learning_rate": 7.665156433470307e-06, "loss": 0.6783, "step": 11140 }, { "epoch": 0.34145519185975237, "grad_norm": 1.6398129635803431, "learning_rate": 7.664736488035776e-06, "loss": 0.6119, "step": 11141 }, { "epoch": 0.3414858403824936, "grad_norm": 0.7497994074318924, "learning_rate": 7.664316516344914e-06, "loss": 0.4447, "step": 11142 }, { "epoch": 0.3415164889052348, "grad_norm": 1.7095470237878994, "learning_rate": 7.66389651840186e-06, "loss": 0.6114, "step": 11143 }, { "epoch": 0.341547137427976, "grad_norm": 1.7394905523908921, "learning_rate": 7.663476494210753e-06, "loss": 0.6664, "step": 11144 }, { "epoch": 0.3415777859507172, "grad_norm": 1.6169133129591904, "learning_rate": 7.663056443775729e-06, "loss": 0.6253, "step": 11145 }, { "epoch": 0.3416084344734584, "grad_norm": 1.737893489206989, "learning_rate": 7.66263636710093e-06, "loss": 0.6025, "step": 11146 }, { "epoch": 0.3416390829961996, "grad_norm": 1.6143176854591519, "learning_rate": 7.662216264190494e-06, "loss": 0.5955, "step": 11147 }, { "epoch": 0.3416697315189408, "grad_norm": 1.7283209763272815, "learning_rate": 7.661796135048559e-06, "loss": 0.6872, "step": 11148 }, { "epoch": 0.341700380041682, "grad_norm": 0.8808422204733303, "learning_rate": 7.661375979679265e-06, "loss": 0.4599, "step": 11149 }, { "epoch": 0.34173102856442317, "grad_norm": 2.1564342971160775, "learning_rate": 7.660955798086754e-06, "loss": 0.7548, "step": 11150 }, { "epoch": 0.3417616770871644, "grad_norm": 1.7533229520146796, "learning_rate": 7.660535590275163e-06, "loss": 0.6626, "step": 11151 }, { "epoch": 0.3417923256099056, "grad_norm": 1.8564411526135365, "learning_rate": 7.660115356248635e-06, "loss": 0.6717, "step": 11152 }, { "epoch": 0.3418229741326468, "grad_norm": 1.7337131418935101, "learning_rate": 7.659695096011308e-06, "loss": 0.5979, "step": 11153 }, { "epoch": 0.341853622655388, "grad_norm": 1.699466562440792, "learning_rate": 7.659274809567327e-06, "loss": 0.7249, "step": 11154 }, { "epoch": 0.3418842711781292, "grad_norm": 1.6674604618390232, "learning_rate": 7.658854496920831e-06, "loss": 0.7498, "step": 11155 }, { "epoch": 0.3419149197008704, "grad_norm": 1.9260123423042603, "learning_rate": 7.658434158075961e-06, "loss": 0.6861, "step": 11156 }, { "epoch": 0.3419455682236116, "grad_norm": 1.9139127995756124, "learning_rate": 7.658013793036858e-06, "loss": 0.6719, "step": 11157 }, { "epoch": 0.3419762167463528, "grad_norm": 1.8335412600630638, "learning_rate": 7.657593401807667e-06, "loss": 0.6888, "step": 11158 }, { "epoch": 0.342006865269094, "grad_norm": 1.8273033218231878, "learning_rate": 7.657172984392526e-06, "loss": 0.7403, "step": 11159 }, { "epoch": 0.3420375137918352, "grad_norm": 1.7995316675730761, "learning_rate": 7.65675254079558e-06, "loss": 0.612, "step": 11160 }, { "epoch": 0.34206816231457643, "grad_norm": 1.598573798370139, "learning_rate": 7.656332071020972e-06, "loss": 0.6284, "step": 11161 }, { "epoch": 0.34209881083731764, "grad_norm": 1.5978264735980567, "learning_rate": 7.655911575072846e-06, "loss": 0.6111, "step": 11162 }, { "epoch": 0.34212945936005884, "grad_norm": 1.4822072548402252, "learning_rate": 7.655491052955341e-06, "loss": 0.6668, "step": 11163 }, { "epoch": 0.34216010788280005, "grad_norm": 1.7468905210890169, "learning_rate": 7.655070504672605e-06, "loss": 0.682, "step": 11164 }, { "epoch": 0.34219075640554125, "grad_norm": 1.6569722335595038, "learning_rate": 7.65464993022878e-06, "loss": 0.7527, "step": 11165 }, { "epoch": 0.34222140492828246, "grad_norm": 1.807344333497844, "learning_rate": 7.654229329628007e-06, "loss": 0.6496, "step": 11166 }, { "epoch": 0.34225205345102366, "grad_norm": 1.9747301723978958, "learning_rate": 7.653808702874436e-06, "loss": 0.7323, "step": 11167 }, { "epoch": 0.34228270197376487, "grad_norm": 1.773379697276879, "learning_rate": 7.65338804997221e-06, "loss": 0.7685, "step": 11168 }, { "epoch": 0.3423133504965061, "grad_norm": 1.8210076080901418, "learning_rate": 7.652967370925471e-06, "loss": 0.702, "step": 11169 }, { "epoch": 0.3423439990192473, "grad_norm": 1.4683197433738575, "learning_rate": 7.652546665738368e-06, "loss": 0.4292, "step": 11170 }, { "epoch": 0.3423746475419885, "grad_norm": 1.8952134088294055, "learning_rate": 7.65212593441504e-06, "loss": 0.7056, "step": 11171 }, { "epoch": 0.3424052960647297, "grad_norm": 1.0040228980702566, "learning_rate": 7.651705176959638e-06, "loss": 0.4651, "step": 11172 }, { "epoch": 0.3424359445874709, "grad_norm": 1.6484613293961434, "learning_rate": 7.651284393376307e-06, "loss": 0.6689, "step": 11173 }, { "epoch": 0.3424665931102121, "grad_norm": 1.5228097212206781, "learning_rate": 7.650863583669193e-06, "loss": 0.6213, "step": 11174 }, { "epoch": 0.3424972416329533, "grad_norm": 1.8371722880376224, "learning_rate": 7.650442747842442e-06, "loss": 0.6678, "step": 11175 }, { "epoch": 0.3425278901556945, "grad_norm": 1.776989673279219, "learning_rate": 7.6500218859002e-06, "loss": 0.6701, "step": 11176 }, { "epoch": 0.3425585386784357, "grad_norm": 1.7983793136630981, "learning_rate": 7.649600997846613e-06, "loss": 0.7721, "step": 11177 }, { "epoch": 0.3425891872011769, "grad_norm": 1.4905412188652498, "learning_rate": 7.649180083685832e-06, "loss": 0.6411, "step": 11178 }, { "epoch": 0.34261983572391813, "grad_norm": 0.8019831205829266, "learning_rate": 7.648759143422e-06, "loss": 0.4711, "step": 11179 }, { "epoch": 0.34265048424665934, "grad_norm": 1.7691016546878624, "learning_rate": 7.648338177059266e-06, "loss": 0.7194, "step": 11180 }, { "epoch": 0.3426811327694005, "grad_norm": 1.6008229547585124, "learning_rate": 7.64791718460178e-06, "loss": 0.7655, "step": 11181 }, { "epoch": 0.3427117812921417, "grad_norm": 1.546291089908468, "learning_rate": 7.647496166053687e-06, "loss": 0.612, "step": 11182 }, { "epoch": 0.3427424298148829, "grad_norm": 1.7163742146014553, "learning_rate": 7.647075121419139e-06, "loss": 0.7335, "step": 11183 }, { "epoch": 0.3427730783376241, "grad_norm": 1.8229332708710881, "learning_rate": 7.646654050702281e-06, "loss": 0.7054, "step": 11184 }, { "epoch": 0.3428037268603653, "grad_norm": 1.7027643515907858, "learning_rate": 7.646232953907262e-06, "loss": 0.7089, "step": 11185 }, { "epoch": 0.3428343753831065, "grad_norm": 1.7551863913431895, "learning_rate": 7.645811831038235e-06, "loss": 0.6691, "step": 11186 }, { "epoch": 0.3428650239058477, "grad_norm": 1.694541940879516, "learning_rate": 7.645390682099346e-06, "loss": 0.6801, "step": 11187 }, { "epoch": 0.3428956724285889, "grad_norm": 1.6926002675900795, "learning_rate": 7.644969507094747e-06, "loss": 0.6958, "step": 11188 }, { "epoch": 0.34292632095133013, "grad_norm": 1.8557410168367854, "learning_rate": 7.644548306028585e-06, "loss": 0.681, "step": 11189 }, { "epoch": 0.34295696947407134, "grad_norm": 0.7854484374378099, "learning_rate": 7.644127078905013e-06, "loss": 0.4397, "step": 11190 }, { "epoch": 0.34298761799681254, "grad_norm": 1.8471081543125603, "learning_rate": 7.643705825728178e-06, "loss": 0.7279, "step": 11191 }, { "epoch": 0.34301826651955375, "grad_norm": 1.6726387022343958, "learning_rate": 7.643284546502237e-06, "loss": 0.6214, "step": 11192 }, { "epoch": 0.34304891504229496, "grad_norm": 1.6242554948239316, "learning_rate": 7.642863241231332e-06, "loss": 0.6234, "step": 11193 }, { "epoch": 0.34307956356503616, "grad_norm": 1.955176576039062, "learning_rate": 7.642441909919625e-06, "loss": 0.6317, "step": 11194 }, { "epoch": 0.34311021208777737, "grad_norm": 1.8559282869189095, "learning_rate": 7.64202055257126e-06, "loss": 0.6734, "step": 11195 }, { "epoch": 0.34314086061051857, "grad_norm": 1.892615602237088, "learning_rate": 7.641599169190388e-06, "loss": 0.7137, "step": 11196 }, { "epoch": 0.3431715091332598, "grad_norm": 0.8137100182167522, "learning_rate": 7.641177759781167e-06, "loss": 0.4348, "step": 11197 }, { "epoch": 0.343202157656001, "grad_norm": 1.6777014166548136, "learning_rate": 7.640756324347743e-06, "loss": 0.7013, "step": 11198 }, { "epoch": 0.3432328061787422, "grad_norm": 1.853475959792435, "learning_rate": 7.640334862894273e-06, "loss": 0.757, "step": 11199 }, { "epoch": 0.3432634547014834, "grad_norm": 2.0863734513945786, "learning_rate": 7.639913375424906e-06, "loss": 0.7054, "step": 11200 }, { "epoch": 0.3432941032242246, "grad_norm": 1.6753178776678632, "learning_rate": 7.639491861943799e-06, "loss": 0.6689, "step": 11201 }, { "epoch": 0.3433247517469658, "grad_norm": 1.7152942656015127, "learning_rate": 7.639070322455101e-06, "loss": 0.6891, "step": 11202 }, { "epoch": 0.343355400269707, "grad_norm": 1.7818434549590572, "learning_rate": 7.63864875696297e-06, "loss": 0.6769, "step": 11203 }, { "epoch": 0.3433860487924482, "grad_norm": 1.7546462928746582, "learning_rate": 7.638227165471559e-06, "loss": 0.723, "step": 11204 }, { "epoch": 0.3434166973151894, "grad_norm": 0.8717464488434745, "learning_rate": 7.637805547985018e-06, "loss": 0.4758, "step": 11205 }, { "epoch": 0.34344734583793063, "grad_norm": 1.4833496373635715, "learning_rate": 7.637383904507505e-06, "loss": 0.5926, "step": 11206 }, { "epoch": 0.34347799436067183, "grad_norm": 0.797045017604631, "learning_rate": 7.636962235043173e-06, "loss": 0.4696, "step": 11207 }, { "epoch": 0.34350864288341304, "grad_norm": 1.857985237604881, "learning_rate": 7.636540539596178e-06, "loss": 0.6714, "step": 11208 }, { "epoch": 0.34353929140615425, "grad_norm": 1.5841141560217116, "learning_rate": 7.636118818170675e-06, "loss": 0.6824, "step": 11209 }, { "epoch": 0.34356993992889545, "grad_norm": 1.6176099495622307, "learning_rate": 7.635697070770818e-06, "loss": 0.6766, "step": 11210 }, { "epoch": 0.34360058845163666, "grad_norm": 1.6448016242088623, "learning_rate": 7.635275297400764e-06, "loss": 0.6744, "step": 11211 }, { "epoch": 0.3436312369743778, "grad_norm": 1.6902634042274738, "learning_rate": 7.634853498064667e-06, "loss": 0.7661, "step": 11212 }, { "epoch": 0.343661885497119, "grad_norm": 1.6834797742059024, "learning_rate": 7.634431672766685e-06, "loss": 0.6845, "step": 11213 }, { "epoch": 0.3436925340198602, "grad_norm": 1.8851959462957972, "learning_rate": 7.634009821510974e-06, "loss": 0.6887, "step": 11214 }, { "epoch": 0.3437231825426014, "grad_norm": 1.7849879838255127, "learning_rate": 7.63358794430169e-06, "loss": 0.6843, "step": 11215 }, { "epoch": 0.34375383106534263, "grad_norm": 1.851904353015375, "learning_rate": 7.63316604114299e-06, "loss": 0.7076, "step": 11216 }, { "epoch": 0.34378447958808384, "grad_norm": 1.6890471282252018, "learning_rate": 7.632744112039033e-06, "loss": 0.6055, "step": 11217 }, { "epoch": 0.34381512811082504, "grad_norm": 2.109381384365075, "learning_rate": 7.632322156993972e-06, "loss": 0.7203, "step": 11218 }, { "epoch": 0.34384577663356625, "grad_norm": 1.7833693075449022, "learning_rate": 7.631900176011968e-06, "loss": 0.7151, "step": 11219 }, { "epoch": 0.34387642515630745, "grad_norm": 1.8639258778861556, "learning_rate": 7.63147816909718e-06, "loss": 0.7455, "step": 11220 }, { "epoch": 0.34390707367904866, "grad_norm": 1.863077722559882, "learning_rate": 7.631056136253762e-06, "loss": 0.7175, "step": 11221 }, { "epoch": 0.34393772220178986, "grad_norm": 1.7024533893231002, "learning_rate": 7.630634077485875e-06, "loss": 0.7051, "step": 11222 }, { "epoch": 0.34396837072453107, "grad_norm": 1.884778892812447, "learning_rate": 7.630211992797679e-06, "loss": 0.7566, "step": 11223 }, { "epoch": 0.3439990192472723, "grad_norm": 1.6861992280117808, "learning_rate": 7.62978988219333e-06, "loss": 0.7735, "step": 11224 }, { "epoch": 0.3440296677700135, "grad_norm": 2.2047777344939403, "learning_rate": 7.629367745676989e-06, "loss": 0.6976, "step": 11225 }, { "epoch": 0.3440603162927547, "grad_norm": 1.5952898433624545, "learning_rate": 7.628945583252814e-06, "loss": 0.6195, "step": 11226 }, { "epoch": 0.3440909648154959, "grad_norm": 2.157536043530882, "learning_rate": 7.628523394924966e-06, "loss": 0.7573, "step": 11227 }, { "epoch": 0.3441216133382371, "grad_norm": 1.6783331023368468, "learning_rate": 7.628101180697606e-06, "loss": 0.7682, "step": 11228 }, { "epoch": 0.3441522618609783, "grad_norm": 1.792987256920334, "learning_rate": 7.62767894057489e-06, "loss": 0.6466, "step": 11229 }, { "epoch": 0.3441829103837195, "grad_norm": 1.5183810069158008, "learning_rate": 7.627256674560983e-06, "loss": 0.6676, "step": 11230 }, { "epoch": 0.3442135589064607, "grad_norm": 1.6668413249270138, "learning_rate": 7.626834382660042e-06, "loss": 0.6386, "step": 11231 }, { "epoch": 0.3442442074292019, "grad_norm": 1.5794200626085382, "learning_rate": 7.626412064876231e-06, "loss": 0.6218, "step": 11232 }, { "epoch": 0.3442748559519431, "grad_norm": 1.685682985355828, "learning_rate": 7.625989721213709e-06, "loss": 0.6115, "step": 11233 }, { "epoch": 0.34430550447468433, "grad_norm": 1.7093370493129183, "learning_rate": 7.62556735167664e-06, "loss": 0.7303, "step": 11234 }, { "epoch": 0.34433615299742554, "grad_norm": 1.7483745161871416, "learning_rate": 7.625144956269183e-06, "loss": 0.6729, "step": 11235 }, { "epoch": 0.34436680152016674, "grad_norm": 1.0998739469092653, "learning_rate": 7.6247225349955016e-06, "loss": 0.4938, "step": 11236 }, { "epoch": 0.34439745004290795, "grad_norm": 1.9063462749931108, "learning_rate": 7.624300087859757e-06, "loss": 0.7332, "step": 11237 }, { "epoch": 0.34442809856564915, "grad_norm": 1.9124901915485282, "learning_rate": 7.623877614866111e-06, "loss": 0.7072, "step": 11238 }, { "epoch": 0.34445874708839036, "grad_norm": 1.7183967165174303, "learning_rate": 7.62345511601873e-06, "loss": 0.7157, "step": 11239 }, { "epoch": 0.34448939561113157, "grad_norm": 1.7064226926043515, "learning_rate": 7.623032591321773e-06, "loss": 0.7003, "step": 11240 }, { "epoch": 0.34452004413387277, "grad_norm": 1.8949753152296882, "learning_rate": 7.6226100407794055e-06, "loss": 0.7257, "step": 11241 }, { "epoch": 0.344550692656614, "grad_norm": 1.8313304030939062, "learning_rate": 7.62218746439579e-06, "loss": 0.7283, "step": 11242 }, { "epoch": 0.3445813411793552, "grad_norm": 1.882303900026205, "learning_rate": 7.62176486217509e-06, "loss": 0.6752, "step": 11243 }, { "epoch": 0.34461198970209633, "grad_norm": 1.5001993595952627, "learning_rate": 7.6213422341214695e-06, "loss": 0.6246, "step": 11244 }, { "epoch": 0.34464263822483754, "grad_norm": 1.905830614887603, "learning_rate": 7.620919580239094e-06, "loss": 0.6558, "step": 11245 }, { "epoch": 0.34467328674757874, "grad_norm": 1.756450552361871, "learning_rate": 7.6204969005321284e-06, "loss": 0.7107, "step": 11246 }, { "epoch": 0.34470393527031995, "grad_norm": 1.8921951120361817, "learning_rate": 7.620074195004734e-06, "loss": 0.7012, "step": 11247 }, { "epoch": 0.34473458379306116, "grad_norm": 1.9980415688320834, "learning_rate": 7.619651463661081e-06, "loss": 0.7348, "step": 11248 }, { "epoch": 0.34476523231580236, "grad_norm": 1.6521019128764212, "learning_rate": 7.619228706505329e-06, "loss": 0.6353, "step": 11249 }, { "epoch": 0.34479588083854357, "grad_norm": 1.044535156259894, "learning_rate": 7.618805923541648e-06, "loss": 0.4802, "step": 11250 }, { "epoch": 0.3448265293612848, "grad_norm": 1.7732960057078044, "learning_rate": 7.618383114774203e-06, "loss": 0.6974, "step": 11251 }, { "epoch": 0.344857177884026, "grad_norm": 2.152178696757823, "learning_rate": 7.617960280207158e-06, "loss": 0.7371, "step": 11252 }, { "epoch": 0.3448878264067672, "grad_norm": 1.713323735496478, "learning_rate": 7.6175374198446805e-06, "loss": 0.7237, "step": 11253 }, { "epoch": 0.3449184749295084, "grad_norm": 1.8138684603139514, "learning_rate": 7.617114533690937e-06, "loss": 0.6629, "step": 11254 }, { "epoch": 0.3449491234522496, "grad_norm": 0.7956160379006225, "learning_rate": 7.6166916217500945e-06, "loss": 0.4737, "step": 11255 }, { "epoch": 0.3449797719749908, "grad_norm": 1.5755568114158751, "learning_rate": 7.6162686840263204e-06, "loss": 0.6593, "step": 11256 }, { "epoch": 0.345010420497732, "grad_norm": 1.8232631073238903, "learning_rate": 7.6158457205237804e-06, "loss": 0.7371, "step": 11257 }, { "epoch": 0.3450410690204732, "grad_norm": 2.12309400290454, "learning_rate": 7.6154227312466445e-06, "loss": 0.8057, "step": 11258 }, { "epoch": 0.3450717175432144, "grad_norm": 1.577034581724824, "learning_rate": 7.61499971619908e-06, "loss": 0.5747, "step": 11259 }, { "epoch": 0.3451023660659556, "grad_norm": 1.8194257987018498, "learning_rate": 7.614576675385253e-06, "loss": 0.6705, "step": 11260 }, { "epoch": 0.34513301458869683, "grad_norm": 1.8599574023644543, "learning_rate": 7.614153608809332e-06, "loss": 0.7115, "step": 11261 }, { "epoch": 0.34516366311143803, "grad_norm": 2.066718560896306, "learning_rate": 7.613730516475487e-06, "loss": 0.6131, "step": 11262 }, { "epoch": 0.34519431163417924, "grad_norm": 1.5205604708822802, "learning_rate": 7.613307398387888e-06, "loss": 0.6316, "step": 11263 }, { "epoch": 0.34522496015692045, "grad_norm": 1.5075248868912772, "learning_rate": 7.612884254550701e-06, "loss": 0.6393, "step": 11264 }, { "epoch": 0.34525560867966165, "grad_norm": 1.8311976453338075, "learning_rate": 7.612461084968099e-06, "loss": 0.7615, "step": 11265 }, { "epoch": 0.34528625720240286, "grad_norm": 1.6117682541855822, "learning_rate": 7.612037889644247e-06, "loss": 0.608, "step": 11266 }, { "epoch": 0.34531690572514406, "grad_norm": 1.9654201621930845, "learning_rate": 7.611614668583321e-06, "loss": 0.7594, "step": 11267 }, { "epoch": 0.34534755424788527, "grad_norm": 0.9045653857456901, "learning_rate": 7.611191421789483e-06, "loss": 0.4444, "step": 11268 }, { "epoch": 0.3453782027706265, "grad_norm": 1.6256503660097037, "learning_rate": 7.61076814926691e-06, "loss": 0.6075, "step": 11269 }, { "epoch": 0.3454088512933677, "grad_norm": 1.7453132159625118, "learning_rate": 7.61034485101977e-06, "loss": 0.643, "step": 11270 }, { "epoch": 0.3454394998161089, "grad_norm": 1.5729414038003164, "learning_rate": 7.6099215270522344e-06, "loss": 0.6323, "step": 11271 }, { "epoch": 0.3454701483388501, "grad_norm": 1.9849732941102844, "learning_rate": 7.609498177368475e-06, "loss": 0.7375, "step": 11272 }, { "epoch": 0.3455007968615913, "grad_norm": 1.876005718854759, "learning_rate": 7.6090748019726624e-06, "loss": 0.7173, "step": 11273 }, { "epoch": 0.3455314453843325, "grad_norm": 1.6928737032949492, "learning_rate": 7.608651400868967e-06, "loss": 0.7757, "step": 11274 }, { "epoch": 0.34556209390707365, "grad_norm": 1.666829040332369, "learning_rate": 7.608227974061562e-06, "loss": 0.6846, "step": 11275 }, { "epoch": 0.34559274242981486, "grad_norm": 0.877546836704234, "learning_rate": 7.60780452155462e-06, "loss": 0.4852, "step": 11276 }, { "epoch": 0.34562339095255606, "grad_norm": 1.8036546394874573, "learning_rate": 7.6073810433523125e-06, "loss": 0.6865, "step": 11277 }, { "epoch": 0.34565403947529727, "grad_norm": 1.6821052037647786, "learning_rate": 7.606957539458813e-06, "loss": 0.5368, "step": 11278 }, { "epoch": 0.3456846879980385, "grad_norm": 1.5960842021382757, "learning_rate": 7.606534009878293e-06, "loss": 0.5922, "step": 11279 }, { "epoch": 0.3457153365207797, "grad_norm": 0.7694587975139161, "learning_rate": 7.606110454614928e-06, "loss": 0.4436, "step": 11280 }, { "epoch": 0.3457459850435209, "grad_norm": 1.6832305021057121, "learning_rate": 7.605686873672887e-06, "loss": 0.6981, "step": 11281 }, { "epoch": 0.3457766335662621, "grad_norm": 0.8174607608322992, "learning_rate": 7.605263267056349e-06, "loss": 0.4644, "step": 11282 }, { "epoch": 0.3458072820890033, "grad_norm": 0.7670142181193865, "learning_rate": 7.604839634769485e-06, "loss": 0.4543, "step": 11283 }, { "epoch": 0.3458379306117445, "grad_norm": 1.5473757212118047, "learning_rate": 7.60441597681647e-06, "loss": 0.6685, "step": 11284 }, { "epoch": 0.3458685791344857, "grad_norm": 1.7696675505723434, "learning_rate": 7.603992293201476e-06, "loss": 0.6728, "step": 11285 }, { "epoch": 0.3458992276572269, "grad_norm": 1.6086878721310487, "learning_rate": 7.603568583928682e-06, "loss": 0.5733, "step": 11286 }, { "epoch": 0.3459298761799681, "grad_norm": 1.8672245509423466, "learning_rate": 7.6031448490022595e-06, "loss": 0.7585, "step": 11287 }, { "epoch": 0.3459605247027093, "grad_norm": 1.648820107308981, "learning_rate": 7.602721088426385e-06, "loss": 0.7067, "step": 11288 }, { "epoch": 0.34599117322545053, "grad_norm": 1.5089885450207945, "learning_rate": 7.602297302205234e-06, "loss": 0.5514, "step": 11289 }, { "epoch": 0.34602182174819174, "grad_norm": 2.1050255951856127, "learning_rate": 7.601873490342982e-06, "loss": 0.6695, "step": 11290 }, { "epoch": 0.34605247027093294, "grad_norm": 0.7933629657164045, "learning_rate": 7.601449652843804e-06, "loss": 0.443, "step": 11291 }, { "epoch": 0.34608311879367415, "grad_norm": 1.6583064187850665, "learning_rate": 7.601025789711877e-06, "loss": 0.6764, "step": 11292 }, { "epoch": 0.34611376731641535, "grad_norm": 1.904345410268112, "learning_rate": 7.6006019009513775e-06, "loss": 0.7692, "step": 11293 }, { "epoch": 0.34614441583915656, "grad_norm": 1.6414651817957748, "learning_rate": 7.600177986566483e-06, "loss": 0.6262, "step": 11294 }, { "epoch": 0.34617506436189777, "grad_norm": 1.7860072134196971, "learning_rate": 7.5997540465613686e-06, "loss": 0.6744, "step": 11295 }, { "epoch": 0.34620571288463897, "grad_norm": 1.7626659441422396, "learning_rate": 7.599330080940212e-06, "loss": 0.6612, "step": 11296 }, { "epoch": 0.3462363614073802, "grad_norm": 1.8965679374991975, "learning_rate": 7.598906089707192e-06, "loss": 0.7526, "step": 11297 }, { "epoch": 0.3462670099301214, "grad_norm": 1.548051017842533, "learning_rate": 7.598482072866485e-06, "loss": 0.6569, "step": 11298 }, { "epoch": 0.3462976584528626, "grad_norm": 1.7463886668026842, "learning_rate": 7.598058030422269e-06, "loss": 0.6775, "step": 11299 }, { "epoch": 0.3463283069756038, "grad_norm": 1.6774661980118757, "learning_rate": 7.597633962378722e-06, "loss": 0.6284, "step": 11300 }, { "epoch": 0.346358955498345, "grad_norm": 1.50166496626409, "learning_rate": 7.597209868740024e-06, "loss": 0.6366, "step": 11301 }, { "epoch": 0.3463896040210862, "grad_norm": 1.54510780418444, "learning_rate": 7.596785749510352e-06, "loss": 0.6962, "step": 11302 }, { "epoch": 0.3464202525438274, "grad_norm": 1.6537518291435982, "learning_rate": 7.596361604693886e-06, "loss": 0.7018, "step": 11303 }, { "epoch": 0.3464509010665686, "grad_norm": 1.688430262825934, "learning_rate": 7.595937434294804e-06, "loss": 0.7363, "step": 11304 }, { "epoch": 0.3464815495893098, "grad_norm": 1.7708634789065774, "learning_rate": 7.595513238317288e-06, "loss": 0.7017, "step": 11305 }, { "epoch": 0.346512198112051, "grad_norm": 1.5374665647281163, "learning_rate": 7.595089016765516e-06, "loss": 0.5786, "step": 11306 }, { "epoch": 0.3465428466347922, "grad_norm": 1.7234383248021359, "learning_rate": 7.5946647696436645e-06, "loss": 0.7208, "step": 11307 }, { "epoch": 0.3465734951575334, "grad_norm": 1.7429622886741951, "learning_rate": 7.59424049695592e-06, "loss": 0.6845, "step": 11308 }, { "epoch": 0.3466041436802746, "grad_norm": 1.6002843226132293, "learning_rate": 7.593816198706461e-06, "loss": 0.6684, "step": 11309 }, { "epoch": 0.3466347922030158, "grad_norm": 1.700779479344675, "learning_rate": 7.593391874899465e-06, "loss": 0.6875, "step": 11310 }, { "epoch": 0.346665440725757, "grad_norm": 0.8782847603848577, "learning_rate": 7.592967525539117e-06, "loss": 0.4769, "step": 11311 }, { "epoch": 0.3466960892484982, "grad_norm": 1.6096284418474365, "learning_rate": 7.592543150629595e-06, "loss": 0.6978, "step": 11312 }, { "epoch": 0.3467267377712394, "grad_norm": 1.6043734194571106, "learning_rate": 7.5921187501750836e-06, "loss": 0.7211, "step": 11313 }, { "epoch": 0.3467573862939806, "grad_norm": 0.8317817991956592, "learning_rate": 7.591694324179761e-06, "loss": 0.4615, "step": 11314 }, { "epoch": 0.3467880348167218, "grad_norm": 1.6676328006076309, "learning_rate": 7.591269872647813e-06, "loss": 0.6871, "step": 11315 }, { "epoch": 0.34681868333946303, "grad_norm": 1.776525733695272, "learning_rate": 7.590845395583418e-06, "loss": 0.7347, "step": 11316 }, { "epoch": 0.34684933186220424, "grad_norm": 1.6821913928312757, "learning_rate": 7.590420892990763e-06, "loss": 0.7344, "step": 11317 }, { "epoch": 0.34687998038494544, "grad_norm": 1.7913779734832382, "learning_rate": 7.589996364874026e-06, "loss": 0.6712, "step": 11318 }, { "epoch": 0.34691062890768665, "grad_norm": 1.7037952168288997, "learning_rate": 7.5895718112373925e-06, "loss": 0.7966, "step": 11319 }, { "epoch": 0.34694127743042785, "grad_norm": 1.812165742744796, "learning_rate": 7.589147232085046e-06, "loss": 0.7497, "step": 11320 }, { "epoch": 0.34697192595316906, "grad_norm": 1.5198608888965377, "learning_rate": 7.58872262742117e-06, "loss": 0.681, "step": 11321 }, { "epoch": 0.34700257447591026, "grad_norm": 0.9336950989015755, "learning_rate": 7.588297997249946e-06, "loss": 0.476, "step": 11322 }, { "epoch": 0.34703322299865147, "grad_norm": 0.8626131205819154, "learning_rate": 7.58787334157556e-06, "loss": 0.4555, "step": 11323 }, { "epoch": 0.3470638715213927, "grad_norm": 1.7328149193762499, "learning_rate": 7.587448660402197e-06, "loss": 0.7092, "step": 11324 }, { "epoch": 0.3470945200441339, "grad_norm": 0.7512390326092421, "learning_rate": 7.587023953734041e-06, "loss": 0.462, "step": 11325 }, { "epoch": 0.3471251685668751, "grad_norm": 1.6355780823749482, "learning_rate": 7.5865992215752725e-06, "loss": 0.569, "step": 11326 }, { "epoch": 0.3471558170896163, "grad_norm": 1.5631621789722578, "learning_rate": 7.5861744639300825e-06, "loss": 0.6101, "step": 11327 }, { "epoch": 0.3471864656123575, "grad_norm": 1.5387732612211418, "learning_rate": 7.585749680802654e-06, "loss": 0.6555, "step": 11328 }, { "epoch": 0.3472171141350987, "grad_norm": 1.7299661097512433, "learning_rate": 7.58532487219717e-06, "loss": 0.6766, "step": 11329 }, { "epoch": 0.3472477626578399, "grad_norm": 1.6909784092741988, "learning_rate": 7.584900038117822e-06, "loss": 0.6892, "step": 11330 }, { "epoch": 0.3472784111805811, "grad_norm": 0.9712284983998182, "learning_rate": 7.584475178568789e-06, "loss": 0.4914, "step": 11331 }, { "epoch": 0.3473090597033223, "grad_norm": 1.681175567438718, "learning_rate": 7.584050293554264e-06, "loss": 0.7217, "step": 11332 }, { "epoch": 0.3473397082260635, "grad_norm": 1.6204231557044884, "learning_rate": 7.583625383078428e-06, "loss": 0.717, "step": 11333 }, { "epoch": 0.34737035674880473, "grad_norm": 1.719187443707247, "learning_rate": 7.583200447145471e-06, "loss": 0.6409, "step": 11334 }, { "epoch": 0.34740100527154594, "grad_norm": 1.8925486699723455, "learning_rate": 7.582775485759579e-06, "loss": 0.6426, "step": 11335 }, { "epoch": 0.34743165379428714, "grad_norm": 1.5593743585916835, "learning_rate": 7.58235049892494e-06, "loss": 0.5896, "step": 11336 }, { "epoch": 0.3474623023170283, "grad_norm": 1.7304122869633263, "learning_rate": 7.58192548664574e-06, "loss": 0.7402, "step": 11337 }, { "epoch": 0.3474929508397695, "grad_norm": 1.6406855593099157, "learning_rate": 7.581500448926168e-06, "loss": 0.803, "step": 11338 }, { "epoch": 0.3475235993625107, "grad_norm": 1.876963135767928, "learning_rate": 7.58107538577041e-06, "loss": 0.6226, "step": 11339 }, { "epoch": 0.3475542478852519, "grad_norm": 1.819181921275411, "learning_rate": 7.580650297182658e-06, "loss": 0.709, "step": 11340 }, { "epoch": 0.3475848964079931, "grad_norm": 1.6399305421750654, "learning_rate": 7.580225183167098e-06, "loss": 0.6378, "step": 11341 }, { "epoch": 0.3476155449307343, "grad_norm": 1.7131472511515196, "learning_rate": 7.579800043727917e-06, "loss": 0.6466, "step": 11342 }, { "epoch": 0.3476461934534755, "grad_norm": 1.9076531032986057, "learning_rate": 7.579374878869308e-06, "loss": 0.6924, "step": 11343 }, { "epoch": 0.34767684197621673, "grad_norm": 1.7706258062932363, "learning_rate": 7.5789496885954585e-06, "loss": 0.7933, "step": 11344 }, { "epoch": 0.34770749049895794, "grad_norm": 0.8305982415033486, "learning_rate": 7.578524472910558e-06, "loss": 0.4598, "step": 11345 }, { "epoch": 0.34773813902169914, "grad_norm": 1.7127295728150926, "learning_rate": 7.578099231818794e-06, "loss": 0.7329, "step": 11346 }, { "epoch": 0.34776878754444035, "grad_norm": 0.8059600445020093, "learning_rate": 7.57767396532436e-06, "loss": 0.4695, "step": 11347 }, { "epoch": 0.34779943606718156, "grad_norm": 1.4929293758085826, "learning_rate": 7.5772486734314455e-06, "loss": 0.7042, "step": 11348 }, { "epoch": 0.34783008458992276, "grad_norm": 1.6740662787599263, "learning_rate": 7.576823356144241e-06, "loss": 0.758, "step": 11349 }, { "epoch": 0.34786073311266397, "grad_norm": 0.7674531737114149, "learning_rate": 7.576398013466935e-06, "loss": 0.4499, "step": 11350 }, { "epoch": 0.3478913816354052, "grad_norm": 1.8448648003779704, "learning_rate": 7.575972645403722e-06, "loss": 0.7956, "step": 11351 }, { "epoch": 0.3479220301581464, "grad_norm": 1.8026510407003669, "learning_rate": 7.575547251958788e-06, "loss": 0.6786, "step": 11352 }, { "epoch": 0.3479526786808876, "grad_norm": 0.8071193675165715, "learning_rate": 7.57512183313633e-06, "loss": 0.4622, "step": 11353 }, { "epoch": 0.3479833272036288, "grad_norm": 1.5110023585014465, "learning_rate": 7.5746963889405374e-06, "loss": 0.6773, "step": 11354 }, { "epoch": 0.34801397572637, "grad_norm": 1.5893585258487084, "learning_rate": 7.574270919375604e-06, "loss": 0.6292, "step": 11355 }, { "epoch": 0.3480446242491112, "grad_norm": 1.5620435310214071, "learning_rate": 7.57384542444572e-06, "loss": 0.5969, "step": 11356 }, { "epoch": 0.3480752727718524, "grad_norm": 1.5702541213791887, "learning_rate": 7.573419904155077e-06, "loss": 0.7586, "step": 11357 }, { "epoch": 0.3481059212945936, "grad_norm": 1.617637707893616, "learning_rate": 7.57299435850787e-06, "loss": 0.7321, "step": 11358 }, { "epoch": 0.3481365698173348, "grad_norm": 1.7334276112484386, "learning_rate": 7.572568787508292e-06, "loss": 0.6784, "step": 11359 }, { "epoch": 0.348167218340076, "grad_norm": 0.8189916458344001, "learning_rate": 7.572143191160535e-06, "loss": 0.4538, "step": 11360 }, { "epoch": 0.34819786686281723, "grad_norm": 1.7067942461695496, "learning_rate": 7.5717175694687925e-06, "loss": 0.6854, "step": 11361 }, { "epoch": 0.34822851538555843, "grad_norm": 1.7817941440046672, "learning_rate": 7.57129192243726e-06, "loss": 0.7481, "step": 11362 }, { "epoch": 0.34825916390829964, "grad_norm": 1.952093199764622, "learning_rate": 7.57086625007013e-06, "loss": 0.6866, "step": 11363 }, { "epoch": 0.34828981243104085, "grad_norm": 1.6523043189985689, "learning_rate": 7.570440552371596e-06, "loss": 0.6886, "step": 11364 }, { "epoch": 0.34832046095378205, "grad_norm": 2.0954209261534253, "learning_rate": 7.570014829345854e-06, "loss": 0.6914, "step": 11365 }, { "epoch": 0.34835110947652326, "grad_norm": 1.7224366911912885, "learning_rate": 7.5695890809971e-06, "loss": 0.7365, "step": 11366 }, { "epoch": 0.34838175799926446, "grad_norm": 1.953297165813685, "learning_rate": 7.569163307329526e-06, "loss": 0.835, "step": 11367 }, { "epoch": 0.3484124065220056, "grad_norm": 1.6569299456222328, "learning_rate": 7.568737508347327e-06, "loss": 0.6525, "step": 11368 }, { "epoch": 0.3484430550447468, "grad_norm": 0.7801975608285545, "learning_rate": 7.568311684054701e-06, "loss": 0.4647, "step": 11369 }, { "epoch": 0.348473703567488, "grad_norm": 1.6139541147858858, "learning_rate": 7.5678858344558436e-06, "loss": 0.6998, "step": 11370 }, { "epoch": 0.34850435209022923, "grad_norm": 1.6772555458553677, "learning_rate": 7.567459959554951e-06, "loss": 0.6343, "step": 11371 }, { "epoch": 0.34853500061297044, "grad_norm": 0.8108612134421567, "learning_rate": 7.567034059356216e-06, "loss": 0.4708, "step": 11372 }, { "epoch": 0.34856564913571164, "grad_norm": 1.5851887160321907, "learning_rate": 7.56660813386384e-06, "loss": 0.6612, "step": 11373 }, { "epoch": 0.34859629765845285, "grad_norm": 0.764884276410695, "learning_rate": 7.566182183082016e-06, "loss": 0.4659, "step": 11374 }, { "epoch": 0.34862694618119405, "grad_norm": 1.9274897753197784, "learning_rate": 7.565756207014942e-06, "loss": 0.7326, "step": 11375 }, { "epoch": 0.34865759470393526, "grad_norm": 1.8526177414441598, "learning_rate": 7.565330205666817e-06, "loss": 0.7295, "step": 11376 }, { "epoch": 0.34868824322667646, "grad_norm": 1.5971411234019859, "learning_rate": 7.564904179041837e-06, "loss": 0.6309, "step": 11377 }, { "epoch": 0.34871889174941767, "grad_norm": 1.65398958827608, "learning_rate": 7.564478127144199e-06, "loss": 0.7646, "step": 11378 }, { "epoch": 0.3487495402721589, "grad_norm": 1.699476157992531, "learning_rate": 7.564052049978102e-06, "loss": 0.685, "step": 11379 }, { "epoch": 0.3487801887949001, "grad_norm": 1.9520430042160621, "learning_rate": 7.563625947547743e-06, "loss": 0.7107, "step": 11380 }, { "epoch": 0.3488108373176413, "grad_norm": 1.6570412591437305, "learning_rate": 7.5631998198573245e-06, "loss": 0.7224, "step": 11381 }, { "epoch": 0.3488414858403825, "grad_norm": 1.778664397064262, "learning_rate": 7.5627736669110405e-06, "loss": 0.6358, "step": 11382 }, { "epoch": 0.3488721343631237, "grad_norm": 0.9508370782797205, "learning_rate": 7.562347488713091e-06, "loss": 0.4907, "step": 11383 }, { "epoch": 0.3489027828858649, "grad_norm": 1.72655728663921, "learning_rate": 7.561921285267677e-06, "loss": 0.703, "step": 11384 }, { "epoch": 0.3489334314086061, "grad_norm": 1.6484064547966484, "learning_rate": 7.561495056578998e-06, "loss": 0.6733, "step": 11385 }, { "epoch": 0.3489640799313473, "grad_norm": 0.79226911555872, "learning_rate": 7.561068802651253e-06, "loss": 0.4516, "step": 11386 }, { "epoch": 0.3489947284540885, "grad_norm": 1.7814202363865634, "learning_rate": 7.560642523488639e-06, "loss": 0.7362, "step": 11387 }, { "epoch": 0.3490253769768297, "grad_norm": 1.9781032028763168, "learning_rate": 7.560216219095362e-06, "loss": 0.7378, "step": 11388 }, { "epoch": 0.34905602549957093, "grad_norm": 1.7271543654755548, "learning_rate": 7.5597898894756176e-06, "loss": 0.6137, "step": 11389 }, { "epoch": 0.34908667402231214, "grad_norm": 1.5923681992312784, "learning_rate": 7.55936353463361e-06, "loss": 0.6335, "step": 11390 }, { "epoch": 0.34911732254505334, "grad_norm": 1.91320537691559, "learning_rate": 7.558937154573538e-06, "loss": 0.6678, "step": 11391 }, { "epoch": 0.34914797106779455, "grad_norm": 1.6051421444813134, "learning_rate": 7.558510749299604e-06, "loss": 0.6673, "step": 11392 }, { "epoch": 0.34917861959053575, "grad_norm": 1.621612804135462, "learning_rate": 7.5580843188160084e-06, "loss": 0.6748, "step": 11393 }, { "epoch": 0.34920926811327696, "grad_norm": 1.8131482251215973, "learning_rate": 7.557657863126954e-06, "loss": 0.7028, "step": 11394 }, { "epoch": 0.34923991663601817, "grad_norm": 1.532849125325236, "learning_rate": 7.557231382236641e-06, "loss": 0.6528, "step": 11395 }, { "epoch": 0.34927056515875937, "grad_norm": 1.6196021385144945, "learning_rate": 7.556804876149273e-06, "loss": 0.6869, "step": 11396 }, { "epoch": 0.3493012136815006, "grad_norm": 1.674040823677964, "learning_rate": 7.556378344869053e-06, "loss": 0.7547, "step": 11397 }, { "epoch": 0.3493318622042418, "grad_norm": 1.7135597172685708, "learning_rate": 7.5559517884001834e-06, "loss": 0.7592, "step": 11398 }, { "epoch": 0.34936251072698293, "grad_norm": 1.6147770766112153, "learning_rate": 7.555525206746868e-06, "loss": 0.6825, "step": 11399 }, { "epoch": 0.34939315924972414, "grad_norm": 1.6856738255084414, "learning_rate": 7.555098599913306e-06, "loss": 0.668, "step": 11400 }, { "epoch": 0.34942380777246534, "grad_norm": 1.8476950348277263, "learning_rate": 7.5546719679037066e-06, "loss": 0.7085, "step": 11401 }, { "epoch": 0.34945445629520655, "grad_norm": 1.839559623174642, "learning_rate": 7.554245310722268e-06, "loss": 0.7037, "step": 11402 }, { "epoch": 0.34948510481794776, "grad_norm": 1.7740451761458815, "learning_rate": 7.553818628373198e-06, "loss": 0.6977, "step": 11403 }, { "epoch": 0.34951575334068896, "grad_norm": 1.905024500459886, "learning_rate": 7.5533919208607e-06, "loss": 0.4674, "step": 11404 }, { "epoch": 0.34954640186343017, "grad_norm": 1.7030274509983612, "learning_rate": 7.552965188188979e-06, "loss": 0.7263, "step": 11405 }, { "epoch": 0.3495770503861714, "grad_norm": 1.8063727228924362, "learning_rate": 7.552538430362236e-06, "loss": 0.7185, "step": 11406 }, { "epoch": 0.3496076989089126, "grad_norm": 0.97020909694521, "learning_rate": 7.552111647384681e-06, "loss": 0.466, "step": 11407 }, { "epoch": 0.3496383474316538, "grad_norm": 1.792790014005606, "learning_rate": 7.551684839260516e-06, "loss": 0.6789, "step": 11408 }, { "epoch": 0.349668995954395, "grad_norm": 1.538442761982762, "learning_rate": 7.5512580059939475e-06, "loss": 0.6266, "step": 11409 }, { "epoch": 0.3496996444771362, "grad_norm": 1.6566214153835692, "learning_rate": 7.550831147589182e-06, "loss": 0.6281, "step": 11410 }, { "epoch": 0.3497302929998774, "grad_norm": 1.8118479822051448, "learning_rate": 7.550404264050423e-06, "loss": 0.6803, "step": 11411 }, { "epoch": 0.3497609415226186, "grad_norm": 0.8084450175254454, "learning_rate": 7.549977355381879e-06, "loss": 0.4632, "step": 11412 }, { "epoch": 0.3497915900453598, "grad_norm": 1.6813058246465835, "learning_rate": 7.549550421587757e-06, "loss": 0.6717, "step": 11413 }, { "epoch": 0.349822238568101, "grad_norm": 1.7998250749333973, "learning_rate": 7.549123462672261e-06, "loss": 0.6863, "step": 11414 }, { "epoch": 0.3498528870908422, "grad_norm": 1.5250261269720702, "learning_rate": 7.548696478639599e-06, "loss": 0.6141, "step": 11415 }, { "epoch": 0.34988353561358343, "grad_norm": 1.5986276715608276, "learning_rate": 7.548269469493978e-06, "loss": 0.6598, "step": 11416 }, { "epoch": 0.34991418413632464, "grad_norm": 1.6104724774710066, "learning_rate": 7.547842435239608e-06, "loss": 0.659, "step": 11417 }, { "epoch": 0.34994483265906584, "grad_norm": 1.7366489468379782, "learning_rate": 7.5474153758806935e-06, "loss": 0.7213, "step": 11418 }, { "epoch": 0.34997548118180705, "grad_norm": 1.7660592084258842, "learning_rate": 7.546988291421442e-06, "loss": 0.6351, "step": 11419 }, { "epoch": 0.35000612970454825, "grad_norm": 1.715201510279455, "learning_rate": 7.546561181866065e-06, "loss": 0.7636, "step": 11420 }, { "epoch": 0.35003677822728946, "grad_norm": 1.81460382087414, "learning_rate": 7.546134047218769e-06, "loss": 0.5843, "step": 11421 }, { "epoch": 0.35006742675003066, "grad_norm": 0.9496076679442147, "learning_rate": 7.545706887483764e-06, "loss": 0.4865, "step": 11422 }, { "epoch": 0.35009807527277187, "grad_norm": 1.5229618224793613, "learning_rate": 7.5452797026652544e-06, "loss": 0.7117, "step": 11423 }, { "epoch": 0.3501287237955131, "grad_norm": 1.667713236464221, "learning_rate": 7.544852492767455e-06, "loss": 0.7514, "step": 11424 }, { "epoch": 0.3501593723182543, "grad_norm": 0.8024918046993864, "learning_rate": 7.544425257794572e-06, "loss": 0.4743, "step": 11425 }, { "epoch": 0.3501900208409955, "grad_norm": 1.696231445618546, "learning_rate": 7.543997997750816e-06, "loss": 0.6947, "step": 11426 }, { "epoch": 0.3502206693637367, "grad_norm": 1.77292678641442, "learning_rate": 7.543570712640398e-06, "loss": 0.758, "step": 11427 }, { "epoch": 0.3502513178864779, "grad_norm": 1.9264937555063708, "learning_rate": 7.543143402467525e-06, "loss": 0.8201, "step": 11428 }, { "epoch": 0.3502819664092191, "grad_norm": 1.589107010196626, "learning_rate": 7.542716067236411e-06, "loss": 0.6782, "step": 11429 }, { "epoch": 0.35031261493196025, "grad_norm": 1.7527072754780761, "learning_rate": 7.542288706951264e-06, "loss": 0.7433, "step": 11430 }, { "epoch": 0.35034326345470146, "grad_norm": 1.7575091291230056, "learning_rate": 7.541861321616297e-06, "loss": 0.6372, "step": 11431 }, { "epoch": 0.35037391197744266, "grad_norm": 1.5611545666343447, "learning_rate": 7.541433911235719e-06, "loss": 0.6264, "step": 11432 }, { "epoch": 0.35040456050018387, "grad_norm": 1.7716748573201042, "learning_rate": 7.541006475813744e-06, "loss": 0.7652, "step": 11433 }, { "epoch": 0.3504352090229251, "grad_norm": 1.8017955860799642, "learning_rate": 7.540579015354579e-06, "loss": 0.711, "step": 11434 }, { "epoch": 0.3504658575456663, "grad_norm": 1.7534263850873835, "learning_rate": 7.540151529862441e-06, "loss": 0.7258, "step": 11435 }, { "epoch": 0.3504965060684075, "grad_norm": 1.6320762561310378, "learning_rate": 7.5397240193415415e-06, "loss": 0.6937, "step": 11436 }, { "epoch": 0.3505271545911487, "grad_norm": 1.6320308990067531, "learning_rate": 7.539296483796088e-06, "loss": 0.6829, "step": 11437 }, { "epoch": 0.3505578031138899, "grad_norm": 1.8170083533437893, "learning_rate": 7.538868923230298e-06, "loss": 0.6099, "step": 11438 }, { "epoch": 0.3505884516366311, "grad_norm": 1.9772144736689845, "learning_rate": 7.538441337648383e-06, "loss": 0.7692, "step": 11439 }, { "epoch": 0.3506191001593723, "grad_norm": 0.8346239449011975, "learning_rate": 7.538013727054557e-06, "loss": 0.4463, "step": 11440 }, { "epoch": 0.3506497486821135, "grad_norm": 2.017205711585814, "learning_rate": 7.537586091453031e-06, "loss": 0.6528, "step": 11441 }, { "epoch": 0.3506803972048547, "grad_norm": 1.8295114341565275, "learning_rate": 7.537158430848022e-06, "loss": 0.6224, "step": 11442 }, { "epoch": 0.3507110457275959, "grad_norm": 1.6225207200942493, "learning_rate": 7.536730745243739e-06, "loss": 0.6476, "step": 11443 }, { "epoch": 0.35074169425033713, "grad_norm": 1.8772068136892393, "learning_rate": 7.5363030346444014e-06, "loss": 0.6549, "step": 11444 }, { "epoch": 0.35077234277307834, "grad_norm": 1.6502766504568094, "learning_rate": 7.535875299054218e-06, "loss": 0.7194, "step": 11445 }, { "epoch": 0.35080299129581954, "grad_norm": 1.7064956225709258, "learning_rate": 7.535447538477409e-06, "loss": 0.6089, "step": 11446 }, { "epoch": 0.35083363981856075, "grad_norm": 1.7253715647515562, "learning_rate": 7.535019752918186e-06, "loss": 0.6242, "step": 11447 }, { "epoch": 0.35086428834130196, "grad_norm": 1.764414358018767, "learning_rate": 7.5345919423807655e-06, "loss": 0.7836, "step": 11448 }, { "epoch": 0.35089493686404316, "grad_norm": 1.747047141753099, "learning_rate": 7.534164106869361e-06, "loss": 0.6919, "step": 11449 }, { "epoch": 0.35092558538678437, "grad_norm": 1.6847265805453844, "learning_rate": 7.53373624638819e-06, "loss": 0.7343, "step": 11450 }, { "epoch": 0.3509562339095256, "grad_norm": 1.5907026096707517, "learning_rate": 7.533308360941466e-06, "loss": 0.6134, "step": 11451 }, { "epoch": 0.3509868824322668, "grad_norm": 0.8681308766302552, "learning_rate": 7.532880450533408e-06, "loss": 0.4554, "step": 11452 }, { "epoch": 0.351017530955008, "grad_norm": 1.8807056730129377, "learning_rate": 7.53245251516823e-06, "loss": 0.6965, "step": 11453 }, { "epoch": 0.3510481794777492, "grad_norm": 2.170667274143468, "learning_rate": 7.53202455485015e-06, "loss": 0.6751, "step": 11454 }, { "epoch": 0.3510788280004904, "grad_norm": 1.6218763583259161, "learning_rate": 7.531596569583385e-06, "loss": 0.6484, "step": 11455 }, { "epoch": 0.3511094765232316, "grad_norm": 1.5748163231067256, "learning_rate": 7.53116855937215e-06, "loss": 0.7103, "step": 11456 }, { "epoch": 0.3511401250459728, "grad_norm": 1.8956358803390392, "learning_rate": 7.530740524220663e-06, "loss": 0.7289, "step": 11457 }, { "epoch": 0.351170773568714, "grad_norm": 1.8053427891717324, "learning_rate": 7.530312464133143e-06, "loss": 0.6851, "step": 11458 }, { "epoch": 0.3512014220914552, "grad_norm": 2.135487965184047, "learning_rate": 7.529884379113808e-06, "loss": 0.7505, "step": 11459 }, { "epoch": 0.3512320706141964, "grad_norm": 0.8594001014326774, "learning_rate": 7.529456269166872e-06, "loss": 0.4708, "step": 11460 }, { "epoch": 0.3512627191369376, "grad_norm": 1.675873894996503, "learning_rate": 7.529028134296559e-06, "loss": 0.7047, "step": 11461 }, { "epoch": 0.3512933676596788, "grad_norm": 1.9543806955179022, "learning_rate": 7.5285999745070825e-06, "loss": 0.6061, "step": 11462 }, { "epoch": 0.35132401618242, "grad_norm": 2.0650936319087814, "learning_rate": 7.528171789802665e-06, "loss": 0.6731, "step": 11463 }, { "epoch": 0.3513546647051612, "grad_norm": 1.7745616607812758, "learning_rate": 7.527743580187525e-06, "loss": 0.6357, "step": 11464 }, { "epoch": 0.3513853132279024, "grad_norm": 1.9856771972517575, "learning_rate": 7.527315345665879e-06, "loss": 0.6877, "step": 11465 }, { "epoch": 0.3514159617506436, "grad_norm": 1.740883464501038, "learning_rate": 7.526887086241947e-06, "loss": 0.6352, "step": 11466 }, { "epoch": 0.3514466102733848, "grad_norm": 0.822084328284019, "learning_rate": 7.526458801919954e-06, "loss": 0.4703, "step": 11467 }, { "epoch": 0.351477258796126, "grad_norm": 1.7838214686395422, "learning_rate": 7.526030492704113e-06, "loss": 0.7083, "step": 11468 }, { "epoch": 0.3515079073188672, "grad_norm": 1.639553441444486, "learning_rate": 7.525602158598647e-06, "loss": 0.614, "step": 11469 }, { "epoch": 0.3515385558416084, "grad_norm": 1.9393593566493537, "learning_rate": 7.525173799607778e-06, "loss": 0.7143, "step": 11470 }, { "epoch": 0.35156920436434963, "grad_norm": 0.8145277886693462, "learning_rate": 7.5247454157357254e-06, "loss": 0.4801, "step": 11471 }, { "epoch": 0.35159985288709084, "grad_norm": 1.8273658871984977, "learning_rate": 7.52431700698671e-06, "loss": 0.6965, "step": 11472 }, { "epoch": 0.35163050140983204, "grad_norm": 1.9958343480203775, "learning_rate": 7.523888573364953e-06, "loss": 0.8387, "step": 11473 }, { "epoch": 0.35166114993257325, "grad_norm": 1.6287248420306897, "learning_rate": 7.523460114874677e-06, "loss": 0.6896, "step": 11474 }, { "epoch": 0.35169179845531445, "grad_norm": 2.665527712983616, "learning_rate": 7.523031631520101e-06, "loss": 0.6682, "step": 11475 }, { "epoch": 0.35172244697805566, "grad_norm": 1.7739599117273548, "learning_rate": 7.52260312330545e-06, "loss": 0.6289, "step": 11476 }, { "epoch": 0.35175309550079686, "grad_norm": 1.7801635111572465, "learning_rate": 7.522174590234945e-06, "loss": 0.6409, "step": 11477 }, { "epoch": 0.35178374402353807, "grad_norm": 1.7521594650834111, "learning_rate": 7.521746032312809e-06, "loss": 0.768, "step": 11478 }, { "epoch": 0.3518143925462793, "grad_norm": 1.6793466177062144, "learning_rate": 7.521317449543263e-06, "loss": 0.6873, "step": 11479 }, { "epoch": 0.3518450410690205, "grad_norm": 1.7685644006632135, "learning_rate": 7.520888841930532e-06, "loss": 0.6978, "step": 11480 }, { "epoch": 0.3518756895917617, "grad_norm": 1.571812427092118, "learning_rate": 7.520460209478837e-06, "loss": 0.6439, "step": 11481 }, { "epoch": 0.3519063381145029, "grad_norm": 1.5889915217234905, "learning_rate": 7.520031552192404e-06, "loss": 0.6921, "step": 11482 }, { "epoch": 0.3519369866372441, "grad_norm": 1.7806736915120018, "learning_rate": 7.519602870075455e-06, "loss": 0.6477, "step": 11483 }, { "epoch": 0.3519676351599853, "grad_norm": 1.6982767196302293, "learning_rate": 7.519174163132212e-06, "loss": 0.7168, "step": 11484 }, { "epoch": 0.3519982836827265, "grad_norm": 1.6249313177476208, "learning_rate": 7.518745431366905e-06, "loss": 0.5841, "step": 11485 }, { "epoch": 0.3520289322054677, "grad_norm": 1.7301409121799092, "learning_rate": 7.5183166747837534e-06, "loss": 0.5982, "step": 11486 }, { "epoch": 0.3520595807282089, "grad_norm": 1.65781341944439, "learning_rate": 7.517887893386984e-06, "loss": 0.5984, "step": 11487 }, { "epoch": 0.3520902292509501, "grad_norm": 1.5612348235037676, "learning_rate": 7.517459087180819e-06, "loss": 0.6071, "step": 11488 }, { "epoch": 0.35212087777369133, "grad_norm": 1.5387781967786345, "learning_rate": 7.517030256169486e-06, "loss": 0.5937, "step": 11489 }, { "epoch": 0.35215152629643254, "grad_norm": 1.7757232352861334, "learning_rate": 7.516601400357211e-06, "loss": 0.7016, "step": 11490 }, { "epoch": 0.35218217481917374, "grad_norm": 1.6716386475950529, "learning_rate": 7.516172519748218e-06, "loss": 0.6081, "step": 11491 }, { "epoch": 0.3522128233419149, "grad_norm": 1.6001061950393538, "learning_rate": 7.515743614346734e-06, "loss": 0.6975, "step": 11492 }, { "epoch": 0.3522434718646561, "grad_norm": 1.7216272854340722, "learning_rate": 7.5153146841569825e-06, "loss": 0.664, "step": 11493 }, { "epoch": 0.3522741203873973, "grad_norm": 0.9104537840978975, "learning_rate": 7.514885729183195e-06, "loss": 0.4725, "step": 11494 }, { "epoch": 0.3523047689101385, "grad_norm": 1.7847175505967632, "learning_rate": 7.514456749429592e-06, "loss": 0.766, "step": 11495 }, { "epoch": 0.3523354174328797, "grad_norm": 1.9022721849673196, "learning_rate": 7.514027744900404e-06, "loss": 0.6111, "step": 11496 }, { "epoch": 0.3523660659556209, "grad_norm": 1.575498281225898, "learning_rate": 7.513598715599857e-06, "loss": 0.6344, "step": 11497 }, { "epoch": 0.3523967144783621, "grad_norm": 1.6331737009621388, "learning_rate": 7.51316966153218e-06, "loss": 0.5958, "step": 11498 }, { "epoch": 0.35242736300110333, "grad_norm": 1.7249736684236623, "learning_rate": 7.512740582701598e-06, "loss": 0.7251, "step": 11499 }, { "epoch": 0.35245801152384454, "grad_norm": 1.820602645318811, "learning_rate": 7.512311479112342e-06, "loss": 0.7119, "step": 11500 }, { "epoch": 0.35248866004658574, "grad_norm": 0.7977080850338382, "learning_rate": 7.511882350768636e-06, "loss": 0.4807, "step": 11501 }, { "epoch": 0.35251930856932695, "grad_norm": 1.6361909666884584, "learning_rate": 7.511453197674714e-06, "loss": 0.6599, "step": 11502 }, { "epoch": 0.35254995709206816, "grad_norm": 0.8331475704820773, "learning_rate": 7.511024019834798e-06, "loss": 0.4584, "step": 11503 }, { "epoch": 0.35258060561480936, "grad_norm": 0.7852891642350198, "learning_rate": 7.51059481725312e-06, "loss": 0.4426, "step": 11504 }, { "epoch": 0.35261125413755057, "grad_norm": 1.5764130253158848, "learning_rate": 7.510165589933911e-06, "loss": 0.685, "step": 11505 }, { "epoch": 0.3526419026602918, "grad_norm": 1.6135032672413598, "learning_rate": 7.509736337881395e-06, "loss": 0.6593, "step": 11506 }, { "epoch": 0.352672551183033, "grad_norm": 0.7766728447706246, "learning_rate": 7.509307061099806e-06, "loss": 0.4699, "step": 11507 }, { "epoch": 0.3527031997057742, "grad_norm": 0.7846030024922993, "learning_rate": 7.508877759593373e-06, "loss": 0.4401, "step": 11508 }, { "epoch": 0.3527338482285154, "grad_norm": 1.7371215277171819, "learning_rate": 7.508448433366326e-06, "loss": 0.5609, "step": 11509 }, { "epoch": 0.3527644967512566, "grad_norm": 1.822417826692636, "learning_rate": 7.508019082422893e-06, "loss": 0.6703, "step": 11510 }, { "epoch": 0.3527951452739978, "grad_norm": 1.636929803066855, "learning_rate": 7.507589706767309e-06, "loss": 0.7077, "step": 11511 }, { "epoch": 0.352825793796739, "grad_norm": 1.6307178598608998, "learning_rate": 7.5071603064038e-06, "loss": 0.7366, "step": 11512 }, { "epoch": 0.3528564423194802, "grad_norm": 1.5967246435125362, "learning_rate": 7.5067308813366e-06, "loss": 0.7462, "step": 11513 }, { "epoch": 0.3528870908422214, "grad_norm": 1.64729152022403, "learning_rate": 7.506301431569938e-06, "loss": 0.7352, "step": 11514 }, { "epoch": 0.3529177393649626, "grad_norm": 1.6968999539677785, "learning_rate": 7.505871957108049e-06, "loss": 0.703, "step": 11515 }, { "epoch": 0.35294838788770383, "grad_norm": 1.6707756841286392, "learning_rate": 7.505442457955159e-06, "loss": 0.6545, "step": 11516 }, { "epoch": 0.35297903641044504, "grad_norm": 0.8646169115591696, "learning_rate": 7.505012934115506e-06, "loss": 0.4379, "step": 11517 }, { "epoch": 0.35300968493318624, "grad_norm": 1.8269410607267949, "learning_rate": 7.504583385593318e-06, "loss": 0.6641, "step": 11518 }, { "epoch": 0.35304033345592745, "grad_norm": 1.5696208205974542, "learning_rate": 7.504153812392832e-06, "loss": 0.6442, "step": 11519 }, { "epoch": 0.35307098197866865, "grad_norm": 1.9961449937930107, "learning_rate": 7.503724214518275e-06, "loss": 0.6647, "step": 11520 }, { "epoch": 0.35310163050140986, "grad_norm": 1.8136114742620981, "learning_rate": 7.503294591973883e-06, "loss": 0.5898, "step": 11521 }, { "epoch": 0.35313227902415106, "grad_norm": 0.8385022822540775, "learning_rate": 7.50286494476389e-06, "loss": 0.489, "step": 11522 }, { "epoch": 0.3531629275468922, "grad_norm": 1.536324633828694, "learning_rate": 7.502435272892527e-06, "loss": 0.6665, "step": 11523 }, { "epoch": 0.3531935760696334, "grad_norm": 1.8619185268873737, "learning_rate": 7.50200557636403e-06, "loss": 0.7498, "step": 11524 }, { "epoch": 0.3532242245923746, "grad_norm": 1.9954772785342152, "learning_rate": 7.501575855182631e-06, "loss": 0.6801, "step": 11525 }, { "epoch": 0.35325487311511583, "grad_norm": 1.573652658339017, "learning_rate": 7.501146109352566e-06, "loss": 0.5985, "step": 11526 }, { "epoch": 0.35328552163785704, "grad_norm": 0.8160655569598101, "learning_rate": 7.500716338878068e-06, "loss": 0.471, "step": 11527 }, { "epoch": 0.35331617016059824, "grad_norm": 1.7897795574160613, "learning_rate": 7.500286543763372e-06, "loss": 0.6816, "step": 11528 }, { "epoch": 0.35334681868333945, "grad_norm": 1.6072053020184927, "learning_rate": 7.499856724012713e-06, "loss": 0.6781, "step": 11529 }, { "epoch": 0.35337746720608065, "grad_norm": 1.8161389294298538, "learning_rate": 7.4994268796303275e-06, "loss": 0.7143, "step": 11530 }, { "epoch": 0.35340811572882186, "grad_norm": 1.7061376863277748, "learning_rate": 7.498997010620447e-06, "loss": 0.6641, "step": 11531 }, { "epoch": 0.35343876425156306, "grad_norm": 1.8201608438375971, "learning_rate": 7.498567116987311e-06, "loss": 0.6838, "step": 11532 }, { "epoch": 0.35346941277430427, "grad_norm": 1.9175811547233572, "learning_rate": 7.498137198735153e-06, "loss": 0.7904, "step": 11533 }, { "epoch": 0.3535000612970455, "grad_norm": 1.681686081795786, "learning_rate": 7.4977072558682104e-06, "loss": 0.6137, "step": 11534 }, { "epoch": 0.3535307098197867, "grad_norm": 1.7951524125526113, "learning_rate": 7.497277288390719e-06, "loss": 0.7353, "step": 11535 }, { "epoch": 0.3535613583425279, "grad_norm": 1.6203213026860488, "learning_rate": 7.496847296306917e-06, "loss": 0.6703, "step": 11536 }, { "epoch": 0.3535920068652691, "grad_norm": 1.8116215048671631, "learning_rate": 7.496417279621039e-06, "loss": 0.7456, "step": 11537 }, { "epoch": 0.3536226553880103, "grad_norm": 1.8081705396122012, "learning_rate": 7.495987238337321e-06, "loss": 0.7373, "step": 11538 }, { "epoch": 0.3536533039107515, "grad_norm": 1.6266718006144087, "learning_rate": 7.495557172460004e-06, "loss": 0.6565, "step": 11539 }, { "epoch": 0.3536839524334927, "grad_norm": 1.8695362795312345, "learning_rate": 7.495127081993323e-06, "loss": 0.6285, "step": 11540 }, { "epoch": 0.3537146009562339, "grad_norm": 1.6194557032570933, "learning_rate": 7.494696966941517e-06, "loss": 0.6686, "step": 11541 }, { "epoch": 0.3537452494789751, "grad_norm": 1.6884560811547316, "learning_rate": 7.494266827308823e-06, "loss": 0.6828, "step": 11542 }, { "epoch": 0.3537758980017163, "grad_norm": 1.6912695398119963, "learning_rate": 7.4938366630994805e-06, "loss": 0.6983, "step": 11543 }, { "epoch": 0.35380654652445753, "grad_norm": 2.002398296430419, "learning_rate": 7.493406474317726e-06, "loss": 0.6877, "step": 11544 }, { "epoch": 0.35383719504719874, "grad_norm": 1.8183020707661715, "learning_rate": 7.492976260967801e-06, "loss": 0.6688, "step": 11545 }, { "epoch": 0.35386784356993994, "grad_norm": 1.6855873117672848, "learning_rate": 7.492546023053941e-06, "loss": 0.6308, "step": 11546 }, { "epoch": 0.35389849209268115, "grad_norm": 1.6249546331224052, "learning_rate": 7.49211576058039e-06, "loss": 0.6074, "step": 11547 }, { "epoch": 0.35392914061542236, "grad_norm": 1.7905327920465453, "learning_rate": 7.491685473551384e-06, "loss": 0.7201, "step": 11548 }, { "epoch": 0.35395978913816356, "grad_norm": 1.5999403170752127, "learning_rate": 7.4912551619711614e-06, "loss": 0.631, "step": 11549 }, { "epoch": 0.35399043766090477, "grad_norm": 1.9142260730969012, "learning_rate": 7.490824825843965e-06, "loss": 0.6763, "step": 11550 }, { "epoch": 0.35402108618364597, "grad_norm": 2.0288307996504087, "learning_rate": 7.490394465174036e-06, "loss": 0.7325, "step": 11551 }, { "epoch": 0.3540517347063872, "grad_norm": 1.549024979617377, "learning_rate": 7.489964079965613e-06, "loss": 0.6619, "step": 11552 }, { "epoch": 0.3540823832291284, "grad_norm": 0.9190885024854405, "learning_rate": 7.489533670222935e-06, "loss": 0.4741, "step": 11553 }, { "epoch": 0.35411303175186953, "grad_norm": 1.8391967850689488, "learning_rate": 7.489103235950246e-06, "loss": 0.6966, "step": 11554 }, { "epoch": 0.35414368027461074, "grad_norm": 1.6211849751147696, "learning_rate": 7.488672777151786e-06, "loss": 0.5722, "step": 11555 }, { "epoch": 0.35417432879735194, "grad_norm": 0.8117928839429486, "learning_rate": 7.488242293831795e-06, "loss": 0.4703, "step": 11556 }, { "epoch": 0.35420497732009315, "grad_norm": 1.7054393163971326, "learning_rate": 7.487811785994518e-06, "loss": 0.6188, "step": 11557 }, { "epoch": 0.35423562584283436, "grad_norm": 0.751975358976021, "learning_rate": 7.487381253644193e-06, "loss": 0.4491, "step": 11558 }, { "epoch": 0.35426627436557556, "grad_norm": 1.9008345748263107, "learning_rate": 7.486950696785066e-06, "loss": 0.6799, "step": 11559 }, { "epoch": 0.35429692288831677, "grad_norm": 1.7515405362746963, "learning_rate": 7.4865201154213765e-06, "loss": 0.598, "step": 11560 }, { "epoch": 0.354327571411058, "grad_norm": 1.8483190864527228, "learning_rate": 7.486089509557368e-06, "loss": 0.792, "step": 11561 }, { "epoch": 0.3543582199337992, "grad_norm": 0.7840131735797282, "learning_rate": 7.485658879197284e-06, "loss": 0.4594, "step": 11562 }, { "epoch": 0.3543888684565404, "grad_norm": 0.8235559139544832, "learning_rate": 7.4852282243453665e-06, "loss": 0.4775, "step": 11563 }, { "epoch": 0.3544195169792816, "grad_norm": 1.5646020864524388, "learning_rate": 7.4847975450058596e-06, "loss": 0.6929, "step": 11564 }, { "epoch": 0.3544501655020228, "grad_norm": 1.812354445812359, "learning_rate": 7.4843668411830076e-06, "loss": 0.7436, "step": 11565 }, { "epoch": 0.354480814024764, "grad_norm": 1.890227758800528, "learning_rate": 7.483936112881052e-06, "loss": 0.7275, "step": 11566 }, { "epoch": 0.3545114625475052, "grad_norm": 1.6021352928150805, "learning_rate": 7.48350536010424e-06, "loss": 0.5744, "step": 11567 }, { "epoch": 0.3545421110702464, "grad_norm": 1.8625177179043693, "learning_rate": 7.483074582856812e-06, "loss": 0.7647, "step": 11568 }, { "epoch": 0.3545727595929876, "grad_norm": 1.812591108217456, "learning_rate": 7.482643781143016e-06, "loss": 0.6366, "step": 11569 }, { "epoch": 0.3546034081157288, "grad_norm": 1.769639015508177, "learning_rate": 7.482212954967095e-06, "loss": 0.6404, "step": 11570 }, { "epoch": 0.35463405663847003, "grad_norm": 1.700512543178736, "learning_rate": 7.481782104333297e-06, "loss": 0.5974, "step": 11571 }, { "epoch": 0.35466470516121124, "grad_norm": 1.6314356675338788, "learning_rate": 7.481351229245862e-06, "loss": 0.7222, "step": 11572 }, { "epoch": 0.35469535368395244, "grad_norm": 1.5207909745684254, "learning_rate": 7.4809203297090405e-06, "loss": 0.6637, "step": 11573 }, { "epoch": 0.35472600220669365, "grad_norm": 1.5742764175188213, "learning_rate": 7.480489405727075e-06, "loss": 0.6345, "step": 11574 }, { "epoch": 0.35475665072943485, "grad_norm": 1.6304422649658734, "learning_rate": 7.480058457304214e-06, "loss": 0.5993, "step": 11575 }, { "epoch": 0.35478729925217606, "grad_norm": 1.8607597894148566, "learning_rate": 7.479627484444701e-06, "loss": 0.7531, "step": 11576 }, { "epoch": 0.35481794777491726, "grad_norm": 1.6423509963450464, "learning_rate": 7.479196487152784e-06, "loss": 0.6692, "step": 11577 }, { "epoch": 0.35484859629765847, "grad_norm": 1.0546201238381399, "learning_rate": 7.478765465432712e-06, "loss": 0.495, "step": 11578 }, { "epoch": 0.3548792448203997, "grad_norm": 1.7107206739507406, "learning_rate": 7.478334419288726e-06, "loss": 0.6193, "step": 11579 }, { "epoch": 0.3549098933431409, "grad_norm": 1.5851073401602482, "learning_rate": 7.4779033487250796e-06, "loss": 0.5442, "step": 11580 }, { "epoch": 0.3549405418658821, "grad_norm": 1.707975379473729, "learning_rate": 7.477472253746017e-06, "loss": 0.601, "step": 11581 }, { "epoch": 0.3549711903886233, "grad_norm": 1.6152203731041903, "learning_rate": 7.477041134355787e-06, "loss": 0.6315, "step": 11582 }, { "epoch": 0.3550018389113645, "grad_norm": 1.6806908040263806, "learning_rate": 7.476609990558636e-06, "loss": 0.7617, "step": 11583 }, { "epoch": 0.3550324874341057, "grad_norm": 0.7786381463097348, "learning_rate": 7.476178822358813e-06, "loss": 0.4588, "step": 11584 }, { "epoch": 0.35506313595684685, "grad_norm": 1.7402228775699837, "learning_rate": 7.4757476297605654e-06, "loss": 0.7551, "step": 11585 }, { "epoch": 0.35509378447958806, "grad_norm": 1.6216796027646538, "learning_rate": 7.475316412768145e-06, "loss": 0.6998, "step": 11586 }, { "epoch": 0.35512443300232927, "grad_norm": 1.95091302872517, "learning_rate": 7.474885171385797e-06, "loss": 0.7294, "step": 11587 }, { "epoch": 0.35515508152507047, "grad_norm": 0.8659523725684465, "learning_rate": 7.4744539056177735e-06, "loss": 0.4804, "step": 11588 }, { "epoch": 0.3551857300478117, "grad_norm": 1.876965361264726, "learning_rate": 7.4740226154683215e-06, "loss": 0.6961, "step": 11589 }, { "epoch": 0.3552163785705529, "grad_norm": 1.7013120183700576, "learning_rate": 7.473591300941692e-06, "loss": 0.762, "step": 11590 }, { "epoch": 0.3552470270932941, "grad_norm": 1.487944029699359, "learning_rate": 7.473159962042136e-06, "loss": 0.5129, "step": 11591 }, { "epoch": 0.3552776756160353, "grad_norm": 1.7019548096031836, "learning_rate": 7.4727285987738995e-06, "loss": 0.6674, "step": 11592 }, { "epoch": 0.3553083241387765, "grad_norm": 1.6714334645108642, "learning_rate": 7.472297211141237e-06, "loss": 0.652, "step": 11593 }, { "epoch": 0.3553389726615177, "grad_norm": 1.8973097769159681, "learning_rate": 7.471865799148398e-06, "loss": 0.6297, "step": 11594 }, { "epoch": 0.3553696211842589, "grad_norm": 1.5419817637402538, "learning_rate": 7.471434362799632e-06, "loss": 0.625, "step": 11595 }, { "epoch": 0.3554002697070001, "grad_norm": 1.4272647066523545, "learning_rate": 7.47100290209919e-06, "loss": 0.6916, "step": 11596 }, { "epoch": 0.3554309182297413, "grad_norm": 0.8276034732007527, "learning_rate": 7.470571417051327e-06, "loss": 0.4789, "step": 11597 }, { "epoch": 0.3554615667524825, "grad_norm": 1.8069693670211748, "learning_rate": 7.470139907660288e-06, "loss": 0.6227, "step": 11598 }, { "epoch": 0.35549221527522373, "grad_norm": 1.75546383442252, "learning_rate": 7.4697083739303315e-06, "loss": 0.7052, "step": 11599 }, { "epoch": 0.35552286379796494, "grad_norm": 1.7005328054719464, "learning_rate": 7.469276815865705e-06, "loss": 0.7193, "step": 11600 }, { "epoch": 0.35555351232070614, "grad_norm": 1.5967382569194908, "learning_rate": 7.468845233470663e-06, "loss": 0.668, "step": 11601 }, { "epoch": 0.35558416084344735, "grad_norm": 1.6656133972216702, "learning_rate": 7.468413626749457e-06, "loss": 0.6001, "step": 11602 }, { "epoch": 0.35561480936618856, "grad_norm": 2.0845430972598766, "learning_rate": 7.467981995706339e-06, "loss": 0.6604, "step": 11603 }, { "epoch": 0.35564545788892976, "grad_norm": 1.690675580335757, "learning_rate": 7.467550340345564e-06, "loss": 0.6465, "step": 11604 }, { "epoch": 0.35567610641167097, "grad_norm": 1.6411408214555958, "learning_rate": 7.467118660671383e-06, "loss": 0.6777, "step": 11605 }, { "epoch": 0.3557067549344122, "grad_norm": 0.8331927018839651, "learning_rate": 7.466686956688053e-06, "loss": 0.4663, "step": 11606 }, { "epoch": 0.3557374034571534, "grad_norm": 0.8331391402626344, "learning_rate": 7.466255228399824e-06, "loss": 0.4604, "step": 11607 }, { "epoch": 0.3557680519798946, "grad_norm": 1.663042447995613, "learning_rate": 7.4658234758109505e-06, "loss": 0.655, "step": 11608 }, { "epoch": 0.3557987005026358, "grad_norm": 1.6023563730202857, "learning_rate": 7.465391698925688e-06, "loss": 0.6521, "step": 11609 }, { "epoch": 0.355829349025377, "grad_norm": 1.5302993704400532, "learning_rate": 7.4649598977482894e-06, "loss": 0.698, "step": 11610 }, { "epoch": 0.3558599975481182, "grad_norm": 1.623637301236848, "learning_rate": 7.46452807228301e-06, "loss": 0.6079, "step": 11611 }, { "epoch": 0.3558906460708594, "grad_norm": 0.8634783038784498, "learning_rate": 7.464096222534107e-06, "loss": 0.4618, "step": 11612 }, { "epoch": 0.3559212945936006, "grad_norm": 1.7107072399123349, "learning_rate": 7.463664348505832e-06, "loss": 0.6729, "step": 11613 }, { "epoch": 0.3559519431163418, "grad_norm": 1.636793641182195, "learning_rate": 7.463232450202443e-06, "loss": 0.6768, "step": 11614 }, { "epoch": 0.355982591639083, "grad_norm": 0.8792798630539294, "learning_rate": 7.462800527628193e-06, "loss": 0.4795, "step": 11615 }, { "epoch": 0.3560132401618242, "grad_norm": 1.710344544405129, "learning_rate": 7.462368580787341e-06, "loss": 0.6986, "step": 11616 }, { "epoch": 0.3560438886845654, "grad_norm": 0.8104013315825509, "learning_rate": 7.46193660968414e-06, "loss": 0.4539, "step": 11617 }, { "epoch": 0.3560745372073066, "grad_norm": 1.669885723418072, "learning_rate": 7.461504614322848e-06, "loss": 0.6714, "step": 11618 }, { "epoch": 0.3561051857300478, "grad_norm": 1.7259351129942682, "learning_rate": 7.46107259470772e-06, "loss": 0.7379, "step": 11619 }, { "epoch": 0.356135834252789, "grad_norm": 1.5633331497472034, "learning_rate": 7.460640550843015e-06, "loss": 0.7105, "step": 11620 }, { "epoch": 0.3561664827755302, "grad_norm": 1.6115477097054687, "learning_rate": 7.46020848273299e-06, "loss": 0.6685, "step": 11621 }, { "epoch": 0.3561971312982714, "grad_norm": 1.6704200938320615, "learning_rate": 7.459776390381901e-06, "loss": 0.6615, "step": 11622 }, { "epoch": 0.3562277798210126, "grad_norm": 1.6443037461239731, "learning_rate": 7.459344273794004e-06, "loss": 0.729, "step": 11623 }, { "epoch": 0.3562584283437538, "grad_norm": 1.8027573000183872, "learning_rate": 7.4589121329735604e-06, "loss": 0.6762, "step": 11624 }, { "epoch": 0.356289076866495, "grad_norm": 1.6543193855371827, "learning_rate": 7.4584799679248275e-06, "loss": 0.6837, "step": 11625 }, { "epoch": 0.35631972538923623, "grad_norm": 1.506689520889283, "learning_rate": 7.45804777865206e-06, "loss": 0.7575, "step": 11626 }, { "epoch": 0.35635037391197744, "grad_norm": 2.020036747174492, "learning_rate": 7.457615565159521e-06, "loss": 0.7668, "step": 11627 }, { "epoch": 0.35638102243471864, "grad_norm": 1.5672536173331542, "learning_rate": 7.457183327451465e-06, "loss": 0.6147, "step": 11628 }, { "epoch": 0.35641167095745985, "grad_norm": 1.8335877677423122, "learning_rate": 7.456751065532153e-06, "loss": 0.7291, "step": 11629 }, { "epoch": 0.35644231948020105, "grad_norm": 1.1412993704195857, "learning_rate": 7.456318779405845e-06, "loss": 0.464, "step": 11630 }, { "epoch": 0.35647296800294226, "grad_norm": 1.8619977626677557, "learning_rate": 7.4558864690768e-06, "loss": 0.6459, "step": 11631 }, { "epoch": 0.35650361652568346, "grad_norm": 1.900572168036094, "learning_rate": 7.455454134549278e-06, "loss": 0.6322, "step": 11632 }, { "epoch": 0.35653426504842467, "grad_norm": 0.8713776903275755, "learning_rate": 7.455021775827536e-06, "loss": 0.4664, "step": 11633 }, { "epoch": 0.3565649135711659, "grad_norm": 1.6482902119801084, "learning_rate": 7.454589392915838e-06, "loss": 0.6895, "step": 11634 }, { "epoch": 0.3565955620939071, "grad_norm": 1.6672597348325522, "learning_rate": 7.454156985818441e-06, "loss": 0.6856, "step": 11635 }, { "epoch": 0.3566262106166483, "grad_norm": 1.8795221582324282, "learning_rate": 7.453724554539609e-06, "loss": 0.7449, "step": 11636 }, { "epoch": 0.3566568591393895, "grad_norm": 1.487729927570375, "learning_rate": 7.4532920990836e-06, "loss": 0.5559, "step": 11637 }, { "epoch": 0.3566875076621307, "grad_norm": 1.7059678331524135, "learning_rate": 7.4528596194546775e-06, "loss": 0.7335, "step": 11638 }, { "epoch": 0.3567181561848719, "grad_norm": 1.706430980146084, "learning_rate": 7.4524271156570996e-06, "loss": 0.6692, "step": 11639 }, { "epoch": 0.3567488047076131, "grad_norm": 1.7229600920938677, "learning_rate": 7.451994587695133e-06, "loss": 0.682, "step": 11640 }, { "epoch": 0.3567794532303543, "grad_norm": 1.604056136596775, "learning_rate": 7.4515620355730334e-06, "loss": 0.6484, "step": 11641 }, { "epoch": 0.3568101017530955, "grad_norm": 1.6281978392066858, "learning_rate": 7.451129459295066e-06, "loss": 0.6283, "step": 11642 }, { "epoch": 0.3568407502758367, "grad_norm": 1.8884438664382392, "learning_rate": 7.450696858865494e-06, "loss": 0.7055, "step": 11643 }, { "epoch": 0.35687139879857793, "grad_norm": 1.6251361786550427, "learning_rate": 7.4502642342885775e-06, "loss": 0.6091, "step": 11644 }, { "epoch": 0.35690204732131914, "grad_norm": 1.702356729245703, "learning_rate": 7.4498315855685835e-06, "loss": 0.6423, "step": 11645 }, { "epoch": 0.35693269584406034, "grad_norm": 1.1651568737667457, "learning_rate": 7.449398912709768e-06, "loss": 0.4436, "step": 11646 }, { "epoch": 0.3569633443668015, "grad_norm": 1.6273361324262092, "learning_rate": 7.448966215716401e-06, "loss": 0.5841, "step": 11647 }, { "epoch": 0.3569939928895427, "grad_norm": 1.8808769572255137, "learning_rate": 7.448533494592743e-06, "loss": 0.6946, "step": 11648 }, { "epoch": 0.3570246414122839, "grad_norm": 0.9310667950306141, "learning_rate": 7.448100749343059e-06, "loss": 0.4647, "step": 11649 }, { "epoch": 0.3570552899350251, "grad_norm": 1.7566759229377036, "learning_rate": 7.447667979971609e-06, "loss": 0.684, "step": 11650 }, { "epoch": 0.3570859384577663, "grad_norm": 1.670385288307715, "learning_rate": 7.447235186482662e-06, "loss": 0.7101, "step": 11651 }, { "epoch": 0.3571165869805075, "grad_norm": 1.746604416821685, "learning_rate": 7.446802368880481e-06, "loss": 0.694, "step": 11652 }, { "epoch": 0.35714723550324873, "grad_norm": 1.8473340806044423, "learning_rate": 7.44636952716933e-06, "loss": 0.6701, "step": 11653 }, { "epoch": 0.35717788402598993, "grad_norm": 1.6049912415347145, "learning_rate": 7.445936661353472e-06, "loss": 0.6156, "step": 11654 }, { "epoch": 0.35720853254873114, "grad_norm": 1.7322573211938996, "learning_rate": 7.445503771437177e-06, "loss": 0.7593, "step": 11655 }, { "epoch": 0.35723918107147234, "grad_norm": 1.489575960182561, "learning_rate": 7.445070857424706e-06, "loss": 0.5869, "step": 11656 }, { "epoch": 0.35726982959421355, "grad_norm": 1.8339539606992972, "learning_rate": 7.4446379193203265e-06, "loss": 0.6394, "step": 11657 }, { "epoch": 0.35730047811695476, "grad_norm": 1.499792996270755, "learning_rate": 7.444204957128305e-06, "loss": 0.6516, "step": 11658 }, { "epoch": 0.35733112663969596, "grad_norm": 1.6190070136002814, "learning_rate": 7.443771970852907e-06, "loss": 0.6511, "step": 11659 }, { "epoch": 0.35736177516243717, "grad_norm": 1.6859261911732035, "learning_rate": 7.443338960498398e-06, "loss": 0.6746, "step": 11660 }, { "epoch": 0.3573924236851784, "grad_norm": 1.7328176436121998, "learning_rate": 7.4429059260690425e-06, "loss": 0.7041, "step": 11661 }, { "epoch": 0.3574230722079196, "grad_norm": 1.8246883908405414, "learning_rate": 7.442472867569112e-06, "loss": 0.7081, "step": 11662 }, { "epoch": 0.3574537207306608, "grad_norm": 1.6920086867171547, "learning_rate": 7.442039785002872e-06, "loss": 0.4697, "step": 11663 }, { "epoch": 0.357484369253402, "grad_norm": 1.8796911058435575, "learning_rate": 7.441606678374589e-06, "loss": 0.709, "step": 11664 }, { "epoch": 0.3575150177761432, "grad_norm": 0.8906052079677012, "learning_rate": 7.441173547688529e-06, "loss": 0.4744, "step": 11665 }, { "epoch": 0.3575456662988844, "grad_norm": 1.8234440391156852, "learning_rate": 7.440740392948964e-06, "loss": 0.6585, "step": 11666 }, { "epoch": 0.3575763148216256, "grad_norm": 1.7742711987014865, "learning_rate": 7.4403072141601585e-06, "loss": 0.6819, "step": 11667 }, { "epoch": 0.3576069633443668, "grad_norm": 1.6701536867268716, "learning_rate": 7.439874011326381e-06, "loss": 0.7131, "step": 11668 }, { "epoch": 0.357637611867108, "grad_norm": 2.0093544767595213, "learning_rate": 7.439440784451901e-06, "loss": 0.8084, "step": 11669 }, { "epoch": 0.3576682603898492, "grad_norm": 1.744410670423588, "learning_rate": 7.439007533540986e-06, "loss": 0.6967, "step": 11670 }, { "epoch": 0.35769890891259043, "grad_norm": 3.5050390034295607, "learning_rate": 7.438574258597907e-06, "loss": 0.6776, "step": 11671 }, { "epoch": 0.35772955743533164, "grad_norm": 1.8629290678880424, "learning_rate": 7.438140959626931e-06, "loss": 0.656, "step": 11672 }, { "epoch": 0.35776020595807284, "grad_norm": 2.021802473978107, "learning_rate": 7.437707636632329e-06, "loss": 0.7146, "step": 11673 }, { "epoch": 0.35779085448081405, "grad_norm": 1.5662819824956624, "learning_rate": 7.437274289618368e-06, "loss": 0.6249, "step": 11674 }, { "epoch": 0.35782150300355525, "grad_norm": 1.622144245169312, "learning_rate": 7.436840918589323e-06, "loss": 0.6368, "step": 11675 }, { "epoch": 0.35785215152629646, "grad_norm": 1.734709978145341, "learning_rate": 7.436407523549458e-06, "loss": 0.688, "step": 11676 }, { "epoch": 0.35788280004903766, "grad_norm": 1.7698030494434414, "learning_rate": 7.435974104503048e-06, "loss": 0.628, "step": 11677 }, { "epoch": 0.3579134485717788, "grad_norm": 2.0653609029571367, "learning_rate": 7.435540661454361e-06, "loss": 0.5135, "step": 11678 }, { "epoch": 0.35794409709452, "grad_norm": 1.749560636168231, "learning_rate": 7.43510719440767e-06, "loss": 0.624, "step": 11679 }, { "epoch": 0.3579747456172612, "grad_norm": 1.7666587469474175, "learning_rate": 7.434673703367243e-06, "loss": 0.6713, "step": 11680 }, { "epoch": 0.35800539414000243, "grad_norm": 1.7346319396335559, "learning_rate": 7.434240188337355e-06, "loss": 0.6542, "step": 11681 }, { "epoch": 0.35803604266274364, "grad_norm": 0.9361569226920551, "learning_rate": 7.433806649322274e-06, "loss": 0.4733, "step": 11682 }, { "epoch": 0.35806669118548484, "grad_norm": 2.0333593973416466, "learning_rate": 7.433373086326274e-06, "loss": 0.7028, "step": 11683 }, { "epoch": 0.35809733970822605, "grad_norm": 2.091401143616636, "learning_rate": 7.432939499353627e-06, "loss": 0.7234, "step": 11684 }, { "epoch": 0.35812798823096725, "grad_norm": 1.9615321714800125, "learning_rate": 7.432505888408603e-06, "loss": 0.7152, "step": 11685 }, { "epoch": 0.35815863675370846, "grad_norm": 1.8000814638908997, "learning_rate": 7.432072253495478e-06, "loss": 0.6051, "step": 11686 }, { "epoch": 0.35818928527644966, "grad_norm": 1.7536649996071698, "learning_rate": 7.431638594618521e-06, "loss": 0.6742, "step": 11687 }, { "epoch": 0.35821993379919087, "grad_norm": 1.193788237045127, "learning_rate": 7.431204911782009e-06, "loss": 0.4726, "step": 11688 }, { "epoch": 0.3582505823219321, "grad_norm": 1.9755724287618635, "learning_rate": 7.4307712049902105e-06, "loss": 0.7541, "step": 11689 }, { "epoch": 0.3582812308446733, "grad_norm": 1.7476113091713474, "learning_rate": 7.430337474247403e-06, "loss": 0.7717, "step": 11690 }, { "epoch": 0.3583118793674145, "grad_norm": 1.8755295909905776, "learning_rate": 7.429903719557857e-06, "loss": 0.728, "step": 11691 }, { "epoch": 0.3583425278901557, "grad_norm": 2.157062261981498, "learning_rate": 7.42946994092585e-06, "loss": 0.6891, "step": 11692 }, { "epoch": 0.3583731764128969, "grad_norm": 1.6535549075334186, "learning_rate": 7.429036138355652e-06, "loss": 0.7061, "step": 11693 }, { "epoch": 0.3584038249356381, "grad_norm": 1.589439744105541, "learning_rate": 7.428602311851542e-06, "loss": 0.6647, "step": 11694 }, { "epoch": 0.3584344734583793, "grad_norm": 0.8514630694112959, "learning_rate": 7.42816846141779e-06, "loss": 0.435, "step": 11695 }, { "epoch": 0.3584651219811205, "grad_norm": 1.8713592182126215, "learning_rate": 7.4277345870586725e-06, "loss": 0.7719, "step": 11696 }, { "epoch": 0.3584957705038617, "grad_norm": 1.7500009655456195, "learning_rate": 7.427300688778465e-06, "loss": 0.683, "step": 11697 }, { "epoch": 0.3585264190266029, "grad_norm": 1.5376357559581828, "learning_rate": 7.426866766581444e-06, "loss": 0.6987, "step": 11698 }, { "epoch": 0.35855706754934413, "grad_norm": 1.6823607755061114, "learning_rate": 7.4264328204718835e-06, "loss": 0.6752, "step": 11699 }, { "epoch": 0.35858771607208534, "grad_norm": 1.8826446887534303, "learning_rate": 7.425998850454059e-06, "loss": 0.7262, "step": 11700 }, { "epoch": 0.35861836459482654, "grad_norm": 1.8396172173564178, "learning_rate": 7.425564856532248e-06, "loss": 0.6885, "step": 11701 }, { "epoch": 0.35864901311756775, "grad_norm": 1.4814253960086428, "learning_rate": 7.4251308387107256e-06, "loss": 0.634, "step": 11702 }, { "epoch": 0.35867966164030896, "grad_norm": 1.4784877119788193, "learning_rate": 7.424696796993769e-06, "loss": 0.6608, "step": 11703 }, { "epoch": 0.35871031016305016, "grad_norm": 1.7430366950624436, "learning_rate": 7.424262731385653e-06, "loss": 0.7758, "step": 11704 }, { "epoch": 0.35874095868579137, "grad_norm": 0.879839568296998, "learning_rate": 7.423828641890657e-06, "loss": 0.4695, "step": 11705 }, { "epoch": 0.3587716072085326, "grad_norm": 1.6650555851992757, "learning_rate": 7.423394528513057e-06, "loss": 0.7449, "step": 11706 }, { "epoch": 0.3588022557312738, "grad_norm": 1.7293470513338305, "learning_rate": 7.422960391257131e-06, "loss": 0.6085, "step": 11707 }, { "epoch": 0.358832904254015, "grad_norm": 1.4980524363038863, "learning_rate": 7.422526230127156e-06, "loss": 0.7375, "step": 11708 }, { "epoch": 0.35886355277675613, "grad_norm": 1.6214439035974346, "learning_rate": 7.42209204512741e-06, "loss": 0.7459, "step": 11709 }, { "epoch": 0.35889420129949734, "grad_norm": 1.7382453365031623, "learning_rate": 7.421657836262172e-06, "loss": 0.7497, "step": 11710 }, { "epoch": 0.35892484982223855, "grad_norm": 1.7368588251382793, "learning_rate": 7.42122360353572e-06, "loss": 0.7225, "step": 11711 }, { "epoch": 0.35895549834497975, "grad_norm": 1.8274880969467713, "learning_rate": 7.4207893469523304e-06, "loss": 0.733, "step": 11712 }, { "epoch": 0.35898614686772096, "grad_norm": 1.4210488933590335, "learning_rate": 7.420355066516286e-06, "loss": 0.5975, "step": 11713 }, { "epoch": 0.35901679539046216, "grad_norm": 1.5980891549794982, "learning_rate": 7.419920762231864e-06, "loss": 0.6376, "step": 11714 }, { "epoch": 0.35904744391320337, "grad_norm": 1.7477119390207045, "learning_rate": 7.419486434103341e-06, "loss": 0.6425, "step": 11715 }, { "epoch": 0.3590780924359446, "grad_norm": 1.5940158063927434, "learning_rate": 7.419052082135001e-06, "loss": 0.6914, "step": 11716 }, { "epoch": 0.3591087409586858, "grad_norm": 1.7660345242211477, "learning_rate": 7.418617706331123e-06, "loss": 0.7765, "step": 11717 }, { "epoch": 0.359139389481427, "grad_norm": 1.5038337406828026, "learning_rate": 7.418183306695984e-06, "loss": 0.7713, "step": 11718 }, { "epoch": 0.3591700380041682, "grad_norm": 1.4469598725015715, "learning_rate": 7.417748883233866e-06, "loss": 0.6392, "step": 11719 }, { "epoch": 0.3592006865269094, "grad_norm": 1.8515242464926172, "learning_rate": 7.417314435949051e-06, "loss": 0.7521, "step": 11720 }, { "epoch": 0.3592313350496506, "grad_norm": 0.8148521796105942, "learning_rate": 7.416879964845818e-06, "loss": 0.4665, "step": 11721 }, { "epoch": 0.3592619835723918, "grad_norm": 0.8376915371649019, "learning_rate": 7.416445469928448e-06, "loss": 0.4661, "step": 11722 }, { "epoch": 0.359292632095133, "grad_norm": 1.7072326936475204, "learning_rate": 7.416010951201222e-06, "loss": 0.7823, "step": 11723 }, { "epoch": 0.3593232806178742, "grad_norm": 1.6524891767128629, "learning_rate": 7.415576408668423e-06, "loss": 0.5893, "step": 11724 }, { "epoch": 0.3593539291406154, "grad_norm": 1.490749959994526, "learning_rate": 7.415141842334333e-06, "loss": 0.6036, "step": 11725 }, { "epoch": 0.35938457766335663, "grad_norm": 1.788772352339334, "learning_rate": 7.4147072522032294e-06, "loss": 0.7276, "step": 11726 }, { "epoch": 0.35941522618609784, "grad_norm": 1.7075567107443836, "learning_rate": 7.4142726382793984e-06, "loss": 0.708, "step": 11727 }, { "epoch": 0.35944587470883904, "grad_norm": 1.5805269599261371, "learning_rate": 7.4138380005671214e-06, "loss": 0.6694, "step": 11728 }, { "epoch": 0.35947652323158025, "grad_norm": 1.7691985893947506, "learning_rate": 7.413403339070682e-06, "loss": 0.6925, "step": 11729 }, { "epoch": 0.35950717175432145, "grad_norm": 1.7559831647726398, "learning_rate": 7.412968653794362e-06, "loss": 0.6342, "step": 11730 }, { "epoch": 0.35953782027706266, "grad_norm": 1.6763470576594879, "learning_rate": 7.412533944742443e-06, "loss": 0.7245, "step": 11731 }, { "epoch": 0.35956846879980386, "grad_norm": 1.8992546885154449, "learning_rate": 7.412099211919209e-06, "loss": 0.6545, "step": 11732 }, { "epoch": 0.35959911732254507, "grad_norm": 1.5646727515105538, "learning_rate": 7.411664455328948e-06, "loss": 0.5714, "step": 11733 }, { "epoch": 0.3596297658452863, "grad_norm": 1.7898941174484266, "learning_rate": 7.411229674975937e-06, "loss": 0.7741, "step": 11734 }, { "epoch": 0.3596604143680275, "grad_norm": 1.6083417028056464, "learning_rate": 7.410794870864464e-06, "loss": 0.6777, "step": 11735 }, { "epoch": 0.3596910628907687, "grad_norm": 1.5925479916846652, "learning_rate": 7.410360042998812e-06, "loss": 0.7249, "step": 11736 }, { "epoch": 0.3597217114135099, "grad_norm": 1.6151729297194641, "learning_rate": 7.409925191383267e-06, "loss": 0.6795, "step": 11737 }, { "epoch": 0.3597523599362511, "grad_norm": 1.6859992265420911, "learning_rate": 7.40949031602211e-06, "loss": 0.7046, "step": 11738 }, { "epoch": 0.3597830084589923, "grad_norm": 1.7817084446840878, "learning_rate": 7.4090554169196304e-06, "loss": 0.6979, "step": 11739 }, { "epoch": 0.35981365698173345, "grad_norm": 1.6976663280356061, "learning_rate": 7.408620494080111e-06, "loss": 0.6204, "step": 11740 }, { "epoch": 0.35984430550447466, "grad_norm": 1.687572703997558, "learning_rate": 7.408185547507837e-06, "loss": 0.6886, "step": 11741 }, { "epoch": 0.35987495402721587, "grad_norm": 1.6450351213899723, "learning_rate": 7.4077505772070955e-06, "loss": 0.676, "step": 11742 }, { "epoch": 0.35990560254995707, "grad_norm": 1.5217156522571769, "learning_rate": 7.407315583182171e-06, "loss": 0.6132, "step": 11743 }, { "epoch": 0.3599362510726983, "grad_norm": 0.8897906688643556, "learning_rate": 7.406880565437351e-06, "loss": 0.4595, "step": 11744 }, { "epoch": 0.3599668995954395, "grad_norm": 1.727606868006014, "learning_rate": 7.4064455239769195e-06, "loss": 0.586, "step": 11745 }, { "epoch": 0.3599975481181807, "grad_norm": 1.591072835372839, "learning_rate": 7.406010458805165e-06, "loss": 0.5661, "step": 11746 }, { "epoch": 0.3600281966409219, "grad_norm": 1.8004804076472056, "learning_rate": 7.405575369926374e-06, "loss": 0.6542, "step": 11747 }, { "epoch": 0.3600588451636631, "grad_norm": 1.6697298071183584, "learning_rate": 7.405140257344835e-06, "loss": 0.6588, "step": 11748 }, { "epoch": 0.3600894936864043, "grad_norm": 1.5722053404512888, "learning_rate": 7.404705121064831e-06, "loss": 0.666, "step": 11749 }, { "epoch": 0.3601201422091455, "grad_norm": 1.6908684149664086, "learning_rate": 7.404269961090653e-06, "loss": 0.6762, "step": 11750 }, { "epoch": 0.3601507907318867, "grad_norm": 1.7782219559021692, "learning_rate": 7.403834777426588e-06, "loss": 0.6659, "step": 11751 }, { "epoch": 0.3601814392546279, "grad_norm": 1.7422385676644414, "learning_rate": 7.403399570076924e-06, "loss": 0.7418, "step": 11752 }, { "epoch": 0.3602120877773691, "grad_norm": 1.5466598688649502, "learning_rate": 7.40296433904595e-06, "loss": 0.7043, "step": 11753 }, { "epoch": 0.36024273630011033, "grad_norm": 1.652303137370934, "learning_rate": 7.4025290843379525e-06, "loss": 0.6324, "step": 11754 }, { "epoch": 0.36027338482285154, "grad_norm": 1.5471915495187951, "learning_rate": 7.402093805957221e-06, "loss": 0.6303, "step": 11755 }, { "epoch": 0.36030403334559274, "grad_norm": 1.6202347298509956, "learning_rate": 7.401658503908046e-06, "loss": 0.6821, "step": 11756 }, { "epoch": 0.36033468186833395, "grad_norm": 1.929181587608493, "learning_rate": 7.401223178194714e-06, "loss": 0.7274, "step": 11757 }, { "epoch": 0.36036533039107516, "grad_norm": 1.6143222714209413, "learning_rate": 7.400787828821516e-06, "loss": 0.6269, "step": 11758 }, { "epoch": 0.36039597891381636, "grad_norm": 1.6919991176857205, "learning_rate": 7.400352455792741e-06, "loss": 0.6368, "step": 11759 }, { "epoch": 0.36042662743655757, "grad_norm": 0.8515313743486554, "learning_rate": 7.39991705911268e-06, "loss": 0.4567, "step": 11760 }, { "epoch": 0.3604572759592988, "grad_norm": 1.728933482663062, "learning_rate": 7.399481638785622e-06, "loss": 0.6983, "step": 11761 }, { "epoch": 0.36048792448204, "grad_norm": 0.7873718447785923, "learning_rate": 7.3990461948158565e-06, "loss": 0.442, "step": 11762 }, { "epoch": 0.3605185730047812, "grad_norm": 2.0350770013173385, "learning_rate": 7.398610727207677e-06, "loss": 0.6406, "step": 11763 }, { "epoch": 0.3605492215275224, "grad_norm": 1.8305035789685364, "learning_rate": 7.39817523596537e-06, "loss": 0.7188, "step": 11764 }, { "epoch": 0.3605798700502636, "grad_norm": 1.4941683209938579, "learning_rate": 7.39773972109323e-06, "loss": 0.6627, "step": 11765 }, { "epoch": 0.3606105185730048, "grad_norm": 0.7907457920346234, "learning_rate": 7.397304182595546e-06, "loss": 0.4726, "step": 11766 }, { "epoch": 0.360641167095746, "grad_norm": 1.6950802007842276, "learning_rate": 7.396868620476611e-06, "loss": 0.6534, "step": 11767 }, { "epoch": 0.3606718156184872, "grad_norm": 1.819303962534574, "learning_rate": 7.396433034740718e-06, "loss": 0.6499, "step": 11768 }, { "epoch": 0.3607024641412284, "grad_norm": 1.6048135553299738, "learning_rate": 7.395997425392154e-06, "loss": 0.6894, "step": 11769 }, { "epoch": 0.3607331126639696, "grad_norm": 1.6137692558266916, "learning_rate": 7.395561792435216e-06, "loss": 0.6325, "step": 11770 }, { "epoch": 0.3607637611867108, "grad_norm": 1.7690230572504906, "learning_rate": 7.395126135874196e-06, "loss": 0.551, "step": 11771 }, { "epoch": 0.360794409709452, "grad_norm": 1.8304570905958577, "learning_rate": 7.394690455713383e-06, "loss": 0.6823, "step": 11772 }, { "epoch": 0.3608250582321932, "grad_norm": 1.8341693510695645, "learning_rate": 7.394254751957073e-06, "loss": 0.7466, "step": 11773 }, { "epoch": 0.3608557067549344, "grad_norm": 0.881838618961353, "learning_rate": 7.393819024609559e-06, "loss": 0.4679, "step": 11774 }, { "epoch": 0.3608863552776756, "grad_norm": 0.8380057310428838, "learning_rate": 7.3933832736751335e-06, "loss": 0.4647, "step": 11775 }, { "epoch": 0.3609170038004168, "grad_norm": 1.7709156587881854, "learning_rate": 7.39294749915809e-06, "loss": 0.7188, "step": 11776 }, { "epoch": 0.360947652323158, "grad_norm": 1.9201534870862416, "learning_rate": 7.392511701062721e-06, "loss": 0.7592, "step": 11777 }, { "epoch": 0.3609783008458992, "grad_norm": 1.6801179452120625, "learning_rate": 7.392075879393324e-06, "loss": 0.6283, "step": 11778 }, { "epoch": 0.3610089493686404, "grad_norm": 1.5859915994292149, "learning_rate": 7.391640034154192e-06, "loss": 0.703, "step": 11779 }, { "epoch": 0.3610395978913816, "grad_norm": 2.0266018461787274, "learning_rate": 7.391204165349618e-06, "loss": 0.692, "step": 11780 }, { "epoch": 0.36107024641412283, "grad_norm": 1.8068630434393183, "learning_rate": 7.390768272983896e-06, "loss": 0.6324, "step": 11781 }, { "epoch": 0.36110089493686404, "grad_norm": 1.486958452065328, "learning_rate": 7.390332357061324e-06, "loss": 0.6807, "step": 11782 }, { "epoch": 0.36113154345960524, "grad_norm": 1.6847781667906452, "learning_rate": 7.389896417586195e-06, "loss": 0.6928, "step": 11783 }, { "epoch": 0.36116219198234645, "grad_norm": 1.7342589344351254, "learning_rate": 7.389460454562806e-06, "loss": 0.6821, "step": 11784 }, { "epoch": 0.36119284050508765, "grad_norm": 1.90780228211002, "learning_rate": 7.3890244679954516e-06, "loss": 0.7093, "step": 11785 }, { "epoch": 0.36122348902782886, "grad_norm": 1.618240400119155, "learning_rate": 7.3885884578884256e-06, "loss": 0.7523, "step": 11786 }, { "epoch": 0.36125413755057006, "grad_norm": 1.5422623503703698, "learning_rate": 7.388152424246031e-06, "loss": 0.6795, "step": 11787 }, { "epoch": 0.36128478607331127, "grad_norm": 1.5514006045261939, "learning_rate": 7.387716367072556e-06, "loss": 0.6068, "step": 11788 }, { "epoch": 0.3613154345960525, "grad_norm": 1.6220981249002688, "learning_rate": 7.387280286372302e-06, "loss": 0.7613, "step": 11789 }, { "epoch": 0.3613460831187937, "grad_norm": 1.5311129148234055, "learning_rate": 7.386844182149564e-06, "loss": 0.6625, "step": 11790 }, { "epoch": 0.3613767316415349, "grad_norm": 1.761859527611915, "learning_rate": 7.38640805440864e-06, "loss": 0.6776, "step": 11791 }, { "epoch": 0.3614073801642761, "grad_norm": 1.6815795944385727, "learning_rate": 7.385971903153826e-06, "loss": 0.7018, "step": 11792 }, { "epoch": 0.3614380286870173, "grad_norm": 0.9455953547229894, "learning_rate": 7.3855357283894216e-06, "loss": 0.4803, "step": 11793 }, { "epoch": 0.3614686772097585, "grad_norm": 0.8549566638728768, "learning_rate": 7.385099530119723e-06, "loss": 0.4659, "step": 11794 }, { "epoch": 0.3614993257324997, "grad_norm": 1.9608432996467833, "learning_rate": 7.384663308349027e-06, "loss": 0.6101, "step": 11795 }, { "epoch": 0.3615299742552409, "grad_norm": 1.5614156815549416, "learning_rate": 7.3842270630816345e-06, "loss": 0.6693, "step": 11796 }, { "epoch": 0.3615606227779821, "grad_norm": 1.6404937122578007, "learning_rate": 7.383790794321841e-06, "loss": 0.6893, "step": 11797 }, { "epoch": 0.3615912713007233, "grad_norm": 1.940344804858651, "learning_rate": 7.383354502073951e-06, "loss": 0.6762, "step": 11798 }, { "epoch": 0.36162191982346453, "grad_norm": 1.9416624275362635, "learning_rate": 7.382918186342256e-06, "loss": 0.7399, "step": 11799 }, { "epoch": 0.36165256834620574, "grad_norm": 1.5883028412595501, "learning_rate": 7.382481847131059e-06, "loss": 0.7412, "step": 11800 }, { "epoch": 0.36168321686894694, "grad_norm": 1.62407953563252, "learning_rate": 7.382045484444658e-06, "loss": 0.6964, "step": 11801 }, { "epoch": 0.3617138653916881, "grad_norm": 1.794602925651006, "learning_rate": 7.3816090982873554e-06, "loss": 0.7186, "step": 11802 }, { "epoch": 0.3617445139144293, "grad_norm": 1.6358070476611657, "learning_rate": 7.381172688663447e-06, "loss": 0.7136, "step": 11803 }, { "epoch": 0.3617751624371705, "grad_norm": 1.6715699634553867, "learning_rate": 7.380736255577236e-06, "loss": 0.642, "step": 11804 }, { "epoch": 0.3618058109599117, "grad_norm": 1.991969436581181, "learning_rate": 7.380299799033022e-06, "loss": 0.72, "step": 11805 }, { "epoch": 0.3618364594826529, "grad_norm": 1.660626196136167, "learning_rate": 7.379863319035104e-06, "loss": 0.6347, "step": 11806 }, { "epoch": 0.3618671080053941, "grad_norm": 1.982664310076471, "learning_rate": 7.3794268155877845e-06, "loss": 0.6973, "step": 11807 }, { "epoch": 0.36189775652813533, "grad_norm": 1.7378638877274504, "learning_rate": 7.3789902886953636e-06, "loss": 0.7692, "step": 11808 }, { "epoch": 0.36192840505087653, "grad_norm": 1.7607176483896434, "learning_rate": 7.378553738362142e-06, "loss": 0.6181, "step": 11809 }, { "epoch": 0.36195905357361774, "grad_norm": 1.6120675718295905, "learning_rate": 7.378117164592422e-06, "loss": 0.6094, "step": 11810 }, { "epoch": 0.36198970209635895, "grad_norm": 1.5999828881251499, "learning_rate": 7.377680567390506e-06, "loss": 0.6404, "step": 11811 }, { "epoch": 0.36202035061910015, "grad_norm": 1.592790413349076, "learning_rate": 7.3772439467606934e-06, "loss": 0.6497, "step": 11812 }, { "epoch": 0.36205099914184136, "grad_norm": 1.8079217601809667, "learning_rate": 7.376807302707291e-06, "loss": 0.71, "step": 11813 }, { "epoch": 0.36208164766458256, "grad_norm": 1.1138256430912026, "learning_rate": 7.376370635234596e-06, "loss": 0.5016, "step": 11814 }, { "epoch": 0.36211229618732377, "grad_norm": 2.09488125697356, "learning_rate": 7.375933944346913e-06, "loss": 0.6341, "step": 11815 }, { "epoch": 0.362142944710065, "grad_norm": 1.6438130109529314, "learning_rate": 7.375497230048544e-06, "loss": 0.6193, "step": 11816 }, { "epoch": 0.3621735932328062, "grad_norm": 1.9348851893446348, "learning_rate": 7.375060492343796e-06, "loss": 0.7968, "step": 11817 }, { "epoch": 0.3622042417555474, "grad_norm": 1.7561187614775478, "learning_rate": 7.374623731236966e-06, "loss": 0.6797, "step": 11818 }, { "epoch": 0.3622348902782886, "grad_norm": 1.658140178486378, "learning_rate": 7.374186946732362e-06, "loss": 0.6661, "step": 11819 }, { "epoch": 0.3622655388010298, "grad_norm": 1.6857158408795387, "learning_rate": 7.373750138834287e-06, "loss": 0.7433, "step": 11820 }, { "epoch": 0.362296187323771, "grad_norm": 1.7878082418411756, "learning_rate": 7.373313307547044e-06, "loss": 0.5904, "step": 11821 }, { "epoch": 0.3623268358465122, "grad_norm": 1.6759628652775214, "learning_rate": 7.372876452874938e-06, "loss": 0.6824, "step": 11822 }, { "epoch": 0.3623574843692534, "grad_norm": 1.6586337305358108, "learning_rate": 7.372439574822273e-06, "loss": 0.6627, "step": 11823 }, { "epoch": 0.3623881328919946, "grad_norm": 1.5774134779187323, "learning_rate": 7.372002673393355e-06, "loss": 0.6559, "step": 11824 }, { "epoch": 0.3624187814147358, "grad_norm": 1.7153665611151745, "learning_rate": 7.371565748592487e-06, "loss": 0.7217, "step": 11825 }, { "epoch": 0.36244942993747703, "grad_norm": 0.7897293084245643, "learning_rate": 7.371128800423975e-06, "loss": 0.4544, "step": 11826 }, { "epoch": 0.36248007846021824, "grad_norm": 1.7466127505848505, "learning_rate": 7.370691828892124e-06, "loss": 0.6965, "step": 11827 }, { "epoch": 0.36251072698295944, "grad_norm": 0.7561964566746053, "learning_rate": 7.370254834001241e-06, "loss": 0.4657, "step": 11828 }, { "epoch": 0.36254137550570065, "grad_norm": 1.486393998854381, "learning_rate": 7.369817815755629e-06, "loss": 0.7252, "step": 11829 }, { "epoch": 0.36257202402844185, "grad_norm": 1.8371031171886416, "learning_rate": 7.3693807741595955e-06, "loss": 0.6699, "step": 11830 }, { "epoch": 0.36260267255118306, "grad_norm": 1.9085158969642777, "learning_rate": 7.368943709217448e-06, "loss": 0.7271, "step": 11831 }, { "epoch": 0.36263332107392426, "grad_norm": 1.7189517430297285, "learning_rate": 7.368506620933491e-06, "loss": 0.8163, "step": 11832 }, { "epoch": 0.3626639695966654, "grad_norm": 1.8740664319038198, "learning_rate": 7.3680695093120334e-06, "loss": 0.6761, "step": 11833 }, { "epoch": 0.3626946181194066, "grad_norm": 0.7947866935943672, "learning_rate": 7.3676323743573805e-06, "loss": 0.4726, "step": 11834 }, { "epoch": 0.3627252666421478, "grad_norm": 1.9244114762963553, "learning_rate": 7.36719521607384e-06, "loss": 0.6876, "step": 11835 }, { "epoch": 0.36275591516488903, "grad_norm": 1.862571028119381, "learning_rate": 7.366758034465719e-06, "loss": 0.6438, "step": 11836 }, { "epoch": 0.36278656368763024, "grad_norm": 0.7976529477457022, "learning_rate": 7.366320829537328e-06, "loss": 0.4973, "step": 11837 }, { "epoch": 0.36281721221037144, "grad_norm": 1.8026143357468725, "learning_rate": 7.365883601292969e-06, "loss": 0.7318, "step": 11838 }, { "epoch": 0.36284786073311265, "grad_norm": 0.788116206085327, "learning_rate": 7.365446349736955e-06, "loss": 0.4707, "step": 11839 }, { "epoch": 0.36287850925585385, "grad_norm": 1.8058657590479295, "learning_rate": 7.365009074873594e-06, "loss": 0.5932, "step": 11840 }, { "epoch": 0.36290915777859506, "grad_norm": 1.629146759753325, "learning_rate": 7.364571776707192e-06, "loss": 0.6967, "step": 11841 }, { "epoch": 0.36293980630133627, "grad_norm": 0.7940838859739818, "learning_rate": 7.3641344552420605e-06, "loss": 0.4768, "step": 11842 }, { "epoch": 0.36297045482407747, "grad_norm": 1.6361326801200538, "learning_rate": 7.363697110482506e-06, "loss": 0.7501, "step": 11843 }, { "epoch": 0.3630011033468187, "grad_norm": 1.6301507336233525, "learning_rate": 7.36325974243284e-06, "loss": 0.6452, "step": 11844 }, { "epoch": 0.3630317518695599, "grad_norm": 1.4415200889257511, "learning_rate": 7.362822351097371e-06, "loss": 0.6442, "step": 11845 }, { "epoch": 0.3630624003923011, "grad_norm": 1.9245679898862527, "learning_rate": 7.362384936480407e-06, "loss": 0.6552, "step": 11846 }, { "epoch": 0.3630930489150423, "grad_norm": 1.6880347695006948, "learning_rate": 7.361947498586262e-06, "loss": 0.7192, "step": 11847 }, { "epoch": 0.3631236974377835, "grad_norm": 1.6167056832517435, "learning_rate": 7.361510037419244e-06, "loss": 0.5677, "step": 11848 }, { "epoch": 0.3631543459605247, "grad_norm": 1.8356570883786905, "learning_rate": 7.361072552983661e-06, "loss": 0.6669, "step": 11849 }, { "epoch": 0.3631849944832659, "grad_norm": 1.703845902987877, "learning_rate": 7.360635045283828e-06, "loss": 0.7157, "step": 11850 }, { "epoch": 0.3632156430060071, "grad_norm": 1.7165975182612068, "learning_rate": 7.360197514324055e-06, "loss": 0.5678, "step": 11851 }, { "epoch": 0.3632462915287483, "grad_norm": 1.8213408085483755, "learning_rate": 7.35975996010865e-06, "loss": 0.5974, "step": 11852 }, { "epoch": 0.3632769400514895, "grad_norm": 1.954926020510581, "learning_rate": 7.359322382641926e-06, "loss": 0.7565, "step": 11853 }, { "epoch": 0.36330758857423073, "grad_norm": 1.8796740538709942, "learning_rate": 7.358884781928196e-06, "loss": 0.7562, "step": 11854 }, { "epoch": 0.36333823709697194, "grad_norm": 1.6583272087126517, "learning_rate": 7.35844715797177e-06, "loss": 0.6824, "step": 11855 }, { "epoch": 0.36336888561971314, "grad_norm": 1.8476515270061806, "learning_rate": 7.358009510776963e-06, "loss": 0.6871, "step": 11856 }, { "epoch": 0.36339953414245435, "grad_norm": 1.7901582041491726, "learning_rate": 7.357571840348082e-06, "loss": 0.6864, "step": 11857 }, { "epoch": 0.36343018266519556, "grad_norm": 1.7347553407694762, "learning_rate": 7.357134146689444e-06, "loss": 0.6764, "step": 11858 }, { "epoch": 0.36346083118793676, "grad_norm": 1.6405186342426716, "learning_rate": 7.356696429805359e-06, "loss": 0.6775, "step": 11859 }, { "epoch": 0.36349147971067797, "grad_norm": 1.6101181671329419, "learning_rate": 7.356258689700143e-06, "loss": 0.7071, "step": 11860 }, { "epoch": 0.3635221282334192, "grad_norm": 1.5860156516014632, "learning_rate": 7.355820926378105e-06, "loss": 0.6924, "step": 11861 }, { "epoch": 0.3635527767561604, "grad_norm": 0.9464929242409337, "learning_rate": 7.3553831398435625e-06, "loss": 0.4603, "step": 11862 }, { "epoch": 0.3635834252789016, "grad_norm": 0.8855267457652044, "learning_rate": 7.354945330100827e-06, "loss": 0.4385, "step": 11863 }, { "epoch": 0.36361407380164273, "grad_norm": 1.6036724507451803, "learning_rate": 7.354507497154212e-06, "loss": 0.7352, "step": 11864 }, { "epoch": 0.36364472232438394, "grad_norm": 1.615557727326425, "learning_rate": 7.354069641008032e-06, "loss": 0.6806, "step": 11865 }, { "epoch": 0.36367537084712515, "grad_norm": 1.7549470931866713, "learning_rate": 7.353631761666602e-06, "loss": 0.6898, "step": 11866 }, { "epoch": 0.36370601936986635, "grad_norm": 1.6064214842170736, "learning_rate": 7.353193859134237e-06, "loss": 0.6082, "step": 11867 }, { "epoch": 0.36373666789260756, "grad_norm": 1.7418202543674604, "learning_rate": 7.352755933415249e-06, "loss": 0.7859, "step": 11868 }, { "epoch": 0.36376731641534876, "grad_norm": 1.8067364818357825, "learning_rate": 7.352317984513956e-06, "loss": 0.7891, "step": 11869 }, { "epoch": 0.36379796493808997, "grad_norm": 1.72920322246423, "learning_rate": 7.351880012434671e-06, "loss": 0.6301, "step": 11870 }, { "epoch": 0.3638286134608312, "grad_norm": 1.7810048096664512, "learning_rate": 7.351442017181711e-06, "loss": 0.7437, "step": 11871 }, { "epoch": 0.3638592619835724, "grad_norm": 1.5372968160725653, "learning_rate": 7.351003998759392e-06, "loss": 0.6373, "step": 11872 }, { "epoch": 0.3638899105063136, "grad_norm": 1.6641493032515744, "learning_rate": 7.350565957172028e-06, "loss": 0.6357, "step": 11873 }, { "epoch": 0.3639205590290548, "grad_norm": 1.6208010720241492, "learning_rate": 7.350127892423936e-06, "loss": 0.687, "step": 11874 }, { "epoch": 0.363951207551796, "grad_norm": 1.834496105404569, "learning_rate": 7.349689804519433e-06, "loss": 0.6447, "step": 11875 }, { "epoch": 0.3639818560745372, "grad_norm": 1.4747443117858556, "learning_rate": 7.349251693462836e-06, "loss": 0.6103, "step": 11876 }, { "epoch": 0.3640125045972784, "grad_norm": 1.7704716796684985, "learning_rate": 7.348813559258461e-06, "loss": 0.6465, "step": 11877 }, { "epoch": 0.3640431531200196, "grad_norm": 1.8361796321251835, "learning_rate": 7.348375401910624e-06, "loss": 0.6786, "step": 11878 }, { "epoch": 0.3640738016427608, "grad_norm": 1.5829071913180992, "learning_rate": 7.3479372214236425e-06, "loss": 0.6936, "step": 11879 }, { "epoch": 0.364104450165502, "grad_norm": 1.642265358667297, "learning_rate": 7.347499017801837e-06, "loss": 0.6851, "step": 11880 }, { "epoch": 0.36413509868824323, "grad_norm": 1.7132039984291716, "learning_rate": 7.347060791049521e-06, "loss": 0.7251, "step": 11881 }, { "epoch": 0.36416574721098444, "grad_norm": 1.922915281693989, "learning_rate": 7.3466225411710156e-06, "loss": 0.6789, "step": 11882 }, { "epoch": 0.36419639573372564, "grad_norm": 1.6338897184926406, "learning_rate": 7.3461842681706375e-06, "loss": 0.68, "step": 11883 }, { "epoch": 0.36422704425646685, "grad_norm": 1.5786844143855063, "learning_rate": 7.345745972052707e-06, "loss": 0.6298, "step": 11884 }, { "epoch": 0.36425769277920805, "grad_norm": 1.0859927793823447, "learning_rate": 7.345307652821538e-06, "loss": 0.4811, "step": 11885 }, { "epoch": 0.36428834130194926, "grad_norm": 1.5376424216030362, "learning_rate": 7.344869310481455e-06, "loss": 0.6938, "step": 11886 }, { "epoch": 0.36431898982469046, "grad_norm": 1.756991299058162, "learning_rate": 7.344430945036776e-06, "loss": 0.6654, "step": 11887 }, { "epoch": 0.36434963834743167, "grad_norm": 1.730035579508242, "learning_rate": 7.343992556491817e-06, "loss": 0.7723, "step": 11888 }, { "epoch": 0.3643802868701729, "grad_norm": 1.7742228672622329, "learning_rate": 7.3435541448509e-06, "loss": 0.732, "step": 11889 }, { "epoch": 0.3644109353929141, "grad_norm": 1.503438555589714, "learning_rate": 7.343115710118344e-06, "loss": 0.6881, "step": 11890 }, { "epoch": 0.3644415839156553, "grad_norm": 1.6756952871322308, "learning_rate": 7.34267725229847e-06, "loss": 0.6888, "step": 11891 }, { "epoch": 0.3644722324383965, "grad_norm": 1.5456475466880972, "learning_rate": 7.3422387713955965e-06, "loss": 0.5731, "step": 11892 }, { "epoch": 0.3645028809611377, "grad_norm": 1.749973964614965, "learning_rate": 7.341800267414047e-06, "loss": 0.674, "step": 11893 }, { "epoch": 0.3645335294838789, "grad_norm": 1.8381869009324765, "learning_rate": 7.341361740358139e-06, "loss": 0.645, "step": 11894 }, { "epoch": 0.36456417800662005, "grad_norm": 1.7458080958285467, "learning_rate": 7.340923190232195e-06, "loss": 0.6767, "step": 11895 }, { "epoch": 0.36459482652936126, "grad_norm": 1.756992362918409, "learning_rate": 7.3404846170405355e-06, "loss": 0.7311, "step": 11896 }, { "epoch": 0.36462547505210247, "grad_norm": 1.509947390783055, "learning_rate": 7.340046020787484e-06, "loss": 0.6717, "step": 11897 }, { "epoch": 0.36465612357484367, "grad_norm": 1.639876141331752, "learning_rate": 7.339607401477357e-06, "loss": 0.6308, "step": 11898 }, { "epoch": 0.3646867720975849, "grad_norm": 0.8399284776690098, "learning_rate": 7.339168759114483e-06, "loss": 0.4503, "step": 11899 }, { "epoch": 0.3647174206203261, "grad_norm": 1.7471549363017784, "learning_rate": 7.338730093703179e-06, "loss": 0.7777, "step": 11900 }, { "epoch": 0.3647480691430673, "grad_norm": 1.5364278189264926, "learning_rate": 7.33829140524777e-06, "loss": 0.6842, "step": 11901 }, { "epoch": 0.3647787176658085, "grad_norm": 1.4744136455479242, "learning_rate": 7.337852693752576e-06, "loss": 0.6823, "step": 11902 }, { "epoch": 0.3648093661885497, "grad_norm": 1.8691972427308234, "learning_rate": 7.337413959221923e-06, "loss": 0.6968, "step": 11903 }, { "epoch": 0.3648400147112909, "grad_norm": 1.6480161567037255, "learning_rate": 7.33697520166013e-06, "loss": 0.7452, "step": 11904 }, { "epoch": 0.3648706632340321, "grad_norm": 1.7543391305515357, "learning_rate": 7.336536421071524e-06, "loss": 0.7191, "step": 11905 }, { "epoch": 0.3649013117567733, "grad_norm": 1.9525188909633007, "learning_rate": 7.336097617460427e-06, "loss": 0.726, "step": 11906 }, { "epoch": 0.3649319602795145, "grad_norm": 1.8658384885045007, "learning_rate": 7.335658790831162e-06, "loss": 0.7154, "step": 11907 }, { "epoch": 0.36496260880225573, "grad_norm": 1.7884443590595949, "learning_rate": 7.335219941188052e-06, "loss": 0.6676, "step": 11908 }, { "epoch": 0.36499325732499693, "grad_norm": 1.6337570432786692, "learning_rate": 7.334781068535424e-06, "loss": 0.6983, "step": 11909 }, { "epoch": 0.36502390584773814, "grad_norm": 1.8219007730622787, "learning_rate": 7.334342172877601e-06, "loss": 0.623, "step": 11910 }, { "epoch": 0.36505455437047934, "grad_norm": 1.6300918565622353, "learning_rate": 7.333903254218906e-06, "loss": 0.654, "step": 11911 }, { "epoch": 0.36508520289322055, "grad_norm": 1.7504391315817085, "learning_rate": 7.333464312563666e-06, "loss": 0.6468, "step": 11912 }, { "epoch": 0.36511585141596176, "grad_norm": 1.6833014576451293, "learning_rate": 7.333025347916205e-06, "loss": 0.7118, "step": 11913 }, { "epoch": 0.36514649993870296, "grad_norm": 1.8532396773608157, "learning_rate": 7.3325863602808486e-06, "loss": 0.6909, "step": 11914 }, { "epoch": 0.36517714846144417, "grad_norm": 1.6941924698720634, "learning_rate": 7.332147349661921e-06, "loss": 0.7235, "step": 11915 }, { "epoch": 0.3652077969841854, "grad_norm": 1.671897571315355, "learning_rate": 7.33170831606375e-06, "loss": 0.7312, "step": 11916 }, { "epoch": 0.3652384455069266, "grad_norm": 1.7294827482778758, "learning_rate": 7.33126925949066e-06, "loss": 0.6136, "step": 11917 }, { "epoch": 0.3652690940296678, "grad_norm": 1.6943974701989306, "learning_rate": 7.330830179946977e-06, "loss": 0.633, "step": 11918 }, { "epoch": 0.365299742552409, "grad_norm": 0.8286690448982382, "learning_rate": 7.330391077437028e-06, "loss": 0.4707, "step": 11919 }, { "epoch": 0.3653303910751502, "grad_norm": 0.8043578523488317, "learning_rate": 7.32995195196514e-06, "loss": 0.4658, "step": 11920 }, { "epoch": 0.3653610395978914, "grad_norm": 1.5817409845451522, "learning_rate": 7.329512803535639e-06, "loss": 0.7165, "step": 11921 }, { "epoch": 0.3653916881206326, "grad_norm": 1.7825513699186044, "learning_rate": 7.329073632152852e-06, "loss": 0.8032, "step": 11922 }, { "epoch": 0.3654223366433738, "grad_norm": 2.1996009308821614, "learning_rate": 7.328634437821107e-06, "loss": 0.746, "step": 11923 }, { "epoch": 0.365452985166115, "grad_norm": 1.7162844568685178, "learning_rate": 7.328195220544731e-06, "loss": 0.6603, "step": 11924 }, { "epoch": 0.3654836336888562, "grad_norm": 1.712839446623128, "learning_rate": 7.327755980328053e-06, "loss": 0.7013, "step": 11925 }, { "epoch": 0.3655142822115974, "grad_norm": 1.830441082503169, "learning_rate": 7.3273167171753965e-06, "loss": 0.6925, "step": 11926 }, { "epoch": 0.3655449307343386, "grad_norm": 1.7169093183793545, "learning_rate": 7.326877431091095e-06, "loss": 0.6483, "step": 11927 }, { "epoch": 0.3655755792570798, "grad_norm": 1.7508347413753376, "learning_rate": 7.326438122079474e-06, "loss": 0.716, "step": 11928 }, { "epoch": 0.365606227779821, "grad_norm": 0.9667735753717682, "learning_rate": 7.325998790144866e-06, "loss": 0.4612, "step": 11929 }, { "epoch": 0.3656368763025622, "grad_norm": 1.7192894886698737, "learning_rate": 7.325559435291594e-06, "loss": 0.6867, "step": 11930 }, { "epoch": 0.3656675248253034, "grad_norm": 1.655569366881228, "learning_rate": 7.3251200575239904e-06, "loss": 0.635, "step": 11931 }, { "epoch": 0.3656981733480446, "grad_norm": 1.8249199479931724, "learning_rate": 7.324680656846382e-06, "loss": 0.6175, "step": 11932 }, { "epoch": 0.3657288218707858, "grad_norm": 1.6641456346886139, "learning_rate": 7.324241233263103e-06, "loss": 0.6431, "step": 11933 }, { "epoch": 0.365759470393527, "grad_norm": 1.7752808469092285, "learning_rate": 7.32380178677848e-06, "loss": 0.6409, "step": 11934 }, { "epoch": 0.3657901189162682, "grad_norm": 1.6760271361881416, "learning_rate": 7.323362317396843e-06, "loss": 0.7503, "step": 11935 }, { "epoch": 0.36582076743900943, "grad_norm": 1.7585486373502144, "learning_rate": 7.322922825122522e-06, "loss": 0.6819, "step": 11936 }, { "epoch": 0.36585141596175064, "grad_norm": 1.6938054208211488, "learning_rate": 7.322483309959849e-06, "loss": 0.6469, "step": 11937 }, { "epoch": 0.36588206448449184, "grad_norm": 0.8470614423660866, "learning_rate": 7.322043771913154e-06, "loss": 0.4777, "step": 11938 }, { "epoch": 0.36591271300723305, "grad_norm": 1.667021723019065, "learning_rate": 7.321604210986766e-06, "loss": 0.7073, "step": 11939 }, { "epoch": 0.36594336152997425, "grad_norm": 1.6527680038245707, "learning_rate": 7.321164627185019e-06, "loss": 0.7027, "step": 11940 }, { "epoch": 0.36597401005271546, "grad_norm": 1.6209807777375556, "learning_rate": 7.320725020512243e-06, "loss": 0.6142, "step": 11941 }, { "epoch": 0.36600465857545667, "grad_norm": 1.815724125797066, "learning_rate": 7.32028539097277e-06, "loss": 0.687, "step": 11942 }, { "epoch": 0.36603530709819787, "grad_norm": 1.7395456061008094, "learning_rate": 7.31984573857093e-06, "loss": 0.7291, "step": 11943 }, { "epoch": 0.3660659556209391, "grad_norm": 1.6344927384972794, "learning_rate": 7.319406063311056e-06, "loss": 0.5912, "step": 11944 }, { "epoch": 0.3660966041436803, "grad_norm": 0.7823425548902037, "learning_rate": 7.318966365197483e-06, "loss": 0.4677, "step": 11945 }, { "epoch": 0.3661272526664215, "grad_norm": 1.7103674749435072, "learning_rate": 7.318526644234538e-06, "loss": 0.6688, "step": 11946 }, { "epoch": 0.3661579011891627, "grad_norm": 0.8064031478976332, "learning_rate": 7.31808690042656e-06, "loss": 0.4711, "step": 11947 }, { "epoch": 0.3661885497119039, "grad_norm": 0.7854862355726725, "learning_rate": 7.317647133777877e-06, "loss": 0.4767, "step": 11948 }, { "epoch": 0.3662191982346451, "grad_norm": 1.5997923350789176, "learning_rate": 7.317207344292825e-06, "loss": 0.6463, "step": 11949 }, { "epoch": 0.3662498467573863, "grad_norm": 1.884846224819618, "learning_rate": 7.316767531975734e-06, "loss": 0.6519, "step": 11950 }, { "epoch": 0.3662804952801275, "grad_norm": 0.7719245962882257, "learning_rate": 7.31632769683094e-06, "loss": 0.4748, "step": 11951 }, { "epoch": 0.3663111438028687, "grad_norm": 0.7558982304670666, "learning_rate": 7.315887838862778e-06, "loss": 0.4633, "step": 11952 }, { "epoch": 0.3663417923256099, "grad_norm": 1.7649388448386407, "learning_rate": 7.315447958075581e-06, "loss": 0.6977, "step": 11953 }, { "epoch": 0.36637244084835113, "grad_norm": 1.5270002828650828, "learning_rate": 7.315008054473681e-06, "loss": 0.5891, "step": 11954 }, { "epoch": 0.36640308937109234, "grad_norm": 1.723956446743994, "learning_rate": 7.314568128061416e-06, "loss": 0.6318, "step": 11955 }, { "epoch": 0.36643373789383354, "grad_norm": 1.8959170401012302, "learning_rate": 7.314128178843118e-06, "loss": 0.739, "step": 11956 }, { "epoch": 0.3664643864165747, "grad_norm": 1.659905663785205, "learning_rate": 7.313688206823123e-06, "loss": 0.7229, "step": 11957 }, { "epoch": 0.3664950349393159, "grad_norm": 1.8230607569126343, "learning_rate": 7.3132482120057654e-06, "loss": 0.6973, "step": 11958 }, { "epoch": 0.3665256834620571, "grad_norm": 1.7102567357234906, "learning_rate": 7.312808194395382e-06, "loss": 0.6926, "step": 11959 }, { "epoch": 0.3665563319847983, "grad_norm": 1.5966721688037933, "learning_rate": 7.312368153996309e-06, "loss": 0.6981, "step": 11960 }, { "epoch": 0.3665869805075395, "grad_norm": 1.5602094192014102, "learning_rate": 7.311928090812878e-06, "loss": 0.6567, "step": 11961 }, { "epoch": 0.3666176290302807, "grad_norm": 1.430178218801929, "learning_rate": 7.31148800484943e-06, "loss": 0.6365, "step": 11962 }, { "epoch": 0.36664827755302193, "grad_norm": 1.6210857310552254, "learning_rate": 7.311047896110299e-06, "loss": 0.6084, "step": 11963 }, { "epoch": 0.36667892607576313, "grad_norm": 1.63049110940471, "learning_rate": 7.310607764599823e-06, "loss": 0.6238, "step": 11964 }, { "epoch": 0.36670957459850434, "grad_norm": 0.9124571482046374, "learning_rate": 7.310167610322337e-06, "loss": 0.4548, "step": 11965 }, { "epoch": 0.36674022312124555, "grad_norm": 1.7532335226275393, "learning_rate": 7.309727433282177e-06, "loss": 0.7797, "step": 11966 }, { "epoch": 0.36677087164398675, "grad_norm": 1.1695966712247374, "learning_rate": 7.3092872334836814e-06, "loss": 0.4661, "step": 11967 }, { "epoch": 0.36680152016672796, "grad_norm": 2.1168865177536698, "learning_rate": 7.30884701093119e-06, "loss": 0.6638, "step": 11968 }, { "epoch": 0.36683216868946916, "grad_norm": 1.4781182599939093, "learning_rate": 7.308406765629037e-06, "loss": 0.6608, "step": 11969 }, { "epoch": 0.36686281721221037, "grad_norm": 1.7390752619601275, "learning_rate": 7.307966497581562e-06, "loss": 0.7419, "step": 11970 }, { "epoch": 0.3668934657349516, "grad_norm": 1.6166711067405735, "learning_rate": 7.307526206793102e-06, "loss": 0.6261, "step": 11971 }, { "epoch": 0.3669241142576928, "grad_norm": 1.8074198867381708, "learning_rate": 7.3070858932679956e-06, "loss": 0.6586, "step": 11972 }, { "epoch": 0.366954762780434, "grad_norm": 1.8709770138093789, "learning_rate": 7.3066455570105824e-06, "loss": 0.8503, "step": 11973 }, { "epoch": 0.3669854113031752, "grad_norm": 0.8581936428860965, "learning_rate": 7.3062051980252e-06, "loss": 0.4691, "step": 11974 }, { "epoch": 0.3670160598259164, "grad_norm": 1.5600908310330608, "learning_rate": 7.305764816316188e-06, "loss": 0.6785, "step": 11975 }, { "epoch": 0.3670467083486576, "grad_norm": 1.7544586666692585, "learning_rate": 7.305324411887884e-06, "loss": 0.731, "step": 11976 }, { "epoch": 0.3670773568713988, "grad_norm": 1.7289641517620526, "learning_rate": 7.3048839847446305e-06, "loss": 0.6796, "step": 11977 }, { "epoch": 0.36710800539414, "grad_norm": 0.7863724856499854, "learning_rate": 7.304443534890764e-06, "loss": 0.4801, "step": 11978 }, { "epoch": 0.3671386539168812, "grad_norm": 1.8142925599200521, "learning_rate": 7.304003062330627e-06, "loss": 0.6999, "step": 11979 }, { "epoch": 0.3671693024396224, "grad_norm": 0.8112577881771054, "learning_rate": 7.303562567068557e-06, "loss": 0.492, "step": 11980 }, { "epoch": 0.36719995096236363, "grad_norm": 0.7549940987925461, "learning_rate": 7.303122049108897e-06, "loss": 0.4571, "step": 11981 }, { "epoch": 0.36723059948510484, "grad_norm": 1.5200419980016946, "learning_rate": 7.302681508455985e-06, "loss": 0.6778, "step": 11982 }, { "epoch": 0.36726124800784604, "grad_norm": 1.7159513212061295, "learning_rate": 7.302240945114164e-06, "loss": 0.6355, "step": 11983 }, { "epoch": 0.36729189653058725, "grad_norm": 1.7279448557503554, "learning_rate": 7.301800359087773e-06, "loss": 0.8405, "step": 11984 }, { "epoch": 0.36732254505332845, "grad_norm": 1.506937533078609, "learning_rate": 7.301359750381154e-06, "loss": 0.6001, "step": 11985 }, { "epoch": 0.36735319357606966, "grad_norm": 1.7049074706573764, "learning_rate": 7.300919118998648e-06, "loss": 0.7185, "step": 11986 }, { "epoch": 0.36738384209881086, "grad_norm": 1.6981258408726976, "learning_rate": 7.300478464944599e-06, "loss": 0.6687, "step": 11987 }, { "epoch": 0.367414490621552, "grad_norm": 1.8453057334856953, "learning_rate": 7.300037788223346e-06, "loss": 0.738, "step": 11988 }, { "epoch": 0.3674451391442932, "grad_norm": 1.6098595358252636, "learning_rate": 7.299597088839232e-06, "loss": 0.6409, "step": 11989 }, { "epoch": 0.3674757876670344, "grad_norm": 1.7855708316082364, "learning_rate": 7.2991563667966004e-06, "loss": 0.7257, "step": 11990 }, { "epoch": 0.36750643618977563, "grad_norm": 0.9438077339869825, "learning_rate": 7.298715622099793e-06, "loss": 0.4725, "step": 11991 }, { "epoch": 0.36753708471251684, "grad_norm": 1.7239306973415156, "learning_rate": 7.298274854753153e-06, "loss": 0.6152, "step": 11992 }, { "epoch": 0.36756773323525804, "grad_norm": 1.6255295597419779, "learning_rate": 7.29783406476102e-06, "loss": 0.6999, "step": 11993 }, { "epoch": 0.36759838175799925, "grad_norm": 1.910990809469809, "learning_rate": 7.297393252127744e-06, "loss": 0.7288, "step": 11994 }, { "epoch": 0.36762903028074045, "grad_norm": 1.7236496673355202, "learning_rate": 7.2969524168576615e-06, "loss": 0.6875, "step": 11995 }, { "epoch": 0.36765967880348166, "grad_norm": 1.7488384536656245, "learning_rate": 7.29651155895512e-06, "loss": 0.6526, "step": 11996 }, { "epoch": 0.36769032732622287, "grad_norm": 0.8060225927716155, "learning_rate": 7.296070678424461e-06, "loss": 0.4641, "step": 11997 }, { "epoch": 0.36772097584896407, "grad_norm": 1.7944227365395153, "learning_rate": 7.295629775270033e-06, "loss": 0.709, "step": 11998 }, { "epoch": 0.3677516243717053, "grad_norm": 1.7045135937984934, "learning_rate": 7.2951888494961755e-06, "loss": 0.7342, "step": 11999 }, { "epoch": 0.3677822728944465, "grad_norm": 1.7735203190489142, "learning_rate": 7.294747901107235e-06, "loss": 0.7215, "step": 12000 }, { "epoch": 0.3678129214171877, "grad_norm": 1.6248115246448727, "learning_rate": 7.294306930107556e-06, "loss": 0.5861, "step": 12001 }, { "epoch": 0.3678435699399289, "grad_norm": 1.5727490610092845, "learning_rate": 7.293865936501485e-06, "loss": 0.6411, "step": 12002 }, { "epoch": 0.3678742184626701, "grad_norm": 1.6214322125982272, "learning_rate": 7.293424920293366e-06, "loss": 0.7747, "step": 12003 }, { "epoch": 0.3679048669854113, "grad_norm": 2.231141327042331, "learning_rate": 7.2929838814875434e-06, "loss": 0.6673, "step": 12004 }, { "epoch": 0.3679355155081525, "grad_norm": 0.7893231853390528, "learning_rate": 7.292542820088364e-06, "loss": 0.457, "step": 12005 }, { "epoch": 0.3679661640308937, "grad_norm": 1.765272853599741, "learning_rate": 7.292101736100175e-06, "loss": 0.7359, "step": 12006 }, { "epoch": 0.3679968125536349, "grad_norm": 1.7068570760296162, "learning_rate": 7.291660629527321e-06, "loss": 0.6468, "step": 12007 }, { "epoch": 0.36802746107637613, "grad_norm": 1.7313151485109948, "learning_rate": 7.291219500374147e-06, "loss": 0.6665, "step": 12008 }, { "epoch": 0.36805810959911733, "grad_norm": 1.7359734585312312, "learning_rate": 7.2907783486450016e-06, "loss": 0.5823, "step": 12009 }, { "epoch": 0.36808875812185854, "grad_norm": 1.8136066199693734, "learning_rate": 7.290337174344231e-06, "loss": 0.7444, "step": 12010 }, { "epoch": 0.36811940664459974, "grad_norm": 0.779069838708038, "learning_rate": 7.289895977476184e-06, "loss": 0.4659, "step": 12011 }, { "epoch": 0.36815005516734095, "grad_norm": 1.5046625872449784, "learning_rate": 7.289454758045203e-06, "loss": 0.6906, "step": 12012 }, { "epoch": 0.36818070369008216, "grad_norm": 1.6889450584711594, "learning_rate": 7.289013516055639e-06, "loss": 0.7128, "step": 12013 }, { "epoch": 0.36821135221282336, "grad_norm": 3.1276504096141466, "learning_rate": 7.288572251511842e-06, "loss": 0.7061, "step": 12014 }, { "epoch": 0.36824200073556457, "grad_norm": 1.670444446646396, "learning_rate": 7.2881309644181546e-06, "loss": 0.6388, "step": 12015 }, { "epoch": 0.3682726492583058, "grad_norm": 0.7831171423663651, "learning_rate": 7.287689654778928e-06, "loss": 0.4669, "step": 12016 }, { "epoch": 0.368303297781047, "grad_norm": 1.8563976038665644, "learning_rate": 7.287248322598509e-06, "loss": 0.6587, "step": 12017 }, { "epoch": 0.3683339463037882, "grad_norm": 1.5322474778764954, "learning_rate": 7.286806967881248e-06, "loss": 0.6008, "step": 12018 }, { "epoch": 0.36836459482652933, "grad_norm": 1.7301494793333687, "learning_rate": 7.286365590631492e-06, "loss": 0.727, "step": 12019 }, { "epoch": 0.36839524334927054, "grad_norm": 1.6593758876330031, "learning_rate": 7.285924190853593e-06, "loss": 0.6594, "step": 12020 }, { "epoch": 0.36842589187201175, "grad_norm": 0.8403587590012654, "learning_rate": 7.285482768551897e-06, "loss": 0.4794, "step": 12021 }, { "epoch": 0.36845654039475295, "grad_norm": 1.6899003117371312, "learning_rate": 7.285041323730754e-06, "loss": 0.6722, "step": 12022 }, { "epoch": 0.36848718891749416, "grad_norm": 1.67282333035223, "learning_rate": 7.284599856394513e-06, "loss": 0.694, "step": 12023 }, { "epoch": 0.36851783744023536, "grad_norm": 1.4785213882549788, "learning_rate": 7.284158366547527e-06, "loss": 0.6011, "step": 12024 }, { "epoch": 0.36854848596297657, "grad_norm": 1.6013121472470704, "learning_rate": 7.283716854194144e-06, "loss": 0.5672, "step": 12025 }, { "epoch": 0.3685791344857178, "grad_norm": 1.6596705116124484, "learning_rate": 7.283275319338714e-06, "loss": 0.602, "step": 12026 }, { "epoch": 0.368609783008459, "grad_norm": 1.8334245997000658, "learning_rate": 7.282833761985588e-06, "loss": 0.5861, "step": 12027 }, { "epoch": 0.3686404315312002, "grad_norm": 1.7290431744554877, "learning_rate": 7.282392182139117e-06, "loss": 0.5904, "step": 12028 }, { "epoch": 0.3686710800539414, "grad_norm": 1.730673162488494, "learning_rate": 7.2819505798036525e-06, "loss": 0.6733, "step": 12029 }, { "epoch": 0.3687017285766826, "grad_norm": 1.7623209253852177, "learning_rate": 7.281508954983544e-06, "loss": 0.6663, "step": 12030 }, { "epoch": 0.3687323770994238, "grad_norm": 1.811747679083268, "learning_rate": 7.281067307683144e-06, "loss": 0.7045, "step": 12031 }, { "epoch": 0.368763025622165, "grad_norm": 1.5515157096985661, "learning_rate": 7.280625637906804e-06, "loss": 0.6983, "step": 12032 }, { "epoch": 0.3687936741449062, "grad_norm": 1.8768504510819328, "learning_rate": 7.280183945658879e-06, "loss": 0.6511, "step": 12033 }, { "epoch": 0.3688243226676474, "grad_norm": 0.8793150656243185, "learning_rate": 7.279742230943714e-06, "loss": 0.4537, "step": 12034 }, { "epoch": 0.3688549711903886, "grad_norm": 0.819350298768413, "learning_rate": 7.2793004937656686e-06, "loss": 0.4749, "step": 12035 }, { "epoch": 0.36888561971312983, "grad_norm": 0.813318553484185, "learning_rate": 7.27885873412909e-06, "loss": 0.4872, "step": 12036 }, { "epoch": 0.36891626823587104, "grad_norm": 1.7323520888001567, "learning_rate": 7.278416952038335e-06, "loss": 0.69, "step": 12037 }, { "epoch": 0.36894691675861224, "grad_norm": 1.6562346240664423, "learning_rate": 7.277975147497753e-06, "loss": 0.6206, "step": 12038 }, { "epoch": 0.36897756528135345, "grad_norm": 0.7991343093717975, "learning_rate": 7.277533320511702e-06, "loss": 0.4684, "step": 12039 }, { "epoch": 0.36900821380409465, "grad_norm": 1.8674440987090777, "learning_rate": 7.27709147108453e-06, "loss": 0.8086, "step": 12040 }, { "epoch": 0.36903886232683586, "grad_norm": 1.665449640485241, "learning_rate": 7.276649599220594e-06, "loss": 0.6876, "step": 12041 }, { "epoch": 0.36906951084957706, "grad_norm": 1.7393215154366302, "learning_rate": 7.276207704924247e-06, "loss": 0.6801, "step": 12042 }, { "epoch": 0.36910015937231827, "grad_norm": 1.9231469163517698, "learning_rate": 7.275765788199842e-06, "loss": 0.6246, "step": 12043 }, { "epoch": 0.3691308078950595, "grad_norm": 1.8177328790588514, "learning_rate": 7.275323849051734e-06, "loss": 0.7178, "step": 12044 }, { "epoch": 0.3691614564178007, "grad_norm": 1.580081866715608, "learning_rate": 7.27488188748428e-06, "loss": 0.6655, "step": 12045 }, { "epoch": 0.3691921049405419, "grad_norm": 1.6746154783966114, "learning_rate": 7.274439903501832e-06, "loss": 0.7126, "step": 12046 }, { "epoch": 0.3692227534632831, "grad_norm": 1.7443469448191071, "learning_rate": 7.273997897108744e-06, "loss": 0.6685, "step": 12047 }, { "epoch": 0.3692534019860243, "grad_norm": 1.6182430571737634, "learning_rate": 7.273555868309377e-06, "loss": 0.6965, "step": 12048 }, { "epoch": 0.3692840505087655, "grad_norm": 1.8013251965099661, "learning_rate": 7.273113817108078e-06, "loss": 0.6606, "step": 12049 }, { "epoch": 0.36931469903150665, "grad_norm": 1.885781309257011, "learning_rate": 7.2726717435092095e-06, "loss": 0.5874, "step": 12050 }, { "epoch": 0.36934534755424786, "grad_norm": 1.4265225749524721, "learning_rate": 7.272229647517124e-06, "loss": 0.7493, "step": 12051 }, { "epoch": 0.36937599607698907, "grad_norm": 1.6147386889644988, "learning_rate": 7.2717875291361796e-06, "loss": 0.6053, "step": 12052 }, { "epoch": 0.36940664459973027, "grad_norm": 1.5409606590535647, "learning_rate": 7.2713453883707294e-06, "loss": 0.7133, "step": 12053 }, { "epoch": 0.3694372931224715, "grad_norm": 1.8361290535705972, "learning_rate": 7.270903225225132e-06, "loss": 0.6958, "step": 12054 }, { "epoch": 0.3694679416452127, "grad_norm": 1.7489003086461905, "learning_rate": 7.2704610397037445e-06, "loss": 0.5812, "step": 12055 }, { "epoch": 0.3694985901679539, "grad_norm": 1.804652618541021, "learning_rate": 7.270018831810924e-06, "loss": 0.856, "step": 12056 }, { "epoch": 0.3695292386906951, "grad_norm": 1.552057049896232, "learning_rate": 7.269576601551027e-06, "loss": 0.6758, "step": 12057 }, { "epoch": 0.3695598872134363, "grad_norm": 1.545593675829641, "learning_rate": 7.269134348928411e-06, "loss": 0.6033, "step": 12058 }, { "epoch": 0.3695905357361775, "grad_norm": 1.492961672795765, "learning_rate": 7.268692073947434e-06, "loss": 0.6431, "step": 12059 }, { "epoch": 0.3696211842589187, "grad_norm": 1.5830277514796482, "learning_rate": 7.268249776612453e-06, "loss": 0.6563, "step": 12060 }, { "epoch": 0.3696518327816599, "grad_norm": 1.707902930550058, "learning_rate": 7.267807456927828e-06, "loss": 0.6273, "step": 12061 }, { "epoch": 0.3696824813044011, "grad_norm": 0.9132648474099486, "learning_rate": 7.267365114897914e-06, "loss": 0.4822, "step": 12062 }, { "epoch": 0.36971312982714233, "grad_norm": 0.9300949762765142, "learning_rate": 7.266922750527073e-06, "loss": 0.4851, "step": 12063 }, { "epoch": 0.36974377834988353, "grad_norm": 1.6407514824999754, "learning_rate": 7.26648036381966e-06, "loss": 0.6281, "step": 12064 }, { "epoch": 0.36977442687262474, "grad_norm": 0.7299502461727269, "learning_rate": 7.266037954780038e-06, "loss": 0.4509, "step": 12065 }, { "epoch": 0.36980507539536595, "grad_norm": 1.7871522188534559, "learning_rate": 7.265595523412563e-06, "loss": 0.6684, "step": 12066 }, { "epoch": 0.36983572391810715, "grad_norm": 1.676666659112615, "learning_rate": 7.265153069721597e-06, "loss": 0.7402, "step": 12067 }, { "epoch": 0.36986637244084836, "grad_norm": 1.759043567968306, "learning_rate": 7.264710593711497e-06, "loss": 0.6944, "step": 12068 }, { "epoch": 0.36989702096358956, "grad_norm": 1.7728016326027969, "learning_rate": 7.264268095386625e-06, "loss": 0.6145, "step": 12069 }, { "epoch": 0.36992766948633077, "grad_norm": 1.7923656393257283, "learning_rate": 7.263825574751339e-06, "loss": 0.6593, "step": 12070 }, { "epoch": 0.369958318009072, "grad_norm": 0.8628118337246518, "learning_rate": 7.26338303181e-06, "loss": 0.4378, "step": 12071 }, { "epoch": 0.3699889665318132, "grad_norm": 1.9282693519428937, "learning_rate": 7.262940466566971e-06, "loss": 0.6497, "step": 12072 }, { "epoch": 0.3700196150545544, "grad_norm": 1.8537772203432865, "learning_rate": 7.262497879026609e-06, "loss": 0.7453, "step": 12073 }, { "epoch": 0.3700502635772956, "grad_norm": 1.8241426803892673, "learning_rate": 7.2620552691932766e-06, "loss": 0.7283, "step": 12074 }, { "epoch": 0.3700809121000368, "grad_norm": 0.8610869651180998, "learning_rate": 7.2616126370713355e-06, "loss": 0.4657, "step": 12075 }, { "epoch": 0.370111560622778, "grad_norm": 0.803036721310234, "learning_rate": 7.261169982665146e-06, "loss": 0.441, "step": 12076 }, { "epoch": 0.3701422091455192, "grad_norm": 0.7761469116427766, "learning_rate": 7.26072730597907e-06, "loss": 0.4517, "step": 12077 }, { "epoch": 0.3701728576682604, "grad_norm": 1.8731518241162082, "learning_rate": 7.26028460701747e-06, "loss": 0.6965, "step": 12078 }, { "epoch": 0.3702035061910016, "grad_norm": 0.7923206852397874, "learning_rate": 7.259841885784707e-06, "loss": 0.4625, "step": 12079 }, { "epoch": 0.3702341547137428, "grad_norm": 1.6634616015667814, "learning_rate": 7.259399142285145e-06, "loss": 0.6647, "step": 12080 }, { "epoch": 0.370264803236484, "grad_norm": 1.7509294081442095, "learning_rate": 7.258956376523143e-06, "loss": 0.6992, "step": 12081 }, { "epoch": 0.3702954517592252, "grad_norm": 1.6528775226026273, "learning_rate": 7.258513588503067e-06, "loss": 0.6711, "step": 12082 }, { "epoch": 0.3703261002819664, "grad_norm": 1.737100963674655, "learning_rate": 7.258070778229279e-06, "loss": 0.6895, "step": 12083 }, { "epoch": 0.3703567488047076, "grad_norm": 1.774539912195661, "learning_rate": 7.257627945706141e-06, "loss": 0.7002, "step": 12084 }, { "epoch": 0.3703873973274488, "grad_norm": 1.6199519005972736, "learning_rate": 7.257185090938017e-06, "loss": 0.7178, "step": 12085 }, { "epoch": 0.37041804585019, "grad_norm": 1.789315533494675, "learning_rate": 7.2567422139292706e-06, "loss": 0.7141, "step": 12086 }, { "epoch": 0.3704486943729312, "grad_norm": 0.9679257582691889, "learning_rate": 7.256299314684269e-06, "loss": 0.4747, "step": 12087 }, { "epoch": 0.3704793428956724, "grad_norm": 1.827713734910388, "learning_rate": 7.25585639320737e-06, "loss": 0.6878, "step": 12088 }, { "epoch": 0.3705099914184136, "grad_norm": 1.7130442112005113, "learning_rate": 7.255413449502942e-06, "loss": 0.6438, "step": 12089 }, { "epoch": 0.3705406399411548, "grad_norm": 1.6548357721609477, "learning_rate": 7.254970483575345e-06, "loss": 0.6033, "step": 12090 }, { "epoch": 0.37057128846389603, "grad_norm": 0.8285253211577897, "learning_rate": 7.254527495428951e-06, "loss": 0.4595, "step": 12091 }, { "epoch": 0.37060193698663724, "grad_norm": 2.1577909642034174, "learning_rate": 7.254084485068119e-06, "loss": 0.7897, "step": 12092 }, { "epoch": 0.37063258550937844, "grad_norm": 2.0717266027524524, "learning_rate": 7.2536414524972154e-06, "loss": 0.7508, "step": 12093 }, { "epoch": 0.37066323403211965, "grad_norm": 1.7263214705709007, "learning_rate": 7.253198397720607e-06, "loss": 0.7366, "step": 12094 }, { "epoch": 0.37069388255486085, "grad_norm": 0.7932951091270575, "learning_rate": 7.252755320742658e-06, "loss": 0.4477, "step": 12095 }, { "epoch": 0.37072453107760206, "grad_norm": 1.7834779327427344, "learning_rate": 7.252312221567734e-06, "loss": 0.7141, "step": 12096 }, { "epoch": 0.37075517960034327, "grad_norm": 1.8455865852615192, "learning_rate": 7.2518691002002014e-06, "loss": 0.6834, "step": 12097 }, { "epoch": 0.37078582812308447, "grad_norm": 1.6856037435370421, "learning_rate": 7.251425956644426e-06, "loss": 0.5863, "step": 12098 }, { "epoch": 0.3708164766458257, "grad_norm": 0.8040019455942432, "learning_rate": 7.250982790904776e-06, "loss": 0.4457, "step": 12099 }, { "epoch": 0.3708471251685669, "grad_norm": 2.052058039978196, "learning_rate": 7.250539602985616e-06, "loss": 0.7852, "step": 12100 }, { "epoch": 0.3708777736913081, "grad_norm": 0.8228225798442527, "learning_rate": 7.250096392891312e-06, "loss": 0.469, "step": 12101 }, { "epoch": 0.3709084222140493, "grad_norm": 1.7629590595099163, "learning_rate": 7.249653160626236e-06, "loss": 0.6504, "step": 12102 }, { "epoch": 0.3709390707367905, "grad_norm": 1.6144373575732587, "learning_rate": 7.249209906194748e-06, "loss": 0.5829, "step": 12103 }, { "epoch": 0.3709697192595317, "grad_norm": 1.8083619105252715, "learning_rate": 7.248766629601221e-06, "loss": 0.5598, "step": 12104 }, { "epoch": 0.3710003677822729, "grad_norm": 1.863457809858878, "learning_rate": 7.2483233308500215e-06, "loss": 0.6742, "step": 12105 }, { "epoch": 0.3710310163050141, "grad_norm": 1.7360764310479921, "learning_rate": 7.247880009945517e-06, "loss": 0.5653, "step": 12106 }, { "epoch": 0.3710616648277553, "grad_norm": 1.7571824608841802, "learning_rate": 7.247436666892075e-06, "loss": 0.6584, "step": 12107 }, { "epoch": 0.3710923133504965, "grad_norm": 1.598615909180718, "learning_rate": 7.246993301694064e-06, "loss": 0.7191, "step": 12108 }, { "epoch": 0.37112296187323773, "grad_norm": 1.704509968903115, "learning_rate": 7.246549914355853e-06, "loss": 0.6883, "step": 12109 }, { "epoch": 0.37115361039597894, "grad_norm": 1.8086405012082, "learning_rate": 7.246106504881811e-06, "loss": 0.6047, "step": 12110 }, { "epoch": 0.37118425891872014, "grad_norm": 1.6963583018810418, "learning_rate": 7.245663073276309e-06, "loss": 0.6817, "step": 12111 }, { "epoch": 0.3712149074414613, "grad_norm": 1.9519030169905156, "learning_rate": 7.245219619543712e-06, "loss": 0.5797, "step": 12112 }, { "epoch": 0.3712455559642025, "grad_norm": 1.9193061182452564, "learning_rate": 7.244776143688392e-06, "loss": 0.6801, "step": 12113 }, { "epoch": 0.3712762044869437, "grad_norm": 1.4847265798243212, "learning_rate": 7.244332645714719e-06, "loss": 0.5649, "step": 12114 }, { "epoch": 0.3713068530096849, "grad_norm": 1.5906427027286307, "learning_rate": 7.2438891256270615e-06, "loss": 0.6435, "step": 12115 }, { "epoch": 0.3713375015324261, "grad_norm": 1.638487932275848, "learning_rate": 7.24344558342979e-06, "loss": 0.7449, "step": 12116 }, { "epoch": 0.3713681500551673, "grad_norm": 1.857436338011162, "learning_rate": 7.243002019127277e-06, "loss": 0.7152, "step": 12117 }, { "epoch": 0.37139879857790853, "grad_norm": 1.7057296551417844, "learning_rate": 7.2425584327238895e-06, "loss": 0.5611, "step": 12118 }, { "epoch": 0.37142944710064973, "grad_norm": 1.6440381371712822, "learning_rate": 7.2421148242240005e-06, "loss": 0.621, "step": 12119 }, { "epoch": 0.37146009562339094, "grad_norm": 1.787982788750262, "learning_rate": 7.241671193631979e-06, "loss": 0.6864, "step": 12120 }, { "epoch": 0.37149074414613215, "grad_norm": 1.7840073202509992, "learning_rate": 7.2412275409522e-06, "loss": 0.7318, "step": 12121 }, { "epoch": 0.37152139266887335, "grad_norm": 1.8759841094697238, "learning_rate": 7.240783866189031e-06, "loss": 0.6936, "step": 12122 }, { "epoch": 0.37155204119161456, "grad_norm": 1.7441736323628707, "learning_rate": 7.240340169346847e-06, "loss": 0.5183, "step": 12123 }, { "epoch": 0.37158268971435576, "grad_norm": 1.509326220741127, "learning_rate": 7.239896450430016e-06, "loss": 0.6236, "step": 12124 }, { "epoch": 0.37161333823709697, "grad_norm": 1.727299159164586, "learning_rate": 7.239452709442914e-06, "loss": 0.7294, "step": 12125 }, { "epoch": 0.3716439867598382, "grad_norm": 1.9037548300859821, "learning_rate": 7.23900894638991e-06, "loss": 0.6828, "step": 12126 }, { "epoch": 0.3716746352825794, "grad_norm": 1.7548974870050595, "learning_rate": 7.238565161275379e-06, "loss": 0.7289, "step": 12127 }, { "epoch": 0.3717052838053206, "grad_norm": 0.9648829492058526, "learning_rate": 7.2381213541036925e-06, "loss": 0.4562, "step": 12128 }, { "epoch": 0.3717359323280618, "grad_norm": 1.6556072047485082, "learning_rate": 7.237677524879223e-06, "loss": 0.6734, "step": 12129 }, { "epoch": 0.371766580850803, "grad_norm": 1.7741237665998855, "learning_rate": 7.2372336736063456e-06, "loss": 0.652, "step": 12130 }, { "epoch": 0.3717972293735442, "grad_norm": 1.9094276161421677, "learning_rate": 7.23678980028943e-06, "loss": 0.6318, "step": 12131 }, { "epoch": 0.3718278778962854, "grad_norm": 1.6289079144767822, "learning_rate": 7.2363459049328545e-06, "loss": 0.6752, "step": 12132 }, { "epoch": 0.3718585264190266, "grad_norm": 1.752744009746961, "learning_rate": 7.23590198754099e-06, "loss": 0.7195, "step": 12133 }, { "epoch": 0.3718891749417678, "grad_norm": 1.652232636916924, "learning_rate": 7.235458048118211e-06, "loss": 0.7197, "step": 12134 }, { "epoch": 0.371919823464509, "grad_norm": 1.633314452395313, "learning_rate": 7.235014086668892e-06, "loss": 0.6519, "step": 12135 }, { "epoch": 0.37195047198725023, "grad_norm": 1.7675590979614435, "learning_rate": 7.234570103197407e-06, "loss": 0.7377, "step": 12136 }, { "epoch": 0.37198112050999144, "grad_norm": 1.8701314894336365, "learning_rate": 7.2341260977081314e-06, "loss": 0.6025, "step": 12137 }, { "epoch": 0.37201176903273264, "grad_norm": 1.6142795763795903, "learning_rate": 7.233682070205439e-06, "loss": 0.6618, "step": 12138 }, { "epoch": 0.37204241755547385, "grad_norm": 1.659996589768685, "learning_rate": 7.2332380206937055e-06, "loss": 0.64, "step": 12139 }, { "epoch": 0.37207306607821505, "grad_norm": 0.8417480388927286, "learning_rate": 7.232793949177308e-06, "loss": 0.4832, "step": 12140 }, { "epoch": 0.37210371460095626, "grad_norm": 0.9048126206986115, "learning_rate": 7.23234985566062e-06, "loss": 0.4722, "step": 12141 }, { "epoch": 0.37213436312369746, "grad_norm": 1.8846394617046065, "learning_rate": 7.231905740148017e-06, "loss": 0.6098, "step": 12142 }, { "epoch": 0.3721650116464386, "grad_norm": 1.844771884788096, "learning_rate": 7.231461602643876e-06, "loss": 0.6583, "step": 12143 }, { "epoch": 0.3721956601691798, "grad_norm": 1.6331649552024132, "learning_rate": 7.2310174431525715e-06, "loss": 0.7469, "step": 12144 }, { "epoch": 0.372226308691921, "grad_norm": 1.9444999987435725, "learning_rate": 7.230573261678484e-06, "loss": 0.7352, "step": 12145 }, { "epoch": 0.37225695721466223, "grad_norm": 1.685122110824292, "learning_rate": 7.230129058225986e-06, "loss": 0.6251, "step": 12146 }, { "epoch": 0.37228760573740344, "grad_norm": 1.8986392105644143, "learning_rate": 7.229684832799455e-06, "loss": 0.8284, "step": 12147 }, { "epoch": 0.37231825426014464, "grad_norm": 1.8373923928884404, "learning_rate": 7.22924058540327e-06, "loss": 0.6646, "step": 12148 }, { "epoch": 0.37234890278288585, "grad_norm": 0.8946413936221389, "learning_rate": 7.228796316041807e-06, "loss": 0.4703, "step": 12149 }, { "epoch": 0.37237955130562705, "grad_norm": 1.6993520071327004, "learning_rate": 7.228352024719442e-06, "loss": 0.6207, "step": 12150 }, { "epoch": 0.37241019982836826, "grad_norm": 1.6065093251966975, "learning_rate": 7.2279077114405575e-06, "loss": 0.6534, "step": 12151 }, { "epoch": 0.37244084835110947, "grad_norm": 1.5674762068005441, "learning_rate": 7.227463376209527e-06, "loss": 0.6724, "step": 12152 }, { "epoch": 0.37247149687385067, "grad_norm": 1.4692549870974585, "learning_rate": 7.227019019030729e-06, "loss": 0.6352, "step": 12153 }, { "epoch": 0.3725021453965919, "grad_norm": 1.8236354999132107, "learning_rate": 7.226574639908543e-06, "loss": 0.7856, "step": 12154 }, { "epoch": 0.3725327939193331, "grad_norm": 1.6808674343188152, "learning_rate": 7.226130238847347e-06, "loss": 0.7058, "step": 12155 }, { "epoch": 0.3725634424420743, "grad_norm": 1.5692838704948626, "learning_rate": 7.225685815851522e-06, "loss": 0.6589, "step": 12156 }, { "epoch": 0.3725940909648155, "grad_norm": 1.9086327413358086, "learning_rate": 7.225241370925444e-06, "loss": 0.6763, "step": 12157 }, { "epoch": 0.3726247394875567, "grad_norm": 1.7048211722127806, "learning_rate": 7.224796904073493e-06, "loss": 0.5647, "step": 12158 }, { "epoch": 0.3726553880102979, "grad_norm": 0.794714610782924, "learning_rate": 7.224352415300049e-06, "loss": 0.4851, "step": 12159 }, { "epoch": 0.3726860365330391, "grad_norm": 1.5530540452682566, "learning_rate": 7.223907904609493e-06, "loss": 0.5805, "step": 12160 }, { "epoch": 0.3727166850557803, "grad_norm": 1.844796884566749, "learning_rate": 7.223463372006202e-06, "loss": 0.7534, "step": 12161 }, { "epoch": 0.3727473335785215, "grad_norm": 1.7690419251990455, "learning_rate": 7.223018817494558e-06, "loss": 0.6745, "step": 12162 }, { "epoch": 0.37277798210126273, "grad_norm": 1.818568177445544, "learning_rate": 7.222574241078939e-06, "loss": 0.7357, "step": 12163 }, { "epoch": 0.37280863062400393, "grad_norm": 1.9876624200556456, "learning_rate": 7.22212964276373e-06, "loss": 0.703, "step": 12164 }, { "epoch": 0.37283927914674514, "grad_norm": 1.6357554201987023, "learning_rate": 7.221685022553309e-06, "loss": 0.6652, "step": 12165 }, { "epoch": 0.37286992766948635, "grad_norm": 1.6542059130849291, "learning_rate": 7.221240380452055e-06, "loss": 0.6068, "step": 12166 }, { "epoch": 0.37290057619222755, "grad_norm": 0.8475096330126426, "learning_rate": 7.220795716464352e-06, "loss": 0.4635, "step": 12167 }, { "epoch": 0.37293122471496876, "grad_norm": 0.77028219801398, "learning_rate": 7.2203510305945815e-06, "loss": 0.4277, "step": 12168 }, { "epoch": 0.37296187323770996, "grad_norm": 1.8048418160529354, "learning_rate": 7.2199063228471235e-06, "loss": 0.6478, "step": 12169 }, { "epoch": 0.37299252176045117, "grad_norm": 1.866765099764495, "learning_rate": 7.21946159322636e-06, "loss": 0.7256, "step": 12170 }, { "epoch": 0.3730231702831924, "grad_norm": 1.6575013088328656, "learning_rate": 7.219016841736675e-06, "loss": 0.6638, "step": 12171 }, { "epoch": 0.3730538188059336, "grad_norm": 1.6084180354810498, "learning_rate": 7.218572068382448e-06, "loss": 0.7129, "step": 12172 }, { "epoch": 0.3730844673286748, "grad_norm": 1.7117962900756194, "learning_rate": 7.218127273168063e-06, "loss": 0.6495, "step": 12173 }, { "epoch": 0.37311511585141593, "grad_norm": 1.4751430312704048, "learning_rate": 7.217682456097902e-06, "loss": 0.58, "step": 12174 }, { "epoch": 0.37314576437415714, "grad_norm": 1.7920261678669223, "learning_rate": 7.21723761717635e-06, "loss": 0.6605, "step": 12175 }, { "epoch": 0.37317641289689835, "grad_norm": 1.6051159153231207, "learning_rate": 7.216792756407787e-06, "loss": 0.5819, "step": 12176 }, { "epoch": 0.37320706141963955, "grad_norm": 1.7399247717610302, "learning_rate": 7.216347873796598e-06, "loss": 0.7089, "step": 12177 }, { "epoch": 0.37323770994238076, "grad_norm": 1.8261016759931141, "learning_rate": 7.215902969347166e-06, "loss": 0.6695, "step": 12178 }, { "epoch": 0.37326835846512196, "grad_norm": 1.7033711744826416, "learning_rate": 7.215458043063877e-06, "loss": 0.6235, "step": 12179 }, { "epoch": 0.37329900698786317, "grad_norm": 1.5694427053139612, "learning_rate": 7.215013094951111e-06, "loss": 0.6958, "step": 12180 }, { "epoch": 0.3733296555106044, "grad_norm": 1.853190843812496, "learning_rate": 7.214568125013254e-06, "loss": 0.7668, "step": 12181 }, { "epoch": 0.3733603040333456, "grad_norm": 1.548984555538895, "learning_rate": 7.214123133254691e-06, "loss": 0.6976, "step": 12182 }, { "epoch": 0.3733909525560868, "grad_norm": 1.904960514652845, "learning_rate": 7.2136781196798075e-06, "loss": 0.7471, "step": 12183 }, { "epoch": 0.373421601078828, "grad_norm": 1.561760791574482, "learning_rate": 7.213233084292986e-06, "loss": 0.6554, "step": 12184 }, { "epoch": 0.3734522496015692, "grad_norm": 1.806355019460574, "learning_rate": 7.212788027098613e-06, "loss": 0.6746, "step": 12185 }, { "epoch": 0.3734828981243104, "grad_norm": 1.7064177674473195, "learning_rate": 7.212342948101075e-06, "loss": 0.6428, "step": 12186 }, { "epoch": 0.3735135466470516, "grad_norm": 1.7694766926822996, "learning_rate": 7.211897847304753e-06, "loss": 0.6888, "step": 12187 }, { "epoch": 0.3735441951697928, "grad_norm": 1.9784468122016903, "learning_rate": 7.211452724714037e-06, "loss": 0.7545, "step": 12188 }, { "epoch": 0.373574843692534, "grad_norm": 0.9977351678945053, "learning_rate": 7.211007580333311e-06, "loss": 0.4492, "step": 12189 }, { "epoch": 0.3736054922152752, "grad_norm": 1.7777922886061523, "learning_rate": 7.2105624141669615e-06, "loss": 0.6991, "step": 12190 }, { "epoch": 0.37363614073801643, "grad_norm": 0.9124647174756735, "learning_rate": 7.210117226219377e-06, "loss": 0.4834, "step": 12191 }, { "epoch": 0.37366678926075764, "grad_norm": 1.662050114692412, "learning_rate": 7.20967201649494e-06, "loss": 0.7135, "step": 12192 }, { "epoch": 0.37369743778349884, "grad_norm": 0.7755058546210565, "learning_rate": 7.209226784998039e-06, "loss": 0.4465, "step": 12193 }, { "epoch": 0.37372808630624005, "grad_norm": 1.6890621431683097, "learning_rate": 7.2087815317330625e-06, "loss": 0.6538, "step": 12194 }, { "epoch": 0.37375873482898125, "grad_norm": 0.8342648960597374, "learning_rate": 7.2083362567043955e-06, "loss": 0.4912, "step": 12195 }, { "epoch": 0.37378938335172246, "grad_norm": 0.8389124427377364, "learning_rate": 7.207890959916426e-06, "loss": 0.4562, "step": 12196 }, { "epoch": 0.37382003187446367, "grad_norm": 1.557364974744378, "learning_rate": 7.207445641373543e-06, "loss": 0.6791, "step": 12197 }, { "epoch": 0.37385068039720487, "grad_norm": 2.2052418529320414, "learning_rate": 7.207000301080132e-06, "loss": 0.7087, "step": 12198 }, { "epoch": 0.3738813289199461, "grad_norm": 1.6989396472977563, "learning_rate": 7.206554939040585e-06, "loss": 0.6013, "step": 12199 }, { "epoch": 0.3739119774426873, "grad_norm": 0.7710098715686627, "learning_rate": 7.206109555259284e-06, "loss": 0.4505, "step": 12200 }, { "epoch": 0.3739426259654285, "grad_norm": 1.5995561452725473, "learning_rate": 7.205664149740623e-06, "loss": 0.6877, "step": 12201 }, { "epoch": 0.3739732744881697, "grad_norm": 1.7826446143394137, "learning_rate": 7.205218722488989e-06, "loss": 0.7341, "step": 12202 }, { "epoch": 0.3740039230109109, "grad_norm": 1.5735645986240705, "learning_rate": 7.204773273508772e-06, "loss": 0.6395, "step": 12203 }, { "epoch": 0.3740345715336521, "grad_norm": 1.4692051306131362, "learning_rate": 7.2043278028043565e-06, "loss": 0.6186, "step": 12204 }, { "epoch": 0.37406522005639326, "grad_norm": 1.7015653038411356, "learning_rate": 7.203882310380137e-06, "loss": 0.7037, "step": 12205 }, { "epoch": 0.37409586857913446, "grad_norm": 1.5925858373464934, "learning_rate": 7.203436796240502e-06, "loss": 0.6555, "step": 12206 }, { "epoch": 0.37412651710187567, "grad_norm": 1.3624479572966426, "learning_rate": 7.202991260389839e-06, "loss": 0.5989, "step": 12207 }, { "epoch": 0.37415716562461687, "grad_norm": 1.8380187290335452, "learning_rate": 7.202545702832539e-06, "loss": 0.6666, "step": 12208 }, { "epoch": 0.3741878141473581, "grad_norm": 1.6400631592086337, "learning_rate": 7.202100123572994e-06, "loss": 0.5384, "step": 12209 }, { "epoch": 0.3742184626700993, "grad_norm": 1.8578257322528031, "learning_rate": 7.201654522615593e-06, "loss": 0.6018, "step": 12210 }, { "epoch": 0.3742491111928405, "grad_norm": 1.602248678339458, "learning_rate": 7.2012088999647264e-06, "loss": 0.6123, "step": 12211 }, { "epoch": 0.3742797597155817, "grad_norm": 1.985984480499888, "learning_rate": 7.200763255624785e-06, "loss": 0.6688, "step": 12212 }, { "epoch": 0.3743104082383229, "grad_norm": 1.7326279014719825, "learning_rate": 7.200317589600161e-06, "loss": 0.6805, "step": 12213 }, { "epoch": 0.3743410567610641, "grad_norm": 1.7470244399709207, "learning_rate": 7.199871901895244e-06, "loss": 0.6002, "step": 12214 }, { "epoch": 0.3743717052838053, "grad_norm": 1.7849958673874666, "learning_rate": 7.199426192514427e-06, "loss": 0.6985, "step": 12215 }, { "epoch": 0.3744023538065465, "grad_norm": 1.6904637666715694, "learning_rate": 7.198980461462101e-06, "loss": 0.7227, "step": 12216 }, { "epoch": 0.3744330023292877, "grad_norm": 1.7449475900569589, "learning_rate": 7.198534708742656e-06, "loss": 0.6166, "step": 12217 }, { "epoch": 0.37446365085202893, "grad_norm": 1.5772600536631989, "learning_rate": 7.198088934360488e-06, "loss": 0.7723, "step": 12218 }, { "epoch": 0.37449429937477013, "grad_norm": 1.5842821617922467, "learning_rate": 7.197643138319988e-06, "loss": 0.727, "step": 12219 }, { "epoch": 0.37452494789751134, "grad_norm": 1.8922293358507671, "learning_rate": 7.197197320625546e-06, "loss": 0.7291, "step": 12220 }, { "epoch": 0.37455559642025255, "grad_norm": 1.5927583190457357, "learning_rate": 7.196751481281556e-06, "loss": 0.6146, "step": 12221 }, { "epoch": 0.37458624494299375, "grad_norm": 1.6727548846961353, "learning_rate": 7.196305620292413e-06, "loss": 0.6792, "step": 12222 }, { "epoch": 0.37461689346573496, "grad_norm": 1.7564438099084825, "learning_rate": 7.195859737662509e-06, "loss": 0.7347, "step": 12223 }, { "epoch": 0.37464754198847616, "grad_norm": 1.6069439138117396, "learning_rate": 7.195413833396236e-06, "loss": 0.7009, "step": 12224 }, { "epoch": 0.37467819051121737, "grad_norm": 1.8520611902143735, "learning_rate": 7.19496790749799e-06, "loss": 0.6897, "step": 12225 }, { "epoch": 0.3747088390339586, "grad_norm": 1.656721062359851, "learning_rate": 7.194521959972163e-06, "loss": 0.5846, "step": 12226 }, { "epoch": 0.3747394875566998, "grad_norm": 1.7707955155123105, "learning_rate": 7.194075990823151e-06, "loss": 0.6862, "step": 12227 }, { "epoch": 0.374770136079441, "grad_norm": 1.58158656088169, "learning_rate": 7.193630000055344e-06, "loss": 0.5989, "step": 12228 }, { "epoch": 0.3748007846021822, "grad_norm": 1.673172495369431, "learning_rate": 7.193183987673143e-06, "loss": 0.6879, "step": 12229 }, { "epoch": 0.3748314331249234, "grad_norm": 1.810609849487826, "learning_rate": 7.192737953680936e-06, "loss": 0.7421, "step": 12230 }, { "epoch": 0.3748620816476646, "grad_norm": 1.6946112852458386, "learning_rate": 7.192291898083122e-06, "loss": 0.6885, "step": 12231 }, { "epoch": 0.3748927301704058, "grad_norm": 1.7730191993545037, "learning_rate": 7.191845820884093e-06, "loss": 0.6121, "step": 12232 }, { "epoch": 0.374923378693147, "grad_norm": 1.7048826438194529, "learning_rate": 7.191399722088249e-06, "loss": 0.6371, "step": 12233 }, { "epoch": 0.3749540272158882, "grad_norm": 1.599824175952047, "learning_rate": 7.190953601699983e-06, "loss": 0.5357, "step": 12234 }, { "epoch": 0.3749846757386294, "grad_norm": 1.7096434677225085, "learning_rate": 7.190507459723689e-06, "loss": 0.6854, "step": 12235 }, { "epoch": 0.3750153242613706, "grad_norm": 1.5961888447059378, "learning_rate": 7.190061296163765e-06, "loss": 0.7056, "step": 12236 }, { "epoch": 0.3750459727841118, "grad_norm": 1.6326185339636918, "learning_rate": 7.189615111024608e-06, "loss": 0.6613, "step": 12237 }, { "epoch": 0.375076621306853, "grad_norm": 1.7370351108601472, "learning_rate": 7.189168904310612e-06, "loss": 0.6647, "step": 12238 }, { "epoch": 0.3751072698295942, "grad_norm": 1.6921644562818914, "learning_rate": 7.188722676026174e-06, "loss": 0.7222, "step": 12239 }, { "epoch": 0.3751379183523354, "grad_norm": 2.1487484337072753, "learning_rate": 7.1882764261756925e-06, "loss": 0.6839, "step": 12240 }, { "epoch": 0.3751685668750766, "grad_norm": 0.889709275915457, "learning_rate": 7.187830154763563e-06, "loss": 0.4654, "step": 12241 }, { "epoch": 0.3751992153978178, "grad_norm": 1.7454109201369838, "learning_rate": 7.187383861794184e-06, "loss": 0.7285, "step": 12242 }, { "epoch": 0.375229863920559, "grad_norm": 0.8485411002347986, "learning_rate": 7.186937547271951e-06, "loss": 0.4789, "step": 12243 }, { "epoch": 0.3752605124433002, "grad_norm": 1.869859717608598, "learning_rate": 7.186491211201263e-06, "loss": 0.8004, "step": 12244 }, { "epoch": 0.3752911609660414, "grad_norm": 1.7741989365541018, "learning_rate": 7.186044853586518e-06, "loss": 0.7161, "step": 12245 }, { "epoch": 0.37532180948878263, "grad_norm": 1.8871070498914557, "learning_rate": 7.1855984744321135e-06, "loss": 0.5849, "step": 12246 }, { "epoch": 0.37535245801152384, "grad_norm": 1.795438277352554, "learning_rate": 7.185152073742448e-06, "loss": 0.7452, "step": 12247 }, { "epoch": 0.37538310653426504, "grad_norm": 1.7531971320323172, "learning_rate": 7.184705651521919e-06, "loss": 0.7176, "step": 12248 }, { "epoch": 0.37541375505700625, "grad_norm": 0.7939381171289878, "learning_rate": 7.184259207774928e-06, "loss": 0.4696, "step": 12249 }, { "epoch": 0.37544440357974745, "grad_norm": 2.427780982709156, "learning_rate": 7.183812742505871e-06, "loss": 0.6613, "step": 12250 }, { "epoch": 0.37547505210248866, "grad_norm": 1.8551126707940566, "learning_rate": 7.183366255719149e-06, "loss": 0.6786, "step": 12251 }, { "epoch": 0.37550570062522987, "grad_norm": 1.6243889492553216, "learning_rate": 7.182919747419161e-06, "loss": 0.6347, "step": 12252 }, { "epoch": 0.37553634914797107, "grad_norm": 1.7283830322786773, "learning_rate": 7.182473217610306e-06, "loss": 0.6413, "step": 12253 }, { "epoch": 0.3755669976707123, "grad_norm": 1.5511335648258389, "learning_rate": 7.182026666296983e-06, "loss": 0.6814, "step": 12254 }, { "epoch": 0.3755976461934535, "grad_norm": 1.7313201272197662, "learning_rate": 7.1815800934835945e-06, "loss": 0.6625, "step": 12255 }, { "epoch": 0.3756282947161947, "grad_norm": 1.554803314915936, "learning_rate": 7.181133499174538e-06, "loss": 0.6804, "step": 12256 }, { "epoch": 0.3756589432389359, "grad_norm": 1.7965505033045461, "learning_rate": 7.180686883374216e-06, "loss": 0.6285, "step": 12257 }, { "epoch": 0.3756895917616771, "grad_norm": 1.5713416942395428, "learning_rate": 7.180240246087027e-06, "loss": 0.6295, "step": 12258 }, { "epoch": 0.3757202402844183, "grad_norm": 1.5587181325791992, "learning_rate": 7.179793587317374e-06, "loss": 0.6535, "step": 12259 }, { "epoch": 0.3757508888071595, "grad_norm": 1.6783997050877024, "learning_rate": 7.179346907069657e-06, "loss": 0.7504, "step": 12260 }, { "epoch": 0.3757815373299007, "grad_norm": 1.8422705032192241, "learning_rate": 7.178900205348276e-06, "loss": 0.6697, "step": 12261 }, { "epoch": 0.3758121858526419, "grad_norm": 1.5408050705016831, "learning_rate": 7.178453482157635e-06, "loss": 0.6205, "step": 12262 }, { "epoch": 0.37584283437538313, "grad_norm": 0.8959067136802906, "learning_rate": 7.178006737502135e-06, "loss": 0.4723, "step": 12263 }, { "epoch": 0.37587348289812433, "grad_norm": 1.692910914789313, "learning_rate": 7.177559971386175e-06, "loss": 0.6696, "step": 12264 }, { "epoch": 0.37590413142086554, "grad_norm": 1.9872834766159848, "learning_rate": 7.177113183814162e-06, "loss": 0.6678, "step": 12265 }, { "epoch": 0.37593477994360674, "grad_norm": 0.8346872260975212, "learning_rate": 7.176666374790494e-06, "loss": 0.4694, "step": 12266 }, { "epoch": 0.3759654284663479, "grad_norm": 1.7522836245104776, "learning_rate": 7.176219544319576e-06, "loss": 0.625, "step": 12267 }, { "epoch": 0.3759960769890891, "grad_norm": 1.8516315682125493, "learning_rate": 7.175772692405811e-06, "loss": 0.6803, "step": 12268 }, { "epoch": 0.3760267255118303, "grad_norm": 1.9196571594562213, "learning_rate": 7.175325819053599e-06, "loss": 0.7704, "step": 12269 }, { "epoch": 0.3760573740345715, "grad_norm": 1.6853447752130095, "learning_rate": 7.174878924267346e-06, "loss": 0.6064, "step": 12270 }, { "epoch": 0.3760880225573127, "grad_norm": 1.7829723727545426, "learning_rate": 7.174432008051454e-06, "loss": 0.7571, "step": 12271 }, { "epoch": 0.3761186710800539, "grad_norm": 1.9022757127083019, "learning_rate": 7.1739850704103295e-06, "loss": 0.6514, "step": 12272 }, { "epoch": 0.37614931960279513, "grad_norm": 1.7588526871566006, "learning_rate": 7.17353811134837e-06, "loss": 0.7704, "step": 12273 }, { "epoch": 0.37617996812553633, "grad_norm": 1.431842050037238, "learning_rate": 7.1730911308699865e-06, "loss": 0.6822, "step": 12274 }, { "epoch": 0.37621061664827754, "grad_norm": 1.621047262186365, "learning_rate": 7.172644128979578e-06, "loss": 0.6665, "step": 12275 }, { "epoch": 0.37624126517101875, "grad_norm": 1.8177763059754255, "learning_rate": 7.172197105681553e-06, "loss": 0.6562, "step": 12276 }, { "epoch": 0.37627191369375995, "grad_norm": 1.868428007710165, "learning_rate": 7.171750060980314e-06, "loss": 0.7066, "step": 12277 }, { "epoch": 0.37630256221650116, "grad_norm": 1.5919495865981623, "learning_rate": 7.171302994880264e-06, "loss": 0.5965, "step": 12278 }, { "epoch": 0.37633321073924236, "grad_norm": 1.9057117662871794, "learning_rate": 7.170855907385812e-06, "loss": 0.6897, "step": 12279 }, { "epoch": 0.37636385926198357, "grad_norm": 1.7752591099552695, "learning_rate": 7.17040879850136e-06, "loss": 0.6559, "step": 12280 }, { "epoch": 0.3763945077847248, "grad_norm": 1.6786092098915262, "learning_rate": 7.169961668231316e-06, "loss": 0.6473, "step": 12281 }, { "epoch": 0.376425156307466, "grad_norm": 1.4857559897086887, "learning_rate": 7.169514516580083e-06, "loss": 0.5915, "step": 12282 }, { "epoch": 0.3764558048302072, "grad_norm": 1.028929832544415, "learning_rate": 7.169067343552069e-06, "loss": 0.4588, "step": 12283 }, { "epoch": 0.3764864533529484, "grad_norm": 1.6817048497608615, "learning_rate": 7.1686201491516795e-06, "loss": 0.6829, "step": 12284 }, { "epoch": 0.3765171018756896, "grad_norm": 0.8294081331416995, "learning_rate": 7.168172933383322e-06, "loss": 0.4631, "step": 12285 }, { "epoch": 0.3765477503984308, "grad_norm": 1.7591912779927352, "learning_rate": 7.167725696251399e-06, "loss": 0.6844, "step": 12286 }, { "epoch": 0.376578398921172, "grad_norm": 1.714890188077214, "learning_rate": 7.167278437760322e-06, "loss": 0.7981, "step": 12287 }, { "epoch": 0.3766090474439132, "grad_norm": 0.7624142794517267, "learning_rate": 7.1668311579144966e-06, "loss": 0.4523, "step": 12288 }, { "epoch": 0.3766396959666544, "grad_norm": 1.9947290391661965, "learning_rate": 7.166383856718328e-06, "loss": 0.6882, "step": 12289 }, { "epoch": 0.3766703444893956, "grad_norm": 1.8190084787875156, "learning_rate": 7.165936534176225e-06, "loss": 0.7698, "step": 12290 }, { "epoch": 0.37670099301213683, "grad_norm": 2.0393362915549127, "learning_rate": 7.165489190292596e-06, "loss": 0.6938, "step": 12291 }, { "epoch": 0.37673164153487804, "grad_norm": 1.5657707432613284, "learning_rate": 7.1650418250718475e-06, "loss": 0.6871, "step": 12292 }, { "epoch": 0.37676229005761924, "grad_norm": 1.761597815358482, "learning_rate": 7.164594438518389e-06, "loss": 0.6967, "step": 12293 }, { "epoch": 0.37679293858036045, "grad_norm": 1.899295658726107, "learning_rate": 7.164147030636627e-06, "loss": 0.7913, "step": 12294 }, { "epoch": 0.37682358710310165, "grad_norm": 1.5943513018841664, "learning_rate": 7.16369960143097e-06, "loss": 0.7233, "step": 12295 }, { "epoch": 0.37685423562584286, "grad_norm": 1.6629008638712799, "learning_rate": 7.163252150905828e-06, "loss": 0.7536, "step": 12296 }, { "epoch": 0.37688488414858407, "grad_norm": 1.6875771231090297, "learning_rate": 7.162804679065608e-06, "loss": 0.7384, "step": 12297 }, { "epoch": 0.3769155326713252, "grad_norm": 1.8817942823634677, "learning_rate": 7.162357185914721e-06, "loss": 0.6644, "step": 12298 }, { "epoch": 0.3769461811940664, "grad_norm": 1.6688958136064083, "learning_rate": 7.161909671457576e-06, "loss": 0.7279, "step": 12299 }, { "epoch": 0.3769768297168076, "grad_norm": 1.0550269941111907, "learning_rate": 7.161462135698581e-06, "loss": 0.4417, "step": 12300 }, { "epoch": 0.37700747823954883, "grad_norm": 1.7992460872574563, "learning_rate": 7.161014578642146e-06, "loss": 0.8152, "step": 12301 }, { "epoch": 0.37703812676229004, "grad_norm": 1.5184879019247883, "learning_rate": 7.160567000292682e-06, "loss": 0.7346, "step": 12302 }, { "epoch": 0.37706877528503124, "grad_norm": 1.5260594577440585, "learning_rate": 7.160119400654599e-06, "loss": 0.6351, "step": 12303 }, { "epoch": 0.37709942380777245, "grad_norm": 1.5297961492462506, "learning_rate": 7.159671779732305e-06, "loss": 0.6564, "step": 12304 }, { "epoch": 0.37713007233051365, "grad_norm": 1.6503519050663444, "learning_rate": 7.1592241375302145e-06, "loss": 0.6643, "step": 12305 }, { "epoch": 0.37716072085325486, "grad_norm": 2.101681317587911, "learning_rate": 7.158776474052735e-06, "loss": 0.7216, "step": 12306 }, { "epoch": 0.37719136937599607, "grad_norm": 1.6953213773302869, "learning_rate": 7.1583287893042795e-06, "loss": 0.6703, "step": 12307 }, { "epoch": 0.37722201789873727, "grad_norm": 1.6856320899463715, "learning_rate": 7.157881083289257e-06, "loss": 0.7377, "step": 12308 }, { "epoch": 0.3772526664214785, "grad_norm": 1.7024485802004206, "learning_rate": 7.157433356012081e-06, "loss": 0.6442, "step": 12309 }, { "epoch": 0.3772833149442197, "grad_norm": 1.8181205872008646, "learning_rate": 7.156985607477163e-06, "loss": 0.6335, "step": 12310 }, { "epoch": 0.3773139634669609, "grad_norm": 0.9358350604114135, "learning_rate": 7.156537837688913e-06, "loss": 0.4667, "step": 12311 }, { "epoch": 0.3773446119897021, "grad_norm": 1.622937222969028, "learning_rate": 7.156090046651742e-06, "loss": 0.632, "step": 12312 }, { "epoch": 0.3773752605124433, "grad_norm": 1.8746280049740438, "learning_rate": 7.155642234370066e-06, "loss": 0.6346, "step": 12313 }, { "epoch": 0.3774059090351845, "grad_norm": 1.8316090366530027, "learning_rate": 7.1551944008482964e-06, "loss": 0.6738, "step": 12314 }, { "epoch": 0.3774365575579257, "grad_norm": 1.7782137501010642, "learning_rate": 7.154746546090844e-06, "loss": 0.6827, "step": 12315 }, { "epoch": 0.3774672060806669, "grad_norm": 1.6607620341814284, "learning_rate": 7.154298670102122e-06, "loss": 0.6336, "step": 12316 }, { "epoch": 0.3774978546034081, "grad_norm": 0.8262582590540148, "learning_rate": 7.1538507728865445e-06, "loss": 0.4426, "step": 12317 }, { "epoch": 0.37752850312614933, "grad_norm": 1.6870177761494392, "learning_rate": 7.153402854448525e-06, "loss": 0.7256, "step": 12318 }, { "epoch": 0.37755915164889053, "grad_norm": 1.6376723307112229, "learning_rate": 7.1529549147924735e-06, "loss": 0.6734, "step": 12319 }, { "epoch": 0.37758980017163174, "grad_norm": 1.7996724270448752, "learning_rate": 7.152506953922808e-06, "loss": 0.6785, "step": 12320 }, { "epoch": 0.37762044869437295, "grad_norm": 1.4197222395541735, "learning_rate": 7.152058971843941e-06, "loss": 0.5508, "step": 12321 }, { "epoch": 0.37765109721711415, "grad_norm": 0.8593405251631275, "learning_rate": 7.151610968560287e-06, "loss": 0.4884, "step": 12322 }, { "epoch": 0.37768174573985536, "grad_norm": 1.7082622732563988, "learning_rate": 7.151162944076259e-06, "loss": 0.6518, "step": 12323 }, { "epoch": 0.37771239426259656, "grad_norm": 1.611025204455271, "learning_rate": 7.150714898396273e-06, "loss": 0.7375, "step": 12324 }, { "epoch": 0.37774304278533777, "grad_norm": 1.731125596729317, "learning_rate": 7.15026683152474e-06, "loss": 0.7222, "step": 12325 }, { "epoch": 0.377773691308079, "grad_norm": 1.5595506141329998, "learning_rate": 7.149818743466081e-06, "loss": 0.7058, "step": 12326 }, { "epoch": 0.3778043398308202, "grad_norm": 1.7207782502731408, "learning_rate": 7.149370634224706e-06, "loss": 0.7852, "step": 12327 }, { "epoch": 0.3778349883535614, "grad_norm": 1.5958096671709945, "learning_rate": 7.148922503805034e-06, "loss": 0.6619, "step": 12328 }, { "epoch": 0.37786563687630254, "grad_norm": 0.7805895292248324, "learning_rate": 7.1484743522114766e-06, "loss": 0.457, "step": 12329 }, { "epoch": 0.37789628539904374, "grad_norm": 0.7828674183099353, "learning_rate": 7.148026179448452e-06, "loss": 0.4504, "step": 12330 }, { "epoch": 0.37792693392178495, "grad_norm": 1.7739410851620634, "learning_rate": 7.1475779855203764e-06, "loss": 0.7228, "step": 12331 }, { "epoch": 0.37795758244452615, "grad_norm": 1.7162599843213766, "learning_rate": 7.147129770431666e-06, "loss": 0.6639, "step": 12332 }, { "epoch": 0.37798823096726736, "grad_norm": 1.5053089072392654, "learning_rate": 7.146681534186737e-06, "loss": 0.6477, "step": 12333 }, { "epoch": 0.37801887949000856, "grad_norm": 1.530569170398407, "learning_rate": 7.146233276790003e-06, "loss": 0.7345, "step": 12334 }, { "epoch": 0.37804952801274977, "grad_norm": 0.8502336964046631, "learning_rate": 7.145784998245886e-06, "loss": 0.4683, "step": 12335 }, { "epoch": 0.378080176535491, "grad_norm": 1.3880414678221906, "learning_rate": 7.145336698558798e-06, "loss": 0.5823, "step": 12336 }, { "epoch": 0.3781108250582322, "grad_norm": 1.9134885233710546, "learning_rate": 7.144888377733161e-06, "loss": 0.6994, "step": 12337 }, { "epoch": 0.3781414735809734, "grad_norm": 1.7912757384475453, "learning_rate": 7.144440035773388e-06, "loss": 0.6221, "step": 12338 }, { "epoch": 0.3781721221037146, "grad_norm": 1.6115889893075566, "learning_rate": 7.143991672683901e-06, "loss": 0.6643, "step": 12339 }, { "epoch": 0.3782027706264558, "grad_norm": 1.953866038569664, "learning_rate": 7.143543288469113e-06, "loss": 0.5856, "step": 12340 }, { "epoch": 0.378233419149197, "grad_norm": 0.7943356663229123, "learning_rate": 7.143094883133447e-06, "loss": 0.4318, "step": 12341 }, { "epoch": 0.3782640676719382, "grad_norm": 1.6844570887058734, "learning_rate": 7.142646456681317e-06, "loss": 0.6271, "step": 12342 }, { "epoch": 0.3782947161946794, "grad_norm": 0.8065344140472965, "learning_rate": 7.142198009117143e-06, "loss": 0.4695, "step": 12343 }, { "epoch": 0.3783253647174206, "grad_norm": 1.9054634171796125, "learning_rate": 7.141749540445344e-06, "loss": 0.6883, "step": 12344 }, { "epoch": 0.3783560132401618, "grad_norm": 1.7683066667893688, "learning_rate": 7.141301050670339e-06, "loss": 0.5817, "step": 12345 }, { "epoch": 0.37838666176290303, "grad_norm": 1.6903437119867994, "learning_rate": 7.140852539796548e-06, "loss": 0.6517, "step": 12346 }, { "epoch": 0.37841731028564424, "grad_norm": 1.6614249572985496, "learning_rate": 7.140404007828387e-06, "loss": 0.6228, "step": 12347 }, { "epoch": 0.37844795880838544, "grad_norm": 1.7376354769287032, "learning_rate": 7.1399554547702806e-06, "loss": 0.7096, "step": 12348 }, { "epoch": 0.37847860733112665, "grad_norm": 1.658284257622475, "learning_rate": 7.139506880626645e-06, "loss": 0.7049, "step": 12349 }, { "epoch": 0.37850925585386785, "grad_norm": 0.8081207031737456, "learning_rate": 7.1390582854018995e-06, "loss": 0.4622, "step": 12350 }, { "epoch": 0.37853990437660906, "grad_norm": 2.326003964280761, "learning_rate": 7.138609669100465e-06, "loss": 0.5933, "step": 12351 }, { "epoch": 0.37857055289935027, "grad_norm": 0.7712066733787885, "learning_rate": 7.138161031726762e-06, "loss": 0.4408, "step": 12352 }, { "epoch": 0.37860120142209147, "grad_norm": 1.9173667543210478, "learning_rate": 7.137712373285213e-06, "loss": 0.7634, "step": 12353 }, { "epoch": 0.3786318499448327, "grad_norm": 0.7778575594235201, "learning_rate": 7.1372636937802365e-06, "loss": 0.456, "step": 12354 }, { "epoch": 0.3786624984675739, "grad_norm": 1.8179556403743535, "learning_rate": 7.136814993216253e-06, "loss": 0.6722, "step": 12355 }, { "epoch": 0.3786931469903151, "grad_norm": 1.725602326833194, "learning_rate": 7.136366271597685e-06, "loss": 0.6234, "step": 12356 }, { "epoch": 0.3787237955130563, "grad_norm": 1.7165610475269242, "learning_rate": 7.135917528928955e-06, "loss": 0.7384, "step": 12357 }, { "epoch": 0.3787544440357975, "grad_norm": 1.854210243886279, "learning_rate": 7.135468765214481e-06, "loss": 0.7133, "step": 12358 }, { "epoch": 0.3787850925585387, "grad_norm": 1.887607521652132, "learning_rate": 7.135019980458688e-06, "loss": 0.6188, "step": 12359 }, { "epoch": 0.37881574108127986, "grad_norm": 0.8284288421760376, "learning_rate": 7.1345711746659975e-06, "loss": 0.4538, "step": 12360 }, { "epoch": 0.37884638960402106, "grad_norm": 1.805271899038268, "learning_rate": 7.134122347840831e-06, "loss": 0.7666, "step": 12361 }, { "epoch": 0.37887703812676227, "grad_norm": 1.7326357979975653, "learning_rate": 7.133673499987609e-06, "loss": 0.6567, "step": 12362 }, { "epoch": 0.3789076866495035, "grad_norm": 1.674793261810419, "learning_rate": 7.133224631110758e-06, "loss": 0.646, "step": 12363 }, { "epoch": 0.3789383351722447, "grad_norm": 1.7597014434365528, "learning_rate": 7.132775741214698e-06, "loss": 0.6575, "step": 12364 }, { "epoch": 0.3789689836949859, "grad_norm": 1.6617214862364742, "learning_rate": 7.132326830303853e-06, "loss": 0.6889, "step": 12365 }, { "epoch": 0.3789996322177271, "grad_norm": 1.7523869801341134, "learning_rate": 7.131877898382647e-06, "loss": 0.7224, "step": 12366 }, { "epoch": 0.3790302807404683, "grad_norm": 2.2357166957810444, "learning_rate": 7.131428945455501e-06, "loss": 0.6212, "step": 12367 }, { "epoch": 0.3790609292632095, "grad_norm": 1.704083170293474, "learning_rate": 7.130979971526841e-06, "loss": 0.6992, "step": 12368 }, { "epoch": 0.3790915777859507, "grad_norm": 2.009703934801357, "learning_rate": 7.130530976601091e-06, "loss": 0.7697, "step": 12369 }, { "epoch": 0.3791222263086919, "grad_norm": 1.9866144695737096, "learning_rate": 7.130081960682673e-06, "loss": 0.7044, "step": 12370 }, { "epoch": 0.3791528748314331, "grad_norm": 1.670503375010645, "learning_rate": 7.129632923776013e-06, "loss": 0.6491, "step": 12371 }, { "epoch": 0.3791835233541743, "grad_norm": 1.572501048319628, "learning_rate": 7.129183865885535e-06, "loss": 0.648, "step": 12372 }, { "epoch": 0.37921417187691553, "grad_norm": 1.73313045336625, "learning_rate": 7.128734787015662e-06, "loss": 0.6597, "step": 12373 }, { "epoch": 0.37924482039965673, "grad_norm": 1.5023923666415393, "learning_rate": 7.128285687170823e-06, "loss": 0.6777, "step": 12374 }, { "epoch": 0.37927546892239794, "grad_norm": 2.0552875124132846, "learning_rate": 7.127836566355438e-06, "loss": 0.6699, "step": 12375 }, { "epoch": 0.37930611744513915, "grad_norm": 1.5954375306976218, "learning_rate": 7.1273874245739375e-06, "loss": 0.739, "step": 12376 }, { "epoch": 0.37933676596788035, "grad_norm": 1.676575223546246, "learning_rate": 7.126938261830743e-06, "loss": 0.6796, "step": 12377 }, { "epoch": 0.37936741449062156, "grad_norm": 1.5827047781091284, "learning_rate": 7.126489078130282e-06, "loss": 0.6632, "step": 12378 }, { "epoch": 0.37939806301336276, "grad_norm": 1.5985653311853818, "learning_rate": 7.126039873476979e-06, "loss": 0.7009, "step": 12379 }, { "epoch": 0.37942871153610397, "grad_norm": 0.8584481024517451, "learning_rate": 7.125590647875263e-06, "loss": 0.4761, "step": 12380 }, { "epoch": 0.3794593600588452, "grad_norm": 1.8714630515470925, "learning_rate": 7.125141401329557e-06, "loss": 0.7277, "step": 12381 }, { "epoch": 0.3794900085815864, "grad_norm": 1.616011613113855, "learning_rate": 7.12469213384429e-06, "loss": 0.7565, "step": 12382 }, { "epoch": 0.3795206571043276, "grad_norm": 1.9228843669872124, "learning_rate": 7.124242845423887e-06, "loss": 0.7539, "step": 12383 }, { "epoch": 0.3795513056270688, "grad_norm": 0.7975487172620325, "learning_rate": 7.123793536072776e-06, "loss": 0.4434, "step": 12384 }, { "epoch": 0.37958195414981, "grad_norm": 1.4997324745352771, "learning_rate": 7.123344205795384e-06, "loss": 0.6691, "step": 12385 }, { "epoch": 0.3796126026725512, "grad_norm": 0.7839329350851566, "learning_rate": 7.122894854596139e-06, "loss": 0.4615, "step": 12386 }, { "epoch": 0.3796432511952924, "grad_norm": 1.8298877831860474, "learning_rate": 7.122445482479467e-06, "loss": 0.6854, "step": 12387 }, { "epoch": 0.3796738997180336, "grad_norm": 1.6759096235728765, "learning_rate": 7.121996089449795e-06, "loss": 0.757, "step": 12388 }, { "epoch": 0.3797045482407748, "grad_norm": 1.9258989709261431, "learning_rate": 7.121546675511555e-06, "loss": 0.7656, "step": 12389 }, { "epoch": 0.379735196763516, "grad_norm": 1.8296508449892346, "learning_rate": 7.12109724066917e-06, "loss": 0.7205, "step": 12390 }, { "epoch": 0.3797658452862572, "grad_norm": 1.8045758431111045, "learning_rate": 7.120647784927075e-06, "loss": 0.7487, "step": 12391 }, { "epoch": 0.3797964938089984, "grad_norm": 1.7667775892396451, "learning_rate": 7.120198308289693e-06, "loss": 0.6922, "step": 12392 }, { "epoch": 0.3798271423317396, "grad_norm": 1.85096283104568, "learning_rate": 7.119748810761454e-06, "loss": 0.6664, "step": 12393 }, { "epoch": 0.3798577908544808, "grad_norm": 1.6724361627251294, "learning_rate": 7.119299292346788e-06, "loss": 0.6052, "step": 12394 }, { "epoch": 0.379888439377222, "grad_norm": 1.6652444348635866, "learning_rate": 7.118849753050126e-06, "loss": 0.6326, "step": 12395 }, { "epoch": 0.3799190878999632, "grad_norm": 1.657280300647117, "learning_rate": 7.1184001928758915e-06, "loss": 0.6703, "step": 12396 }, { "epoch": 0.3799497364227044, "grad_norm": 1.5307207705837147, "learning_rate": 7.11795061182852e-06, "loss": 0.7076, "step": 12397 }, { "epoch": 0.3799803849454456, "grad_norm": 1.8282204516053364, "learning_rate": 7.117501009912437e-06, "loss": 0.7863, "step": 12398 }, { "epoch": 0.3800110334681868, "grad_norm": 0.8229131183042544, "learning_rate": 7.117051387132078e-06, "loss": 0.4472, "step": 12399 }, { "epoch": 0.380041681990928, "grad_norm": 1.7927907215049994, "learning_rate": 7.1166017434918685e-06, "loss": 0.7266, "step": 12400 }, { "epoch": 0.38007233051366923, "grad_norm": 1.6375243925465948, "learning_rate": 7.11615207899624e-06, "loss": 0.6371, "step": 12401 }, { "epoch": 0.38010297903641044, "grad_norm": 1.5754559643758113, "learning_rate": 7.115702393649625e-06, "loss": 0.6867, "step": 12402 }, { "epoch": 0.38013362755915164, "grad_norm": 2.003614227711755, "learning_rate": 7.115252687456452e-06, "loss": 0.7122, "step": 12403 }, { "epoch": 0.38016427608189285, "grad_norm": 1.7018461090069774, "learning_rate": 7.114802960421155e-06, "loss": 0.6963, "step": 12404 }, { "epoch": 0.38019492460463405, "grad_norm": 1.768506628793403, "learning_rate": 7.114353212548159e-06, "loss": 0.7402, "step": 12405 }, { "epoch": 0.38022557312737526, "grad_norm": 1.7494175398725065, "learning_rate": 7.113903443841904e-06, "loss": 0.6402, "step": 12406 }, { "epoch": 0.38025622165011647, "grad_norm": 1.7892297358579639, "learning_rate": 7.113453654306815e-06, "loss": 0.6825, "step": 12407 }, { "epoch": 0.38028687017285767, "grad_norm": 1.6727310575571719, "learning_rate": 7.113003843947328e-06, "loss": 0.71, "step": 12408 }, { "epoch": 0.3803175186955989, "grad_norm": 1.7607096935657418, "learning_rate": 7.112554012767872e-06, "loss": 0.6737, "step": 12409 }, { "epoch": 0.3803481672183401, "grad_norm": 0.8060388702829894, "learning_rate": 7.112104160772883e-06, "loss": 0.4648, "step": 12410 }, { "epoch": 0.3803788157410813, "grad_norm": 1.72372157148546, "learning_rate": 7.11165428796679e-06, "loss": 0.7317, "step": 12411 }, { "epoch": 0.3804094642638225, "grad_norm": 1.578681999076581, "learning_rate": 7.111204394354026e-06, "loss": 0.6834, "step": 12412 }, { "epoch": 0.3804401127865637, "grad_norm": 1.8215869692522333, "learning_rate": 7.110754479939025e-06, "loss": 0.6882, "step": 12413 }, { "epoch": 0.3804707613093049, "grad_norm": 1.8535793490523635, "learning_rate": 7.1103045447262205e-06, "loss": 0.7543, "step": 12414 }, { "epoch": 0.3805014098320461, "grad_norm": 1.8087871679022978, "learning_rate": 7.109854588720047e-06, "loss": 0.686, "step": 12415 }, { "epoch": 0.3805320583547873, "grad_norm": 1.860213921729575, "learning_rate": 7.1094046119249345e-06, "loss": 0.7278, "step": 12416 }, { "epoch": 0.3805627068775285, "grad_norm": 1.7382352941568147, "learning_rate": 7.108954614345319e-06, "loss": 0.7006, "step": 12417 }, { "epoch": 0.38059335540026973, "grad_norm": 1.8014857719126607, "learning_rate": 7.108504595985635e-06, "loss": 0.6963, "step": 12418 }, { "epoch": 0.38062400392301093, "grad_norm": 0.7997844985317345, "learning_rate": 7.108054556850316e-06, "loss": 0.4539, "step": 12419 }, { "epoch": 0.38065465244575214, "grad_norm": 1.5842885084113467, "learning_rate": 7.107604496943794e-06, "loss": 0.7401, "step": 12420 }, { "epoch": 0.38068530096849335, "grad_norm": 1.8280568698137352, "learning_rate": 7.107154416270508e-06, "loss": 0.7942, "step": 12421 }, { "epoch": 0.3807159494912345, "grad_norm": 1.6891393205127434, "learning_rate": 7.106704314834889e-06, "loss": 0.657, "step": 12422 }, { "epoch": 0.3807465980139757, "grad_norm": 1.5979942348979694, "learning_rate": 7.106254192641373e-06, "loss": 0.6867, "step": 12423 }, { "epoch": 0.3807772465367169, "grad_norm": 1.4449040919844118, "learning_rate": 7.1058040496943955e-06, "loss": 0.5254, "step": 12424 }, { "epoch": 0.3808078950594581, "grad_norm": 1.614606752408276, "learning_rate": 7.105353885998393e-06, "loss": 0.7755, "step": 12425 }, { "epoch": 0.3808385435821993, "grad_norm": 1.519699383521856, "learning_rate": 7.104903701557799e-06, "loss": 0.6442, "step": 12426 }, { "epoch": 0.3808691921049405, "grad_norm": 1.6452857597546366, "learning_rate": 7.104453496377051e-06, "loss": 0.6795, "step": 12427 }, { "epoch": 0.38089984062768173, "grad_norm": 1.6272944474350695, "learning_rate": 7.104003270460585e-06, "loss": 0.6952, "step": 12428 }, { "epoch": 0.38093048915042294, "grad_norm": 1.7421337067435836, "learning_rate": 7.103553023812834e-06, "loss": 0.7297, "step": 12429 }, { "epoch": 0.38096113767316414, "grad_norm": 1.7731909797919143, "learning_rate": 7.10310275643824e-06, "loss": 0.6683, "step": 12430 }, { "epoch": 0.38099178619590535, "grad_norm": 0.8760956209456109, "learning_rate": 7.102652468341234e-06, "loss": 0.4678, "step": 12431 }, { "epoch": 0.38102243471864655, "grad_norm": 1.6336629383074779, "learning_rate": 7.102202159526256e-06, "loss": 0.6328, "step": 12432 }, { "epoch": 0.38105308324138776, "grad_norm": 1.7583305795454358, "learning_rate": 7.101751829997743e-06, "loss": 0.6956, "step": 12433 }, { "epoch": 0.38108373176412896, "grad_norm": 1.4856429540663054, "learning_rate": 7.10130147976013e-06, "loss": 0.6899, "step": 12434 }, { "epoch": 0.38111438028687017, "grad_norm": 1.7526233869305177, "learning_rate": 7.100851108817857e-06, "loss": 0.6777, "step": 12435 }, { "epoch": 0.3811450288096114, "grad_norm": 1.8680383972717907, "learning_rate": 7.100400717175359e-06, "loss": 0.6888, "step": 12436 }, { "epoch": 0.3811756773323526, "grad_norm": 1.7637412001953348, "learning_rate": 7.099950304837078e-06, "loss": 0.6498, "step": 12437 }, { "epoch": 0.3812063258550938, "grad_norm": 1.7257604000448497, "learning_rate": 7.099499871807447e-06, "loss": 0.6193, "step": 12438 }, { "epoch": 0.381236974377835, "grad_norm": 1.7066023200439515, "learning_rate": 7.099049418090907e-06, "loss": 0.6642, "step": 12439 }, { "epoch": 0.3812676229005762, "grad_norm": 1.7407952705047296, "learning_rate": 7.0985989436918965e-06, "loss": 0.5681, "step": 12440 }, { "epoch": 0.3812982714233174, "grad_norm": 1.7613482300379557, "learning_rate": 7.098148448614855e-06, "loss": 0.672, "step": 12441 }, { "epoch": 0.3813289199460586, "grad_norm": 1.6333922601386994, "learning_rate": 7.097697932864216e-06, "loss": 0.6273, "step": 12442 }, { "epoch": 0.3813595684687998, "grad_norm": 1.0495986547807423, "learning_rate": 7.097247396444426e-06, "loss": 0.4812, "step": 12443 }, { "epoch": 0.381390216991541, "grad_norm": 1.7953085878091275, "learning_rate": 7.096796839359918e-06, "loss": 0.7077, "step": 12444 }, { "epoch": 0.3814208655142822, "grad_norm": 1.7104426505876584, "learning_rate": 7.096346261615139e-06, "loss": 0.5877, "step": 12445 }, { "epoch": 0.38145151403702343, "grad_norm": 0.7873064333547676, "learning_rate": 7.0958956632145206e-06, "loss": 0.4521, "step": 12446 }, { "epoch": 0.38148216255976464, "grad_norm": 1.8090969761769622, "learning_rate": 7.095445044162505e-06, "loss": 0.7699, "step": 12447 }, { "epoch": 0.38151281108250584, "grad_norm": 0.7560233646309934, "learning_rate": 7.094994404463534e-06, "loss": 0.4386, "step": 12448 }, { "epoch": 0.38154345960524705, "grad_norm": 1.6907159216461878, "learning_rate": 7.09454374412205e-06, "loss": 0.727, "step": 12449 }, { "epoch": 0.38157410812798825, "grad_norm": 1.810969686903362, "learning_rate": 7.094093063142487e-06, "loss": 0.7088, "step": 12450 }, { "epoch": 0.38160475665072946, "grad_norm": 1.616519996771201, "learning_rate": 7.093642361529291e-06, "loss": 0.6999, "step": 12451 }, { "epoch": 0.38163540517347067, "grad_norm": 1.7480791647219323, "learning_rate": 7.0931916392869e-06, "loss": 0.7075, "step": 12452 }, { "epoch": 0.3816660536962118, "grad_norm": 1.9002452348966752, "learning_rate": 7.092740896419757e-06, "loss": 0.6471, "step": 12453 }, { "epoch": 0.381696702218953, "grad_norm": 1.6464615008110597, "learning_rate": 7.092290132932302e-06, "loss": 0.7225, "step": 12454 }, { "epoch": 0.3817273507416942, "grad_norm": 1.806500493529527, "learning_rate": 7.09183934882898e-06, "loss": 0.7677, "step": 12455 }, { "epoch": 0.38175799926443543, "grad_norm": 1.5881018111588645, "learning_rate": 7.091388544114225e-06, "loss": 0.609, "step": 12456 }, { "epoch": 0.38178864778717664, "grad_norm": 1.8193126909696673, "learning_rate": 7.090937718792486e-06, "loss": 0.6692, "step": 12457 }, { "epoch": 0.38181929630991784, "grad_norm": 1.8163229684018667, "learning_rate": 7.090486872868203e-06, "loss": 0.7686, "step": 12458 }, { "epoch": 0.38184994483265905, "grad_norm": 1.9161871243691158, "learning_rate": 7.090036006345816e-06, "loss": 0.6963, "step": 12459 }, { "epoch": 0.38188059335540026, "grad_norm": 1.595539292158665, "learning_rate": 7.089585119229772e-06, "loss": 0.5727, "step": 12460 }, { "epoch": 0.38191124187814146, "grad_norm": 1.5601455441797327, "learning_rate": 7.089134211524508e-06, "loss": 0.6051, "step": 12461 }, { "epoch": 0.38194189040088267, "grad_norm": 1.7681141264914182, "learning_rate": 7.088683283234474e-06, "loss": 0.7475, "step": 12462 }, { "epoch": 0.38197253892362387, "grad_norm": 1.8058768980592796, "learning_rate": 7.088232334364107e-06, "loss": 0.7386, "step": 12463 }, { "epoch": 0.3820031874463651, "grad_norm": 1.5375811011650584, "learning_rate": 7.087781364917853e-06, "loss": 0.6797, "step": 12464 }, { "epoch": 0.3820338359691063, "grad_norm": 2.014616949914649, "learning_rate": 7.087330374900154e-06, "loss": 0.6841, "step": 12465 }, { "epoch": 0.3820644844918475, "grad_norm": 1.8363827569815447, "learning_rate": 7.086879364315455e-06, "loss": 0.715, "step": 12466 }, { "epoch": 0.3820951330145887, "grad_norm": 1.810393711839318, "learning_rate": 7.0864283331682e-06, "loss": 0.7653, "step": 12467 }, { "epoch": 0.3821257815373299, "grad_norm": 1.919366971321183, "learning_rate": 7.085977281462834e-06, "loss": 0.6349, "step": 12468 }, { "epoch": 0.3821564300600711, "grad_norm": 1.7980559742204276, "learning_rate": 7.085526209203799e-06, "loss": 0.7029, "step": 12469 }, { "epoch": 0.3821870785828123, "grad_norm": 1.6903610103212077, "learning_rate": 7.08507511639554e-06, "loss": 0.6727, "step": 12470 }, { "epoch": 0.3822177271055535, "grad_norm": 1.7654483542169923, "learning_rate": 7.084624003042504e-06, "loss": 0.7154, "step": 12471 }, { "epoch": 0.3822483756282947, "grad_norm": 1.6334576343125387, "learning_rate": 7.084172869149133e-06, "loss": 0.6541, "step": 12472 }, { "epoch": 0.38227902415103593, "grad_norm": 1.8556168694383675, "learning_rate": 7.083721714719874e-06, "loss": 0.7083, "step": 12473 }, { "epoch": 0.38230967267377713, "grad_norm": 1.6034061058664388, "learning_rate": 7.0832705397591715e-06, "loss": 0.6473, "step": 12474 }, { "epoch": 0.38234032119651834, "grad_norm": 1.7435872314975704, "learning_rate": 7.082819344271472e-06, "loss": 0.6963, "step": 12475 }, { "epoch": 0.38237096971925955, "grad_norm": 1.6868182516883854, "learning_rate": 7.08236812826122e-06, "loss": 0.6216, "step": 12476 }, { "epoch": 0.38240161824200075, "grad_norm": 1.7542090520303264, "learning_rate": 7.0819168917328625e-06, "loss": 0.6218, "step": 12477 }, { "epoch": 0.38243226676474196, "grad_norm": 1.6275742666916746, "learning_rate": 7.081465634690844e-06, "loss": 0.6557, "step": 12478 }, { "epoch": 0.38246291528748316, "grad_norm": 1.6433742397652336, "learning_rate": 7.081014357139613e-06, "loss": 0.6513, "step": 12479 }, { "epoch": 0.38249356381022437, "grad_norm": 0.9022823464163364, "learning_rate": 7.080563059083616e-06, "loss": 0.457, "step": 12480 }, { "epoch": 0.3825242123329656, "grad_norm": 1.7587002562480312, "learning_rate": 7.0801117405272975e-06, "loss": 0.6565, "step": 12481 }, { "epoch": 0.3825548608557068, "grad_norm": 0.8488887158532309, "learning_rate": 7.079660401475106e-06, "loss": 0.4805, "step": 12482 }, { "epoch": 0.382585509378448, "grad_norm": 1.655140107753154, "learning_rate": 7.079209041931489e-06, "loss": 0.689, "step": 12483 }, { "epoch": 0.38261615790118914, "grad_norm": 1.7587863976614244, "learning_rate": 7.078757661900893e-06, "loss": 0.7067, "step": 12484 }, { "epoch": 0.38264680642393034, "grad_norm": 1.6107516512764437, "learning_rate": 7.078306261387765e-06, "loss": 0.6401, "step": 12485 }, { "epoch": 0.38267745494667155, "grad_norm": 0.8417742703755524, "learning_rate": 7.077854840396554e-06, "loss": 0.4566, "step": 12486 }, { "epoch": 0.38270810346941275, "grad_norm": 1.6404464789717905, "learning_rate": 7.077403398931709e-06, "loss": 0.7478, "step": 12487 }, { "epoch": 0.38273875199215396, "grad_norm": 1.8927734326231094, "learning_rate": 7.0769519369976755e-06, "loss": 0.6121, "step": 12488 }, { "epoch": 0.38276940051489516, "grad_norm": 1.8636892032507673, "learning_rate": 7.076500454598903e-06, "loss": 0.7396, "step": 12489 }, { "epoch": 0.38280004903763637, "grad_norm": 1.889860707327642, "learning_rate": 7.076048951739841e-06, "loss": 0.7319, "step": 12490 }, { "epoch": 0.3828306975603776, "grad_norm": 1.9119639931355825, "learning_rate": 7.075597428424939e-06, "loss": 0.7919, "step": 12491 }, { "epoch": 0.3828613460831188, "grad_norm": 1.6687845725232924, "learning_rate": 7.075145884658642e-06, "loss": 0.678, "step": 12492 }, { "epoch": 0.38289199460586, "grad_norm": 1.7176264157898504, "learning_rate": 7.074694320445401e-06, "loss": 0.7624, "step": 12493 }, { "epoch": 0.3829226431286012, "grad_norm": 1.5820956606812382, "learning_rate": 7.074242735789668e-06, "loss": 0.6563, "step": 12494 }, { "epoch": 0.3829532916513424, "grad_norm": 1.521110187666586, "learning_rate": 7.073791130695891e-06, "loss": 0.7193, "step": 12495 }, { "epoch": 0.3829839401740836, "grad_norm": 0.8329322975338033, "learning_rate": 7.073339505168517e-06, "loss": 0.4397, "step": 12496 }, { "epoch": 0.3830145886968248, "grad_norm": 1.637648085542418, "learning_rate": 7.072887859212001e-06, "loss": 0.7062, "step": 12497 }, { "epoch": 0.383045237219566, "grad_norm": 0.8291799806690053, "learning_rate": 7.072436192830788e-06, "loss": 0.4342, "step": 12498 }, { "epoch": 0.3830758857423072, "grad_norm": 0.768568753416181, "learning_rate": 7.071984506029333e-06, "loss": 0.4739, "step": 12499 }, { "epoch": 0.3831065342650484, "grad_norm": 1.6461777565306428, "learning_rate": 7.071532798812084e-06, "loss": 0.6374, "step": 12500 }, { "epoch": 0.38313718278778963, "grad_norm": 1.866518680691406, "learning_rate": 7.071081071183492e-06, "loss": 0.7168, "step": 12501 }, { "epoch": 0.38316783131053084, "grad_norm": 1.5824113883774278, "learning_rate": 7.0706293231480074e-06, "loss": 0.6139, "step": 12502 }, { "epoch": 0.38319847983327204, "grad_norm": 1.6909818179385276, "learning_rate": 7.070177554710085e-06, "loss": 0.63, "step": 12503 }, { "epoch": 0.38322912835601325, "grad_norm": 1.6675419821024484, "learning_rate": 7.069725765874171e-06, "loss": 0.7174, "step": 12504 }, { "epoch": 0.38325977687875445, "grad_norm": 1.7617329560493045, "learning_rate": 7.06927395664472e-06, "loss": 0.6922, "step": 12505 }, { "epoch": 0.38329042540149566, "grad_norm": 1.8697769920840686, "learning_rate": 7.068822127026183e-06, "loss": 0.6962, "step": 12506 }, { "epoch": 0.38332107392423687, "grad_norm": 1.709579798093553, "learning_rate": 7.0683702770230135e-06, "loss": 0.7058, "step": 12507 }, { "epoch": 0.38335172244697807, "grad_norm": 1.0777164750831127, "learning_rate": 7.067918406639661e-06, "loss": 0.4525, "step": 12508 }, { "epoch": 0.3833823709697193, "grad_norm": 1.6969737787204937, "learning_rate": 7.067466515880581e-06, "loss": 0.6247, "step": 12509 }, { "epoch": 0.3834130194924605, "grad_norm": 1.9555491681207569, "learning_rate": 7.067014604750223e-06, "loss": 0.6862, "step": 12510 }, { "epoch": 0.3834436680152017, "grad_norm": 1.6581488466461414, "learning_rate": 7.066562673253042e-06, "loss": 0.7193, "step": 12511 }, { "epoch": 0.3834743165379429, "grad_norm": 2.0140434199242065, "learning_rate": 7.0661107213934885e-06, "loss": 0.7126, "step": 12512 }, { "epoch": 0.3835049650606841, "grad_norm": 1.616062127345154, "learning_rate": 7.065658749176019e-06, "loss": 0.6347, "step": 12513 }, { "epoch": 0.3835356135834253, "grad_norm": 0.8198219439548036, "learning_rate": 7.0652067566050865e-06, "loss": 0.4559, "step": 12514 }, { "epoch": 0.38356626210616646, "grad_norm": 0.8371310802599218, "learning_rate": 7.064754743685141e-06, "loss": 0.4429, "step": 12515 }, { "epoch": 0.38359691062890766, "grad_norm": 1.9326329852640602, "learning_rate": 7.064302710420641e-06, "loss": 0.8029, "step": 12516 }, { "epoch": 0.38362755915164887, "grad_norm": 1.697024558440496, "learning_rate": 7.063850656816036e-06, "loss": 0.6832, "step": 12517 }, { "epoch": 0.3836582076743901, "grad_norm": 1.8170138341673014, "learning_rate": 7.063398582875785e-06, "loss": 0.688, "step": 12518 }, { "epoch": 0.3836888561971313, "grad_norm": 1.845278668706287, "learning_rate": 7.062946488604337e-06, "loss": 0.5853, "step": 12519 }, { "epoch": 0.3837195047198725, "grad_norm": 1.718421739917768, "learning_rate": 7.06249437400615e-06, "loss": 0.7509, "step": 12520 }, { "epoch": 0.3837501532426137, "grad_norm": 1.651833523011401, "learning_rate": 7.062042239085679e-06, "loss": 0.5707, "step": 12521 }, { "epoch": 0.3837808017653549, "grad_norm": 1.60074545936047, "learning_rate": 7.0615900838473785e-06, "loss": 0.6696, "step": 12522 }, { "epoch": 0.3838114502880961, "grad_norm": 1.767908589014101, "learning_rate": 7.061137908295703e-06, "loss": 0.7158, "step": 12523 }, { "epoch": 0.3838420988108373, "grad_norm": 1.8171516981162439, "learning_rate": 7.060685712435107e-06, "loss": 0.707, "step": 12524 }, { "epoch": 0.3838727473335785, "grad_norm": 1.8563072569138648, "learning_rate": 7.06023349627005e-06, "loss": 0.7423, "step": 12525 }, { "epoch": 0.3839033958563197, "grad_norm": 1.763668985300677, "learning_rate": 7.059781259804984e-06, "loss": 0.7021, "step": 12526 }, { "epoch": 0.3839340443790609, "grad_norm": 1.733807263702497, "learning_rate": 7.0593290030443665e-06, "loss": 0.7098, "step": 12527 }, { "epoch": 0.38396469290180213, "grad_norm": 1.677770025876056, "learning_rate": 7.058876725992653e-06, "loss": 0.5843, "step": 12528 }, { "epoch": 0.38399534142454333, "grad_norm": 1.6615966998929084, "learning_rate": 7.0584244286543e-06, "loss": 0.7098, "step": 12529 }, { "epoch": 0.38402598994728454, "grad_norm": 1.7618897854065645, "learning_rate": 7.0579721110337655e-06, "loss": 0.682, "step": 12530 }, { "epoch": 0.38405663847002575, "grad_norm": 1.6971212574492094, "learning_rate": 7.057519773135505e-06, "loss": 0.5475, "step": 12531 }, { "epoch": 0.38408728699276695, "grad_norm": 1.8711775776443746, "learning_rate": 7.057067414963974e-06, "loss": 0.6959, "step": 12532 }, { "epoch": 0.38411793551550816, "grad_norm": 1.841501365408269, "learning_rate": 7.056615036523633e-06, "loss": 0.7913, "step": 12533 }, { "epoch": 0.38414858403824936, "grad_norm": 1.8614122802526112, "learning_rate": 7.056162637818939e-06, "loss": 0.7925, "step": 12534 }, { "epoch": 0.38417923256099057, "grad_norm": 1.772840188049756, "learning_rate": 7.055710218854347e-06, "loss": 0.665, "step": 12535 }, { "epoch": 0.3842098810837318, "grad_norm": 1.9410148046073048, "learning_rate": 7.055257779634316e-06, "loss": 0.6287, "step": 12536 }, { "epoch": 0.384240529606473, "grad_norm": 1.6811799987513574, "learning_rate": 7.054805320163305e-06, "loss": 0.7019, "step": 12537 }, { "epoch": 0.3842711781292142, "grad_norm": 0.9624292001934165, "learning_rate": 7.05435284044577e-06, "loss": 0.453, "step": 12538 }, { "epoch": 0.3843018266519554, "grad_norm": 1.6858894151479469, "learning_rate": 7.053900340486172e-06, "loss": 0.7415, "step": 12539 }, { "epoch": 0.3843324751746966, "grad_norm": 1.5749982639072808, "learning_rate": 7.053447820288968e-06, "loss": 0.7041, "step": 12540 }, { "epoch": 0.3843631236974378, "grad_norm": 0.8307311398119095, "learning_rate": 7.052995279858619e-06, "loss": 0.473, "step": 12541 }, { "epoch": 0.384393772220179, "grad_norm": 1.550081710642525, "learning_rate": 7.05254271919958e-06, "loss": 0.6427, "step": 12542 }, { "epoch": 0.3844244207429202, "grad_norm": 1.5514997715504673, "learning_rate": 7.052090138316312e-06, "loss": 0.6067, "step": 12543 }, { "epoch": 0.3844550692656614, "grad_norm": 1.7995955167665494, "learning_rate": 7.051637537213276e-06, "loss": 0.7247, "step": 12544 }, { "epoch": 0.3844857177884026, "grad_norm": 1.73143276213533, "learning_rate": 7.051184915894932e-06, "loss": 0.7208, "step": 12545 }, { "epoch": 0.3845163663111438, "grad_norm": 0.8425183826717481, "learning_rate": 7.050732274365737e-06, "loss": 0.4671, "step": 12546 }, { "epoch": 0.384547014833885, "grad_norm": 2.027752909847489, "learning_rate": 7.050279612630151e-06, "loss": 0.7239, "step": 12547 }, { "epoch": 0.3845776633566262, "grad_norm": 1.7646663974697874, "learning_rate": 7.049826930692636e-06, "loss": 0.648, "step": 12548 }, { "epoch": 0.3846083118793674, "grad_norm": 1.738681979440453, "learning_rate": 7.0493742285576525e-06, "loss": 0.664, "step": 12549 }, { "epoch": 0.3846389604021086, "grad_norm": 1.7982734625105647, "learning_rate": 7.048921506229659e-06, "loss": 0.7678, "step": 12550 }, { "epoch": 0.3846696089248498, "grad_norm": 1.7037233695184246, "learning_rate": 7.0484687637131185e-06, "loss": 0.7474, "step": 12551 }, { "epoch": 0.384700257447591, "grad_norm": 1.8236339857462762, "learning_rate": 7.048016001012492e-06, "loss": 0.6512, "step": 12552 }, { "epoch": 0.3847309059703322, "grad_norm": 1.7301884930789346, "learning_rate": 7.047563218132238e-06, "loss": 0.6322, "step": 12553 }, { "epoch": 0.3847615544930734, "grad_norm": 0.8379653451488212, "learning_rate": 7.047110415076821e-06, "loss": 0.4559, "step": 12554 }, { "epoch": 0.3847922030158146, "grad_norm": 1.6717616436506275, "learning_rate": 7.0466575918507e-06, "loss": 0.6032, "step": 12555 }, { "epoch": 0.38482285153855583, "grad_norm": 2.272977812769102, "learning_rate": 7.04620474845834e-06, "loss": 0.784, "step": 12556 }, { "epoch": 0.38485350006129704, "grad_norm": 1.8637123565303662, "learning_rate": 7.045751884904201e-06, "loss": 0.6493, "step": 12557 }, { "epoch": 0.38488414858403824, "grad_norm": 1.7015398751860138, "learning_rate": 7.045299001192743e-06, "loss": 0.5514, "step": 12558 }, { "epoch": 0.38491479710677945, "grad_norm": 1.7683350963320437, "learning_rate": 7.0448460973284325e-06, "loss": 0.6679, "step": 12559 }, { "epoch": 0.38494544562952066, "grad_norm": 1.8751361821406198, "learning_rate": 7.0443931733157285e-06, "loss": 0.7275, "step": 12560 }, { "epoch": 0.38497609415226186, "grad_norm": 1.5634251551928733, "learning_rate": 7.043940229159099e-06, "loss": 0.6187, "step": 12561 }, { "epoch": 0.38500674267500307, "grad_norm": 1.7180800114449941, "learning_rate": 7.043487264863e-06, "loss": 0.6291, "step": 12562 }, { "epoch": 0.38503739119774427, "grad_norm": 1.7792759257116328, "learning_rate": 7.0430342804318996e-06, "loss": 0.6535, "step": 12563 }, { "epoch": 0.3850680397204855, "grad_norm": 1.9326708655173868, "learning_rate": 7.0425812758702595e-06, "loss": 0.6495, "step": 12564 }, { "epoch": 0.3850986882432267, "grad_norm": 1.7540754632871818, "learning_rate": 7.042128251182543e-06, "loss": 0.7112, "step": 12565 }, { "epoch": 0.3851293367659679, "grad_norm": 1.551495813345524, "learning_rate": 7.041675206373215e-06, "loss": 0.7218, "step": 12566 }, { "epoch": 0.3851599852887091, "grad_norm": 1.6413644575546644, "learning_rate": 7.041222141446737e-06, "loss": 0.6757, "step": 12567 }, { "epoch": 0.3851906338114503, "grad_norm": 1.778317274477018, "learning_rate": 7.0407690564075795e-06, "loss": 0.7715, "step": 12568 }, { "epoch": 0.3852212823341915, "grad_norm": 1.600529927817669, "learning_rate": 7.0403159512601985e-06, "loss": 0.5951, "step": 12569 }, { "epoch": 0.3852519308569327, "grad_norm": 1.7722837413419656, "learning_rate": 7.039862826009063e-06, "loss": 0.595, "step": 12570 }, { "epoch": 0.3852825793796739, "grad_norm": 1.998855305110881, "learning_rate": 7.039409680658636e-06, "loss": 0.7228, "step": 12571 }, { "epoch": 0.3853132279024151, "grad_norm": 1.8655436387442537, "learning_rate": 7.038956515213387e-06, "loss": 0.7431, "step": 12572 }, { "epoch": 0.38534387642515633, "grad_norm": 1.8459385923118228, "learning_rate": 7.038503329677775e-06, "loss": 0.7295, "step": 12573 }, { "epoch": 0.38537452494789753, "grad_norm": 1.748002082382606, "learning_rate": 7.03805012405627e-06, "loss": 0.6903, "step": 12574 }, { "epoch": 0.38540517347063874, "grad_norm": 1.655190732148016, "learning_rate": 7.037596898353333e-06, "loss": 0.7454, "step": 12575 }, { "epoch": 0.38543582199337995, "grad_norm": 0.8419412356737042, "learning_rate": 7.037143652573435e-06, "loss": 0.4794, "step": 12576 }, { "epoch": 0.3854664705161211, "grad_norm": 1.7640900437885103, "learning_rate": 7.036690386721038e-06, "loss": 0.6029, "step": 12577 }, { "epoch": 0.3854971190388623, "grad_norm": 1.5644186586223745, "learning_rate": 7.0362371008006104e-06, "loss": 0.5066, "step": 12578 }, { "epoch": 0.3855277675616035, "grad_norm": 1.7626361677934304, "learning_rate": 7.035783794816616e-06, "loss": 0.6925, "step": 12579 }, { "epoch": 0.3855584160843447, "grad_norm": 1.7303293788654877, "learning_rate": 7.035330468773524e-06, "loss": 0.6327, "step": 12580 }, { "epoch": 0.3855890646070859, "grad_norm": 1.976113691102881, "learning_rate": 7.034877122675801e-06, "loss": 0.7108, "step": 12581 }, { "epoch": 0.3856197131298271, "grad_norm": 1.722097434483835, "learning_rate": 7.034423756527912e-06, "loss": 0.6614, "step": 12582 }, { "epoch": 0.38565036165256833, "grad_norm": 1.5610378215511926, "learning_rate": 7.033970370334325e-06, "loss": 0.6201, "step": 12583 }, { "epoch": 0.38568101017530954, "grad_norm": 1.5582772459777205, "learning_rate": 7.033516964099508e-06, "loss": 0.6646, "step": 12584 }, { "epoch": 0.38571165869805074, "grad_norm": 1.9135013785401147, "learning_rate": 7.033063537827929e-06, "loss": 0.6632, "step": 12585 }, { "epoch": 0.38574230722079195, "grad_norm": 1.769540098352752, "learning_rate": 7.032610091524052e-06, "loss": 0.6736, "step": 12586 }, { "epoch": 0.38577295574353315, "grad_norm": 1.6619501359324254, "learning_rate": 7.032156625192353e-06, "loss": 0.6149, "step": 12587 }, { "epoch": 0.38580360426627436, "grad_norm": 1.7646846507152667, "learning_rate": 7.031703138837289e-06, "loss": 0.7456, "step": 12588 }, { "epoch": 0.38583425278901556, "grad_norm": 1.6394081794357291, "learning_rate": 7.031249632463337e-06, "loss": 0.7204, "step": 12589 }, { "epoch": 0.38586490131175677, "grad_norm": 1.6778224339482113, "learning_rate": 7.030796106074962e-06, "loss": 0.6721, "step": 12590 }, { "epoch": 0.385895549834498, "grad_norm": 1.9835748331513037, "learning_rate": 7.030342559676633e-06, "loss": 0.6082, "step": 12591 }, { "epoch": 0.3859261983572392, "grad_norm": 1.6878754868052703, "learning_rate": 7.029888993272821e-06, "loss": 0.6887, "step": 12592 }, { "epoch": 0.3859568468799804, "grad_norm": 1.7287728102659656, "learning_rate": 7.02943540686799e-06, "loss": 0.665, "step": 12593 }, { "epoch": 0.3859874954027216, "grad_norm": 0.8829288936098492, "learning_rate": 7.028981800466617e-06, "loss": 0.4855, "step": 12594 }, { "epoch": 0.3860181439254628, "grad_norm": 1.556559806179213, "learning_rate": 7.028528174073165e-06, "loss": 0.7073, "step": 12595 }, { "epoch": 0.386048792448204, "grad_norm": 1.8027102129983112, "learning_rate": 7.028074527692106e-06, "loss": 0.7725, "step": 12596 }, { "epoch": 0.3860794409709452, "grad_norm": 1.627757686894866, "learning_rate": 7.027620861327908e-06, "loss": 0.7713, "step": 12597 }, { "epoch": 0.3861100894936864, "grad_norm": 1.83941173106393, "learning_rate": 7.027167174985046e-06, "loss": 0.7043, "step": 12598 }, { "epoch": 0.3861407380164276, "grad_norm": 1.8508142512280314, "learning_rate": 7.026713468667985e-06, "loss": 0.7526, "step": 12599 }, { "epoch": 0.3861713865391688, "grad_norm": 1.836115887403888, "learning_rate": 7.026259742381199e-06, "loss": 0.7051, "step": 12600 }, { "epoch": 0.38620203506191003, "grad_norm": 1.7111623416513777, "learning_rate": 7.0258059961291555e-06, "loss": 0.6453, "step": 12601 }, { "epoch": 0.38623268358465124, "grad_norm": 1.414312941868564, "learning_rate": 7.025352229916329e-06, "loss": 0.5395, "step": 12602 }, { "epoch": 0.38626333210739244, "grad_norm": 0.8937469556090092, "learning_rate": 7.024898443747189e-06, "loss": 0.4478, "step": 12603 }, { "epoch": 0.38629398063013365, "grad_norm": 1.6226467573910117, "learning_rate": 7.024444637626206e-06, "loss": 0.7081, "step": 12604 }, { "epoch": 0.38632462915287485, "grad_norm": 1.8707654380121206, "learning_rate": 7.023990811557851e-06, "loss": 0.5779, "step": 12605 }, { "epoch": 0.38635527767561606, "grad_norm": 2.0979403246218147, "learning_rate": 7.023536965546598e-06, "loss": 0.6954, "step": 12606 }, { "epoch": 0.38638592619835727, "grad_norm": 0.7573938656753694, "learning_rate": 7.023083099596917e-06, "loss": 0.4501, "step": 12607 }, { "epoch": 0.3864165747210984, "grad_norm": 1.8267374778017103, "learning_rate": 7.022629213713279e-06, "loss": 0.7027, "step": 12608 }, { "epoch": 0.3864472232438396, "grad_norm": 1.5591360205403832, "learning_rate": 7.0221753079001595e-06, "loss": 0.6445, "step": 12609 }, { "epoch": 0.3864778717665808, "grad_norm": 1.5899005487954998, "learning_rate": 7.021721382162029e-06, "loss": 0.6092, "step": 12610 }, { "epoch": 0.38650852028932203, "grad_norm": 1.5964472701529042, "learning_rate": 7.021267436503362e-06, "loss": 0.6564, "step": 12611 }, { "epoch": 0.38653916881206324, "grad_norm": 1.8001735383825996, "learning_rate": 7.0208134709286265e-06, "loss": 0.6637, "step": 12612 }, { "epoch": 0.38656981733480444, "grad_norm": 1.981768206014162, "learning_rate": 7.020359485442302e-06, "loss": 0.7071, "step": 12613 }, { "epoch": 0.38660046585754565, "grad_norm": 1.73101842966499, "learning_rate": 7.019905480048858e-06, "loss": 0.6685, "step": 12614 }, { "epoch": 0.38663111438028686, "grad_norm": 1.8001237199129818, "learning_rate": 7.019451454752767e-06, "loss": 0.6598, "step": 12615 }, { "epoch": 0.38666176290302806, "grad_norm": 1.552309887193767, "learning_rate": 7.018997409558504e-06, "loss": 0.7581, "step": 12616 }, { "epoch": 0.38669241142576927, "grad_norm": 1.7272448098922697, "learning_rate": 7.018543344470544e-06, "loss": 0.7331, "step": 12617 }, { "epoch": 0.3867230599485105, "grad_norm": 1.7807824327963642, "learning_rate": 7.01808925949336e-06, "loss": 0.7205, "step": 12618 }, { "epoch": 0.3867537084712517, "grad_norm": 1.605772548096926, "learning_rate": 7.0176351546314256e-06, "loss": 0.7487, "step": 12619 }, { "epoch": 0.3867843569939929, "grad_norm": 1.699652098361673, "learning_rate": 7.017181029889216e-06, "loss": 0.6116, "step": 12620 }, { "epoch": 0.3868150055167341, "grad_norm": 1.7448345883936498, "learning_rate": 7.016726885271206e-06, "loss": 0.6843, "step": 12621 }, { "epoch": 0.3868456540394753, "grad_norm": 0.8624540706833134, "learning_rate": 7.01627272078187e-06, "loss": 0.4711, "step": 12622 }, { "epoch": 0.3868763025622165, "grad_norm": 1.5813556883817963, "learning_rate": 7.0158185364256825e-06, "loss": 0.7346, "step": 12623 }, { "epoch": 0.3869069510849577, "grad_norm": 1.5497509315779128, "learning_rate": 7.0153643322071195e-06, "loss": 0.6503, "step": 12624 }, { "epoch": 0.3869375996076989, "grad_norm": 1.7705733342410763, "learning_rate": 7.014910108130655e-06, "loss": 0.687, "step": 12625 }, { "epoch": 0.3869682481304401, "grad_norm": 1.7237011853557924, "learning_rate": 7.014455864200768e-06, "loss": 0.6654, "step": 12626 }, { "epoch": 0.3869988966531813, "grad_norm": 0.8130074626865187, "learning_rate": 7.01400160042193e-06, "loss": 0.4656, "step": 12627 }, { "epoch": 0.38702954517592253, "grad_norm": 2.037099733427609, "learning_rate": 7.01354731679862e-06, "loss": 0.7771, "step": 12628 }, { "epoch": 0.38706019369866373, "grad_norm": 1.5363458471023497, "learning_rate": 7.013093013335312e-06, "loss": 0.6808, "step": 12629 }, { "epoch": 0.38709084222140494, "grad_norm": 1.8109995045247582, "learning_rate": 7.012638690036485e-06, "loss": 0.5774, "step": 12630 }, { "epoch": 0.38712149074414615, "grad_norm": 1.6303647276251971, "learning_rate": 7.012184346906612e-06, "loss": 0.6423, "step": 12631 }, { "epoch": 0.38715213926688735, "grad_norm": 1.7080320246605136, "learning_rate": 7.011729983950174e-06, "loss": 0.7013, "step": 12632 }, { "epoch": 0.38718278778962856, "grad_norm": 0.8082519643273688, "learning_rate": 7.011275601171643e-06, "loss": 0.4723, "step": 12633 }, { "epoch": 0.38721343631236976, "grad_norm": 1.685914001140179, "learning_rate": 7.010821198575501e-06, "loss": 0.6458, "step": 12634 }, { "epoch": 0.38724408483511097, "grad_norm": 1.5937640463719887, "learning_rate": 7.010366776166224e-06, "loss": 0.7147, "step": 12635 }, { "epoch": 0.3872747333578522, "grad_norm": 1.8933941121665627, "learning_rate": 7.009912333948287e-06, "loss": 0.7071, "step": 12636 }, { "epoch": 0.3873053818805934, "grad_norm": 1.6142178553101856, "learning_rate": 7.009457871926169e-06, "loss": 0.6595, "step": 12637 }, { "epoch": 0.3873360304033346, "grad_norm": 1.6713093899768974, "learning_rate": 7.009003390104351e-06, "loss": 0.6901, "step": 12638 }, { "epoch": 0.38736667892607574, "grad_norm": 1.6406824686689843, "learning_rate": 7.008548888487308e-06, "loss": 0.6916, "step": 12639 }, { "epoch": 0.38739732744881694, "grad_norm": 1.681594349825039, "learning_rate": 7.008094367079516e-06, "loss": 0.576, "step": 12640 }, { "epoch": 0.38742797597155815, "grad_norm": 1.5637778158829712, "learning_rate": 7.00763982588546e-06, "loss": 0.6646, "step": 12641 }, { "epoch": 0.38745862449429935, "grad_norm": 1.8401102813821884, "learning_rate": 7.007185264909613e-06, "loss": 0.6319, "step": 12642 }, { "epoch": 0.38748927301704056, "grad_norm": 1.661769133767752, "learning_rate": 7.006730684156456e-06, "loss": 0.6831, "step": 12643 }, { "epoch": 0.38751992153978176, "grad_norm": 0.8469472334716449, "learning_rate": 7.0062760836304685e-06, "loss": 0.4907, "step": 12644 }, { "epoch": 0.38755057006252297, "grad_norm": 1.659578378341949, "learning_rate": 7.00582146333613e-06, "loss": 0.64, "step": 12645 }, { "epoch": 0.3875812185852642, "grad_norm": 1.674913681174053, "learning_rate": 7.0053668232779195e-06, "loss": 0.6602, "step": 12646 }, { "epoch": 0.3876118671080054, "grad_norm": 1.6065001012428397, "learning_rate": 7.004912163460316e-06, "loss": 0.6134, "step": 12647 }, { "epoch": 0.3876425156307466, "grad_norm": 1.6830729111569194, "learning_rate": 7.004457483887799e-06, "loss": 0.6313, "step": 12648 }, { "epoch": 0.3876731641534878, "grad_norm": 0.8078558932645793, "learning_rate": 7.004002784564852e-06, "loss": 0.4593, "step": 12649 }, { "epoch": 0.387703812676229, "grad_norm": 1.664093608196273, "learning_rate": 7.003548065495951e-06, "loss": 0.6482, "step": 12650 }, { "epoch": 0.3877344611989702, "grad_norm": 1.7896244633782834, "learning_rate": 7.003093326685578e-06, "loss": 0.6502, "step": 12651 }, { "epoch": 0.3877651097217114, "grad_norm": 1.6672678125382048, "learning_rate": 7.002638568138214e-06, "loss": 0.7447, "step": 12652 }, { "epoch": 0.3877957582444526, "grad_norm": 1.531486537405969, "learning_rate": 7.002183789858341e-06, "loss": 0.698, "step": 12653 }, { "epoch": 0.3878264067671938, "grad_norm": 1.6987061406330657, "learning_rate": 7.00172899185044e-06, "loss": 0.6601, "step": 12654 }, { "epoch": 0.387857055289935, "grad_norm": 1.6560626277899524, "learning_rate": 7.0012741741189886e-06, "loss": 0.6354, "step": 12655 }, { "epoch": 0.38788770381267623, "grad_norm": 1.5859928457495538, "learning_rate": 7.0008193366684706e-06, "loss": 0.646, "step": 12656 }, { "epoch": 0.38791835233541744, "grad_norm": 1.8454804433206013, "learning_rate": 7.00036447950337e-06, "loss": 0.7207, "step": 12657 }, { "epoch": 0.38794900085815864, "grad_norm": 1.7028600879656, "learning_rate": 6.999909602628164e-06, "loss": 0.6549, "step": 12658 }, { "epoch": 0.38797964938089985, "grad_norm": 1.7788885358533122, "learning_rate": 6.999454706047338e-06, "loss": 0.6271, "step": 12659 }, { "epoch": 0.38801029790364105, "grad_norm": 1.7742368043810637, "learning_rate": 6.998999789765372e-06, "loss": 0.7273, "step": 12660 }, { "epoch": 0.38804094642638226, "grad_norm": 1.7914735916917235, "learning_rate": 6.998544853786753e-06, "loss": 0.7547, "step": 12661 }, { "epoch": 0.38807159494912347, "grad_norm": 0.7947825446291579, "learning_rate": 6.998089898115956e-06, "loss": 0.4415, "step": 12662 }, { "epoch": 0.38810224347186467, "grad_norm": 1.5818238262784583, "learning_rate": 6.99763492275747e-06, "loss": 0.6812, "step": 12663 }, { "epoch": 0.3881328919946059, "grad_norm": 1.5880697671627038, "learning_rate": 6.997179927715777e-06, "loss": 0.7236, "step": 12664 }, { "epoch": 0.3881635405173471, "grad_norm": 3.4177724780849768, "learning_rate": 6.996724912995359e-06, "loss": 0.6253, "step": 12665 }, { "epoch": 0.3881941890400883, "grad_norm": 1.8453853513918317, "learning_rate": 6.996269878600698e-06, "loss": 0.6637, "step": 12666 }, { "epoch": 0.3882248375628295, "grad_norm": 1.9440700866720277, "learning_rate": 6.99581482453628e-06, "loss": 0.8003, "step": 12667 }, { "epoch": 0.3882554860855707, "grad_norm": 1.6520697549411745, "learning_rate": 6.995359750806587e-06, "loss": 0.7039, "step": 12668 }, { "epoch": 0.3882861346083119, "grad_norm": 1.7573801284597406, "learning_rate": 6.994904657416105e-06, "loss": 0.6584, "step": 12669 }, { "epoch": 0.38831678313105306, "grad_norm": 2.5418774115039593, "learning_rate": 6.994449544369316e-06, "loss": 0.7549, "step": 12670 }, { "epoch": 0.38834743165379426, "grad_norm": 2.8642274543711292, "learning_rate": 6.993994411670706e-06, "loss": 0.6841, "step": 12671 }, { "epoch": 0.38837808017653547, "grad_norm": 1.6931632049373855, "learning_rate": 6.9935392593247595e-06, "loss": 0.6555, "step": 12672 }, { "epoch": 0.3884087286992767, "grad_norm": 0.871118242867026, "learning_rate": 6.99308408733596e-06, "loss": 0.4646, "step": 12673 }, { "epoch": 0.3884393772220179, "grad_norm": 1.833096875741303, "learning_rate": 6.992628895708792e-06, "loss": 0.7903, "step": 12674 }, { "epoch": 0.3884700257447591, "grad_norm": 1.8070423393361357, "learning_rate": 6.992173684447743e-06, "loss": 0.7415, "step": 12675 }, { "epoch": 0.3885006742675003, "grad_norm": 1.7002493024940741, "learning_rate": 6.991718453557297e-06, "loss": 0.7133, "step": 12676 }, { "epoch": 0.3885313227902415, "grad_norm": 1.7035555127379378, "learning_rate": 6.991263203041938e-06, "loss": 0.6415, "step": 12677 }, { "epoch": 0.3885619713129827, "grad_norm": 1.703624093279578, "learning_rate": 6.990807932906154e-06, "loss": 0.6455, "step": 12678 }, { "epoch": 0.3885926198357239, "grad_norm": 1.9249162461553257, "learning_rate": 6.99035264315443e-06, "loss": 0.7155, "step": 12679 }, { "epoch": 0.3886232683584651, "grad_norm": 0.8135470568899501, "learning_rate": 6.9898973337912534e-06, "loss": 0.4487, "step": 12680 }, { "epoch": 0.3886539168812063, "grad_norm": 1.7775978075930614, "learning_rate": 6.989442004821108e-06, "loss": 0.6966, "step": 12681 }, { "epoch": 0.3886845654039475, "grad_norm": 1.770894567604518, "learning_rate": 6.988986656248482e-06, "loss": 0.6175, "step": 12682 }, { "epoch": 0.38871521392668873, "grad_norm": 1.6025021715367855, "learning_rate": 6.98853128807786e-06, "loss": 0.6698, "step": 12683 }, { "epoch": 0.38874586244942994, "grad_norm": 1.6692246295185458, "learning_rate": 6.988075900313734e-06, "loss": 0.558, "step": 12684 }, { "epoch": 0.38877651097217114, "grad_norm": 1.486427752157363, "learning_rate": 6.987620492960584e-06, "loss": 0.6306, "step": 12685 }, { "epoch": 0.38880715949491235, "grad_norm": 1.4382756540372517, "learning_rate": 6.987165066022902e-06, "loss": 0.6636, "step": 12686 }, { "epoch": 0.38883780801765355, "grad_norm": 1.5627652918928732, "learning_rate": 6.986709619505173e-06, "loss": 0.6089, "step": 12687 }, { "epoch": 0.38886845654039476, "grad_norm": 1.7500102172331096, "learning_rate": 6.986254153411888e-06, "loss": 0.6296, "step": 12688 }, { "epoch": 0.38889910506313596, "grad_norm": 1.7700675527155267, "learning_rate": 6.985798667747531e-06, "loss": 0.6312, "step": 12689 }, { "epoch": 0.38892975358587717, "grad_norm": 1.4987137790332312, "learning_rate": 6.985343162516591e-06, "loss": 0.6931, "step": 12690 }, { "epoch": 0.3889604021086184, "grad_norm": 1.9366387164104144, "learning_rate": 6.98488763772356e-06, "loss": 0.6134, "step": 12691 }, { "epoch": 0.3889910506313596, "grad_norm": 1.5537458253085297, "learning_rate": 6.9844320933729205e-06, "loss": 0.6791, "step": 12692 }, { "epoch": 0.3890216991541008, "grad_norm": 1.979792114480207, "learning_rate": 6.983976529469165e-06, "loss": 0.6357, "step": 12693 }, { "epoch": 0.389052347676842, "grad_norm": 1.543856878971336, "learning_rate": 6.983520946016779e-06, "loss": 0.5921, "step": 12694 }, { "epoch": 0.3890829961995832, "grad_norm": 1.8052283650489354, "learning_rate": 6.983065343020258e-06, "loss": 0.607, "step": 12695 }, { "epoch": 0.3891136447223244, "grad_norm": 1.5734786468080972, "learning_rate": 6.982609720484082e-06, "loss": 0.7297, "step": 12696 }, { "epoch": 0.3891442932450656, "grad_norm": 1.7242013828810228, "learning_rate": 6.9821540784127485e-06, "loss": 0.6966, "step": 12697 }, { "epoch": 0.3891749417678068, "grad_norm": 0.7860915674902687, "learning_rate": 6.981698416810742e-06, "loss": 0.4571, "step": 12698 }, { "epoch": 0.389205590290548, "grad_norm": 0.8032644671252684, "learning_rate": 6.981242735682554e-06, "loss": 0.4467, "step": 12699 }, { "epoch": 0.3892362388132892, "grad_norm": 1.6603071269745728, "learning_rate": 6.980787035032676e-06, "loss": 0.7133, "step": 12700 }, { "epoch": 0.3892668873360304, "grad_norm": 1.592393242184925, "learning_rate": 6.980331314865596e-06, "loss": 0.6236, "step": 12701 }, { "epoch": 0.3892975358587716, "grad_norm": 1.7171673290029916, "learning_rate": 6.9798755751858025e-06, "loss": 0.7159, "step": 12702 }, { "epoch": 0.3893281843815128, "grad_norm": 0.8351930567281197, "learning_rate": 6.979419815997791e-06, "loss": 0.459, "step": 12703 }, { "epoch": 0.389358832904254, "grad_norm": 1.647060617403317, "learning_rate": 6.9789640373060486e-06, "loss": 0.6666, "step": 12704 }, { "epoch": 0.3893894814269952, "grad_norm": 1.62916346123947, "learning_rate": 6.978508239115067e-06, "loss": 0.6997, "step": 12705 }, { "epoch": 0.3894201299497364, "grad_norm": 0.8299804148891916, "learning_rate": 6.9780524214293375e-06, "loss": 0.4639, "step": 12706 }, { "epoch": 0.3894507784724776, "grad_norm": 1.530051517010672, "learning_rate": 6.977596584253352e-06, "loss": 0.6777, "step": 12707 }, { "epoch": 0.3894814269952188, "grad_norm": 0.798040044080886, "learning_rate": 6.977140727591601e-06, "loss": 0.4598, "step": 12708 }, { "epoch": 0.38951207551796, "grad_norm": 0.7948781265124893, "learning_rate": 6.976684851448577e-06, "loss": 0.4419, "step": 12709 }, { "epoch": 0.3895427240407012, "grad_norm": 1.7872733548260118, "learning_rate": 6.976228955828771e-06, "loss": 0.7294, "step": 12710 }, { "epoch": 0.38957337256344243, "grad_norm": 1.716427587074317, "learning_rate": 6.975773040736675e-06, "loss": 0.7177, "step": 12711 }, { "epoch": 0.38960402108618364, "grad_norm": 1.7349072889030157, "learning_rate": 6.975317106176783e-06, "loss": 0.7013, "step": 12712 }, { "epoch": 0.38963466960892484, "grad_norm": 1.8261772982416566, "learning_rate": 6.9748611521535845e-06, "loss": 0.655, "step": 12713 }, { "epoch": 0.38966531813166605, "grad_norm": 1.4957282437106152, "learning_rate": 6.974405178671575e-06, "loss": 0.6298, "step": 12714 }, { "epoch": 0.38969596665440726, "grad_norm": 1.5944441726852834, "learning_rate": 6.973949185735246e-06, "loss": 0.6273, "step": 12715 }, { "epoch": 0.38972661517714846, "grad_norm": 1.7337019312922026, "learning_rate": 6.973493173349089e-06, "loss": 0.6771, "step": 12716 }, { "epoch": 0.38975726369988967, "grad_norm": 1.5935646553957434, "learning_rate": 6.9730371415176014e-06, "loss": 0.6296, "step": 12717 }, { "epoch": 0.3897879122226309, "grad_norm": 1.5587222655206754, "learning_rate": 6.9725810902452725e-06, "loss": 0.6245, "step": 12718 }, { "epoch": 0.3898185607453721, "grad_norm": 1.716027794347996, "learning_rate": 6.972125019536599e-06, "loss": 0.7065, "step": 12719 }, { "epoch": 0.3898492092681133, "grad_norm": 2.0326883880709703, "learning_rate": 6.971668929396071e-06, "loss": 0.6701, "step": 12720 }, { "epoch": 0.3898798577908545, "grad_norm": 2.1746826269534547, "learning_rate": 6.971212819828185e-06, "loss": 0.6605, "step": 12721 }, { "epoch": 0.3899105063135957, "grad_norm": 1.0809988153900127, "learning_rate": 6.970756690837436e-06, "loss": 0.47, "step": 12722 }, { "epoch": 0.3899411548363369, "grad_norm": 0.9477486641902411, "learning_rate": 6.970300542428315e-06, "loss": 0.4384, "step": 12723 }, { "epoch": 0.3899718033590781, "grad_norm": 1.6355719179612387, "learning_rate": 6.96984437460532e-06, "loss": 0.6132, "step": 12724 }, { "epoch": 0.3900024518818193, "grad_norm": 0.7668267500342858, "learning_rate": 6.969388187372944e-06, "loss": 0.443, "step": 12725 }, { "epoch": 0.3900331004045605, "grad_norm": 1.8060469126924077, "learning_rate": 6.968931980735683e-06, "loss": 0.5602, "step": 12726 }, { "epoch": 0.3900637489273017, "grad_norm": 1.526959393389725, "learning_rate": 6.968475754698032e-06, "loss": 0.6396, "step": 12727 }, { "epoch": 0.39009439745004293, "grad_norm": 1.7154481104983585, "learning_rate": 6.968019509264483e-06, "loss": 0.704, "step": 12728 }, { "epoch": 0.39012504597278413, "grad_norm": 1.905471296706804, "learning_rate": 6.967563244439537e-06, "loss": 0.69, "step": 12729 }, { "epoch": 0.39015569449552534, "grad_norm": 1.847946423826652, "learning_rate": 6.9671069602276854e-06, "loss": 0.6997, "step": 12730 }, { "epoch": 0.39018634301826655, "grad_norm": 1.1117043262734296, "learning_rate": 6.966650656633424e-06, "loss": 0.4652, "step": 12731 }, { "epoch": 0.3902169915410077, "grad_norm": 1.919954752068234, "learning_rate": 6.966194333661254e-06, "loss": 0.7087, "step": 12732 }, { "epoch": 0.3902476400637489, "grad_norm": 1.9400851696526562, "learning_rate": 6.965737991315667e-06, "loss": 0.6533, "step": 12733 }, { "epoch": 0.3902782885864901, "grad_norm": 1.7075700855251963, "learning_rate": 6.965281629601161e-06, "loss": 0.7162, "step": 12734 }, { "epoch": 0.3903089371092313, "grad_norm": 1.7155443122058538, "learning_rate": 6.9648252485222304e-06, "loss": 0.6792, "step": 12735 }, { "epoch": 0.3903395856319725, "grad_norm": 1.793830614571341, "learning_rate": 6.9643688480833746e-06, "loss": 0.6195, "step": 12736 }, { "epoch": 0.3903702341547137, "grad_norm": 1.8627657470146337, "learning_rate": 6.96391242828909e-06, "loss": 0.65, "step": 12737 }, { "epoch": 0.39040088267745493, "grad_norm": 0.8098942369013034, "learning_rate": 6.963455989143876e-06, "loss": 0.4488, "step": 12738 }, { "epoch": 0.39043153120019614, "grad_norm": 0.7653368801458168, "learning_rate": 6.9629995306522245e-06, "loss": 0.4379, "step": 12739 }, { "epoch": 0.39046217972293734, "grad_norm": 1.7425157222542564, "learning_rate": 6.962543052818638e-06, "loss": 0.6111, "step": 12740 }, { "epoch": 0.39049282824567855, "grad_norm": 0.7754055021492906, "learning_rate": 6.962086555647614e-06, "loss": 0.4641, "step": 12741 }, { "epoch": 0.39052347676841975, "grad_norm": 1.774002693705309, "learning_rate": 6.9616300391436456e-06, "loss": 0.6393, "step": 12742 }, { "epoch": 0.39055412529116096, "grad_norm": 1.7746747358904598, "learning_rate": 6.961173503311237e-06, "loss": 0.6905, "step": 12743 }, { "epoch": 0.39058477381390216, "grad_norm": 0.8153402098660923, "learning_rate": 6.960716948154884e-06, "loss": 0.438, "step": 12744 }, { "epoch": 0.39061542233664337, "grad_norm": 1.7412768481976593, "learning_rate": 6.960260373679085e-06, "loss": 0.7125, "step": 12745 }, { "epoch": 0.3906460708593846, "grad_norm": 1.864563060971494, "learning_rate": 6.959803779888338e-06, "loss": 0.6995, "step": 12746 }, { "epoch": 0.3906767193821258, "grad_norm": 1.7203673224176361, "learning_rate": 6.959347166787144e-06, "loss": 0.6572, "step": 12747 }, { "epoch": 0.390707367904867, "grad_norm": 1.5232167950193063, "learning_rate": 6.95889053438e-06, "loss": 0.5556, "step": 12748 }, { "epoch": 0.3907380164276082, "grad_norm": 1.5896683537506235, "learning_rate": 6.958433882671408e-06, "loss": 0.6127, "step": 12749 }, { "epoch": 0.3907686649503494, "grad_norm": 1.8382186320999734, "learning_rate": 6.957977211665865e-06, "loss": 0.7805, "step": 12750 }, { "epoch": 0.3907993134730906, "grad_norm": 0.8185325856739444, "learning_rate": 6.957520521367871e-06, "loss": 0.4565, "step": 12751 }, { "epoch": 0.3908299619958318, "grad_norm": 0.7778606345153953, "learning_rate": 6.9570638117819266e-06, "loss": 0.4504, "step": 12752 }, { "epoch": 0.390860610518573, "grad_norm": 1.6459618694949483, "learning_rate": 6.9566070829125345e-06, "loss": 0.7205, "step": 12753 }, { "epoch": 0.3908912590413142, "grad_norm": 1.8047470810102222, "learning_rate": 6.956150334764188e-06, "loss": 0.5629, "step": 12754 }, { "epoch": 0.3909219075640554, "grad_norm": 2.0622597881034364, "learning_rate": 6.9556935673413935e-06, "loss": 0.7502, "step": 12755 }, { "epoch": 0.39095255608679663, "grad_norm": 1.476840190359142, "learning_rate": 6.95523678064865e-06, "loss": 0.6769, "step": 12756 }, { "epoch": 0.39098320460953784, "grad_norm": 1.728362493115384, "learning_rate": 6.9547799746904575e-06, "loss": 0.6309, "step": 12757 }, { "epoch": 0.39101385313227904, "grad_norm": 1.7422526793158561, "learning_rate": 6.954323149471319e-06, "loss": 0.6189, "step": 12758 }, { "epoch": 0.39104450165502025, "grad_norm": 0.8568792960875706, "learning_rate": 6.953866304995733e-06, "loss": 0.4699, "step": 12759 }, { "epoch": 0.39107515017776145, "grad_norm": 0.8684078609714987, "learning_rate": 6.953409441268204e-06, "loss": 0.4749, "step": 12760 }, { "epoch": 0.39110579870050266, "grad_norm": 2.1094195201843355, "learning_rate": 6.952952558293231e-06, "loss": 0.6761, "step": 12761 }, { "epoch": 0.39113644722324387, "grad_norm": 1.6363448279486303, "learning_rate": 6.952495656075318e-06, "loss": 0.6322, "step": 12762 }, { "epoch": 0.391167095745985, "grad_norm": 1.8772298049910563, "learning_rate": 6.952038734618964e-06, "loss": 0.6401, "step": 12763 }, { "epoch": 0.3911977442687262, "grad_norm": 1.8000760826591178, "learning_rate": 6.951581793928674e-06, "loss": 0.6974, "step": 12764 }, { "epoch": 0.3912283927914674, "grad_norm": 0.7894620216619117, "learning_rate": 6.951124834008948e-06, "loss": 0.4502, "step": 12765 }, { "epoch": 0.39125904131420863, "grad_norm": 0.8056016697795857, "learning_rate": 6.950667854864293e-06, "loss": 0.4842, "step": 12766 }, { "epoch": 0.39128968983694984, "grad_norm": 1.6156362023846293, "learning_rate": 6.950210856499204e-06, "loss": 0.5239, "step": 12767 }, { "epoch": 0.39132033835969104, "grad_norm": 1.8873954403762434, "learning_rate": 6.949753838918192e-06, "loss": 0.7155, "step": 12768 }, { "epoch": 0.39135098688243225, "grad_norm": 1.7520178447802595, "learning_rate": 6.949296802125755e-06, "loss": 0.7118, "step": 12769 }, { "epoch": 0.39138163540517346, "grad_norm": 1.910964658130452, "learning_rate": 6.948839746126399e-06, "loss": 0.5817, "step": 12770 }, { "epoch": 0.39141228392791466, "grad_norm": 1.6999650657650454, "learning_rate": 6.948382670924625e-06, "loss": 0.6159, "step": 12771 }, { "epoch": 0.39144293245065587, "grad_norm": 0.860422747312449, "learning_rate": 6.947925576524939e-06, "loss": 0.4492, "step": 12772 }, { "epoch": 0.3914735809733971, "grad_norm": 1.7880058430905446, "learning_rate": 6.947468462931843e-06, "loss": 0.6654, "step": 12773 }, { "epoch": 0.3915042294961383, "grad_norm": 1.716583844626467, "learning_rate": 6.947011330149842e-06, "loss": 0.7193, "step": 12774 }, { "epoch": 0.3915348780188795, "grad_norm": 1.7880296510129128, "learning_rate": 6.94655417818344e-06, "loss": 0.6885, "step": 12775 }, { "epoch": 0.3915655265416207, "grad_norm": 1.6891656944902296, "learning_rate": 6.9460970070371425e-06, "loss": 0.6431, "step": 12776 }, { "epoch": 0.3915961750643619, "grad_norm": 1.4796139140458953, "learning_rate": 6.945639816715454e-06, "loss": 0.6366, "step": 12777 }, { "epoch": 0.3916268235871031, "grad_norm": 1.7894918566377458, "learning_rate": 6.945182607222876e-06, "loss": 0.6828, "step": 12778 }, { "epoch": 0.3916574721098443, "grad_norm": 1.8446242698084916, "learning_rate": 6.944725378563918e-06, "loss": 0.7392, "step": 12779 }, { "epoch": 0.3916881206325855, "grad_norm": 1.6677522955941477, "learning_rate": 6.944268130743083e-06, "loss": 0.6327, "step": 12780 }, { "epoch": 0.3917187691553267, "grad_norm": 1.6457542949724568, "learning_rate": 6.943810863764877e-06, "loss": 0.5945, "step": 12781 }, { "epoch": 0.3917494176780679, "grad_norm": 1.8473250379479866, "learning_rate": 6.943353577633803e-06, "loss": 0.7271, "step": 12782 }, { "epoch": 0.39178006620080913, "grad_norm": 1.8323651313782747, "learning_rate": 6.9428962723543716e-06, "loss": 0.7699, "step": 12783 }, { "epoch": 0.39181071472355034, "grad_norm": 1.7040744038439128, "learning_rate": 6.942438947931085e-06, "loss": 0.6073, "step": 12784 }, { "epoch": 0.39184136324629154, "grad_norm": 0.7975332287222682, "learning_rate": 6.94198160436845e-06, "loss": 0.4347, "step": 12785 }, { "epoch": 0.39187201176903275, "grad_norm": 1.5618508728599232, "learning_rate": 6.941524241670975e-06, "loss": 0.669, "step": 12786 }, { "epoch": 0.39190266029177395, "grad_norm": 1.651166647190108, "learning_rate": 6.941066859843163e-06, "loss": 0.5949, "step": 12787 }, { "epoch": 0.39193330881451516, "grad_norm": 1.7138495462287429, "learning_rate": 6.940609458889525e-06, "loss": 0.6587, "step": 12788 }, { "epoch": 0.39196395733725636, "grad_norm": 2.4088492549421585, "learning_rate": 6.940152038814563e-06, "loss": 0.4381, "step": 12789 }, { "epoch": 0.39199460585999757, "grad_norm": 0.8283650991135721, "learning_rate": 6.939694599622788e-06, "loss": 0.4663, "step": 12790 }, { "epoch": 0.3920252543827388, "grad_norm": 1.6558448777783836, "learning_rate": 6.939237141318704e-06, "loss": 0.6496, "step": 12791 }, { "epoch": 0.39205590290548, "grad_norm": 1.8557509550626794, "learning_rate": 6.9387796639068224e-06, "loss": 0.7312, "step": 12792 }, { "epoch": 0.3920865514282212, "grad_norm": 1.674859801184698, "learning_rate": 6.9383221673916475e-06, "loss": 0.6088, "step": 12793 }, { "epoch": 0.39211719995096234, "grad_norm": 1.6955987594026192, "learning_rate": 6.93786465177769e-06, "loss": 0.624, "step": 12794 }, { "epoch": 0.39214784847370354, "grad_norm": 1.735710910519497, "learning_rate": 6.937407117069454e-06, "loss": 0.5943, "step": 12795 }, { "epoch": 0.39217849699644475, "grad_norm": 1.7002425276368625, "learning_rate": 6.936949563271452e-06, "loss": 0.6165, "step": 12796 }, { "epoch": 0.39220914551918595, "grad_norm": 1.7982960705253035, "learning_rate": 6.936491990388189e-06, "loss": 0.7252, "step": 12797 }, { "epoch": 0.39223979404192716, "grad_norm": 1.6488466389118879, "learning_rate": 6.936034398424175e-06, "loss": 0.6354, "step": 12798 }, { "epoch": 0.39227044256466836, "grad_norm": 1.5702549822950276, "learning_rate": 6.93557678738392e-06, "loss": 0.7178, "step": 12799 }, { "epoch": 0.39230109108740957, "grad_norm": 1.735117195791221, "learning_rate": 6.9351191572719304e-06, "loss": 0.6275, "step": 12800 }, { "epoch": 0.3923317396101508, "grad_norm": 1.729155765169266, "learning_rate": 6.9346615080927175e-06, "loss": 0.7279, "step": 12801 }, { "epoch": 0.392362388132892, "grad_norm": 0.906763635762934, "learning_rate": 6.9342038398507875e-06, "loss": 0.4582, "step": 12802 }, { "epoch": 0.3923930366556332, "grad_norm": 0.8370077203268396, "learning_rate": 6.933746152550655e-06, "loss": 0.4704, "step": 12803 }, { "epoch": 0.3924236851783744, "grad_norm": 1.6358095677475146, "learning_rate": 6.933288446196825e-06, "loss": 0.7186, "step": 12804 }, { "epoch": 0.3924543337011156, "grad_norm": 0.815014318578277, "learning_rate": 6.932830720793811e-06, "loss": 0.4686, "step": 12805 }, { "epoch": 0.3924849822238568, "grad_norm": 1.6217960683738073, "learning_rate": 6.932372976346119e-06, "loss": 0.676, "step": 12806 }, { "epoch": 0.392515630746598, "grad_norm": 0.8749956222513517, "learning_rate": 6.931915212858265e-06, "loss": 0.4495, "step": 12807 }, { "epoch": 0.3925462792693392, "grad_norm": 1.846153546163894, "learning_rate": 6.931457430334753e-06, "loss": 0.7735, "step": 12808 }, { "epoch": 0.3925769277920804, "grad_norm": 1.7942164329945463, "learning_rate": 6.930999628780097e-06, "loss": 0.6428, "step": 12809 }, { "epoch": 0.3926075763148216, "grad_norm": 1.8282447014359697, "learning_rate": 6.930541808198809e-06, "loss": 0.7487, "step": 12810 }, { "epoch": 0.39263822483756283, "grad_norm": 1.7035169703975575, "learning_rate": 6.930083968595398e-06, "loss": 0.6839, "step": 12811 }, { "epoch": 0.39266887336030404, "grad_norm": 1.9281776453044426, "learning_rate": 6.929626109974377e-06, "loss": 0.7545, "step": 12812 }, { "epoch": 0.39269952188304524, "grad_norm": 1.6718661971462319, "learning_rate": 6.929168232340253e-06, "loss": 0.7312, "step": 12813 }, { "epoch": 0.39273017040578645, "grad_norm": 1.8364944143778645, "learning_rate": 6.928710335697544e-06, "loss": 0.7256, "step": 12814 }, { "epoch": 0.39276081892852766, "grad_norm": 1.6699192381094436, "learning_rate": 6.9282524200507585e-06, "loss": 0.666, "step": 12815 }, { "epoch": 0.39279146745126886, "grad_norm": 1.9766038908375703, "learning_rate": 6.927794485404407e-06, "loss": 0.7725, "step": 12816 }, { "epoch": 0.39282211597401007, "grad_norm": 1.5427094322953856, "learning_rate": 6.927336531763002e-06, "loss": 0.6792, "step": 12817 }, { "epoch": 0.39285276449675127, "grad_norm": 1.8253684867356315, "learning_rate": 6.926878559131061e-06, "loss": 0.6281, "step": 12818 }, { "epoch": 0.3928834130194925, "grad_norm": 1.5093679376845308, "learning_rate": 6.92642056751309e-06, "loss": 0.6776, "step": 12819 }, { "epoch": 0.3929140615422337, "grad_norm": 1.5316544055121653, "learning_rate": 6.925962556913605e-06, "loss": 0.5614, "step": 12820 }, { "epoch": 0.3929447100649749, "grad_norm": 1.8253346400275936, "learning_rate": 6.925504527337117e-06, "loss": 0.7215, "step": 12821 }, { "epoch": 0.3929753585877161, "grad_norm": 2.2109968619213833, "learning_rate": 6.925046478788142e-06, "loss": 0.6611, "step": 12822 }, { "epoch": 0.3930060071104573, "grad_norm": 0.8744746741890788, "learning_rate": 6.92458841127119e-06, "loss": 0.4748, "step": 12823 }, { "epoch": 0.3930366556331985, "grad_norm": 1.6885910501225752, "learning_rate": 6.924130324790776e-06, "loss": 0.6397, "step": 12824 }, { "epoch": 0.39306730415593966, "grad_norm": 1.7675412106673032, "learning_rate": 6.923672219351414e-06, "loss": 0.7229, "step": 12825 }, { "epoch": 0.39309795267868086, "grad_norm": 1.6381360654202766, "learning_rate": 6.923214094957618e-06, "loss": 0.64, "step": 12826 }, { "epoch": 0.39312860120142207, "grad_norm": 1.5276315609202775, "learning_rate": 6.922755951613901e-06, "loss": 0.6471, "step": 12827 }, { "epoch": 0.3931592497241633, "grad_norm": 1.7389559045197212, "learning_rate": 6.922297789324777e-06, "loss": 0.5621, "step": 12828 }, { "epoch": 0.3931898982469045, "grad_norm": 1.8017707019161715, "learning_rate": 6.921839608094761e-06, "loss": 0.6837, "step": 12829 }, { "epoch": 0.3932205467696457, "grad_norm": 1.604889268678593, "learning_rate": 6.92138140792837e-06, "loss": 0.7017, "step": 12830 }, { "epoch": 0.3932511952923869, "grad_norm": 1.755390270194418, "learning_rate": 6.920923188830113e-06, "loss": 0.6892, "step": 12831 }, { "epoch": 0.3932818438151281, "grad_norm": 1.6750370040788523, "learning_rate": 6.9204649508045095e-06, "loss": 0.7041, "step": 12832 }, { "epoch": 0.3933124923378693, "grad_norm": 1.6407824556769819, "learning_rate": 6.920006693856074e-06, "loss": 0.6216, "step": 12833 }, { "epoch": 0.3933431408606105, "grad_norm": 1.7481222160731953, "learning_rate": 6.919548417989321e-06, "loss": 0.7578, "step": 12834 }, { "epoch": 0.3933737893833517, "grad_norm": 0.8498210067746387, "learning_rate": 6.919090123208767e-06, "loss": 0.4731, "step": 12835 }, { "epoch": 0.3934044379060929, "grad_norm": 1.5208837032172386, "learning_rate": 6.918631809518926e-06, "loss": 0.5833, "step": 12836 }, { "epoch": 0.3934350864288341, "grad_norm": 1.7995836035036437, "learning_rate": 6.918173476924316e-06, "loss": 0.7343, "step": 12837 }, { "epoch": 0.39346573495157533, "grad_norm": 1.8029815237023334, "learning_rate": 6.917715125429452e-06, "loss": 0.7122, "step": 12838 }, { "epoch": 0.39349638347431654, "grad_norm": 1.555929613905045, "learning_rate": 6.917256755038848e-06, "loss": 0.6443, "step": 12839 }, { "epoch": 0.39352703199705774, "grad_norm": 1.6855063348680142, "learning_rate": 6.916798365757025e-06, "loss": 0.6976, "step": 12840 }, { "epoch": 0.39355768051979895, "grad_norm": 1.877829324453549, "learning_rate": 6.916339957588496e-06, "loss": 0.703, "step": 12841 }, { "epoch": 0.39358832904254015, "grad_norm": 1.8127994046274514, "learning_rate": 6.91588153053778e-06, "loss": 0.6791, "step": 12842 }, { "epoch": 0.39361897756528136, "grad_norm": 1.727699929204338, "learning_rate": 6.915423084609392e-06, "loss": 0.6515, "step": 12843 }, { "epoch": 0.39364962608802256, "grad_norm": 1.7096445625388936, "learning_rate": 6.914964619807851e-06, "loss": 0.7228, "step": 12844 }, { "epoch": 0.39368027461076377, "grad_norm": 1.9199666392800885, "learning_rate": 6.914506136137674e-06, "loss": 0.7644, "step": 12845 }, { "epoch": 0.393710923133505, "grad_norm": 1.9348219618895068, "learning_rate": 6.914047633603378e-06, "loss": 0.7129, "step": 12846 }, { "epoch": 0.3937415716562462, "grad_norm": 0.9106743425410112, "learning_rate": 6.91358911220948e-06, "loss": 0.4753, "step": 12847 }, { "epoch": 0.3937722201789874, "grad_norm": 1.7152729618215143, "learning_rate": 6.913130571960499e-06, "loss": 0.721, "step": 12848 }, { "epoch": 0.3938028687017286, "grad_norm": 1.8330881128916736, "learning_rate": 6.912672012860954e-06, "loss": 0.769, "step": 12849 }, { "epoch": 0.3938335172244698, "grad_norm": 0.7795835031878718, "learning_rate": 6.912213434915362e-06, "loss": 0.4722, "step": 12850 }, { "epoch": 0.393864165747211, "grad_norm": 1.6237081313292945, "learning_rate": 6.91175483812824e-06, "loss": 0.5959, "step": 12851 }, { "epoch": 0.3938948142699522, "grad_norm": 0.783607038016501, "learning_rate": 6.911296222504111e-06, "loss": 0.4341, "step": 12852 }, { "epoch": 0.3939254627926934, "grad_norm": 1.7436865335420524, "learning_rate": 6.910837588047491e-06, "loss": 0.7171, "step": 12853 }, { "epoch": 0.3939561113154346, "grad_norm": 1.6384395700305767, "learning_rate": 6.910378934762898e-06, "loss": 0.6949, "step": 12854 }, { "epoch": 0.3939867598381758, "grad_norm": 1.9521798638149523, "learning_rate": 6.909920262654852e-06, "loss": 0.6925, "step": 12855 }, { "epoch": 0.394017408360917, "grad_norm": 1.605679663451887, "learning_rate": 6.909461571727874e-06, "loss": 0.5139, "step": 12856 }, { "epoch": 0.3940480568836582, "grad_norm": 1.6286419976001338, "learning_rate": 6.909002861986485e-06, "loss": 0.6907, "step": 12857 }, { "epoch": 0.3940787054063994, "grad_norm": 1.6854299863175992, "learning_rate": 6.908544133435199e-06, "loss": 0.6986, "step": 12858 }, { "epoch": 0.3941093539291406, "grad_norm": 1.6820807733073577, "learning_rate": 6.90808538607854e-06, "loss": 0.7011, "step": 12859 }, { "epoch": 0.3941400024518818, "grad_norm": 1.333275035428145, "learning_rate": 6.907626619921027e-06, "loss": 0.5877, "step": 12860 }, { "epoch": 0.394170650974623, "grad_norm": 1.7456204632439525, "learning_rate": 6.907167834967183e-06, "loss": 0.6229, "step": 12861 }, { "epoch": 0.3942012994973642, "grad_norm": 1.7679834684235984, "learning_rate": 6.906709031221524e-06, "loss": 0.7108, "step": 12862 }, { "epoch": 0.3942319480201054, "grad_norm": 1.8230435416470023, "learning_rate": 6.906250208688575e-06, "loss": 0.7062, "step": 12863 }, { "epoch": 0.3942625965428466, "grad_norm": 1.7560050673429715, "learning_rate": 6.9057913673728535e-06, "loss": 0.6814, "step": 12864 }, { "epoch": 0.3942932450655878, "grad_norm": 0.9188743807309934, "learning_rate": 6.905332507278882e-06, "loss": 0.453, "step": 12865 }, { "epoch": 0.39432389358832903, "grad_norm": 2.2175165527211527, "learning_rate": 6.904873628411184e-06, "loss": 0.6916, "step": 12866 }, { "epoch": 0.39435454211107024, "grad_norm": 1.6741034374258674, "learning_rate": 6.904414730774277e-06, "loss": 0.7004, "step": 12867 }, { "epoch": 0.39438519063381144, "grad_norm": 2.065245077004732, "learning_rate": 6.903955814372684e-06, "loss": 0.7526, "step": 12868 }, { "epoch": 0.39441583915655265, "grad_norm": 1.666574323240617, "learning_rate": 6.903496879210927e-06, "loss": 0.7019, "step": 12869 }, { "epoch": 0.39444648767929386, "grad_norm": 1.4989705015924042, "learning_rate": 6.90303792529353e-06, "loss": 0.588, "step": 12870 }, { "epoch": 0.39447713620203506, "grad_norm": 1.7419143662363756, "learning_rate": 6.902578952625012e-06, "loss": 0.6865, "step": 12871 }, { "epoch": 0.39450778472477627, "grad_norm": 1.7854108155240758, "learning_rate": 6.9021199612098976e-06, "loss": 0.5822, "step": 12872 }, { "epoch": 0.3945384332475175, "grad_norm": 1.9826734464155653, "learning_rate": 6.901660951052707e-06, "loss": 0.6644, "step": 12873 }, { "epoch": 0.3945690817702587, "grad_norm": 1.9137795528426877, "learning_rate": 6.901201922157967e-06, "loss": 0.6543, "step": 12874 }, { "epoch": 0.3945997302929999, "grad_norm": 1.685837959486255, "learning_rate": 6.900742874530195e-06, "loss": 0.68, "step": 12875 }, { "epoch": 0.3946303788157411, "grad_norm": 1.5984631237108222, "learning_rate": 6.90028380817392e-06, "loss": 0.649, "step": 12876 }, { "epoch": 0.3946610273384823, "grad_norm": 1.7245710826626266, "learning_rate": 6.899824723093661e-06, "loss": 0.6967, "step": 12877 }, { "epoch": 0.3946916758612235, "grad_norm": 1.5758136480672293, "learning_rate": 6.899365619293943e-06, "loss": 0.6375, "step": 12878 }, { "epoch": 0.3947223243839647, "grad_norm": 1.9027676977841994, "learning_rate": 6.89890649677929e-06, "loss": 0.7094, "step": 12879 }, { "epoch": 0.3947529729067059, "grad_norm": 0.9410153286112082, "learning_rate": 6.898447355554225e-06, "loss": 0.4664, "step": 12880 }, { "epoch": 0.3947836214294471, "grad_norm": 0.8955716710066053, "learning_rate": 6.8979881956232734e-06, "loss": 0.4595, "step": 12881 }, { "epoch": 0.3948142699521883, "grad_norm": 1.5914866236594785, "learning_rate": 6.8975290169909555e-06, "loss": 0.6625, "step": 12882 }, { "epoch": 0.39484491847492953, "grad_norm": 1.5122520827998696, "learning_rate": 6.8970698196618016e-06, "loss": 0.6532, "step": 12883 }, { "epoch": 0.39487556699767073, "grad_norm": 1.9036342863965625, "learning_rate": 6.896610603640332e-06, "loss": 0.7152, "step": 12884 }, { "epoch": 0.39490621552041194, "grad_norm": 1.669192250543987, "learning_rate": 6.896151368931075e-06, "loss": 0.7197, "step": 12885 }, { "epoch": 0.39493686404315315, "grad_norm": 1.7488590635793642, "learning_rate": 6.8956921155385505e-06, "loss": 0.6758, "step": 12886 }, { "epoch": 0.3949675125658943, "grad_norm": 1.5236374471987029, "learning_rate": 6.895232843467289e-06, "loss": 0.682, "step": 12887 }, { "epoch": 0.3949981610886355, "grad_norm": 1.6384080814252058, "learning_rate": 6.894773552721812e-06, "loss": 0.5999, "step": 12888 }, { "epoch": 0.3950288096113767, "grad_norm": 1.820885890265432, "learning_rate": 6.8943142433066466e-06, "loss": 0.7426, "step": 12889 }, { "epoch": 0.3950594581341179, "grad_norm": 1.6629243196485897, "learning_rate": 6.893854915226318e-06, "loss": 0.6959, "step": 12890 }, { "epoch": 0.3950901066568591, "grad_norm": 1.2785081352060492, "learning_rate": 6.893395568485352e-06, "loss": 0.464, "step": 12891 }, { "epoch": 0.3951207551796003, "grad_norm": 1.4601229272345777, "learning_rate": 6.892936203088278e-06, "loss": 0.6401, "step": 12892 }, { "epoch": 0.39515140370234153, "grad_norm": 1.5885845154015903, "learning_rate": 6.892476819039616e-06, "loss": 0.6696, "step": 12893 }, { "epoch": 0.39518205222508274, "grad_norm": 0.8437906612355025, "learning_rate": 6.892017416343897e-06, "loss": 0.4472, "step": 12894 }, { "epoch": 0.39521270074782394, "grad_norm": 1.7086300108218286, "learning_rate": 6.891557995005646e-06, "loss": 0.7603, "step": 12895 }, { "epoch": 0.39524334927056515, "grad_norm": 0.7751484502057416, "learning_rate": 6.891098555029389e-06, "loss": 0.4513, "step": 12896 }, { "epoch": 0.39527399779330635, "grad_norm": 1.523388820454628, "learning_rate": 6.890639096419656e-06, "loss": 0.6067, "step": 12897 }, { "epoch": 0.39530464631604756, "grad_norm": 1.5941095002511114, "learning_rate": 6.8901796191809715e-06, "loss": 0.6911, "step": 12898 }, { "epoch": 0.39533529483878876, "grad_norm": 1.5904466678801266, "learning_rate": 6.889720123317863e-06, "loss": 0.7125, "step": 12899 }, { "epoch": 0.39536594336152997, "grad_norm": 1.770311751809896, "learning_rate": 6.88926060883486e-06, "loss": 0.589, "step": 12900 }, { "epoch": 0.3953965918842712, "grad_norm": 1.0050177238140263, "learning_rate": 6.888801075736487e-06, "loss": 0.4337, "step": 12901 }, { "epoch": 0.3954272404070124, "grad_norm": 1.8137234270619016, "learning_rate": 6.888341524027275e-06, "loss": 0.739, "step": 12902 }, { "epoch": 0.3954578889297536, "grad_norm": 1.7377569011151421, "learning_rate": 6.8878819537117514e-06, "loss": 0.7483, "step": 12903 }, { "epoch": 0.3954885374524948, "grad_norm": 1.5008542961774802, "learning_rate": 6.887422364794443e-06, "loss": 0.6285, "step": 12904 }, { "epoch": 0.395519185975236, "grad_norm": 1.586429624942359, "learning_rate": 6.886962757279878e-06, "loss": 0.5438, "step": 12905 }, { "epoch": 0.3955498344979772, "grad_norm": 1.7057851594239668, "learning_rate": 6.8865031311725885e-06, "loss": 0.7624, "step": 12906 }, { "epoch": 0.3955804830207184, "grad_norm": 1.8690651124876176, "learning_rate": 6.8860434864771e-06, "loss": 0.5968, "step": 12907 }, { "epoch": 0.3956111315434596, "grad_norm": 1.662370593737395, "learning_rate": 6.885583823197941e-06, "loss": 0.7683, "step": 12908 }, { "epoch": 0.3956417800662008, "grad_norm": 2.0056408703778543, "learning_rate": 6.885124141339643e-06, "loss": 0.6996, "step": 12909 }, { "epoch": 0.395672428588942, "grad_norm": 1.8410299999697135, "learning_rate": 6.8846644409067345e-06, "loss": 0.7274, "step": 12910 }, { "epoch": 0.39570307711168323, "grad_norm": 1.8025637756012949, "learning_rate": 6.884204721903747e-06, "loss": 0.7223, "step": 12911 }, { "epoch": 0.39573372563442444, "grad_norm": 1.6720559982091334, "learning_rate": 6.883744984335205e-06, "loss": 0.7133, "step": 12912 }, { "epoch": 0.39576437415716564, "grad_norm": 1.749670749127907, "learning_rate": 6.883285228205645e-06, "loss": 0.668, "step": 12913 }, { "epoch": 0.39579502267990685, "grad_norm": 1.7739017443733622, "learning_rate": 6.882825453519591e-06, "loss": 0.6851, "step": 12914 }, { "epoch": 0.39582567120264806, "grad_norm": 1.8420818013064661, "learning_rate": 6.882365660281578e-06, "loss": 0.6417, "step": 12915 }, { "epoch": 0.39585631972538926, "grad_norm": 1.7782661058452631, "learning_rate": 6.881905848496133e-06, "loss": 0.6896, "step": 12916 }, { "epoch": 0.39588696824813047, "grad_norm": 1.7329839025713918, "learning_rate": 6.88144601816779e-06, "loss": 0.6434, "step": 12917 }, { "epoch": 0.3959176167708716, "grad_norm": 2.088411208348128, "learning_rate": 6.880986169301076e-06, "loss": 0.7372, "step": 12918 }, { "epoch": 0.3959482652936128, "grad_norm": 1.545753598653505, "learning_rate": 6.8805263019005265e-06, "loss": 0.6757, "step": 12919 }, { "epoch": 0.39597891381635403, "grad_norm": 0.9462790887586137, "learning_rate": 6.880066415970668e-06, "loss": 0.476, "step": 12920 }, { "epoch": 0.39600956233909523, "grad_norm": 1.554014791487098, "learning_rate": 6.879606511516035e-06, "loss": 0.6226, "step": 12921 }, { "epoch": 0.39604021086183644, "grad_norm": 1.7094581900398491, "learning_rate": 6.879146588541158e-06, "loss": 0.5895, "step": 12922 }, { "epoch": 0.39607085938457764, "grad_norm": 1.6388170995963394, "learning_rate": 6.878686647050567e-06, "loss": 0.6446, "step": 12923 }, { "epoch": 0.39610150790731885, "grad_norm": 0.781746559064236, "learning_rate": 6.878226687048798e-06, "loss": 0.4757, "step": 12924 }, { "epoch": 0.39613215643006006, "grad_norm": 1.9197641414015658, "learning_rate": 6.8777667085403795e-06, "loss": 0.701, "step": 12925 }, { "epoch": 0.39616280495280126, "grad_norm": 1.5428043218400278, "learning_rate": 6.8773067115298476e-06, "loss": 0.6352, "step": 12926 }, { "epoch": 0.39619345347554247, "grad_norm": 1.6242689546660536, "learning_rate": 6.8768466960217306e-06, "loss": 0.7727, "step": 12927 }, { "epoch": 0.3962241019982837, "grad_norm": 1.7839646962956388, "learning_rate": 6.876386662020562e-06, "loss": 0.5536, "step": 12928 }, { "epoch": 0.3962547505210249, "grad_norm": 1.5952992077472954, "learning_rate": 6.875926609530876e-06, "loss": 0.5583, "step": 12929 }, { "epoch": 0.3962853990437661, "grad_norm": 1.531461145811626, "learning_rate": 6.875466538557207e-06, "loss": 0.686, "step": 12930 }, { "epoch": 0.3963160475665073, "grad_norm": 1.8111518715604313, "learning_rate": 6.8750064491040845e-06, "loss": 0.6882, "step": 12931 }, { "epoch": 0.3963466960892485, "grad_norm": 1.7138153425755234, "learning_rate": 6.874546341176045e-06, "loss": 0.699, "step": 12932 }, { "epoch": 0.3963773446119897, "grad_norm": 0.8018554139655781, "learning_rate": 6.874086214777619e-06, "loss": 0.4655, "step": 12933 }, { "epoch": 0.3964079931347309, "grad_norm": 1.59872251534501, "learning_rate": 6.873626069913344e-06, "loss": 0.597, "step": 12934 }, { "epoch": 0.3964386416574721, "grad_norm": 1.6967778625464622, "learning_rate": 6.8731659065877505e-06, "loss": 0.7017, "step": 12935 }, { "epoch": 0.3964692901802133, "grad_norm": 1.8271645526437423, "learning_rate": 6.8727057248053745e-06, "loss": 0.8143, "step": 12936 }, { "epoch": 0.3964999387029545, "grad_norm": 0.7730998746502482, "learning_rate": 6.87224552457075e-06, "loss": 0.463, "step": 12937 }, { "epoch": 0.39653058722569573, "grad_norm": 0.775033336494515, "learning_rate": 6.871785305888411e-06, "loss": 0.4357, "step": 12938 }, { "epoch": 0.39656123574843694, "grad_norm": 1.6606900680234702, "learning_rate": 6.8713250687628926e-06, "loss": 0.6074, "step": 12939 }, { "epoch": 0.39659188427117814, "grad_norm": 1.7218883020368951, "learning_rate": 6.87086481319873e-06, "loss": 0.6362, "step": 12940 }, { "epoch": 0.39662253279391935, "grad_norm": 1.6543587140423237, "learning_rate": 6.870404539200457e-06, "loss": 0.6541, "step": 12941 }, { "epoch": 0.39665318131666055, "grad_norm": 0.8084369434832033, "learning_rate": 6.869944246772611e-06, "loss": 0.4603, "step": 12942 }, { "epoch": 0.39668382983940176, "grad_norm": 1.608876052873772, "learning_rate": 6.869483935919724e-06, "loss": 0.6184, "step": 12943 }, { "epoch": 0.39671447836214296, "grad_norm": 1.5987396487648584, "learning_rate": 6.869023606646334e-06, "loss": 0.7358, "step": 12944 }, { "epoch": 0.39674512688488417, "grad_norm": 0.756207565796295, "learning_rate": 6.868563258956976e-06, "loss": 0.4543, "step": 12945 }, { "epoch": 0.3967757754076254, "grad_norm": 1.7837120586751376, "learning_rate": 6.868102892856186e-06, "loss": 0.6524, "step": 12946 }, { "epoch": 0.3968064239303666, "grad_norm": 1.6876004663558128, "learning_rate": 6.867642508348502e-06, "loss": 0.7189, "step": 12947 }, { "epoch": 0.3968370724531078, "grad_norm": 1.8575847235608882, "learning_rate": 6.867182105438457e-06, "loss": 0.6629, "step": 12948 }, { "epoch": 0.39686772097584894, "grad_norm": 1.706326267716556, "learning_rate": 6.866721684130588e-06, "loss": 0.7322, "step": 12949 }, { "epoch": 0.39689836949859014, "grad_norm": 1.5173063521611692, "learning_rate": 6.866261244429435e-06, "loss": 0.6256, "step": 12950 }, { "epoch": 0.39692901802133135, "grad_norm": 0.7921127382776443, "learning_rate": 6.86580078633953e-06, "loss": 0.4604, "step": 12951 }, { "epoch": 0.39695966654407255, "grad_norm": 1.908977251018469, "learning_rate": 6.865340309865413e-06, "loss": 0.6964, "step": 12952 }, { "epoch": 0.39699031506681376, "grad_norm": 1.589035696412955, "learning_rate": 6.864879815011622e-06, "loss": 0.674, "step": 12953 }, { "epoch": 0.39702096358955496, "grad_norm": 0.7518638764251384, "learning_rate": 6.8644193017826935e-06, "loss": 0.4624, "step": 12954 }, { "epoch": 0.39705161211229617, "grad_norm": 1.7132911208669286, "learning_rate": 6.863958770183163e-06, "loss": 0.6167, "step": 12955 }, { "epoch": 0.3970822606350374, "grad_norm": 1.8006094712032847, "learning_rate": 6.86349822021757e-06, "loss": 0.6669, "step": 12956 }, { "epoch": 0.3971129091577786, "grad_norm": 1.4942020232821083, "learning_rate": 6.863037651890453e-06, "loss": 0.5325, "step": 12957 }, { "epoch": 0.3971435576805198, "grad_norm": 0.7743523897004215, "learning_rate": 6.862577065206349e-06, "loss": 0.4484, "step": 12958 }, { "epoch": 0.397174206203261, "grad_norm": 1.4680055628914592, "learning_rate": 6.862116460169796e-06, "loss": 0.5805, "step": 12959 }, { "epoch": 0.3972048547260022, "grad_norm": 1.5721388373589482, "learning_rate": 6.8616558367853336e-06, "loss": 0.6252, "step": 12960 }, { "epoch": 0.3972355032487434, "grad_norm": 1.8026136334297993, "learning_rate": 6.861195195057501e-06, "loss": 0.7374, "step": 12961 }, { "epoch": 0.3972661517714846, "grad_norm": 1.7425066422237543, "learning_rate": 6.860734534990834e-06, "loss": 0.6479, "step": 12962 }, { "epoch": 0.3972968002942258, "grad_norm": 1.8484888196410263, "learning_rate": 6.860273856589874e-06, "loss": 0.7537, "step": 12963 }, { "epoch": 0.397327448816967, "grad_norm": 1.981690174353681, "learning_rate": 6.859813159859161e-06, "loss": 0.6344, "step": 12964 }, { "epoch": 0.3973580973397082, "grad_norm": 1.4852062600334204, "learning_rate": 6.859352444803233e-06, "loss": 0.6687, "step": 12965 }, { "epoch": 0.39738874586244943, "grad_norm": 0.8288131306067621, "learning_rate": 6.858891711426627e-06, "loss": 0.4439, "step": 12966 }, { "epoch": 0.39741939438519064, "grad_norm": 2.183390070150591, "learning_rate": 6.858430959733888e-06, "loss": 0.6246, "step": 12967 }, { "epoch": 0.39745004290793184, "grad_norm": 1.7101276899866764, "learning_rate": 6.857970189729552e-06, "loss": 0.6732, "step": 12968 }, { "epoch": 0.39748069143067305, "grad_norm": 1.8745831137416524, "learning_rate": 6.857509401418161e-06, "loss": 0.7734, "step": 12969 }, { "epoch": 0.39751133995341426, "grad_norm": 1.8497503270011701, "learning_rate": 6.857048594804254e-06, "loss": 0.6946, "step": 12970 }, { "epoch": 0.39754198847615546, "grad_norm": 1.650658178061215, "learning_rate": 6.856587769892372e-06, "loss": 0.7492, "step": 12971 }, { "epoch": 0.39757263699889667, "grad_norm": 0.7900345305326703, "learning_rate": 6.8561269266870555e-06, "loss": 0.4436, "step": 12972 }, { "epoch": 0.3976032855216379, "grad_norm": 2.0211115398435306, "learning_rate": 6.855666065192848e-06, "loss": 0.7162, "step": 12973 }, { "epoch": 0.3976339340443791, "grad_norm": 1.8399005817430043, "learning_rate": 6.855205185414284e-06, "loss": 0.6507, "step": 12974 }, { "epoch": 0.3976645825671203, "grad_norm": 1.7180561192506592, "learning_rate": 6.854744287355912e-06, "loss": 0.6749, "step": 12975 }, { "epoch": 0.3976952310898615, "grad_norm": 1.9677204871443217, "learning_rate": 6.854283371022269e-06, "loss": 0.6268, "step": 12976 }, { "epoch": 0.3977258796126027, "grad_norm": 1.553854855502085, "learning_rate": 6.853822436417896e-06, "loss": 0.6611, "step": 12977 }, { "epoch": 0.3977565281353439, "grad_norm": 1.8565637632087322, "learning_rate": 6.853361483547338e-06, "loss": 0.6833, "step": 12978 }, { "epoch": 0.3977871766580851, "grad_norm": 1.883474821725989, "learning_rate": 6.852900512415134e-06, "loss": 0.6916, "step": 12979 }, { "epoch": 0.39781782518082626, "grad_norm": 1.688915933399336, "learning_rate": 6.852439523025829e-06, "loss": 0.7097, "step": 12980 }, { "epoch": 0.39784847370356746, "grad_norm": 1.6932444089370782, "learning_rate": 6.851978515383962e-06, "loss": 0.6792, "step": 12981 }, { "epoch": 0.39787912222630867, "grad_norm": 1.6402176642898385, "learning_rate": 6.851517489494076e-06, "loss": 0.5936, "step": 12982 }, { "epoch": 0.3979097707490499, "grad_norm": 1.7404408703838945, "learning_rate": 6.851056445360714e-06, "loss": 0.6337, "step": 12983 }, { "epoch": 0.3979404192717911, "grad_norm": 1.7703689343303428, "learning_rate": 6.850595382988422e-06, "loss": 0.7113, "step": 12984 }, { "epoch": 0.3979710677945323, "grad_norm": 1.8065795909604536, "learning_rate": 6.850134302381738e-06, "loss": 0.7227, "step": 12985 }, { "epoch": 0.3980017163172735, "grad_norm": 1.678078557746351, "learning_rate": 6.849673203545208e-06, "loss": 0.6599, "step": 12986 }, { "epoch": 0.3980323648400147, "grad_norm": 1.6782954018232799, "learning_rate": 6.849212086483374e-06, "loss": 0.6977, "step": 12987 }, { "epoch": 0.3980630133627559, "grad_norm": 1.9878754978299051, "learning_rate": 6.848750951200782e-06, "loss": 0.696, "step": 12988 }, { "epoch": 0.3980936618854971, "grad_norm": 1.762454166312318, "learning_rate": 6.848289797701972e-06, "loss": 0.5908, "step": 12989 }, { "epoch": 0.3981243104082383, "grad_norm": 1.7263492881943905, "learning_rate": 6.847828625991492e-06, "loss": 0.7575, "step": 12990 }, { "epoch": 0.3981549589309795, "grad_norm": 1.6364275885613735, "learning_rate": 6.847367436073881e-06, "loss": 0.678, "step": 12991 }, { "epoch": 0.3981856074537207, "grad_norm": 2.059943385640417, "learning_rate": 6.8469062279536865e-06, "loss": 0.7559, "step": 12992 }, { "epoch": 0.39821625597646193, "grad_norm": 1.6450964324654618, "learning_rate": 6.8464450016354546e-06, "loss": 0.7743, "step": 12993 }, { "epoch": 0.39824690449920314, "grad_norm": 1.8159516440915382, "learning_rate": 6.845983757123726e-06, "loss": 0.713, "step": 12994 }, { "epoch": 0.39827755302194434, "grad_norm": 1.5273056183598364, "learning_rate": 6.845522494423047e-06, "loss": 0.6088, "step": 12995 }, { "epoch": 0.39830820154468555, "grad_norm": 0.8598230916564741, "learning_rate": 6.845061213537962e-06, "loss": 0.4394, "step": 12996 }, { "epoch": 0.39833885006742675, "grad_norm": 1.9291626814051133, "learning_rate": 6.844599914473019e-06, "loss": 0.6984, "step": 12997 }, { "epoch": 0.39836949859016796, "grad_norm": 1.6362579028064301, "learning_rate": 6.844138597232759e-06, "loss": 0.6107, "step": 12998 }, { "epoch": 0.39840014711290916, "grad_norm": 1.6654218183799068, "learning_rate": 6.843677261821732e-06, "loss": 0.7478, "step": 12999 }, { "epoch": 0.39843079563565037, "grad_norm": 1.8537939287118292, "learning_rate": 6.843215908244478e-06, "loss": 0.7647, "step": 13000 }, { "epoch": 0.3984614441583916, "grad_norm": 1.8626602728106398, "learning_rate": 6.842754536505549e-06, "loss": 0.7859, "step": 13001 }, { "epoch": 0.3984920926811328, "grad_norm": 1.6632729562607353, "learning_rate": 6.842293146609485e-06, "loss": 0.6601, "step": 13002 }, { "epoch": 0.398522741203874, "grad_norm": 1.8611420247551558, "learning_rate": 6.841831738560838e-06, "loss": 0.7491, "step": 13003 }, { "epoch": 0.3985533897266152, "grad_norm": 1.661575283125316, "learning_rate": 6.841370312364151e-06, "loss": 0.6102, "step": 13004 }, { "epoch": 0.3985840382493564, "grad_norm": 1.88652704803404, "learning_rate": 6.84090886802397e-06, "loss": 0.5895, "step": 13005 }, { "epoch": 0.3986146867720976, "grad_norm": 1.7893980247076844, "learning_rate": 6.8404474055448434e-06, "loss": 0.6936, "step": 13006 }, { "epoch": 0.3986453352948388, "grad_norm": 0.8574993398938129, "learning_rate": 6.8399859249313186e-06, "loss": 0.4403, "step": 13007 }, { "epoch": 0.39867598381758, "grad_norm": 1.897484291644445, "learning_rate": 6.839524426187941e-06, "loss": 0.7647, "step": 13008 }, { "epoch": 0.3987066323403212, "grad_norm": 0.8625286580783705, "learning_rate": 6.839062909319258e-06, "loss": 0.4722, "step": 13009 }, { "epoch": 0.3987372808630624, "grad_norm": 1.6956135477838752, "learning_rate": 6.838601374329819e-06, "loss": 0.7519, "step": 13010 }, { "epoch": 0.3987679293858036, "grad_norm": 1.7448842883292257, "learning_rate": 6.838139821224169e-06, "loss": 0.5911, "step": 13011 }, { "epoch": 0.3987985779085448, "grad_norm": 1.8217058659477083, "learning_rate": 6.837678250006859e-06, "loss": 0.6664, "step": 13012 }, { "epoch": 0.398829226431286, "grad_norm": 1.6939652553154845, "learning_rate": 6.837216660682432e-06, "loss": 0.7042, "step": 13013 }, { "epoch": 0.3988598749540272, "grad_norm": 1.7386074208919993, "learning_rate": 6.83675505325544e-06, "loss": 0.6624, "step": 13014 }, { "epoch": 0.3988905234767684, "grad_norm": 1.778435797999328, "learning_rate": 6.836293427730431e-06, "loss": 0.661, "step": 13015 }, { "epoch": 0.3989211719995096, "grad_norm": 1.7502708116549321, "learning_rate": 6.835831784111955e-06, "loss": 0.7196, "step": 13016 }, { "epoch": 0.3989518205222508, "grad_norm": 1.6998078400857266, "learning_rate": 6.835370122404555e-06, "loss": 0.6476, "step": 13017 }, { "epoch": 0.398982469044992, "grad_norm": 0.9937784245882989, "learning_rate": 6.834908442612786e-06, "loss": 0.4612, "step": 13018 }, { "epoch": 0.3990131175677332, "grad_norm": 1.6641774098811661, "learning_rate": 6.834446744741195e-06, "loss": 0.5851, "step": 13019 }, { "epoch": 0.3990437660904744, "grad_norm": 1.4446723758082787, "learning_rate": 6.8339850287943285e-06, "loss": 0.5618, "step": 13020 }, { "epoch": 0.39907441461321563, "grad_norm": 1.76113540375855, "learning_rate": 6.83352329477674e-06, "loss": 0.6845, "step": 13021 }, { "epoch": 0.39910506313595684, "grad_norm": 1.7255454393971668, "learning_rate": 6.833061542692976e-06, "loss": 0.6441, "step": 13022 }, { "epoch": 0.39913571165869804, "grad_norm": 1.7250313421398986, "learning_rate": 6.83259977254759e-06, "loss": 0.6134, "step": 13023 }, { "epoch": 0.39916636018143925, "grad_norm": 1.5575736552163721, "learning_rate": 6.8321379843451286e-06, "loss": 0.6291, "step": 13024 }, { "epoch": 0.39919700870418046, "grad_norm": 1.844784837086635, "learning_rate": 6.831676178090142e-06, "loss": 0.7399, "step": 13025 }, { "epoch": 0.39922765722692166, "grad_norm": 1.6860327925221514, "learning_rate": 6.831214353787182e-06, "loss": 0.6129, "step": 13026 }, { "epoch": 0.39925830574966287, "grad_norm": 1.638101427297558, "learning_rate": 6.8307525114407994e-06, "loss": 0.7425, "step": 13027 }, { "epoch": 0.3992889542724041, "grad_norm": 1.7063342992369397, "learning_rate": 6.830290651055541e-06, "loss": 0.6067, "step": 13028 }, { "epoch": 0.3993196027951453, "grad_norm": 0.8209143724709959, "learning_rate": 6.8298287726359625e-06, "loss": 0.4489, "step": 13029 }, { "epoch": 0.3993502513178865, "grad_norm": 1.8404080589490102, "learning_rate": 6.829366876186614e-06, "loss": 0.6857, "step": 13030 }, { "epoch": 0.3993808998406277, "grad_norm": 1.8194777927050798, "learning_rate": 6.828904961712043e-06, "loss": 0.6005, "step": 13031 }, { "epoch": 0.3994115483633689, "grad_norm": 1.9474129474068074, "learning_rate": 6.828443029216805e-06, "loss": 0.7202, "step": 13032 }, { "epoch": 0.3994421968861101, "grad_norm": 1.8061016185307353, "learning_rate": 6.82798107870545e-06, "loss": 0.6613, "step": 13033 }, { "epoch": 0.3994728454088513, "grad_norm": 1.7344284919916453, "learning_rate": 6.82751911018253e-06, "loss": 0.7359, "step": 13034 }, { "epoch": 0.3995034939315925, "grad_norm": 1.5502589220982734, "learning_rate": 6.8270571236525955e-06, "loss": 0.6905, "step": 13035 }, { "epoch": 0.3995341424543337, "grad_norm": 1.713502602709454, "learning_rate": 6.8265951191202005e-06, "loss": 0.7964, "step": 13036 }, { "epoch": 0.3995647909770749, "grad_norm": 1.6890644807933473, "learning_rate": 6.826133096589895e-06, "loss": 0.7575, "step": 13037 }, { "epoch": 0.39959543949981613, "grad_norm": 1.759141681454577, "learning_rate": 6.825671056066237e-06, "loss": 0.6992, "step": 13038 }, { "epoch": 0.39962608802255734, "grad_norm": 1.5371794394828213, "learning_rate": 6.8252089975537705e-06, "loss": 0.653, "step": 13039 }, { "epoch": 0.39965673654529854, "grad_norm": 1.8371788377256324, "learning_rate": 6.824746921057054e-06, "loss": 0.6889, "step": 13040 }, { "epoch": 0.39968738506803975, "grad_norm": 1.875803971447146, "learning_rate": 6.824284826580639e-06, "loss": 0.6855, "step": 13041 }, { "epoch": 0.3997180335907809, "grad_norm": 1.7731766773963902, "learning_rate": 6.82382271412908e-06, "loss": 0.7116, "step": 13042 }, { "epoch": 0.3997486821135221, "grad_norm": 1.6021878016450704, "learning_rate": 6.823360583706928e-06, "loss": 0.5207, "step": 13043 }, { "epoch": 0.3997793306362633, "grad_norm": 0.8728681959479901, "learning_rate": 6.822898435318739e-06, "loss": 0.4454, "step": 13044 }, { "epoch": 0.3998099791590045, "grad_norm": 1.5494970073404777, "learning_rate": 6.822436268969064e-06, "loss": 0.6635, "step": 13045 }, { "epoch": 0.3998406276817457, "grad_norm": 1.702161862701147, "learning_rate": 6.821974084662458e-06, "loss": 0.5814, "step": 13046 }, { "epoch": 0.3998712762044869, "grad_norm": 0.8043572499014161, "learning_rate": 6.821511882403477e-06, "loss": 0.4564, "step": 13047 }, { "epoch": 0.39990192472722813, "grad_norm": 1.7467356312683537, "learning_rate": 6.82104966219667e-06, "loss": 0.6264, "step": 13048 }, { "epoch": 0.39993257324996934, "grad_norm": 0.8003571633468967, "learning_rate": 6.820587424046598e-06, "loss": 0.4451, "step": 13049 }, { "epoch": 0.39996322177271054, "grad_norm": 1.7983831029662187, "learning_rate": 6.820125167957812e-06, "loss": 0.5625, "step": 13050 }, { "epoch": 0.39999387029545175, "grad_norm": 1.7311635313134968, "learning_rate": 6.819662893934866e-06, "loss": 0.6657, "step": 13051 }, { "epoch": 0.40002451881819295, "grad_norm": 0.7875613709469863, "learning_rate": 6.819200601982316e-06, "loss": 0.4408, "step": 13052 }, { "epoch": 0.40005516734093416, "grad_norm": 0.7490424911182856, "learning_rate": 6.818738292104719e-06, "loss": 0.4644, "step": 13053 }, { "epoch": 0.40008581586367536, "grad_norm": 0.7729336070045049, "learning_rate": 6.818275964306624e-06, "loss": 0.4515, "step": 13054 }, { "epoch": 0.40011646438641657, "grad_norm": 0.7564066821874743, "learning_rate": 6.817813618592595e-06, "loss": 0.4691, "step": 13055 }, { "epoch": 0.4001471129091578, "grad_norm": 1.6214130646804639, "learning_rate": 6.817351254967179e-06, "loss": 0.7572, "step": 13056 }, { "epoch": 0.400177761431899, "grad_norm": 1.9187831115199396, "learning_rate": 6.816888873434939e-06, "loss": 0.7226, "step": 13057 }, { "epoch": 0.4002084099546402, "grad_norm": 1.5423602688125984, "learning_rate": 6.816426474000428e-06, "loss": 0.69, "step": 13058 }, { "epoch": 0.4002390584773814, "grad_norm": 1.9055019950719079, "learning_rate": 6.815964056668203e-06, "loss": 0.6823, "step": 13059 }, { "epoch": 0.4002697070001226, "grad_norm": 1.7359815244571408, "learning_rate": 6.815501621442817e-06, "loss": 0.7111, "step": 13060 }, { "epoch": 0.4003003555228638, "grad_norm": 1.6035980892954151, "learning_rate": 6.815039168328831e-06, "loss": 0.6965, "step": 13061 }, { "epoch": 0.400331004045605, "grad_norm": 1.813344558195773, "learning_rate": 6.814576697330799e-06, "loss": 0.7263, "step": 13062 }, { "epoch": 0.4003616525683462, "grad_norm": 1.696934507117339, "learning_rate": 6.814114208453277e-06, "loss": 0.6449, "step": 13063 }, { "epoch": 0.4003923010910874, "grad_norm": 1.5499616498535271, "learning_rate": 6.813651701700826e-06, "loss": 0.6241, "step": 13064 }, { "epoch": 0.4004229496138286, "grad_norm": 0.8896268300167294, "learning_rate": 6.813189177078e-06, "loss": 0.4634, "step": 13065 }, { "epoch": 0.40045359813656983, "grad_norm": 1.7839860559208738, "learning_rate": 6.812726634589357e-06, "loss": 0.6493, "step": 13066 }, { "epoch": 0.40048424665931104, "grad_norm": 1.7333850986098167, "learning_rate": 6.812264074239454e-06, "loss": 0.6278, "step": 13067 }, { "epoch": 0.40051489518205224, "grad_norm": 2.0444617155470524, "learning_rate": 6.8118014960328506e-06, "loss": 0.705, "step": 13068 }, { "epoch": 0.40054554370479345, "grad_norm": 2.031329340971062, "learning_rate": 6.811338899974102e-06, "loss": 0.7383, "step": 13069 }, { "epoch": 0.40057619222753466, "grad_norm": 1.70493521572197, "learning_rate": 6.8108762860677695e-06, "loss": 0.6719, "step": 13070 }, { "epoch": 0.40060684075027586, "grad_norm": 1.6619916193873505, "learning_rate": 6.810413654318409e-06, "loss": 0.6718, "step": 13071 }, { "epoch": 0.40063748927301707, "grad_norm": 1.780990668701932, "learning_rate": 6.809951004730578e-06, "loss": 0.6308, "step": 13072 }, { "epoch": 0.4006681377957582, "grad_norm": 1.51013577206518, "learning_rate": 6.8094883373088385e-06, "loss": 0.578, "step": 13073 }, { "epoch": 0.4006987863184994, "grad_norm": 1.8588473097540712, "learning_rate": 6.809025652057747e-06, "loss": 0.6817, "step": 13074 }, { "epoch": 0.40072943484124063, "grad_norm": 1.6806993194025774, "learning_rate": 6.808562948981863e-06, "loss": 0.6276, "step": 13075 }, { "epoch": 0.40076008336398183, "grad_norm": 1.7041692080917294, "learning_rate": 6.808100228085745e-06, "loss": 0.731, "step": 13076 }, { "epoch": 0.40079073188672304, "grad_norm": 1.4106432638266044, "learning_rate": 6.807637489373954e-06, "loss": 0.6215, "step": 13077 }, { "epoch": 0.40082138040946425, "grad_norm": 1.8111906175342496, "learning_rate": 6.807174732851046e-06, "loss": 0.6583, "step": 13078 }, { "epoch": 0.40085202893220545, "grad_norm": 1.4369668958900979, "learning_rate": 6.806711958521584e-06, "loss": 0.5576, "step": 13079 }, { "epoch": 0.40088267745494666, "grad_norm": 1.5574388790799685, "learning_rate": 6.806249166390129e-06, "loss": 0.5684, "step": 13080 }, { "epoch": 0.40091332597768786, "grad_norm": 1.8104553794928946, "learning_rate": 6.805786356461237e-06, "loss": 0.6858, "step": 13081 }, { "epoch": 0.40094397450042907, "grad_norm": 1.5028954993452752, "learning_rate": 6.80532352873947e-06, "loss": 0.6268, "step": 13082 }, { "epoch": 0.4009746230231703, "grad_norm": 1.5636233990816801, "learning_rate": 6.804860683229387e-06, "loss": 0.5345, "step": 13083 }, { "epoch": 0.4010052715459115, "grad_norm": 0.9159089627719667, "learning_rate": 6.804397819935552e-06, "loss": 0.4433, "step": 13084 }, { "epoch": 0.4010359200686527, "grad_norm": 1.6315295768980722, "learning_rate": 6.803934938862523e-06, "loss": 0.5718, "step": 13085 }, { "epoch": 0.4010665685913939, "grad_norm": 1.6511192772551482, "learning_rate": 6.803472040014862e-06, "loss": 0.6734, "step": 13086 }, { "epoch": 0.4010972171141351, "grad_norm": 1.8476404149690153, "learning_rate": 6.803009123397128e-06, "loss": 0.7064, "step": 13087 }, { "epoch": 0.4011278656368763, "grad_norm": 1.705924952788557, "learning_rate": 6.802546189013886e-06, "loss": 0.69, "step": 13088 }, { "epoch": 0.4011585141596175, "grad_norm": 2.0739031807771573, "learning_rate": 6.802083236869692e-06, "loss": 0.6466, "step": 13089 }, { "epoch": 0.4011891626823587, "grad_norm": 1.7622407756291683, "learning_rate": 6.801620266969113e-06, "loss": 0.6361, "step": 13090 }, { "epoch": 0.4012198112050999, "grad_norm": 1.93865344599298, "learning_rate": 6.801157279316708e-06, "loss": 0.6636, "step": 13091 }, { "epoch": 0.4012504597278411, "grad_norm": 1.6307792140679065, "learning_rate": 6.800694273917041e-06, "loss": 0.6347, "step": 13092 }, { "epoch": 0.40128110825058233, "grad_norm": 1.6534445050757947, "learning_rate": 6.80023125077467e-06, "loss": 0.6221, "step": 13093 }, { "epoch": 0.40131175677332354, "grad_norm": 1.7444013634907214, "learning_rate": 6.799768209894162e-06, "loss": 0.733, "step": 13094 }, { "epoch": 0.40134240529606474, "grad_norm": 1.7054061761188941, "learning_rate": 6.799305151280076e-06, "loss": 0.6809, "step": 13095 }, { "epoch": 0.40137305381880595, "grad_norm": 1.644183950157889, "learning_rate": 6.798842074936978e-06, "loss": 0.6035, "step": 13096 }, { "epoch": 0.40140370234154715, "grad_norm": 1.7643164503674456, "learning_rate": 6.7983789808694255e-06, "loss": 0.6237, "step": 13097 }, { "epoch": 0.40143435086428836, "grad_norm": 1.625295511252115, "learning_rate": 6.7979158690819865e-06, "loss": 0.6582, "step": 13098 }, { "epoch": 0.40146499938702956, "grad_norm": 1.8284504102528012, "learning_rate": 6.797452739579223e-06, "loss": 0.6778, "step": 13099 }, { "epoch": 0.40149564790977077, "grad_norm": 0.829551060694743, "learning_rate": 6.796989592365697e-06, "loss": 0.435, "step": 13100 }, { "epoch": 0.401526296432512, "grad_norm": 1.6147139135245956, "learning_rate": 6.796526427445973e-06, "loss": 0.663, "step": 13101 }, { "epoch": 0.4015569449552532, "grad_norm": 1.8337047680716523, "learning_rate": 6.796063244824613e-06, "loss": 0.737, "step": 13102 }, { "epoch": 0.4015875934779944, "grad_norm": 4.6833788445246585, "learning_rate": 6.7956000445061856e-06, "loss": 0.808, "step": 13103 }, { "epoch": 0.40161824200073554, "grad_norm": 1.592626984558874, "learning_rate": 6.795136826495249e-06, "loss": 0.6031, "step": 13104 }, { "epoch": 0.40164889052347674, "grad_norm": 2.045579328424955, "learning_rate": 6.7946735907963715e-06, "loss": 0.7007, "step": 13105 }, { "epoch": 0.40167953904621795, "grad_norm": 1.6891985683920399, "learning_rate": 6.794210337414113e-06, "loss": 0.6705, "step": 13106 }, { "epoch": 0.40171018756895915, "grad_norm": 1.689439293100246, "learning_rate": 6.793747066353044e-06, "loss": 0.6739, "step": 13107 }, { "epoch": 0.40174083609170036, "grad_norm": 1.5683335479028904, "learning_rate": 6.793283777617725e-06, "loss": 0.6942, "step": 13108 }, { "epoch": 0.40177148461444157, "grad_norm": 1.8412092691209057, "learning_rate": 6.792820471212724e-06, "loss": 0.6969, "step": 13109 }, { "epoch": 0.40180213313718277, "grad_norm": 1.5530699679550541, "learning_rate": 6.792357147142601e-06, "loss": 0.6588, "step": 13110 }, { "epoch": 0.401832781659924, "grad_norm": 1.8641676545719184, "learning_rate": 6.791893805411928e-06, "loss": 0.7133, "step": 13111 }, { "epoch": 0.4018634301826652, "grad_norm": 1.5491521358872868, "learning_rate": 6.791430446025263e-06, "loss": 0.5829, "step": 13112 }, { "epoch": 0.4018940787054064, "grad_norm": 1.83865038318167, "learning_rate": 6.790967068987177e-06, "loss": 0.7299, "step": 13113 }, { "epoch": 0.4019247272281476, "grad_norm": 1.7913210087438602, "learning_rate": 6.790503674302235e-06, "loss": 0.6473, "step": 13114 }, { "epoch": 0.4019553757508888, "grad_norm": 0.8190066207272246, "learning_rate": 6.7900402619750015e-06, "loss": 0.4599, "step": 13115 }, { "epoch": 0.40198602427363, "grad_norm": 1.8293188822195419, "learning_rate": 6.789576832010044e-06, "loss": 0.6249, "step": 13116 }, { "epoch": 0.4020166727963712, "grad_norm": 1.7285397382819787, "learning_rate": 6.7891133844119276e-06, "loss": 0.661, "step": 13117 }, { "epoch": 0.4020473213191124, "grad_norm": 0.7798714413558965, "learning_rate": 6.788649919185218e-06, "loss": 0.46, "step": 13118 }, { "epoch": 0.4020779698418536, "grad_norm": 0.8039827744433375, "learning_rate": 6.788186436334485e-06, "loss": 0.4637, "step": 13119 }, { "epoch": 0.4021086183645948, "grad_norm": 1.6603393091452212, "learning_rate": 6.787722935864294e-06, "loss": 0.645, "step": 13120 }, { "epoch": 0.40213926688733603, "grad_norm": 1.6365932180094496, "learning_rate": 6.787259417779209e-06, "loss": 0.691, "step": 13121 }, { "epoch": 0.40216991541007724, "grad_norm": 1.5383970535984646, "learning_rate": 6.786795882083801e-06, "loss": 0.641, "step": 13122 }, { "epoch": 0.40220056393281844, "grad_norm": 0.7593807570276212, "learning_rate": 6.7863323287826365e-06, "loss": 0.4508, "step": 13123 }, { "epoch": 0.40223121245555965, "grad_norm": 0.735686814293279, "learning_rate": 6.785868757880283e-06, "loss": 0.4497, "step": 13124 }, { "epoch": 0.40226186097830086, "grad_norm": 0.758904134447392, "learning_rate": 6.785405169381305e-06, "loss": 0.4342, "step": 13125 }, { "epoch": 0.40229250950104206, "grad_norm": 1.64565177456025, "learning_rate": 6.784941563290276e-06, "loss": 0.6369, "step": 13126 }, { "epoch": 0.40232315802378327, "grad_norm": 1.641681436536184, "learning_rate": 6.78447793961176e-06, "loss": 0.6975, "step": 13127 }, { "epoch": 0.4023538065465245, "grad_norm": 1.5894080044280543, "learning_rate": 6.784014298350326e-06, "loss": 0.681, "step": 13128 }, { "epoch": 0.4023844550692657, "grad_norm": 1.7157154714801512, "learning_rate": 6.783550639510542e-06, "loss": 0.7338, "step": 13129 }, { "epoch": 0.4024151035920069, "grad_norm": 0.7708733596407032, "learning_rate": 6.783086963096979e-06, "loss": 0.444, "step": 13130 }, { "epoch": 0.4024457521147481, "grad_norm": 1.7941201196028298, "learning_rate": 6.782623269114203e-06, "loss": 0.6424, "step": 13131 }, { "epoch": 0.4024764006374893, "grad_norm": 1.572758104961907, "learning_rate": 6.782159557566783e-06, "loss": 0.6229, "step": 13132 }, { "epoch": 0.4025070491602305, "grad_norm": 1.8099694650369627, "learning_rate": 6.7816958284592896e-06, "loss": 0.6889, "step": 13133 }, { "epoch": 0.4025376976829717, "grad_norm": 1.4951712363101872, "learning_rate": 6.781232081796292e-06, "loss": 0.6716, "step": 13134 }, { "epoch": 0.40256834620571286, "grad_norm": 1.8312235381473265, "learning_rate": 6.780768317582358e-06, "loss": 0.6916, "step": 13135 }, { "epoch": 0.40259899472845406, "grad_norm": 1.5987162893760773, "learning_rate": 6.7803045358220575e-06, "loss": 0.6676, "step": 13136 }, { "epoch": 0.40262964325119527, "grad_norm": 0.8560043220228325, "learning_rate": 6.7798407365199624e-06, "loss": 0.4459, "step": 13137 }, { "epoch": 0.4026602917739365, "grad_norm": 1.7950719842206233, "learning_rate": 6.7793769196806414e-06, "loss": 0.6415, "step": 13138 }, { "epoch": 0.4026909402966777, "grad_norm": 0.7849427939497611, "learning_rate": 6.778913085308663e-06, "loss": 0.4565, "step": 13139 }, { "epoch": 0.4027215888194189, "grad_norm": 1.6296121327080832, "learning_rate": 6.7784492334086e-06, "loss": 0.5987, "step": 13140 }, { "epoch": 0.4027522373421601, "grad_norm": 1.815884723375786, "learning_rate": 6.77798536398502e-06, "loss": 0.7236, "step": 13141 }, { "epoch": 0.4027828858649013, "grad_norm": 0.8080296428760723, "learning_rate": 6.777521477042497e-06, "loss": 0.4738, "step": 13142 }, { "epoch": 0.4028135343876425, "grad_norm": 1.5696520957239188, "learning_rate": 6.777057572585599e-06, "loss": 0.5579, "step": 13143 }, { "epoch": 0.4028441829103837, "grad_norm": 1.749586819900728, "learning_rate": 6.776593650618899e-06, "loss": 0.755, "step": 13144 }, { "epoch": 0.4028748314331249, "grad_norm": 1.5401494674590945, "learning_rate": 6.776129711146966e-06, "loss": 0.6903, "step": 13145 }, { "epoch": 0.4029054799558661, "grad_norm": 1.8861435042468153, "learning_rate": 6.775665754174374e-06, "loss": 0.7099, "step": 13146 }, { "epoch": 0.4029361284786073, "grad_norm": 1.8078953529255786, "learning_rate": 6.775201779705692e-06, "loss": 0.7252, "step": 13147 }, { "epoch": 0.40296677700134853, "grad_norm": 1.79411448428115, "learning_rate": 6.774737787745492e-06, "loss": 0.6372, "step": 13148 }, { "epoch": 0.40299742552408974, "grad_norm": 1.6989162063043743, "learning_rate": 6.774273778298347e-06, "loss": 0.7538, "step": 13149 }, { "epoch": 0.40302807404683094, "grad_norm": 1.699374739139116, "learning_rate": 6.773809751368831e-06, "loss": 0.6303, "step": 13150 }, { "epoch": 0.40305872256957215, "grad_norm": 0.8459699339891429, "learning_rate": 6.773345706961509e-06, "loss": 0.4462, "step": 13151 }, { "epoch": 0.40308937109231335, "grad_norm": 1.5647768197370078, "learning_rate": 6.772881645080962e-06, "loss": 0.6008, "step": 13152 }, { "epoch": 0.40312001961505456, "grad_norm": 1.6400122376607578, "learning_rate": 6.772417565731756e-06, "loss": 0.7365, "step": 13153 }, { "epoch": 0.40315066813779576, "grad_norm": 1.58254744019157, "learning_rate": 6.771953468918467e-06, "loss": 0.586, "step": 13154 }, { "epoch": 0.40318131666053697, "grad_norm": 1.6146232069751103, "learning_rate": 6.771489354645668e-06, "loss": 0.6608, "step": 13155 }, { "epoch": 0.4032119651832782, "grad_norm": 1.4259223224525759, "learning_rate": 6.771025222917931e-06, "loss": 0.6365, "step": 13156 }, { "epoch": 0.4032426137060194, "grad_norm": 1.613833307833611, "learning_rate": 6.77056107373983e-06, "loss": 0.6949, "step": 13157 }, { "epoch": 0.4032732622287606, "grad_norm": 1.798732933766527, "learning_rate": 6.770096907115935e-06, "loss": 0.685, "step": 13158 }, { "epoch": 0.4033039107515018, "grad_norm": 1.8086699520824714, "learning_rate": 6.769632723050824e-06, "loss": 0.6857, "step": 13159 }, { "epoch": 0.403334559274243, "grad_norm": 1.8700447689767576, "learning_rate": 6.769168521549069e-06, "loss": 0.6899, "step": 13160 }, { "epoch": 0.4033652077969842, "grad_norm": 1.678404272810012, "learning_rate": 6.768704302615245e-06, "loss": 0.6502, "step": 13161 }, { "epoch": 0.4033958563197254, "grad_norm": 1.7549577277321664, "learning_rate": 6.768240066253923e-06, "loss": 0.6623, "step": 13162 }, { "epoch": 0.4034265048424666, "grad_norm": 2.562266333354294, "learning_rate": 6.767775812469679e-06, "loss": 0.8049, "step": 13163 }, { "epoch": 0.4034571533652078, "grad_norm": 1.6793618494775373, "learning_rate": 6.767311541267089e-06, "loss": 0.6975, "step": 13164 }, { "epoch": 0.403487801887949, "grad_norm": 1.691178994326731, "learning_rate": 6.766847252650726e-06, "loss": 0.7346, "step": 13165 }, { "epoch": 0.4035184504106902, "grad_norm": 0.7964964884897435, "learning_rate": 6.766382946625164e-06, "loss": 0.4487, "step": 13166 }, { "epoch": 0.4035490989334314, "grad_norm": 1.7257499499499565, "learning_rate": 6.76591862319498e-06, "loss": 0.6475, "step": 13167 }, { "epoch": 0.4035797474561726, "grad_norm": 0.7976638258888686, "learning_rate": 6.7654542823647475e-06, "loss": 0.4866, "step": 13168 }, { "epoch": 0.4036103959789138, "grad_norm": 1.6137274371943824, "learning_rate": 6.764989924139043e-06, "loss": 0.5307, "step": 13169 }, { "epoch": 0.403641044501655, "grad_norm": 1.6070678736961206, "learning_rate": 6.764525548522441e-06, "loss": 0.653, "step": 13170 }, { "epoch": 0.4036716930243962, "grad_norm": 1.714281447573636, "learning_rate": 6.764061155519515e-06, "loss": 0.6073, "step": 13171 }, { "epoch": 0.4037023415471374, "grad_norm": 1.8238785441235241, "learning_rate": 6.763596745134845e-06, "loss": 0.6712, "step": 13172 }, { "epoch": 0.4037329900698786, "grad_norm": 1.781152143535151, "learning_rate": 6.763132317373004e-06, "loss": 0.6888, "step": 13173 }, { "epoch": 0.4037636385926198, "grad_norm": 1.6446167397508558, "learning_rate": 6.762667872238572e-06, "loss": 0.5877, "step": 13174 }, { "epoch": 0.40379428711536103, "grad_norm": 1.835773836509297, "learning_rate": 6.762203409736119e-06, "loss": 0.705, "step": 13175 }, { "epoch": 0.40382493563810223, "grad_norm": 1.6349288265352901, "learning_rate": 6.761738929870227e-06, "loss": 0.5993, "step": 13176 }, { "epoch": 0.40385558416084344, "grad_norm": 1.7650793542408012, "learning_rate": 6.761274432645471e-06, "loss": 0.6581, "step": 13177 }, { "epoch": 0.40388623268358465, "grad_norm": 0.8651740882776284, "learning_rate": 6.7608099180664255e-06, "loss": 0.4644, "step": 13178 }, { "epoch": 0.40391688120632585, "grad_norm": 1.875813814045223, "learning_rate": 6.76034538613767e-06, "loss": 0.6612, "step": 13179 }, { "epoch": 0.40394752972906706, "grad_norm": 1.741546335721029, "learning_rate": 6.759880836863781e-06, "loss": 0.6972, "step": 13180 }, { "epoch": 0.40397817825180826, "grad_norm": 1.8095974986419052, "learning_rate": 6.759416270249337e-06, "loss": 0.7, "step": 13181 }, { "epoch": 0.40400882677454947, "grad_norm": 1.6264604561359646, "learning_rate": 6.758951686298913e-06, "loss": 0.6519, "step": 13182 }, { "epoch": 0.4040394752972907, "grad_norm": 1.689305225460867, "learning_rate": 6.758487085017088e-06, "loss": 0.6279, "step": 13183 }, { "epoch": 0.4040701238200319, "grad_norm": 1.76748003915936, "learning_rate": 6.7580224664084405e-06, "loss": 0.6663, "step": 13184 }, { "epoch": 0.4041007723427731, "grad_norm": 1.6885582776202277, "learning_rate": 6.757557830477548e-06, "loss": 0.671, "step": 13185 }, { "epoch": 0.4041314208655143, "grad_norm": 1.7762058028965713, "learning_rate": 6.757093177228987e-06, "loss": 0.711, "step": 13186 }, { "epoch": 0.4041620693882555, "grad_norm": 1.896831888335628, "learning_rate": 6.756628506667339e-06, "loss": 0.7011, "step": 13187 }, { "epoch": 0.4041927179109967, "grad_norm": 1.7738965638981554, "learning_rate": 6.7561638187971804e-06, "loss": 0.7024, "step": 13188 }, { "epoch": 0.4042233664337379, "grad_norm": 1.6424380991651415, "learning_rate": 6.755699113623091e-06, "loss": 0.6301, "step": 13189 }, { "epoch": 0.4042540149564791, "grad_norm": 1.771734625431502, "learning_rate": 6.755234391149646e-06, "loss": 0.6966, "step": 13190 }, { "epoch": 0.4042846634792203, "grad_norm": 1.6937053844934418, "learning_rate": 6.754769651381431e-06, "loss": 0.6297, "step": 13191 }, { "epoch": 0.4043153120019615, "grad_norm": 1.8480189774034506, "learning_rate": 6.75430489432302e-06, "loss": 0.6717, "step": 13192 }, { "epoch": 0.40434596052470273, "grad_norm": 1.69339640517167, "learning_rate": 6.753840119978995e-06, "loss": 0.7144, "step": 13193 }, { "epoch": 0.40437660904744394, "grad_norm": 1.673490915801232, "learning_rate": 6.753375328353933e-06, "loss": 0.622, "step": 13194 }, { "epoch": 0.40440725757018514, "grad_norm": 2.9489384982161377, "learning_rate": 6.752910519452417e-06, "loss": 0.7755, "step": 13195 }, { "epoch": 0.40443790609292635, "grad_norm": 1.7096252381833774, "learning_rate": 6.752445693279024e-06, "loss": 0.7465, "step": 13196 }, { "epoch": 0.4044685546156675, "grad_norm": 1.7306979985950695, "learning_rate": 6.751980849838336e-06, "loss": 0.6955, "step": 13197 }, { "epoch": 0.4044992031384087, "grad_norm": 1.6295103219005374, "learning_rate": 6.7515159891349314e-06, "loss": 0.6099, "step": 13198 }, { "epoch": 0.4045298516611499, "grad_norm": 1.6475262035792766, "learning_rate": 6.751051111173391e-06, "loss": 0.6558, "step": 13199 }, { "epoch": 0.4045605001838911, "grad_norm": 1.8604257555642811, "learning_rate": 6.750586215958299e-06, "loss": 0.6342, "step": 13200 }, { "epoch": 0.4045911487066323, "grad_norm": 1.7803700258392503, "learning_rate": 6.75012130349423e-06, "loss": 0.6429, "step": 13201 }, { "epoch": 0.4046217972293735, "grad_norm": 1.6624581099503128, "learning_rate": 6.749656373785769e-06, "loss": 0.727, "step": 13202 }, { "epoch": 0.40465244575211473, "grad_norm": 1.74700233123295, "learning_rate": 6.749191426837496e-06, "loss": 0.5991, "step": 13203 }, { "epoch": 0.40468309427485594, "grad_norm": 1.5371437827786494, "learning_rate": 6.748726462653994e-06, "loss": 0.5929, "step": 13204 }, { "epoch": 0.40471374279759714, "grad_norm": 1.7080720253278485, "learning_rate": 6.7482614812398405e-06, "loss": 0.6575, "step": 13205 }, { "epoch": 0.40474439132033835, "grad_norm": 1.6946267386091358, "learning_rate": 6.747796482599621e-06, "loss": 0.5942, "step": 13206 }, { "epoch": 0.40477503984307955, "grad_norm": 1.6633224402478164, "learning_rate": 6.747331466737914e-06, "loss": 0.6238, "step": 13207 }, { "epoch": 0.40480568836582076, "grad_norm": 1.9141086501776934, "learning_rate": 6.7468664336593044e-06, "loss": 0.5826, "step": 13208 }, { "epoch": 0.40483633688856197, "grad_norm": 1.5322386515913176, "learning_rate": 6.746401383368372e-06, "loss": 0.6598, "step": 13209 }, { "epoch": 0.40486698541130317, "grad_norm": 1.8555807665405595, "learning_rate": 6.7459363158697e-06, "loss": 0.6768, "step": 13210 }, { "epoch": 0.4048976339340444, "grad_norm": 1.774881247581876, "learning_rate": 6.745471231167871e-06, "loss": 0.6185, "step": 13211 }, { "epoch": 0.4049282824567856, "grad_norm": 1.736072411390974, "learning_rate": 6.745006129267467e-06, "loss": 0.6988, "step": 13212 }, { "epoch": 0.4049589309795268, "grad_norm": 0.830401998513927, "learning_rate": 6.7445410101730716e-06, "loss": 0.4602, "step": 13213 }, { "epoch": 0.404989579502268, "grad_norm": 1.7776702801393338, "learning_rate": 6.744075873889266e-06, "loss": 0.7298, "step": 13214 }, { "epoch": 0.4050202280250092, "grad_norm": 1.8470321807894774, "learning_rate": 6.743610720420637e-06, "loss": 0.7337, "step": 13215 }, { "epoch": 0.4050508765477504, "grad_norm": 0.8237139623372283, "learning_rate": 6.743145549771764e-06, "loss": 0.4525, "step": 13216 }, { "epoch": 0.4050815250704916, "grad_norm": 1.716135344914798, "learning_rate": 6.742680361947231e-06, "loss": 0.5915, "step": 13217 }, { "epoch": 0.4051121735932328, "grad_norm": 1.5927954734056855, "learning_rate": 6.742215156951624e-06, "loss": 0.5721, "step": 13218 }, { "epoch": 0.405142822115974, "grad_norm": 1.636897527119504, "learning_rate": 6.741749934789526e-06, "loss": 0.6862, "step": 13219 }, { "epoch": 0.4051734706387152, "grad_norm": 1.8074627555401357, "learning_rate": 6.741284695465518e-06, "loss": 0.6819, "step": 13220 }, { "epoch": 0.40520411916145643, "grad_norm": 1.5494396101077286, "learning_rate": 6.740819438984187e-06, "loss": 0.6368, "step": 13221 }, { "epoch": 0.40523476768419764, "grad_norm": 1.5472731766736207, "learning_rate": 6.740354165350117e-06, "loss": 0.696, "step": 13222 }, { "epoch": 0.40526541620693884, "grad_norm": 1.8802356318542894, "learning_rate": 6.739888874567893e-06, "loss": 0.6271, "step": 13223 }, { "epoch": 0.40529606472968005, "grad_norm": 1.7150214307183482, "learning_rate": 6.739423566642098e-06, "loss": 0.6865, "step": 13224 }, { "epoch": 0.40532671325242126, "grad_norm": 1.6289742546283776, "learning_rate": 6.738958241577317e-06, "loss": 0.6508, "step": 13225 }, { "epoch": 0.40535736177516246, "grad_norm": 1.7653505874219306, "learning_rate": 6.738492899378136e-06, "loss": 0.739, "step": 13226 }, { "epoch": 0.40538801029790367, "grad_norm": 1.7177106831214344, "learning_rate": 6.73802754004914e-06, "loss": 0.6256, "step": 13227 }, { "epoch": 0.4054186588206448, "grad_norm": 1.6971362424666112, "learning_rate": 6.737562163594914e-06, "loss": 0.6256, "step": 13228 }, { "epoch": 0.405449307343386, "grad_norm": 1.6571681374943599, "learning_rate": 6.737096770020042e-06, "loss": 0.5756, "step": 13229 }, { "epoch": 0.40547995586612723, "grad_norm": 2.038373669216938, "learning_rate": 6.736631359329112e-06, "loss": 0.656, "step": 13230 }, { "epoch": 0.40551060438886843, "grad_norm": 1.7044837670079231, "learning_rate": 6.736165931526711e-06, "loss": 0.7307, "step": 13231 }, { "epoch": 0.40554125291160964, "grad_norm": 0.994354832580736, "learning_rate": 6.73570048661742e-06, "loss": 0.456, "step": 13232 }, { "epoch": 0.40557190143435085, "grad_norm": 0.9427874947383883, "learning_rate": 6.735235024605829e-06, "loss": 0.4503, "step": 13233 }, { "epoch": 0.40560254995709205, "grad_norm": 1.6326378956048944, "learning_rate": 6.734769545496523e-06, "loss": 0.6331, "step": 13234 }, { "epoch": 0.40563319847983326, "grad_norm": 1.8453447531332592, "learning_rate": 6.734304049294089e-06, "loss": 0.6986, "step": 13235 }, { "epoch": 0.40566384700257446, "grad_norm": 1.6597864081505733, "learning_rate": 6.7338385360031135e-06, "loss": 0.6491, "step": 13236 }, { "epoch": 0.40569449552531567, "grad_norm": 1.551932637850828, "learning_rate": 6.7333730056281825e-06, "loss": 0.5729, "step": 13237 }, { "epoch": 0.4057251440480569, "grad_norm": 1.5436735959277748, "learning_rate": 6.732907458173885e-06, "loss": 0.5961, "step": 13238 }, { "epoch": 0.4057557925707981, "grad_norm": 1.8305925911435372, "learning_rate": 6.732441893644807e-06, "loss": 0.6785, "step": 13239 }, { "epoch": 0.4057864410935393, "grad_norm": 1.88844857485183, "learning_rate": 6.731976312045534e-06, "loss": 0.7326, "step": 13240 }, { "epoch": 0.4058170896162805, "grad_norm": 1.853968074218349, "learning_rate": 6.731510713380657e-06, "loss": 0.6694, "step": 13241 }, { "epoch": 0.4058477381390217, "grad_norm": 1.7415013553351997, "learning_rate": 6.7310450976547616e-06, "loss": 0.7114, "step": 13242 }, { "epoch": 0.4058783866617629, "grad_norm": 1.9991512378098288, "learning_rate": 6.730579464872435e-06, "loss": 0.6876, "step": 13243 }, { "epoch": 0.4059090351845041, "grad_norm": 1.6975590111502834, "learning_rate": 6.730113815038266e-06, "loss": 0.6899, "step": 13244 }, { "epoch": 0.4059396837072453, "grad_norm": 1.3492046558045119, "learning_rate": 6.729648148156844e-06, "loss": 0.534, "step": 13245 }, { "epoch": 0.4059703322299865, "grad_norm": 1.5097359607744514, "learning_rate": 6.729182464232758e-06, "loss": 0.5781, "step": 13246 }, { "epoch": 0.4060009807527277, "grad_norm": 1.3553420887476095, "learning_rate": 6.728716763270592e-06, "loss": 0.4732, "step": 13247 }, { "epoch": 0.40603162927546893, "grad_norm": 1.8152170366583527, "learning_rate": 6.728251045274937e-06, "loss": 0.6905, "step": 13248 }, { "epoch": 0.40606227779821014, "grad_norm": 1.0103834238422302, "learning_rate": 6.727785310250384e-06, "loss": 0.4488, "step": 13249 }, { "epoch": 0.40609292632095134, "grad_norm": 1.5608268889916268, "learning_rate": 6.72731955820152e-06, "loss": 0.6514, "step": 13250 }, { "epoch": 0.40612357484369255, "grad_norm": 1.6795972782383173, "learning_rate": 6.726853789132933e-06, "loss": 0.6513, "step": 13251 }, { "epoch": 0.40615422336643375, "grad_norm": 1.7275362279879782, "learning_rate": 6.7263880030492155e-06, "loss": 0.6885, "step": 13252 }, { "epoch": 0.40618487188917496, "grad_norm": 1.665918261825264, "learning_rate": 6.725922199954955e-06, "loss": 0.5594, "step": 13253 }, { "epoch": 0.40621552041191616, "grad_norm": 1.9120571405685935, "learning_rate": 6.725456379854742e-06, "loss": 0.7326, "step": 13254 }, { "epoch": 0.40624616893465737, "grad_norm": 1.7632051144024008, "learning_rate": 6.724990542753164e-06, "loss": 0.6606, "step": 13255 }, { "epoch": 0.4062768174573986, "grad_norm": 1.725045431740236, "learning_rate": 6.724524688654814e-06, "loss": 0.6387, "step": 13256 }, { "epoch": 0.4063074659801398, "grad_norm": 1.4129013698146562, "learning_rate": 6.72405881756428e-06, "loss": 0.4719, "step": 13257 }, { "epoch": 0.406338114502881, "grad_norm": 1.2184057838643823, "learning_rate": 6.723592929486156e-06, "loss": 0.463, "step": 13258 }, { "epoch": 0.4063687630256222, "grad_norm": 1.7792447988990798, "learning_rate": 6.7231270244250266e-06, "loss": 0.6691, "step": 13259 }, { "epoch": 0.40639941154836334, "grad_norm": 1.6574595011336155, "learning_rate": 6.722661102385488e-06, "loss": 0.7098, "step": 13260 }, { "epoch": 0.40643006007110455, "grad_norm": 0.7785400297523631, "learning_rate": 6.722195163372128e-06, "loss": 0.4517, "step": 13261 }, { "epoch": 0.40646070859384575, "grad_norm": 1.503989456924655, "learning_rate": 6.721729207389538e-06, "loss": 0.6441, "step": 13262 }, { "epoch": 0.40649135711658696, "grad_norm": 1.730674627979536, "learning_rate": 6.72126323444231e-06, "loss": 0.6062, "step": 13263 }, { "epoch": 0.40652200563932817, "grad_norm": 1.8076549010874021, "learning_rate": 6.720797244535036e-06, "loss": 0.627, "step": 13264 }, { "epoch": 0.40655265416206937, "grad_norm": 1.5826335144465857, "learning_rate": 6.720331237672305e-06, "loss": 0.6578, "step": 13265 }, { "epoch": 0.4065833026848106, "grad_norm": 1.7971890615222825, "learning_rate": 6.71986521385871e-06, "loss": 0.7266, "step": 13266 }, { "epoch": 0.4066139512075518, "grad_norm": 2.7683575574969095, "learning_rate": 6.7193991730988435e-06, "loss": 0.7347, "step": 13267 }, { "epoch": 0.406644599730293, "grad_norm": 1.923298189849223, "learning_rate": 6.718933115397296e-06, "loss": 0.6968, "step": 13268 }, { "epoch": 0.4066752482530342, "grad_norm": 1.6544954307951867, "learning_rate": 6.718467040758663e-06, "loss": 0.6111, "step": 13269 }, { "epoch": 0.4067058967757754, "grad_norm": 1.6869069538489052, "learning_rate": 6.718000949187533e-06, "loss": 0.6744, "step": 13270 }, { "epoch": 0.4067365452985166, "grad_norm": 1.6823221251174425, "learning_rate": 6.7175348406884995e-06, "loss": 0.7072, "step": 13271 }, { "epoch": 0.4067671938212578, "grad_norm": 1.8011176177314978, "learning_rate": 6.717068715266157e-06, "loss": 0.6736, "step": 13272 }, { "epoch": 0.406797842343999, "grad_norm": 1.6517294576428507, "learning_rate": 6.716602572925099e-06, "loss": 0.635, "step": 13273 }, { "epoch": 0.4068284908667402, "grad_norm": 1.5683946182990909, "learning_rate": 6.716136413669912e-06, "loss": 0.7026, "step": 13274 }, { "epoch": 0.40685913938948143, "grad_norm": 1.6665598637559491, "learning_rate": 6.715670237505198e-06, "loss": 0.6461, "step": 13275 }, { "epoch": 0.40688978791222263, "grad_norm": 1.582627102556214, "learning_rate": 6.715204044435543e-06, "loss": 0.6724, "step": 13276 }, { "epoch": 0.40692043643496384, "grad_norm": 1.8179601738261164, "learning_rate": 6.7147378344655455e-06, "loss": 0.6879, "step": 13277 }, { "epoch": 0.40695108495770504, "grad_norm": 1.781269672237062, "learning_rate": 6.714271607599797e-06, "loss": 0.7231, "step": 13278 }, { "epoch": 0.40698173348044625, "grad_norm": 1.760709623789008, "learning_rate": 6.713805363842893e-06, "loss": 0.64, "step": 13279 }, { "epoch": 0.40701238200318746, "grad_norm": 1.8732699407350653, "learning_rate": 6.7133391031994236e-06, "loss": 0.7258, "step": 13280 }, { "epoch": 0.40704303052592866, "grad_norm": 1.8451849137302134, "learning_rate": 6.712872825673987e-06, "loss": 0.7222, "step": 13281 }, { "epoch": 0.40707367904866987, "grad_norm": 1.55621959146622, "learning_rate": 6.712406531271176e-06, "loss": 0.5977, "step": 13282 }, { "epoch": 0.4071043275714111, "grad_norm": 1.7484437945758584, "learning_rate": 6.711940219995585e-06, "loss": 0.6692, "step": 13283 }, { "epoch": 0.4071349760941523, "grad_norm": 1.4921399405065698, "learning_rate": 6.711473891851812e-06, "loss": 0.4624, "step": 13284 }, { "epoch": 0.4071656246168935, "grad_norm": 1.7792139198610648, "learning_rate": 6.711007546844444e-06, "loss": 0.6894, "step": 13285 }, { "epoch": 0.4071962731396347, "grad_norm": 1.4689017664701007, "learning_rate": 6.710541184978084e-06, "loss": 0.5964, "step": 13286 }, { "epoch": 0.4072269216623759, "grad_norm": 1.9057443931632077, "learning_rate": 6.7100748062573225e-06, "loss": 0.6113, "step": 13287 }, { "epoch": 0.4072575701851171, "grad_norm": 2.075697691948813, "learning_rate": 6.709608410686759e-06, "loss": 0.7004, "step": 13288 }, { "epoch": 0.4072882187078583, "grad_norm": 1.6155958023286499, "learning_rate": 6.7091419982709836e-06, "loss": 0.5908, "step": 13289 }, { "epoch": 0.4073188672305995, "grad_norm": 1.8015312826913084, "learning_rate": 6.7086755690145965e-06, "loss": 0.5726, "step": 13290 }, { "epoch": 0.40734951575334066, "grad_norm": 1.4944458253149349, "learning_rate": 6.7082091229221904e-06, "loss": 0.6149, "step": 13291 }, { "epoch": 0.40738016427608187, "grad_norm": 1.7695670064103626, "learning_rate": 6.707742659998364e-06, "loss": 0.7767, "step": 13292 }, { "epoch": 0.4074108127988231, "grad_norm": 1.7510662665325325, "learning_rate": 6.707276180247712e-06, "loss": 0.6399, "step": 13293 }, { "epoch": 0.4074414613215643, "grad_norm": 1.6029192856903507, "learning_rate": 6.706809683674829e-06, "loss": 0.6909, "step": 13294 }, { "epoch": 0.4074721098443055, "grad_norm": 0.9373124174270224, "learning_rate": 6.706343170284315e-06, "loss": 0.449, "step": 13295 }, { "epoch": 0.4075027583670467, "grad_norm": 1.628812811804074, "learning_rate": 6.705876640080766e-06, "loss": 0.7653, "step": 13296 }, { "epoch": 0.4075334068897879, "grad_norm": 1.4715687196462586, "learning_rate": 6.7054100930687785e-06, "loss": 0.6396, "step": 13297 }, { "epoch": 0.4075640554125291, "grad_norm": 1.9167074603516827, "learning_rate": 6.704943529252947e-06, "loss": 0.6834, "step": 13298 }, { "epoch": 0.4075947039352703, "grad_norm": 1.730822283211162, "learning_rate": 6.7044769486378715e-06, "loss": 0.7104, "step": 13299 }, { "epoch": 0.4076253524580115, "grad_norm": 1.64433090491783, "learning_rate": 6.704010351228149e-06, "loss": 0.671, "step": 13300 }, { "epoch": 0.4076560009807527, "grad_norm": 1.60309506977877, "learning_rate": 6.703543737028375e-06, "loss": 0.7081, "step": 13301 }, { "epoch": 0.4076866495034939, "grad_norm": 0.822272349268506, "learning_rate": 6.7030771060431495e-06, "loss": 0.4489, "step": 13302 }, { "epoch": 0.40771729802623513, "grad_norm": 2.19526180366706, "learning_rate": 6.70261045827707e-06, "loss": 0.6416, "step": 13303 }, { "epoch": 0.40774794654897634, "grad_norm": 2.013769511179118, "learning_rate": 6.702143793734735e-06, "loss": 0.7032, "step": 13304 }, { "epoch": 0.40777859507171754, "grad_norm": 1.5756063003135987, "learning_rate": 6.7016771124207404e-06, "loss": 0.6859, "step": 13305 }, { "epoch": 0.40780924359445875, "grad_norm": 1.7443663582032305, "learning_rate": 6.701210414339685e-06, "loss": 0.5983, "step": 13306 }, { "epoch": 0.40783989211719995, "grad_norm": 1.7673265695410711, "learning_rate": 6.7007436994961685e-06, "loss": 0.5807, "step": 13307 }, { "epoch": 0.40787054063994116, "grad_norm": 1.6976696379286855, "learning_rate": 6.7002769678947895e-06, "loss": 0.667, "step": 13308 }, { "epoch": 0.40790118916268236, "grad_norm": 1.650074543781794, "learning_rate": 6.699810219540146e-06, "loss": 0.5925, "step": 13309 }, { "epoch": 0.40793183768542357, "grad_norm": 2.0100216354819977, "learning_rate": 6.699343454436839e-06, "loss": 0.6394, "step": 13310 }, { "epoch": 0.4079624862081648, "grad_norm": 1.5934403512736859, "learning_rate": 6.698876672589465e-06, "loss": 0.6566, "step": 13311 }, { "epoch": 0.407993134730906, "grad_norm": 1.6710360906043842, "learning_rate": 6.698409874002626e-06, "loss": 0.5786, "step": 13312 }, { "epoch": 0.4080237832536472, "grad_norm": 1.5752933026190201, "learning_rate": 6.697943058680918e-06, "loss": 0.7309, "step": 13313 }, { "epoch": 0.4080544317763884, "grad_norm": 1.6947110100635148, "learning_rate": 6.697476226628943e-06, "loss": 0.6471, "step": 13314 }, { "epoch": 0.4080850802991296, "grad_norm": 1.9619992308922232, "learning_rate": 6.697009377851301e-06, "loss": 0.6981, "step": 13315 }, { "epoch": 0.4081157288218708, "grad_norm": 2.027486457169507, "learning_rate": 6.696542512352592e-06, "loss": 0.6672, "step": 13316 }, { "epoch": 0.408146377344612, "grad_norm": 1.667689243042061, "learning_rate": 6.696075630137413e-06, "loss": 0.6841, "step": 13317 }, { "epoch": 0.4081770258673532, "grad_norm": 1.8061329473791545, "learning_rate": 6.6956087312103694e-06, "loss": 0.7027, "step": 13318 }, { "epoch": 0.4082076743900944, "grad_norm": 0.8309860065904496, "learning_rate": 6.695141815576058e-06, "loss": 0.4262, "step": 13319 }, { "epoch": 0.4082383229128356, "grad_norm": 1.758966873646382, "learning_rate": 6.694674883239081e-06, "loss": 0.6096, "step": 13320 }, { "epoch": 0.40826897143557683, "grad_norm": 1.7456764970475454, "learning_rate": 6.694207934204038e-06, "loss": 0.726, "step": 13321 }, { "epoch": 0.408299619958318, "grad_norm": 1.696284603126578, "learning_rate": 6.693740968475531e-06, "loss": 0.6243, "step": 13322 }, { "epoch": 0.4083302684810592, "grad_norm": 0.7894710217422082, "learning_rate": 6.693273986058162e-06, "loss": 0.4786, "step": 13323 }, { "epoch": 0.4083609170038004, "grad_norm": 1.5249485076939058, "learning_rate": 6.69280698695653e-06, "loss": 0.6593, "step": 13324 }, { "epoch": 0.4083915655265416, "grad_norm": 1.7115849451697, "learning_rate": 6.692339971175239e-06, "loss": 0.6241, "step": 13325 }, { "epoch": 0.4084222140492828, "grad_norm": 1.8275553522382562, "learning_rate": 6.691872938718887e-06, "loss": 0.7329, "step": 13326 }, { "epoch": 0.408452862572024, "grad_norm": 1.6098835834812313, "learning_rate": 6.691405889592081e-06, "loss": 0.6101, "step": 13327 }, { "epoch": 0.4084835110947652, "grad_norm": 1.6870754113297426, "learning_rate": 6.6909388237994175e-06, "loss": 0.5609, "step": 13328 }, { "epoch": 0.4085141596175064, "grad_norm": 0.8773814461176866, "learning_rate": 6.690471741345503e-06, "loss": 0.4792, "step": 13329 }, { "epoch": 0.40854480814024763, "grad_norm": 1.7926737240834723, "learning_rate": 6.690004642234935e-06, "loss": 0.6992, "step": 13330 }, { "epoch": 0.40857545666298883, "grad_norm": 1.9241451556945757, "learning_rate": 6.6895375264723225e-06, "loss": 0.684, "step": 13331 }, { "epoch": 0.40860610518573004, "grad_norm": 1.9788875036854545, "learning_rate": 6.689070394062261e-06, "loss": 0.7076, "step": 13332 }, { "epoch": 0.40863675370847125, "grad_norm": 1.4539385125596525, "learning_rate": 6.688603245009359e-06, "loss": 0.7004, "step": 13333 }, { "epoch": 0.40866740223121245, "grad_norm": 1.8103859607084056, "learning_rate": 6.6881360793182155e-06, "loss": 0.6653, "step": 13334 }, { "epoch": 0.40869805075395366, "grad_norm": 1.4053329640786594, "learning_rate": 6.687668896993438e-06, "loss": 0.6347, "step": 13335 }, { "epoch": 0.40872869927669486, "grad_norm": 2.044777296588277, "learning_rate": 6.687201698039625e-06, "loss": 0.6857, "step": 13336 }, { "epoch": 0.40875934779943607, "grad_norm": 1.844516788771478, "learning_rate": 6.686734482461381e-06, "loss": 0.7119, "step": 13337 }, { "epoch": 0.4087899963221773, "grad_norm": 1.7986880235499356, "learning_rate": 6.686267250263314e-06, "loss": 0.6662, "step": 13338 }, { "epoch": 0.4088206448449185, "grad_norm": 1.700335602588464, "learning_rate": 6.685800001450023e-06, "loss": 0.7794, "step": 13339 }, { "epoch": 0.4088512933676597, "grad_norm": 1.6921481338250997, "learning_rate": 6.685332736026111e-06, "loss": 0.6491, "step": 13340 }, { "epoch": 0.4088819418904009, "grad_norm": 1.8138188487089064, "learning_rate": 6.684865453996185e-06, "loss": 0.663, "step": 13341 }, { "epoch": 0.4089125904131421, "grad_norm": 1.7996049186511618, "learning_rate": 6.684398155364852e-06, "loss": 0.7315, "step": 13342 }, { "epoch": 0.4089432389358833, "grad_norm": 1.8357805139592713, "learning_rate": 6.68393084013671e-06, "loss": 0.7606, "step": 13343 }, { "epoch": 0.4089738874586245, "grad_norm": 1.5631638210427206, "learning_rate": 6.683463508316367e-06, "loss": 0.6207, "step": 13344 }, { "epoch": 0.4090045359813657, "grad_norm": 0.8426826871305929, "learning_rate": 6.682996159908426e-06, "loss": 0.4641, "step": 13345 }, { "epoch": 0.4090351845041069, "grad_norm": 1.825513115712902, "learning_rate": 6.682528794917495e-06, "loss": 0.7071, "step": 13346 }, { "epoch": 0.4090658330268481, "grad_norm": 2.1095096900571964, "learning_rate": 6.682061413348178e-06, "loss": 0.6539, "step": 13347 }, { "epoch": 0.40909648154958933, "grad_norm": 0.7708103597735084, "learning_rate": 6.681594015205078e-06, "loss": 0.4628, "step": 13348 }, { "epoch": 0.40912713007233054, "grad_norm": 0.7901504484862296, "learning_rate": 6.681126600492802e-06, "loss": 0.4547, "step": 13349 }, { "epoch": 0.40915777859507174, "grad_norm": 1.4758345428020958, "learning_rate": 6.680659169215956e-06, "loss": 0.7003, "step": 13350 }, { "epoch": 0.40918842711781295, "grad_norm": 1.9879805127751315, "learning_rate": 6.6801917213791454e-06, "loss": 0.6561, "step": 13351 }, { "epoch": 0.40921907564055415, "grad_norm": 1.6665615959865119, "learning_rate": 6.679724256986974e-06, "loss": 0.6594, "step": 13352 }, { "epoch": 0.4092497241632953, "grad_norm": 1.8923301695901982, "learning_rate": 6.679256776044052e-06, "loss": 0.7121, "step": 13353 }, { "epoch": 0.4092803726860365, "grad_norm": 1.7067941403338576, "learning_rate": 6.6787892785549825e-06, "loss": 0.6733, "step": 13354 }, { "epoch": 0.4093110212087777, "grad_norm": 2.131595563217012, "learning_rate": 6.678321764524373e-06, "loss": 0.6586, "step": 13355 }, { "epoch": 0.4093416697315189, "grad_norm": 1.8246155434391846, "learning_rate": 6.67785423395683e-06, "loss": 0.6914, "step": 13356 }, { "epoch": 0.4093723182542601, "grad_norm": 1.594058384499629, "learning_rate": 6.677386686856959e-06, "loss": 0.6993, "step": 13357 }, { "epoch": 0.40940296677700133, "grad_norm": 1.5376525813637631, "learning_rate": 6.6769191232293685e-06, "loss": 0.7034, "step": 13358 }, { "epoch": 0.40943361529974254, "grad_norm": 0.9298853102962887, "learning_rate": 6.676451543078664e-06, "loss": 0.4572, "step": 13359 }, { "epoch": 0.40946426382248374, "grad_norm": 1.7491305338673624, "learning_rate": 6.675983946409454e-06, "loss": 0.583, "step": 13360 }, { "epoch": 0.40949491234522495, "grad_norm": 0.8379613989389715, "learning_rate": 6.675516333226346e-06, "loss": 0.4831, "step": 13361 }, { "epoch": 0.40952556086796615, "grad_norm": 1.9031471239169735, "learning_rate": 6.6750487035339465e-06, "loss": 0.6883, "step": 13362 }, { "epoch": 0.40955620939070736, "grad_norm": 1.708728903314733, "learning_rate": 6.674581057336862e-06, "loss": 0.6154, "step": 13363 }, { "epoch": 0.40958685791344857, "grad_norm": 1.5998969787089865, "learning_rate": 6.674113394639704e-06, "loss": 0.6259, "step": 13364 }, { "epoch": 0.40961750643618977, "grad_norm": 1.738458817392415, "learning_rate": 6.673645715447078e-06, "loss": 0.6943, "step": 13365 }, { "epoch": 0.409648154958931, "grad_norm": 1.915288820947147, "learning_rate": 6.673178019763592e-06, "loss": 0.641, "step": 13366 }, { "epoch": 0.4096788034816722, "grad_norm": 1.6505411949678883, "learning_rate": 6.672710307593855e-06, "loss": 0.7164, "step": 13367 }, { "epoch": 0.4097094520044134, "grad_norm": 0.8795412875206908, "learning_rate": 6.672242578942475e-06, "loss": 0.4467, "step": 13368 }, { "epoch": 0.4097401005271546, "grad_norm": 1.841885795589861, "learning_rate": 6.671774833814062e-06, "loss": 0.7056, "step": 13369 }, { "epoch": 0.4097707490498958, "grad_norm": 1.6027012128685254, "learning_rate": 6.671307072213223e-06, "loss": 0.6842, "step": 13370 }, { "epoch": 0.409801397572637, "grad_norm": 1.8704900340210207, "learning_rate": 6.6708392941445675e-06, "loss": 0.7387, "step": 13371 }, { "epoch": 0.4098320460953782, "grad_norm": 1.7418008745233973, "learning_rate": 6.670371499612705e-06, "loss": 0.6737, "step": 13372 }, { "epoch": 0.4098626946181194, "grad_norm": 1.9201834628347867, "learning_rate": 6.669903688622246e-06, "loss": 0.6535, "step": 13373 }, { "epoch": 0.4098933431408606, "grad_norm": 1.6831328946391373, "learning_rate": 6.669435861177798e-06, "loss": 0.6995, "step": 13374 }, { "epoch": 0.4099239916636018, "grad_norm": 1.489958867198239, "learning_rate": 6.668968017283971e-06, "loss": 0.5454, "step": 13375 }, { "epoch": 0.40995464018634303, "grad_norm": 1.733012567799383, "learning_rate": 6.668500156945376e-06, "loss": 0.7053, "step": 13376 }, { "epoch": 0.40998528870908424, "grad_norm": 1.789281730227265, "learning_rate": 6.668032280166621e-06, "loss": 0.6241, "step": 13377 }, { "epoch": 0.41001593723182544, "grad_norm": 1.5552170931351954, "learning_rate": 6.667564386952316e-06, "loss": 0.679, "step": 13378 }, { "epoch": 0.41004658575456665, "grad_norm": 1.714826477830339, "learning_rate": 6.667096477307075e-06, "loss": 0.7085, "step": 13379 }, { "epoch": 0.41007723427730786, "grad_norm": 1.7040453731667404, "learning_rate": 6.666628551235504e-06, "loss": 0.6907, "step": 13380 }, { "epoch": 0.41010788280004906, "grad_norm": 1.7658136226645111, "learning_rate": 6.666160608742217e-06, "loss": 0.6945, "step": 13381 }, { "epoch": 0.41013853132279027, "grad_norm": 1.4854580107637396, "learning_rate": 6.665692649831822e-06, "loss": 0.6331, "step": 13382 }, { "epoch": 0.4101691798455315, "grad_norm": 1.657977878443558, "learning_rate": 6.665224674508932e-06, "loss": 0.6206, "step": 13383 }, { "epoch": 0.4101998283682726, "grad_norm": 1.599719639492126, "learning_rate": 6.664756682778156e-06, "loss": 0.6479, "step": 13384 }, { "epoch": 0.41023047689101383, "grad_norm": 1.6298775133147383, "learning_rate": 6.6642886746441085e-06, "loss": 0.6337, "step": 13385 }, { "epoch": 0.41026112541375503, "grad_norm": 1.9781994330703512, "learning_rate": 6.6638206501113965e-06, "loss": 0.6171, "step": 13386 }, { "epoch": 0.41029177393649624, "grad_norm": 1.7859517777530607, "learning_rate": 6.663352609184635e-06, "loss": 0.6829, "step": 13387 }, { "epoch": 0.41032242245923745, "grad_norm": 1.8527014832491386, "learning_rate": 6.662884551868436e-06, "loss": 0.6803, "step": 13388 }, { "epoch": 0.41035307098197865, "grad_norm": 2.0314568813716045, "learning_rate": 6.662416478167407e-06, "loss": 0.7022, "step": 13389 }, { "epoch": 0.41038371950471986, "grad_norm": 1.7043638086409314, "learning_rate": 6.661948388086166e-06, "loss": 0.6414, "step": 13390 }, { "epoch": 0.41041436802746106, "grad_norm": 1.718984090807318, "learning_rate": 6.66148028162932e-06, "loss": 0.6324, "step": 13391 }, { "epoch": 0.41044501655020227, "grad_norm": 0.8681878542549326, "learning_rate": 6.661012158801487e-06, "loss": 0.4684, "step": 13392 }, { "epoch": 0.4104756650729435, "grad_norm": 1.653361113215478, "learning_rate": 6.660544019607272e-06, "loss": 0.6276, "step": 13393 }, { "epoch": 0.4105063135956847, "grad_norm": 1.6881539403033918, "learning_rate": 6.660075864051294e-06, "loss": 0.6286, "step": 13394 }, { "epoch": 0.4105369621184259, "grad_norm": 1.6448492549552072, "learning_rate": 6.659607692138164e-06, "loss": 0.5802, "step": 13395 }, { "epoch": 0.4105676106411671, "grad_norm": 1.7652925344171948, "learning_rate": 6.659139503872496e-06, "loss": 0.6821, "step": 13396 }, { "epoch": 0.4105982591639083, "grad_norm": 1.5147178267051413, "learning_rate": 6.658671299258899e-06, "loss": 0.6768, "step": 13397 }, { "epoch": 0.4106289076866495, "grad_norm": 1.5688645706745499, "learning_rate": 6.658203078301991e-06, "loss": 0.7001, "step": 13398 }, { "epoch": 0.4106595562093907, "grad_norm": 0.764777907610378, "learning_rate": 6.657734841006383e-06, "loss": 0.439, "step": 13399 }, { "epoch": 0.4106902047321319, "grad_norm": 1.5511906795973083, "learning_rate": 6.6572665873766914e-06, "loss": 0.6406, "step": 13400 }, { "epoch": 0.4107208532548731, "grad_norm": 1.5398366456832941, "learning_rate": 6.6567983174175255e-06, "loss": 0.6286, "step": 13401 }, { "epoch": 0.4107515017776143, "grad_norm": 1.7089652081101607, "learning_rate": 6.656330031133503e-06, "loss": 0.627, "step": 13402 }, { "epoch": 0.41078215030035553, "grad_norm": 1.784539682147725, "learning_rate": 6.655861728529237e-06, "loss": 0.6995, "step": 13403 }, { "epoch": 0.41081279882309674, "grad_norm": 1.7289231876608366, "learning_rate": 6.655393409609342e-06, "loss": 0.7222, "step": 13404 }, { "epoch": 0.41084344734583794, "grad_norm": 1.6232780439303525, "learning_rate": 6.654925074378432e-06, "loss": 0.6864, "step": 13405 }, { "epoch": 0.41087409586857915, "grad_norm": 1.5693924336843283, "learning_rate": 6.6544567228411206e-06, "loss": 0.7091, "step": 13406 }, { "epoch": 0.41090474439132035, "grad_norm": 1.6671995660333454, "learning_rate": 6.653988355002026e-06, "loss": 0.6505, "step": 13407 }, { "epoch": 0.41093539291406156, "grad_norm": 1.5202360594930329, "learning_rate": 6.653519970865759e-06, "loss": 0.6054, "step": 13408 }, { "epoch": 0.41096604143680276, "grad_norm": 1.7852482296500332, "learning_rate": 6.653051570436938e-06, "loss": 0.7702, "step": 13409 }, { "epoch": 0.41099668995954397, "grad_norm": 1.84822013457629, "learning_rate": 6.652583153720176e-06, "loss": 0.6262, "step": 13410 }, { "epoch": 0.4110273384822852, "grad_norm": 1.6915441145649046, "learning_rate": 6.65211472072009e-06, "loss": 0.7262, "step": 13411 }, { "epoch": 0.4110579870050264, "grad_norm": 1.5962942253030836, "learning_rate": 6.651646271441295e-06, "loss": 0.645, "step": 13412 }, { "epoch": 0.4110886355277676, "grad_norm": 1.4744439325023908, "learning_rate": 6.651177805888407e-06, "loss": 0.5856, "step": 13413 }, { "epoch": 0.4111192840505088, "grad_norm": 1.6529308949690342, "learning_rate": 6.650709324066041e-06, "loss": 0.6057, "step": 13414 }, { "epoch": 0.41114993257324994, "grad_norm": 0.7733073576484201, "learning_rate": 6.650240825978813e-06, "loss": 0.4477, "step": 13415 }, { "epoch": 0.41118058109599115, "grad_norm": 1.8379210818248846, "learning_rate": 6.6497723116313405e-06, "loss": 0.7026, "step": 13416 }, { "epoch": 0.41121122961873235, "grad_norm": 1.839627802713918, "learning_rate": 6.649303781028239e-06, "loss": 0.6628, "step": 13417 }, { "epoch": 0.41124187814147356, "grad_norm": 1.8431127330273562, "learning_rate": 6.648835234174126e-06, "loss": 0.6542, "step": 13418 }, { "epoch": 0.41127252666421477, "grad_norm": 1.6768358092960065, "learning_rate": 6.648366671073617e-06, "loss": 0.732, "step": 13419 }, { "epoch": 0.41130317518695597, "grad_norm": 1.7791006675613301, "learning_rate": 6.647898091731331e-06, "loss": 0.689, "step": 13420 }, { "epoch": 0.4113338237096972, "grad_norm": 1.6876656918951178, "learning_rate": 6.64742949615188e-06, "loss": 0.6863, "step": 13421 }, { "epoch": 0.4113644722324384, "grad_norm": 1.592646473779818, "learning_rate": 6.646960884339888e-06, "loss": 0.6919, "step": 13422 }, { "epoch": 0.4113951207551796, "grad_norm": 1.705453035959536, "learning_rate": 6.646492256299968e-06, "loss": 0.6615, "step": 13423 }, { "epoch": 0.4114257692779208, "grad_norm": 1.8662159469630692, "learning_rate": 6.6460236120367384e-06, "loss": 0.7121, "step": 13424 }, { "epoch": 0.411456417800662, "grad_norm": 1.8368841307268378, "learning_rate": 6.645554951554817e-06, "loss": 0.7355, "step": 13425 }, { "epoch": 0.4114870663234032, "grad_norm": 1.8610252062032533, "learning_rate": 6.645086274858822e-06, "loss": 0.7862, "step": 13426 }, { "epoch": 0.4115177148461444, "grad_norm": 1.6615310070307492, "learning_rate": 6.644617581953371e-06, "loss": 0.6653, "step": 13427 }, { "epoch": 0.4115483633688856, "grad_norm": 1.801961255499414, "learning_rate": 6.644148872843081e-06, "loss": 0.6635, "step": 13428 }, { "epoch": 0.4115790118916268, "grad_norm": 2.2224977315655843, "learning_rate": 6.643680147532572e-06, "loss": 0.693, "step": 13429 }, { "epoch": 0.41160966041436803, "grad_norm": 1.5449684057139765, "learning_rate": 6.643211406026463e-06, "loss": 0.6007, "step": 13430 }, { "epoch": 0.41164030893710923, "grad_norm": 1.4826382454785498, "learning_rate": 6.642742648329371e-06, "loss": 0.6254, "step": 13431 }, { "epoch": 0.41167095745985044, "grad_norm": 1.6404738180490772, "learning_rate": 6.642273874445914e-06, "loss": 0.6807, "step": 13432 }, { "epoch": 0.41170160598259165, "grad_norm": 1.6597369153960488, "learning_rate": 6.641805084380715e-06, "loss": 0.7131, "step": 13433 }, { "epoch": 0.41173225450533285, "grad_norm": 1.703342262993007, "learning_rate": 6.641336278138387e-06, "loss": 0.6349, "step": 13434 }, { "epoch": 0.41176290302807406, "grad_norm": 1.7781121237555708, "learning_rate": 6.640867455723556e-06, "loss": 0.6112, "step": 13435 }, { "epoch": 0.41179355155081526, "grad_norm": 1.6878424044083278, "learning_rate": 6.6403986171408365e-06, "loss": 0.6923, "step": 13436 }, { "epoch": 0.41182420007355647, "grad_norm": 1.8369451185832126, "learning_rate": 6.63992976239485e-06, "loss": 0.6068, "step": 13437 }, { "epoch": 0.4118548485962977, "grad_norm": 1.8841371979975174, "learning_rate": 6.639460891490217e-06, "loss": 0.6418, "step": 13438 }, { "epoch": 0.4118854971190389, "grad_norm": 1.7669740298452128, "learning_rate": 6.6389920044315545e-06, "loss": 0.6418, "step": 13439 }, { "epoch": 0.4119161456417801, "grad_norm": 1.7148366638603725, "learning_rate": 6.638523101223485e-06, "loss": 0.6507, "step": 13440 }, { "epoch": 0.4119467941645213, "grad_norm": 1.8193823327726903, "learning_rate": 6.638054181870629e-06, "loss": 0.7127, "step": 13441 }, { "epoch": 0.4119774426872625, "grad_norm": 1.5821542741847365, "learning_rate": 6.637585246377605e-06, "loss": 0.7603, "step": 13442 }, { "epoch": 0.4120080912100037, "grad_norm": 1.7970170972930137, "learning_rate": 6.637116294749035e-06, "loss": 0.7232, "step": 13443 }, { "epoch": 0.4120387397327449, "grad_norm": 1.7354312994475913, "learning_rate": 6.6366473269895395e-06, "loss": 0.6135, "step": 13444 }, { "epoch": 0.4120693882554861, "grad_norm": 1.6128213512395186, "learning_rate": 6.636178343103739e-06, "loss": 0.5915, "step": 13445 }, { "epoch": 0.41210003677822726, "grad_norm": 1.7089757630217361, "learning_rate": 6.635709343096255e-06, "loss": 0.6967, "step": 13446 }, { "epoch": 0.41213068530096847, "grad_norm": 1.6945386729843248, "learning_rate": 6.635240326971707e-06, "loss": 0.6971, "step": 13447 }, { "epoch": 0.4121613338237097, "grad_norm": 0.8337705138506429, "learning_rate": 6.634771294734719e-06, "loss": 0.4604, "step": 13448 }, { "epoch": 0.4121919823464509, "grad_norm": 1.5577772312532054, "learning_rate": 6.63430224638991e-06, "loss": 0.5998, "step": 13449 }, { "epoch": 0.4122226308691921, "grad_norm": 1.7857769367564842, "learning_rate": 6.633833181941905e-06, "loss": 0.6813, "step": 13450 }, { "epoch": 0.4122532793919333, "grad_norm": 0.774583443546709, "learning_rate": 6.633364101395321e-06, "loss": 0.4348, "step": 13451 }, { "epoch": 0.4122839279146745, "grad_norm": 1.7925069063513002, "learning_rate": 6.632895004754785e-06, "loss": 0.7085, "step": 13452 }, { "epoch": 0.4123145764374157, "grad_norm": 1.6245337621040188, "learning_rate": 6.632425892024914e-06, "loss": 0.6472, "step": 13453 }, { "epoch": 0.4123452249601569, "grad_norm": 0.7910828948049808, "learning_rate": 6.631956763210335e-06, "loss": 0.4557, "step": 13454 }, { "epoch": 0.4123758734828981, "grad_norm": 1.71660078692795, "learning_rate": 6.6314876183156686e-06, "loss": 0.627, "step": 13455 }, { "epoch": 0.4124065220056393, "grad_norm": 1.6549634517505543, "learning_rate": 6.631018457345536e-06, "loss": 0.6612, "step": 13456 }, { "epoch": 0.4124371705283805, "grad_norm": 1.4768105887353855, "learning_rate": 6.630549280304561e-06, "loss": 0.6245, "step": 13457 }, { "epoch": 0.41246781905112173, "grad_norm": 1.5751822183204283, "learning_rate": 6.630080087197368e-06, "loss": 0.5961, "step": 13458 }, { "epoch": 0.41249846757386294, "grad_norm": 0.7954091218694648, "learning_rate": 6.629610878028579e-06, "loss": 0.4623, "step": 13459 }, { "epoch": 0.41252911609660414, "grad_norm": 0.7766364557113604, "learning_rate": 6.629141652802815e-06, "loss": 0.4447, "step": 13460 }, { "epoch": 0.41255976461934535, "grad_norm": 1.559088062947135, "learning_rate": 6.628672411524704e-06, "loss": 0.5983, "step": 13461 }, { "epoch": 0.41259041314208655, "grad_norm": 2.0445792548388373, "learning_rate": 6.628203154198865e-06, "loss": 0.6862, "step": 13462 }, { "epoch": 0.41262106166482776, "grad_norm": 1.630933593739183, "learning_rate": 6.627733880829926e-06, "loss": 0.6155, "step": 13463 }, { "epoch": 0.41265171018756897, "grad_norm": 1.569345863774532, "learning_rate": 6.627264591422507e-06, "loss": 0.6549, "step": 13464 }, { "epoch": 0.41268235871031017, "grad_norm": 1.8651453985485977, "learning_rate": 6.626795285981235e-06, "loss": 0.7299, "step": 13465 }, { "epoch": 0.4127130072330514, "grad_norm": 1.6877306447952236, "learning_rate": 6.6263259645107305e-06, "loss": 0.6626, "step": 13466 }, { "epoch": 0.4127436557557926, "grad_norm": 1.6448593642046956, "learning_rate": 6.625856627015621e-06, "loss": 0.6663, "step": 13467 }, { "epoch": 0.4127743042785338, "grad_norm": 0.8510746707767077, "learning_rate": 6.6253872735005296e-06, "loss": 0.4658, "step": 13468 }, { "epoch": 0.412804952801275, "grad_norm": 1.5522479622691867, "learning_rate": 6.624917903970084e-06, "loss": 0.6423, "step": 13469 }, { "epoch": 0.4128356013240162, "grad_norm": 1.7094016820336086, "learning_rate": 6.624448518428905e-06, "loss": 0.6905, "step": 13470 }, { "epoch": 0.4128662498467574, "grad_norm": 1.7623498553549544, "learning_rate": 6.6239791168816195e-06, "loss": 0.619, "step": 13471 }, { "epoch": 0.4128968983694986, "grad_norm": 1.5538230541916616, "learning_rate": 6.623509699332851e-06, "loss": 0.7077, "step": 13472 }, { "epoch": 0.4129275468922398, "grad_norm": 0.773285247400521, "learning_rate": 6.623040265787227e-06, "loss": 0.4474, "step": 13473 }, { "epoch": 0.412958195414981, "grad_norm": 1.841030645871962, "learning_rate": 6.6225708162493715e-06, "loss": 0.7236, "step": 13474 }, { "epoch": 0.4129888439377222, "grad_norm": 1.6209255180852737, "learning_rate": 6.6221013507239105e-06, "loss": 0.6429, "step": 13475 }, { "epoch": 0.41301949246046343, "grad_norm": 1.861693684304544, "learning_rate": 6.621631869215471e-06, "loss": 0.6404, "step": 13476 }, { "epoch": 0.4130501409832046, "grad_norm": 1.6604488872223315, "learning_rate": 6.621162371728678e-06, "loss": 0.6902, "step": 13477 }, { "epoch": 0.4130807895059458, "grad_norm": 1.7868505488454258, "learning_rate": 6.620692858268156e-06, "loss": 0.5576, "step": 13478 }, { "epoch": 0.413111438028687, "grad_norm": 1.5487806551127319, "learning_rate": 6.6202233288385335e-06, "loss": 0.5979, "step": 13479 }, { "epoch": 0.4131420865514282, "grad_norm": 1.8475741791190508, "learning_rate": 6.619753783444435e-06, "loss": 0.6617, "step": 13480 }, { "epoch": 0.4131727350741694, "grad_norm": 1.7874717267985858, "learning_rate": 6.6192842220904886e-06, "loss": 0.6409, "step": 13481 }, { "epoch": 0.4132033835969106, "grad_norm": 1.7773126720399095, "learning_rate": 6.61881464478132e-06, "loss": 0.6851, "step": 13482 }, { "epoch": 0.4132340321196518, "grad_norm": 1.581219271122007, "learning_rate": 6.618345051521558e-06, "loss": 0.5948, "step": 13483 }, { "epoch": 0.413264680642393, "grad_norm": 1.8456727809364177, "learning_rate": 6.617875442315827e-06, "loss": 0.6877, "step": 13484 }, { "epoch": 0.41329532916513423, "grad_norm": 1.792082646661338, "learning_rate": 6.617405817168755e-06, "loss": 0.6545, "step": 13485 }, { "epoch": 0.41332597768787543, "grad_norm": 1.6891813135045897, "learning_rate": 6.616936176084969e-06, "loss": 0.7073, "step": 13486 }, { "epoch": 0.41335662621061664, "grad_norm": 1.560636466406356, "learning_rate": 6.616466519069099e-06, "loss": 0.5357, "step": 13487 }, { "epoch": 0.41338727473335785, "grad_norm": 1.6705833010110929, "learning_rate": 6.61599684612577e-06, "loss": 0.657, "step": 13488 }, { "epoch": 0.41341792325609905, "grad_norm": 1.5517034826066176, "learning_rate": 6.615527157259611e-06, "loss": 0.7284, "step": 13489 }, { "epoch": 0.41344857177884026, "grad_norm": 1.7467992804137902, "learning_rate": 6.615057452475249e-06, "loss": 0.68, "step": 13490 }, { "epoch": 0.41347922030158146, "grad_norm": 1.8940050585109918, "learning_rate": 6.6145877317773135e-06, "loss": 0.7171, "step": 13491 }, { "epoch": 0.41350986882432267, "grad_norm": 0.8741730330933971, "learning_rate": 6.614117995170431e-06, "loss": 0.4499, "step": 13492 }, { "epoch": 0.4135405173470639, "grad_norm": 1.6489740806853317, "learning_rate": 6.613648242659232e-06, "loss": 0.702, "step": 13493 }, { "epoch": 0.4135711658698051, "grad_norm": 1.87982346065486, "learning_rate": 6.613178474248342e-06, "loss": 0.7156, "step": 13494 }, { "epoch": 0.4136018143925463, "grad_norm": 1.646477469671486, "learning_rate": 6.6127086899423935e-06, "loss": 0.6336, "step": 13495 }, { "epoch": 0.4136324629152875, "grad_norm": 1.8348464631294286, "learning_rate": 6.612238889746013e-06, "loss": 0.6915, "step": 13496 }, { "epoch": 0.4136631114380287, "grad_norm": 1.6246946091904215, "learning_rate": 6.611769073663831e-06, "loss": 0.6101, "step": 13497 }, { "epoch": 0.4136937599607699, "grad_norm": 1.7850099465848948, "learning_rate": 6.611299241700474e-06, "loss": 0.7362, "step": 13498 }, { "epoch": 0.4137244084835111, "grad_norm": 1.517641211883032, "learning_rate": 6.610829393860575e-06, "loss": 0.5717, "step": 13499 }, { "epoch": 0.4137550570062523, "grad_norm": 1.7424245952062294, "learning_rate": 6.6103595301487625e-06, "loss": 0.6603, "step": 13500 }, { "epoch": 0.4137857055289935, "grad_norm": 1.7787380094127248, "learning_rate": 6.609889650569663e-06, "loss": 0.6745, "step": 13501 }, { "epoch": 0.4138163540517347, "grad_norm": 1.6973766123148308, "learning_rate": 6.609419755127911e-06, "loss": 0.6213, "step": 13502 }, { "epoch": 0.41384700257447593, "grad_norm": 1.7166788425185044, "learning_rate": 6.608949843828132e-06, "loss": 0.7457, "step": 13503 }, { "epoch": 0.41387765109721714, "grad_norm": 1.6225547058498049, "learning_rate": 6.6084799166749615e-06, "loss": 0.6497, "step": 13504 }, { "epoch": 0.41390829961995834, "grad_norm": 1.6840163928732261, "learning_rate": 6.608009973673025e-06, "loss": 0.643, "step": 13505 }, { "epoch": 0.41393894814269955, "grad_norm": 2.7596636420811884, "learning_rate": 6.607540014826956e-06, "loss": 0.6445, "step": 13506 }, { "epoch": 0.41396959666544075, "grad_norm": 0.7974257891480825, "learning_rate": 6.607070040141382e-06, "loss": 0.4556, "step": 13507 }, { "epoch": 0.4140002451881819, "grad_norm": 1.5165930239976992, "learning_rate": 6.606600049620938e-06, "loss": 0.5951, "step": 13508 }, { "epoch": 0.4140308937109231, "grad_norm": 1.8750971183603837, "learning_rate": 6.606130043270251e-06, "loss": 0.7833, "step": 13509 }, { "epoch": 0.4140615422336643, "grad_norm": 1.7658998602919407, "learning_rate": 6.6056600210939544e-06, "loss": 0.7556, "step": 13510 }, { "epoch": 0.4140921907564055, "grad_norm": 1.5603580915455646, "learning_rate": 6.605189983096678e-06, "loss": 0.7308, "step": 13511 }, { "epoch": 0.4141228392791467, "grad_norm": 1.558783233307359, "learning_rate": 6.604719929283056e-06, "loss": 0.6531, "step": 13512 }, { "epoch": 0.41415348780188793, "grad_norm": 1.6920411439637308, "learning_rate": 6.604249859657717e-06, "loss": 0.6275, "step": 13513 }, { "epoch": 0.41418413632462914, "grad_norm": 1.419713733932443, "learning_rate": 6.603779774225292e-06, "loss": 0.6418, "step": 13514 }, { "epoch": 0.41421478484737034, "grad_norm": 1.8225025949412603, "learning_rate": 6.6033096729904164e-06, "loss": 0.6599, "step": 13515 }, { "epoch": 0.41424543337011155, "grad_norm": 1.6663813625024726, "learning_rate": 6.60283955595772e-06, "loss": 0.7854, "step": 13516 }, { "epoch": 0.41427608189285275, "grad_norm": 1.68382422738499, "learning_rate": 6.602369423131836e-06, "loss": 0.6817, "step": 13517 }, { "epoch": 0.41430673041559396, "grad_norm": 1.6024140740694626, "learning_rate": 6.601899274517394e-06, "loss": 0.6221, "step": 13518 }, { "epoch": 0.41433737893833517, "grad_norm": 1.6147118037243724, "learning_rate": 6.601429110119031e-06, "loss": 0.6679, "step": 13519 }, { "epoch": 0.41436802746107637, "grad_norm": 1.8573685070435908, "learning_rate": 6.600958929941376e-06, "loss": 0.6287, "step": 13520 }, { "epoch": 0.4143986759838176, "grad_norm": 1.666854301180219, "learning_rate": 6.600488733989064e-06, "loss": 0.6294, "step": 13521 }, { "epoch": 0.4144293245065588, "grad_norm": 1.4592106348334044, "learning_rate": 6.600018522266724e-06, "loss": 0.6264, "step": 13522 }, { "epoch": 0.4144599730293, "grad_norm": 1.936888355854581, "learning_rate": 6.599548294778996e-06, "loss": 0.6087, "step": 13523 }, { "epoch": 0.4144906215520412, "grad_norm": 1.8925551084741774, "learning_rate": 6.599078051530506e-06, "loss": 0.6899, "step": 13524 }, { "epoch": 0.4145212700747824, "grad_norm": 1.807592872893147, "learning_rate": 6.598607792525893e-06, "loss": 0.6446, "step": 13525 }, { "epoch": 0.4145519185975236, "grad_norm": 0.8592100873664613, "learning_rate": 6.598137517769787e-06, "loss": 0.4453, "step": 13526 }, { "epoch": 0.4145825671202648, "grad_norm": 0.8037077256755613, "learning_rate": 6.597667227266825e-06, "loss": 0.4548, "step": 13527 }, { "epoch": 0.414613215643006, "grad_norm": 1.5991086227505038, "learning_rate": 6.597196921021638e-06, "loss": 0.6917, "step": 13528 }, { "epoch": 0.4146438641657472, "grad_norm": 1.5730660712633, "learning_rate": 6.5967265990388605e-06, "loss": 0.7219, "step": 13529 }, { "epoch": 0.41467451268848843, "grad_norm": 1.6660601738814607, "learning_rate": 6.596256261323128e-06, "loss": 0.6334, "step": 13530 }, { "epoch": 0.41470516121122963, "grad_norm": 1.5965776805010743, "learning_rate": 6.595785907879074e-06, "loss": 0.648, "step": 13531 }, { "epoch": 0.41473580973397084, "grad_norm": 1.779700619513507, "learning_rate": 6.595315538711334e-06, "loss": 0.786, "step": 13532 }, { "epoch": 0.41476645825671205, "grad_norm": 0.7723426320848962, "learning_rate": 6.5948451538245406e-06, "loss": 0.4589, "step": 13533 }, { "epoch": 0.41479710677945325, "grad_norm": 1.6008946227175689, "learning_rate": 6.5943747532233305e-06, "loss": 0.5903, "step": 13534 }, { "epoch": 0.41482775530219446, "grad_norm": 1.707328598516572, "learning_rate": 6.593904336912338e-06, "loss": 0.6495, "step": 13535 }, { "epoch": 0.41485840382493566, "grad_norm": 0.81679037295798, "learning_rate": 6.5934339048961986e-06, "loss": 0.463, "step": 13536 }, { "epoch": 0.41488905234767687, "grad_norm": 1.6495046252652925, "learning_rate": 6.592963457179546e-06, "loss": 0.6463, "step": 13537 }, { "epoch": 0.4149197008704181, "grad_norm": 1.7456616164430647, "learning_rate": 6.592492993767017e-06, "loss": 0.5728, "step": 13538 }, { "epoch": 0.4149503493931592, "grad_norm": 1.7318416066291569, "learning_rate": 6.592022514663248e-06, "loss": 0.666, "step": 13539 }, { "epoch": 0.41498099791590043, "grad_norm": 0.8764550345718448, "learning_rate": 6.591552019872872e-06, "loss": 0.4783, "step": 13540 }, { "epoch": 0.41501164643864163, "grad_norm": 1.516136619442212, "learning_rate": 6.591081509400529e-06, "loss": 0.6847, "step": 13541 }, { "epoch": 0.41504229496138284, "grad_norm": 1.7255661287716424, "learning_rate": 6.590610983250853e-06, "loss": 0.7134, "step": 13542 }, { "epoch": 0.41507294348412405, "grad_norm": 1.6809494029678764, "learning_rate": 6.590140441428479e-06, "loss": 0.6271, "step": 13543 }, { "epoch": 0.41510359200686525, "grad_norm": 1.670112775605455, "learning_rate": 6.589669883938043e-06, "loss": 0.6832, "step": 13544 }, { "epoch": 0.41513424052960646, "grad_norm": 1.750636059246388, "learning_rate": 6.5891993107841846e-06, "loss": 0.6593, "step": 13545 }, { "epoch": 0.41516488905234766, "grad_norm": 1.5983821483308849, "learning_rate": 6.588728721971538e-06, "loss": 0.6703, "step": 13546 }, { "epoch": 0.41519553757508887, "grad_norm": 1.6961066700639011, "learning_rate": 6.588258117504742e-06, "loss": 0.6527, "step": 13547 }, { "epoch": 0.4152261860978301, "grad_norm": 1.7173159940637668, "learning_rate": 6.587787497388431e-06, "loss": 0.6742, "step": 13548 }, { "epoch": 0.4152568346205713, "grad_norm": 1.6168289385676735, "learning_rate": 6.5873168616272445e-06, "loss": 0.6215, "step": 13549 }, { "epoch": 0.4152874831433125, "grad_norm": 1.8367100750497194, "learning_rate": 6.586846210225819e-06, "loss": 0.7522, "step": 13550 }, { "epoch": 0.4153181316660537, "grad_norm": 1.710675672177606, "learning_rate": 6.586375543188791e-06, "loss": 0.5972, "step": 13551 }, { "epoch": 0.4153487801887949, "grad_norm": 1.6433697135972805, "learning_rate": 6.585904860520798e-06, "loss": 0.6774, "step": 13552 }, { "epoch": 0.4153794287115361, "grad_norm": 0.8518244170254038, "learning_rate": 6.58543416222648e-06, "loss": 0.4583, "step": 13553 }, { "epoch": 0.4154100772342773, "grad_norm": 1.6297694370331446, "learning_rate": 6.584963448310474e-06, "loss": 0.6143, "step": 13554 }, { "epoch": 0.4154407257570185, "grad_norm": 1.8212814020465928, "learning_rate": 6.5844927187774164e-06, "loss": 0.6802, "step": 13555 }, { "epoch": 0.4154713742797597, "grad_norm": 0.8058396337613833, "learning_rate": 6.5840219736319475e-06, "loss": 0.4616, "step": 13556 }, { "epoch": 0.4155020228025009, "grad_norm": 1.830311937581392, "learning_rate": 6.583551212878704e-06, "loss": 0.7078, "step": 13557 }, { "epoch": 0.41553267132524213, "grad_norm": 1.6540535660879114, "learning_rate": 6.5830804365223266e-06, "loss": 0.5681, "step": 13558 }, { "epoch": 0.41556331984798334, "grad_norm": 1.9069540655529205, "learning_rate": 6.58260964456745e-06, "loss": 0.7228, "step": 13559 }, { "epoch": 0.41559396837072454, "grad_norm": 1.7978151489817726, "learning_rate": 6.582138837018719e-06, "loss": 0.6936, "step": 13560 }, { "epoch": 0.41562461689346575, "grad_norm": 1.7826578499552475, "learning_rate": 6.581668013880767e-06, "loss": 0.6768, "step": 13561 }, { "epoch": 0.41565526541620695, "grad_norm": 1.5970513994318856, "learning_rate": 6.581197175158236e-06, "loss": 0.6813, "step": 13562 }, { "epoch": 0.41568591393894816, "grad_norm": 1.805756643108612, "learning_rate": 6.580726320855765e-06, "loss": 0.5886, "step": 13563 }, { "epoch": 0.41571656246168937, "grad_norm": 0.8201627902798323, "learning_rate": 6.580255450977992e-06, "loss": 0.4502, "step": 13564 }, { "epoch": 0.41574721098443057, "grad_norm": 1.6883173588372369, "learning_rate": 6.579784565529558e-06, "loss": 0.7652, "step": 13565 }, { "epoch": 0.4157778595071718, "grad_norm": 1.62466895619112, "learning_rate": 6.579313664515103e-06, "loss": 0.5982, "step": 13566 }, { "epoch": 0.415808508029913, "grad_norm": 1.90507479694742, "learning_rate": 6.578842747939267e-06, "loss": 0.6626, "step": 13567 }, { "epoch": 0.4158391565526542, "grad_norm": 1.4796062473434173, "learning_rate": 6.578371815806689e-06, "loss": 0.6296, "step": 13568 }, { "epoch": 0.4158698050753954, "grad_norm": 1.736880328396366, "learning_rate": 6.5779008681220095e-06, "loss": 0.7509, "step": 13569 }, { "epoch": 0.41590045359813654, "grad_norm": 0.7723623578484957, "learning_rate": 6.577429904889868e-06, "loss": 0.4426, "step": 13570 }, { "epoch": 0.41593110212087775, "grad_norm": 0.8259547493489307, "learning_rate": 6.576958926114907e-06, "loss": 0.4674, "step": 13571 }, { "epoch": 0.41596175064361895, "grad_norm": 0.802127259561182, "learning_rate": 6.576487931801766e-06, "loss": 0.4505, "step": 13572 }, { "epoch": 0.41599239916636016, "grad_norm": 1.8907230266158161, "learning_rate": 6.576016921955087e-06, "loss": 0.6396, "step": 13573 }, { "epoch": 0.41602304768910137, "grad_norm": 1.5426586613443003, "learning_rate": 6.575545896579509e-06, "loss": 0.6211, "step": 13574 }, { "epoch": 0.41605369621184257, "grad_norm": 1.690064934689959, "learning_rate": 6.575074855679675e-06, "loss": 0.5416, "step": 13575 }, { "epoch": 0.4160843447345838, "grad_norm": 1.6201078510240954, "learning_rate": 6.574603799260224e-06, "loss": 0.6335, "step": 13576 }, { "epoch": 0.416114993257325, "grad_norm": 1.6620201886661583, "learning_rate": 6.574132727325801e-06, "loss": 0.6866, "step": 13577 }, { "epoch": 0.4161456417800662, "grad_norm": 1.693784766763086, "learning_rate": 6.5736616398810436e-06, "loss": 0.738, "step": 13578 }, { "epoch": 0.4161762903028074, "grad_norm": 1.8580756655801687, "learning_rate": 6.573190536930596e-06, "loss": 0.6558, "step": 13579 }, { "epoch": 0.4162069388255486, "grad_norm": 1.8721618410436112, "learning_rate": 6.5727194184790985e-06, "loss": 0.6637, "step": 13580 }, { "epoch": 0.4162375873482898, "grad_norm": 1.6845044035231376, "learning_rate": 6.572248284531196e-06, "loss": 0.6886, "step": 13581 }, { "epoch": 0.416268235871031, "grad_norm": 1.7656841031214245, "learning_rate": 6.571777135091528e-06, "loss": 0.6215, "step": 13582 }, { "epoch": 0.4162988843937722, "grad_norm": 1.7013525604002617, "learning_rate": 6.571305970164737e-06, "loss": 0.5755, "step": 13583 }, { "epoch": 0.4163295329165134, "grad_norm": 1.5649430469331027, "learning_rate": 6.570834789755468e-06, "loss": 0.5976, "step": 13584 }, { "epoch": 0.41636018143925463, "grad_norm": 1.512215126725242, "learning_rate": 6.570363593868361e-06, "loss": 0.6535, "step": 13585 }, { "epoch": 0.41639082996199583, "grad_norm": 1.7197849274118053, "learning_rate": 6.569892382508061e-06, "loss": 0.7353, "step": 13586 }, { "epoch": 0.41642147848473704, "grad_norm": 3.0860196971286364, "learning_rate": 6.569421155679207e-06, "loss": 0.7077, "step": 13587 }, { "epoch": 0.41645212700747825, "grad_norm": 1.5242592143571956, "learning_rate": 6.568949913386446e-06, "loss": 0.568, "step": 13588 }, { "epoch": 0.41648277553021945, "grad_norm": 1.591274703480377, "learning_rate": 6.56847865563442e-06, "loss": 0.6242, "step": 13589 }, { "epoch": 0.41651342405296066, "grad_norm": 1.704702684783561, "learning_rate": 6.568007382427773e-06, "loss": 0.715, "step": 13590 }, { "epoch": 0.41654407257570186, "grad_norm": 1.945386863824227, "learning_rate": 6.567536093771147e-06, "loss": 0.5985, "step": 13591 }, { "epoch": 0.41657472109844307, "grad_norm": 1.5415557567289846, "learning_rate": 6.5670647896691885e-06, "loss": 0.6508, "step": 13592 }, { "epoch": 0.4166053696211843, "grad_norm": 1.6845619132337908, "learning_rate": 6.5665934701265384e-06, "loss": 0.6799, "step": 13593 }, { "epoch": 0.4166360181439255, "grad_norm": 1.8166012168153645, "learning_rate": 6.566122135147843e-06, "loss": 0.7268, "step": 13594 }, { "epoch": 0.4166666666666667, "grad_norm": 1.8746151598722605, "learning_rate": 6.565650784737745e-06, "loss": 0.5876, "step": 13595 }, { "epoch": 0.4166973151894079, "grad_norm": 1.8365557223164415, "learning_rate": 6.565179418900889e-06, "loss": 0.7211, "step": 13596 }, { "epoch": 0.4167279637121491, "grad_norm": 1.6400464009334481, "learning_rate": 6.56470803764192e-06, "loss": 0.6221, "step": 13597 }, { "epoch": 0.4167586122348903, "grad_norm": 1.5174858970262908, "learning_rate": 6.5642366409654826e-06, "loss": 0.6104, "step": 13598 }, { "epoch": 0.4167892607576315, "grad_norm": 1.8777552448388555, "learning_rate": 6.56376522887622e-06, "loss": 0.6673, "step": 13599 }, { "epoch": 0.4168199092803727, "grad_norm": 1.8398113301915626, "learning_rate": 6.563293801378781e-06, "loss": 0.7284, "step": 13600 }, { "epoch": 0.41685055780311386, "grad_norm": 1.0650257600683168, "learning_rate": 6.562822358477806e-06, "loss": 0.4451, "step": 13601 }, { "epoch": 0.41688120632585507, "grad_norm": 1.6087709704814512, "learning_rate": 6.562350900177943e-06, "loss": 0.62, "step": 13602 }, { "epoch": 0.4169118548485963, "grad_norm": 2.6515654058621183, "learning_rate": 6.5618794264838374e-06, "loss": 0.6035, "step": 13603 }, { "epoch": 0.4169425033713375, "grad_norm": 2.103327810103587, "learning_rate": 6.561407937400132e-06, "loss": 0.7481, "step": 13604 }, { "epoch": 0.4169731518940787, "grad_norm": 1.5852297768547892, "learning_rate": 6.560936432931477e-06, "loss": 0.6013, "step": 13605 }, { "epoch": 0.4170038004168199, "grad_norm": 1.5288738600421836, "learning_rate": 6.560464913082515e-06, "loss": 0.5976, "step": 13606 }, { "epoch": 0.4170344489395611, "grad_norm": 1.606343457591157, "learning_rate": 6.559993377857894e-06, "loss": 0.7469, "step": 13607 }, { "epoch": 0.4170650974623023, "grad_norm": 1.9403320332003535, "learning_rate": 6.5595218272622585e-06, "loss": 0.7281, "step": 13608 }, { "epoch": 0.4170957459850435, "grad_norm": 1.6981469251735943, "learning_rate": 6.559050261300255e-06, "loss": 0.7237, "step": 13609 }, { "epoch": 0.4171263945077847, "grad_norm": 0.8449345161057402, "learning_rate": 6.55857867997653e-06, "loss": 0.4456, "step": 13610 }, { "epoch": 0.4171570430305259, "grad_norm": 1.8615362099984636, "learning_rate": 6.558107083295731e-06, "loss": 0.7442, "step": 13611 }, { "epoch": 0.4171876915532671, "grad_norm": 1.6401517326155486, "learning_rate": 6.557635471262506e-06, "loss": 0.6981, "step": 13612 }, { "epoch": 0.41721834007600833, "grad_norm": 1.682153314463918, "learning_rate": 6.557163843881498e-06, "loss": 0.6896, "step": 13613 }, { "epoch": 0.41724898859874954, "grad_norm": 1.7286854533199656, "learning_rate": 6.556692201157356e-06, "loss": 0.636, "step": 13614 }, { "epoch": 0.41727963712149074, "grad_norm": 0.7594793521772811, "learning_rate": 6.556220543094728e-06, "loss": 0.4543, "step": 13615 }, { "epoch": 0.41731028564423195, "grad_norm": 1.6760330384335231, "learning_rate": 6.555748869698262e-06, "loss": 0.6058, "step": 13616 }, { "epoch": 0.41734093416697315, "grad_norm": 1.5189261730297803, "learning_rate": 6.5552771809726034e-06, "loss": 0.6772, "step": 13617 }, { "epoch": 0.41737158268971436, "grad_norm": 1.702234571099347, "learning_rate": 6.554805476922401e-06, "loss": 0.6591, "step": 13618 }, { "epoch": 0.41740223121245557, "grad_norm": 1.6412868482228316, "learning_rate": 6.554333757552302e-06, "loss": 0.7227, "step": 13619 }, { "epoch": 0.41743287973519677, "grad_norm": 1.5351558813865374, "learning_rate": 6.553862022866956e-06, "loss": 0.5892, "step": 13620 }, { "epoch": 0.417463528257938, "grad_norm": 1.8009826211120183, "learning_rate": 6.5533902728710075e-06, "loss": 0.6077, "step": 13621 }, { "epoch": 0.4174941767806792, "grad_norm": 1.771691862777727, "learning_rate": 6.5529185075691095e-06, "loss": 0.5739, "step": 13622 }, { "epoch": 0.4175248253034204, "grad_norm": 1.9800089319236316, "learning_rate": 6.552446726965907e-06, "loss": 0.7074, "step": 13623 }, { "epoch": 0.4175554738261616, "grad_norm": 1.7089925437370226, "learning_rate": 6.55197493106605e-06, "loss": 0.5893, "step": 13624 }, { "epoch": 0.4175861223489028, "grad_norm": 1.8637043514714697, "learning_rate": 6.551503119874186e-06, "loss": 0.7428, "step": 13625 }, { "epoch": 0.417616770871644, "grad_norm": 1.7107547925007458, "learning_rate": 6.551031293394965e-06, "loss": 0.6694, "step": 13626 }, { "epoch": 0.4176474193943852, "grad_norm": 1.682607290210034, "learning_rate": 6.5505594516330385e-06, "loss": 0.7732, "step": 13627 }, { "epoch": 0.4176780679171264, "grad_norm": 1.7623947443873624, "learning_rate": 6.55008759459305e-06, "loss": 0.742, "step": 13628 }, { "epoch": 0.4177087164398676, "grad_norm": 1.7168515761564596, "learning_rate": 6.549615722279652e-06, "loss": 0.6865, "step": 13629 }, { "epoch": 0.41773936496260883, "grad_norm": 2.100675683440678, "learning_rate": 6.5491438346974945e-06, "loss": 0.7041, "step": 13630 }, { "epoch": 0.41777001348535003, "grad_norm": 0.8730678386309223, "learning_rate": 6.548671931851227e-06, "loss": 0.4401, "step": 13631 }, { "epoch": 0.4178006620080912, "grad_norm": 1.699350533050347, "learning_rate": 6.5482000137454985e-06, "loss": 0.6293, "step": 13632 }, { "epoch": 0.4178313105308324, "grad_norm": 1.9647743019170234, "learning_rate": 6.547728080384959e-06, "loss": 0.6439, "step": 13633 }, { "epoch": 0.4178619590535736, "grad_norm": 1.700963527084569, "learning_rate": 6.547256131774258e-06, "loss": 0.6832, "step": 13634 }, { "epoch": 0.4178926075763148, "grad_norm": 1.7493871623302546, "learning_rate": 6.546784167918047e-06, "loss": 0.6228, "step": 13635 }, { "epoch": 0.417923256099056, "grad_norm": 1.8013765461131812, "learning_rate": 6.546312188820976e-06, "loss": 0.6869, "step": 13636 }, { "epoch": 0.4179539046217972, "grad_norm": 1.6191244346177358, "learning_rate": 6.545840194487694e-06, "loss": 0.6562, "step": 13637 }, { "epoch": 0.4179845531445384, "grad_norm": 1.8390070711434472, "learning_rate": 6.545368184922855e-06, "loss": 0.7129, "step": 13638 }, { "epoch": 0.4180152016672796, "grad_norm": 0.7884827175883327, "learning_rate": 6.5448961601311066e-06, "loss": 0.4468, "step": 13639 }, { "epoch": 0.41804585019002083, "grad_norm": 0.8027628084513113, "learning_rate": 6.544424120117103e-06, "loss": 0.4432, "step": 13640 }, { "epoch": 0.41807649871276203, "grad_norm": 2.009588112771523, "learning_rate": 6.54395206488549e-06, "loss": 0.6929, "step": 13641 }, { "epoch": 0.41810714723550324, "grad_norm": 1.756943960101164, "learning_rate": 6.543479994440926e-06, "loss": 0.7305, "step": 13642 }, { "epoch": 0.41813779575824445, "grad_norm": 1.7751270176918537, "learning_rate": 6.543007908788057e-06, "loss": 0.7147, "step": 13643 }, { "epoch": 0.41816844428098565, "grad_norm": 0.7973242097790706, "learning_rate": 6.542535807931536e-06, "loss": 0.4461, "step": 13644 }, { "epoch": 0.41819909280372686, "grad_norm": 0.781664721790983, "learning_rate": 6.542063691876015e-06, "loss": 0.4661, "step": 13645 }, { "epoch": 0.41822974132646806, "grad_norm": 1.6823089822810477, "learning_rate": 6.541591560626147e-06, "loss": 0.5982, "step": 13646 }, { "epoch": 0.41826038984920927, "grad_norm": 1.6089791213995837, "learning_rate": 6.5411194141865804e-06, "loss": 0.7425, "step": 13647 }, { "epoch": 0.4182910383719505, "grad_norm": 1.7329020979999707, "learning_rate": 6.540647252561972e-06, "loss": 0.6369, "step": 13648 }, { "epoch": 0.4183216868946917, "grad_norm": 1.765282083746735, "learning_rate": 6.540175075756971e-06, "loss": 0.6875, "step": 13649 }, { "epoch": 0.4183523354174329, "grad_norm": 1.577404133008093, "learning_rate": 6.539702883776232e-06, "loss": 0.6541, "step": 13650 }, { "epoch": 0.4183829839401741, "grad_norm": 1.8055399376652472, "learning_rate": 6.539230676624406e-06, "loss": 0.6374, "step": 13651 }, { "epoch": 0.4184136324629153, "grad_norm": 1.889777597994, "learning_rate": 6.538758454306147e-06, "loss": 0.6729, "step": 13652 }, { "epoch": 0.4184442809856565, "grad_norm": 1.7248496188291038, "learning_rate": 6.538286216826107e-06, "loss": 0.6889, "step": 13653 }, { "epoch": 0.4184749295083977, "grad_norm": 1.8991572937335006, "learning_rate": 6.537813964188938e-06, "loss": 0.7375, "step": 13654 }, { "epoch": 0.4185055780311389, "grad_norm": 1.6659709204463995, "learning_rate": 6.537341696399295e-06, "loss": 0.5932, "step": 13655 }, { "epoch": 0.4185362265538801, "grad_norm": 1.671219520131011, "learning_rate": 6.536869413461832e-06, "loss": 0.6682, "step": 13656 }, { "epoch": 0.4185668750766213, "grad_norm": 1.8680434932385055, "learning_rate": 6.5363971153812e-06, "loss": 0.7107, "step": 13657 }, { "epoch": 0.41859752359936253, "grad_norm": 1.7076291235647536, "learning_rate": 6.5359248021620556e-06, "loss": 0.6419, "step": 13658 }, { "epoch": 0.41862817212210374, "grad_norm": 0.8370737348432876, "learning_rate": 6.5354524738090505e-06, "loss": 0.4432, "step": 13659 }, { "epoch": 0.41865882064484494, "grad_norm": 1.949862953725367, "learning_rate": 6.534980130326839e-06, "loss": 0.8174, "step": 13660 }, { "epoch": 0.41868946916758615, "grad_norm": 1.694694243785603, "learning_rate": 6.534507771720076e-06, "loss": 0.678, "step": 13661 }, { "epoch": 0.41872011769032735, "grad_norm": 1.5949691420864651, "learning_rate": 6.534035397993415e-06, "loss": 0.6011, "step": 13662 }, { "epoch": 0.4187507662130685, "grad_norm": 1.7242431043498223, "learning_rate": 6.53356300915151e-06, "loss": 0.7, "step": 13663 }, { "epoch": 0.4187814147358097, "grad_norm": 1.5973153518032992, "learning_rate": 6.533090605199017e-06, "loss": 0.6699, "step": 13664 }, { "epoch": 0.4188120632585509, "grad_norm": 1.724406996092783, "learning_rate": 6.532618186140591e-06, "loss": 0.6727, "step": 13665 }, { "epoch": 0.4188427117812921, "grad_norm": 1.7548498595781084, "learning_rate": 6.5321457519808855e-06, "loss": 0.6237, "step": 13666 }, { "epoch": 0.4188733603040333, "grad_norm": 1.7751102398178016, "learning_rate": 6.531673302724555e-06, "loss": 0.7576, "step": 13667 }, { "epoch": 0.41890400882677453, "grad_norm": 1.644075823797452, "learning_rate": 6.531200838376255e-06, "loss": 0.642, "step": 13668 }, { "epoch": 0.41893465734951574, "grad_norm": 1.7375822029523136, "learning_rate": 6.5307283589406425e-06, "loss": 0.684, "step": 13669 }, { "epoch": 0.41896530587225694, "grad_norm": 1.7290116496490116, "learning_rate": 6.530255864422372e-06, "loss": 0.6521, "step": 13670 }, { "epoch": 0.41899595439499815, "grad_norm": 1.8180493580963921, "learning_rate": 6.529783354826098e-06, "loss": 0.6642, "step": 13671 }, { "epoch": 0.41902660291773935, "grad_norm": 1.5959528854994616, "learning_rate": 6.529310830156479e-06, "loss": 0.6268, "step": 13672 }, { "epoch": 0.41905725144048056, "grad_norm": 1.8154730502347642, "learning_rate": 6.5288382904181665e-06, "loss": 0.634, "step": 13673 }, { "epoch": 0.41908789996322177, "grad_norm": 0.815378469177394, "learning_rate": 6.528365735615822e-06, "loss": 0.4507, "step": 13674 }, { "epoch": 0.41911854848596297, "grad_norm": 1.626527471180943, "learning_rate": 6.527893165754097e-06, "loss": 0.6875, "step": 13675 }, { "epoch": 0.4191491970087042, "grad_norm": 1.6933653828830677, "learning_rate": 6.5274205808376504e-06, "loss": 0.7047, "step": 13676 }, { "epoch": 0.4191798455314454, "grad_norm": 1.729967405425221, "learning_rate": 6.526947980871137e-06, "loss": 0.7055, "step": 13677 }, { "epoch": 0.4192104940541866, "grad_norm": 1.7624537185958804, "learning_rate": 6.526475365859215e-06, "loss": 0.6851, "step": 13678 }, { "epoch": 0.4192411425769278, "grad_norm": 0.7995116502164784, "learning_rate": 6.526002735806541e-06, "loss": 0.4746, "step": 13679 }, { "epoch": 0.419271791099669, "grad_norm": 1.8523761677746495, "learning_rate": 6.525530090717771e-06, "loss": 0.6205, "step": 13680 }, { "epoch": 0.4193024396224102, "grad_norm": 1.682777354541186, "learning_rate": 6.5250574305975635e-06, "loss": 0.7121, "step": 13681 }, { "epoch": 0.4193330881451514, "grad_norm": 1.6773225123628168, "learning_rate": 6.524584755450573e-06, "loss": 0.5807, "step": 13682 }, { "epoch": 0.4193637366678926, "grad_norm": 0.8075253567597176, "learning_rate": 6.524112065281461e-06, "loss": 0.4816, "step": 13683 }, { "epoch": 0.4193943851906338, "grad_norm": 1.650733004873314, "learning_rate": 6.523639360094882e-06, "loss": 0.6607, "step": 13684 }, { "epoch": 0.41942503371337503, "grad_norm": 1.7414973041312656, "learning_rate": 6.523166639895496e-06, "loss": 0.6093, "step": 13685 }, { "epoch": 0.41945568223611623, "grad_norm": 1.730512544563599, "learning_rate": 6.522693904687958e-06, "loss": 0.7046, "step": 13686 }, { "epoch": 0.41948633075885744, "grad_norm": 0.8183446163862447, "learning_rate": 6.522221154476927e-06, "loss": 0.4702, "step": 13687 }, { "epoch": 0.41951697928159865, "grad_norm": 1.7007647178934482, "learning_rate": 6.521748389267062e-06, "loss": 0.6206, "step": 13688 }, { "epoch": 0.41954762780433985, "grad_norm": 1.859065719131993, "learning_rate": 6.521275609063021e-06, "loss": 0.7212, "step": 13689 }, { "epoch": 0.41957827632708106, "grad_norm": 1.8514649626248905, "learning_rate": 6.520802813869463e-06, "loss": 0.6382, "step": 13690 }, { "epoch": 0.41960892484982226, "grad_norm": 1.8147294180937479, "learning_rate": 6.520330003691045e-06, "loss": 0.6631, "step": 13691 }, { "epoch": 0.41963957337256347, "grad_norm": 1.6648723910451952, "learning_rate": 6.519857178532424e-06, "loss": 0.615, "step": 13692 }, { "epoch": 0.4196702218953047, "grad_norm": 1.9300666676670737, "learning_rate": 6.519384338398263e-06, "loss": 0.6594, "step": 13693 }, { "epoch": 0.4197008704180458, "grad_norm": 1.6001684915510024, "learning_rate": 6.518911483293221e-06, "loss": 0.6457, "step": 13694 }, { "epoch": 0.41973151894078703, "grad_norm": 1.5972234210877863, "learning_rate": 6.5184386132219535e-06, "loss": 0.6295, "step": 13695 }, { "epoch": 0.41976216746352824, "grad_norm": 2.072261865636767, "learning_rate": 6.517965728189124e-06, "loss": 0.7405, "step": 13696 }, { "epoch": 0.41979281598626944, "grad_norm": 1.954836321630001, "learning_rate": 6.517492828199388e-06, "loss": 0.6319, "step": 13697 }, { "epoch": 0.41982346450901065, "grad_norm": 1.768647121382391, "learning_rate": 6.5170199132574075e-06, "loss": 0.6843, "step": 13698 }, { "epoch": 0.41985411303175185, "grad_norm": 1.5505738165892426, "learning_rate": 6.516546983367841e-06, "loss": 0.6535, "step": 13699 }, { "epoch": 0.41988476155449306, "grad_norm": 1.7272241277673832, "learning_rate": 6.516074038535351e-06, "loss": 0.6893, "step": 13700 }, { "epoch": 0.41991541007723426, "grad_norm": 1.5906099246345218, "learning_rate": 6.515601078764593e-06, "loss": 0.6014, "step": 13701 }, { "epoch": 0.41994605859997547, "grad_norm": 1.6046915279803478, "learning_rate": 6.5151281040602325e-06, "loss": 0.6349, "step": 13702 }, { "epoch": 0.4199767071227167, "grad_norm": 1.6144031050700636, "learning_rate": 6.514655114426924e-06, "loss": 0.6765, "step": 13703 }, { "epoch": 0.4200073556454579, "grad_norm": 1.7486618646871965, "learning_rate": 6.514182109869333e-06, "loss": 0.6636, "step": 13704 }, { "epoch": 0.4200380041681991, "grad_norm": 0.8883067489209445, "learning_rate": 6.513709090392118e-06, "loss": 0.4646, "step": 13705 }, { "epoch": 0.4200686526909403, "grad_norm": 1.8193509053700192, "learning_rate": 6.51323605599994e-06, "loss": 0.6416, "step": 13706 }, { "epoch": 0.4200993012136815, "grad_norm": 1.8462309788127174, "learning_rate": 6.51276300669746e-06, "loss": 0.6118, "step": 13707 }, { "epoch": 0.4201299497364227, "grad_norm": 1.930822945958704, "learning_rate": 6.512289942489339e-06, "loss": 0.7498, "step": 13708 }, { "epoch": 0.4201605982591639, "grad_norm": 1.6381600313611024, "learning_rate": 6.511816863380239e-06, "loss": 0.7134, "step": 13709 }, { "epoch": 0.4201912467819051, "grad_norm": 1.7215669169756969, "learning_rate": 6.511343769374819e-06, "loss": 0.6517, "step": 13710 }, { "epoch": 0.4202218953046463, "grad_norm": 1.6761933456043319, "learning_rate": 6.510870660477744e-06, "loss": 0.7458, "step": 13711 }, { "epoch": 0.4202525438273875, "grad_norm": 1.6587240282633393, "learning_rate": 6.510397536693673e-06, "loss": 0.6489, "step": 13712 }, { "epoch": 0.42028319235012873, "grad_norm": 0.8539151134066323, "learning_rate": 6.5099243980272684e-06, "loss": 0.4659, "step": 13713 }, { "epoch": 0.42031384087286994, "grad_norm": 1.8985543891173617, "learning_rate": 6.5094512444831915e-06, "loss": 0.7167, "step": 13714 }, { "epoch": 0.42034448939561114, "grad_norm": 1.7792998132711377, "learning_rate": 6.508978076066107e-06, "loss": 0.636, "step": 13715 }, { "epoch": 0.42037513791835235, "grad_norm": 0.7956407788778016, "learning_rate": 6.508504892780675e-06, "loss": 0.4425, "step": 13716 }, { "epoch": 0.42040578644109355, "grad_norm": 1.6700730039761762, "learning_rate": 6.508031694631558e-06, "loss": 0.6809, "step": 13717 }, { "epoch": 0.42043643496383476, "grad_norm": 1.6327707884797, "learning_rate": 6.507558481623419e-06, "loss": 0.57, "step": 13718 }, { "epoch": 0.42046708348657597, "grad_norm": 1.4968871308105307, "learning_rate": 6.50708525376092e-06, "loss": 0.5891, "step": 13719 }, { "epoch": 0.42049773200931717, "grad_norm": 1.6062551770528473, "learning_rate": 6.506612011048725e-06, "loss": 0.7698, "step": 13720 }, { "epoch": 0.4205283805320584, "grad_norm": 0.8066712016273876, "learning_rate": 6.506138753491496e-06, "loss": 0.4839, "step": 13721 }, { "epoch": 0.4205590290547996, "grad_norm": 1.5962426853600553, "learning_rate": 6.505665481093897e-06, "loss": 0.6727, "step": 13722 }, { "epoch": 0.4205896775775408, "grad_norm": 1.569045871321629, "learning_rate": 6.50519219386059e-06, "loss": 0.5644, "step": 13723 }, { "epoch": 0.420620326100282, "grad_norm": 0.7972364278130297, "learning_rate": 6.50471889179624e-06, "loss": 0.4521, "step": 13724 }, { "epoch": 0.42065097462302314, "grad_norm": 1.779613015580328, "learning_rate": 6.5042455749055086e-06, "loss": 0.7181, "step": 13725 }, { "epoch": 0.42068162314576435, "grad_norm": 0.7869099512308763, "learning_rate": 6.503772243193061e-06, "loss": 0.4735, "step": 13726 }, { "epoch": 0.42071227166850556, "grad_norm": 1.8261077953239135, "learning_rate": 6.5032988966635625e-06, "loss": 0.635, "step": 13727 }, { "epoch": 0.42074292019124676, "grad_norm": 1.7515455839557352, "learning_rate": 6.502825535321674e-06, "loss": 0.6592, "step": 13728 }, { "epoch": 0.42077356871398797, "grad_norm": 1.57455457438908, "learning_rate": 6.502352159172061e-06, "loss": 0.6531, "step": 13729 }, { "epoch": 0.42080421723672917, "grad_norm": 1.7468324329577536, "learning_rate": 6.501878768219387e-06, "loss": 0.6486, "step": 13730 }, { "epoch": 0.4208348657594704, "grad_norm": 1.7027666481147516, "learning_rate": 6.501405362468319e-06, "loss": 0.5709, "step": 13731 }, { "epoch": 0.4208655142822116, "grad_norm": 1.7326811205925885, "learning_rate": 6.500931941923519e-06, "loss": 0.7278, "step": 13732 }, { "epoch": 0.4208961628049528, "grad_norm": 0.8159009817720685, "learning_rate": 6.500458506589652e-06, "loss": 0.4365, "step": 13733 }, { "epoch": 0.420926811327694, "grad_norm": 1.8161476146738817, "learning_rate": 6.499985056471384e-06, "loss": 0.7266, "step": 13734 }, { "epoch": 0.4209574598504352, "grad_norm": 1.84545288518129, "learning_rate": 6.49951159157338e-06, "loss": 0.6648, "step": 13735 }, { "epoch": 0.4209881083731764, "grad_norm": 1.863410006439299, "learning_rate": 6.499038111900302e-06, "loss": 0.6374, "step": 13736 }, { "epoch": 0.4210187568959176, "grad_norm": 0.8676759512065111, "learning_rate": 6.498564617456821e-06, "loss": 0.4555, "step": 13737 }, { "epoch": 0.4210494054186588, "grad_norm": 2.680275294347155, "learning_rate": 6.498091108247597e-06, "loss": 0.6913, "step": 13738 }, { "epoch": 0.4210800539414, "grad_norm": 1.5107796544142738, "learning_rate": 6.4976175842773005e-06, "loss": 0.5836, "step": 13739 }, { "epoch": 0.42111070246414123, "grad_norm": 1.7592719786392006, "learning_rate": 6.497144045550593e-06, "loss": 0.6289, "step": 13740 }, { "epoch": 0.42114135098688243, "grad_norm": 1.73450358620179, "learning_rate": 6.496670492072144e-06, "loss": 0.6352, "step": 13741 }, { "epoch": 0.42117199950962364, "grad_norm": 0.7945244870741766, "learning_rate": 6.496196923846615e-06, "loss": 0.4516, "step": 13742 }, { "epoch": 0.42120264803236485, "grad_norm": 1.8230099978019254, "learning_rate": 6.495723340878677e-06, "loss": 0.6591, "step": 13743 }, { "epoch": 0.42123329655510605, "grad_norm": 2.6921315724920083, "learning_rate": 6.4952497431729936e-06, "loss": 0.6477, "step": 13744 }, { "epoch": 0.42126394507784726, "grad_norm": 1.8982784568695708, "learning_rate": 6.4947761307342315e-06, "loss": 0.7073, "step": 13745 }, { "epoch": 0.42129459360058846, "grad_norm": 1.6873394474007959, "learning_rate": 6.494302503567057e-06, "loss": 0.6637, "step": 13746 }, { "epoch": 0.42132524212332967, "grad_norm": 1.724624759833715, "learning_rate": 6.493828861676139e-06, "loss": 0.6982, "step": 13747 }, { "epoch": 0.4213558906460709, "grad_norm": 1.5317618067147647, "learning_rate": 6.493355205066143e-06, "loss": 0.6763, "step": 13748 }, { "epoch": 0.4213865391688121, "grad_norm": 1.4427482307119968, "learning_rate": 6.492881533741735e-06, "loss": 0.5851, "step": 13749 }, { "epoch": 0.4214171876915533, "grad_norm": 1.6705095520551352, "learning_rate": 6.492407847707584e-06, "loss": 0.6502, "step": 13750 }, { "epoch": 0.4214478362142945, "grad_norm": 1.6945094563539769, "learning_rate": 6.491934146968357e-06, "loss": 0.708, "step": 13751 }, { "epoch": 0.4214784847370357, "grad_norm": 1.577688854098726, "learning_rate": 6.491460431528721e-06, "loss": 0.5587, "step": 13752 }, { "epoch": 0.4215091332597769, "grad_norm": 1.5154223373129823, "learning_rate": 6.490986701393343e-06, "loss": 0.677, "step": 13753 }, { "epoch": 0.4215397817825181, "grad_norm": 1.562715971949809, "learning_rate": 6.490512956566894e-06, "loss": 0.6138, "step": 13754 }, { "epoch": 0.4215704303052593, "grad_norm": 1.7085850106169296, "learning_rate": 6.490039197054037e-06, "loss": 0.7039, "step": 13755 }, { "epoch": 0.42160107882800046, "grad_norm": 1.5591487428840523, "learning_rate": 6.489565422859443e-06, "loss": 0.6142, "step": 13756 }, { "epoch": 0.42163172735074167, "grad_norm": 1.4788049401461394, "learning_rate": 6.489091633987778e-06, "loss": 0.6931, "step": 13757 }, { "epoch": 0.4216623758734829, "grad_norm": 1.9410133144039983, "learning_rate": 6.488617830443715e-06, "loss": 0.6336, "step": 13758 }, { "epoch": 0.4216930243962241, "grad_norm": 1.6396733001053712, "learning_rate": 6.488144012231918e-06, "loss": 0.6195, "step": 13759 }, { "epoch": 0.4217236729189653, "grad_norm": 0.8444429718603774, "learning_rate": 6.487670179357058e-06, "loss": 0.459, "step": 13760 }, { "epoch": 0.4217543214417065, "grad_norm": 1.606085579901667, "learning_rate": 6.487196331823803e-06, "loss": 0.5113, "step": 13761 }, { "epoch": 0.4217849699644477, "grad_norm": 1.6645498619065664, "learning_rate": 6.486722469636822e-06, "loss": 0.6703, "step": 13762 }, { "epoch": 0.4218156184871889, "grad_norm": 0.8679162036989643, "learning_rate": 6.486248592800785e-06, "loss": 0.4409, "step": 13763 }, { "epoch": 0.4218462670099301, "grad_norm": 1.6780953300042212, "learning_rate": 6.485774701320358e-06, "loss": 0.63, "step": 13764 }, { "epoch": 0.4218769155326713, "grad_norm": 1.8558128803319367, "learning_rate": 6.485300795200215e-06, "loss": 0.6693, "step": 13765 }, { "epoch": 0.4219075640554125, "grad_norm": 1.8066414586016406, "learning_rate": 6.484826874445023e-06, "loss": 0.6456, "step": 13766 }, { "epoch": 0.4219382125781537, "grad_norm": 1.763549018474962, "learning_rate": 6.4843529390594505e-06, "loss": 0.7072, "step": 13767 }, { "epoch": 0.42196886110089493, "grad_norm": 1.8424110051747418, "learning_rate": 6.483878989048169e-06, "loss": 0.7201, "step": 13768 }, { "epoch": 0.42199950962363614, "grad_norm": 1.7848091258085914, "learning_rate": 6.48340502441585e-06, "loss": 0.6396, "step": 13769 }, { "epoch": 0.42203015814637734, "grad_norm": 0.8140787409800967, "learning_rate": 6.48293104516716e-06, "loss": 0.4666, "step": 13770 }, { "epoch": 0.42206080666911855, "grad_norm": 1.5621018836060463, "learning_rate": 6.482457051306772e-06, "loss": 0.6105, "step": 13771 }, { "epoch": 0.42209145519185975, "grad_norm": 0.759279891262684, "learning_rate": 6.481983042839354e-06, "loss": 0.4498, "step": 13772 }, { "epoch": 0.42212210371460096, "grad_norm": 1.844132672699512, "learning_rate": 6.481509019769579e-06, "loss": 0.6754, "step": 13773 }, { "epoch": 0.42215275223734217, "grad_norm": 1.499447081427152, "learning_rate": 6.481034982102116e-06, "loss": 0.7072, "step": 13774 }, { "epoch": 0.42218340076008337, "grad_norm": 1.648687871708743, "learning_rate": 6.480560929841636e-06, "loss": 0.6738, "step": 13775 }, { "epoch": 0.4222140492828246, "grad_norm": 0.8022505411046005, "learning_rate": 6.4800868629928116e-06, "loss": 0.4549, "step": 13776 }, { "epoch": 0.4222446978055658, "grad_norm": 1.7904847154507362, "learning_rate": 6.479612781560312e-06, "loss": 0.6624, "step": 13777 }, { "epoch": 0.422275346328307, "grad_norm": 1.746181845832458, "learning_rate": 6.4791386855488096e-06, "loss": 0.6016, "step": 13778 }, { "epoch": 0.4223059948510482, "grad_norm": 1.7036286422338074, "learning_rate": 6.478664574962974e-06, "loss": 0.629, "step": 13779 }, { "epoch": 0.4223366433737894, "grad_norm": 1.898696051274358, "learning_rate": 6.478190449807479e-06, "loss": 0.6973, "step": 13780 }, { "epoch": 0.4223672918965306, "grad_norm": 1.7614213011579867, "learning_rate": 6.4777163100869944e-06, "loss": 0.7108, "step": 13781 }, { "epoch": 0.4223979404192718, "grad_norm": 1.858213493090659, "learning_rate": 6.477242155806195e-06, "loss": 0.7679, "step": 13782 }, { "epoch": 0.422428588942013, "grad_norm": 1.8533059803659901, "learning_rate": 6.476767986969748e-06, "loss": 0.717, "step": 13783 }, { "epoch": 0.4224592374647542, "grad_norm": 1.5936095230429674, "learning_rate": 6.47629380358233e-06, "loss": 0.7274, "step": 13784 }, { "epoch": 0.42248988598749543, "grad_norm": 1.5573975499352277, "learning_rate": 6.475819605648611e-06, "loss": 0.5854, "step": 13785 }, { "epoch": 0.42252053451023663, "grad_norm": 1.9824063245483317, "learning_rate": 6.4753453931732634e-06, "loss": 0.7641, "step": 13786 }, { "epoch": 0.4225511830329778, "grad_norm": 1.7860209603204706, "learning_rate": 6.474871166160959e-06, "loss": 0.6209, "step": 13787 }, { "epoch": 0.422581831555719, "grad_norm": 0.8257261390815677, "learning_rate": 6.474396924616374e-06, "loss": 0.4233, "step": 13788 }, { "epoch": 0.4226124800784602, "grad_norm": 1.8889333953713583, "learning_rate": 6.473922668544179e-06, "loss": 0.6283, "step": 13789 }, { "epoch": 0.4226431286012014, "grad_norm": 0.7964455660172272, "learning_rate": 6.473448397949045e-06, "loss": 0.4656, "step": 13790 }, { "epoch": 0.4226737771239426, "grad_norm": 1.8661662940344774, "learning_rate": 6.472974112835647e-06, "loss": 0.7206, "step": 13791 }, { "epoch": 0.4227044256466838, "grad_norm": 1.6927645763925698, "learning_rate": 6.472499813208659e-06, "loss": 0.7021, "step": 13792 }, { "epoch": 0.422735074169425, "grad_norm": 1.7530494740380198, "learning_rate": 6.472025499072754e-06, "loss": 0.7486, "step": 13793 }, { "epoch": 0.4227657226921662, "grad_norm": 2.0036235215082248, "learning_rate": 6.471551170432604e-06, "loss": 0.6789, "step": 13794 }, { "epoch": 0.42279637121490743, "grad_norm": 0.818435689954247, "learning_rate": 6.471076827292885e-06, "loss": 0.4719, "step": 13795 }, { "epoch": 0.42282701973764864, "grad_norm": 1.6149729836055788, "learning_rate": 6.470602469658268e-06, "loss": 0.5557, "step": 13796 }, { "epoch": 0.42285766826038984, "grad_norm": 1.626895650038314, "learning_rate": 6.4701280975334316e-06, "loss": 0.64, "step": 13797 }, { "epoch": 0.42288831678313105, "grad_norm": 1.5965903596093356, "learning_rate": 6.469653710923044e-06, "loss": 0.5795, "step": 13798 }, { "epoch": 0.42291896530587225, "grad_norm": 1.797338834726794, "learning_rate": 6.469179309831783e-06, "loss": 0.6717, "step": 13799 }, { "epoch": 0.42294961382861346, "grad_norm": 1.7130565726618538, "learning_rate": 6.468704894264324e-06, "loss": 0.6304, "step": 13800 }, { "epoch": 0.42298026235135466, "grad_norm": 1.8388056670207138, "learning_rate": 6.468230464225337e-06, "loss": 0.6716, "step": 13801 }, { "epoch": 0.42301091087409587, "grad_norm": 0.8043286793740912, "learning_rate": 6.467756019719501e-06, "loss": 0.4639, "step": 13802 }, { "epoch": 0.4230415593968371, "grad_norm": 1.5782707124727253, "learning_rate": 6.467281560751489e-06, "loss": 0.6104, "step": 13803 }, { "epoch": 0.4230722079195783, "grad_norm": 1.6509610986693661, "learning_rate": 6.466807087325978e-06, "loss": 0.7785, "step": 13804 }, { "epoch": 0.4231028564423195, "grad_norm": 1.9283249889363603, "learning_rate": 6.46633259944764e-06, "loss": 0.6963, "step": 13805 }, { "epoch": 0.4231335049650607, "grad_norm": 1.6557580732983053, "learning_rate": 6.465858097121151e-06, "loss": 0.7226, "step": 13806 }, { "epoch": 0.4231641534878019, "grad_norm": 1.777047225072308, "learning_rate": 6.4653835803511884e-06, "loss": 0.6634, "step": 13807 }, { "epoch": 0.4231948020105431, "grad_norm": 0.8084997359814785, "learning_rate": 6.464909049142427e-06, "loss": 0.4478, "step": 13808 }, { "epoch": 0.4232254505332843, "grad_norm": 1.7576261938275006, "learning_rate": 6.464434503499542e-06, "loss": 0.719, "step": 13809 }, { "epoch": 0.4232560990560255, "grad_norm": 1.6557638533865782, "learning_rate": 6.463959943427207e-06, "loss": 0.7216, "step": 13810 }, { "epoch": 0.4232867475787667, "grad_norm": 0.8444375309515799, "learning_rate": 6.463485368930102e-06, "loss": 0.4602, "step": 13811 }, { "epoch": 0.4233173961015079, "grad_norm": 3.320109583641664, "learning_rate": 6.4630107800129015e-06, "loss": 0.5779, "step": 13812 }, { "epoch": 0.42334804462424913, "grad_norm": 1.5761159247508842, "learning_rate": 6.46253617668028e-06, "loss": 0.6386, "step": 13813 }, { "epoch": 0.42337869314699034, "grad_norm": 1.767741860953523, "learning_rate": 6.462061558936916e-06, "loss": 0.6465, "step": 13814 }, { "epoch": 0.42340934166973154, "grad_norm": 0.7726674822290897, "learning_rate": 6.461586926787484e-06, "loss": 0.4461, "step": 13815 }, { "epoch": 0.42343999019247275, "grad_norm": 2.012855141225537, "learning_rate": 6.461112280236663e-06, "loss": 0.6524, "step": 13816 }, { "epoch": 0.42347063871521395, "grad_norm": 0.8027823231939757, "learning_rate": 6.460637619289129e-06, "loss": 0.439, "step": 13817 }, { "epoch": 0.4235012872379551, "grad_norm": 1.6120884882010833, "learning_rate": 6.46016294394956e-06, "loss": 0.5632, "step": 13818 }, { "epoch": 0.4235319357606963, "grad_norm": 1.6039694855593454, "learning_rate": 6.45968825422263e-06, "loss": 0.6418, "step": 13819 }, { "epoch": 0.4235625842834375, "grad_norm": 1.7963951428090488, "learning_rate": 6.459213550113019e-06, "loss": 0.6742, "step": 13820 }, { "epoch": 0.4235932328061787, "grad_norm": 1.4973380524389863, "learning_rate": 6.4587388316254055e-06, "loss": 0.68, "step": 13821 }, { "epoch": 0.4236238813289199, "grad_norm": 1.708987875767616, "learning_rate": 6.458264098764462e-06, "loss": 0.6495, "step": 13822 }, { "epoch": 0.42365452985166113, "grad_norm": 1.751921501278778, "learning_rate": 6.457789351534871e-06, "loss": 0.6003, "step": 13823 }, { "epoch": 0.42368517837440234, "grad_norm": 1.694155041407784, "learning_rate": 6.457314589941308e-06, "loss": 0.6959, "step": 13824 }, { "epoch": 0.42371582689714354, "grad_norm": 1.4685077378537867, "learning_rate": 6.456839813988451e-06, "loss": 0.5282, "step": 13825 }, { "epoch": 0.42374647541988475, "grad_norm": 1.7491453001435329, "learning_rate": 6.4563650236809785e-06, "loss": 0.564, "step": 13826 }, { "epoch": 0.42377712394262596, "grad_norm": 1.654780388253273, "learning_rate": 6.455890219023569e-06, "loss": 0.5969, "step": 13827 }, { "epoch": 0.42380777246536716, "grad_norm": 1.4746922687744404, "learning_rate": 6.455415400020901e-06, "loss": 0.5897, "step": 13828 }, { "epoch": 0.42383842098810837, "grad_norm": 1.6476716641722535, "learning_rate": 6.454940566677652e-06, "loss": 0.6841, "step": 13829 }, { "epoch": 0.42386906951084957, "grad_norm": 1.596587876429569, "learning_rate": 6.454465718998503e-06, "loss": 0.6431, "step": 13830 }, { "epoch": 0.4238997180335908, "grad_norm": 1.6980327097277295, "learning_rate": 6.45399085698813e-06, "loss": 0.7051, "step": 13831 }, { "epoch": 0.423930366556332, "grad_norm": 0.9635966626958912, "learning_rate": 6.453515980651213e-06, "loss": 0.459, "step": 13832 }, { "epoch": 0.4239610150790732, "grad_norm": 1.6537523995241796, "learning_rate": 6.453041089992431e-06, "loss": 0.6533, "step": 13833 }, { "epoch": 0.4239916636018144, "grad_norm": 1.586978616546987, "learning_rate": 6.452566185016464e-06, "loss": 0.6545, "step": 13834 }, { "epoch": 0.4240223121245556, "grad_norm": 1.6877363904444926, "learning_rate": 6.452091265727991e-06, "loss": 0.6154, "step": 13835 }, { "epoch": 0.4240529606472968, "grad_norm": 1.7313148851785998, "learning_rate": 6.4516163321316905e-06, "loss": 0.6202, "step": 13836 }, { "epoch": 0.424083609170038, "grad_norm": 0.8439630081399626, "learning_rate": 6.451141384232242e-06, "loss": 0.4711, "step": 13837 }, { "epoch": 0.4241142576927792, "grad_norm": 2.5749014331327262, "learning_rate": 6.450666422034327e-06, "loss": 0.5988, "step": 13838 }, { "epoch": 0.4241449062155204, "grad_norm": 1.4144039333894962, "learning_rate": 6.450191445542625e-06, "loss": 0.5157, "step": 13839 }, { "epoch": 0.42417555473826163, "grad_norm": 1.7191430451344853, "learning_rate": 6.449716454761816e-06, "loss": 0.6801, "step": 13840 }, { "epoch": 0.42420620326100283, "grad_norm": 1.698060176319871, "learning_rate": 6.449241449696579e-06, "loss": 0.6897, "step": 13841 }, { "epoch": 0.42423685178374404, "grad_norm": 1.6278657177389333, "learning_rate": 6.448766430351595e-06, "loss": 0.6408, "step": 13842 }, { "epoch": 0.42426750030648525, "grad_norm": 0.8900557826603221, "learning_rate": 6.448291396731545e-06, "loss": 0.4576, "step": 13843 }, { "epoch": 0.42429814882922645, "grad_norm": 1.7947505235001278, "learning_rate": 6.4478163488411096e-06, "loss": 0.6682, "step": 13844 }, { "epoch": 0.42432879735196766, "grad_norm": 1.7072956257824787, "learning_rate": 6.447341286684969e-06, "loss": 0.5943, "step": 13845 }, { "epoch": 0.42435944587470886, "grad_norm": 1.6516315381547042, "learning_rate": 6.446866210267804e-06, "loss": 0.6364, "step": 13846 }, { "epoch": 0.42439009439745007, "grad_norm": 1.4255296348489894, "learning_rate": 6.446391119594297e-06, "loss": 0.5501, "step": 13847 }, { "epoch": 0.4244207429201913, "grad_norm": 1.801579930172086, "learning_rate": 6.445916014669127e-06, "loss": 0.6888, "step": 13848 }, { "epoch": 0.4244513914429324, "grad_norm": 1.649275746189209, "learning_rate": 6.445440895496977e-06, "loss": 0.5741, "step": 13849 }, { "epoch": 0.42448203996567363, "grad_norm": 1.6231187420703626, "learning_rate": 6.4449657620825275e-06, "loss": 0.6758, "step": 13850 }, { "epoch": 0.42451268848841484, "grad_norm": 1.6162916036489323, "learning_rate": 6.444490614430463e-06, "loss": 0.6091, "step": 13851 }, { "epoch": 0.42454333701115604, "grad_norm": 1.6681743263292859, "learning_rate": 6.44401545254546e-06, "loss": 0.6528, "step": 13852 }, { "epoch": 0.42457398553389725, "grad_norm": 1.7222502185587978, "learning_rate": 6.4435402764322056e-06, "loss": 0.6408, "step": 13853 }, { "epoch": 0.42460463405663845, "grad_norm": 1.5731803407131455, "learning_rate": 6.443065086095379e-06, "loss": 0.6817, "step": 13854 }, { "epoch": 0.42463528257937966, "grad_norm": 1.7279103697235143, "learning_rate": 6.442589881539662e-06, "loss": 0.8022, "step": 13855 }, { "epoch": 0.42466593110212086, "grad_norm": 1.781455699836348, "learning_rate": 6.4421146627697375e-06, "loss": 0.6964, "step": 13856 }, { "epoch": 0.42469657962486207, "grad_norm": 0.8317934595291959, "learning_rate": 6.4416394297902894e-06, "loss": 0.4549, "step": 13857 }, { "epoch": 0.4247272281476033, "grad_norm": 1.7704372261887604, "learning_rate": 6.441164182605999e-06, "loss": 0.6543, "step": 13858 }, { "epoch": 0.4247578766703445, "grad_norm": 1.7505864127246589, "learning_rate": 6.440688921221547e-06, "loss": 0.6001, "step": 13859 }, { "epoch": 0.4247885251930857, "grad_norm": 1.6897181577409834, "learning_rate": 6.440213645641621e-06, "loss": 0.6752, "step": 13860 }, { "epoch": 0.4248191737158269, "grad_norm": 1.4587347701080806, "learning_rate": 6.4397383558709005e-06, "loss": 0.6027, "step": 13861 }, { "epoch": 0.4248498222385681, "grad_norm": 1.5932428161700138, "learning_rate": 6.439263051914071e-06, "loss": 0.6753, "step": 13862 }, { "epoch": 0.4248804707613093, "grad_norm": 0.8095450016435826, "learning_rate": 6.438787733775812e-06, "loss": 0.4481, "step": 13863 }, { "epoch": 0.4249111192840505, "grad_norm": 1.6250912076854063, "learning_rate": 6.438312401460812e-06, "loss": 0.5714, "step": 13864 }, { "epoch": 0.4249417678067917, "grad_norm": 1.9378566630793999, "learning_rate": 6.437837054973748e-06, "loss": 0.6309, "step": 13865 }, { "epoch": 0.4249724163295329, "grad_norm": 1.6256426893725129, "learning_rate": 6.437361694319312e-06, "loss": 0.5925, "step": 13866 }, { "epoch": 0.4250030648522741, "grad_norm": 0.7552544069099429, "learning_rate": 6.436886319502181e-06, "loss": 0.4407, "step": 13867 }, { "epoch": 0.42503371337501533, "grad_norm": 1.9319084298733493, "learning_rate": 6.436410930527042e-06, "loss": 0.7955, "step": 13868 }, { "epoch": 0.42506436189775654, "grad_norm": 1.7717268652717408, "learning_rate": 6.435935527398578e-06, "loss": 0.6726, "step": 13869 }, { "epoch": 0.42509501042049774, "grad_norm": 1.6433765559483853, "learning_rate": 6.435460110121474e-06, "loss": 0.7622, "step": 13870 }, { "epoch": 0.42512565894323895, "grad_norm": 1.675889095382075, "learning_rate": 6.434984678700416e-06, "loss": 0.6794, "step": 13871 }, { "epoch": 0.42515630746598015, "grad_norm": 0.8562266113328677, "learning_rate": 6.434509233140084e-06, "loss": 0.4557, "step": 13872 }, { "epoch": 0.42518695598872136, "grad_norm": 1.7955264169702654, "learning_rate": 6.434033773445168e-06, "loss": 0.6206, "step": 13873 }, { "epoch": 0.42521760451146257, "grad_norm": 1.7684822696697466, "learning_rate": 6.4335582996203484e-06, "loss": 0.7241, "step": 13874 }, { "epoch": 0.42524825303420377, "grad_norm": 1.7464361459686968, "learning_rate": 6.433082811670314e-06, "loss": 0.7348, "step": 13875 }, { "epoch": 0.425278901556945, "grad_norm": 1.7544841755490626, "learning_rate": 6.432607309599745e-06, "loss": 0.6765, "step": 13876 }, { "epoch": 0.4253095500796862, "grad_norm": 1.617292012242652, "learning_rate": 6.432131793413333e-06, "loss": 0.6431, "step": 13877 }, { "epoch": 0.4253401986024274, "grad_norm": 1.5675000739336602, "learning_rate": 6.431656263115757e-06, "loss": 0.6194, "step": 13878 }, { "epoch": 0.4253708471251686, "grad_norm": 1.764200972927779, "learning_rate": 6.4311807187117085e-06, "loss": 0.6533, "step": 13879 }, { "epoch": 0.42540149564790974, "grad_norm": 1.6780455914664194, "learning_rate": 6.430705160205868e-06, "loss": 0.7074, "step": 13880 }, { "epoch": 0.42543214417065095, "grad_norm": 1.7110029657978494, "learning_rate": 6.4302295876029245e-06, "loss": 0.6286, "step": 13881 }, { "epoch": 0.42546279269339216, "grad_norm": 0.7805337670795213, "learning_rate": 6.4297540009075634e-06, "loss": 0.4521, "step": 13882 }, { "epoch": 0.42549344121613336, "grad_norm": 1.7624048856072876, "learning_rate": 6.429278400124469e-06, "loss": 0.6453, "step": 13883 }, { "epoch": 0.42552408973887457, "grad_norm": 1.648936528583531, "learning_rate": 6.428802785258329e-06, "loss": 0.6649, "step": 13884 }, { "epoch": 0.4255547382616158, "grad_norm": 1.4840888251790512, "learning_rate": 6.4283271563138305e-06, "loss": 0.6736, "step": 13885 }, { "epoch": 0.425585386784357, "grad_norm": 1.6520301039190217, "learning_rate": 6.427851513295659e-06, "loss": 0.6884, "step": 13886 }, { "epoch": 0.4256160353070982, "grad_norm": 1.604610396581829, "learning_rate": 6.4273758562085e-06, "loss": 0.6963, "step": 13887 }, { "epoch": 0.4256466838298394, "grad_norm": 0.7914516495419193, "learning_rate": 6.426900185057042e-06, "loss": 0.4554, "step": 13888 }, { "epoch": 0.4256773323525806, "grad_norm": 1.77447733527944, "learning_rate": 6.4264244998459725e-06, "loss": 0.6444, "step": 13889 }, { "epoch": 0.4257079808753218, "grad_norm": 0.7576635212295693, "learning_rate": 6.425948800579977e-06, "loss": 0.4405, "step": 13890 }, { "epoch": 0.425738629398063, "grad_norm": 1.766028270967824, "learning_rate": 6.4254730872637415e-06, "loss": 0.7019, "step": 13891 }, { "epoch": 0.4257692779208042, "grad_norm": 1.7591312909747268, "learning_rate": 6.424997359901957e-06, "loss": 0.5819, "step": 13892 }, { "epoch": 0.4257999264435454, "grad_norm": 1.5833452850239191, "learning_rate": 6.4245216184993085e-06, "loss": 0.6318, "step": 13893 }, { "epoch": 0.4258305749662866, "grad_norm": 0.8382021501464183, "learning_rate": 6.424045863060484e-06, "loss": 0.4744, "step": 13894 }, { "epoch": 0.42586122348902783, "grad_norm": 1.8209636556995128, "learning_rate": 6.42357009359017e-06, "loss": 0.6824, "step": 13895 }, { "epoch": 0.42589187201176903, "grad_norm": 1.8763895161135593, "learning_rate": 6.423094310093056e-06, "loss": 0.5974, "step": 13896 }, { "epoch": 0.42592252053451024, "grad_norm": 1.8556903830052367, "learning_rate": 6.4226185125738305e-06, "loss": 0.7105, "step": 13897 }, { "epoch": 0.42595316905725145, "grad_norm": 1.4998231587239863, "learning_rate": 6.422142701037179e-06, "loss": 0.65, "step": 13898 }, { "epoch": 0.42598381757999265, "grad_norm": 1.4456093864624955, "learning_rate": 6.4216668754877945e-06, "loss": 0.5642, "step": 13899 }, { "epoch": 0.42601446610273386, "grad_norm": 1.9426222521407357, "learning_rate": 6.42119103593036e-06, "loss": 0.7057, "step": 13900 }, { "epoch": 0.42604511462547506, "grad_norm": 0.7420995945098979, "learning_rate": 6.420715182369569e-06, "loss": 0.4361, "step": 13901 }, { "epoch": 0.42607576314821627, "grad_norm": 1.6616693887011385, "learning_rate": 6.420239314810106e-06, "loss": 0.7363, "step": 13902 }, { "epoch": 0.4261064116709575, "grad_norm": 1.650328293674452, "learning_rate": 6.419763433256663e-06, "loss": 0.6172, "step": 13903 }, { "epoch": 0.4261370601936987, "grad_norm": 1.4474367060951934, "learning_rate": 6.4192875377139265e-06, "loss": 0.5637, "step": 13904 }, { "epoch": 0.4261677087164399, "grad_norm": 1.5048547615288745, "learning_rate": 6.4188116281865875e-06, "loss": 0.598, "step": 13905 }, { "epoch": 0.4261983572391811, "grad_norm": 1.5139008004138184, "learning_rate": 6.418335704679332e-06, "loss": 0.6443, "step": 13906 }, { "epoch": 0.4262290057619223, "grad_norm": 1.8036814652585844, "learning_rate": 6.417859767196855e-06, "loss": 0.6938, "step": 13907 }, { "epoch": 0.4262596542846635, "grad_norm": 0.8085983765037149, "learning_rate": 6.4173838157438415e-06, "loss": 0.4479, "step": 13908 }, { "epoch": 0.4262903028074047, "grad_norm": 1.7377556228940856, "learning_rate": 6.4169078503249835e-06, "loss": 0.5491, "step": 13909 }, { "epoch": 0.4263209513301459, "grad_norm": 1.666256813599271, "learning_rate": 6.416431870944969e-06, "loss": 0.6092, "step": 13910 }, { "epoch": 0.42635159985288706, "grad_norm": 1.7834201029267087, "learning_rate": 6.41595587760849e-06, "loss": 0.7058, "step": 13911 }, { "epoch": 0.42638224837562827, "grad_norm": 0.8092144126082, "learning_rate": 6.415479870320233e-06, "loss": 0.4607, "step": 13912 }, { "epoch": 0.4264128968983695, "grad_norm": 1.901026857646267, "learning_rate": 6.415003849084893e-06, "loss": 0.6683, "step": 13913 }, { "epoch": 0.4264435454211107, "grad_norm": 1.7142584858221634, "learning_rate": 6.414527813907158e-06, "loss": 0.5606, "step": 13914 }, { "epoch": 0.4264741939438519, "grad_norm": 1.8092829806631665, "learning_rate": 6.414051764791717e-06, "loss": 0.6963, "step": 13915 }, { "epoch": 0.4265048424665931, "grad_norm": 1.6278850364470194, "learning_rate": 6.413575701743264e-06, "loss": 0.7041, "step": 13916 }, { "epoch": 0.4265354909893343, "grad_norm": 1.7250470021271644, "learning_rate": 6.413099624766487e-06, "loss": 0.6795, "step": 13917 }, { "epoch": 0.4265661395120755, "grad_norm": 1.6409726902046071, "learning_rate": 6.4126235338660784e-06, "loss": 0.6534, "step": 13918 }, { "epoch": 0.4265967880348167, "grad_norm": 1.7590593127783725, "learning_rate": 6.4121474290467266e-06, "loss": 0.6638, "step": 13919 }, { "epoch": 0.4266274365575579, "grad_norm": 1.7876778030299936, "learning_rate": 6.411671310313128e-06, "loss": 0.6986, "step": 13920 }, { "epoch": 0.4266580850802991, "grad_norm": 1.631389965204475, "learning_rate": 6.411195177669968e-06, "loss": 0.6043, "step": 13921 }, { "epoch": 0.4266887336030403, "grad_norm": 1.5335578904648672, "learning_rate": 6.410719031121943e-06, "loss": 0.5806, "step": 13922 }, { "epoch": 0.42671938212578153, "grad_norm": 1.711073649636695, "learning_rate": 6.410242870673739e-06, "loss": 0.5977, "step": 13923 }, { "epoch": 0.42675003064852274, "grad_norm": 1.8656930634084263, "learning_rate": 6.409766696330055e-06, "loss": 0.7096, "step": 13924 }, { "epoch": 0.42678067917126394, "grad_norm": 1.7870955118509966, "learning_rate": 6.409290508095578e-06, "loss": 0.6155, "step": 13925 }, { "epoch": 0.42681132769400515, "grad_norm": 1.4507391664115892, "learning_rate": 6.408814305974999e-06, "loss": 0.6044, "step": 13926 }, { "epoch": 0.42684197621674635, "grad_norm": 1.6270608016179762, "learning_rate": 6.408338089973015e-06, "loss": 0.6209, "step": 13927 }, { "epoch": 0.42687262473948756, "grad_norm": 2.034605773700067, "learning_rate": 6.407861860094314e-06, "loss": 0.6257, "step": 13928 }, { "epoch": 0.42690327326222877, "grad_norm": 1.7096451210200876, "learning_rate": 6.407385616343591e-06, "loss": 0.6574, "step": 13929 }, { "epoch": 0.42693392178496997, "grad_norm": 1.9615690467196516, "learning_rate": 6.406909358725536e-06, "loss": 0.6114, "step": 13930 }, { "epoch": 0.4269645703077112, "grad_norm": 1.738189556013694, "learning_rate": 6.4064330872448455e-06, "loss": 0.6865, "step": 13931 }, { "epoch": 0.4269952188304524, "grad_norm": 1.560698260564891, "learning_rate": 6.405956801906207e-06, "loss": 0.7087, "step": 13932 }, { "epoch": 0.4270258673531936, "grad_norm": 1.6605741634727507, "learning_rate": 6.405480502714319e-06, "loss": 0.6678, "step": 13933 }, { "epoch": 0.4270565158759348, "grad_norm": 1.4948298554846815, "learning_rate": 6.405004189673869e-06, "loss": 0.6298, "step": 13934 }, { "epoch": 0.427087164398676, "grad_norm": 1.7372923399293754, "learning_rate": 6.404527862789556e-06, "loss": 0.6874, "step": 13935 }, { "epoch": 0.4271178129214172, "grad_norm": 1.7351382240216393, "learning_rate": 6.404051522066068e-06, "loss": 0.64, "step": 13936 }, { "epoch": 0.4271484614441584, "grad_norm": 1.8001419946101904, "learning_rate": 6.403575167508104e-06, "loss": 0.6175, "step": 13937 }, { "epoch": 0.4271791099668996, "grad_norm": 1.7402404212457212, "learning_rate": 6.403098799120352e-06, "loss": 0.7149, "step": 13938 }, { "epoch": 0.4272097584896408, "grad_norm": 2.009710918781045, "learning_rate": 6.402622416907511e-06, "loss": 0.6634, "step": 13939 }, { "epoch": 0.42724040701238203, "grad_norm": 0.8767818620683908, "learning_rate": 6.4021460208742716e-06, "loss": 0.4486, "step": 13940 }, { "epoch": 0.42727105553512323, "grad_norm": 1.9045753686075273, "learning_rate": 6.401669611025327e-06, "loss": 0.6157, "step": 13941 }, { "epoch": 0.4273017040578644, "grad_norm": 2.100717322833554, "learning_rate": 6.401193187365375e-06, "loss": 0.7104, "step": 13942 }, { "epoch": 0.4273323525806056, "grad_norm": 1.8692713227729392, "learning_rate": 6.400716749899108e-06, "loss": 0.8061, "step": 13943 }, { "epoch": 0.4273630011033468, "grad_norm": 1.8903397170426157, "learning_rate": 6.4002402986312195e-06, "loss": 0.6508, "step": 13944 }, { "epoch": 0.427393649626088, "grad_norm": 2.279636605395078, "learning_rate": 6.3997638335664055e-06, "loss": 0.8133, "step": 13945 }, { "epoch": 0.4274242981488292, "grad_norm": 1.829747238455254, "learning_rate": 6.39928735470936e-06, "loss": 0.6676, "step": 13946 }, { "epoch": 0.4274549466715704, "grad_norm": 1.7835307957624282, "learning_rate": 6.39881086206478e-06, "loss": 0.6319, "step": 13947 }, { "epoch": 0.4274855951943116, "grad_norm": 1.8395356030816508, "learning_rate": 6.398334355637356e-06, "loss": 0.5886, "step": 13948 }, { "epoch": 0.4275162437170528, "grad_norm": 1.6742522684641699, "learning_rate": 6.397857835431787e-06, "loss": 0.6283, "step": 13949 }, { "epoch": 0.42754689223979403, "grad_norm": 1.7123935569325055, "learning_rate": 6.397381301452768e-06, "loss": 0.6246, "step": 13950 }, { "epoch": 0.42757754076253524, "grad_norm": 1.8005617789282502, "learning_rate": 6.396904753704993e-06, "loss": 0.7251, "step": 13951 }, { "epoch": 0.42760818928527644, "grad_norm": 1.726059279349296, "learning_rate": 6.396428192193156e-06, "loss": 0.7069, "step": 13952 }, { "epoch": 0.42763883780801765, "grad_norm": 1.744272400811341, "learning_rate": 6.395951616921957e-06, "loss": 0.6352, "step": 13953 }, { "epoch": 0.42766948633075885, "grad_norm": 1.6073332236700584, "learning_rate": 6.395475027896089e-06, "loss": 0.6676, "step": 13954 }, { "epoch": 0.42770013485350006, "grad_norm": 1.719909087509381, "learning_rate": 6.394998425120249e-06, "loss": 0.6933, "step": 13955 }, { "epoch": 0.42773078337624126, "grad_norm": 1.9803631411191411, "learning_rate": 6.394521808599131e-06, "loss": 0.626, "step": 13956 }, { "epoch": 0.42776143189898247, "grad_norm": 1.821698958040076, "learning_rate": 6.394045178337434e-06, "loss": 0.7246, "step": 13957 }, { "epoch": 0.4277920804217237, "grad_norm": 1.8426690553557148, "learning_rate": 6.393568534339854e-06, "loss": 0.6822, "step": 13958 }, { "epoch": 0.4278227289444649, "grad_norm": 1.568289400636583, "learning_rate": 6.393091876611086e-06, "loss": 0.5831, "step": 13959 }, { "epoch": 0.4278533774672061, "grad_norm": 0.9014304407324474, "learning_rate": 6.392615205155826e-06, "loss": 0.4629, "step": 13960 }, { "epoch": 0.4278840259899473, "grad_norm": 1.693226371742006, "learning_rate": 6.3921385199787735e-06, "loss": 0.656, "step": 13961 }, { "epoch": 0.4279146745126885, "grad_norm": 1.687882757739218, "learning_rate": 6.391661821084624e-06, "loss": 0.602, "step": 13962 }, { "epoch": 0.4279453230354297, "grad_norm": 1.8710296569401392, "learning_rate": 6.391185108478074e-06, "loss": 0.6483, "step": 13963 }, { "epoch": 0.4279759715581709, "grad_norm": 1.5750255525004888, "learning_rate": 6.39070838216382e-06, "loss": 0.6165, "step": 13964 }, { "epoch": 0.4280066200809121, "grad_norm": 1.8419405183309578, "learning_rate": 6.3902316421465626e-06, "loss": 0.6732, "step": 13965 }, { "epoch": 0.4280372686036533, "grad_norm": 1.7070893166455428, "learning_rate": 6.389754888430996e-06, "loss": 0.6356, "step": 13966 }, { "epoch": 0.4280679171263945, "grad_norm": 1.7383313199771626, "learning_rate": 6.389278121021818e-06, "loss": 0.6095, "step": 13967 }, { "epoch": 0.42809856564913573, "grad_norm": 1.736644649235304, "learning_rate": 6.388801339923729e-06, "loss": 0.6519, "step": 13968 }, { "epoch": 0.42812921417187694, "grad_norm": 1.5702588268365163, "learning_rate": 6.388324545141423e-06, "loss": 0.7169, "step": 13969 }, { "epoch": 0.42815986269461814, "grad_norm": 1.8874750712466901, "learning_rate": 6.387847736679603e-06, "loss": 0.7227, "step": 13970 }, { "epoch": 0.42819051121735935, "grad_norm": 1.6791276645505842, "learning_rate": 6.387370914542962e-06, "loss": 0.7401, "step": 13971 }, { "epoch": 0.42822115974010055, "grad_norm": 1.6544722723043794, "learning_rate": 6.386894078736201e-06, "loss": 0.6951, "step": 13972 }, { "epoch": 0.4282518082628417, "grad_norm": 1.8154043107316442, "learning_rate": 6.386417229264017e-06, "loss": 0.7395, "step": 13973 }, { "epoch": 0.4282824567855829, "grad_norm": 0.8111263860088643, "learning_rate": 6.385940366131112e-06, "loss": 0.4322, "step": 13974 }, { "epoch": 0.4283131053083241, "grad_norm": 2.0935528371134993, "learning_rate": 6.385463489342179e-06, "loss": 0.6937, "step": 13975 }, { "epoch": 0.4283437538310653, "grad_norm": 1.7537477632812628, "learning_rate": 6.384986598901921e-06, "loss": 0.7511, "step": 13976 }, { "epoch": 0.4283744023538065, "grad_norm": 1.8228029655634486, "learning_rate": 6.384509694815036e-06, "loss": 0.7335, "step": 13977 }, { "epoch": 0.42840505087654773, "grad_norm": 1.6829376322726917, "learning_rate": 6.384032777086222e-06, "loss": 0.5733, "step": 13978 }, { "epoch": 0.42843569939928894, "grad_norm": 2.0200778069040024, "learning_rate": 6.38355584572018e-06, "loss": 0.7062, "step": 13979 }, { "epoch": 0.42846634792203014, "grad_norm": 1.6488267289751009, "learning_rate": 6.383078900721607e-06, "loss": 0.6838, "step": 13980 }, { "epoch": 0.42849699644477135, "grad_norm": 1.7928969130267691, "learning_rate": 6.382601942095203e-06, "loss": 0.6811, "step": 13981 }, { "epoch": 0.42852764496751256, "grad_norm": 1.6989097528775992, "learning_rate": 6.38212496984567e-06, "loss": 0.632, "step": 13982 }, { "epoch": 0.42855829349025376, "grad_norm": 0.8664678241039241, "learning_rate": 6.381647983977706e-06, "loss": 0.4706, "step": 13983 }, { "epoch": 0.42858894201299497, "grad_norm": 1.659202846562776, "learning_rate": 6.381170984496009e-06, "loss": 0.6193, "step": 13984 }, { "epoch": 0.4286195905357362, "grad_norm": 1.4574898379703114, "learning_rate": 6.380693971405284e-06, "loss": 0.5909, "step": 13985 }, { "epoch": 0.4286502390584774, "grad_norm": 1.7459004704755123, "learning_rate": 6.380216944710224e-06, "loss": 0.7142, "step": 13986 }, { "epoch": 0.4286808875812186, "grad_norm": 1.7260848799388397, "learning_rate": 6.379739904415537e-06, "loss": 0.6849, "step": 13987 }, { "epoch": 0.4287115361039598, "grad_norm": 0.7844543918251954, "learning_rate": 6.379262850525918e-06, "loss": 0.4526, "step": 13988 }, { "epoch": 0.428742184626701, "grad_norm": 1.6783032628545567, "learning_rate": 6.3787857830460706e-06, "loss": 0.6034, "step": 13989 }, { "epoch": 0.4287728331494422, "grad_norm": 1.5244960024810237, "learning_rate": 6.378308701980692e-06, "loss": 0.663, "step": 13990 }, { "epoch": 0.4288034816721834, "grad_norm": 0.7802329602961116, "learning_rate": 6.377831607334487e-06, "loss": 0.4461, "step": 13991 }, { "epoch": 0.4288341301949246, "grad_norm": 1.8231231180576022, "learning_rate": 6.377354499112153e-06, "loss": 0.6423, "step": 13992 }, { "epoch": 0.4288647787176658, "grad_norm": 1.7469806098967777, "learning_rate": 6.376877377318393e-06, "loss": 0.7092, "step": 13993 }, { "epoch": 0.428895427240407, "grad_norm": 1.7437708367627556, "learning_rate": 6.3764002419579095e-06, "loss": 0.7157, "step": 13994 }, { "epoch": 0.42892607576314823, "grad_norm": 1.7462875268676352, "learning_rate": 6.3759230930354e-06, "loss": 0.6048, "step": 13995 }, { "epoch": 0.42895672428588943, "grad_norm": 1.5617239166414163, "learning_rate": 6.375445930555569e-06, "loss": 0.613, "step": 13996 }, { "epoch": 0.42898737280863064, "grad_norm": 1.6178844622766455, "learning_rate": 6.374968754523119e-06, "loss": 0.6289, "step": 13997 }, { "epoch": 0.42901802133137185, "grad_norm": 1.785821074761163, "learning_rate": 6.3744915649427485e-06, "loss": 0.674, "step": 13998 }, { "epoch": 0.42904866985411305, "grad_norm": 1.6653949611147252, "learning_rate": 6.374014361819161e-06, "loss": 0.6599, "step": 13999 }, { "epoch": 0.42907931837685426, "grad_norm": 1.5917302645695057, "learning_rate": 6.373537145157058e-06, "loss": 0.7449, "step": 14000 }, { "epoch": 0.42910996689959546, "grad_norm": 1.5490823062801045, "learning_rate": 6.373059914961144e-06, "loss": 0.6326, "step": 14001 }, { "epoch": 0.42914061542233667, "grad_norm": 1.644590912254796, "learning_rate": 6.372582671236118e-06, "loss": 0.7541, "step": 14002 }, { "epoch": 0.4291712639450779, "grad_norm": 0.8802614969444899, "learning_rate": 6.372105413986684e-06, "loss": 0.4721, "step": 14003 }, { "epoch": 0.429201912467819, "grad_norm": 1.760962094866583, "learning_rate": 6.371628143217543e-06, "loss": 0.7588, "step": 14004 }, { "epoch": 0.42923256099056023, "grad_norm": 1.5627549194051416, "learning_rate": 6.3711508589334e-06, "loss": 0.6594, "step": 14005 }, { "epoch": 0.42926320951330144, "grad_norm": 1.7422768813348712, "learning_rate": 6.370673561138958e-06, "loss": 0.7044, "step": 14006 }, { "epoch": 0.42929385803604264, "grad_norm": 1.6833854913932884, "learning_rate": 6.3701962498389165e-06, "loss": 0.6735, "step": 14007 }, { "epoch": 0.42932450655878385, "grad_norm": 1.8101625916054669, "learning_rate": 6.369718925037982e-06, "loss": 0.7448, "step": 14008 }, { "epoch": 0.42935515508152505, "grad_norm": 1.7892658739298333, "learning_rate": 6.369241586740856e-06, "loss": 0.6675, "step": 14009 }, { "epoch": 0.42938580360426626, "grad_norm": 1.9405915005583663, "learning_rate": 6.3687642349522425e-06, "loss": 0.6438, "step": 14010 }, { "epoch": 0.42941645212700746, "grad_norm": 1.5030657987146225, "learning_rate": 6.368286869676846e-06, "loss": 0.655, "step": 14011 }, { "epoch": 0.42944710064974867, "grad_norm": 1.8646528447718296, "learning_rate": 6.367809490919368e-06, "loss": 0.6487, "step": 14012 }, { "epoch": 0.4294777491724899, "grad_norm": 1.6253989103548652, "learning_rate": 6.367332098684512e-06, "loss": 0.619, "step": 14013 }, { "epoch": 0.4295083976952311, "grad_norm": 1.8593795302935292, "learning_rate": 6.366854692976983e-06, "loss": 0.7467, "step": 14014 }, { "epoch": 0.4295390462179723, "grad_norm": 1.6221993767886302, "learning_rate": 6.366377273801486e-06, "loss": 0.6912, "step": 14015 }, { "epoch": 0.4295696947407135, "grad_norm": 1.8617533008275282, "learning_rate": 6.365899841162725e-06, "loss": 0.71, "step": 14016 }, { "epoch": 0.4296003432634547, "grad_norm": 1.616324563778454, "learning_rate": 6.365422395065403e-06, "loss": 0.6111, "step": 14017 }, { "epoch": 0.4296309917861959, "grad_norm": 1.8657046696118083, "learning_rate": 6.3649449355142226e-06, "loss": 0.6325, "step": 14018 }, { "epoch": 0.4296616403089371, "grad_norm": 1.680948935094431, "learning_rate": 6.364467462513892e-06, "loss": 0.585, "step": 14019 }, { "epoch": 0.4296922888316783, "grad_norm": 1.6242465012784257, "learning_rate": 6.363989976069115e-06, "loss": 0.5911, "step": 14020 }, { "epoch": 0.4297229373544195, "grad_norm": 1.7805714257329626, "learning_rate": 6.363512476184595e-06, "loss": 0.5875, "step": 14021 }, { "epoch": 0.4297535858771607, "grad_norm": 1.6363141668216925, "learning_rate": 6.363034962865038e-06, "loss": 0.6274, "step": 14022 }, { "epoch": 0.42978423439990193, "grad_norm": 1.6677335054726705, "learning_rate": 6.362557436115149e-06, "loss": 0.6461, "step": 14023 }, { "epoch": 0.42981488292264314, "grad_norm": 1.7881200764712348, "learning_rate": 6.362079895939632e-06, "loss": 0.6658, "step": 14024 }, { "epoch": 0.42984553144538434, "grad_norm": 1.8443098301009795, "learning_rate": 6.361602342343194e-06, "loss": 0.8095, "step": 14025 }, { "epoch": 0.42987617996812555, "grad_norm": 2.1822838541750706, "learning_rate": 6.361124775330539e-06, "loss": 0.6662, "step": 14026 }, { "epoch": 0.42990682849086675, "grad_norm": 1.6109726187068938, "learning_rate": 6.360647194906373e-06, "loss": 0.6934, "step": 14027 }, { "epoch": 0.42993747701360796, "grad_norm": 0.8227439817493735, "learning_rate": 6.360169601075404e-06, "loss": 0.4527, "step": 14028 }, { "epoch": 0.42996812553634917, "grad_norm": 1.8179347831738295, "learning_rate": 6.359691993842335e-06, "loss": 0.66, "step": 14029 }, { "epoch": 0.42999877405909037, "grad_norm": 1.6995529722898604, "learning_rate": 6.359214373211873e-06, "loss": 0.6593, "step": 14030 }, { "epoch": 0.4300294225818316, "grad_norm": 1.4181689438732623, "learning_rate": 6.358736739188724e-06, "loss": 0.5588, "step": 14031 }, { "epoch": 0.4300600711045728, "grad_norm": 1.6092947581813628, "learning_rate": 6.3582590917775946e-06, "loss": 0.6775, "step": 14032 }, { "epoch": 0.430090719627314, "grad_norm": 1.8019041334043417, "learning_rate": 6.357781430983189e-06, "loss": 0.6127, "step": 14033 }, { "epoch": 0.4301213681500552, "grad_norm": 1.6581361810489303, "learning_rate": 6.357303756810218e-06, "loss": 0.7053, "step": 14034 }, { "epoch": 0.43015201667279634, "grad_norm": 1.4378880525172122, "learning_rate": 6.356826069263384e-06, "loss": 0.611, "step": 14035 }, { "epoch": 0.43018266519553755, "grad_norm": 1.7568476011880407, "learning_rate": 6.356348368347396e-06, "loss": 0.6783, "step": 14036 }, { "epoch": 0.43021331371827876, "grad_norm": 1.764353827811076, "learning_rate": 6.355870654066961e-06, "loss": 0.6435, "step": 14037 }, { "epoch": 0.43024396224101996, "grad_norm": 1.692612849794232, "learning_rate": 6.3553929264267845e-06, "loss": 0.6633, "step": 14038 }, { "epoch": 0.43027461076376117, "grad_norm": 1.6805183579092802, "learning_rate": 6.354915185431576e-06, "loss": 0.6641, "step": 14039 }, { "epoch": 0.4303052592865024, "grad_norm": 1.686095598141503, "learning_rate": 6.354437431086041e-06, "loss": 0.6986, "step": 14040 }, { "epoch": 0.4303359078092436, "grad_norm": 1.7252655665123247, "learning_rate": 6.353959663394887e-06, "loss": 0.654, "step": 14041 }, { "epoch": 0.4303665563319848, "grad_norm": 1.821982733110838, "learning_rate": 6.353481882362822e-06, "loss": 0.6159, "step": 14042 }, { "epoch": 0.430397204854726, "grad_norm": 0.7841632137065402, "learning_rate": 6.3530040879945565e-06, "loss": 0.453, "step": 14043 }, { "epoch": 0.4304278533774672, "grad_norm": 1.648695716255823, "learning_rate": 6.352526280294791e-06, "loss": 0.6311, "step": 14044 }, { "epoch": 0.4304585019002084, "grad_norm": 1.8430745706765277, "learning_rate": 6.352048459268241e-06, "loss": 0.6288, "step": 14045 }, { "epoch": 0.4304891504229496, "grad_norm": 2.046041976407356, "learning_rate": 6.35157062491961e-06, "loss": 0.709, "step": 14046 }, { "epoch": 0.4305197989456908, "grad_norm": 1.6310629259134242, "learning_rate": 6.351092777253609e-06, "loss": 0.5205, "step": 14047 }, { "epoch": 0.430550447468432, "grad_norm": 1.7495392285569493, "learning_rate": 6.350614916274945e-06, "loss": 0.633, "step": 14048 }, { "epoch": 0.4305810959911732, "grad_norm": 1.8644533331599524, "learning_rate": 6.350137041988327e-06, "loss": 0.6807, "step": 14049 }, { "epoch": 0.43061174451391443, "grad_norm": 1.6269114364781452, "learning_rate": 6.349659154398462e-06, "loss": 0.7111, "step": 14050 }, { "epoch": 0.43064239303665564, "grad_norm": 1.710865851793101, "learning_rate": 6.34918125351006e-06, "loss": 0.6944, "step": 14051 }, { "epoch": 0.43067304155939684, "grad_norm": 1.6003218114654345, "learning_rate": 6.348703339327832e-06, "loss": 0.627, "step": 14052 }, { "epoch": 0.43070369008213805, "grad_norm": 1.7349087824530698, "learning_rate": 6.348225411856482e-06, "loss": 0.6912, "step": 14053 }, { "epoch": 0.43073433860487925, "grad_norm": 1.7194668604773393, "learning_rate": 6.347747471100725e-06, "loss": 0.7021, "step": 14054 }, { "epoch": 0.43076498712762046, "grad_norm": 1.6559085007380003, "learning_rate": 6.347269517065265e-06, "loss": 0.6957, "step": 14055 }, { "epoch": 0.43079563565036166, "grad_norm": 1.728448094471294, "learning_rate": 6.346791549754816e-06, "loss": 0.6683, "step": 14056 }, { "epoch": 0.43082628417310287, "grad_norm": 1.7343322828197847, "learning_rate": 6.346313569174083e-06, "loss": 0.6507, "step": 14057 }, { "epoch": 0.4308569326958441, "grad_norm": 0.8020248176799155, "learning_rate": 6.34583557532778e-06, "loss": 0.456, "step": 14058 }, { "epoch": 0.4308875812185853, "grad_norm": 0.7568978651149393, "learning_rate": 6.345357568220613e-06, "loss": 0.4283, "step": 14059 }, { "epoch": 0.4309182297413265, "grad_norm": 1.7187674716056385, "learning_rate": 6.344879547857294e-06, "loss": 0.6294, "step": 14060 }, { "epoch": 0.4309488782640677, "grad_norm": 1.6792556646207804, "learning_rate": 6.3444015142425335e-06, "loss": 0.5584, "step": 14061 }, { "epoch": 0.4309795267868089, "grad_norm": 1.8733490647178863, "learning_rate": 6.34392346738104e-06, "loss": 0.6748, "step": 14062 }, { "epoch": 0.4310101753095501, "grad_norm": 1.6729415265318701, "learning_rate": 6.3434454072775255e-06, "loss": 0.6591, "step": 14063 }, { "epoch": 0.4310408238322913, "grad_norm": 1.8523996858984895, "learning_rate": 6.342967333936698e-06, "loss": 0.6586, "step": 14064 }, { "epoch": 0.4310714723550325, "grad_norm": 0.857121657321418, "learning_rate": 6.342489247363272e-06, "loss": 0.46, "step": 14065 }, { "epoch": 0.43110212087777366, "grad_norm": 0.8521970682955889, "learning_rate": 6.342011147561955e-06, "loss": 0.4447, "step": 14066 }, { "epoch": 0.43113276940051487, "grad_norm": 1.5931576827321614, "learning_rate": 6.341533034537459e-06, "loss": 0.6368, "step": 14067 }, { "epoch": 0.4311634179232561, "grad_norm": 1.6462055891153669, "learning_rate": 6.3410549082944935e-06, "loss": 0.7269, "step": 14068 }, { "epoch": 0.4311940664459973, "grad_norm": 1.6616233998154264, "learning_rate": 6.340576768837772e-06, "loss": 0.6746, "step": 14069 }, { "epoch": 0.4312247149687385, "grad_norm": 0.786599288327515, "learning_rate": 6.340098616172006e-06, "loss": 0.439, "step": 14070 }, { "epoch": 0.4312553634914797, "grad_norm": 1.6722130518611105, "learning_rate": 6.339620450301903e-06, "loss": 0.71, "step": 14071 }, { "epoch": 0.4312860120142209, "grad_norm": 1.893864089026168, "learning_rate": 6.339142271232177e-06, "loss": 0.6264, "step": 14072 }, { "epoch": 0.4313166605369621, "grad_norm": 1.7267887942086315, "learning_rate": 6.3386640789675415e-06, "loss": 0.6678, "step": 14073 }, { "epoch": 0.4313473090597033, "grad_norm": 1.6664004127636556, "learning_rate": 6.338185873512705e-06, "loss": 0.6678, "step": 14074 }, { "epoch": 0.4313779575824445, "grad_norm": 1.5826910729644668, "learning_rate": 6.337707654872382e-06, "loss": 0.5839, "step": 14075 }, { "epoch": 0.4314086061051857, "grad_norm": 1.6071197913806345, "learning_rate": 6.337229423051281e-06, "loss": 0.6005, "step": 14076 }, { "epoch": 0.4314392546279269, "grad_norm": 2.0197352936483854, "learning_rate": 6.336751178054118e-06, "loss": 0.7181, "step": 14077 }, { "epoch": 0.43146990315066813, "grad_norm": 1.7273769258575362, "learning_rate": 6.336272919885603e-06, "loss": 0.6405, "step": 14078 }, { "epoch": 0.43150055167340934, "grad_norm": 0.8162962804663377, "learning_rate": 6.335794648550448e-06, "loss": 0.4282, "step": 14079 }, { "epoch": 0.43153120019615054, "grad_norm": 1.5495985879847436, "learning_rate": 6.335316364053369e-06, "loss": 0.504, "step": 14080 }, { "epoch": 0.43156184871889175, "grad_norm": 1.8364018501378718, "learning_rate": 6.334838066399074e-06, "loss": 0.7098, "step": 14081 }, { "epoch": 0.43159249724163296, "grad_norm": 0.7692132353267312, "learning_rate": 6.33435975559228e-06, "loss": 0.4251, "step": 14082 }, { "epoch": 0.43162314576437416, "grad_norm": 1.8217487652699353, "learning_rate": 6.333881431637696e-06, "loss": 0.6568, "step": 14083 }, { "epoch": 0.43165379428711537, "grad_norm": 3.0530699115849758, "learning_rate": 6.333403094540038e-06, "loss": 0.6936, "step": 14084 }, { "epoch": 0.43168444280985657, "grad_norm": 1.7198824561531119, "learning_rate": 6.332924744304019e-06, "loss": 0.6937, "step": 14085 }, { "epoch": 0.4317150913325978, "grad_norm": 1.7466301838601195, "learning_rate": 6.332446380934349e-06, "loss": 0.7371, "step": 14086 }, { "epoch": 0.431745739855339, "grad_norm": 0.7662766893174582, "learning_rate": 6.331968004435746e-06, "loss": 0.4549, "step": 14087 }, { "epoch": 0.4317763883780802, "grad_norm": 1.533782009058267, "learning_rate": 6.3314896148129205e-06, "loss": 0.609, "step": 14088 }, { "epoch": 0.4318070369008214, "grad_norm": 1.7925551194486187, "learning_rate": 6.331011212070588e-06, "loss": 0.6819, "step": 14089 }, { "epoch": 0.4318376854235626, "grad_norm": 1.5155593237634095, "learning_rate": 6.33053279621346e-06, "loss": 0.6203, "step": 14090 }, { "epoch": 0.4318683339463038, "grad_norm": 1.8751355518443538, "learning_rate": 6.3300543672462536e-06, "loss": 0.6166, "step": 14091 }, { "epoch": 0.431898982469045, "grad_norm": 1.410875089642617, "learning_rate": 6.329575925173679e-06, "loss": 0.4223, "step": 14092 }, { "epoch": 0.4319296309917862, "grad_norm": 1.6997204894418063, "learning_rate": 6.329097470000456e-06, "loss": 0.6148, "step": 14093 }, { "epoch": 0.4319602795145274, "grad_norm": 1.8025379366353294, "learning_rate": 6.328619001731292e-06, "loss": 0.6568, "step": 14094 }, { "epoch": 0.43199092803726863, "grad_norm": 1.853933351860769, "learning_rate": 6.3281405203709065e-06, "loss": 0.6586, "step": 14095 }, { "epoch": 0.43202157656000983, "grad_norm": 1.8798971831496911, "learning_rate": 6.327662025924013e-06, "loss": 0.6648, "step": 14096 }, { "epoch": 0.432052225082751, "grad_norm": 1.8003588336358418, "learning_rate": 6.327183518395327e-06, "loss": 0.6425, "step": 14097 }, { "epoch": 0.4320828736054922, "grad_norm": 1.6597445255642542, "learning_rate": 6.32670499778956e-06, "loss": 0.5613, "step": 14098 }, { "epoch": 0.4321135221282334, "grad_norm": 1.7840418825946698, "learning_rate": 6.3262264641114305e-06, "loss": 0.6242, "step": 14099 }, { "epoch": 0.4321441706509746, "grad_norm": 1.7843029905844014, "learning_rate": 6.325747917365651e-06, "loss": 0.6537, "step": 14100 }, { "epoch": 0.4321748191737158, "grad_norm": 1.6777452146624627, "learning_rate": 6.32526935755694e-06, "loss": 0.6934, "step": 14101 }, { "epoch": 0.432205467696457, "grad_norm": 0.8755210336775381, "learning_rate": 6.3247907846900096e-06, "loss": 0.4608, "step": 14102 }, { "epoch": 0.4322361162191982, "grad_norm": 1.9882197605130907, "learning_rate": 6.324312198769576e-06, "loss": 0.6376, "step": 14103 }, { "epoch": 0.4322667647419394, "grad_norm": 0.8038337726756977, "learning_rate": 6.323833599800356e-06, "loss": 0.4554, "step": 14104 }, { "epoch": 0.43229741326468063, "grad_norm": 1.4159061416664522, "learning_rate": 6.323354987787066e-06, "loss": 0.5525, "step": 14105 }, { "epoch": 0.43232806178742184, "grad_norm": 1.6252380840213045, "learning_rate": 6.32287636273442e-06, "loss": 0.6139, "step": 14106 }, { "epoch": 0.43235871031016304, "grad_norm": 1.6973731784585417, "learning_rate": 6.322397724647134e-06, "loss": 0.6664, "step": 14107 }, { "epoch": 0.43238935883290425, "grad_norm": 1.642281942215868, "learning_rate": 6.3219190735299254e-06, "loss": 0.6505, "step": 14108 }, { "epoch": 0.43242000735564545, "grad_norm": 0.8408773835464276, "learning_rate": 6.3214404093875105e-06, "loss": 0.4429, "step": 14109 }, { "epoch": 0.43245065587838666, "grad_norm": 1.718131596332515, "learning_rate": 6.320961732224605e-06, "loss": 0.647, "step": 14110 }, { "epoch": 0.43248130440112786, "grad_norm": 1.81917531429082, "learning_rate": 6.320483042045924e-06, "loss": 0.599, "step": 14111 }, { "epoch": 0.43251195292386907, "grad_norm": 1.754954440081384, "learning_rate": 6.320004338856189e-06, "loss": 0.6707, "step": 14112 }, { "epoch": 0.4325426014466103, "grad_norm": 2.053309156500082, "learning_rate": 6.319525622660111e-06, "loss": 0.7862, "step": 14113 }, { "epoch": 0.4325732499693515, "grad_norm": 1.9585758995119773, "learning_rate": 6.31904689346241e-06, "loss": 0.6834, "step": 14114 }, { "epoch": 0.4326038984920927, "grad_norm": 1.4105939792392468, "learning_rate": 6.318568151267801e-06, "loss": 0.6162, "step": 14115 }, { "epoch": 0.4326345470148339, "grad_norm": 1.6300226040678134, "learning_rate": 6.318089396081004e-06, "loss": 0.6661, "step": 14116 }, { "epoch": 0.4326651955375751, "grad_norm": 1.6700833295596045, "learning_rate": 6.317610627906736e-06, "loss": 0.6395, "step": 14117 }, { "epoch": 0.4326958440603163, "grad_norm": 1.5655850907389972, "learning_rate": 6.317131846749711e-06, "loss": 0.5975, "step": 14118 }, { "epoch": 0.4327264925830575, "grad_norm": 1.780892794279201, "learning_rate": 6.316653052614651e-06, "loss": 0.6272, "step": 14119 }, { "epoch": 0.4327571411057987, "grad_norm": 1.7298193831474582, "learning_rate": 6.316174245506271e-06, "loss": 0.6929, "step": 14120 }, { "epoch": 0.4327877896285399, "grad_norm": 1.9107759878934023, "learning_rate": 6.315695425429289e-06, "loss": 0.5277, "step": 14121 }, { "epoch": 0.4328184381512811, "grad_norm": 1.7133660501824162, "learning_rate": 6.315216592388423e-06, "loss": 0.6042, "step": 14122 }, { "epoch": 0.43284908667402233, "grad_norm": 1.5398388395088238, "learning_rate": 6.314737746388393e-06, "loss": 0.6384, "step": 14123 }, { "epoch": 0.43287973519676354, "grad_norm": 0.8188099571207864, "learning_rate": 6.314258887433915e-06, "loss": 0.4566, "step": 14124 }, { "epoch": 0.43291038371950474, "grad_norm": 0.7759885418476685, "learning_rate": 6.313780015529707e-06, "loss": 0.4367, "step": 14125 }, { "epoch": 0.43294103224224595, "grad_norm": 1.885899646881898, "learning_rate": 6.313301130680488e-06, "loss": 0.6608, "step": 14126 }, { "epoch": 0.43297168076498715, "grad_norm": 1.7027669520940678, "learning_rate": 6.312822232890978e-06, "loss": 0.6262, "step": 14127 }, { "epoch": 0.4330023292877283, "grad_norm": 1.6796275948935846, "learning_rate": 6.312343322165895e-06, "loss": 0.7063, "step": 14128 }, { "epoch": 0.4330329778104695, "grad_norm": 1.7780688177786776, "learning_rate": 6.311864398509957e-06, "loss": 0.6975, "step": 14129 }, { "epoch": 0.4330636263332107, "grad_norm": 1.6623043462509168, "learning_rate": 6.311385461927882e-06, "loss": 0.6756, "step": 14130 }, { "epoch": 0.4330942748559519, "grad_norm": 1.6240936643305717, "learning_rate": 6.310906512424393e-06, "loss": 0.6319, "step": 14131 }, { "epoch": 0.4331249233786931, "grad_norm": 1.769316282490766, "learning_rate": 6.3104275500042055e-06, "loss": 0.677, "step": 14132 }, { "epoch": 0.43315557190143433, "grad_norm": 1.8462292809173815, "learning_rate": 6.30994857467204e-06, "loss": 0.6752, "step": 14133 }, { "epoch": 0.43318622042417554, "grad_norm": 1.6450068586060034, "learning_rate": 6.309469586432616e-06, "loss": 0.6949, "step": 14134 }, { "epoch": 0.43321686894691674, "grad_norm": 2.0217477567493405, "learning_rate": 6.308990585290653e-06, "loss": 0.5958, "step": 14135 }, { "epoch": 0.43324751746965795, "grad_norm": 1.7254179436408827, "learning_rate": 6.308511571250871e-06, "loss": 0.68, "step": 14136 }, { "epoch": 0.43327816599239916, "grad_norm": 1.6537546481465832, "learning_rate": 6.3080325443179905e-06, "loss": 0.6677, "step": 14137 }, { "epoch": 0.43330881451514036, "grad_norm": 1.6444216963780223, "learning_rate": 6.307553504496729e-06, "loss": 0.6864, "step": 14138 }, { "epoch": 0.43333946303788157, "grad_norm": 1.0471265728156636, "learning_rate": 6.3070744517918105e-06, "loss": 0.4306, "step": 14139 }, { "epoch": 0.4333701115606228, "grad_norm": 3.6585461561472448, "learning_rate": 6.306595386207952e-06, "loss": 0.6449, "step": 14140 }, { "epoch": 0.433400760083364, "grad_norm": 0.850995389727527, "learning_rate": 6.306116307749874e-06, "loss": 0.4344, "step": 14141 }, { "epoch": 0.4334314086061052, "grad_norm": 1.7650028469728936, "learning_rate": 6.305637216422298e-06, "loss": 0.7142, "step": 14142 }, { "epoch": 0.4334620571288464, "grad_norm": 1.9246712894134546, "learning_rate": 6.305158112229946e-06, "loss": 0.663, "step": 14143 }, { "epoch": 0.4334927056515876, "grad_norm": 1.6280418185066647, "learning_rate": 6.304678995177535e-06, "loss": 0.7019, "step": 14144 }, { "epoch": 0.4335233541743288, "grad_norm": 1.6589974959373857, "learning_rate": 6.304199865269789e-06, "loss": 0.7258, "step": 14145 }, { "epoch": 0.43355400269707, "grad_norm": 1.6329841533683698, "learning_rate": 6.303720722511428e-06, "loss": 0.6478, "step": 14146 }, { "epoch": 0.4335846512198112, "grad_norm": 1.8602551773427831, "learning_rate": 6.303241566907173e-06, "loss": 0.6134, "step": 14147 }, { "epoch": 0.4336152997425524, "grad_norm": 1.8558641851844047, "learning_rate": 6.302762398461746e-06, "loss": 0.7173, "step": 14148 }, { "epoch": 0.4336459482652936, "grad_norm": 1.5069491564132427, "learning_rate": 6.302283217179868e-06, "loss": 0.5949, "step": 14149 }, { "epoch": 0.43367659678803483, "grad_norm": 1.0795822029770086, "learning_rate": 6.301804023066258e-06, "loss": 0.4594, "step": 14150 }, { "epoch": 0.43370724531077604, "grad_norm": 1.62584716372244, "learning_rate": 6.3013248161256425e-06, "loss": 0.6198, "step": 14151 }, { "epoch": 0.43373789383351724, "grad_norm": 1.6736154592255235, "learning_rate": 6.300845596362739e-06, "loss": 0.6705, "step": 14152 }, { "epoch": 0.43376854235625845, "grad_norm": 1.8277364648436467, "learning_rate": 6.300366363782272e-06, "loss": 0.7483, "step": 14153 }, { "epoch": 0.43379919087899965, "grad_norm": 1.539433826472309, "learning_rate": 6.299887118388962e-06, "loss": 0.6161, "step": 14154 }, { "epoch": 0.43382983940174086, "grad_norm": 1.6755787421445583, "learning_rate": 6.2994078601875334e-06, "loss": 0.6639, "step": 14155 }, { "epoch": 0.43386048792448206, "grad_norm": 1.591109695326633, "learning_rate": 6.298928589182704e-06, "loss": 0.6795, "step": 14156 }, { "epoch": 0.43389113644722327, "grad_norm": 1.7594944998931885, "learning_rate": 6.2984493053792e-06, "loss": 0.588, "step": 14157 }, { "epoch": 0.4339217849699645, "grad_norm": 0.7451339248578059, "learning_rate": 6.297970008781742e-06, "loss": 0.4302, "step": 14158 }, { "epoch": 0.4339524334927056, "grad_norm": 1.6947131034473997, "learning_rate": 6.297490699395055e-06, "loss": 0.688, "step": 14159 }, { "epoch": 0.43398308201544683, "grad_norm": 1.746781308933776, "learning_rate": 6.297011377223859e-06, "loss": 0.7082, "step": 14160 }, { "epoch": 0.43401373053818804, "grad_norm": 1.9610454422442918, "learning_rate": 6.296532042272878e-06, "loss": 0.6137, "step": 14161 }, { "epoch": 0.43404437906092924, "grad_norm": 0.7990196708553629, "learning_rate": 6.296052694546837e-06, "loss": 0.4506, "step": 14162 }, { "epoch": 0.43407502758367045, "grad_norm": 1.7544954625164155, "learning_rate": 6.295573334050455e-06, "loss": 0.6522, "step": 14163 }, { "epoch": 0.43410567610641165, "grad_norm": 1.708188575499701, "learning_rate": 6.2950939607884574e-06, "loss": 0.7455, "step": 14164 }, { "epoch": 0.43413632462915286, "grad_norm": 0.7979117376486754, "learning_rate": 6.294614574765567e-06, "loss": 0.457, "step": 14165 }, { "epoch": 0.43416697315189406, "grad_norm": 1.7807945658745985, "learning_rate": 6.294135175986511e-06, "loss": 0.6999, "step": 14166 }, { "epoch": 0.43419762167463527, "grad_norm": 1.8165248520501718, "learning_rate": 6.293655764456008e-06, "loss": 0.6713, "step": 14167 }, { "epoch": 0.4342282701973765, "grad_norm": 1.545139198781758, "learning_rate": 6.2931763401787835e-06, "loss": 0.6429, "step": 14168 }, { "epoch": 0.4342589187201177, "grad_norm": 1.6055262801880612, "learning_rate": 6.292696903159562e-06, "loss": 0.6379, "step": 14169 }, { "epoch": 0.4342895672428589, "grad_norm": 1.970896175535036, "learning_rate": 6.292217453403068e-06, "loss": 0.6741, "step": 14170 }, { "epoch": 0.4343202157656001, "grad_norm": 1.6978367909214858, "learning_rate": 6.291737990914024e-06, "loss": 0.618, "step": 14171 }, { "epoch": 0.4343508642883413, "grad_norm": 1.7476415157436764, "learning_rate": 6.291258515697155e-06, "loss": 0.6074, "step": 14172 }, { "epoch": 0.4343815128110825, "grad_norm": 1.7137470523742369, "learning_rate": 6.290779027757186e-06, "loss": 0.7298, "step": 14173 }, { "epoch": 0.4344121613338237, "grad_norm": 1.886100293445284, "learning_rate": 6.29029952709884e-06, "loss": 0.6893, "step": 14174 }, { "epoch": 0.4344428098565649, "grad_norm": 1.6653361169055592, "learning_rate": 6.289820013726844e-06, "loss": 0.6468, "step": 14175 }, { "epoch": 0.4344734583793061, "grad_norm": 1.6881034810397428, "learning_rate": 6.2893404876459195e-06, "loss": 0.5904, "step": 14176 }, { "epoch": 0.4345041069020473, "grad_norm": 1.8330623799580312, "learning_rate": 6.288860948860794e-06, "loss": 0.5854, "step": 14177 }, { "epoch": 0.43453475542478853, "grad_norm": 0.814425397510059, "learning_rate": 6.288381397376193e-06, "loss": 0.4362, "step": 14178 }, { "epoch": 0.43456540394752974, "grad_norm": 1.4288016636710095, "learning_rate": 6.28790183319684e-06, "loss": 0.5739, "step": 14179 }, { "epoch": 0.43459605247027094, "grad_norm": 1.6029945227732791, "learning_rate": 6.28742225632746e-06, "loss": 0.607, "step": 14180 }, { "epoch": 0.43462670099301215, "grad_norm": 1.6905667686144996, "learning_rate": 6.28694266677278e-06, "loss": 0.7723, "step": 14181 }, { "epoch": 0.43465734951575336, "grad_norm": 1.6838694336722089, "learning_rate": 6.286463064537524e-06, "loss": 0.6959, "step": 14182 }, { "epoch": 0.43468799803849456, "grad_norm": 1.679424817449818, "learning_rate": 6.285983449626418e-06, "loss": 0.6375, "step": 14183 }, { "epoch": 0.43471864656123577, "grad_norm": 1.6138708598995377, "learning_rate": 6.285503822044188e-06, "loss": 0.6943, "step": 14184 }, { "epoch": 0.43474929508397697, "grad_norm": 1.6863467365873304, "learning_rate": 6.285024181795561e-06, "loss": 0.626, "step": 14185 }, { "epoch": 0.4347799436067182, "grad_norm": 1.8992929584259053, "learning_rate": 6.2845445288852615e-06, "loss": 0.7007, "step": 14186 }, { "epoch": 0.4348105921294594, "grad_norm": 1.8572958112985347, "learning_rate": 6.284064863318016e-06, "loss": 0.6751, "step": 14187 }, { "epoch": 0.4348412406522006, "grad_norm": 1.6214897417927272, "learning_rate": 6.283585185098551e-06, "loss": 0.6624, "step": 14188 }, { "epoch": 0.4348718891749418, "grad_norm": 1.7776837669255765, "learning_rate": 6.283105494231591e-06, "loss": 0.7708, "step": 14189 }, { "epoch": 0.43490253769768294, "grad_norm": 1.5778544066144595, "learning_rate": 6.282625790721867e-06, "loss": 0.6865, "step": 14190 }, { "epoch": 0.43493318622042415, "grad_norm": 1.580621620786881, "learning_rate": 6.2821460745741e-06, "loss": 0.6306, "step": 14191 }, { "epoch": 0.43496383474316536, "grad_norm": 1.7211368701938201, "learning_rate": 6.2816663457930225e-06, "loss": 0.5802, "step": 14192 }, { "epoch": 0.43499448326590656, "grad_norm": 1.7421245408753294, "learning_rate": 6.281186604383358e-06, "loss": 0.6039, "step": 14193 }, { "epoch": 0.43502513178864777, "grad_norm": 1.5450317270428144, "learning_rate": 6.280706850349834e-06, "loss": 0.5892, "step": 14194 }, { "epoch": 0.435055780311389, "grad_norm": 1.707586477616342, "learning_rate": 6.2802270836971756e-06, "loss": 0.5949, "step": 14195 }, { "epoch": 0.4350864288341302, "grad_norm": 1.7492175221774429, "learning_rate": 6.279747304430115e-06, "loss": 0.6664, "step": 14196 }, { "epoch": 0.4351170773568714, "grad_norm": 0.8171116925954721, "learning_rate": 6.279267512553375e-06, "loss": 0.4617, "step": 14197 }, { "epoch": 0.4351477258796126, "grad_norm": 1.4718102491963299, "learning_rate": 6.278787708071687e-06, "loss": 0.6567, "step": 14198 }, { "epoch": 0.4351783744023538, "grad_norm": 0.7972227136189469, "learning_rate": 6.278307890989773e-06, "loss": 0.4431, "step": 14199 }, { "epoch": 0.435209022925095, "grad_norm": 1.5442165225766031, "learning_rate": 6.277828061312367e-06, "loss": 0.6548, "step": 14200 }, { "epoch": 0.4352396714478362, "grad_norm": 1.749535181382336, "learning_rate": 6.277348219044194e-06, "loss": 0.6985, "step": 14201 }, { "epoch": 0.4352703199705774, "grad_norm": 1.6828011170782158, "learning_rate": 6.276868364189981e-06, "loss": 0.6535, "step": 14202 }, { "epoch": 0.4353009684933186, "grad_norm": 1.496143025398213, "learning_rate": 6.276388496754458e-06, "loss": 0.6212, "step": 14203 }, { "epoch": 0.4353316170160598, "grad_norm": 1.6689080382276889, "learning_rate": 6.275908616742351e-06, "loss": 0.7231, "step": 14204 }, { "epoch": 0.43536226553880103, "grad_norm": 1.608620864747018, "learning_rate": 6.275428724158393e-06, "loss": 0.5778, "step": 14205 }, { "epoch": 0.43539291406154224, "grad_norm": 1.6629570434478735, "learning_rate": 6.274948819007307e-06, "loss": 0.6371, "step": 14206 }, { "epoch": 0.43542356258428344, "grad_norm": 1.8164861457200954, "learning_rate": 6.274468901293825e-06, "loss": 0.7198, "step": 14207 }, { "epoch": 0.43545421110702465, "grad_norm": 0.8352268203947424, "learning_rate": 6.2739889710226745e-06, "loss": 0.4747, "step": 14208 }, { "epoch": 0.43548485962976585, "grad_norm": 2.0174118259483635, "learning_rate": 6.2735090281985855e-06, "loss": 0.7103, "step": 14209 }, { "epoch": 0.43551550815250706, "grad_norm": 1.5360072145479453, "learning_rate": 6.273029072826285e-06, "loss": 0.6848, "step": 14210 }, { "epoch": 0.43554615667524826, "grad_norm": 1.5675395900670777, "learning_rate": 6.272549104910504e-06, "loss": 0.5956, "step": 14211 }, { "epoch": 0.43557680519798947, "grad_norm": 0.7725829823251497, "learning_rate": 6.272069124455973e-06, "loss": 0.4659, "step": 14212 }, { "epoch": 0.4356074537207307, "grad_norm": 1.4875658909839395, "learning_rate": 6.271589131467416e-06, "loss": 0.5802, "step": 14213 }, { "epoch": 0.4356381022434719, "grad_norm": 1.716649991924082, "learning_rate": 6.271109125949568e-06, "loss": 0.6844, "step": 14214 }, { "epoch": 0.4356687507662131, "grad_norm": 1.594385317952467, "learning_rate": 6.270629107907155e-06, "loss": 0.5996, "step": 14215 }, { "epoch": 0.4356993992889543, "grad_norm": 0.7937585569500697, "learning_rate": 6.2701490773449105e-06, "loss": 0.4414, "step": 14216 }, { "epoch": 0.4357300478116955, "grad_norm": 1.9285156137873125, "learning_rate": 6.26966903426756e-06, "loss": 0.7041, "step": 14217 }, { "epoch": 0.4357606963344367, "grad_norm": 1.6376343165815985, "learning_rate": 6.269188978679837e-06, "loss": 0.6859, "step": 14218 }, { "epoch": 0.4357913448571779, "grad_norm": 1.762760907681, "learning_rate": 6.26870891058647e-06, "loss": 0.6134, "step": 14219 }, { "epoch": 0.4358219933799191, "grad_norm": 1.522391486237992, "learning_rate": 6.26822882999219e-06, "loss": 0.5167, "step": 14220 }, { "epoch": 0.43585264190266026, "grad_norm": 1.7556048203712324, "learning_rate": 6.267748736901726e-06, "loss": 0.6425, "step": 14221 }, { "epoch": 0.43588329042540147, "grad_norm": 1.8822428792306383, "learning_rate": 6.2672686313198095e-06, "loss": 0.7477, "step": 14222 }, { "epoch": 0.4359139389481427, "grad_norm": 0.7509895093601561, "learning_rate": 6.26678851325117e-06, "loss": 0.4469, "step": 14223 }, { "epoch": 0.4359445874708839, "grad_norm": 1.7085518695927555, "learning_rate": 6.266308382700541e-06, "loss": 0.674, "step": 14224 }, { "epoch": 0.4359752359936251, "grad_norm": 0.7879114698036147, "learning_rate": 6.26582823967265e-06, "loss": 0.4466, "step": 14225 }, { "epoch": 0.4360058845163663, "grad_norm": 1.8929471177242565, "learning_rate": 6.26534808417223e-06, "loss": 0.6891, "step": 14226 }, { "epoch": 0.4360365330391075, "grad_norm": 1.5089166793230142, "learning_rate": 6.264867916204011e-06, "loss": 0.5713, "step": 14227 }, { "epoch": 0.4360671815618487, "grad_norm": 1.7768470815664617, "learning_rate": 6.264387735772727e-06, "loss": 0.6087, "step": 14228 }, { "epoch": 0.4360978300845899, "grad_norm": 1.547864269159272, "learning_rate": 6.2639075428831054e-06, "loss": 0.6209, "step": 14229 }, { "epoch": 0.4361284786073311, "grad_norm": 1.8621579028284396, "learning_rate": 6.263427337539878e-06, "loss": 0.6961, "step": 14230 }, { "epoch": 0.4361591271300723, "grad_norm": 1.678773195751565, "learning_rate": 6.2629471197477795e-06, "loss": 0.6184, "step": 14231 }, { "epoch": 0.4361897756528135, "grad_norm": 1.7814466725881084, "learning_rate": 6.26246688951154e-06, "loss": 0.6366, "step": 14232 }, { "epoch": 0.43622042417555473, "grad_norm": 1.965367633381306, "learning_rate": 6.261986646835892e-06, "loss": 0.6099, "step": 14233 }, { "epoch": 0.43625107269829594, "grad_norm": 1.7467000766125653, "learning_rate": 6.261506391725565e-06, "loss": 0.6837, "step": 14234 }, { "epoch": 0.43628172122103714, "grad_norm": 1.5074415590176364, "learning_rate": 6.2610261241852946e-06, "loss": 0.6231, "step": 14235 }, { "epoch": 0.43631236974377835, "grad_norm": 1.6186677381510821, "learning_rate": 6.26054584421981e-06, "loss": 0.606, "step": 14236 }, { "epoch": 0.43634301826651956, "grad_norm": 1.8260107926486209, "learning_rate": 6.260065551833845e-06, "loss": 0.6332, "step": 14237 }, { "epoch": 0.43637366678926076, "grad_norm": 1.6632847730403506, "learning_rate": 6.259585247032129e-06, "loss": 0.6005, "step": 14238 }, { "epoch": 0.43640431531200197, "grad_norm": 1.6142332323574249, "learning_rate": 6.2591049298194005e-06, "loss": 0.6868, "step": 14239 }, { "epoch": 0.4364349638347432, "grad_norm": 0.8522176653184373, "learning_rate": 6.258624600200389e-06, "loss": 0.4591, "step": 14240 }, { "epoch": 0.4364656123574844, "grad_norm": 1.6430755451119397, "learning_rate": 6.258144258179826e-06, "loss": 0.5963, "step": 14241 }, { "epoch": 0.4364962608802256, "grad_norm": 1.6783336792688204, "learning_rate": 6.257663903762445e-06, "loss": 0.7093, "step": 14242 }, { "epoch": 0.4365269094029668, "grad_norm": 1.9145709556320858, "learning_rate": 6.257183536952982e-06, "loss": 0.6997, "step": 14243 }, { "epoch": 0.436557557925708, "grad_norm": 1.6646194058715857, "learning_rate": 6.2567031577561676e-06, "loss": 0.6271, "step": 14244 }, { "epoch": 0.4365882064484492, "grad_norm": 1.6144349068731763, "learning_rate": 6.2562227661767336e-06, "loss": 0.6044, "step": 14245 }, { "epoch": 0.4366188549711904, "grad_norm": 1.643269842641451, "learning_rate": 6.2557423622194165e-06, "loss": 0.6994, "step": 14246 }, { "epoch": 0.4366495034939316, "grad_norm": 1.6298209444397642, "learning_rate": 6.255261945888949e-06, "loss": 0.6584, "step": 14247 }, { "epoch": 0.4366801520166728, "grad_norm": 1.5634124402947467, "learning_rate": 6.254781517190064e-06, "loss": 0.6019, "step": 14248 }, { "epoch": 0.436710800539414, "grad_norm": 1.6409712068244955, "learning_rate": 6.254301076127495e-06, "loss": 0.5954, "step": 14249 }, { "epoch": 0.43674144906215523, "grad_norm": 1.3452263070932458, "learning_rate": 6.253820622705977e-06, "loss": 0.6478, "step": 14250 }, { "epoch": 0.43677209758489643, "grad_norm": 1.489764785270914, "learning_rate": 6.253340156930243e-06, "loss": 0.6022, "step": 14251 }, { "epoch": 0.4368027461076376, "grad_norm": 1.5734400449106225, "learning_rate": 6.25285967880503e-06, "loss": 0.717, "step": 14252 }, { "epoch": 0.4368333946303788, "grad_norm": 1.5731626863094395, "learning_rate": 6.252379188335067e-06, "loss": 0.6517, "step": 14253 }, { "epoch": 0.43686404315312, "grad_norm": 1.7209142754978632, "learning_rate": 6.251898685525093e-06, "loss": 0.6841, "step": 14254 }, { "epoch": 0.4368946916758612, "grad_norm": 1.6097707484393127, "learning_rate": 6.251418170379841e-06, "loss": 0.639, "step": 14255 }, { "epoch": 0.4369253401986024, "grad_norm": 1.6812914742719869, "learning_rate": 6.250937642904045e-06, "loss": 0.7002, "step": 14256 }, { "epoch": 0.4369559887213436, "grad_norm": 1.5587731138149832, "learning_rate": 6.250457103102441e-06, "loss": 0.5869, "step": 14257 }, { "epoch": 0.4369866372440848, "grad_norm": 0.8580315008818065, "learning_rate": 6.2499765509797615e-06, "loss": 0.4548, "step": 14258 }, { "epoch": 0.437017285766826, "grad_norm": 1.9116688581275407, "learning_rate": 6.249495986540746e-06, "loss": 0.6465, "step": 14259 }, { "epoch": 0.43704793428956723, "grad_norm": 1.747946109852959, "learning_rate": 6.249015409790126e-06, "loss": 0.6642, "step": 14260 }, { "epoch": 0.43707858281230844, "grad_norm": 0.7919084979500906, "learning_rate": 6.248534820732637e-06, "loss": 0.4533, "step": 14261 }, { "epoch": 0.43710923133504964, "grad_norm": 1.4141307111378751, "learning_rate": 6.248054219373014e-06, "loss": 0.5801, "step": 14262 }, { "epoch": 0.43713987985779085, "grad_norm": 1.695401930124417, "learning_rate": 6.247573605715996e-06, "loss": 0.6106, "step": 14263 }, { "epoch": 0.43717052838053205, "grad_norm": 1.7164658157517108, "learning_rate": 6.247092979766314e-06, "loss": 0.7068, "step": 14264 }, { "epoch": 0.43720117690327326, "grad_norm": 1.8032900036700608, "learning_rate": 6.246612341528706e-06, "loss": 0.5986, "step": 14265 }, { "epoch": 0.43723182542601446, "grad_norm": 1.8366895575816498, "learning_rate": 6.246131691007908e-06, "loss": 0.6553, "step": 14266 }, { "epoch": 0.43726247394875567, "grad_norm": 1.7746473164952212, "learning_rate": 6.2456510282086556e-06, "loss": 0.6549, "step": 14267 }, { "epoch": 0.4372931224714969, "grad_norm": 1.7309072607818974, "learning_rate": 6.245170353135686e-06, "loss": 0.6485, "step": 14268 }, { "epoch": 0.4373237709942381, "grad_norm": 1.502111319303909, "learning_rate": 6.244689665793733e-06, "loss": 0.5755, "step": 14269 }, { "epoch": 0.4373544195169793, "grad_norm": 1.779233001657589, "learning_rate": 6.244208966187534e-06, "loss": 0.7179, "step": 14270 }, { "epoch": 0.4373850680397205, "grad_norm": 1.5127904668905758, "learning_rate": 6.243728254321826e-06, "loss": 0.6478, "step": 14271 }, { "epoch": 0.4374157165624617, "grad_norm": 1.9485860009920843, "learning_rate": 6.243247530201345e-06, "loss": 0.6625, "step": 14272 }, { "epoch": 0.4374463650852029, "grad_norm": 1.6627755331437644, "learning_rate": 6.242766793830828e-06, "loss": 0.5866, "step": 14273 }, { "epoch": 0.4374770136079441, "grad_norm": 1.6481127065498722, "learning_rate": 6.242286045215014e-06, "loss": 0.7472, "step": 14274 }, { "epoch": 0.4375076621306853, "grad_norm": 1.8721238443827837, "learning_rate": 6.241805284358635e-06, "loss": 0.7381, "step": 14275 }, { "epoch": 0.4375383106534265, "grad_norm": 1.752159250963597, "learning_rate": 6.241324511266432e-06, "loss": 0.7225, "step": 14276 }, { "epoch": 0.4375689591761677, "grad_norm": 1.6502833937926062, "learning_rate": 6.2408437259431396e-06, "loss": 0.6294, "step": 14277 }, { "epoch": 0.43759960769890893, "grad_norm": 1.422305211182262, "learning_rate": 6.2403629283935e-06, "loss": 0.6259, "step": 14278 }, { "epoch": 0.43763025622165014, "grad_norm": 1.7619056856914475, "learning_rate": 6.239882118622244e-06, "loss": 0.5625, "step": 14279 }, { "epoch": 0.43766090474439134, "grad_norm": 5.157418350911879, "learning_rate": 6.239401296634113e-06, "loss": 0.766, "step": 14280 }, { "epoch": 0.43769155326713255, "grad_norm": 1.7933366197800424, "learning_rate": 6.238920462433843e-06, "loss": 0.6948, "step": 14281 }, { "epoch": 0.43772220178987375, "grad_norm": 1.707775487880428, "learning_rate": 6.238439616026174e-06, "loss": 0.6331, "step": 14282 }, { "epoch": 0.4377528503126149, "grad_norm": 1.5863555546498789, "learning_rate": 6.237958757415843e-06, "loss": 0.566, "step": 14283 }, { "epoch": 0.4377834988353561, "grad_norm": 10.254805357216933, "learning_rate": 6.237477886607586e-06, "loss": 0.6359, "step": 14284 }, { "epoch": 0.4378141473580973, "grad_norm": 1.8507844924884633, "learning_rate": 6.2369970036061435e-06, "loss": 0.6556, "step": 14285 }, { "epoch": 0.4378447958808385, "grad_norm": 1.9529548322068477, "learning_rate": 6.236516108416254e-06, "loss": 0.7149, "step": 14286 }, { "epoch": 0.4378754444035797, "grad_norm": 1.7795167632883526, "learning_rate": 6.236035201042654e-06, "loss": 0.6887, "step": 14287 }, { "epoch": 0.43790609292632093, "grad_norm": 3.3501849072252883, "learning_rate": 6.235554281490082e-06, "loss": 0.6715, "step": 14288 }, { "epoch": 0.43793674144906214, "grad_norm": 2.0966227607446637, "learning_rate": 6.23507334976328e-06, "loss": 0.7091, "step": 14289 }, { "epoch": 0.43796738997180334, "grad_norm": 1.5738882578783, "learning_rate": 6.234592405866981e-06, "loss": 0.584, "step": 14290 }, { "epoch": 0.43799803849454455, "grad_norm": 1.801203593208695, "learning_rate": 6.2341114498059295e-06, "loss": 0.6706, "step": 14291 }, { "epoch": 0.43802868701728576, "grad_norm": 0.8391671435734299, "learning_rate": 6.233630481584862e-06, "loss": 0.4555, "step": 14292 }, { "epoch": 0.43805933554002696, "grad_norm": 1.8426214749194907, "learning_rate": 6.233149501208518e-06, "loss": 0.5833, "step": 14293 }, { "epoch": 0.43808998406276817, "grad_norm": 1.8738673022205978, "learning_rate": 6.2326685086816355e-06, "loss": 0.7071, "step": 14294 }, { "epoch": 0.4381206325855094, "grad_norm": 1.7485665048285737, "learning_rate": 6.2321875040089555e-06, "loss": 0.5889, "step": 14295 }, { "epoch": 0.4381512811082506, "grad_norm": 1.8998359576190846, "learning_rate": 6.231706487195215e-06, "loss": 0.6396, "step": 14296 }, { "epoch": 0.4381819296309918, "grad_norm": 1.7085631588064107, "learning_rate": 6.231225458245157e-06, "loss": 0.768, "step": 14297 }, { "epoch": 0.438212578153733, "grad_norm": 1.632584261932853, "learning_rate": 6.230744417163519e-06, "loss": 0.5737, "step": 14298 }, { "epoch": 0.4382432266764742, "grad_norm": 1.7488032557545712, "learning_rate": 6.23026336395504e-06, "loss": 0.7255, "step": 14299 }, { "epoch": 0.4382738751992154, "grad_norm": 1.7749563279532583, "learning_rate": 6.229782298624464e-06, "loss": 0.6257, "step": 14300 }, { "epoch": 0.4383045237219566, "grad_norm": 1.85980059491364, "learning_rate": 6.229301221176527e-06, "loss": 0.6783, "step": 14301 }, { "epoch": 0.4383351722446978, "grad_norm": 0.7941886693483069, "learning_rate": 6.2288201316159715e-06, "loss": 0.4336, "step": 14302 }, { "epoch": 0.438365820767439, "grad_norm": 0.8031242423113579, "learning_rate": 6.228339029947534e-06, "loss": 0.4312, "step": 14303 }, { "epoch": 0.4383964692901802, "grad_norm": 0.7563316091265978, "learning_rate": 6.227857916175961e-06, "loss": 0.4441, "step": 14304 }, { "epoch": 0.43842711781292143, "grad_norm": 0.7838348123837521, "learning_rate": 6.227376790305989e-06, "loss": 0.4411, "step": 14305 }, { "epoch": 0.43845776633566264, "grad_norm": 1.9041920324032262, "learning_rate": 6.226895652342359e-06, "loss": 0.7291, "step": 14306 }, { "epoch": 0.43848841485840384, "grad_norm": 1.8709811190551713, "learning_rate": 6.226414502289811e-06, "loss": 0.7196, "step": 14307 }, { "epoch": 0.43851906338114505, "grad_norm": 1.9802248572476502, "learning_rate": 6.2259333401530896e-06, "loss": 0.6611, "step": 14308 }, { "epoch": 0.43854971190388625, "grad_norm": 0.8120395735808645, "learning_rate": 6.225452165936932e-06, "loss": 0.4592, "step": 14309 }, { "epoch": 0.43858036042662746, "grad_norm": 1.9000976926905233, "learning_rate": 6.22497097964608e-06, "loss": 0.5739, "step": 14310 }, { "epoch": 0.43861100894936866, "grad_norm": 1.8718513649755149, "learning_rate": 6.224489781285277e-06, "loss": 0.6115, "step": 14311 }, { "epoch": 0.43864165747210987, "grad_norm": 1.8094062687493704, "learning_rate": 6.224008570859262e-06, "loss": 0.6858, "step": 14312 }, { "epoch": 0.4386723059948511, "grad_norm": 1.7066747763591321, "learning_rate": 6.223527348372778e-06, "loss": 0.6542, "step": 14313 }, { "epoch": 0.4387029545175922, "grad_norm": 1.578508259524756, "learning_rate": 6.223046113830564e-06, "loss": 0.6091, "step": 14314 }, { "epoch": 0.43873360304033343, "grad_norm": 1.8493748930181906, "learning_rate": 6.222564867237366e-06, "loss": 0.727, "step": 14315 }, { "epoch": 0.43876425156307464, "grad_norm": 1.76227107552195, "learning_rate": 6.222083608597923e-06, "loss": 0.6939, "step": 14316 }, { "epoch": 0.43879490008581584, "grad_norm": 1.6719073152458688, "learning_rate": 6.221602337916978e-06, "loss": 0.6362, "step": 14317 }, { "epoch": 0.43882554860855705, "grad_norm": 1.7729863080458732, "learning_rate": 6.221121055199271e-06, "loss": 0.577, "step": 14318 }, { "epoch": 0.43885619713129825, "grad_norm": 1.7279538188647354, "learning_rate": 6.220639760449547e-06, "loss": 0.7205, "step": 14319 }, { "epoch": 0.43888684565403946, "grad_norm": 2.0571863043893934, "learning_rate": 6.220158453672547e-06, "loss": 0.7342, "step": 14320 }, { "epoch": 0.43891749417678066, "grad_norm": 1.6896822999929257, "learning_rate": 6.219677134873013e-06, "loss": 0.6549, "step": 14321 }, { "epoch": 0.43894814269952187, "grad_norm": 1.8996339563613363, "learning_rate": 6.219195804055689e-06, "loss": 0.7083, "step": 14322 }, { "epoch": 0.4389787912222631, "grad_norm": 1.6794567188340794, "learning_rate": 6.218714461225316e-06, "loss": 0.6937, "step": 14323 }, { "epoch": 0.4390094397450043, "grad_norm": 2.037017641837596, "learning_rate": 6.218233106386639e-06, "loss": 0.6733, "step": 14324 }, { "epoch": 0.4390400882677455, "grad_norm": 1.6635955043366093, "learning_rate": 6.217751739544396e-06, "loss": 0.6709, "step": 14325 }, { "epoch": 0.4390707367904867, "grad_norm": 1.5174990619413693, "learning_rate": 6.217270360703337e-06, "loss": 0.6023, "step": 14326 }, { "epoch": 0.4391013853132279, "grad_norm": 1.6576136782455697, "learning_rate": 6.216788969868199e-06, "loss": 0.6725, "step": 14327 }, { "epoch": 0.4391320338359691, "grad_norm": 1.8464171079650904, "learning_rate": 6.2163075670437324e-06, "loss": 0.7138, "step": 14328 }, { "epoch": 0.4391626823587103, "grad_norm": 0.9788623909585832, "learning_rate": 6.215826152234672e-06, "loss": 0.4715, "step": 14329 }, { "epoch": 0.4391933308814515, "grad_norm": 1.5091850746233204, "learning_rate": 6.215344725445766e-06, "loss": 0.6498, "step": 14330 }, { "epoch": 0.4392239794041927, "grad_norm": 1.9131058007079376, "learning_rate": 6.214863286681759e-06, "loss": 0.7049, "step": 14331 }, { "epoch": 0.4392546279269339, "grad_norm": 1.5217536587929597, "learning_rate": 6.214381835947393e-06, "loss": 0.6071, "step": 14332 }, { "epoch": 0.43928527644967513, "grad_norm": 1.8613092530853301, "learning_rate": 6.213900373247411e-06, "loss": 0.799, "step": 14333 }, { "epoch": 0.43931592497241634, "grad_norm": 1.8690762236507923, "learning_rate": 6.213418898586559e-06, "loss": 0.5722, "step": 14334 }, { "epoch": 0.43934657349515754, "grad_norm": 1.7796740816315328, "learning_rate": 6.212937411969579e-06, "loss": 0.604, "step": 14335 }, { "epoch": 0.43937722201789875, "grad_norm": 1.6434023325508489, "learning_rate": 6.2124559134012165e-06, "loss": 0.6385, "step": 14336 }, { "epoch": 0.43940787054063996, "grad_norm": 0.8644054774992141, "learning_rate": 6.211974402886218e-06, "loss": 0.4682, "step": 14337 }, { "epoch": 0.43943851906338116, "grad_norm": 1.6931171520513892, "learning_rate": 6.211492880429323e-06, "loss": 0.682, "step": 14338 }, { "epoch": 0.43946916758612237, "grad_norm": 2.070300227821399, "learning_rate": 6.211011346035279e-06, "loss": 0.7186, "step": 14339 }, { "epoch": 0.4394998161088636, "grad_norm": 1.595698372500152, "learning_rate": 6.210529799708831e-06, "loss": 0.6963, "step": 14340 }, { "epoch": 0.4395304646316048, "grad_norm": 1.7804171255812107, "learning_rate": 6.210048241454723e-06, "loss": 0.6416, "step": 14341 }, { "epoch": 0.439561113154346, "grad_norm": 0.7957053947685288, "learning_rate": 6.2095666712776995e-06, "loss": 0.4529, "step": 14342 }, { "epoch": 0.4395917616770872, "grad_norm": 1.6013100572487022, "learning_rate": 6.209085089182507e-06, "loss": 0.5893, "step": 14343 }, { "epoch": 0.4396224101998284, "grad_norm": 0.7972979519260305, "learning_rate": 6.20860349517389e-06, "loss": 0.4397, "step": 14344 }, { "epoch": 0.43965305872256955, "grad_norm": 1.631586369975757, "learning_rate": 6.208121889256592e-06, "loss": 0.7657, "step": 14345 }, { "epoch": 0.43968370724531075, "grad_norm": 1.6019150104438704, "learning_rate": 6.20764027143536e-06, "loss": 0.5878, "step": 14346 }, { "epoch": 0.43971435576805196, "grad_norm": 1.6863470047821443, "learning_rate": 6.207158641714942e-06, "loss": 0.6113, "step": 14347 }, { "epoch": 0.43974500429079316, "grad_norm": 1.4329686883877424, "learning_rate": 6.2066770001000784e-06, "loss": 0.5922, "step": 14348 }, { "epoch": 0.43977565281353437, "grad_norm": 1.901893977344471, "learning_rate": 6.206195346595518e-06, "loss": 0.7023, "step": 14349 }, { "epoch": 0.4398063013362756, "grad_norm": 1.5893503040494772, "learning_rate": 6.2057136812060074e-06, "loss": 0.696, "step": 14350 }, { "epoch": 0.4398369498590168, "grad_norm": 1.5895544853172807, "learning_rate": 6.20523200393629e-06, "loss": 0.5638, "step": 14351 }, { "epoch": 0.439867598381758, "grad_norm": 1.6884622606737953, "learning_rate": 6.204750314791115e-06, "loss": 0.6341, "step": 14352 }, { "epoch": 0.4398982469044992, "grad_norm": 1.7878097729079117, "learning_rate": 6.204268613775225e-06, "loss": 0.7067, "step": 14353 }, { "epoch": 0.4399288954272404, "grad_norm": 1.8340444000902925, "learning_rate": 6.203786900893369e-06, "loss": 0.6292, "step": 14354 }, { "epoch": 0.4399595439499816, "grad_norm": 1.6337752733960744, "learning_rate": 6.203305176150293e-06, "loss": 0.6321, "step": 14355 }, { "epoch": 0.4399901924727228, "grad_norm": 1.638928486406952, "learning_rate": 6.2028234395507435e-06, "loss": 0.6468, "step": 14356 }, { "epoch": 0.440020840995464, "grad_norm": 1.7586173889970822, "learning_rate": 6.202341691099465e-06, "loss": 0.6515, "step": 14357 }, { "epoch": 0.4400514895182052, "grad_norm": 1.7107992894807025, "learning_rate": 6.2018599308012085e-06, "loss": 0.6448, "step": 14358 }, { "epoch": 0.4400821380409464, "grad_norm": 1.5779684739976052, "learning_rate": 6.201378158660718e-06, "loss": 0.6226, "step": 14359 }, { "epoch": 0.44011278656368763, "grad_norm": 1.765053371236269, "learning_rate": 6.200896374682741e-06, "loss": 0.6259, "step": 14360 }, { "epoch": 0.44014343508642884, "grad_norm": 0.9735216925840341, "learning_rate": 6.200414578872024e-06, "loss": 0.4559, "step": 14361 }, { "epoch": 0.44017408360917004, "grad_norm": 1.705172686772485, "learning_rate": 6.199932771233315e-06, "loss": 0.593, "step": 14362 }, { "epoch": 0.44020473213191125, "grad_norm": 1.7225599093935675, "learning_rate": 6.199450951771363e-06, "loss": 0.6797, "step": 14363 }, { "epoch": 0.44023538065465245, "grad_norm": 1.6924358804227349, "learning_rate": 6.198969120490913e-06, "loss": 0.6413, "step": 14364 }, { "epoch": 0.44026602917739366, "grad_norm": 1.8737691361983984, "learning_rate": 6.198487277396712e-06, "loss": 0.7215, "step": 14365 }, { "epoch": 0.44029667770013486, "grad_norm": 1.683060917437606, "learning_rate": 6.198005422493511e-06, "loss": 0.5913, "step": 14366 }, { "epoch": 0.44032732622287607, "grad_norm": 1.7039984187148867, "learning_rate": 6.1975235557860554e-06, "loss": 0.6793, "step": 14367 }, { "epoch": 0.4403579747456173, "grad_norm": 1.6377757794398515, "learning_rate": 6.197041677279094e-06, "loss": 0.6659, "step": 14368 }, { "epoch": 0.4403886232683585, "grad_norm": 1.635596764390654, "learning_rate": 6.196559786977374e-06, "loss": 0.6052, "step": 14369 }, { "epoch": 0.4404192717910997, "grad_norm": 1.585217321617142, "learning_rate": 6.196077884885646e-06, "loss": 0.6111, "step": 14370 }, { "epoch": 0.4404499203138409, "grad_norm": 1.665005340246267, "learning_rate": 6.195595971008655e-06, "loss": 0.6736, "step": 14371 }, { "epoch": 0.4404805688365821, "grad_norm": 0.7807177550556498, "learning_rate": 6.195114045351151e-06, "loss": 0.4112, "step": 14372 }, { "epoch": 0.4405112173593233, "grad_norm": 1.641458575350662, "learning_rate": 6.194632107917884e-06, "loss": 0.6319, "step": 14373 }, { "epoch": 0.4405418658820645, "grad_norm": 1.6267140916148926, "learning_rate": 6.1941501587136e-06, "loss": 0.5552, "step": 14374 }, { "epoch": 0.4405725144048057, "grad_norm": 1.751949945821059, "learning_rate": 6.193668197743051e-06, "loss": 0.623, "step": 14375 }, { "epoch": 0.44060316292754687, "grad_norm": 1.6715049094834846, "learning_rate": 6.19318622501098e-06, "loss": 0.8081, "step": 14376 }, { "epoch": 0.44063381145028807, "grad_norm": 1.853907390005223, "learning_rate": 6.192704240522142e-06, "loss": 0.6838, "step": 14377 }, { "epoch": 0.4406644599730293, "grad_norm": 0.8378523956851804, "learning_rate": 6.192222244281284e-06, "loss": 0.4401, "step": 14378 }, { "epoch": 0.4406951084957705, "grad_norm": 1.7051399761999313, "learning_rate": 6.191740236293154e-06, "loss": 0.7406, "step": 14379 }, { "epoch": 0.4407257570185117, "grad_norm": 1.530135910868418, "learning_rate": 6.191258216562503e-06, "loss": 0.6352, "step": 14380 }, { "epoch": 0.4407564055412529, "grad_norm": 1.6212247475527441, "learning_rate": 6.19077618509408e-06, "loss": 0.6629, "step": 14381 }, { "epoch": 0.4407870540639941, "grad_norm": 1.7936365994795624, "learning_rate": 6.190294141892637e-06, "loss": 0.5052, "step": 14382 }, { "epoch": 0.4408177025867353, "grad_norm": 1.8336912726015577, "learning_rate": 6.1898120869629185e-06, "loss": 0.6186, "step": 14383 }, { "epoch": 0.4408483511094765, "grad_norm": 1.6792517775250835, "learning_rate": 6.189330020309678e-06, "loss": 0.7461, "step": 14384 }, { "epoch": 0.4408789996322177, "grad_norm": 0.8130237212288073, "learning_rate": 6.188847941937664e-06, "loss": 0.4394, "step": 14385 }, { "epoch": 0.4409096481549589, "grad_norm": 1.6265401821319638, "learning_rate": 6.188365851851629e-06, "loss": 0.6341, "step": 14386 }, { "epoch": 0.4409402966777001, "grad_norm": 1.7031483995347005, "learning_rate": 6.187883750056319e-06, "loss": 0.698, "step": 14387 }, { "epoch": 0.44097094520044133, "grad_norm": 1.673919792674677, "learning_rate": 6.187401636556487e-06, "loss": 0.5579, "step": 14388 }, { "epoch": 0.44100159372318254, "grad_norm": 1.550803412437091, "learning_rate": 6.186919511356882e-06, "loss": 0.6546, "step": 14389 }, { "epoch": 0.44103224224592374, "grad_norm": 2.0211905869140505, "learning_rate": 6.186437374462257e-06, "loss": 0.7933, "step": 14390 }, { "epoch": 0.44106289076866495, "grad_norm": 1.8193410726516182, "learning_rate": 6.18595522587736e-06, "loss": 0.6408, "step": 14391 }, { "epoch": 0.44109353929140616, "grad_norm": 1.6956352529852357, "learning_rate": 6.185473065606944e-06, "loss": 0.6416, "step": 14392 }, { "epoch": 0.44112418781414736, "grad_norm": 1.7386546625192816, "learning_rate": 6.184990893655758e-06, "loss": 0.6213, "step": 14393 }, { "epoch": 0.44115483633688857, "grad_norm": 1.7753369681311113, "learning_rate": 6.184508710028552e-06, "loss": 0.7292, "step": 14394 }, { "epoch": 0.4411854848596298, "grad_norm": 1.6494618763123587, "learning_rate": 6.18402651473008e-06, "loss": 0.6667, "step": 14395 }, { "epoch": 0.441216133382371, "grad_norm": 1.7527642406227493, "learning_rate": 6.18354430776509e-06, "loss": 0.7467, "step": 14396 }, { "epoch": 0.4412467819051122, "grad_norm": 1.6514348743225629, "learning_rate": 6.1830620891383384e-06, "loss": 0.6576, "step": 14397 }, { "epoch": 0.4412774304278534, "grad_norm": 1.8129917910714461, "learning_rate": 6.182579858854572e-06, "loss": 0.7401, "step": 14398 }, { "epoch": 0.4413080789505946, "grad_norm": 1.664985939000339, "learning_rate": 6.182097616918543e-06, "loss": 0.5376, "step": 14399 }, { "epoch": 0.4413387274733358, "grad_norm": 1.8213812827691813, "learning_rate": 6.1816153633350026e-06, "loss": 0.6748, "step": 14400 }, { "epoch": 0.441369375996077, "grad_norm": 1.8275001326612619, "learning_rate": 6.181133098108707e-06, "loss": 0.5673, "step": 14401 }, { "epoch": 0.4414000245188182, "grad_norm": 1.5654683020501061, "learning_rate": 6.180650821244403e-06, "loss": 0.675, "step": 14402 }, { "epoch": 0.4414306730415594, "grad_norm": 1.5841557991777604, "learning_rate": 6.1801685327468445e-06, "loss": 0.6515, "step": 14403 }, { "epoch": 0.4414613215643006, "grad_norm": 1.8216715515834805, "learning_rate": 6.1796862326207815e-06, "loss": 0.7205, "step": 14404 }, { "epoch": 0.44149197008704183, "grad_norm": 1.7763283302859076, "learning_rate": 6.179203920870971e-06, "loss": 0.6942, "step": 14405 }, { "epoch": 0.44152261860978304, "grad_norm": 1.5029544040114287, "learning_rate": 6.178721597502162e-06, "loss": 0.6249, "step": 14406 }, { "epoch": 0.4415532671325242, "grad_norm": 1.6438947555195065, "learning_rate": 6.178239262519106e-06, "loss": 0.7614, "step": 14407 }, { "epoch": 0.4415839156552654, "grad_norm": 1.7714531932412854, "learning_rate": 6.177756915926558e-06, "loss": 0.6858, "step": 14408 }, { "epoch": 0.4416145641780066, "grad_norm": 1.786364521994295, "learning_rate": 6.17727455772927e-06, "loss": 0.6529, "step": 14409 }, { "epoch": 0.4416452127007478, "grad_norm": 1.618567064562419, "learning_rate": 6.176792187931995e-06, "loss": 0.574, "step": 14410 }, { "epoch": 0.441675861223489, "grad_norm": 0.8344561262447483, "learning_rate": 6.1763098065394844e-06, "loss": 0.4746, "step": 14411 }, { "epoch": 0.4417065097462302, "grad_norm": 1.6791741917546747, "learning_rate": 6.1758274135564935e-06, "loss": 0.6729, "step": 14412 }, { "epoch": 0.4417371582689714, "grad_norm": 0.8220037453912635, "learning_rate": 6.175345008987773e-06, "loss": 0.4597, "step": 14413 }, { "epoch": 0.4417678067917126, "grad_norm": 1.6666835448806085, "learning_rate": 6.174862592838079e-06, "loss": 0.6778, "step": 14414 }, { "epoch": 0.44179845531445383, "grad_norm": 1.8307707233126833, "learning_rate": 6.174380165112161e-06, "loss": 0.6246, "step": 14415 }, { "epoch": 0.44182910383719504, "grad_norm": 1.8245541156409733, "learning_rate": 6.173897725814777e-06, "loss": 0.6967, "step": 14416 }, { "epoch": 0.44185975235993624, "grad_norm": 1.7314271139419826, "learning_rate": 6.173415274950677e-06, "loss": 0.6772, "step": 14417 }, { "epoch": 0.44189040088267745, "grad_norm": 1.7654598741177971, "learning_rate": 6.1729328125246165e-06, "loss": 0.7023, "step": 14418 }, { "epoch": 0.44192104940541865, "grad_norm": 1.6714146843148139, "learning_rate": 6.172450338541348e-06, "loss": 0.7228, "step": 14419 }, { "epoch": 0.44195169792815986, "grad_norm": 0.8371486364792434, "learning_rate": 6.171967853005628e-06, "loss": 0.4509, "step": 14420 }, { "epoch": 0.44198234645090106, "grad_norm": 1.7730105571032584, "learning_rate": 6.171485355922208e-06, "loss": 0.5715, "step": 14421 }, { "epoch": 0.44201299497364227, "grad_norm": 0.8066771974044377, "learning_rate": 6.171002847295843e-06, "loss": 0.4576, "step": 14422 }, { "epoch": 0.4420436434963835, "grad_norm": 2.0915087368308862, "learning_rate": 6.170520327131288e-06, "loss": 0.7547, "step": 14423 }, { "epoch": 0.4420742920191247, "grad_norm": 1.8131936857296505, "learning_rate": 6.170037795433296e-06, "loss": 0.6195, "step": 14424 }, { "epoch": 0.4421049405418659, "grad_norm": 1.7723709318006795, "learning_rate": 6.169555252206623e-06, "loss": 0.5981, "step": 14425 }, { "epoch": 0.4421355890646071, "grad_norm": 1.625820192236429, "learning_rate": 6.169072697456021e-06, "loss": 0.6828, "step": 14426 }, { "epoch": 0.4421662375873483, "grad_norm": 1.6674659343946792, "learning_rate": 6.168590131186247e-06, "loss": 0.5942, "step": 14427 }, { "epoch": 0.4421968861100895, "grad_norm": 1.8373618236512987, "learning_rate": 6.168107553402057e-06, "loss": 0.7868, "step": 14428 }, { "epoch": 0.4422275346328307, "grad_norm": 0.8191847701279278, "learning_rate": 6.167624964108205e-06, "loss": 0.4521, "step": 14429 }, { "epoch": 0.4422581831555719, "grad_norm": 1.7039349231252863, "learning_rate": 6.1671423633094426e-06, "loss": 0.5816, "step": 14430 }, { "epoch": 0.4422888316783131, "grad_norm": 1.4814081171459867, "learning_rate": 6.1666597510105294e-06, "loss": 0.5927, "step": 14431 }, { "epoch": 0.4423194802010543, "grad_norm": 1.661414640927167, "learning_rate": 6.16617712721622e-06, "loss": 0.6638, "step": 14432 }, { "epoch": 0.44235012872379553, "grad_norm": 1.7490810351025778, "learning_rate": 6.1656944919312675e-06, "loss": 0.5647, "step": 14433 }, { "epoch": 0.44238077724653674, "grad_norm": 0.8162794073223502, "learning_rate": 6.165211845160429e-06, "loss": 0.4378, "step": 14434 }, { "epoch": 0.44241142576927794, "grad_norm": 1.6831955990395455, "learning_rate": 6.164729186908462e-06, "loss": 0.7191, "step": 14435 }, { "epoch": 0.44244207429201915, "grad_norm": 1.586320170349236, "learning_rate": 6.164246517180119e-06, "loss": 0.638, "step": 14436 }, { "epoch": 0.44247272281476036, "grad_norm": 0.837216141920032, "learning_rate": 6.163763835980156e-06, "loss": 0.4626, "step": 14437 }, { "epoch": 0.4425033713375015, "grad_norm": 1.6288061220014323, "learning_rate": 6.1632811433133325e-06, "loss": 0.709, "step": 14438 }, { "epoch": 0.4425340198602427, "grad_norm": 1.6546236075387053, "learning_rate": 6.1627984391843995e-06, "loss": 0.6965, "step": 14439 }, { "epoch": 0.4425646683829839, "grad_norm": 1.7082770954398758, "learning_rate": 6.1623157235981194e-06, "loss": 0.6581, "step": 14440 }, { "epoch": 0.4425953169057251, "grad_norm": 1.8495713307353159, "learning_rate": 6.1618329965592415e-06, "loss": 0.6609, "step": 14441 }, { "epoch": 0.44262596542846633, "grad_norm": 0.8400557766966368, "learning_rate": 6.161350258072528e-06, "loss": 0.4468, "step": 14442 }, { "epoch": 0.44265661395120753, "grad_norm": 1.6013003528760272, "learning_rate": 6.160867508142733e-06, "loss": 0.6478, "step": 14443 }, { "epoch": 0.44268726247394874, "grad_norm": 1.5708580823485374, "learning_rate": 6.160384746774614e-06, "loss": 0.6639, "step": 14444 }, { "epoch": 0.44271791099668995, "grad_norm": 1.7788465886419487, "learning_rate": 6.159901973972926e-06, "loss": 0.6544, "step": 14445 }, { "epoch": 0.44274855951943115, "grad_norm": 0.8100091902388091, "learning_rate": 6.159419189742427e-06, "loss": 0.4385, "step": 14446 }, { "epoch": 0.44277920804217236, "grad_norm": 1.6275899998441778, "learning_rate": 6.1589363940878755e-06, "loss": 0.6662, "step": 14447 }, { "epoch": 0.44280985656491356, "grad_norm": 1.6712678000513308, "learning_rate": 6.158453587014025e-06, "loss": 0.6584, "step": 14448 }, { "epoch": 0.44284050508765477, "grad_norm": 1.9011852213401312, "learning_rate": 6.1579707685256365e-06, "loss": 0.7601, "step": 14449 }, { "epoch": 0.442871153610396, "grad_norm": 1.9450132609271755, "learning_rate": 6.157487938627464e-06, "loss": 0.7055, "step": 14450 }, { "epoch": 0.4429018021331372, "grad_norm": 1.602465556447034, "learning_rate": 6.15700509732427e-06, "loss": 0.6177, "step": 14451 }, { "epoch": 0.4429324506558784, "grad_norm": 1.6949985761665576, "learning_rate": 6.156522244620806e-06, "loss": 0.732, "step": 14452 }, { "epoch": 0.4429630991786196, "grad_norm": 1.5330195800570523, "learning_rate": 6.156039380521833e-06, "loss": 0.6931, "step": 14453 }, { "epoch": 0.4429937477013608, "grad_norm": 1.8228327898293073, "learning_rate": 6.1555565050321085e-06, "loss": 0.6526, "step": 14454 }, { "epoch": 0.443024396224102, "grad_norm": 2.0119406637039248, "learning_rate": 6.155073618156391e-06, "loss": 0.6718, "step": 14455 }, { "epoch": 0.4430550447468432, "grad_norm": 0.8914259616233939, "learning_rate": 6.154590719899436e-06, "loss": 0.4717, "step": 14456 }, { "epoch": 0.4430856932695844, "grad_norm": 0.8372843169295241, "learning_rate": 6.154107810266004e-06, "loss": 0.4592, "step": 14457 }, { "epoch": 0.4431163417923256, "grad_norm": 1.8193364842962017, "learning_rate": 6.153624889260852e-06, "loss": 0.6186, "step": 14458 }, { "epoch": 0.4431469903150668, "grad_norm": 1.551244016301106, "learning_rate": 6.15314195688874e-06, "loss": 0.7114, "step": 14459 }, { "epoch": 0.44317763883780803, "grad_norm": 1.6002804855333976, "learning_rate": 6.152659013154424e-06, "loss": 0.6881, "step": 14460 }, { "epoch": 0.44320828736054924, "grad_norm": 1.5378112328025677, "learning_rate": 6.152176058062665e-06, "loss": 0.6619, "step": 14461 }, { "epoch": 0.44323893588329044, "grad_norm": 1.3810215231751488, "learning_rate": 6.151693091618218e-06, "loss": 0.5071, "step": 14462 }, { "epoch": 0.44326958440603165, "grad_norm": 1.7052989451912792, "learning_rate": 6.151210113825846e-06, "loss": 0.5637, "step": 14463 }, { "epoch": 0.44330023292877285, "grad_norm": 1.6180689591243376, "learning_rate": 6.150727124690306e-06, "loss": 0.5564, "step": 14464 }, { "epoch": 0.44333088145151406, "grad_norm": 1.6966386543266878, "learning_rate": 6.150244124216358e-06, "loss": 0.5522, "step": 14465 }, { "epoch": 0.44336152997425526, "grad_norm": 0.9106512042062089, "learning_rate": 6.14976111240876e-06, "loss": 0.4408, "step": 14466 }, { "epoch": 0.44339217849699647, "grad_norm": 1.6983873042082422, "learning_rate": 6.149278089272271e-06, "loss": 0.709, "step": 14467 }, { "epoch": 0.4434228270197377, "grad_norm": 1.865926844600645, "learning_rate": 6.148795054811652e-06, "loss": 0.6581, "step": 14468 }, { "epoch": 0.4434534755424788, "grad_norm": 1.8334768867937286, "learning_rate": 6.1483120090316595e-06, "loss": 0.7323, "step": 14469 }, { "epoch": 0.44348412406522003, "grad_norm": 0.8773904867944416, "learning_rate": 6.147828951937057e-06, "loss": 0.4633, "step": 14470 }, { "epoch": 0.44351477258796124, "grad_norm": 1.767837801258666, "learning_rate": 6.147345883532601e-06, "loss": 0.7788, "step": 14471 }, { "epoch": 0.44354542111070244, "grad_norm": 1.8749946950070941, "learning_rate": 6.146862803823053e-06, "loss": 0.6745, "step": 14472 }, { "epoch": 0.44357606963344365, "grad_norm": 1.4986422719468242, "learning_rate": 6.1463797128131705e-06, "loss": 0.5436, "step": 14473 }, { "epoch": 0.44360671815618485, "grad_norm": 1.758842386342752, "learning_rate": 6.1458966105077176e-06, "loss": 0.693, "step": 14474 }, { "epoch": 0.44363736667892606, "grad_norm": 1.5836170076169223, "learning_rate": 6.145413496911452e-06, "loss": 0.5809, "step": 14475 }, { "epoch": 0.44366801520166727, "grad_norm": 1.6214169452211835, "learning_rate": 6.144930372029133e-06, "loss": 0.5872, "step": 14476 }, { "epoch": 0.44369866372440847, "grad_norm": 1.8269304280660594, "learning_rate": 6.144447235865522e-06, "loss": 0.7197, "step": 14477 }, { "epoch": 0.4437293122471497, "grad_norm": 1.7033051514087758, "learning_rate": 6.143964088425382e-06, "loss": 0.6126, "step": 14478 }, { "epoch": 0.4437599607698909, "grad_norm": 1.6481455729727612, "learning_rate": 6.143480929713469e-06, "loss": 0.6307, "step": 14479 }, { "epoch": 0.4437906092926321, "grad_norm": 1.0114816258972665, "learning_rate": 6.142997759734546e-06, "loss": 0.4322, "step": 14480 }, { "epoch": 0.4438212578153733, "grad_norm": 2.1741093306934687, "learning_rate": 6.142514578493374e-06, "loss": 0.7516, "step": 14481 }, { "epoch": 0.4438519063381145, "grad_norm": 1.6915845826770877, "learning_rate": 6.142031385994714e-06, "loss": 0.5599, "step": 14482 }, { "epoch": 0.4438825548608557, "grad_norm": 1.6415299508527565, "learning_rate": 6.141548182243326e-06, "loss": 0.5749, "step": 14483 }, { "epoch": 0.4439132033835969, "grad_norm": 1.579984615117357, "learning_rate": 6.141064967243972e-06, "loss": 0.6396, "step": 14484 }, { "epoch": 0.4439438519063381, "grad_norm": 1.4802198657084469, "learning_rate": 6.140581741001413e-06, "loss": 0.6016, "step": 14485 }, { "epoch": 0.4439745004290793, "grad_norm": 1.5247199125250759, "learning_rate": 6.1400985035204095e-06, "loss": 0.686, "step": 14486 }, { "epoch": 0.4440051489518205, "grad_norm": 1.578713635298507, "learning_rate": 6.139615254805724e-06, "loss": 0.6447, "step": 14487 }, { "epoch": 0.44403579747456173, "grad_norm": 1.9929578070319562, "learning_rate": 6.139131994862118e-06, "loss": 0.6792, "step": 14488 }, { "epoch": 0.44406644599730294, "grad_norm": 1.783123309255822, "learning_rate": 6.1386487236943525e-06, "loss": 0.7229, "step": 14489 }, { "epoch": 0.44409709452004414, "grad_norm": 1.5798366123411214, "learning_rate": 6.138165441307191e-06, "loss": 0.6398, "step": 14490 }, { "epoch": 0.44412774304278535, "grad_norm": 1.5103004289830373, "learning_rate": 6.137682147705392e-06, "loss": 0.5907, "step": 14491 }, { "epoch": 0.44415839156552656, "grad_norm": 0.8536468819815408, "learning_rate": 6.1371988428937215e-06, "loss": 0.4461, "step": 14492 }, { "epoch": 0.44418904008826776, "grad_norm": 1.6783754048604813, "learning_rate": 6.136715526876938e-06, "loss": 0.6451, "step": 14493 }, { "epoch": 0.44421968861100897, "grad_norm": 1.5637264319210464, "learning_rate": 6.136232199659809e-06, "loss": 0.6116, "step": 14494 }, { "epoch": 0.4442503371337502, "grad_norm": 1.9753921239182035, "learning_rate": 6.13574886124709e-06, "loss": 0.665, "step": 14495 }, { "epoch": 0.4442809856564914, "grad_norm": 1.6643801598358694, "learning_rate": 6.13526551164355e-06, "loss": 0.6322, "step": 14496 }, { "epoch": 0.4443116341792326, "grad_norm": 1.7933840993945742, "learning_rate": 6.134782150853946e-06, "loss": 0.6893, "step": 14497 }, { "epoch": 0.4443422827019738, "grad_norm": 1.6889174509891092, "learning_rate": 6.134298778883046e-06, "loss": 0.6595, "step": 14498 }, { "epoch": 0.444372931224715, "grad_norm": 1.6436970352261686, "learning_rate": 6.133815395735606e-06, "loss": 0.6991, "step": 14499 }, { "epoch": 0.44440357974745615, "grad_norm": 1.9428041301982824, "learning_rate": 6.133332001416394e-06, "loss": 0.6889, "step": 14500 }, { "epoch": 0.44443422827019735, "grad_norm": 0.7852803401500797, "learning_rate": 6.1328485959301745e-06, "loss": 0.4377, "step": 14501 }, { "epoch": 0.44446487679293856, "grad_norm": 1.685285973836446, "learning_rate": 6.1323651792817045e-06, "loss": 0.6204, "step": 14502 }, { "epoch": 0.44449552531567976, "grad_norm": 1.8199075721887763, "learning_rate": 6.131881751475752e-06, "loss": 0.6618, "step": 14503 }, { "epoch": 0.44452617383842097, "grad_norm": 1.6459928295464403, "learning_rate": 6.131398312517078e-06, "loss": 0.7, "step": 14504 }, { "epoch": 0.4445568223611622, "grad_norm": 1.6255876360880788, "learning_rate": 6.13091486241045e-06, "loss": 0.6804, "step": 14505 }, { "epoch": 0.4445874708839034, "grad_norm": 1.6267375329074256, "learning_rate": 6.130431401160626e-06, "loss": 0.609, "step": 14506 }, { "epoch": 0.4446181194066446, "grad_norm": 1.8836175111619418, "learning_rate": 6.129947928772373e-06, "loss": 0.6921, "step": 14507 }, { "epoch": 0.4446487679293858, "grad_norm": 1.6374384399146065, "learning_rate": 6.129464445250452e-06, "loss": 0.5371, "step": 14508 }, { "epoch": 0.444679416452127, "grad_norm": 1.5510448079042292, "learning_rate": 6.128980950599632e-06, "loss": 0.5687, "step": 14509 }, { "epoch": 0.4447100649748682, "grad_norm": 1.7277281254206422, "learning_rate": 6.128497444824672e-06, "loss": 0.6149, "step": 14510 }, { "epoch": 0.4447407134976094, "grad_norm": 1.6477206817815384, "learning_rate": 6.1280139279303385e-06, "loss": 0.707, "step": 14511 }, { "epoch": 0.4447713620203506, "grad_norm": 1.6907784450533445, "learning_rate": 6.127530399921393e-06, "loss": 0.6545, "step": 14512 }, { "epoch": 0.4448020105430918, "grad_norm": 1.9368842310936847, "learning_rate": 6.127046860802605e-06, "loss": 0.6896, "step": 14513 }, { "epoch": 0.444832659065833, "grad_norm": 1.9322110928436425, "learning_rate": 6.1265633105787344e-06, "loss": 0.6953, "step": 14514 }, { "epoch": 0.44486330758857423, "grad_norm": 1.754203137583388, "learning_rate": 6.1260797492545484e-06, "loss": 0.6411, "step": 14515 }, { "epoch": 0.44489395611131544, "grad_norm": 1.8143371268494244, "learning_rate": 6.125596176834809e-06, "loss": 0.7056, "step": 14516 }, { "epoch": 0.44492460463405664, "grad_norm": 1.6158639596708662, "learning_rate": 6.125112593324283e-06, "loss": 0.5841, "step": 14517 }, { "epoch": 0.44495525315679785, "grad_norm": 1.5473158365210022, "learning_rate": 6.124628998727735e-06, "loss": 0.5903, "step": 14518 }, { "epoch": 0.44498590167953905, "grad_norm": 1.7038418437429286, "learning_rate": 6.124145393049929e-06, "loss": 0.6651, "step": 14519 }, { "epoch": 0.44501655020228026, "grad_norm": 1.6384724128917896, "learning_rate": 6.123661776295632e-06, "loss": 0.6428, "step": 14520 }, { "epoch": 0.44504719872502146, "grad_norm": 1.7584893229715264, "learning_rate": 6.123178148469609e-06, "loss": 0.6846, "step": 14521 }, { "epoch": 0.44507784724776267, "grad_norm": 1.5456033791752957, "learning_rate": 6.122694509576622e-06, "loss": 0.6157, "step": 14522 }, { "epoch": 0.4451084957705039, "grad_norm": 1.6577584662675748, "learning_rate": 6.122210859621439e-06, "loss": 0.6224, "step": 14523 }, { "epoch": 0.4451391442932451, "grad_norm": 1.6350809407236515, "learning_rate": 6.121727198608827e-06, "loss": 0.6764, "step": 14524 }, { "epoch": 0.4451697928159863, "grad_norm": 1.5817687298345566, "learning_rate": 6.1212435265435475e-06, "loss": 0.5663, "step": 14525 }, { "epoch": 0.4452004413387275, "grad_norm": 1.7202973149078298, "learning_rate": 6.120759843430371e-06, "loss": 0.7061, "step": 14526 }, { "epoch": 0.4452310898614687, "grad_norm": 1.61422103929771, "learning_rate": 6.1202761492740595e-06, "loss": 0.6648, "step": 14527 }, { "epoch": 0.4452617383842099, "grad_norm": 1.536103324886504, "learning_rate": 6.119792444079381e-06, "loss": 0.5687, "step": 14528 }, { "epoch": 0.4452923869069511, "grad_norm": 0.825277099685421, "learning_rate": 6.119308727851101e-06, "loss": 0.4386, "step": 14529 }, { "epoch": 0.4453230354296923, "grad_norm": 1.8331640019379838, "learning_rate": 6.118825000593984e-06, "loss": 0.6788, "step": 14530 }, { "epoch": 0.44535368395243347, "grad_norm": 1.7658464759399914, "learning_rate": 6.1183412623128e-06, "loss": 0.6161, "step": 14531 }, { "epoch": 0.44538433247517467, "grad_norm": 1.5048363032261083, "learning_rate": 6.117857513012314e-06, "loss": 0.6293, "step": 14532 }, { "epoch": 0.4454149809979159, "grad_norm": 1.777842475221699, "learning_rate": 6.117373752697291e-06, "loss": 0.7572, "step": 14533 }, { "epoch": 0.4454456295206571, "grad_norm": 1.5272819688310575, "learning_rate": 6.116889981372498e-06, "loss": 0.731, "step": 14534 }, { "epoch": 0.4454762780433983, "grad_norm": 1.6447034512783953, "learning_rate": 6.116406199042703e-06, "loss": 0.6716, "step": 14535 }, { "epoch": 0.4455069265661395, "grad_norm": 1.6234098573271887, "learning_rate": 6.115922405712672e-06, "loss": 0.6367, "step": 14536 }, { "epoch": 0.4455375750888807, "grad_norm": 0.7832752694739943, "learning_rate": 6.115438601387172e-06, "loss": 0.4598, "step": 14537 }, { "epoch": 0.4455682236116219, "grad_norm": 1.8814520088719617, "learning_rate": 6.114954786070969e-06, "loss": 0.6479, "step": 14538 }, { "epoch": 0.4455988721343631, "grad_norm": 1.623040926464995, "learning_rate": 6.114470959768832e-06, "loss": 0.6244, "step": 14539 }, { "epoch": 0.4456295206571043, "grad_norm": 1.6861529984863628, "learning_rate": 6.1139871224855285e-06, "loss": 0.6298, "step": 14540 }, { "epoch": 0.4456601691798455, "grad_norm": 0.7377790459008731, "learning_rate": 6.113503274225824e-06, "loss": 0.4449, "step": 14541 }, { "epoch": 0.44569081770258673, "grad_norm": 1.9361896655362496, "learning_rate": 6.113019414994485e-06, "loss": 0.7007, "step": 14542 }, { "epoch": 0.44572146622532793, "grad_norm": 1.6196164914187714, "learning_rate": 6.112535544796284e-06, "loss": 0.668, "step": 14543 }, { "epoch": 0.44575211474806914, "grad_norm": 0.7307850900663118, "learning_rate": 6.112051663635985e-06, "loss": 0.4413, "step": 14544 }, { "epoch": 0.44578276327081034, "grad_norm": 1.8114742964813024, "learning_rate": 6.111567771518354e-06, "loss": 0.7309, "step": 14545 }, { "epoch": 0.44581341179355155, "grad_norm": 1.868219858097067, "learning_rate": 6.1110838684481645e-06, "loss": 0.7115, "step": 14546 }, { "epoch": 0.44584406031629276, "grad_norm": 1.787897572851519, "learning_rate": 6.11059995443018e-06, "loss": 0.6983, "step": 14547 }, { "epoch": 0.44587470883903396, "grad_norm": 1.7090091397411762, "learning_rate": 6.11011602946917e-06, "loss": 0.6901, "step": 14548 }, { "epoch": 0.44590535736177517, "grad_norm": 1.5889004157725888, "learning_rate": 6.109632093569902e-06, "loss": 0.6314, "step": 14549 }, { "epoch": 0.4459360058845164, "grad_norm": 1.649155792986373, "learning_rate": 6.109148146737146e-06, "loss": 0.7138, "step": 14550 }, { "epoch": 0.4459666544072576, "grad_norm": 1.7046097720268467, "learning_rate": 6.108664188975669e-06, "loss": 0.6972, "step": 14551 }, { "epoch": 0.4459973029299988, "grad_norm": 1.7444938988573957, "learning_rate": 6.108180220290241e-06, "loss": 0.652, "step": 14552 }, { "epoch": 0.44602795145274, "grad_norm": 1.765926629152845, "learning_rate": 6.107696240685627e-06, "loss": 0.6473, "step": 14553 }, { "epoch": 0.4460585999754812, "grad_norm": 1.5999620463745796, "learning_rate": 6.107212250166602e-06, "loss": 0.5188, "step": 14554 }, { "epoch": 0.4460892484982224, "grad_norm": 1.5158846733005384, "learning_rate": 6.1067282487379295e-06, "loss": 0.6949, "step": 14555 }, { "epoch": 0.4461198970209636, "grad_norm": 1.8531212902802063, "learning_rate": 6.10624423640438e-06, "loss": 0.7519, "step": 14556 }, { "epoch": 0.4461505455437048, "grad_norm": 1.8382950638866422, "learning_rate": 6.105760213170725e-06, "loss": 0.766, "step": 14557 }, { "epoch": 0.446181194066446, "grad_norm": 1.69866215197135, "learning_rate": 6.1052761790417315e-06, "loss": 0.6226, "step": 14558 }, { "epoch": 0.4462118425891872, "grad_norm": 1.7056327292447901, "learning_rate": 6.104792134022169e-06, "loss": 0.7491, "step": 14559 }, { "epoch": 0.44624249111192843, "grad_norm": 1.7513218936426114, "learning_rate": 6.104308078116804e-06, "loss": 0.6159, "step": 14560 }, { "epoch": 0.44627313963466964, "grad_norm": 1.7376260371728052, "learning_rate": 6.103824011330411e-06, "loss": 0.6718, "step": 14561 }, { "epoch": 0.4463037881574108, "grad_norm": 0.8082021131733731, "learning_rate": 6.103339933667757e-06, "loss": 0.4003, "step": 14562 }, { "epoch": 0.446334436680152, "grad_norm": 1.8083599106031565, "learning_rate": 6.102855845133615e-06, "loss": 0.7785, "step": 14563 }, { "epoch": 0.4463650852028932, "grad_norm": 0.7680532112789716, "learning_rate": 6.102371745732749e-06, "loss": 0.433, "step": 14564 }, { "epoch": 0.4463957337256344, "grad_norm": 1.7278421791257528, "learning_rate": 6.101887635469933e-06, "loss": 0.6036, "step": 14565 }, { "epoch": 0.4464263822483756, "grad_norm": 1.8250373726284463, "learning_rate": 6.101403514349936e-06, "loss": 0.6449, "step": 14566 }, { "epoch": 0.4464570307711168, "grad_norm": 2.1326344392730374, "learning_rate": 6.100919382377531e-06, "loss": 0.7373, "step": 14567 }, { "epoch": 0.446487679293858, "grad_norm": 1.7264645235073546, "learning_rate": 6.100435239557482e-06, "loss": 0.7106, "step": 14568 }, { "epoch": 0.4465183278165992, "grad_norm": 1.6661521759500235, "learning_rate": 6.0999510858945646e-06, "loss": 0.7216, "step": 14569 }, { "epoch": 0.44654897633934043, "grad_norm": 0.897045663980705, "learning_rate": 6.099466921393546e-06, "loss": 0.4676, "step": 14570 }, { "epoch": 0.44657962486208164, "grad_norm": 2.0393475531570298, "learning_rate": 6.098982746059201e-06, "loss": 0.7693, "step": 14571 }, { "epoch": 0.44661027338482284, "grad_norm": 1.5278554373792057, "learning_rate": 6.098498559896298e-06, "loss": 0.6028, "step": 14572 }, { "epoch": 0.44664092190756405, "grad_norm": 1.6644099280036042, "learning_rate": 6.098014362909606e-06, "loss": 0.681, "step": 14573 }, { "epoch": 0.44667157043030525, "grad_norm": 0.7683639499341881, "learning_rate": 6.097530155103899e-06, "loss": 0.4442, "step": 14574 }, { "epoch": 0.44670221895304646, "grad_norm": 1.5440862254146739, "learning_rate": 6.097045936483944e-06, "loss": 0.6631, "step": 14575 }, { "epoch": 0.44673286747578766, "grad_norm": 1.725728675314859, "learning_rate": 6.096561707054517e-06, "loss": 0.6969, "step": 14576 }, { "epoch": 0.44676351599852887, "grad_norm": 1.8740295494496642, "learning_rate": 6.096077466820386e-06, "loss": 0.7355, "step": 14577 }, { "epoch": 0.4467941645212701, "grad_norm": 1.6267110363072599, "learning_rate": 6.095593215786324e-06, "loss": 0.6026, "step": 14578 }, { "epoch": 0.4468248130440113, "grad_norm": 1.5336131207274861, "learning_rate": 6.095108953957101e-06, "loss": 0.6104, "step": 14579 }, { "epoch": 0.4468554615667525, "grad_norm": 1.79486058814839, "learning_rate": 6.09462468133749e-06, "loss": 0.5926, "step": 14580 }, { "epoch": 0.4468861100894937, "grad_norm": 1.6372090275544326, "learning_rate": 6.09414039793226e-06, "loss": 0.5727, "step": 14581 }, { "epoch": 0.4469167586122349, "grad_norm": 1.7824421870036586, "learning_rate": 6.093656103746187e-06, "loss": 0.7262, "step": 14582 }, { "epoch": 0.4469474071349761, "grad_norm": 1.6237840417493912, "learning_rate": 6.09317179878404e-06, "loss": 0.6388, "step": 14583 }, { "epoch": 0.4469780556577173, "grad_norm": 1.6493489824672984, "learning_rate": 6.092687483050592e-06, "loss": 0.6095, "step": 14584 }, { "epoch": 0.4470087041804585, "grad_norm": 1.675862803792404, "learning_rate": 6.092203156550614e-06, "loss": 0.7128, "step": 14585 }, { "epoch": 0.4470393527031997, "grad_norm": 1.8447791252825219, "learning_rate": 6.091718819288879e-06, "loss": 0.6613, "step": 14586 }, { "epoch": 0.4470700012259409, "grad_norm": 1.766061755990687, "learning_rate": 6.091234471270159e-06, "loss": 0.6265, "step": 14587 }, { "epoch": 0.44710064974868213, "grad_norm": 1.758180913445557, "learning_rate": 6.090750112499226e-06, "loss": 0.6312, "step": 14588 }, { "epoch": 0.44713129827142334, "grad_norm": 1.5899166663829438, "learning_rate": 6.0902657429808535e-06, "loss": 0.5927, "step": 14589 }, { "epoch": 0.44716194679416454, "grad_norm": 1.7763286931456763, "learning_rate": 6.089781362719813e-06, "loss": 0.5657, "step": 14590 }, { "epoch": 0.44719259531690575, "grad_norm": 1.7420090482144754, "learning_rate": 6.08929697172088e-06, "loss": 0.6068, "step": 14591 }, { "epoch": 0.44722324383964696, "grad_norm": 0.9444958749607975, "learning_rate": 6.088812569988822e-06, "loss": 0.4585, "step": 14592 }, { "epoch": 0.4472538923623881, "grad_norm": 1.5951359182745695, "learning_rate": 6.088328157528418e-06, "loss": 0.6362, "step": 14593 }, { "epoch": 0.4472845408851293, "grad_norm": 1.6971720231312923, "learning_rate": 6.0878437343444375e-06, "loss": 0.6834, "step": 14594 }, { "epoch": 0.4473151894078705, "grad_norm": 1.6417924652985338, "learning_rate": 6.087359300441655e-06, "loss": 0.629, "step": 14595 }, { "epoch": 0.4473458379306117, "grad_norm": 0.7555645342997194, "learning_rate": 6.086874855824842e-06, "loss": 0.4554, "step": 14596 }, { "epoch": 0.44737648645335293, "grad_norm": 1.6711142169356408, "learning_rate": 6.086390400498773e-06, "loss": 0.6875, "step": 14597 }, { "epoch": 0.44740713497609413, "grad_norm": 1.6096823174463668, "learning_rate": 6.085905934468221e-06, "loss": 0.5623, "step": 14598 }, { "epoch": 0.44743778349883534, "grad_norm": 1.69012005699684, "learning_rate": 6.085421457737961e-06, "loss": 0.6815, "step": 14599 }, { "epoch": 0.44746843202157655, "grad_norm": 1.7907507989386748, "learning_rate": 6.084936970312764e-06, "loss": 0.722, "step": 14600 }, { "epoch": 0.44749908054431775, "grad_norm": 1.5910210930827888, "learning_rate": 6.084452472197408e-06, "loss": 0.679, "step": 14601 }, { "epoch": 0.44752972906705896, "grad_norm": 2.7744405266765826, "learning_rate": 6.0839679633966635e-06, "loss": 0.7575, "step": 14602 }, { "epoch": 0.44756037758980016, "grad_norm": 0.8774236400811362, "learning_rate": 6.0834834439153034e-06, "loss": 0.4496, "step": 14603 }, { "epoch": 0.44759102611254137, "grad_norm": 1.813755486065661, "learning_rate": 6.082998913758106e-06, "loss": 0.5903, "step": 14604 }, { "epoch": 0.4476216746352826, "grad_norm": 0.8140177909368866, "learning_rate": 6.082514372929843e-06, "loss": 0.4675, "step": 14605 }, { "epoch": 0.4476523231580238, "grad_norm": 1.866548495080769, "learning_rate": 6.082029821435288e-06, "loss": 0.7377, "step": 14606 }, { "epoch": 0.447682971680765, "grad_norm": 1.5089490061205846, "learning_rate": 6.081545259279216e-06, "loss": 0.6424, "step": 14607 }, { "epoch": 0.4477136202035062, "grad_norm": 1.7293792734309363, "learning_rate": 6.081060686466403e-06, "loss": 0.5943, "step": 14608 }, { "epoch": 0.4477442687262474, "grad_norm": 1.661368754977721, "learning_rate": 6.080576103001622e-06, "loss": 0.7155, "step": 14609 }, { "epoch": 0.4477749172489886, "grad_norm": 1.7434242827144155, "learning_rate": 6.080091508889649e-06, "loss": 0.6868, "step": 14610 }, { "epoch": 0.4478055657717298, "grad_norm": 1.579530751142045, "learning_rate": 6.079606904135256e-06, "loss": 0.5589, "step": 14611 }, { "epoch": 0.447836214294471, "grad_norm": 1.866848929871751, "learning_rate": 6.079122288743221e-06, "loss": 0.7838, "step": 14612 }, { "epoch": 0.4478668628172122, "grad_norm": 1.7885676142701097, "learning_rate": 6.078637662718319e-06, "loss": 0.6741, "step": 14613 }, { "epoch": 0.4478975113399534, "grad_norm": 0.8530868595340987, "learning_rate": 6.078153026065321e-06, "loss": 0.4454, "step": 14614 }, { "epoch": 0.44792815986269463, "grad_norm": 1.6789371993689401, "learning_rate": 6.0776683787890075e-06, "loss": 0.5737, "step": 14615 }, { "epoch": 0.44795880838543584, "grad_norm": 2.049871079412684, "learning_rate": 6.077183720894152e-06, "loss": 0.6043, "step": 14616 }, { "epoch": 0.44798945690817704, "grad_norm": 1.8115470014826294, "learning_rate": 6.076699052385531e-06, "loss": 0.706, "step": 14617 }, { "epoch": 0.44802010543091825, "grad_norm": 1.887534075313164, "learning_rate": 6.0762143732679156e-06, "loss": 0.6466, "step": 14618 }, { "epoch": 0.44805075395365945, "grad_norm": 1.8622616503055953, "learning_rate": 6.075729683546087e-06, "loss": 0.567, "step": 14619 }, { "epoch": 0.44808140247640066, "grad_norm": 1.9081001083086384, "learning_rate": 6.075244983224816e-06, "loss": 0.6547, "step": 14620 }, { "epoch": 0.44811205099914186, "grad_norm": 0.7694232555344831, "learning_rate": 6.074760272308885e-06, "loss": 0.4686, "step": 14621 }, { "epoch": 0.44814269952188307, "grad_norm": 1.7248122383763458, "learning_rate": 6.074275550803063e-06, "loss": 0.7272, "step": 14622 }, { "epoch": 0.4481733480446243, "grad_norm": 1.7209723335504699, "learning_rate": 6.073790818712131e-06, "loss": 0.6807, "step": 14623 }, { "epoch": 0.4482039965673654, "grad_norm": 1.8675830367757047, "learning_rate": 6.073306076040861e-06, "loss": 0.7147, "step": 14624 }, { "epoch": 0.44823464509010663, "grad_norm": 1.7264914735557297, "learning_rate": 6.072821322794034e-06, "loss": 0.6228, "step": 14625 }, { "epoch": 0.44826529361284784, "grad_norm": 1.6288760449433297, "learning_rate": 6.0723365589764224e-06, "loss": 0.5969, "step": 14626 }, { "epoch": 0.44829594213558904, "grad_norm": 0.7742467753094343, "learning_rate": 6.0718517845928035e-06, "loss": 0.4311, "step": 14627 }, { "epoch": 0.44832659065833025, "grad_norm": 1.6671511209386505, "learning_rate": 6.0713669996479584e-06, "loss": 0.6793, "step": 14628 }, { "epoch": 0.44835723918107145, "grad_norm": 1.7608120642442393, "learning_rate": 6.070882204146656e-06, "loss": 0.6957, "step": 14629 }, { "epoch": 0.44838788770381266, "grad_norm": 1.613483614902363, "learning_rate": 6.070397398093681e-06, "loss": 0.6159, "step": 14630 }, { "epoch": 0.44841853622655387, "grad_norm": 1.699265235821568, "learning_rate": 6.069912581493803e-06, "loss": 0.5888, "step": 14631 }, { "epoch": 0.44844918474929507, "grad_norm": 1.6570481984524748, "learning_rate": 6.069427754351805e-06, "loss": 0.5741, "step": 14632 }, { "epoch": 0.4484798332720363, "grad_norm": 2.2691527065094763, "learning_rate": 6.068942916672461e-06, "loss": 0.7901, "step": 14633 }, { "epoch": 0.4485104817947775, "grad_norm": 1.586785832973947, "learning_rate": 6.068458068460549e-06, "loss": 0.6381, "step": 14634 }, { "epoch": 0.4485411303175187, "grad_norm": 1.7243459554815563, "learning_rate": 6.067973209720845e-06, "loss": 0.6317, "step": 14635 }, { "epoch": 0.4485717788402599, "grad_norm": 1.6149818989637785, "learning_rate": 6.067488340458131e-06, "loss": 0.6599, "step": 14636 }, { "epoch": 0.4486024273630011, "grad_norm": 1.7748733403984414, "learning_rate": 6.067003460677177e-06, "loss": 0.6976, "step": 14637 }, { "epoch": 0.4486330758857423, "grad_norm": 1.8388413719426524, "learning_rate": 6.066518570382768e-06, "loss": 0.7104, "step": 14638 }, { "epoch": 0.4486637244084835, "grad_norm": 1.0439999946534766, "learning_rate": 6.066033669579677e-06, "loss": 0.4521, "step": 14639 }, { "epoch": 0.4486943729312247, "grad_norm": 1.6077557574132924, "learning_rate": 6.065548758272684e-06, "loss": 0.5775, "step": 14640 }, { "epoch": 0.4487250214539659, "grad_norm": 1.5102489957502891, "learning_rate": 6.065063836466567e-06, "loss": 0.6239, "step": 14641 }, { "epoch": 0.44875566997670713, "grad_norm": 1.9324785294096432, "learning_rate": 6.064578904166103e-06, "loss": 0.6389, "step": 14642 }, { "epoch": 0.44878631849944833, "grad_norm": 1.9529682792129135, "learning_rate": 6.0640939613760705e-06, "loss": 0.6653, "step": 14643 }, { "epoch": 0.44881696702218954, "grad_norm": 1.6366331504072131, "learning_rate": 6.063609008101249e-06, "loss": 0.663, "step": 14644 }, { "epoch": 0.44884761554493074, "grad_norm": 1.5859924091000448, "learning_rate": 6.063124044346415e-06, "loss": 0.7047, "step": 14645 }, { "epoch": 0.44887826406767195, "grad_norm": 1.6369174823167028, "learning_rate": 6.0626390701163474e-06, "loss": 0.5828, "step": 14646 }, { "epoch": 0.44890891259041316, "grad_norm": 1.4865099329673332, "learning_rate": 6.062154085415826e-06, "loss": 0.6222, "step": 14647 }, { "epoch": 0.44893956111315436, "grad_norm": 0.906301662575853, "learning_rate": 6.061669090249628e-06, "loss": 0.45, "step": 14648 }, { "epoch": 0.44897020963589557, "grad_norm": 1.89234318227374, "learning_rate": 6.061184084622534e-06, "loss": 0.644, "step": 14649 }, { "epoch": 0.4490008581586368, "grad_norm": 1.7083727428922468, "learning_rate": 6.060699068539319e-06, "loss": 0.626, "step": 14650 }, { "epoch": 0.449031506681378, "grad_norm": 1.86245603231633, "learning_rate": 6.060214042004767e-06, "loss": 0.6174, "step": 14651 }, { "epoch": 0.4490621552041192, "grad_norm": 1.7151796589825647, "learning_rate": 6.059729005023655e-06, "loss": 0.6064, "step": 14652 }, { "epoch": 0.4490928037268604, "grad_norm": 1.6581706366471476, "learning_rate": 6.05924395760076e-06, "loss": 0.5971, "step": 14653 }, { "epoch": 0.4491234522496016, "grad_norm": 1.5772199238351656, "learning_rate": 6.0587588997408646e-06, "loss": 0.5819, "step": 14654 }, { "epoch": 0.44915410077234275, "grad_norm": 1.739379962489197, "learning_rate": 6.058273831448747e-06, "loss": 0.6538, "step": 14655 }, { "epoch": 0.44918474929508395, "grad_norm": 1.7324865944605563, "learning_rate": 6.057788752729187e-06, "loss": 0.6151, "step": 14656 }, { "epoch": 0.44921539781782516, "grad_norm": 1.6677623142541917, "learning_rate": 6.057303663586962e-06, "loss": 0.5546, "step": 14657 }, { "epoch": 0.44924604634056636, "grad_norm": 1.6398196473783015, "learning_rate": 6.056818564026855e-06, "loss": 0.7008, "step": 14658 }, { "epoch": 0.44927669486330757, "grad_norm": 1.6673702294771493, "learning_rate": 6.056333454053645e-06, "loss": 0.6583, "step": 14659 }, { "epoch": 0.4493073433860488, "grad_norm": 1.6663596386833706, "learning_rate": 6.05584833367211e-06, "loss": 0.6919, "step": 14660 }, { "epoch": 0.44933799190879, "grad_norm": 1.7093554681110275, "learning_rate": 6.0553632028870305e-06, "loss": 0.703, "step": 14661 }, { "epoch": 0.4493686404315312, "grad_norm": 1.7133905993440632, "learning_rate": 6.0548780617031875e-06, "loss": 0.5595, "step": 14662 }, { "epoch": 0.4493992889542724, "grad_norm": 1.5281565281776397, "learning_rate": 6.054392910125362e-06, "loss": 0.6177, "step": 14663 }, { "epoch": 0.4494299374770136, "grad_norm": 1.5658486698980405, "learning_rate": 6.053907748158333e-06, "loss": 0.5581, "step": 14664 }, { "epoch": 0.4494605859997548, "grad_norm": 1.559445758756294, "learning_rate": 6.053422575806881e-06, "loss": 0.5876, "step": 14665 }, { "epoch": 0.449491234522496, "grad_norm": 1.714850605546664, "learning_rate": 6.052937393075787e-06, "loss": 0.7253, "step": 14666 }, { "epoch": 0.4495218830452372, "grad_norm": 1.9698140812197125, "learning_rate": 6.0524521999698315e-06, "loss": 0.6725, "step": 14667 }, { "epoch": 0.4495525315679784, "grad_norm": 0.8602054405443833, "learning_rate": 6.051966996493795e-06, "loss": 0.448, "step": 14668 }, { "epoch": 0.4495831800907196, "grad_norm": 0.8426548229308302, "learning_rate": 6.05148178265246e-06, "loss": 0.4704, "step": 14669 }, { "epoch": 0.44961382861346083, "grad_norm": 1.7887515751137621, "learning_rate": 6.0509965584506035e-06, "loss": 0.6621, "step": 14670 }, { "epoch": 0.44964447713620204, "grad_norm": 1.7495100727089896, "learning_rate": 6.050511323893011e-06, "loss": 0.6571, "step": 14671 }, { "epoch": 0.44967512565894324, "grad_norm": 0.7573530652957384, "learning_rate": 6.05002607898446e-06, "loss": 0.4526, "step": 14672 }, { "epoch": 0.44970577418168445, "grad_norm": 0.7739492693463661, "learning_rate": 6.049540823729735e-06, "loss": 0.4468, "step": 14673 }, { "epoch": 0.44973642270442565, "grad_norm": 1.9852801488935354, "learning_rate": 6.049055558133614e-06, "loss": 0.6892, "step": 14674 }, { "epoch": 0.44976707122716686, "grad_norm": 1.731193987277517, "learning_rate": 6.048570282200883e-06, "loss": 0.6517, "step": 14675 }, { "epoch": 0.44979771974990806, "grad_norm": 1.854063034408709, "learning_rate": 6.0480849959363175e-06, "loss": 0.6546, "step": 14676 }, { "epoch": 0.44982836827264927, "grad_norm": 0.8041446353391454, "learning_rate": 6.047599699344704e-06, "loss": 0.4518, "step": 14677 }, { "epoch": 0.4498590167953905, "grad_norm": 1.6875483417507549, "learning_rate": 6.047114392430823e-06, "loss": 0.6851, "step": 14678 }, { "epoch": 0.4498896653181317, "grad_norm": 1.7920727512951438, "learning_rate": 6.046629075199456e-06, "loss": 0.6929, "step": 14679 }, { "epoch": 0.4499203138408729, "grad_norm": 1.750391919684459, "learning_rate": 6.046143747655383e-06, "loss": 0.6415, "step": 14680 }, { "epoch": 0.4499509623636141, "grad_norm": 1.5155376092470239, "learning_rate": 6.04565840980339e-06, "loss": 0.6091, "step": 14681 }, { "epoch": 0.4499816108863553, "grad_norm": 1.890179303781855, "learning_rate": 6.045173061648256e-06, "loss": 0.6107, "step": 14682 }, { "epoch": 0.4500122594090965, "grad_norm": 1.720385467611057, "learning_rate": 6.044687703194765e-06, "loss": 0.5856, "step": 14683 }, { "epoch": 0.4500429079318377, "grad_norm": 1.6625950347792429, "learning_rate": 6.044202334447698e-06, "loss": 0.6382, "step": 14684 }, { "epoch": 0.4500735564545789, "grad_norm": 1.5433875086192217, "learning_rate": 6.043716955411839e-06, "loss": 0.7079, "step": 14685 }, { "epoch": 0.45010420497732007, "grad_norm": 1.721998658450264, "learning_rate": 6.04323156609197e-06, "loss": 0.6023, "step": 14686 }, { "epoch": 0.45013485350006127, "grad_norm": 1.5058011220371768, "learning_rate": 6.042746166492873e-06, "loss": 0.651, "step": 14687 }, { "epoch": 0.4501655020228025, "grad_norm": 2.0814334350037034, "learning_rate": 6.042260756619331e-06, "loss": 0.6688, "step": 14688 }, { "epoch": 0.4501961505455437, "grad_norm": 0.8166917527973008, "learning_rate": 6.041775336476128e-06, "loss": 0.4361, "step": 14689 }, { "epoch": 0.4502267990682849, "grad_norm": 0.8272768700866728, "learning_rate": 6.041289906068046e-06, "loss": 0.452, "step": 14690 }, { "epoch": 0.4502574475910261, "grad_norm": 1.7945774937981342, "learning_rate": 6.040804465399867e-06, "loss": 0.6716, "step": 14691 }, { "epoch": 0.4502880961137673, "grad_norm": 1.6636502663406136, "learning_rate": 6.040319014476376e-06, "loss": 0.6874, "step": 14692 }, { "epoch": 0.4503187446365085, "grad_norm": 2.045757181660934, "learning_rate": 6.0398335533023546e-06, "loss": 0.7353, "step": 14693 }, { "epoch": 0.4503493931592497, "grad_norm": 1.6690498478565936, "learning_rate": 6.039348081882589e-06, "loss": 0.6537, "step": 14694 }, { "epoch": 0.4503800416819909, "grad_norm": 1.704273561451966, "learning_rate": 6.03886260022186e-06, "loss": 0.6142, "step": 14695 }, { "epoch": 0.4504106902047321, "grad_norm": 1.9024356771982176, "learning_rate": 6.038377108324951e-06, "loss": 0.6848, "step": 14696 }, { "epoch": 0.45044133872747333, "grad_norm": 0.859205783801287, "learning_rate": 6.037891606196648e-06, "loss": 0.4476, "step": 14697 }, { "epoch": 0.45047198725021453, "grad_norm": 2.0999532660719034, "learning_rate": 6.037406093841732e-06, "loss": 0.6293, "step": 14698 }, { "epoch": 0.45050263577295574, "grad_norm": 1.4769939409417054, "learning_rate": 6.03692057126499e-06, "loss": 0.5556, "step": 14699 }, { "epoch": 0.45053328429569695, "grad_norm": 0.7857516902891494, "learning_rate": 6.036435038471203e-06, "loss": 0.4398, "step": 14700 }, { "epoch": 0.45056393281843815, "grad_norm": 1.7240693901388615, "learning_rate": 6.035949495465157e-06, "loss": 0.6749, "step": 14701 }, { "epoch": 0.45059458134117936, "grad_norm": 1.6215744447512694, "learning_rate": 6.035463942251636e-06, "loss": 0.7034, "step": 14702 }, { "epoch": 0.45062522986392056, "grad_norm": 1.9569835320753195, "learning_rate": 6.0349783788354235e-06, "loss": 0.6798, "step": 14703 }, { "epoch": 0.45065587838666177, "grad_norm": 1.6264778113914868, "learning_rate": 6.034492805221304e-06, "loss": 0.6551, "step": 14704 }, { "epoch": 0.450686526909403, "grad_norm": 1.4834322628454406, "learning_rate": 6.034007221414064e-06, "loss": 0.6161, "step": 14705 }, { "epoch": 0.4507171754321442, "grad_norm": 1.6407158216795823, "learning_rate": 6.033521627418483e-06, "loss": 0.7721, "step": 14706 }, { "epoch": 0.4507478239548854, "grad_norm": 1.6330676093954524, "learning_rate": 6.033036023239352e-06, "loss": 0.7276, "step": 14707 }, { "epoch": 0.4507784724776266, "grad_norm": 1.8245118647537393, "learning_rate": 6.032550408881449e-06, "loss": 0.7294, "step": 14708 }, { "epoch": 0.4508091210003678, "grad_norm": 1.6862214939877067, "learning_rate": 6.032064784349566e-06, "loss": 0.6669, "step": 14709 }, { "epoch": 0.450839769523109, "grad_norm": 1.6189659671619758, "learning_rate": 6.031579149648483e-06, "loss": 0.6628, "step": 14710 }, { "epoch": 0.4508704180458502, "grad_norm": 1.4774699745889681, "learning_rate": 6.031093504782987e-06, "loss": 0.7893, "step": 14711 }, { "epoch": 0.4509010665685914, "grad_norm": 1.704052520066079, "learning_rate": 6.0306078497578636e-06, "loss": 0.6567, "step": 14712 }, { "epoch": 0.4509317150913326, "grad_norm": 0.878147091020303, "learning_rate": 6.030122184577897e-06, "loss": 0.4648, "step": 14713 }, { "epoch": 0.4509623636140738, "grad_norm": 1.6974591325072597, "learning_rate": 6.029636509247874e-06, "loss": 0.6808, "step": 14714 }, { "epoch": 0.45099301213681503, "grad_norm": 1.5729938831211674, "learning_rate": 6.029150823772576e-06, "loss": 0.7331, "step": 14715 }, { "epoch": 0.45102366065955624, "grad_norm": 1.787636795200323, "learning_rate": 6.028665128156794e-06, "loss": 0.6659, "step": 14716 }, { "epoch": 0.4510543091822974, "grad_norm": 1.6844003026991323, "learning_rate": 6.0281794224053115e-06, "loss": 0.5946, "step": 14717 }, { "epoch": 0.4510849577050386, "grad_norm": 1.573106514147505, "learning_rate": 6.027693706522914e-06, "loss": 0.6685, "step": 14718 }, { "epoch": 0.4511156062277798, "grad_norm": 1.7165724172224779, "learning_rate": 6.0272079805143855e-06, "loss": 0.6669, "step": 14719 }, { "epoch": 0.451146254750521, "grad_norm": 1.899349645165797, "learning_rate": 6.026722244384515e-06, "loss": 0.7425, "step": 14720 }, { "epoch": 0.4511769032732622, "grad_norm": 1.5736112608381267, "learning_rate": 6.0262364981380884e-06, "loss": 0.6688, "step": 14721 }, { "epoch": 0.4512075517960034, "grad_norm": 1.7846076567842624, "learning_rate": 6.02575074177989e-06, "loss": 0.6561, "step": 14722 }, { "epoch": 0.4512382003187446, "grad_norm": 1.5457005305477032, "learning_rate": 6.025264975314708e-06, "loss": 0.543, "step": 14723 }, { "epoch": 0.4512688488414858, "grad_norm": 1.8172277322198316, "learning_rate": 6.024779198747327e-06, "loss": 0.6497, "step": 14724 }, { "epoch": 0.45129949736422703, "grad_norm": 1.6922992097254501, "learning_rate": 6.024293412082534e-06, "loss": 0.6769, "step": 14725 }, { "epoch": 0.45133014588696824, "grad_norm": 1.8579798920545423, "learning_rate": 6.023807615325117e-06, "loss": 0.5996, "step": 14726 }, { "epoch": 0.45136079440970944, "grad_norm": 1.6307077380300825, "learning_rate": 6.023321808479862e-06, "loss": 0.6263, "step": 14727 }, { "epoch": 0.45139144293245065, "grad_norm": 1.9578867799358077, "learning_rate": 6.022835991551555e-06, "loss": 0.6299, "step": 14728 }, { "epoch": 0.45142209145519185, "grad_norm": 0.8523557243756739, "learning_rate": 6.022350164544982e-06, "loss": 0.4254, "step": 14729 }, { "epoch": 0.45145273997793306, "grad_norm": 1.672102728960697, "learning_rate": 6.021864327464933e-06, "loss": 0.7503, "step": 14730 }, { "epoch": 0.45148338850067427, "grad_norm": 2.015733861444247, "learning_rate": 6.021378480316193e-06, "loss": 0.7268, "step": 14731 }, { "epoch": 0.45151403702341547, "grad_norm": 1.700460510448195, "learning_rate": 6.020892623103548e-06, "loss": 0.654, "step": 14732 }, { "epoch": 0.4515446855461567, "grad_norm": 1.7738192452002095, "learning_rate": 6.020406755831788e-06, "loss": 0.6917, "step": 14733 }, { "epoch": 0.4515753340688979, "grad_norm": 1.5983629225296867, "learning_rate": 6.0199208785056985e-06, "loss": 0.6119, "step": 14734 }, { "epoch": 0.4516059825916391, "grad_norm": 1.8117091120699955, "learning_rate": 6.019434991130069e-06, "loss": 0.6324, "step": 14735 }, { "epoch": 0.4516366311143803, "grad_norm": 1.8076741829924112, "learning_rate": 6.018949093709684e-06, "loss": 0.6874, "step": 14736 }, { "epoch": 0.4516672796371215, "grad_norm": 1.574837109151993, "learning_rate": 6.018463186249333e-06, "loss": 0.5856, "step": 14737 }, { "epoch": 0.4516979281598627, "grad_norm": 1.6702382095649126, "learning_rate": 6.017977268753805e-06, "loss": 0.7226, "step": 14738 }, { "epoch": 0.4517285766826039, "grad_norm": 1.7223155485573454, "learning_rate": 6.017491341227884e-06, "loss": 0.5785, "step": 14739 }, { "epoch": 0.4517592252053451, "grad_norm": 1.633533783220313, "learning_rate": 6.017005403676365e-06, "loss": 0.6422, "step": 14740 }, { "epoch": 0.4517898737280863, "grad_norm": 1.8081107992729557, "learning_rate": 6.016519456104028e-06, "loss": 0.6716, "step": 14741 }, { "epoch": 0.4518205222508275, "grad_norm": 1.6519854484306462, "learning_rate": 6.016033498515665e-06, "loss": 0.6359, "step": 14742 }, { "epoch": 0.45185117077356873, "grad_norm": 1.786285796668657, "learning_rate": 6.015547530916064e-06, "loss": 0.5893, "step": 14743 }, { "epoch": 0.45188181929630994, "grad_norm": 1.6815226281838245, "learning_rate": 6.015061553310016e-06, "loss": 0.6504, "step": 14744 }, { "epoch": 0.45191246781905114, "grad_norm": 1.943132966646875, "learning_rate": 6.014575565702303e-06, "loss": 0.6853, "step": 14745 }, { "epoch": 0.45194311634179235, "grad_norm": 0.8565181159856131, "learning_rate": 6.01408956809772e-06, "loss": 0.459, "step": 14746 }, { "epoch": 0.45197376486453356, "grad_norm": 1.495397996820354, "learning_rate": 6.01360356050105e-06, "loss": 0.6968, "step": 14747 }, { "epoch": 0.4520044133872747, "grad_norm": 1.7153484254544373, "learning_rate": 6.013117542917087e-06, "loss": 0.6946, "step": 14748 }, { "epoch": 0.4520350619100159, "grad_norm": 1.5837672664634321, "learning_rate": 6.012631515350619e-06, "loss": 0.5605, "step": 14749 }, { "epoch": 0.4520657104327571, "grad_norm": 1.6131697691500413, "learning_rate": 6.012145477806431e-06, "loss": 0.6058, "step": 14750 }, { "epoch": 0.4520963589554983, "grad_norm": 1.8851598319348881, "learning_rate": 6.011659430289316e-06, "loss": 0.7085, "step": 14751 }, { "epoch": 0.45212700747823953, "grad_norm": 0.8171723975694849, "learning_rate": 6.011173372804061e-06, "loss": 0.4685, "step": 14752 }, { "epoch": 0.45215765600098073, "grad_norm": 1.796727615773298, "learning_rate": 6.010687305355457e-06, "loss": 0.6827, "step": 14753 }, { "epoch": 0.45218830452372194, "grad_norm": 1.6884028507588666, "learning_rate": 6.0102012279482915e-06, "loss": 0.6371, "step": 14754 }, { "epoch": 0.45221895304646315, "grad_norm": 1.8703985325265786, "learning_rate": 6.009715140587357e-06, "loss": 0.72, "step": 14755 }, { "epoch": 0.45224960156920435, "grad_norm": 1.6967248826567578, "learning_rate": 6.0092290432774384e-06, "loss": 0.6695, "step": 14756 }, { "epoch": 0.45228025009194556, "grad_norm": 1.709932725948683, "learning_rate": 6.008742936023328e-06, "loss": 0.6426, "step": 14757 }, { "epoch": 0.45231089861468676, "grad_norm": 1.7172393565737571, "learning_rate": 6.008256818829815e-06, "loss": 0.6425, "step": 14758 }, { "epoch": 0.45234154713742797, "grad_norm": 1.8287188599584852, "learning_rate": 6.007770691701692e-06, "loss": 0.7332, "step": 14759 }, { "epoch": 0.4523721956601692, "grad_norm": 0.7923121894554046, "learning_rate": 6.007284554643744e-06, "loss": 0.4327, "step": 14760 }, { "epoch": 0.4524028441829104, "grad_norm": 1.6990412462278306, "learning_rate": 6.006798407660764e-06, "loss": 0.6541, "step": 14761 }, { "epoch": 0.4524334927056516, "grad_norm": 2.2113082868840257, "learning_rate": 6.006312250757542e-06, "loss": 0.6978, "step": 14762 }, { "epoch": 0.4524641412283928, "grad_norm": 1.594558232219871, "learning_rate": 6.005826083938868e-06, "loss": 0.69, "step": 14763 }, { "epoch": 0.452494789751134, "grad_norm": 1.6050925896941703, "learning_rate": 6.005339907209533e-06, "loss": 0.618, "step": 14764 }, { "epoch": 0.4525254382738752, "grad_norm": 1.8005540649416427, "learning_rate": 6.004853720574325e-06, "loss": 0.7259, "step": 14765 }, { "epoch": 0.4525560867966164, "grad_norm": 1.6972897319843214, "learning_rate": 6.0043675240380385e-06, "loss": 0.6309, "step": 14766 }, { "epoch": 0.4525867353193576, "grad_norm": 1.4815370358761464, "learning_rate": 6.00388131760546e-06, "loss": 0.5745, "step": 14767 }, { "epoch": 0.4526173838420988, "grad_norm": 2.031608368936869, "learning_rate": 6.0033951012813825e-06, "loss": 0.6745, "step": 14768 }, { "epoch": 0.45264803236484, "grad_norm": 1.6580519140763146, "learning_rate": 6.002908875070597e-06, "loss": 0.6083, "step": 14769 }, { "epoch": 0.45267868088758123, "grad_norm": 1.8619549304078344, "learning_rate": 6.002422638977892e-06, "loss": 0.6274, "step": 14770 }, { "epoch": 0.45270932941032244, "grad_norm": 0.8261469759002075, "learning_rate": 6.001936393008062e-06, "loss": 0.4398, "step": 14771 }, { "epoch": 0.45273997793306364, "grad_norm": 1.662093388580578, "learning_rate": 6.001450137165896e-06, "loss": 0.6686, "step": 14772 }, { "epoch": 0.45277062645580485, "grad_norm": 1.7022628006734546, "learning_rate": 6.0009638714561846e-06, "loss": 0.6305, "step": 14773 }, { "epoch": 0.45280127497854605, "grad_norm": 1.7007739953420091, "learning_rate": 6.000477595883721e-06, "loss": 0.6573, "step": 14774 }, { "epoch": 0.45283192350128726, "grad_norm": 1.6867647826980343, "learning_rate": 5.999991310453296e-06, "loss": 0.675, "step": 14775 }, { "epoch": 0.45286257202402846, "grad_norm": 1.6907938598701329, "learning_rate": 5.999505015169701e-06, "loss": 0.6325, "step": 14776 }, { "epoch": 0.45289322054676967, "grad_norm": 1.6956743742842624, "learning_rate": 5.999018710037725e-06, "loss": 0.6617, "step": 14777 }, { "epoch": 0.4529238690695109, "grad_norm": 1.7443492164003078, "learning_rate": 5.998532395062165e-06, "loss": 0.6423, "step": 14778 }, { "epoch": 0.452954517592252, "grad_norm": 1.8652070510859209, "learning_rate": 5.9980460702478084e-06, "loss": 0.7353, "step": 14779 }, { "epoch": 0.45298516611499323, "grad_norm": 1.8494749546597617, "learning_rate": 5.997559735599448e-06, "loss": 0.6814, "step": 14780 }, { "epoch": 0.45301581463773444, "grad_norm": 1.6321898852736774, "learning_rate": 5.997073391121876e-06, "loss": 0.643, "step": 14781 }, { "epoch": 0.45304646316047564, "grad_norm": 1.6723099755599216, "learning_rate": 5.996587036819887e-06, "loss": 0.6386, "step": 14782 }, { "epoch": 0.45307711168321685, "grad_norm": 1.728814719131714, "learning_rate": 5.996100672698269e-06, "loss": 0.6904, "step": 14783 }, { "epoch": 0.45310776020595805, "grad_norm": 1.8585757942857684, "learning_rate": 5.995614298761816e-06, "loss": 0.6884, "step": 14784 }, { "epoch": 0.45313840872869926, "grad_norm": 1.536974510134653, "learning_rate": 5.995127915015322e-06, "loss": 0.6583, "step": 14785 }, { "epoch": 0.45316905725144047, "grad_norm": 2.766802311543049, "learning_rate": 5.994641521463578e-06, "loss": 0.6653, "step": 14786 }, { "epoch": 0.45319970577418167, "grad_norm": 1.5649428084548564, "learning_rate": 5.994155118111376e-06, "loss": 0.6421, "step": 14787 }, { "epoch": 0.4532303542969229, "grad_norm": 1.6280009813079919, "learning_rate": 5.9936687049635075e-06, "loss": 0.7129, "step": 14788 }, { "epoch": 0.4532610028196641, "grad_norm": 0.8504725995965158, "learning_rate": 5.99318228202477e-06, "loss": 0.4333, "step": 14789 }, { "epoch": 0.4532916513424053, "grad_norm": 0.797346619242297, "learning_rate": 5.992695849299952e-06, "loss": 0.4419, "step": 14790 }, { "epoch": 0.4533222998651465, "grad_norm": 2.0235851266573692, "learning_rate": 5.992209406793847e-06, "loss": 0.7196, "step": 14791 }, { "epoch": 0.4533529483878877, "grad_norm": 1.839844992873075, "learning_rate": 5.99172295451125e-06, "loss": 0.6604, "step": 14792 }, { "epoch": 0.4533835969106289, "grad_norm": 1.659288766819305, "learning_rate": 5.991236492456952e-06, "loss": 0.6146, "step": 14793 }, { "epoch": 0.4534142454333701, "grad_norm": 1.5555327936247487, "learning_rate": 5.99075002063575e-06, "loss": 0.6644, "step": 14794 }, { "epoch": 0.4534448939561113, "grad_norm": 0.8574271039270966, "learning_rate": 5.990263539052431e-06, "loss": 0.4491, "step": 14795 }, { "epoch": 0.4534755424788525, "grad_norm": 1.8072374616079694, "learning_rate": 5.989777047711793e-06, "loss": 0.7137, "step": 14796 }, { "epoch": 0.45350619100159373, "grad_norm": 1.7446669391355882, "learning_rate": 5.9892905466186294e-06, "loss": 0.5562, "step": 14797 }, { "epoch": 0.45353683952433493, "grad_norm": 1.5146233684359491, "learning_rate": 5.9888040357777334e-06, "loss": 0.6752, "step": 14798 }, { "epoch": 0.45356748804707614, "grad_norm": 1.854820165961491, "learning_rate": 5.988317515193897e-06, "loss": 0.6243, "step": 14799 }, { "epoch": 0.45359813656981735, "grad_norm": 1.7733045631313311, "learning_rate": 5.987830984871915e-06, "loss": 0.6976, "step": 14800 }, { "epoch": 0.45362878509255855, "grad_norm": 1.794232030025211, "learning_rate": 5.987344444816582e-06, "loss": 0.7241, "step": 14801 }, { "epoch": 0.45365943361529976, "grad_norm": 1.8439706636815814, "learning_rate": 5.9868578950326926e-06, "loss": 0.7336, "step": 14802 }, { "epoch": 0.45369008213804096, "grad_norm": 1.7406234848510713, "learning_rate": 5.986371335525038e-06, "loss": 0.6269, "step": 14803 }, { "epoch": 0.45372073066078217, "grad_norm": 1.7180713698166947, "learning_rate": 5.985884766298415e-06, "loss": 0.6541, "step": 14804 }, { "epoch": 0.4537513791835234, "grad_norm": 1.7967029749752679, "learning_rate": 5.985398187357618e-06, "loss": 0.6848, "step": 14805 }, { "epoch": 0.4537820277062646, "grad_norm": 1.6160389131499828, "learning_rate": 5.984911598707439e-06, "loss": 0.5756, "step": 14806 }, { "epoch": 0.4538126762290058, "grad_norm": 0.8731088564729937, "learning_rate": 5.9844250003526764e-06, "loss": 0.456, "step": 14807 }, { "epoch": 0.453843324751747, "grad_norm": 0.8218302496420524, "learning_rate": 5.98393839229812e-06, "loss": 0.4596, "step": 14808 }, { "epoch": 0.4538739732744882, "grad_norm": 1.6431183961142457, "learning_rate": 5.983451774548568e-06, "loss": 0.6424, "step": 14809 }, { "epoch": 0.45390462179722935, "grad_norm": 1.807967399830412, "learning_rate": 5.982965147108813e-06, "loss": 0.5627, "step": 14810 }, { "epoch": 0.45393527031997055, "grad_norm": 0.7575962372675313, "learning_rate": 5.982478509983652e-06, "loss": 0.4433, "step": 14811 }, { "epoch": 0.45396591884271176, "grad_norm": 1.5415732275888374, "learning_rate": 5.981991863177878e-06, "loss": 0.5878, "step": 14812 }, { "epoch": 0.45399656736545296, "grad_norm": 1.6486601471396252, "learning_rate": 5.98150520669629e-06, "loss": 0.6425, "step": 14813 }, { "epoch": 0.45402721588819417, "grad_norm": 1.6176017098421032, "learning_rate": 5.981018540543676e-06, "loss": 0.6849, "step": 14814 }, { "epoch": 0.4540578644109354, "grad_norm": 1.7937812893237537, "learning_rate": 5.9805318647248376e-06, "loss": 0.6374, "step": 14815 }, { "epoch": 0.4540885129336766, "grad_norm": 1.9358105204095704, "learning_rate": 5.9800451792445655e-06, "loss": 0.6479, "step": 14816 }, { "epoch": 0.4541191614564178, "grad_norm": 2.0718274103811, "learning_rate": 5.9795584841076605e-06, "loss": 0.607, "step": 14817 }, { "epoch": 0.454149809979159, "grad_norm": 2.021017858314288, "learning_rate": 5.979071779318913e-06, "loss": 0.6968, "step": 14818 }, { "epoch": 0.4541804585019002, "grad_norm": 1.6465841756795732, "learning_rate": 5.9785850648831215e-06, "loss": 0.6518, "step": 14819 }, { "epoch": 0.4542111070246414, "grad_norm": 1.6584220999097967, "learning_rate": 5.978098340805081e-06, "loss": 0.6546, "step": 14820 }, { "epoch": 0.4542417555473826, "grad_norm": 1.7938716581801664, "learning_rate": 5.977611607089588e-06, "loss": 0.6953, "step": 14821 }, { "epoch": 0.4542724040701238, "grad_norm": 1.77336279860094, "learning_rate": 5.977124863741437e-06, "loss": 0.6989, "step": 14822 }, { "epoch": 0.454303052592865, "grad_norm": 1.8217109067454231, "learning_rate": 5.976638110765424e-06, "loss": 0.7162, "step": 14823 }, { "epoch": 0.4543337011156062, "grad_norm": 1.582733483600518, "learning_rate": 5.976151348166347e-06, "loss": 0.6177, "step": 14824 }, { "epoch": 0.45436434963834743, "grad_norm": 1.7094112510572939, "learning_rate": 5.975664575949001e-06, "loss": 0.7055, "step": 14825 }, { "epoch": 0.45439499816108864, "grad_norm": 1.647538927960111, "learning_rate": 5.975177794118182e-06, "loss": 0.6646, "step": 14826 }, { "epoch": 0.45442564668382984, "grad_norm": 1.479633166062767, "learning_rate": 5.974691002678685e-06, "loss": 0.6203, "step": 14827 }, { "epoch": 0.45445629520657105, "grad_norm": 1.8651157692836242, "learning_rate": 5.974204201635311e-06, "loss": 0.5797, "step": 14828 }, { "epoch": 0.45448694372931225, "grad_norm": 1.662024179707531, "learning_rate": 5.973717390992853e-06, "loss": 0.7158, "step": 14829 }, { "epoch": 0.45451759225205346, "grad_norm": 1.750547125452739, "learning_rate": 5.973230570756108e-06, "loss": 0.659, "step": 14830 }, { "epoch": 0.45454824077479467, "grad_norm": 0.9515163820409305, "learning_rate": 5.972743740929871e-06, "loss": 0.4279, "step": 14831 }, { "epoch": 0.45457888929753587, "grad_norm": 1.7544172157050681, "learning_rate": 5.972256901518944e-06, "loss": 0.6399, "step": 14832 }, { "epoch": 0.4546095378202771, "grad_norm": 1.715341406060487, "learning_rate": 5.9717700525281195e-06, "loss": 0.7069, "step": 14833 }, { "epoch": 0.4546401863430183, "grad_norm": 1.6311891143041672, "learning_rate": 5.971283193962197e-06, "loss": 0.5874, "step": 14834 }, { "epoch": 0.4546708348657595, "grad_norm": 1.7681528327645275, "learning_rate": 5.970796325825971e-06, "loss": 0.7052, "step": 14835 }, { "epoch": 0.4547014833885007, "grad_norm": 1.6530926128037908, "learning_rate": 5.970309448124243e-06, "loss": 0.618, "step": 14836 }, { "epoch": 0.4547321319112419, "grad_norm": 1.636795605953908, "learning_rate": 5.9698225608618066e-06, "loss": 0.6176, "step": 14837 }, { "epoch": 0.4547627804339831, "grad_norm": 1.769408673958416, "learning_rate": 5.969335664043458e-06, "loss": 0.6884, "step": 14838 }, { "epoch": 0.4547934289567243, "grad_norm": 1.8222954057424277, "learning_rate": 5.968848757674e-06, "loss": 0.6841, "step": 14839 }, { "epoch": 0.4548240774794655, "grad_norm": 1.5425391076502057, "learning_rate": 5.968361841758228e-06, "loss": 0.6066, "step": 14840 }, { "epoch": 0.45485472600220667, "grad_norm": 1.8492432750746537, "learning_rate": 5.967874916300937e-06, "loss": 0.7712, "step": 14841 }, { "epoch": 0.45488537452494787, "grad_norm": 1.6066391463145353, "learning_rate": 5.967387981306927e-06, "loss": 0.6688, "step": 14842 }, { "epoch": 0.4549160230476891, "grad_norm": 0.8662757221669604, "learning_rate": 5.966901036780997e-06, "loss": 0.4645, "step": 14843 }, { "epoch": 0.4549466715704303, "grad_norm": 1.8782184992688573, "learning_rate": 5.966414082727943e-06, "loss": 0.6966, "step": 14844 }, { "epoch": 0.4549773200931715, "grad_norm": 0.8368241663435584, "learning_rate": 5.965927119152561e-06, "loss": 0.4405, "step": 14845 }, { "epoch": 0.4550079686159127, "grad_norm": 1.8834768231323882, "learning_rate": 5.965440146059656e-06, "loss": 0.7306, "step": 14846 }, { "epoch": 0.4550386171386539, "grad_norm": 1.6837856254636079, "learning_rate": 5.96495316345402e-06, "loss": 0.695, "step": 14847 }, { "epoch": 0.4550692656613951, "grad_norm": 1.6970845283576743, "learning_rate": 5.964466171340455e-06, "loss": 0.6841, "step": 14848 }, { "epoch": 0.4550999141841363, "grad_norm": 1.6382508574480568, "learning_rate": 5.963979169723757e-06, "loss": 0.5971, "step": 14849 }, { "epoch": 0.4551305627068775, "grad_norm": 1.6688940185698702, "learning_rate": 5.963492158608726e-06, "loss": 0.6467, "step": 14850 }, { "epoch": 0.4551612112296187, "grad_norm": 1.7774533766691154, "learning_rate": 5.963005138000159e-06, "loss": 0.6614, "step": 14851 }, { "epoch": 0.45519185975235993, "grad_norm": 1.8122608603856227, "learning_rate": 5.962518107902859e-06, "loss": 0.6542, "step": 14852 }, { "epoch": 0.45522250827510113, "grad_norm": 1.7221910258248483, "learning_rate": 5.962031068321619e-06, "loss": 0.8053, "step": 14853 }, { "epoch": 0.45525315679784234, "grad_norm": 1.5768776491779186, "learning_rate": 5.961544019261242e-06, "loss": 0.5658, "step": 14854 }, { "epoch": 0.45528380532058355, "grad_norm": 1.63878676737052, "learning_rate": 5.961056960726525e-06, "loss": 0.6501, "step": 14855 }, { "epoch": 0.45531445384332475, "grad_norm": 1.734567392489296, "learning_rate": 5.96056989272227e-06, "loss": 0.6399, "step": 14856 }, { "epoch": 0.45534510236606596, "grad_norm": 1.835662610169523, "learning_rate": 5.96008281525327e-06, "loss": 0.672, "step": 14857 }, { "epoch": 0.45537575088880716, "grad_norm": 1.6851245392030512, "learning_rate": 5.9595957283243326e-06, "loss": 0.6831, "step": 14858 }, { "epoch": 0.45540639941154837, "grad_norm": 1.578626304834, "learning_rate": 5.959108631940251e-06, "loss": 0.6539, "step": 14859 }, { "epoch": 0.4554370479342896, "grad_norm": 1.6653673063413856, "learning_rate": 5.958621526105825e-06, "loss": 0.6691, "step": 14860 }, { "epoch": 0.4554676964570308, "grad_norm": 0.8435489776262055, "learning_rate": 5.958134410825859e-06, "loss": 0.4575, "step": 14861 }, { "epoch": 0.455498344979772, "grad_norm": 2.2404209641923507, "learning_rate": 5.9576472861051474e-06, "loss": 0.6654, "step": 14862 }, { "epoch": 0.4555289935025132, "grad_norm": 1.5446530460626566, "learning_rate": 5.957160151948493e-06, "loss": 0.6318, "step": 14863 }, { "epoch": 0.4555596420252544, "grad_norm": 1.8637330071273936, "learning_rate": 5.956673008360695e-06, "loss": 0.715, "step": 14864 }, { "epoch": 0.4555902905479956, "grad_norm": 1.61632469240402, "learning_rate": 5.956185855346552e-06, "loss": 0.6124, "step": 14865 }, { "epoch": 0.4556209390707368, "grad_norm": 1.5683360589715822, "learning_rate": 5.955698692910865e-06, "loss": 0.6017, "step": 14866 }, { "epoch": 0.455651587593478, "grad_norm": 1.8496266174998368, "learning_rate": 5.955211521058437e-06, "loss": 0.6663, "step": 14867 }, { "epoch": 0.4556822361162192, "grad_norm": 1.4628314901828325, "learning_rate": 5.954724339794062e-06, "loss": 0.5059, "step": 14868 }, { "epoch": 0.4557128846389604, "grad_norm": 0.8031159970705307, "learning_rate": 5.954237149122546e-06, "loss": 0.4397, "step": 14869 }, { "epoch": 0.45574353316170163, "grad_norm": 1.7275315694993607, "learning_rate": 5.953749949048686e-06, "loss": 0.6747, "step": 14870 }, { "epoch": 0.45577418168444284, "grad_norm": 1.7062352498505495, "learning_rate": 5.953262739577283e-06, "loss": 0.6459, "step": 14871 }, { "epoch": 0.455804830207184, "grad_norm": 0.7479281688937528, "learning_rate": 5.952775520713141e-06, "loss": 0.4048, "step": 14872 }, { "epoch": 0.4558354787299252, "grad_norm": 1.927026474348482, "learning_rate": 5.952288292461057e-06, "loss": 0.8031, "step": 14873 }, { "epoch": 0.4558661272526664, "grad_norm": 1.6712058172600555, "learning_rate": 5.951801054825831e-06, "loss": 0.5807, "step": 14874 }, { "epoch": 0.4558967757754076, "grad_norm": 1.9402432008940635, "learning_rate": 5.951313807812268e-06, "loss": 0.6899, "step": 14875 }, { "epoch": 0.4559274242981488, "grad_norm": 1.8379524694699172, "learning_rate": 5.950826551425165e-06, "loss": 0.6753, "step": 14876 }, { "epoch": 0.45595807282089, "grad_norm": 1.5683890744465918, "learning_rate": 5.950339285669324e-06, "loss": 0.6863, "step": 14877 }, { "epoch": 0.4559887213436312, "grad_norm": 1.7384115341386714, "learning_rate": 5.94985201054955e-06, "loss": 0.7405, "step": 14878 }, { "epoch": 0.4560193698663724, "grad_norm": 1.9271041619716174, "learning_rate": 5.949364726070639e-06, "loss": 0.6469, "step": 14879 }, { "epoch": 0.45605001838911363, "grad_norm": 1.6789075666001374, "learning_rate": 5.948877432237396e-06, "loss": 0.7024, "step": 14880 }, { "epoch": 0.45608066691185484, "grad_norm": 1.8253112483414469, "learning_rate": 5.948390129054617e-06, "loss": 0.7246, "step": 14881 }, { "epoch": 0.45611131543459604, "grad_norm": 1.5419325971278315, "learning_rate": 5.947902816527112e-06, "loss": 0.6004, "step": 14882 }, { "epoch": 0.45614196395733725, "grad_norm": 1.539966519950394, "learning_rate": 5.947415494659675e-06, "loss": 0.6236, "step": 14883 }, { "epoch": 0.45617261248007845, "grad_norm": 1.7386431257953732, "learning_rate": 5.946928163457113e-06, "loss": 0.596, "step": 14884 }, { "epoch": 0.45620326100281966, "grad_norm": 1.6839153341054662, "learning_rate": 5.9464408229242235e-06, "loss": 0.6753, "step": 14885 }, { "epoch": 0.45623390952556087, "grad_norm": 1.7098304141394685, "learning_rate": 5.945953473065811e-06, "loss": 0.6872, "step": 14886 }, { "epoch": 0.45626455804830207, "grad_norm": 1.8983580846491952, "learning_rate": 5.945466113886678e-06, "loss": 0.6525, "step": 14887 }, { "epoch": 0.4562952065710433, "grad_norm": 1.5049834347932898, "learning_rate": 5.944978745391623e-06, "loss": 0.6383, "step": 14888 }, { "epoch": 0.4563258550937845, "grad_norm": 1.6872106629692, "learning_rate": 5.944491367585453e-06, "loss": 0.6454, "step": 14889 }, { "epoch": 0.4563565036165257, "grad_norm": 1.7865053357744332, "learning_rate": 5.944003980472968e-06, "loss": 0.6465, "step": 14890 }, { "epoch": 0.4563871521392669, "grad_norm": 2.34854509131332, "learning_rate": 5.9435165840589695e-06, "loss": 0.7182, "step": 14891 }, { "epoch": 0.4564178006620081, "grad_norm": 1.5701635146729542, "learning_rate": 5.9430291783482596e-06, "loss": 0.5657, "step": 14892 }, { "epoch": 0.4564484491847493, "grad_norm": 0.8579373492365686, "learning_rate": 5.942541763345643e-06, "loss": 0.4572, "step": 14893 }, { "epoch": 0.4564790977074905, "grad_norm": 1.570736642253433, "learning_rate": 5.942054339055922e-06, "loss": 0.6112, "step": 14894 }, { "epoch": 0.4565097462302317, "grad_norm": 1.6337160537794848, "learning_rate": 5.941566905483898e-06, "loss": 0.592, "step": 14895 }, { "epoch": 0.4565403947529729, "grad_norm": 1.7877783112400074, "learning_rate": 5.941079462634373e-06, "loss": 0.7399, "step": 14896 }, { "epoch": 0.45657104327571413, "grad_norm": 0.7973738017530309, "learning_rate": 5.940592010512152e-06, "loss": 0.4453, "step": 14897 }, { "epoch": 0.45660169179845533, "grad_norm": 2.001572490002811, "learning_rate": 5.940104549122039e-06, "loss": 0.6066, "step": 14898 }, { "epoch": 0.45663234032119654, "grad_norm": 1.5459687443798527, "learning_rate": 5.939617078468834e-06, "loss": 0.5836, "step": 14899 }, { "epoch": 0.45666298884393774, "grad_norm": 1.538561017094995, "learning_rate": 5.9391295985573405e-06, "loss": 0.6651, "step": 14900 }, { "epoch": 0.45669363736667895, "grad_norm": 1.5155912438849846, "learning_rate": 5.938642109392364e-06, "loss": 0.606, "step": 14901 }, { "epoch": 0.45672428588942016, "grad_norm": 1.6234513945646798, "learning_rate": 5.9381546109787055e-06, "loss": 0.6938, "step": 14902 }, { "epoch": 0.4567549344121613, "grad_norm": 1.6357631394904937, "learning_rate": 5.937667103321171e-06, "loss": 0.6909, "step": 14903 }, { "epoch": 0.4567855829349025, "grad_norm": 1.8257882933744625, "learning_rate": 5.937179586424562e-06, "loss": 0.6412, "step": 14904 }, { "epoch": 0.4568162314576437, "grad_norm": 1.8185167893405978, "learning_rate": 5.936692060293681e-06, "loss": 0.5487, "step": 14905 }, { "epoch": 0.4568468799803849, "grad_norm": 1.7191326682377057, "learning_rate": 5.936204524933338e-06, "loss": 0.725, "step": 14906 }, { "epoch": 0.45687752850312613, "grad_norm": 1.5866025284254988, "learning_rate": 5.935716980348329e-06, "loss": 0.5822, "step": 14907 }, { "epoch": 0.45690817702586733, "grad_norm": 1.5980269536654603, "learning_rate": 5.935229426543461e-06, "loss": 0.6132, "step": 14908 }, { "epoch": 0.45693882554860854, "grad_norm": 1.6309821481377889, "learning_rate": 5.93474186352354e-06, "loss": 0.6631, "step": 14909 }, { "epoch": 0.45696947407134975, "grad_norm": 1.7936572145847973, "learning_rate": 5.934254291293367e-06, "loss": 0.6293, "step": 14910 }, { "epoch": 0.45700012259409095, "grad_norm": 1.9616684969531928, "learning_rate": 5.933766709857746e-06, "loss": 0.8124, "step": 14911 }, { "epoch": 0.45703077111683216, "grad_norm": 1.7847535900698859, "learning_rate": 5.933279119221485e-06, "loss": 0.6603, "step": 14912 }, { "epoch": 0.45706141963957336, "grad_norm": 0.831325719668465, "learning_rate": 5.932791519389386e-06, "loss": 0.4294, "step": 14913 }, { "epoch": 0.45709206816231457, "grad_norm": 1.7092460033491583, "learning_rate": 5.932303910366252e-06, "loss": 0.7765, "step": 14914 }, { "epoch": 0.4571227166850558, "grad_norm": 1.6997821396588735, "learning_rate": 5.93181629215689e-06, "loss": 0.6653, "step": 14915 }, { "epoch": 0.457153365207797, "grad_norm": 1.5771602228664319, "learning_rate": 5.931328664766102e-06, "loss": 0.6059, "step": 14916 }, { "epoch": 0.4571840137305382, "grad_norm": 1.5463720650718353, "learning_rate": 5.930841028198698e-06, "loss": 0.5929, "step": 14917 }, { "epoch": 0.4572146622532794, "grad_norm": 1.57350744848473, "learning_rate": 5.930353382459476e-06, "loss": 0.597, "step": 14918 }, { "epoch": 0.4572453107760206, "grad_norm": 0.8079747689517466, "learning_rate": 5.929865727553246e-06, "loss": 0.4657, "step": 14919 }, { "epoch": 0.4572759592987618, "grad_norm": 1.8770756350485591, "learning_rate": 5.9293780634848096e-06, "loss": 0.6808, "step": 14920 }, { "epoch": 0.457306607821503, "grad_norm": 1.7492649026824463, "learning_rate": 5.928890390258975e-06, "loss": 0.6314, "step": 14921 }, { "epoch": 0.4573372563442442, "grad_norm": 1.5569075942243344, "learning_rate": 5.928402707880544e-06, "loss": 0.6098, "step": 14922 }, { "epoch": 0.4573679048669854, "grad_norm": 1.633836328419628, "learning_rate": 5.927915016354324e-06, "loss": 0.6629, "step": 14923 }, { "epoch": 0.4573985533897266, "grad_norm": 0.7734593538577855, "learning_rate": 5.92742731568512e-06, "loss": 0.4253, "step": 14924 }, { "epoch": 0.45742920191246783, "grad_norm": 0.7849528747607031, "learning_rate": 5.926939605877738e-06, "loss": 0.4495, "step": 14925 }, { "epoch": 0.45745985043520904, "grad_norm": 1.4264694528988537, "learning_rate": 5.926451886936983e-06, "loss": 0.6688, "step": 14926 }, { "epoch": 0.45749049895795024, "grad_norm": 1.5385567048534445, "learning_rate": 5.925964158867659e-06, "loss": 0.5748, "step": 14927 }, { "epoch": 0.45752114748069145, "grad_norm": 1.8229958830492927, "learning_rate": 5.925476421674574e-06, "loss": 0.7063, "step": 14928 }, { "epoch": 0.45755179600343265, "grad_norm": 1.5782002879696764, "learning_rate": 5.924988675362534e-06, "loss": 0.6792, "step": 14929 }, { "epoch": 0.45758244452617386, "grad_norm": 1.7700538608468785, "learning_rate": 5.9245009199363435e-06, "loss": 0.7008, "step": 14930 }, { "epoch": 0.45761309304891506, "grad_norm": 0.7914512172547106, "learning_rate": 5.9240131554008074e-06, "loss": 0.4249, "step": 14931 }, { "epoch": 0.45764374157165627, "grad_norm": 1.8783163909991976, "learning_rate": 5.923525381760735e-06, "loss": 0.6842, "step": 14932 }, { "epoch": 0.4576743900943975, "grad_norm": 2.0540292232414448, "learning_rate": 5.92303759902093e-06, "loss": 0.7098, "step": 14933 }, { "epoch": 0.4577050386171386, "grad_norm": 1.600910530057047, "learning_rate": 5.9225498071861994e-06, "loss": 0.6023, "step": 14934 }, { "epoch": 0.45773568713987983, "grad_norm": 0.8338434210647303, "learning_rate": 5.922062006261349e-06, "loss": 0.446, "step": 14935 }, { "epoch": 0.45776633566262104, "grad_norm": 1.611549133840614, "learning_rate": 5.921574196251188e-06, "loss": 0.6269, "step": 14936 }, { "epoch": 0.45779698418536224, "grad_norm": 1.8758669929707992, "learning_rate": 5.921086377160519e-06, "loss": 0.6606, "step": 14937 }, { "epoch": 0.45782763270810345, "grad_norm": 1.5556701440449554, "learning_rate": 5.92059854899415e-06, "loss": 0.5963, "step": 14938 }, { "epoch": 0.45785828123084465, "grad_norm": 1.8505412036705273, "learning_rate": 5.9201107117568865e-06, "loss": 0.6778, "step": 14939 }, { "epoch": 0.45788892975358586, "grad_norm": 1.8529730908548887, "learning_rate": 5.919622865453539e-06, "loss": 0.6361, "step": 14940 }, { "epoch": 0.45791957827632707, "grad_norm": 1.6478588950496773, "learning_rate": 5.9191350100889126e-06, "loss": 0.5188, "step": 14941 }, { "epoch": 0.45795022679906827, "grad_norm": 1.8250586315768718, "learning_rate": 5.918647145667811e-06, "loss": 0.6378, "step": 14942 }, { "epoch": 0.4579808753218095, "grad_norm": 1.886145885951831, "learning_rate": 5.918159272195046e-06, "loss": 0.7087, "step": 14943 }, { "epoch": 0.4580115238445507, "grad_norm": 1.6605972301666414, "learning_rate": 5.917671389675424e-06, "loss": 0.5957, "step": 14944 }, { "epoch": 0.4580421723672919, "grad_norm": 1.6429517652225463, "learning_rate": 5.917183498113749e-06, "loss": 0.5791, "step": 14945 }, { "epoch": 0.4580728208900331, "grad_norm": 1.7850065949727876, "learning_rate": 5.91669559751483e-06, "loss": 0.688, "step": 14946 }, { "epoch": 0.4581034694127743, "grad_norm": 1.8875915364843059, "learning_rate": 5.916207687883476e-06, "loss": 0.6396, "step": 14947 }, { "epoch": 0.4581341179355155, "grad_norm": 1.8577991523934996, "learning_rate": 5.9157197692244925e-06, "loss": 0.695, "step": 14948 }, { "epoch": 0.4581647664582567, "grad_norm": 1.667248689008499, "learning_rate": 5.915231841542689e-06, "loss": 0.6424, "step": 14949 }, { "epoch": 0.4581954149809979, "grad_norm": 1.8595911448850422, "learning_rate": 5.91474390484287e-06, "loss": 0.6045, "step": 14950 }, { "epoch": 0.4582260635037391, "grad_norm": 0.8129100246691185, "learning_rate": 5.914255959129846e-06, "loss": 0.4313, "step": 14951 }, { "epoch": 0.45825671202648033, "grad_norm": 1.8494223589101468, "learning_rate": 5.913768004408425e-06, "loss": 0.6925, "step": 14952 }, { "epoch": 0.45828736054922153, "grad_norm": 1.8706485863961744, "learning_rate": 5.913280040683414e-06, "loss": 0.6438, "step": 14953 }, { "epoch": 0.45831800907196274, "grad_norm": 1.870832844259727, "learning_rate": 5.912792067959619e-06, "loss": 0.6823, "step": 14954 }, { "epoch": 0.45834865759470395, "grad_norm": 1.5913784727667035, "learning_rate": 5.912304086241853e-06, "loss": 0.6495, "step": 14955 }, { "epoch": 0.45837930611744515, "grad_norm": 1.680167489138406, "learning_rate": 5.91181609553492e-06, "loss": 0.6454, "step": 14956 }, { "epoch": 0.45840995464018636, "grad_norm": 1.7226038786855888, "learning_rate": 5.911328095843629e-06, "loss": 0.6746, "step": 14957 }, { "epoch": 0.45844060316292756, "grad_norm": 1.5419094770794013, "learning_rate": 5.910840087172791e-06, "loss": 0.6877, "step": 14958 }, { "epoch": 0.45847125168566877, "grad_norm": 1.8164238962766286, "learning_rate": 5.910352069527211e-06, "loss": 0.7897, "step": 14959 }, { "epoch": 0.45850190020841, "grad_norm": 0.821843405118851, "learning_rate": 5.909864042911702e-06, "loss": 0.4549, "step": 14960 }, { "epoch": 0.4585325487311512, "grad_norm": 0.8363681445780149, "learning_rate": 5.909376007331066e-06, "loss": 0.4408, "step": 14961 }, { "epoch": 0.4585631972538924, "grad_norm": 1.8504930787256832, "learning_rate": 5.908887962790117e-06, "loss": 0.6413, "step": 14962 }, { "epoch": 0.4585938457766336, "grad_norm": 1.8027709082351155, "learning_rate": 5.9083999092936625e-06, "loss": 0.7491, "step": 14963 }, { "epoch": 0.4586244942993748, "grad_norm": 1.9957729531344846, "learning_rate": 5.907911846846511e-06, "loss": 0.5988, "step": 14964 }, { "epoch": 0.45865514282211595, "grad_norm": 1.918042384345431, "learning_rate": 5.907423775453472e-06, "loss": 0.7509, "step": 14965 }, { "epoch": 0.45868579134485715, "grad_norm": 1.896183986997033, "learning_rate": 5.906935695119354e-06, "loss": 0.6914, "step": 14966 }, { "epoch": 0.45871643986759836, "grad_norm": 0.8026333327111969, "learning_rate": 5.906447605848967e-06, "loss": 0.4426, "step": 14967 }, { "epoch": 0.45874708839033956, "grad_norm": 1.7125262892346136, "learning_rate": 5.90595950764712e-06, "loss": 0.6596, "step": 14968 }, { "epoch": 0.45877773691308077, "grad_norm": 1.8621618598783611, "learning_rate": 5.905471400518622e-06, "loss": 0.7953, "step": 14969 }, { "epoch": 0.458808385435822, "grad_norm": 1.7419207776509569, "learning_rate": 5.904983284468282e-06, "loss": 0.6448, "step": 14970 }, { "epoch": 0.4588390339585632, "grad_norm": 0.7871833943875163, "learning_rate": 5.9044951595009114e-06, "loss": 0.4432, "step": 14971 }, { "epoch": 0.4588696824813044, "grad_norm": 1.767195287130095, "learning_rate": 5.9040070256213166e-06, "loss": 0.6919, "step": 14972 }, { "epoch": 0.4589003310040456, "grad_norm": 1.8061387883388065, "learning_rate": 5.903518882834311e-06, "loss": 0.6463, "step": 14973 }, { "epoch": 0.4589309795267868, "grad_norm": 1.7306467245452823, "learning_rate": 5.903030731144701e-06, "loss": 0.6408, "step": 14974 }, { "epoch": 0.458961628049528, "grad_norm": 1.6968654399003467, "learning_rate": 5.902542570557302e-06, "loss": 0.6775, "step": 14975 }, { "epoch": 0.4589922765722692, "grad_norm": 1.6021679495429066, "learning_rate": 5.9020544010769155e-06, "loss": 0.6428, "step": 14976 }, { "epoch": 0.4590229250950104, "grad_norm": 1.5752807775451672, "learning_rate": 5.901566222708359e-06, "loss": 0.721, "step": 14977 }, { "epoch": 0.4590535736177516, "grad_norm": 1.9826251031459647, "learning_rate": 5.901078035456438e-06, "loss": 0.7592, "step": 14978 }, { "epoch": 0.4590842221404928, "grad_norm": 1.7712300838772206, "learning_rate": 5.900589839325966e-06, "loss": 0.6671, "step": 14979 }, { "epoch": 0.45911487066323403, "grad_norm": 1.3587412782603467, "learning_rate": 5.900101634321751e-06, "loss": 0.6102, "step": 14980 }, { "epoch": 0.45914551918597524, "grad_norm": 1.8382045608620274, "learning_rate": 5.899613420448606e-06, "loss": 0.6555, "step": 14981 }, { "epoch": 0.45917616770871644, "grad_norm": 1.9779949474548026, "learning_rate": 5.899125197711337e-06, "loss": 0.6465, "step": 14982 }, { "epoch": 0.45920681623145765, "grad_norm": 1.7345304633819467, "learning_rate": 5.898636966114759e-06, "loss": 0.6199, "step": 14983 }, { "epoch": 0.45923746475419885, "grad_norm": 1.8485049614436093, "learning_rate": 5.898148725663682e-06, "loss": 0.6502, "step": 14984 }, { "epoch": 0.45926811327694006, "grad_norm": 1.66322999575696, "learning_rate": 5.8976604763629135e-06, "loss": 0.5901, "step": 14985 }, { "epoch": 0.45929876179968127, "grad_norm": 1.882047587612129, "learning_rate": 5.897172218217269e-06, "loss": 0.6883, "step": 14986 }, { "epoch": 0.45932941032242247, "grad_norm": 0.8505277928268201, "learning_rate": 5.896683951231554e-06, "loss": 0.4499, "step": 14987 }, { "epoch": 0.4593600588451637, "grad_norm": 1.918683545891323, "learning_rate": 5.896195675410586e-06, "loss": 0.6518, "step": 14988 }, { "epoch": 0.4593907073679049, "grad_norm": 1.516836689977487, "learning_rate": 5.89570739075917e-06, "loss": 0.5881, "step": 14989 }, { "epoch": 0.4594213558906461, "grad_norm": 1.801337344439613, "learning_rate": 5.895219097282123e-06, "loss": 0.6196, "step": 14990 }, { "epoch": 0.4594520044133873, "grad_norm": 2.0213131572182865, "learning_rate": 5.89473079498425e-06, "loss": 0.735, "step": 14991 }, { "epoch": 0.4594826529361285, "grad_norm": 1.6978443977062783, "learning_rate": 5.894242483870367e-06, "loss": 0.6682, "step": 14992 }, { "epoch": 0.4595133014588697, "grad_norm": 1.9254368233515444, "learning_rate": 5.893754163945283e-06, "loss": 0.7397, "step": 14993 }, { "epoch": 0.4595439499816109, "grad_norm": 1.7893900924023807, "learning_rate": 5.893265835213813e-06, "loss": 0.6678, "step": 14994 }, { "epoch": 0.4595745985043521, "grad_norm": 1.5986176203552995, "learning_rate": 5.8927774976807635e-06, "loss": 0.6352, "step": 14995 }, { "epoch": 0.45960524702709327, "grad_norm": 1.8605902744729612, "learning_rate": 5.892289151350951e-06, "loss": 0.6623, "step": 14996 }, { "epoch": 0.4596358955498345, "grad_norm": 1.7104563895473668, "learning_rate": 5.891800796229183e-06, "loss": 0.6132, "step": 14997 }, { "epoch": 0.4596665440725757, "grad_norm": 1.9207786056498493, "learning_rate": 5.891312432320275e-06, "loss": 0.7021, "step": 14998 }, { "epoch": 0.4596971925953169, "grad_norm": 1.7311544780327217, "learning_rate": 5.890824059629038e-06, "loss": 0.6278, "step": 14999 }, { "epoch": 0.4597278411180581, "grad_norm": 1.7221170173400033, "learning_rate": 5.890335678160282e-06, "loss": 0.6454, "step": 15000 }, { "epoch": 0.4597584896407993, "grad_norm": 1.8229187189239893, "learning_rate": 5.8898472879188216e-06, "loss": 0.586, "step": 15001 }, { "epoch": 0.4597891381635405, "grad_norm": 0.7961952117232435, "learning_rate": 5.8893588889094684e-06, "loss": 0.4343, "step": 15002 }, { "epoch": 0.4598197866862817, "grad_norm": 1.4402605022804147, "learning_rate": 5.888870481137036e-06, "loss": 0.5894, "step": 15003 }, { "epoch": 0.4598504352090229, "grad_norm": 1.579172951654577, "learning_rate": 5.888382064606333e-06, "loss": 0.638, "step": 15004 }, { "epoch": 0.4598810837317641, "grad_norm": 1.631950243934095, "learning_rate": 5.887893639322174e-06, "loss": 0.6404, "step": 15005 }, { "epoch": 0.4599117322545053, "grad_norm": 1.577199003166251, "learning_rate": 5.8874052052893734e-06, "loss": 0.6355, "step": 15006 }, { "epoch": 0.45994238077724653, "grad_norm": 1.6623316153972274, "learning_rate": 5.886916762512742e-06, "loss": 0.6292, "step": 15007 }, { "epoch": 0.45997302929998773, "grad_norm": 1.8235700523234868, "learning_rate": 5.886428310997092e-06, "loss": 0.6379, "step": 15008 }, { "epoch": 0.46000367782272894, "grad_norm": 1.9793022998539904, "learning_rate": 5.885939850747237e-06, "loss": 0.6824, "step": 15009 }, { "epoch": 0.46003432634547015, "grad_norm": 1.9170162751498796, "learning_rate": 5.885451381767991e-06, "loss": 0.7329, "step": 15010 }, { "epoch": 0.46006497486821135, "grad_norm": 1.830744494836067, "learning_rate": 5.884962904064164e-06, "loss": 0.5328, "step": 15011 }, { "epoch": 0.46009562339095256, "grad_norm": 1.612305725809677, "learning_rate": 5.884474417640573e-06, "loss": 0.6688, "step": 15012 }, { "epoch": 0.46012627191369376, "grad_norm": 1.8918491843914245, "learning_rate": 5.883985922502029e-06, "loss": 0.6487, "step": 15013 }, { "epoch": 0.46015692043643497, "grad_norm": 1.5827711740627255, "learning_rate": 5.8834974186533456e-06, "loss": 0.6647, "step": 15014 }, { "epoch": 0.4601875689591762, "grad_norm": 1.7226744980053104, "learning_rate": 5.883008906099336e-06, "loss": 0.7193, "step": 15015 }, { "epoch": 0.4602182174819174, "grad_norm": 1.5802106703201177, "learning_rate": 5.882520384844813e-06, "loss": 0.6296, "step": 15016 }, { "epoch": 0.4602488660046586, "grad_norm": 0.8531902701412596, "learning_rate": 5.8820318548945925e-06, "loss": 0.4449, "step": 15017 }, { "epoch": 0.4602795145273998, "grad_norm": 0.8479161606510741, "learning_rate": 5.881543316253485e-06, "loss": 0.4303, "step": 15018 }, { "epoch": 0.460310163050141, "grad_norm": 1.6893082323755098, "learning_rate": 5.881054768926306e-06, "loss": 0.621, "step": 15019 }, { "epoch": 0.4603408115728822, "grad_norm": 1.6175855610185992, "learning_rate": 5.880566212917868e-06, "loss": 0.6484, "step": 15020 }, { "epoch": 0.4603714600956234, "grad_norm": 1.649660577038679, "learning_rate": 5.880077648232987e-06, "loss": 0.6318, "step": 15021 }, { "epoch": 0.4604021086183646, "grad_norm": 0.8173417043117454, "learning_rate": 5.879589074876476e-06, "loss": 0.4539, "step": 15022 }, { "epoch": 0.4604327571411058, "grad_norm": 1.741547701988968, "learning_rate": 5.879100492853147e-06, "loss": 0.5353, "step": 15023 }, { "epoch": 0.460463405663847, "grad_norm": 1.9614894151693112, "learning_rate": 5.878611902167818e-06, "loss": 0.6924, "step": 15024 }, { "epoch": 0.46049405418658823, "grad_norm": 1.7121154229548115, "learning_rate": 5.8781233028253e-06, "loss": 0.6416, "step": 15025 }, { "epoch": 0.46052470270932944, "grad_norm": 1.5862601412352995, "learning_rate": 5.8776346948304075e-06, "loss": 0.6515, "step": 15026 }, { "epoch": 0.4605553512320706, "grad_norm": 1.7722470802406642, "learning_rate": 5.877146078187957e-06, "loss": 0.6336, "step": 15027 }, { "epoch": 0.4605859997548118, "grad_norm": 1.746431215030762, "learning_rate": 5.876657452902762e-06, "loss": 0.6575, "step": 15028 }, { "epoch": 0.460616648277553, "grad_norm": 0.7832023376532832, "learning_rate": 5.8761688189796376e-06, "loss": 0.4228, "step": 15029 }, { "epoch": 0.4606472968002942, "grad_norm": 1.642173369034907, "learning_rate": 5.875680176423396e-06, "loss": 0.641, "step": 15030 }, { "epoch": 0.4606779453230354, "grad_norm": 0.7795519277914658, "learning_rate": 5.8751915252388546e-06, "loss": 0.4595, "step": 15031 }, { "epoch": 0.4607085938457766, "grad_norm": 1.6620672301747474, "learning_rate": 5.874702865430826e-06, "loss": 0.6905, "step": 15032 }, { "epoch": 0.4607392423685178, "grad_norm": 0.7992754191455869, "learning_rate": 5.874214197004128e-06, "loss": 0.4657, "step": 15033 }, { "epoch": 0.460769890891259, "grad_norm": 1.682919873756366, "learning_rate": 5.873725519963572e-06, "loss": 0.6039, "step": 15034 }, { "epoch": 0.46080053941400023, "grad_norm": 0.771041269333698, "learning_rate": 5.873236834313976e-06, "loss": 0.4522, "step": 15035 }, { "epoch": 0.46083118793674144, "grad_norm": 1.7839408703324195, "learning_rate": 5.872748140060152e-06, "loss": 0.6281, "step": 15036 }, { "epoch": 0.46086183645948264, "grad_norm": 1.8841137268198709, "learning_rate": 5.872259437206921e-06, "loss": 0.7214, "step": 15037 }, { "epoch": 0.46089248498222385, "grad_norm": 1.7108471877612166, "learning_rate": 5.871770725759093e-06, "loss": 0.6737, "step": 15038 }, { "epoch": 0.46092313350496505, "grad_norm": 1.608345800185223, "learning_rate": 5.871282005721484e-06, "loss": 0.6908, "step": 15039 }, { "epoch": 0.46095378202770626, "grad_norm": 1.8330613587473092, "learning_rate": 5.870793277098912e-06, "loss": 0.6838, "step": 15040 }, { "epoch": 0.46098443055044747, "grad_norm": 0.7848478870257379, "learning_rate": 5.870304539896189e-06, "loss": 0.4398, "step": 15041 }, { "epoch": 0.46101507907318867, "grad_norm": 1.6790392561733303, "learning_rate": 5.8698157941181344e-06, "loss": 0.5886, "step": 15042 }, { "epoch": 0.4610457275959299, "grad_norm": 0.791807237377808, "learning_rate": 5.869327039769561e-06, "loss": 0.4423, "step": 15043 }, { "epoch": 0.4610763761186711, "grad_norm": 1.4981713011817597, "learning_rate": 5.868838276855287e-06, "loss": 0.6927, "step": 15044 }, { "epoch": 0.4611070246414123, "grad_norm": 1.9925268035600578, "learning_rate": 5.868349505380127e-06, "loss": 0.6639, "step": 15045 }, { "epoch": 0.4611376731641535, "grad_norm": 1.734848794653148, "learning_rate": 5.867860725348896e-06, "loss": 0.601, "step": 15046 }, { "epoch": 0.4611683216868947, "grad_norm": 1.594121330700238, "learning_rate": 5.867371936766412e-06, "loss": 0.6993, "step": 15047 }, { "epoch": 0.4611989702096359, "grad_norm": 1.7583033965332917, "learning_rate": 5.866883139637492e-06, "loss": 0.5901, "step": 15048 }, { "epoch": 0.4612296187323771, "grad_norm": 1.7907056583498573, "learning_rate": 5.866394333966948e-06, "loss": 0.6043, "step": 15049 }, { "epoch": 0.4612602672551183, "grad_norm": 1.66956925923563, "learning_rate": 5.865905519759601e-06, "loss": 0.6304, "step": 15050 }, { "epoch": 0.4612909157778595, "grad_norm": 0.8539892879998839, "learning_rate": 5.865416697020263e-06, "loss": 0.4503, "step": 15051 }, { "epoch": 0.46132156430060073, "grad_norm": 1.5624361953177008, "learning_rate": 5.864927865753754e-06, "loss": 0.6027, "step": 15052 }, { "epoch": 0.46135221282334193, "grad_norm": 1.7199992416675285, "learning_rate": 5.86443902596489e-06, "loss": 0.6713, "step": 15053 }, { "epoch": 0.46138286134608314, "grad_norm": 0.821093182552182, "learning_rate": 5.863950177658486e-06, "loss": 0.4496, "step": 15054 }, { "epoch": 0.46141350986882435, "grad_norm": 1.5013377064415026, "learning_rate": 5.863461320839361e-06, "loss": 0.6783, "step": 15055 }, { "epoch": 0.46144415839156555, "grad_norm": 1.809434928980658, "learning_rate": 5.862972455512331e-06, "loss": 0.6157, "step": 15056 }, { "epoch": 0.46147480691430676, "grad_norm": 1.7604603950569604, "learning_rate": 5.862483581682211e-06, "loss": 0.5984, "step": 15057 }, { "epoch": 0.4615054554370479, "grad_norm": 0.7608999233980241, "learning_rate": 5.86199469935382e-06, "loss": 0.4546, "step": 15058 }, { "epoch": 0.4615361039597891, "grad_norm": 1.6493334510781479, "learning_rate": 5.861505808531975e-06, "loss": 0.5991, "step": 15059 }, { "epoch": 0.4615667524825303, "grad_norm": 1.5680100390550264, "learning_rate": 5.861016909221494e-06, "loss": 0.6137, "step": 15060 }, { "epoch": 0.4615974010052715, "grad_norm": 1.638515157825199, "learning_rate": 5.860528001427193e-06, "loss": 0.6456, "step": 15061 }, { "epoch": 0.46162804952801273, "grad_norm": 2.010428495427904, "learning_rate": 5.860039085153887e-06, "loss": 0.6561, "step": 15062 }, { "epoch": 0.46165869805075394, "grad_norm": 0.8181209913758593, "learning_rate": 5.859550160406397e-06, "loss": 0.4224, "step": 15063 }, { "epoch": 0.46168934657349514, "grad_norm": 1.769500448871125, "learning_rate": 5.859061227189541e-06, "loss": 0.566, "step": 15064 }, { "epoch": 0.46171999509623635, "grad_norm": 1.955594714995619, "learning_rate": 5.858572285508134e-06, "loss": 0.6613, "step": 15065 }, { "epoch": 0.46175064361897755, "grad_norm": 1.7952929509831166, "learning_rate": 5.858083335366993e-06, "loss": 0.7234, "step": 15066 }, { "epoch": 0.46178129214171876, "grad_norm": 1.7471454585853152, "learning_rate": 5.8575943767709384e-06, "loss": 0.7083, "step": 15067 }, { "epoch": 0.46181194066445996, "grad_norm": 1.6322874440755375, "learning_rate": 5.857105409724788e-06, "loss": 0.6182, "step": 15068 }, { "epoch": 0.46184258918720117, "grad_norm": 1.6719466312791882, "learning_rate": 5.856616434233358e-06, "loss": 0.6395, "step": 15069 }, { "epoch": 0.4618732377099424, "grad_norm": 1.83801811403753, "learning_rate": 5.856127450301467e-06, "loss": 0.6793, "step": 15070 }, { "epoch": 0.4619038862326836, "grad_norm": 1.6277917516486284, "learning_rate": 5.855638457933933e-06, "loss": 0.6107, "step": 15071 }, { "epoch": 0.4619345347554248, "grad_norm": 1.6991288054435942, "learning_rate": 5.855149457135575e-06, "loss": 0.6489, "step": 15072 }, { "epoch": 0.461965183278166, "grad_norm": 1.8852913250375107, "learning_rate": 5.854660447911209e-06, "loss": 0.7353, "step": 15073 }, { "epoch": 0.4619958318009072, "grad_norm": 1.8576243699794772, "learning_rate": 5.854171430265656e-06, "loss": 0.5248, "step": 15074 }, { "epoch": 0.4620264803236484, "grad_norm": 1.6664545777697886, "learning_rate": 5.853682404203733e-06, "loss": 0.5776, "step": 15075 }, { "epoch": 0.4620571288463896, "grad_norm": 1.5965168879247926, "learning_rate": 5.85319336973026e-06, "loss": 0.5667, "step": 15076 }, { "epoch": 0.4620877773691308, "grad_norm": 1.8382925546943651, "learning_rate": 5.852704326850053e-06, "loss": 0.6172, "step": 15077 }, { "epoch": 0.462118425891872, "grad_norm": 1.767896020050351, "learning_rate": 5.852215275567933e-06, "loss": 0.6538, "step": 15078 }, { "epoch": 0.4621490744146132, "grad_norm": 1.963060631749633, "learning_rate": 5.851726215888718e-06, "loss": 0.6073, "step": 15079 }, { "epoch": 0.46217972293735443, "grad_norm": 1.6608372750132419, "learning_rate": 5.851237147817226e-06, "loss": 0.7378, "step": 15080 }, { "epoch": 0.46221037146009564, "grad_norm": 1.6718397686951703, "learning_rate": 5.8507480713582765e-06, "loss": 0.664, "step": 15081 }, { "epoch": 0.46224101998283684, "grad_norm": 1.7711428998866252, "learning_rate": 5.850258986516688e-06, "loss": 0.6646, "step": 15082 }, { "epoch": 0.46227166850557805, "grad_norm": 1.5885789824940755, "learning_rate": 5.8497698932972826e-06, "loss": 0.6073, "step": 15083 }, { "epoch": 0.46230231702831925, "grad_norm": 1.708514457894335, "learning_rate": 5.849280791704874e-06, "loss": 0.6915, "step": 15084 }, { "epoch": 0.46233296555106046, "grad_norm": 0.8045612293430491, "learning_rate": 5.848791681744287e-06, "loss": 0.4436, "step": 15085 }, { "epoch": 0.46236361407380167, "grad_norm": 1.6890437007125099, "learning_rate": 5.848302563420336e-06, "loss": 0.7105, "step": 15086 }, { "epoch": 0.46239426259654287, "grad_norm": 0.7798246625030107, "learning_rate": 5.8478134367378455e-06, "loss": 0.4458, "step": 15087 }, { "epoch": 0.4624249111192841, "grad_norm": 0.8342014202048715, "learning_rate": 5.84732430170163e-06, "loss": 0.443, "step": 15088 }, { "epoch": 0.4624555596420252, "grad_norm": 1.9424076986923615, "learning_rate": 5.846835158316513e-06, "loss": 0.7067, "step": 15089 }, { "epoch": 0.46248620816476643, "grad_norm": 1.6694296534024904, "learning_rate": 5.846346006587312e-06, "loss": 0.6761, "step": 15090 }, { "epoch": 0.46251685668750764, "grad_norm": 1.7909156912314368, "learning_rate": 5.845856846518847e-06, "loss": 0.5753, "step": 15091 }, { "epoch": 0.46254750521024884, "grad_norm": 0.8144214288550056, "learning_rate": 5.8453676781159375e-06, "loss": 0.4368, "step": 15092 }, { "epoch": 0.46257815373299005, "grad_norm": 1.7706979283901367, "learning_rate": 5.844878501383406e-06, "loss": 0.7763, "step": 15093 }, { "epoch": 0.46260880225573126, "grad_norm": 1.767019300460785, "learning_rate": 5.844389316326069e-06, "loss": 0.6512, "step": 15094 }, { "epoch": 0.46263945077847246, "grad_norm": 1.7380693566825967, "learning_rate": 5.843900122948747e-06, "loss": 0.6988, "step": 15095 }, { "epoch": 0.46267009930121367, "grad_norm": 1.8601414493546942, "learning_rate": 5.843410921256262e-06, "loss": 0.6548, "step": 15096 }, { "epoch": 0.46270074782395487, "grad_norm": 1.579948537506221, "learning_rate": 5.842921711253433e-06, "loss": 0.6152, "step": 15097 }, { "epoch": 0.4627313963466961, "grad_norm": 1.693781298448835, "learning_rate": 5.842432492945083e-06, "loss": 0.5816, "step": 15098 }, { "epoch": 0.4627620448694373, "grad_norm": 1.6969236255723092, "learning_rate": 5.841943266336027e-06, "loss": 0.6227, "step": 15099 }, { "epoch": 0.4627926933921785, "grad_norm": 0.8161534440016094, "learning_rate": 5.841454031431091e-06, "loss": 0.4305, "step": 15100 }, { "epoch": 0.4628233419149197, "grad_norm": 1.8590226367246587, "learning_rate": 5.840964788235091e-06, "loss": 0.6492, "step": 15101 }, { "epoch": 0.4628539904376609, "grad_norm": 1.883113004174209, "learning_rate": 5.840475536752852e-06, "loss": 0.7605, "step": 15102 }, { "epoch": 0.4628846389604021, "grad_norm": 1.507584327939436, "learning_rate": 5.839986276989191e-06, "loss": 0.6104, "step": 15103 }, { "epoch": 0.4629152874831433, "grad_norm": 1.5440337800151038, "learning_rate": 5.839497008948931e-06, "loss": 0.6199, "step": 15104 }, { "epoch": 0.4629459360058845, "grad_norm": 0.7702414883931997, "learning_rate": 5.839007732636891e-06, "loss": 0.4207, "step": 15105 }, { "epoch": 0.4629765845286257, "grad_norm": 1.6564767690372757, "learning_rate": 5.838518448057894e-06, "loss": 0.6719, "step": 15106 }, { "epoch": 0.46300723305136693, "grad_norm": 2.342415847440356, "learning_rate": 5.83802915521676e-06, "loss": 0.7512, "step": 15107 }, { "epoch": 0.46303788157410813, "grad_norm": 1.8077652147675976, "learning_rate": 5.8375398541183106e-06, "loss": 0.7136, "step": 15108 }, { "epoch": 0.46306853009684934, "grad_norm": 0.7532110107877398, "learning_rate": 5.837050544767367e-06, "loss": 0.4629, "step": 15109 }, { "epoch": 0.46309917861959055, "grad_norm": 1.809682957435512, "learning_rate": 5.83656122716875e-06, "loss": 0.7109, "step": 15110 }, { "epoch": 0.46312982714233175, "grad_norm": 1.8842718019798288, "learning_rate": 5.836071901327281e-06, "loss": 0.7334, "step": 15111 }, { "epoch": 0.46316047566507296, "grad_norm": 1.705282629114447, "learning_rate": 5.8355825672477805e-06, "loss": 0.6069, "step": 15112 }, { "epoch": 0.46319112418781416, "grad_norm": 1.7086458750657043, "learning_rate": 5.835093224935073e-06, "loss": 0.6483, "step": 15113 }, { "epoch": 0.46322177271055537, "grad_norm": 1.7966225068955082, "learning_rate": 5.834603874393978e-06, "loss": 0.546, "step": 15114 }, { "epoch": 0.4632524212332966, "grad_norm": 1.537315881060872, "learning_rate": 5.8341145156293175e-06, "loss": 0.5818, "step": 15115 }, { "epoch": 0.4632830697560378, "grad_norm": 2.031096374952793, "learning_rate": 5.8336251486459114e-06, "loss": 0.703, "step": 15116 }, { "epoch": 0.463313718278779, "grad_norm": 1.5638772009068078, "learning_rate": 5.833135773448587e-06, "loss": 0.6124, "step": 15117 }, { "epoch": 0.4633443668015202, "grad_norm": 1.5808457964796387, "learning_rate": 5.832646390042159e-06, "loss": 0.5594, "step": 15118 }, { "epoch": 0.4633750153242614, "grad_norm": 1.7824040304233326, "learning_rate": 5.832156998431456e-06, "loss": 0.6168, "step": 15119 }, { "epoch": 0.46340566384700255, "grad_norm": 1.5946951551515154, "learning_rate": 5.831667598621294e-06, "loss": 0.6578, "step": 15120 }, { "epoch": 0.46343631236974375, "grad_norm": 2.0769022741943686, "learning_rate": 5.831178190616501e-06, "loss": 0.6522, "step": 15121 }, { "epoch": 0.46346696089248496, "grad_norm": 1.7929087983867968, "learning_rate": 5.830688774421896e-06, "loss": 0.5962, "step": 15122 }, { "epoch": 0.46349760941522616, "grad_norm": 1.6913228394104907, "learning_rate": 5.8301993500423e-06, "loss": 0.591, "step": 15123 }, { "epoch": 0.46352825793796737, "grad_norm": 1.779081051890894, "learning_rate": 5.82970991748254e-06, "loss": 0.6155, "step": 15124 }, { "epoch": 0.4635589064607086, "grad_norm": 1.6757769205873785, "learning_rate": 5.829220476747436e-06, "loss": 0.5651, "step": 15125 }, { "epoch": 0.4635895549834498, "grad_norm": 1.8107510568098963, "learning_rate": 5.82873102784181e-06, "loss": 0.6015, "step": 15126 }, { "epoch": 0.463620203506191, "grad_norm": 1.8771220869602454, "learning_rate": 5.828241570770483e-06, "loss": 0.7058, "step": 15127 }, { "epoch": 0.4636508520289322, "grad_norm": 1.536665855132083, "learning_rate": 5.827752105538282e-06, "loss": 0.6327, "step": 15128 }, { "epoch": 0.4636815005516734, "grad_norm": 1.694357282424998, "learning_rate": 5.827262632150028e-06, "loss": 0.6055, "step": 15129 }, { "epoch": 0.4637121490744146, "grad_norm": 1.5791257322539929, "learning_rate": 5.826773150610543e-06, "loss": 0.6787, "step": 15130 }, { "epoch": 0.4637427975971558, "grad_norm": 1.7496885961816813, "learning_rate": 5.82628366092465e-06, "loss": 0.6552, "step": 15131 }, { "epoch": 0.463773446119897, "grad_norm": 1.7246366448462256, "learning_rate": 5.825794163097173e-06, "loss": 0.6451, "step": 15132 }, { "epoch": 0.4638040946426382, "grad_norm": 1.792397685078719, "learning_rate": 5.825304657132935e-06, "loss": 0.6855, "step": 15133 }, { "epoch": 0.4638347431653794, "grad_norm": 0.8302633339744655, "learning_rate": 5.824815143036758e-06, "loss": 0.4432, "step": 15134 }, { "epoch": 0.46386539168812063, "grad_norm": 1.8026222214101875, "learning_rate": 5.824325620813468e-06, "loss": 0.6289, "step": 15135 }, { "epoch": 0.46389604021086184, "grad_norm": 1.8815713347381515, "learning_rate": 5.823836090467887e-06, "loss": 0.7604, "step": 15136 }, { "epoch": 0.46392668873360304, "grad_norm": 1.5220082675168736, "learning_rate": 5.8233465520048375e-06, "loss": 0.6348, "step": 15137 }, { "epoch": 0.46395733725634425, "grad_norm": 1.6063925956258591, "learning_rate": 5.822857005429142e-06, "loss": 0.7585, "step": 15138 }, { "epoch": 0.46398798577908545, "grad_norm": 0.822638480373699, "learning_rate": 5.8223674507456285e-06, "loss": 0.4685, "step": 15139 }, { "epoch": 0.46401863430182666, "grad_norm": 1.5510156398590924, "learning_rate": 5.8218778879591175e-06, "loss": 0.6002, "step": 15140 }, { "epoch": 0.46404928282456787, "grad_norm": 0.7540027451622787, "learning_rate": 5.821388317074434e-06, "loss": 0.4193, "step": 15141 }, { "epoch": 0.46407993134730907, "grad_norm": 2.03870842057595, "learning_rate": 5.820898738096399e-06, "loss": 0.6758, "step": 15142 }, { "epoch": 0.4641105798700503, "grad_norm": 1.648684181163835, "learning_rate": 5.82040915102984e-06, "loss": 0.642, "step": 15143 }, { "epoch": 0.4641412283927915, "grad_norm": 1.6426218926250713, "learning_rate": 5.819919555879579e-06, "loss": 0.5624, "step": 15144 }, { "epoch": 0.4641718769155327, "grad_norm": 1.9487564103669108, "learning_rate": 5.8194299526504425e-06, "loss": 0.6801, "step": 15145 }, { "epoch": 0.4642025254382739, "grad_norm": 1.6693678402818608, "learning_rate": 5.818940341347251e-06, "loss": 0.5865, "step": 15146 }, { "epoch": 0.4642331739610151, "grad_norm": 1.8323503854903989, "learning_rate": 5.818450721974832e-06, "loss": 0.6768, "step": 15147 }, { "epoch": 0.4642638224837563, "grad_norm": 1.7319258342910873, "learning_rate": 5.817961094538008e-06, "loss": 0.6495, "step": 15148 }, { "epoch": 0.4642944710064975, "grad_norm": 1.926755597684182, "learning_rate": 5.817471459041605e-06, "loss": 0.8043, "step": 15149 }, { "epoch": 0.4643251195292387, "grad_norm": 1.8188039859906078, "learning_rate": 5.816981815490446e-06, "loss": 0.651, "step": 15150 }, { "epoch": 0.46435576805197987, "grad_norm": 1.8354709070642954, "learning_rate": 5.816492163889355e-06, "loss": 0.7307, "step": 15151 }, { "epoch": 0.4643864165747211, "grad_norm": 1.904729232741421, "learning_rate": 5.81600250424316e-06, "loss": 0.7081, "step": 15152 }, { "epoch": 0.4644170650974623, "grad_norm": 0.8520463335441937, "learning_rate": 5.815512836556683e-06, "loss": 0.444, "step": 15153 }, { "epoch": 0.4644477136202035, "grad_norm": 1.8795234087746138, "learning_rate": 5.815023160834749e-06, "loss": 0.6349, "step": 15154 }, { "epoch": 0.4644783621429447, "grad_norm": 1.6062176562993307, "learning_rate": 5.814533477082182e-06, "loss": 0.67, "step": 15155 }, { "epoch": 0.4645090106656859, "grad_norm": 0.8699081010862701, "learning_rate": 5.81404378530381e-06, "loss": 0.4653, "step": 15156 }, { "epoch": 0.4645396591884271, "grad_norm": 1.764698723150222, "learning_rate": 5.813554085504455e-06, "loss": 0.591, "step": 15157 }, { "epoch": 0.4645703077111683, "grad_norm": 1.8757665067480769, "learning_rate": 5.813064377688944e-06, "loss": 0.6214, "step": 15158 }, { "epoch": 0.4646009562339095, "grad_norm": 1.540987712293716, "learning_rate": 5.812574661862101e-06, "loss": 0.6011, "step": 15159 }, { "epoch": 0.4646316047566507, "grad_norm": 1.8243629653777202, "learning_rate": 5.812084938028753e-06, "loss": 0.6978, "step": 15160 }, { "epoch": 0.4646622532793919, "grad_norm": 1.8022282311104856, "learning_rate": 5.811595206193725e-06, "loss": 0.6331, "step": 15161 }, { "epoch": 0.46469290180213313, "grad_norm": 1.7035121462235527, "learning_rate": 5.81110546636184e-06, "loss": 0.6585, "step": 15162 }, { "epoch": 0.46472355032487433, "grad_norm": 1.7607472111222806, "learning_rate": 5.8106157185379264e-06, "loss": 0.6374, "step": 15163 }, { "epoch": 0.46475419884761554, "grad_norm": 0.8650966899383514, "learning_rate": 5.810125962726808e-06, "loss": 0.4431, "step": 15164 }, { "epoch": 0.46478484737035675, "grad_norm": 1.6672331296660488, "learning_rate": 5.809636198933313e-06, "loss": 0.6016, "step": 15165 }, { "epoch": 0.46481549589309795, "grad_norm": 2.242048554833845, "learning_rate": 5.809146427162262e-06, "loss": 0.6917, "step": 15166 }, { "epoch": 0.46484614441583916, "grad_norm": 1.5410934832975047, "learning_rate": 5.808656647418488e-06, "loss": 0.5821, "step": 15167 }, { "epoch": 0.46487679293858036, "grad_norm": 1.8397114586241656, "learning_rate": 5.808166859706811e-06, "loss": 0.5888, "step": 15168 }, { "epoch": 0.46490744146132157, "grad_norm": 1.616079858086225, "learning_rate": 5.80767706403206e-06, "loss": 0.6291, "step": 15169 }, { "epoch": 0.4649380899840628, "grad_norm": 0.787953442771804, "learning_rate": 5.807187260399058e-06, "loss": 0.4445, "step": 15170 }, { "epoch": 0.464968738506804, "grad_norm": 1.614588008999774, "learning_rate": 5.806697448812637e-06, "loss": 0.7049, "step": 15171 }, { "epoch": 0.4649993870295452, "grad_norm": 0.7774094285776775, "learning_rate": 5.806207629277617e-06, "loss": 0.4466, "step": 15172 }, { "epoch": 0.4650300355522864, "grad_norm": 1.9725513341852083, "learning_rate": 5.805717801798828e-06, "loss": 0.6216, "step": 15173 }, { "epoch": 0.4650606840750276, "grad_norm": 1.8783674019445082, "learning_rate": 5.805227966381095e-06, "loss": 0.6925, "step": 15174 }, { "epoch": 0.4650913325977688, "grad_norm": 1.7452083863575958, "learning_rate": 5.8047381230292455e-06, "loss": 0.5953, "step": 15175 }, { "epoch": 0.46512198112051, "grad_norm": 1.6471500763647955, "learning_rate": 5.804248271748104e-06, "loss": 0.6833, "step": 15176 }, { "epoch": 0.4651526296432512, "grad_norm": 1.8798820536331233, "learning_rate": 5.8037584125425e-06, "loss": 0.6446, "step": 15177 }, { "epoch": 0.4651832781659924, "grad_norm": 1.8879385352946452, "learning_rate": 5.8032685454172574e-06, "loss": 0.7146, "step": 15178 }, { "epoch": 0.4652139266887336, "grad_norm": 0.8035801911931518, "learning_rate": 5.802778670377205e-06, "loss": 0.4385, "step": 15179 }, { "epoch": 0.46524457521147483, "grad_norm": 0.7677985395228235, "learning_rate": 5.802288787427169e-06, "loss": 0.4483, "step": 15180 }, { "epoch": 0.46527522373421604, "grad_norm": 1.6993085458664796, "learning_rate": 5.801798896571975e-06, "loss": 0.7249, "step": 15181 }, { "epoch": 0.4653058722569572, "grad_norm": 1.6536988639249992, "learning_rate": 5.8013089978164535e-06, "loss": 0.6265, "step": 15182 }, { "epoch": 0.4653365207796984, "grad_norm": 1.689963044371769, "learning_rate": 5.800819091165428e-06, "loss": 0.639, "step": 15183 }, { "epoch": 0.4653671693024396, "grad_norm": 1.8367260896162034, "learning_rate": 5.800329176623728e-06, "loss": 0.7199, "step": 15184 }, { "epoch": 0.4653978178251808, "grad_norm": 1.655447229421824, "learning_rate": 5.799839254196179e-06, "loss": 0.6766, "step": 15185 }, { "epoch": 0.465428466347922, "grad_norm": 1.6949730692007947, "learning_rate": 5.79934932388761e-06, "loss": 0.7119, "step": 15186 }, { "epoch": 0.4654591148706632, "grad_norm": 1.7673307036060073, "learning_rate": 5.798859385702848e-06, "loss": 0.6142, "step": 15187 }, { "epoch": 0.4654897633934044, "grad_norm": 1.3995552408010346, "learning_rate": 5.798369439646718e-06, "loss": 0.5809, "step": 15188 }, { "epoch": 0.4655204119161456, "grad_norm": 1.8775810946124303, "learning_rate": 5.7978794857240506e-06, "loss": 0.7196, "step": 15189 }, { "epoch": 0.46555106043888683, "grad_norm": 1.755442488086937, "learning_rate": 5.797389523939674e-06, "loss": 0.7203, "step": 15190 }, { "epoch": 0.46558170896162804, "grad_norm": 1.7186039821870798, "learning_rate": 5.796899554298413e-06, "loss": 0.6384, "step": 15191 }, { "epoch": 0.46561235748436924, "grad_norm": 1.6590875833038923, "learning_rate": 5.796409576805096e-06, "loss": 0.6475, "step": 15192 }, { "epoch": 0.46564300600711045, "grad_norm": 2.589111780436161, "learning_rate": 5.795919591464553e-06, "loss": 0.7278, "step": 15193 }, { "epoch": 0.46567365452985165, "grad_norm": 1.8740268754655593, "learning_rate": 5.79542959828161e-06, "loss": 0.5791, "step": 15194 }, { "epoch": 0.46570430305259286, "grad_norm": 1.8471531106389385, "learning_rate": 5.794939597261097e-06, "loss": 0.6586, "step": 15195 }, { "epoch": 0.46573495157533407, "grad_norm": 1.725808865966762, "learning_rate": 5.794449588407838e-06, "loss": 0.6381, "step": 15196 }, { "epoch": 0.46576560009807527, "grad_norm": 1.6925157346623196, "learning_rate": 5.793959571726666e-06, "loss": 0.6854, "step": 15197 }, { "epoch": 0.4657962486208165, "grad_norm": 1.6112932217094929, "learning_rate": 5.7934695472224066e-06, "loss": 0.5716, "step": 15198 }, { "epoch": 0.4658268971435577, "grad_norm": 1.6573761149099813, "learning_rate": 5.79297951489989e-06, "loss": 0.657, "step": 15199 }, { "epoch": 0.4658575456662989, "grad_norm": 1.6530769825546574, "learning_rate": 5.792489474763941e-06, "loss": 0.7364, "step": 15200 }, { "epoch": 0.4658881941890401, "grad_norm": 1.683201293279754, "learning_rate": 5.791999426819393e-06, "loss": 0.5966, "step": 15201 }, { "epoch": 0.4659188427117813, "grad_norm": 1.7489080811575575, "learning_rate": 5.79150937107107e-06, "loss": 0.6182, "step": 15202 }, { "epoch": 0.4659494912345225, "grad_norm": 1.705181293313091, "learning_rate": 5.7910193075238034e-06, "loss": 0.6075, "step": 15203 }, { "epoch": 0.4659801397572637, "grad_norm": 1.6356636307806873, "learning_rate": 5.790529236182421e-06, "loss": 0.7208, "step": 15204 }, { "epoch": 0.4660107882800049, "grad_norm": 1.4784377415223806, "learning_rate": 5.7900391570517504e-06, "loss": 0.5945, "step": 15205 }, { "epoch": 0.4660414368027461, "grad_norm": 1.5207240549090841, "learning_rate": 5.789549070136625e-06, "loss": 0.571, "step": 15206 }, { "epoch": 0.46607208532548733, "grad_norm": 1.578774005800256, "learning_rate": 5.789058975441868e-06, "loss": 0.7292, "step": 15207 }, { "epoch": 0.46610273384822853, "grad_norm": 1.6362615005755354, "learning_rate": 5.788568872972312e-06, "loss": 0.6205, "step": 15208 }, { "epoch": 0.46613338237096974, "grad_norm": 1.8224295019686167, "learning_rate": 5.788078762732785e-06, "loss": 0.5948, "step": 15209 }, { "epoch": 0.46616403089371095, "grad_norm": 1.88440575097643, "learning_rate": 5.787588644728117e-06, "loss": 0.6705, "step": 15210 }, { "epoch": 0.46619467941645215, "grad_norm": 0.9091144402188898, "learning_rate": 5.787098518963136e-06, "loss": 0.46, "step": 15211 }, { "epoch": 0.46622532793919336, "grad_norm": 1.6586378455760915, "learning_rate": 5.786608385442671e-06, "loss": 0.5739, "step": 15212 }, { "epoch": 0.4662559764619345, "grad_norm": 1.659986315322739, "learning_rate": 5.786118244171552e-06, "loss": 0.6652, "step": 15213 }, { "epoch": 0.4662866249846757, "grad_norm": 1.7134862760586294, "learning_rate": 5.7856280951546116e-06, "loss": 0.5618, "step": 15214 }, { "epoch": 0.4663172735074169, "grad_norm": 1.585449426477338, "learning_rate": 5.785137938396674e-06, "loss": 0.6234, "step": 15215 }, { "epoch": 0.4663479220301581, "grad_norm": 1.7679455764979115, "learning_rate": 5.784647773902574e-06, "loss": 0.6412, "step": 15216 }, { "epoch": 0.46637857055289933, "grad_norm": 1.7071457426293037, "learning_rate": 5.784157601677136e-06, "loss": 0.7124, "step": 15217 }, { "epoch": 0.46640921907564054, "grad_norm": 1.7416644748648027, "learning_rate": 5.7836674217251945e-06, "loss": 0.6381, "step": 15218 }, { "epoch": 0.46643986759838174, "grad_norm": 1.7485245879814688, "learning_rate": 5.783177234051576e-06, "loss": 0.7031, "step": 15219 }, { "epoch": 0.46647051612112295, "grad_norm": 0.7599728505002189, "learning_rate": 5.782687038661111e-06, "loss": 0.4527, "step": 15220 }, { "epoch": 0.46650116464386415, "grad_norm": 1.6194179779014812, "learning_rate": 5.782196835558633e-06, "loss": 0.6381, "step": 15221 }, { "epoch": 0.46653181316660536, "grad_norm": 1.6272257637690615, "learning_rate": 5.781706624748968e-06, "loss": 0.6274, "step": 15222 }, { "epoch": 0.46656246168934656, "grad_norm": 1.7963186450358803, "learning_rate": 5.781216406236948e-06, "loss": 0.6873, "step": 15223 }, { "epoch": 0.46659311021208777, "grad_norm": 1.6538176511876073, "learning_rate": 5.780726180027402e-06, "loss": 0.6736, "step": 15224 }, { "epoch": 0.466623758734829, "grad_norm": 0.7926631183922496, "learning_rate": 5.780235946125163e-06, "loss": 0.4589, "step": 15225 }, { "epoch": 0.4666544072575702, "grad_norm": 1.661348408829997, "learning_rate": 5.779745704535057e-06, "loss": 0.6909, "step": 15226 }, { "epoch": 0.4666850557803114, "grad_norm": 1.5361157078247152, "learning_rate": 5.7792554552619184e-06, "loss": 0.6631, "step": 15227 }, { "epoch": 0.4667157043030526, "grad_norm": 1.7884310245903297, "learning_rate": 5.778765198310576e-06, "loss": 0.7191, "step": 15228 }, { "epoch": 0.4667463528257938, "grad_norm": 1.7170818363382214, "learning_rate": 5.778274933685863e-06, "loss": 0.6005, "step": 15229 }, { "epoch": 0.466777001348535, "grad_norm": 1.8075327586420697, "learning_rate": 5.777784661392606e-06, "loss": 0.676, "step": 15230 }, { "epoch": 0.4668076498712762, "grad_norm": 1.648422277686206, "learning_rate": 5.777294381435636e-06, "loss": 0.6321, "step": 15231 }, { "epoch": 0.4668382983940174, "grad_norm": 1.8720627706649824, "learning_rate": 5.776804093819789e-06, "loss": 0.6717, "step": 15232 }, { "epoch": 0.4668689469167586, "grad_norm": 1.682479482939991, "learning_rate": 5.776313798549891e-06, "loss": 0.7645, "step": 15233 }, { "epoch": 0.4668995954394998, "grad_norm": 1.7655793958620059, "learning_rate": 5.7758234956307745e-06, "loss": 0.642, "step": 15234 }, { "epoch": 0.46693024396224103, "grad_norm": 1.6115935035994078, "learning_rate": 5.77533318506727e-06, "loss": 0.7229, "step": 15235 }, { "epoch": 0.46696089248498224, "grad_norm": 1.7266347963969564, "learning_rate": 5.7748428668642095e-06, "loss": 0.7064, "step": 15236 }, { "epoch": 0.46699154100772344, "grad_norm": 0.8090454850709186, "learning_rate": 5.7743525410264256e-06, "loss": 0.4493, "step": 15237 }, { "epoch": 0.46702218953046465, "grad_norm": 1.6690364171303056, "learning_rate": 5.773862207558747e-06, "loss": 0.682, "step": 15238 }, { "epoch": 0.46705283805320585, "grad_norm": 1.7653819193024425, "learning_rate": 5.773371866466004e-06, "loss": 0.6612, "step": 15239 }, { "epoch": 0.46708348657594706, "grad_norm": 1.7882436340586776, "learning_rate": 5.772881517753033e-06, "loss": 0.7097, "step": 15240 }, { "epoch": 0.46711413509868827, "grad_norm": 0.77720987928066, "learning_rate": 5.772391161424662e-06, "loss": 0.4419, "step": 15241 }, { "epoch": 0.46714478362142947, "grad_norm": 1.9695914597526862, "learning_rate": 5.771900797485723e-06, "loss": 0.6541, "step": 15242 }, { "epoch": 0.4671754321441707, "grad_norm": 1.7204031095758625, "learning_rate": 5.771410425941047e-06, "loss": 0.5932, "step": 15243 }, { "epoch": 0.4672060806669118, "grad_norm": 1.6320049302295363, "learning_rate": 5.770920046795468e-06, "loss": 0.5051, "step": 15244 }, { "epoch": 0.46723672918965303, "grad_norm": 1.784324471475959, "learning_rate": 5.7704296600538165e-06, "loss": 0.6506, "step": 15245 }, { "epoch": 0.46726737771239424, "grad_norm": 1.5499448959694917, "learning_rate": 5.769939265720923e-06, "loss": 0.5971, "step": 15246 }, { "epoch": 0.46729802623513544, "grad_norm": 1.7668255682538363, "learning_rate": 5.769448863801622e-06, "loss": 0.6603, "step": 15247 }, { "epoch": 0.46732867475787665, "grad_norm": 1.6952577282866717, "learning_rate": 5.7689584543007446e-06, "loss": 0.6392, "step": 15248 }, { "epoch": 0.46735932328061786, "grad_norm": 1.6738958276632017, "learning_rate": 5.768468037223124e-06, "loss": 0.6496, "step": 15249 }, { "epoch": 0.46738997180335906, "grad_norm": 1.6359301335005743, "learning_rate": 5.767977612573589e-06, "loss": 0.6726, "step": 15250 }, { "epoch": 0.46742062032610027, "grad_norm": 1.6012940959219244, "learning_rate": 5.767487180356974e-06, "loss": 0.6506, "step": 15251 }, { "epoch": 0.4674512688488415, "grad_norm": 0.8276960990068464, "learning_rate": 5.766996740578113e-06, "loss": 0.4583, "step": 15252 }, { "epoch": 0.4674819173715827, "grad_norm": 1.8904426315253826, "learning_rate": 5.766506293241837e-06, "loss": 0.6407, "step": 15253 }, { "epoch": 0.4675125658943239, "grad_norm": 1.6546432825740036, "learning_rate": 5.766015838352976e-06, "loss": 0.6, "step": 15254 }, { "epoch": 0.4675432144170651, "grad_norm": 1.7566972289049263, "learning_rate": 5.765525375916368e-06, "loss": 0.6618, "step": 15255 }, { "epoch": 0.4675738629398063, "grad_norm": 1.6908717270565383, "learning_rate": 5.76503490593684e-06, "loss": 0.7014, "step": 15256 }, { "epoch": 0.4676045114625475, "grad_norm": 1.7066001317657695, "learning_rate": 5.764544428419229e-06, "loss": 0.6216, "step": 15257 }, { "epoch": 0.4676351599852887, "grad_norm": 1.691815422697069, "learning_rate": 5.764053943368365e-06, "loss": 0.6554, "step": 15258 }, { "epoch": 0.4676658085080299, "grad_norm": 1.7106084201777099, "learning_rate": 5.7635634507890836e-06, "loss": 0.7081, "step": 15259 }, { "epoch": 0.4676964570307711, "grad_norm": 1.7512705092373402, "learning_rate": 5.763072950686215e-06, "loss": 0.6569, "step": 15260 }, { "epoch": 0.4677271055535123, "grad_norm": 1.8751642407883347, "learning_rate": 5.762582443064593e-06, "loss": 0.6528, "step": 15261 }, { "epoch": 0.46775775407625353, "grad_norm": 0.8187095035155024, "learning_rate": 5.762091927929052e-06, "loss": 0.4461, "step": 15262 }, { "epoch": 0.46778840259899473, "grad_norm": 0.8321825278806166, "learning_rate": 5.761601405284423e-06, "loss": 0.4664, "step": 15263 }, { "epoch": 0.46781905112173594, "grad_norm": 1.665276559375373, "learning_rate": 5.761110875135543e-06, "loss": 0.5815, "step": 15264 }, { "epoch": 0.46784969964447715, "grad_norm": 1.620878952773822, "learning_rate": 5.7606203374872395e-06, "loss": 0.6284, "step": 15265 }, { "epoch": 0.46788034816721835, "grad_norm": 1.641437292830842, "learning_rate": 5.760129792344351e-06, "loss": 0.6839, "step": 15266 }, { "epoch": 0.46791099668995956, "grad_norm": 2.0404661956567423, "learning_rate": 5.759639239711709e-06, "loss": 0.6683, "step": 15267 }, { "epoch": 0.46794164521270076, "grad_norm": 1.727179442535462, "learning_rate": 5.7591486795941484e-06, "loss": 0.5693, "step": 15268 }, { "epoch": 0.46797229373544197, "grad_norm": 1.8295636689100288, "learning_rate": 5.758658111996499e-06, "loss": 0.6332, "step": 15269 }, { "epoch": 0.4680029422581832, "grad_norm": 1.7564238095479099, "learning_rate": 5.758167536923599e-06, "loss": 0.5729, "step": 15270 }, { "epoch": 0.4680335907809244, "grad_norm": 0.8688210260909248, "learning_rate": 5.7576769543802805e-06, "loss": 0.4561, "step": 15271 }, { "epoch": 0.4680642393036656, "grad_norm": 1.7845933611631553, "learning_rate": 5.7571863643713755e-06, "loss": 0.6396, "step": 15272 }, { "epoch": 0.4680948878264068, "grad_norm": 1.893538276685192, "learning_rate": 5.756695766901721e-06, "loss": 0.7206, "step": 15273 }, { "epoch": 0.468125536349148, "grad_norm": 1.715128727886111, "learning_rate": 5.756205161976148e-06, "loss": 0.6873, "step": 15274 }, { "epoch": 0.46815618487188915, "grad_norm": 1.6537227273046302, "learning_rate": 5.755714549599495e-06, "loss": 0.6968, "step": 15275 }, { "epoch": 0.46818683339463035, "grad_norm": 2.0195505153686866, "learning_rate": 5.75522392977659e-06, "loss": 0.6433, "step": 15276 }, { "epoch": 0.46821748191737156, "grad_norm": 0.8032114978444873, "learning_rate": 5.754733302512272e-06, "loss": 0.4599, "step": 15277 }, { "epoch": 0.46824813044011276, "grad_norm": 1.780977391004402, "learning_rate": 5.754242667811372e-06, "loss": 0.6536, "step": 15278 }, { "epoch": 0.46827877896285397, "grad_norm": 1.7543203890125052, "learning_rate": 5.753752025678728e-06, "loss": 0.6618, "step": 15279 }, { "epoch": 0.4683094274855952, "grad_norm": 1.8839013300288747, "learning_rate": 5.753261376119172e-06, "loss": 0.7546, "step": 15280 }, { "epoch": 0.4683400760083364, "grad_norm": 1.6934422508541807, "learning_rate": 5.752770719137538e-06, "loss": 0.6663, "step": 15281 }, { "epoch": 0.4683707245310776, "grad_norm": 0.7533126181439422, "learning_rate": 5.752280054738662e-06, "loss": 0.4314, "step": 15282 }, { "epoch": 0.4684013730538188, "grad_norm": 0.7782333379450317, "learning_rate": 5.751789382927379e-06, "loss": 0.4233, "step": 15283 }, { "epoch": 0.46843202157656, "grad_norm": 0.781332004699337, "learning_rate": 5.751298703708522e-06, "loss": 0.4678, "step": 15284 }, { "epoch": 0.4684626700993012, "grad_norm": 1.6514420968656232, "learning_rate": 5.750808017086927e-06, "loss": 0.5925, "step": 15285 }, { "epoch": 0.4684933186220424, "grad_norm": 1.8160302695487625, "learning_rate": 5.750317323067427e-06, "loss": 0.6502, "step": 15286 }, { "epoch": 0.4685239671447836, "grad_norm": 0.7600296019996711, "learning_rate": 5.74982662165486e-06, "loss": 0.4215, "step": 15287 }, { "epoch": 0.4685546156675248, "grad_norm": 1.6115587706647372, "learning_rate": 5.749335912854059e-06, "loss": 0.6439, "step": 15288 }, { "epoch": 0.468585264190266, "grad_norm": 1.8348904315070877, "learning_rate": 5.74884519666986e-06, "loss": 0.7068, "step": 15289 }, { "epoch": 0.46861591271300723, "grad_norm": 1.6749238021847899, "learning_rate": 5.748354473107097e-06, "loss": 0.6442, "step": 15290 }, { "epoch": 0.46864656123574844, "grad_norm": 1.7239030933669937, "learning_rate": 5.747863742170607e-06, "loss": 0.6451, "step": 15291 }, { "epoch": 0.46867720975848964, "grad_norm": 1.6776117780305095, "learning_rate": 5.7473730038652245e-06, "loss": 0.5747, "step": 15292 }, { "epoch": 0.46870785828123085, "grad_norm": 1.6723576508684366, "learning_rate": 5.746882258195782e-06, "loss": 0.6134, "step": 15293 }, { "epoch": 0.46873850680397205, "grad_norm": 1.6471451588372181, "learning_rate": 5.746391505167119e-06, "loss": 0.6051, "step": 15294 }, { "epoch": 0.46876915532671326, "grad_norm": 1.8051756200484579, "learning_rate": 5.74590074478407e-06, "loss": 0.6748, "step": 15295 }, { "epoch": 0.46879980384945447, "grad_norm": 1.4626061286004468, "learning_rate": 5.74540997705147e-06, "loss": 0.5415, "step": 15296 }, { "epoch": 0.46883045237219567, "grad_norm": 1.8485483340539095, "learning_rate": 5.744919201974154e-06, "loss": 0.6638, "step": 15297 }, { "epoch": 0.4688611008949369, "grad_norm": 1.8425028118751972, "learning_rate": 5.744428419556959e-06, "loss": 0.7281, "step": 15298 }, { "epoch": 0.4688917494176781, "grad_norm": 1.739929126303711, "learning_rate": 5.743937629804721e-06, "loss": 0.7118, "step": 15299 }, { "epoch": 0.4689223979404193, "grad_norm": 1.827010651066358, "learning_rate": 5.743446832722274e-06, "loss": 0.6201, "step": 15300 }, { "epoch": 0.4689530464631605, "grad_norm": 1.7858739767720306, "learning_rate": 5.742956028314455e-06, "loss": 0.6054, "step": 15301 }, { "epoch": 0.4689836949859017, "grad_norm": 1.7574725753343534, "learning_rate": 5.742465216586102e-06, "loss": 0.6441, "step": 15302 }, { "epoch": 0.4690143435086429, "grad_norm": 1.6263178872176105, "learning_rate": 5.741974397542047e-06, "loss": 0.6664, "step": 15303 }, { "epoch": 0.4690449920313841, "grad_norm": 1.741402072741425, "learning_rate": 5.741483571187129e-06, "loss": 0.6926, "step": 15304 }, { "epoch": 0.4690756405541253, "grad_norm": 1.5085596576306282, "learning_rate": 5.7409927375261845e-06, "loss": 0.6017, "step": 15305 }, { "epoch": 0.4691062890768665, "grad_norm": 1.9465180696454096, "learning_rate": 5.740501896564049e-06, "loss": 0.6362, "step": 15306 }, { "epoch": 0.4691369375996077, "grad_norm": 1.5558928363811628, "learning_rate": 5.740011048305558e-06, "loss": 0.629, "step": 15307 }, { "epoch": 0.4691675861223489, "grad_norm": 1.6997598144335508, "learning_rate": 5.7395201927555486e-06, "loss": 0.6112, "step": 15308 }, { "epoch": 0.4691982346450901, "grad_norm": 0.9194280929110819, "learning_rate": 5.739029329918859e-06, "loss": 0.4619, "step": 15309 }, { "epoch": 0.4692288831678313, "grad_norm": 1.6561347249304739, "learning_rate": 5.738538459800323e-06, "loss": 0.5055, "step": 15310 }, { "epoch": 0.4692595316905725, "grad_norm": 1.62379374953995, "learning_rate": 5.73804758240478e-06, "loss": 0.6251, "step": 15311 }, { "epoch": 0.4692901802133137, "grad_norm": 0.7955556072947267, "learning_rate": 5.737556697737063e-06, "loss": 0.4362, "step": 15312 }, { "epoch": 0.4693208287360549, "grad_norm": 1.7112912547894072, "learning_rate": 5.737065805802013e-06, "loss": 0.6373, "step": 15313 }, { "epoch": 0.4693514772587961, "grad_norm": 1.5865339068471014, "learning_rate": 5.736574906604465e-06, "loss": 0.7287, "step": 15314 }, { "epoch": 0.4693821257815373, "grad_norm": 1.8947464282531068, "learning_rate": 5.736084000149254e-06, "loss": 0.678, "step": 15315 }, { "epoch": 0.4694127743042785, "grad_norm": 1.6680319448615564, "learning_rate": 5.7355930864412215e-06, "loss": 0.5682, "step": 15316 }, { "epoch": 0.46944342282701973, "grad_norm": 1.882276229501566, "learning_rate": 5.7351021654852004e-06, "loss": 0.6142, "step": 15317 }, { "epoch": 0.46947407134976094, "grad_norm": 0.813264945202007, "learning_rate": 5.734611237286032e-06, "loss": 0.4327, "step": 15318 }, { "epoch": 0.46950471987250214, "grad_norm": 0.793306333558807, "learning_rate": 5.734120301848548e-06, "loss": 0.4263, "step": 15319 }, { "epoch": 0.46953536839524335, "grad_norm": 1.6703689469082317, "learning_rate": 5.733629359177591e-06, "loss": 0.5592, "step": 15320 }, { "epoch": 0.46956601691798455, "grad_norm": 1.6999425851614094, "learning_rate": 5.733138409277996e-06, "loss": 0.7382, "step": 15321 }, { "epoch": 0.46959666544072576, "grad_norm": 1.6379901762635702, "learning_rate": 5.7326474521546e-06, "loss": 0.683, "step": 15322 }, { "epoch": 0.46962731396346696, "grad_norm": 1.7954029879127296, "learning_rate": 5.732156487812241e-06, "loss": 0.6511, "step": 15323 }, { "epoch": 0.46965796248620817, "grad_norm": 1.7918315317963103, "learning_rate": 5.731665516255758e-06, "loss": 0.6411, "step": 15324 }, { "epoch": 0.4696886110089494, "grad_norm": 1.806801625005755, "learning_rate": 5.731174537489986e-06, "loss": 0.7184, "step": 15325 }, { "epoch": 0.4697192595316906, "grad_norm": 1.7572993740858958, "learning_rate": 5.730683551519764e-06, "loss": 0.5818, "step": 15326 }, { "epoch": 0.4697499080544318, "grad_norm": 1.715496104112758, "learning_rate": 5.7301925583499314e-06, "loss": 0.5924, "step": 15327 }, { "epoch": 0.469780556577173, "grad_norm": 0.7767813460114579, "learning_rate": 5.729701557985325e-06, "loss": 0.4389, "step": 15328 }, { "epoch": 0.4698112050999142, "grad_norm": 1.59555523309083, "learning_rate": 5.729210550430782e-06, "loss": 0.5559, "step": 15329 }, { "epoch": 0.4698418536226554, "grad_norm": 1.6387605576949797, "learning_rate": 5.728719535691139e-06, "loss": 0.6578, "step": 15330 }, { "epoch": 0.4698725021453966, "grad_norm": 1.8417867935225791, "learning_rate": 5.728228513771238e-06, "loss": 0.6185, "step": 15331 }, { "epoch": 0.4699031506681378, "grad_norm": 1.9098281266775572, "learning_rate": 5.727737484675914e-06, "loss": 0.7399, "step": 15332 }, { "epoch": 0.469933799190879, "grad_norm": 1.59263729345396, "learning_rate": 5.727246448410008e-06, "loss": 0.6095, "step": 15333 }, { "epoch": 0.4699644477136202, "grad_norm": 1.638216306387881, "learning_rate": 5.726755404978355e-06, "loss": 0.6166, "step": 15334 }, { "epoch": 0.46999509623636143, "grad_norm": 1.8293710630175708, "learning_rate": 5.726264354385795e-06, "loss": 0.6867, "step": 15335 }, { "epoch": 0.47002574475910264, "grad_norm": 1.7660102783975913, "learning_rate": 5.725773296637167e-06, "loss": 0.5867, "step": 15336 }, { "epoch": 0.47005639328184384, "grad_norm": 1.6730914108113797, "learning_rate": 5.7252822317373105e-06, "loss": 0.594, "step": 15337 }, { "epoch": 0.470087041804585, "grad_norm": 1.7591210649541273, "learning_rate": 5.724791159691061e-06, "loss": 0.6299, "step": 15338 }, { "epoch": 0.4701176903273262, "grad_norm": 1.7711220583153275, "learning_rate": 5.7243000805032585e-06, "loss": 0.6209, "step": 15339 }, { "epoch": 0.4701483388500674, "grad_norm": 1.800975221228765, "learning_rate": 5.723808994178742e-06, "loss": 0.7112, "step": 15340 }, { "epoch": 0.4701789873728086, "grad_norm": 1.647575850077172, "learning_rate": 5.7233179007223514e-06, "loss": 0.6108, "step": 15341 }, { "epoch": 0.4702096358955498, "grad_norm": 1.6591686715795064, "learning_rate": 5.722826800138924e-06, "loss": 0.6684, "step": 15342 }, { "epoch": 0.470240284418291, "grad_norm": 1.7051016378000465, "learning_rate": 5.7223356924332986e-06, "loss": 0.6324, "step": 15343 }, { "epoch": 0.4702709329410322, "grad_norm": 1.8349487604410322, "learning_rate": 5.721844577610315e-06, "loss": 0.7291, "step": 15344 }, { "epoch": 0.47030158146377343, "grad_norm": 1.8188873007551327, "learning_rate": 5.721353455674813e-06, "loss": 0.7062, "step": 15345 }, { "epoch": 0.47033222998651464, "grad_norm": 1.9685039771115298, "learning_rate": 5.7208623266316296e-06, "loss": 0.7738, "step": 15346 }, { "epoch": 0.47036287850925584, "grad_norm": 0.854166814505216, "learning_rate": 5.720371190485605e-06, "loss": 0.4235, "step": 15347 }, { "epoch": 0.47039352703199705, "grad_norm": 1.9199123176045358, "learning_rate": 5.719880047241582e-06, "loss": 0.6169, "step": 15348 }, { "epoch": 0.47042417555473826, "grad_norm": 1.513707837066518, "learning_rate": 5.719388896904393e-06, "loss": 0.6086, "step": 15349 }, { "epoch": 0.47045482407747946, "grad_norm": 1.6537890281004073, "learning_rate": 5.718897739478883e-06, "loss": 0.602, "step": 15350 }, { "epoch": 0.47048547260022067, "grad_norm": 1.6791768352162801, "learning_rate": 5.718406574969888e-06, "loss": 0.6945, "step": 15351 }, { "epoch": 0.4705161211229619, "grad_norm": 1.6035804015618733, "learning_rate": 5.717915403382251e-06, "loss": 0.6968, "step": 15352 }, { "epoch": 0.4705467696457031, "grad_norm": 1.694381530380286, "learning_rate": 5.717424224720809e-06, "loss": 0.7407, "step": 15353 }, { "epoch": 0.4705774181684443, "grad_norm": 1.6173432692310228, "learning_rate": 5.716933038990402e-06, "loss": 0.5707, "step": 15354 }, { "epoch": 0.4706080666911855, "grad_norm": 1.7905930239656171, "learning_rate": 5.71644184619587e-06, "loss": 0.6441, "step": 15355 }, { "epoch": 0.4706387152139267, "grad_norm": 1.6513959775791671, "learning_rate": 5.715950646342055e-06, "loss": 0.6968, "step": 15356 }, { "epoch": 0.4706693637366679, "grad_norm": 1.633612886873287, "learning_rate": 5.715459439433795e-06, "loss": 0.6822, "step": 15357 }, { "epoch": 0.4707000122594091, "grad_norm": 1.6553838827364769, "learning_rate": 5.714968225475927e-06, "loss": 0.654, "step": 15358 }, { "epoch": 0.4707306607821503, "grad_norm": 1.4520706460233468, "learning_rate": 5.7144770044732976e-06, "loss": 0.5455, "step": 15359 }, { "epoch": 0.4707613093048915, "grad_norm": 1.8009397231982984, "learning_rate": 5.7139857764307424e-06, "loss": 0.581, "step": 15360 }, { "epoch": 0.4707919578276327, "grad_norm": 1.644374878189106, "learning_rate": 5.713494541353103e-06, "loss": 0.6893, "step": 15361 }, { "epoch": 0.47082260635037393, "grad_norm": 1.7121843276249857, "learning_rate": 5.713003299245219e-06, "loss": 0.7218, "step": 15362 }, { "epoch": 0.47085325487311513, "grad_norm": 1.5809833110198084, "learning_rate": 5.712512050111931e-06, "loss": 0.6171, "step": 15363 }, { "epoch": 0.47088390339585634, "grad_norm": 1.6734547813068552, "learning_rate": 5.71202079395808e-06, "loss": 0.666, "step": 15364 }, { "epoch": 0.47091455191859755, "grad_norm": 0.8904781488664311, "learning_rate": 5.711529530788505e-06, "loss": 0.4449, "step": 15365 }, { "epoch": 0.47094520044133875, "grad_norm": 1.6060624989158292, "learning_rate": 5.711038260608047e-06, "loss": 0.5721, "step": 15366 }, { "epoch": 0.47097584896407996, "grad_norm": 1.492334107303944, "learning_rate": 5.7105469834215485e-06, "loss": 0.6312, "step": 15367 }, { "epoch": 0.47100649748682116, "grad_norm": 1.9397429759628484, "learning_rate": 5.710055699233848e-06, "loss": 0.6469, "step": 15368 }, { "epoch": 0.4710371460095623, "grad_norm": 1.8313182209040848, "learning_rate": 5.709564408049787e-06, "loss": 0.6729, "step": 15369 }, { "epoch": 0.4710677945323035, "grad_norm": 1.8414256168802179, "learning_rate": 5.709073109874207e-06, "loss": 0.6997, "step": 15370 }, { "epoch": 0.4710984430550447, "grad_norm": 1.9947735413402874, "learning_rate": 5.708581804711947e-06, "loss": 0.6864, "step": 15371 }, { "epoch": 0.47112909157778593, "grad_norm": 1.7563913573284242, "learning_rate": 5.708090492567851e-06, "loss": 0.7186, "step": 15372 }, { "epoch": 0.47115974010052714, "grad_norm": 0.7477822848281048, "learning_rate": 5.707599173446756e-06, "loss": 0.429, "step": 15373 }, { "epoch": 0.47119038862326834, "grad_norm": 1.8354635530203083, "learning_rate": 5.707107847353508e-06, "loss": 0.6334, "step": 15374 }, { "epoch": 0.47122103714600955, "grad_norm": 1.788379770093719, "learning_rate": 5.706616514292944e-06, "loss": 0.5641, "step": 15375 }, { "epoch": 0.47125168566875075, "grad_norm": 0.786486707449813, "learning_rate": 5.7061251742699066e-06, "loss": 0.4509, "step": 15376 }, { "epoch": 0.47128233419149196, "grad_norm": 0.7940277048653479, "learning_rate": 5.705633827289236e-06, "loss": 0.431, "step": 15377 }, { "epoch": 0.47131298271423316, "grad_norm": 1.68842667338076, "learning_rate": 5.705142473355777e-06, "loss": 0.62, "step": 15378 }, { "epoch": 0.47134363123697437, "grad_norm": 1.8998471970625614, "learning_rate": 5.7046511124743676e-06, "loss": 0.6976, "step": 15379 }, { "epoch": 0.4713742797597156, "grad_norm": 1.7286014028847168, "learning_rate": 5.7041597446498505e-06, "loss": 0.6634, "step": 15380 }, { "epoch": 0.4714049282824568, "grad_norm": 1.8574770346974345, "learning_rate": 5.703668369887068e-06, "loss": 0.7337, "step": 15381 }, { "epoch": 0.471435576805198, "grad_norm": 0.8589766842777545, "learning_rate": 5.703176988190861e-06, "loss": 0.4497, "step": 15382 }, { "epoch": 0.4714662253279392, "grad_norm": 1.8713308177362231, "learning_rate": 5.70268559956607e-06, "loss": 0.6173, "step": 15383 }, { "epoch": 0.4714968738506804, "grad_norm": 1.6464431862987428, "learning_rate": 5.702194204017539e-06, "loss": 0.6438, "step": 15384 }, { "epoch": 0.4715275223734216, "grad_norm": 1.5937044468931707, "learning_rate": 5.7017028015501075e-06, "loss": 0.6689, "step": 15385 }, { "epoch": 0.4715581708961628, "grad_norm": 1.7506835711766462, "learning_rate": 5.701211392168619e-06, "loss": 0.6367, "step": 15386 }, { "epoch": 0.471588819418904, "grad_norm": 1.6627046258242368, "learning_rate": 5.700719975877917e-06, "loss": 0.6303, "step": 15387 }, { "epoch": 0.4716194679416452, "grad_norm": 1.6786133055341086, "learning_rate": 5.70022855268284e-06, "loss": 0.7209, "step": 15388 }, { "epoch": 0.4716501164643864, "grad_norm": 1.8091631294297499, "learning_rate": 5.6997371225882335e-06, "loss": 0.666, "step": 15389 }, { "epoch": 0.47168076498712763, "grad_norm": 0.8140956735330429, "learning_rate": 5.699245685598936e-06, "loss": 0.4463, "step": 15390 }, { "epoch": 0.47171141350986884, "grad_norm": 1.6034552786750857, "learning_rate": 5.698754241719795e-06, "loss": 0.7129, "step": 15391 }, { "epoch": 0.47174206203261004, "grad_norm": 1.5827359808971335, "learning_rate": 5.698262790955647e-06, "loss": 0.667, "step": 15392 }, { "epoch": 0.47177271055535125, "grad_norm": 1.7184585401496169, "learning_rate": 5.697771333311338e-06, "loss": 0.6025, "step": 15393 }, { "epoch": 0.47180335907809245, "grad_norm": 1.8618852401145904, "learning_rate": 5.697279868791709e-06, "loss": 0.6871, "step": 15394 }, { "epoch": 0.47183400760083366, "grad_norm": 1.5657790523717543, "learning_rate": 5.696788397401603e-06, "loss": 0.5838, "step": 15395 }, { "epoch": 0.47186465612357487, "grad_norm": 2.1065597177177278, "learning_rate": 5.696296919145864e-06, "loss": 0.6491, "step": 15396 }, { "epoch": 0.47189530464631607, "grad_norm": 1.9543870387733442, "learning_rate": 5.695805434029331e-06, "loss": 0.62, "step": 15397 }, { "epoch": 0.4719259531690573, "grad_norm": 1.8344680247422591, "learning_rate": 5.695313942056852e-06, "loss": 0.6657, "step": 15398 }, { "epoch": 0.4719566016917985, "grad_norm": 1.6846381384744842, "learning_rate": 5.6948224432332655e-06, "loss": 0.6544, "step": 15399 }, { "epoch": 0.47198725021453963, "grad_norm": 1.6147268963144097, "learning_rate": 5.694330937563415e-06, "loss": 0.6394, "step": 15400 }, { "epoch": 0.47201789873728084, "grad_norm": 1.6357211980001114, "learning_rate": 5.693839425052145e-06, "loss": 0.6246, "step": 15401 }, { "epoch": 0.47204854726002204, "grad_norm": 1.6927399984896738, "learning_rate": 5.6933479057042975e-06, "loss": 0.6739, "step": 15402 }, { "epoch": 0.47207919578276325, "grad_norm": 0.83356458453688, "learning_rate": 5.692856379524715e-06, "loss": 0.4582, "step": 15403 }, { "epoch": 0.47210984430550446, "grad_norm": 1.7725002693453424, "learning_rate": 5.6923648465182415e-06, "loss": 0.5445, "step": 15404 }, { "epoch": 0.47214049282824566, "grad_norm": 1.7163231702131232, "learning_rate": 5.6918733066897195e-06, "loss": 0.6787, "step": 15405 }, { "epoch": 0.47217114135098687, "grad_norm": 1.9603665874062464, "learning_rate": 5.691381760043995e-06, "loss": 0.5801, "step": 15406 }, { "epoch": 0.4722017898737281, "grad_norm": 0.7865545860854599, "learning_rate": 5.6908902065859065e-06, "loss": 0.4437, "step": 15407 }, { "epoch": 0.4722324383964693, "grad_norm": 1.6880776695263866, "learning_rate": 5.690398646320302e-06, "loss": 0.7309, "step": 15408 }, { "epoch": 0.4722630869192105, "grad_norm": 1.8866379399185822, "learning_rate": 5.6899070792520206e-06, "loss": 0.6441, "step": 15409 }, { "epoch": 0.4722937354419517, "grad_norm": 1.5729228496846799, "learning_rate": 5.68941550538591e-06, "loss": 0.6545, "step": 15410 }, { "epoch": 0.4723243839646929, "grad_norm": 1.410986477251837, "learning_rate": 5.6889239247268125e-06, "loss": 0.6472, "step": 15411 }, { "epoch": 0.4723550324874341, "grad_norm": 1.8685650748162517, "learning_rate": 5.68843233727957e-06, "loss": 0.6304, "step": 15412 }, { "epoch": 0.4723856810101753, "grad_norm": 1.7371435329218123, "learning_rate": 5.687940743049028e-06, "loss": 0.5405, "step": 15413 }, { "epoch": 0.4724163295329165, "grad_norm": 2.2713301816430467, "learning_rate": 5.6874491420400305e-06, "loss": 0.7014, "step": 15414 }, { "epoch": 0.4724469780556577, "grad_norm": 1.752102678649657, "learning_rate": 5.68695753425742e-06, "loss": 0.7025, "step": 15415 }, { "epoch": 0.4724776265783989, "grad_norm": 0.809975469247889, "learning_rate": 5.686465919706041e-06, "loss": 0.4569, "step": 15416 }, { "epoch": 0.47250827510114013, "grad_norm": 1.9950550602051478, "learning_rate": 5.685974298390739e-06, "loss": 0.6342, "step": 15417 }, { "epoch": 0.47253892362388134, "grad_norm": 1.7346463624262958, "learning_rate": 5.685482670316356e-06, "loss": 0.6771, "step": 15418 }, { "epoch": 0.47256957214662254, "grad_norm": 1.8043902221937422, "learning_rate": 5.6849910354877355e-06, "loss": 0.6907, "step": 15419 }, { "epoch": 0.47260022066936375, "grad_norm": 1.7292877058918072, "learning_rate": 5.684499393909724e-06, "loss": 0.5575, "step": 15420 }, { "epoch": 0.47263086919210495, "grad_norm": 0.7891527730656096, "learning_rate": 5.6840077455871655e-06, "loss": 0.4429, "step": 15421 }, { "epoch": 0.47266151771484616, "grad_norm": 1.662570530558954, "learning_rate": 5.683516090524903e-06, "loss": 0.6349, "step": 15422 }, { "epoch": 0.47269216623758736, "grad_norm": 1.772029443791544, "learning_rate": 5.6830244287277814e-06, "loss": 0.5903, "step": 15423 }, { "epoch": 0.47272281476032857, "grad_norm": 1.5428238842387811, "learning_rate": 5.682532760200646e-06, "loss": 0.5991, "step": 15424 }, { "epoch": 0.4727534632830698, "grad_norm": 1.821685473482376, "learning_rate": 5.68204108494834e-06, "loss": 0.8071, "step": 15425 }, { "epoch": 0.472784111805811, "grad_norm": 2.004035457907489, "learning_rate": 5.681549402975709e-06, "loss": 0.7268, "step": 15426 }, { "epoch": 0.4728147603285522, "grad_norm": 1.6177425709537347, "learning_rate": 5.681057714287597e-06, "loss": 0.5799, "step": 15427 }, { "epoch": 0.4728454088512934, "grad_norm": 1.7108024675960278, "learning_rate": 5.68056601888885e-06, "loss": 0.6243, "step": 15428 }, { "epoch": 0.4728760573740346, "grad_norm": 0.830837692327954, "learning_rate": 5.680074316784312e-06, "loss": 0.434, "step": 15429 }, { "epoch": 0.4729067058967758, "grad_norm": 1.5312459523455786, "learning_rate": 5.679582607978826e-06, "loss": 0.5369, "step": 15430 }, { "epoch": 0.47293735441951695, "grad_norm": 1.6323867489038737, "learning_rate": 5.6790908924772395e-06, "loss": 0.7124, "step": 15431 }, { "epoch": 0.47296800294225816, "grad_norm": 0.754140696131826, "learning_rate": 5.6785991702843976e-06, "loss": 0.4355, "step": 15432 }, { "epoch": 0.47299865146499936, "grad_norm": 1.67795683944206, "learning_rate": 5.6781074414051444e-06, "loss": 0.6746, "step": 15433 }, { "epoch": 0.47302929998774057, "grad_norm": 1.8039461810635964, "learning_rate": 5.677615705844325e-06, "loss": 0.6682, "step": 15434 }, { "epoch": 0.4730599485104818, "grad_norm": 1.7581545899644249, "learning_rate": 5.677123963606783e-06, "loss": 0.5576, "step": 15435 }, { "epoch": 0.473090597033223, "grad_norm": 1.4378611472575276, "learning_rate": 5.676632214697367e-06, "loss": 0.6139, "step": 15436 }, { "epoch": 0.4731212455559642, "grad_norm": 1.6772841614754057, "learning_rate": 5.67614045912092e-06, "loss": 0.6772, "step": 15437 }, { "epoch": 0.4731518940787054, "grad_norm": 1.822537478718402, "learning_rate": 5.675648696882288e-06, "loss": 0.6959, "step": 15438 }, { "epoch": 0.4731825426014466, "grad_norm": 1.6789924375331204, "learning_rate": 5.675156927986316e-06, "loss": 0.6473, "step": 15439 }, { "epoch": 0.4732131911241878, "grad_norm": 1.5964482101607675, "learning_rate": 5.674665152437851e-06, "loss": 0.6174, "step": 15440 }, { "epoch": 0.473243839646929, "grad_norm": 0.816301118786572, "learning_rate": 5.674173370241738e-06, "loss": 0.431, "step": 15441 }, { "epoch": 0.4732744881696702, "grad_norm": 1.7571656546057848, "learning_rate": 5.673681581402821e-06, "loss": 0.6819, "step": 15442 }, { "epoch": 0.4733051366924114, "grad_norm": 1.825838093879898, "learning_rate": 5.6731897859259475e-06, "loss": 0.713, "step": 15443 }, { "epoch": 0.4733357852151526, "grad_norm": 1.5985376943805123, "learning_rate": 5.672697983815962e-06, "loss": 0.6254, "step": 15444 }, { "epoch": 0.47336643373789383, "grad_norm": 0.9401603708105577, "learning_rate": 5.672206175077715e-06, "loss": 0.4269, "step": 15445 }, { "epoch": 0.47339708226063504, "grad_norm": 1.7425100453269824, "learning_rate": 5.671714359716044e-06, "loss": 0.6589, "step": 15446 }, { "epoch": 0.47342773078337624, "grad_norm": 1.716044889081237, "learning_rate": 5.671222537735802e-06, "loss": 0.7123, "step": 15447 }, { "epoch": 0.47345837930611745, "grad_norm": 1.7219126101659372, "learning_rate": 5.670730709141831e-06, "loss": 0.6978, "step": 15448 }, { "epoch": 0.47348902782885866, "grad_norm": 2.0177365531069578, "learning_rate": 5.67023887393898e-06, "loss": 0.7142, "step": 15449 }, { "epoch": 0.47351967635159986, "grad_norm": 1.6777377819379096, "learning_rate": 5.6697470321320936e-06, "loss": 0.5876, "step": 15450 }, { "epoch": 0.47355032487434107, "grad_norm": 1.8650971247186574, "learning_rate": 5.669255183726019e-06, "loss": 0.6965, "step": 15451 }, { "epoch": 0.47358097339708227, "grad_norm": 1.8170359420279198, "learning_rate": 5.6687633287256e-06, "loss": 0.7209, "step": 15452 }, { "epoch": 0.4736116219198235, "grad_norm": 0.8764290692905162, "learning_rate": 5.668271467135686e-06, "loss": 0.4788, "step": 15453 }, { "epoch": 0.4736422704425647, "grad_norm": 1.4910091264024439, "learning_rate": 5.667779598961122e-06, "loss": 0.5993, "step": 15454 }, { "epoch": 0.4736729189653059, "grad_norm": 2.109227883469829, "learning_rate": 5.667287724206754e-06, "loss": 0.7047, "step": 15455 }, { "epoch": 0.4737035674880471, "grad_norm": 1.5363143264244963, "learning_rate": 5.666795842877432e-06, "loss": 0.5974, "step": 15456 }, { "epoch": 0.4737342160107883, "grad_norm": 1.7554539004266738, "learning_rate": 5.666303954977998e-06, "loss": 0.6511, "step": 15457 }, { "epoch": 0.4737648645335295, "grad_norm": 1.5185403606752326, "learning_rate": 5.6658120605133004e-06, "loss": 0.6247, "step": 15458 }, { "epoch": 0.4737955130562707, "grad_norm": 1.7001453083798104, "learning_rate": 5.6653201594881855e-06, "loss": 0.6823, "step": 15459 }, { "epoch": 0.4738261615790119, "grad_norm": 1.61185931503769, "learning_rate": 5.664828251907504e-06, "loss": 0.6625, "step": 15460 }, { "epoch": 0.4738568101017531, "grad_norm": 1.8282768548847317, "learning_rate": 5.664336337776097e-06, "loss": 0.6338, "step": 15461 }, { "epoch": 0.4738874586244943, "grad_norm": 1.4511020272977548, "learning_rate": 5.6638444170988136e-06, "loss": 0.6113, "step": 15462 }, { "epoch": 0.4739181071472355, "grad_norm": 0.7779504020987157, "learning_rate": 5.6633524898805015e-06, "loss": 0.4559, "step": 15463 }, { "epoch": 0.4739487556699767, "grad_norm": 1.6579556582426402, "learning_rate": 5.662860556126009e-06, "loss": 0.6666, "step": 15464 }, { "epoch": 0.4739794041927179, "grad_norm": 1.5562531288093215, "learning_rate": 5.662368615840182e-06, "loss": 0.4865, "step": 15465 }, { "epoch": 0.4740100527154591, "grad_norm": 0.840914634136671, "learning_rate": 5.661876669027866e-06, "loss": 0.4345, "step": 15466 }, { "epoch": 0.4740407012382003, "grad_norm": 1.644418376057417, "learning_rate": 5.661384715693911e-06, "loss": 0.5534, "step": 15467 }, { "epoch": 0.4740713497609415, "grad_norm": 1.6902600747260668, "learning_rate": 5.660892755843162e-06, "loss": 0.6699, "step": 15468 }, { "epoch": 0.4741019982836827, "grad_norm": 0.7729899829324477, "learning_rate": 5.6604007894804695e-06, "loss": 0.445, "step": 15469 }, { "epoch": 0.4741326468064239, "grad_norm": 1.5820941284600258, "learning_rate": 5.659908816610677e-06, "loss": 0.6452, "step": 15470 }, { "epoch": 0.4741632953291651, "grad_norm": 1.5450605215544297, "learning_rate": 5.659416837238636e-06, "loss": 0.63, "step": 15471 }, { "epoch": 0.47419394385190633, "grad_norm": 1.8045104819131237, "learning_rate": 5.658924851369191e-06, "loss": 0.6514, "step": 15472 }, { "epoch": 0.47422459237464754, "grad_norm": 1.7270009988909556, "learning_rate": 5.658432859007191e-06, "loss": 0.6934, "step": 15473 }, { "epoch": 0.47425524089738874, "grad_norm": 1.9851646169025834, "learning_rate": 5.657940860157484e-06, "loss": 0.6637, "step": 15474 }, { "epoch": 0.47428588942012995, "grad_norm": 1.7380968835443777, "learning_rate": 5.657448854824916e-06, "loss": 0.6057, "step": 15475 }, { "epoch": 0.47431653794287115, "grad_norm": 1.8552283282747923, "learning_rate": 5.656956843014338e-06, "loss": 0.6655, "step": 15476 }, { "epoch": 0.47434718646561236, "grad_norm": 1.787781093211889, "learning_rate": 5.656464824730595e-06, "loss": 0.7087, "step": 15477 }, { "epoch": 0.47437783498835356, "grad_norm": 1.5545905446797201, "learning_rate": 5.655972799978536e-06, "loss": 0.6156, "step": 15478 }, { "epoch": 0.47440848351109477, "grad_norm": 0.7944217690621942, "learning_rate": 5.655480768763011e-06, "loss": 0.4691, "step": 15479 }, { "epoch": 0.474439132033836, "grad_norm": 0.7664186571804441, "learning_rate": 5.654988731088864e-06, "loss": 0.43, "step": 15480 }, { "epoch": 0.4744697805565772, "grad_norm": 1.7208063986095954, "learning_rate": 5.654496686960946e-06, "loss": 0.7042, "step": 15481 }, { "epoch": 0.4745004290793184, "grad_norm": 0.7933061595124745, "learning_rate": 5.654004636384105e-06, "loss": 0.4583, "step": 15482 }, { "epoch": 0.4745310776020596, "grad_norm": 1.754999064195664, "learning_rate": 5.653512579363189e-06, "loss": 0.6361, "step": 15483 }, { "epoch": 0.4745617261248008, "grad_norm": 1.6849058380689685, "learning_rate": 5.653020515903047e-06, "loss": 0.689, "step": 15484 }, { "epoch": 0.474592374647542, "grad_norm": 0.7512360486332472, "learning_rate": 5.652528446008526e-06, "loss": 0.4156, "step": 15485 }, { "epoch": 0.4746230231702832, "grad_norm": 1.686927197400893, "learning_rate": 5.652036369684476e-06, "loss": 0.6369, "step": 15486 }, { "epoch": 0.4746536716930244, "grad_norm": 1.919688063551495, "learning_rate": 5.651544286935744e-06, "loss": 0.616, "step": 15487 }, { "epoch": 0.4746843202157656, "grad_norm": 1.7525719029733853, "learning_rate": 5.65105219776718e-06, "loss": 0.6828, "step": 15488 }, { "epoch": 0.4747149687385068, "grad_norm": 1.6571343324253363, "learning_rate": 5.650560102183631e-06, "loss": 0.5963, "step": 15489 }, { "epoch": 0.47474561726124803, "grad_norm": 1.9598980322599082, "learning_rate": 5.6500680001899474e-06, "loss": 0.7026, "step": 15490 }, { "epoch": 0.47477626578398924, "grad_norm": 1.5412733396152707, "learning_rate": 5.649575891790978e-06, "loss": 0.6398, "step": 15491 }, { "epoch": 0.47480691430673044, "grad_norm": 1.734341110457538, "learning_rate": 5.649083776991571e-06, "loss": 0.607, "step": 15492 }, { "epoch": 0.4748375628294716, "grad_norm": 0.8189212342904613, "learning_rate": 5.6485916557965755e-06, "loss": 0.4426, "step": 15493 }, { "epoch": 0.4748682113522128, "grad_norm": 1.8896561893076729, "learning_rate": 5.648099528210839e-06, "loss": 0.6364, "step": 15494 }, { "epoch": 0.474898859874954, "grad_norm": 1.881748470621892, "learning_rate": 5.647607394239215e-06, "loss": 0.7073, "step": 15495 }, { "epoch": 0.4749295083976952, "grad_norm": 1.479417889794829, "learning_rate": 5.647115253886547e-06, "loss": 0.6813, "step": 15496 }, { "epoch": 0.4749601569204364, "grad_norm": 1.5384055221240505, "learning_rate": 5.646623107157688e-06, "loss": 0.6629, "step": 15497 }, { "epoch": 0.4749908054431776, "grad_norm": 1.718932637304251, "learning_rate": 5.646130954057485e-06, "loss": 0.709, "step": 15498 }, { "epoch": 0.4750214539659188, "grad_norm": 1.6856376798766473, "learning_rate": 5.645638794590791e-06, "loss": 0.6461, "step": 15499 }, { "epoch": 0.47505210248866003, "grad_norm": 1.987366670400017, "learning_rate": 5.6451466287624505e-06, "loss": 0.6322, "step": 15500 }, { "epoch": 0.47508275101140124, "grad_norm": 0.7924886851557382, "learning_rate": 5.644654456577315e-06, "loss": 0.4689, "step": 15501 }, { "epoch": 0.47511339953414244, "grad_norm": 1.7758566048257967, "learning_rate": 5.6441622780402365e-06, "loss": 0.7002, "step": 15502 }, { "epoch": 0.47514404805688365, "grad_norm": 1.611598699517901, "learning_rate": 5.64367009315606e-06, "loss": 0.7204, "step": 15503 }, { "epoch": 0.47517469657962486, "grad_norm": 1.5759132067062087, "learning_rate": 5.643177901929637e-06, "loss": 0.6183, "step": 15504 }, { "epoch": 0.47520534510236606, "grad_norm": 1.8760579026261686, "learning_rate": 5.64268570436582e-06, "loss": 0.6414, "step": 15505 }, { "epoch": 0.47523599362510727, "grad_norm": 1.9783614665159077, "learning_rate": 5.642193500469455e-06, "loss": 0.6879, "step": 15506 }, { "epoch": 0.4752666421478485, "grad_norm": 1.6169990838548705, "learning_rate": 5.641701290245391e-06, "loss": 0.6318, "step": 15507 }, { "epoch": 0.4752972906705897, "grad_norm": 1.5911096811454053, "learning_rate": 5.641209073698483e-06, "loss": 0.5898, "step": 15508 }, { "epoch": 0.4753279391933309, "grad_norm": 1.804579813084678, "learning_rate": 5.640716850833576e-06, "loss": 0.7345, "step": 15509 }, { "epoch": 0.4753585877160721, "grad_norm": 0.7846693613683985, "learning_rate": 5.640224621655526e-06, "loss": 0.4496, "step": 15510 }, { "epoch": 0.4753892362388133, "grad_norm": 0.7780770846903071, "learning_rate": 5.6397323861691744e-06, "loss": 0.4577, "step": 15511 }, { "epoch": 0.4754198847615545, "grad_norm": 1.6915694875567588, "learning_rate": 5.639240144379378e-06, "loss": 0.655, "step": 15512 }, { "epoch": 0.4754505332842957, "grad_norm": 1.6529625347872212, "learning_rate": 5.638747896290984e-06, "loss": 0.6766, "step": 15513 }, { "epoch": 0.4754811818070369, "grad_norm": 1.728793554861915, "learning_rate": 5.638255641908844e-06, "loss": 0.6106, "step": 15514 }, { "epoch": 0.4755118303297781, "grad_norm": 0.7875520048017741, "learning_rate": 5.6377633812378076e-06, "loss": 0.4418, "step": 15515 }, { "epoch": 0.4755424788525193, "grad_norm": 1.5897993629763938, "learning_rate": 5.6372711142827255e-06, "loss": 0.6074, "step": 15516 }, { "epoch": 0.47557312737526053, "grad_norm": 0.8053703758052364, "learning_rate": 5.636778841048447e-06, "loss": 0.4596, "step": 15517 }, { "epoch": 0.47560377589800173, "grad_norm": 1.7972521464521396, "learning_rate": 5.6362865615398255e-06, "loss": 0.6283, "step": 15518 }, { "epoch": 0.47563442442074294, "grad_norm": 0.81293461126903, "learning_rate": 5.635794275761709e-06, "loss": 0.4479, "step": 15519 }, { "epoch": 0.47566507294348415, "grad_norm": 1.8525301062048631, "learning_rate": 5.635301983718948e-06, "loss": 0.5749, "step": 15520 }, { "epoch": 0.47569572146622535, "grad_norm": 1.5491544902145036, "learning_rate": 5.634809685416396e-06, "loss": 0.6513, "step": 15521 }, { "epoch": 0.47572636998896656, "grad_norm": 1.8977290142424985, "learning_rate": 5.6343173808589e-06, "loss": 0.6559, "step": 15522 }, { "epoch": 0.47575701851170776, "grad_norm": 1.6832039994360215, "learning_rate": 5.633825070051314e-06, "loss": 0.6645, "step": 15523 }, { "epoch": 0.4757876670344489, "grad_norm": 1.8411277663368941, "learning_rate": 5.633332752998486e-06, "loss": 0.5749, "step": 15524 }, { "epoch": 0.4758183155571901, "grad_norm": 1.60534455963053, "learning_rate": 5.632840429705269e-06, "loss": 0.6392, "step": 15525 }, { "epoch": 0.4758489640799313, "grad_norm": 1.9409461107712593, "learning_rate": 5.632348100176513e-06, "loss": 0.7559, "step": 15526 }, { "epoch": 0.47587961260267253, "grad_norm": 0.8654519454960313, "learning_rate": 5.63185576441707e-06, "loss": 0.4728, "step": 15527 }, { "epoch": 0.47591026112541374, "grad_norm": 1.7425351439795411, "learning_rate": 5.631363422431789e-06, "loss": 0.5893, "step": 15528 }, { "epoch": 0.47594090964815494, "grad_norm": 1.8034287357966114, "learning_rate": 5.6308710742255255e-06, "loss": 0.6269, "step": 15529 }, { "epoch": 0.47597155817089615, "grad_norm": 1.7130612385167228, "learning_rate": 5.630378719803126e-06, "loss": 0.6683, "step": 15530 }, { "epoch": 0.47600220669363735, "grad_norm": 1.8984953115466603, "learning_rate": 5.629886359169443e-06, "loss": 0.6587, "step": 15531 }, { "epoch": 0.47603285521637856, "grad_norm": 1.5796170250212163, "learning_rate": 5.62939399232933e-06, "loss": 0.5905, "step": 15532 }, { "epoch": 0.47606350373911976, "grad_norm": 1.5715323829082242, "learning_rate": 5.628901619287636e-06, "loss": 0.5574, "step": 15533 }, { "epoch": 0.47609415226186097, "grad_norm": 1.8791626368884362, "learning_rate": 5.628409240049214e-06, "loss": 0.6717, "step": 15534 }, { "epoch": 0.4761248007846022, "grad_norm": 1.5938077708757987, "learning_rate": 5.627916854618914e-06, "loss": 0.6407, "step": 15535 }, { "epoch": 0.4761554493073434, "grad_norm": 1.717222595805861, "learning_rate": 5.62742446300159e-06, "loss": 0.6917, "step": 15536 }, { "epoch": 0.4761860978300846, "grad_norm": 1.938558382289328, "learning_rate": 5.626932065202091e-06, "loss": 0.6374, "step": 15537 }, { "epoch": 0.4762167463528258, "grad_norm": 1.7012256179579708, "learning_rate": 5.626439661225272e-06, "loss": 0.5862, "step": 15538 }, { "epoch": 0.476247394875567, "grad_norm": 1.8624569502356618, "learning_rate": 5.625947251075979e-06, "loss": 0.7282, "step": 15539 }, { "epoch": 0.4762780433983082, "grad_norm": 1.8273464500307197, "learning_rate": 5.6254548347590695e-06, "loss": 0.6632, "step": 15540 }, { "epoch": 0.4763086919210494, "grad_norm": 1.8183089520483169, "learning_rate": 5.624962412279395e-06, "loss": 0.6648, "step": 15541 }, { "epoch": 0.4763393404437906, "grad_norm": 1.8202389720064491, "learning_rate": 5.624469983641805e-06, "loss": 0.5173, "step": 15542 }, { "epoch": 0.4763699889665318, "grad_norm": 1.5332525816630531, "learning_rate": 5.623977548851151e-06, "loss": 0.5359, "step": 15543 }, { "epoch": 0.476400637489273, "grad_norm": 1.65670474379285, "learning_rate": 5.623485107912289e-06, "loss": 0.601, "step": 15544 }, { "epoch": 0.47643128601201423, "grad_norm": 1.7782441141026049, "learning_rate": 5.622992660830066e-06, "loss": 0.6449, "step": 15545 }, { "epoch": 0.47646193453475544, "grad_norm": 1.6446102061576298, "learning_rate": 5.622500207609338e-06, "loss": 0.7251, "step": 15546 }, { "epoch": 0.47649258305749664, "grad_norm": 0.7990946382112932, "learning_rate": 5.622007748254957e-06, "loss": 0.4362, "step": 15547 }, { "epoch": 0.47652323158023785, "grad_norm": 0.8377296073899956, "learning_rate": 5.6215152827717745e-06, "loss": 0.4561, "step": 15548 }, { "epoch": 0.47655388010297905, "grad_norm": 1.8677725397074028, "learning_rate": 5.621022811164643e-06, "loss": 0.6589, "step": 15549 }, { "epoch": 0.47658452862572026, "grad_norm": 0.7645586918276411, "learning_rate": 5.620530333438413e-06, "loss": 0.4346, "step": 15550 }, { "epoch": 0.47661517714846147, "grad_norm": 2.1099795527546323, "learning_rate": 5.620037849597942e-06, "loss": 0.7206, "step": 15551 }, { "epoch": 0.47664582567120267, "grad_norm": 1.586889589532007, "learning_rate": 5.619545359648075e-06, "loss": 0.6323, "step": 15552 }, { "epoch": 0.4766764741939439, "grad_norm": 0.7445736434761381, "learning_rate": 5.6190528635936735e-06, "loss": 0.4087, "step": 15553 }, { "epoch": 0.4767071227166851, "grad_norm": 1.6053893911054078, "learning_rate": 5.618560361439583e-06, "loss": 0.5237, "step": 15554 }, { "epoch": 0.47673777123942623, "grad_norm": 1.8139360389186776, "learning_rate": 5.618067853190661e-06, "loss": 0.6754, "step": 15555 }, { "epoch": 0.47676841976216744, "grad_norm": 1.7416192727575008, "learning_rate": 5.617575338851757e-06, "loss": 0.6585, "step": 15556 }, { "epoch": 0.47679906828490864, "grad_norm": 1.7472715884291887, "learning_rate": 5.617082818427726e-06, "loss": 0.6725, "step": 15557 }, { "epoch": 0.47682971680764985, "grad_norm": 1.757446125786569, "learning_rate": 5.6165902919234186e-06, "loss": 0.6694, "step": 15558 }, { "epoch": 0.47686036533039106, "grad_norm": 1.5740038552281257, "learning_rate": 5.616097759343691e-06, "loss": 0.5997, "step": 15559 }, { "epoch": 0.47689101385313226, "grad_norm": 1.5311214964640032, "learning_rate": 5.615605220693393e-06, "loss": 0.5569, "step": 15560 }, { "epoch": 0.47692166237587347, "grad_norm": 1.7640035651801176, "learning_rate": 5.615112675977379e-06, "loss": 0.701, "step": 15561 }, { "epoch": 0.4769523108986147, "grad_norm": 2.1060929913151374, "learning_rate": 5.614620125200504e-06, "loss": 0.5983, "step": 15562 }, { "epoch": 0.4769829594213559, "grad_norm": 1.6348999800721873, "learning_rate": 5.614127568367618e-06, "loss": 0.6781, "step": 15563 }, { "epoch": 0.4770136079440971, "grad_norm": 1.8118640666583852, "learning_rate": 5.6136350054835785e-06, "loss": 0.6431, "step": 15564 }, { "epoch": 0.4770442564668383, "grad_norm": 1.6097345687178892, "learning_rate": 5.6131424365532335e-06, "loss": 0.6724, "step": 15565 }, { "epoch": 0.4770749049895795, "grad_norm": 1.7293266491026056, "learning_rate": 5.6126498615814405e-06, "loss": 0.6609, "step": 15566 }, { "epoch": 0.4771055535123207, "grad_norm": 1.7893939232841394, "learning_rate": 5.612157280573052e-06, "loss": 0.699, "step": 15567 }, { "epoch": 0.4771362020350619, "grad_norm": 1.4928099674397612, "learning_rate": 5.6116646935329215e-06, "loss": 0.6314, "step": 15568 }, { "epoch": 0.4771668505578031, "grad_norm": 1.8770323900333565, "learning_rate": 5.611172100465902e-06, "loss": 0.7063, "step": 15569 }, { "epoch": 0.4771974990805443, "grad_norm": 1.6757565507209966, "learning_rate": 5.610679501376847e-06, "loss": 0.5922, "step": 15570 }, { "epoch": 0.4772281476032855, "grad_norm": 1.552202591711004, "learning_rate": 5.610186896270608e-06, "loss": 0.5207, "step": 15571 }, { "epoch": 0.47725879612602673, "grad_norm": 0.8890319779902363, "learning_rate": 5.609694285152045e-06, "loss": 0.4296, "step": 15572 }, { "epoch": 0.47728944464876794, "grad_norm": 1.6500154741235182, "learning_rate": 5.6092016680260074e-06, "loss": 0.5972, "step": 15573 }, { "epoch": 0.47732009317150914, "grad_norm": 1.6695740451547878, "learning_rate": 5.6087090448973505e-06, "loss": 0.6105, "step": 15574 }, { "epoch": 0.47735074169425035, "grad_norm": 1.8476268273582428, "learning_rate": 5.608216415770926e-06, "loss": 0.6975, "step": 15575 }, { "epoch": 0.47738139021699155, "grad_norm": 1.966366387315669, "learning_rate": 5.60772378065159e-06, "loss": 0.6709, "step": 15576 }, { "epoch": 0.47741203873973276, "grad_norm": 2.1032519242407957, "learning_rate": 5.607231139544197e-06, "loss": 0.6647, "step": 15577 }, { "epoch": 0.47744268726247396, "grad_norm": 1.6044138250258386, "learning_rate": 5.606738492453599e-06, "loss": 0.6191, "step": 15578 }, { "epoch": 0.47747333578521517, "grad_norm": 1.7048485818277592, "learning_rate": 5.606245839384653e-06, "loss": 0.6991, "step": 15579 }, { "epoch": 0.4775039843079564, "grad_norm": 1.477265903198017, "learning_rate": 5.60575318034221e-06, "loss": 0.6449, "step": 15580 }, { "epoch": 0.4775346328306976, "grad_norm": 1.819466959528439, "learning_rate": 5.605260515331128e-06, "loss": 0.6402, "step": 15581 }, { "epoch": 0.4775652813534388, "grad_norm": 0.8116012431780533, "learning_rate": 5.604767844356256e-06, "loss": 0.4439, "step": 15582 }, { "epoch": 0.47759592987618, "grad_norm": 1.8976645308593283, "learning_rate": 5.604275167422455e-06, "loss": 0.6413, "step": 15583 }, { "epoch": 0.4776265783989212, "grad_norm": 1.6749811186919041, "learning_rate": 5.603782484534574e-06, "loss": 0.6556, "step": 15584 }, { "epoch": 0.4776572269216624, "grad_norm": 1.9157857130455715, "learning_rate": 5.60328979569747e-06, "loss": 0.6362, "step": 15585 }, { "epoch": 0.47768787544440355, "grad_norm": 1.5572251157237975, "learning_rate": 5.6027971009159975e-06, "loss": 0.5913, "step": 15586 }, { "epoch": 0.47771852396714476, "grad_norm": 1.975077616383793, "learning_rate": 5.602304400195012e-06, "loss": 0.6921, "step": 15587 }, { "epoch": 0.47774917248988596, "grad_norm": 1.8083686370438878, "learning_rate": 5.601811693539366e-06, "loss": 0.6373, "step": 15588 }, { "epoch": 0.47777982101262717, "grad_norm": 0.7698683495515339, "learning_rate": 5.601318980953916e-06, "loss": 0.4469, "step": 15589 }, { "epoch": 0.4778104695353684, "grad_norm": 1.698929625438872, "learning_rate": 5.600826262443516e-06, "loss": 0.5971, "step": 15590 }, { "epoch": 0.4778411180581096, "grad_norm": 1.7688677936885404, "learning_rate": 5.6003335380130205e-06, "loss": 0.6269, "step": 15591 }, { "epoch": 0.4778717665808508, "grad_norm": 1.7773407170855278, "learning_rate": 5.599840807667286e-06, "loss": 0.6782, "step": 15592 }, { "epoch": 0.477902415103592, "grad_norm": 1.6863076529377763, "learning_rate": 5.599348071411166e-06, "loss": 0.603, "step": 15593 }, { "epoch": 0.4779330636263332, "grad_norm": 1.7216771627048855, "learning_rate": 5.598855329249516e-06, "loss": 0.7271, "step": 15594 }, { "epoch": 0.4779637121490744, "grad_norm": 1.7331383230166535, "learning_rate": 5.598362581187192e-06, "loss": 0.6266, "step": 15595 }, { "epoch": 0.4779943606718156, "grad_norm": 1.786961303316964, "learning_rate": 5.597869827229048e-06, "loss": 0.6108, "step": 15596 }, { "epoch": 0.4780250091945568, "grad_norm": 1.7247579585714095, "learning_rate": 5.597377067379939e-06, "loss": 0.6241, "step": 15597 }, { "epoch": 0.478055657717298, "grad_norm": 1.5540757193005086, "learning_rate": 5.596884301644722e-06, "loss": 0.5818, "step": 15598 }, { "epoch": 0.4780863062400392, "grad_norm": 1.6472451304742606, "learning_rate": 5.59639153002825e-06, "loss": 0.5648, "step": 15599 }, { "epoch": 0.47811695476278043, "grad_norm": 1.9116180284450104, "learning_rate": 5.595898752535381e-06, "loss": 0.6554, "step": 15600 }, { "epoch": 0.47814760328552164, "grad_norm": 1.709798007197622, "learning_rate": 5.5954059691709674e-06, "loss": 0.6111, "step": 15601 }, { "epoch": 0.47817825180826284, "grad_norm": 2.0221135060318973, "learning_rate": 5.594913179939868e-06, "loss": 0.6523, "step": 15602 }, { "epoch": 0.47820890033100405, "grad_norm": 1.6460440484324923, "learning_rate": 5.594420384846937e-06, "loss": 0.5766, "step": 15603 }, { "epoch": 0.47823954885374526, "grad_norm": 0.8693781213211843, "learning_rate": 5.593927583897027e-06, "loss": 0.4735, "step": 15604 }, { "epoch": 0.47827019737648646, "grad_norm": 1.5905833404360576, "learning_rate": 5.593434777095001e-06, "loss": 0.5293, "step": 15605 }, { "epoch": 0.47830084589922767, "grad_norm": 1.7818658926969055, "learning_rate": 5.5929419644457075e-06, "loss": 0.6347, "step": 15606 }, { "epoch": 0.4783314944219689, "grad_norm": 0.7762191351084722, "learning_rate": 5.5924491459540055e-06, "loss": 0.4432, "step": 15607 }, { "epoch": 0.4783621429447101, "grad_norm": 0.7722137631110161, "learning_rate": 5.59195632162475e-06, "loss": 0.4327, "step": 15608 }, { "epoch": 0.4783927914674513, "grad_norm": 0.7949009098874537, "learning_rate": 5.591463491462799e-06, "loss": 0.4656, "step": 15609 }, { "epoch": 0.4784234399901925, "grad_norm": 1.6259145592328128, "learning_rate": 5.5909706554730055e-06, "loss": 0.629, "step": 15610 }, { "epoch": 0.4784540885129337, "grad_norm": 1.6526604324454144, "learning_rate": 5.5904778136602274e-06, "loss": 0.6461, "step": 15611 }, { "epoch": 0.4784847370356749, "grad_norm": 1.7867521897621095, "learning_rate": 5.589984966029319e-06, "loss": 0.6528, "step": 15612 }, { "epoch": 0.4785153855584161, "grad_norm": 0.7484012172388018, "learning_rate": 5.5894921125851395e-06, "loss": 0.4136, "step": 15613 }, { "epoch": 0.4785460340811573, "grad_norm": 1.7425635611961117, "learning_rate": 5.588999253332543e-06, "loss": 0.5768, "step": 15614 }, { "epoch": 0.4785766826038985, "grad_norm": 1.5550781070084223, "learning_rate": 5.588506388276386e-06, "loss": 0.6891, "step": 15615 }, { "epoch": 0.4786073311266397, "grad_norm": 1.709947174514684, "learning_rate": 5.5880135174215254e-06, "loss": 0.7044, "step": 15616 }, { "epoch": 0.4786379796493809, "grad_norm": 1.5886867256407573, "learning_rate": 5.587520640772816e-06, "loss": 0.5897, "step": 15617 }, { "epoch": 0.4786686281721221, "grad_norm": 1.6007577249088158, "learning_rate": 5.587027758335118e-06, "loss": 0.6324, "step": 15618 }, { "epoch": 0.4786992766948633, "grad_norm": 1.5512604085254218, "learning_rate": 5.586534870113282e-06, "loss": 0.6803, "step": 15619 }, { "epoch": 0.4787299252176045, "grad_norm": 1.7576724940817925, "learning_rate": 5.58604197611217e-06, "loss": 0.6107, "step": 15620 }, { "epoch": 0.4787605737403457, "grad_norm": 1.8173678999584204, "learning_rate": 5.585549076336635e-06, "loss": 0.6398, "step": 15621 }, { "epoch": 0.4787912222630869, "grad_norm": 1.755779921052031, "learning_rate": 5.585056170791537e-06, "loss": 0.6511, "step": 15622 }, { "epoch": 0.4788218707858281, "grad_norm": 1.6724254968731276, "learning_rate": 5.584563259481729e-06, "loss": 0.6086, "step": 15623 }, { "epoch": 0.4788525193085693, "grad_norm": 1.5961965210515088, "learning_rate": 5.584070342412071e-06, "loss": 0.6309, "step": 15624 }, { "epoch": 0.4788831678313105, "grad_norm": 1.7793921711392748, "learning_rate": 5.583577419587417e-06, "loss": 0.6291, "step": 15625 }, { "epoch": 0.4789138163540517, "grad_norm": 1.9845772304818186, "learning_rate": 5.583084491012628e-06, "loss": 0.6597, "step": 15626 }, { "epoch": 0.47894446487679293, "grad_norm": 1.4276944027134046, "learning_rate": 5.582591556692556e-06, "loss": 0.6777, "step": 15627 }, { "epoch": 0.47897511339953414, "grad_norm": 2.0200834673044104, "learning_rate": 5.58209861663206e-06, "loss": 0.7198, "step": 15628 }, { "epoch": 0.47900576192227534, "grad_norm": 1.8122000519142376, "learning_rate": 5.5816056708359975e-06, "loss": 0.7022, "step": 15629 }, { "epoch": 0.47903641044501655, "grad_norm": 1.4797547541118696, "learning_rate": 5.581112719309227e-06, "loss": 0.6124, "step": 15630 }, { "epoch": 0.47906705896775775, "grad_norm": 1.625455800137405, "learning_rate": 5.580619762056604e-06, "loss": 0.6117, "step": 15631 }, { "epoch": 0.47909770749049896, "grad_norm": 1.8752368364225718, "learning_rate": 5.580126799082984e-06, "loss": 0.5393, "step": 15632 }, { "epoch": 0.47912835601324016, "grad_norm": 2.14980039168143, "learning_rate": 5.57963383039323e-06, "loss": 0.6762, "step": 15633 }, { "epoch": 0.47915900453598137, "grad_norm": 1.6939497108376567, "learning_rate": 5.5791408559921925e-06, "loss": 0.612, "step": 15634 }, { "epoch": 0.4791896530587226, "grad_norm": 1.8126544226637673, "learning_rate": 5.578647875884733e-06, "loss": 0.6479, "step": 15635 }, { "epoch": 0.4792203015814638, "grad_norm": 0.8657916172878326, "learning_rate": 5.578154890075707e-06, "loss": 0.4595, "step": 15636 }, { "epoch": 0.479250950104205, "grad_norm": 1.5499544341820883, "learning_rate": 5.577661898569975e-06, "loss": 0.6866, "step": 15637 }, { "epoch": 0.4792815986269462, "grad_norm": 1.7134083646487333, "learning_rate": 5.57716890137239e-06, "loss": 0.5714, "step": 15638 }, { "epoch": 0.4793122471496874, "grad_norm": 0.7879964076346224, "learning_rate": 5.576675898487813e-06, "loss": 0.4422, "step": 15639 }, { "epoch": 0.4793428956724286, "grad_norm": 1.5932975257622493, "learning_rate": 5.5761828899211e-06, "loss": 0.652, "step": 15640 }, { "epoch": 0.4793735441951698, "grad_norm": 0.8220474624561189, "learning_rate": 5.575689875677112e-06, "loss": 0.4567, "step": 15641 }, { "epoch": 0.479404192717911, "grad_norm": 1.864955029049998, "learning_rate": 5.575196855760703e-06, "loss": 0.6573, "step": 15642 }, { "epoch": 0.4794348412406522, "grad_norm": 1.7235525630156625, "learning_rate": 5.574703830176732e-06, "loss": 0.6282, "step": 15643 }, { "epoch": 0.4794654897633934, "grad_norm": 1.8145996836478624, "learning_rate": 5.574210798930056e-06, "loss": 0.7177, "step": 15644 }, { "epoch": 0.47949613828613463, "grad_norm": 1.814619179015747, "learning_rate": 5.573717762025537e-06, "loss": 0.6658, "step": 15645 }, { "epoch": 0.47952678680887584, "grad_norm": 1.6881071861989227, "learning_rate": 5.573224719468028e-06, "loss": 0.5687, "step": 15646 }, { "epoch": 0.47955743533161704, "grad_norm": 0.7786817045453475, "learning_rate": 5.572731671262389e-06, "loss": 0.4444, "step": 15647 }, { "epoch": 0.4795880838543582, "grad_norm": 1.5216321899671466, "learning_rate": 5.572238617413479e-06, "loss": 0.622, "step": 15648 }, { "epoch": 0.4796187323770994, "grad_norm": 1.6454105596478699, "learning_rate": 5.5717455579261555e-06, "loss": 0.6338, "step": 15649 }, { "epoch": 0.4796493808998406, "grad_norm": 1.706489676598938, "learning_rate": 5.571252492805278e-06, "loss": 0.6815, "step": 15650 }, { "epoch": 0.4796800294225818, "grad_norm": 1.5026634941959698, "learning_rate": 5.5707594220557005e-06, "loss": 0.561, "step": 15651 }, { "epoch": 0.479710677945323, "grad_norm": 1.62118140099748, "learning_rate": 5.570266345682287e-06, "loss": 0.6042, "step": 15652 }, { "epoch": 0.4797413264680642, "grad_norm": 1.7241707873035241, "learning_rate": 5.569773263689893e-06, "loss": 0.5367, "step": 15653 }, { "epoch": 0.4797719749908054, "grad_norm": 1.62143673809291, "learning_rate": 5.569280176083376e-06, "loss": 0.5434, "step": 15654 }, { "epoch": 0.47980262351354663, "grad_norm": 1.470905245675293, "learning_rate": 5.568787082867596e-06, "loss": 0.5718, "step": 15655 }, { "epoch": 0.47983327203628784, "grad_norm": 0.8638944008749874, "learning_rate": 5.5682939840474126e-06, "loss": 0.425, "step": 15656 }, { "epoch": 0.47986392055902904, "grad_norm": 1.8429533036957715, "learning_rate": 5.567800879627682e-06, "loss": 0.7527, "step": 15657 }, { "epoch": 0.47989456908177025, "grad_norm": 1.6560135807423926, "learning_rate": 5.5673077696132635e-06, "loss": 0.6278, "step": 15658 }, { "epoch": 0.47992521760451146, "grad_norm": 1.8138516556811852, "learning_rate": 5.566814654009017e-06, "loss": 0.7181, "step": 15659 }, { "epoch": 0.47995586612725266, "grad_norm": 1.8449466692250251, "learning_rate": 5.566321532819802e-06, "loss": 0.7625, "step": 15660 }, { "epoch": 0.47998651464999387, "grad_norm": 1.5664780080537242, "learning_rate": 5.565828406050476e-06, "loss": 0.5854, "step": 15661 }, { "epoch": 0.4800171631727351, "grad_norm": 1.6815059862684287, "learning_rate": 5.5653352737058955e-06, "loss": 0.6507, "step": 15662 }, { "epoch": 0.4800478116954763, "grad_norm": 1.576926611889412, "learning_rate": 5.5648421357909235e-06, "loss": 0.5487, "step": 15663 }, { "epoch": 0.4800784602182175, "grad_norm": 1.810270717134344, "learning_rate": 5.564348992310417e-06, "loss": 0.7322, "step": 15664 }, { "epoch": 0.4801091087409587, "grad_norm": 1.7783340346842877, "learning_rate": 5.563855843269237e-06, "loss": 0.6703, "step": 15665 }, { "epoch": 0.4801397572636999, "grad_norm": 1.6031449031547425, "learning_rate": 5.563362688672238e-06, "loss": 0.666, "step": 15666 }, { "epoch": 0.4801704057864411, "grad_norm": 1.6062615830039904, "learning_rate": 5.562869528524285e-06, "loss": 0.6247, "step": 15667 }, { "epoch": 0.4802010543091823, "grad_norm": 1.4603545481720022, "learning_rate": 5.5623763628302345e-06, "loss": 0.547, "step": 15668 }, { "epoch": 0.4802317028319235, "grad_norm": 1.786682262969251, "learning_rate": 5.561883191594945e-06, "loss": 0.5992, "step": 15669 }, { "epoch": 0.4802623513546647, "grad_norm": 1.9091633636967196, "learning_rate": 5.561390014823276e-06, "loss": 0.7749, "step": 15670 }, { "epoch": 0.4802929998774059, "grad_norm": 1.658110604609876, "learning_rate": 5.5608968325200875e-06, "loss": 0.5899, "step": 15671 }, { "epoch": 0.48032364840014713, "grad_norm": 0.8597043913670884, "learning_rate": 5.56040364469024e-06, "loss": 0.4564, "step": 15672 }, { "epoch": 0.48035429692288834, "grad_norm": 1.70656110939501, "learning_rate": 5.55991045133859e-06, "loss": 0.6444, "step": 15673 }, { "epoch": 0.48038494544562954, "grad_norm": 0.86753300282286, "learning_rate": 5.559417252470002e-06, "loss": 0.4493, "step": 15674 }, { "epoch": 0.48041559396837075, "grad_norm": 0.7931415166601012, "learning_rate": 5.55892404808933e-06, "loss": 0.4456, "step": 15675 }, { "epoch": 0.48044624249111195, "grad_norm": 0.7686561773066666, "learning_rate": 5.55843083820144e-06, "loss": 0.4411, "step": 15676 }, { "epoch": 0.48047689101385316, "grad_norm": 0.7704106989481533, "learning_rate": 5.557937622811185e-06, "loss": 0.454, "step": 15677 }, { "epoch": 0.48050753953659436, "grad_norm": 1.9553693928350577, "learning_rate": 5.5574444019234285e-06, "loss": 0.6486, "step": 15678 }, { "epoch": 0.4805381880593355, "grad_norm": 1.6893632951814324, "learning_rate": 5.556951175543029e-06, "loss": 0.6058, "step": 15679 }, { "epoch": 0.4805688365820767, "grad_norm": 1.5995744000812728, "learning_rate": 5.556457943674849e-06, "loss": 0.544, "step": 15680 }, { "epoch": 0.4805994851048179, "grad_norm": 1.748500124338794, "learning_rate": 5.555964706323746e-06, "loss": 0.6812, "step": 15681 }, { "epoch": 0.48063013362755913, "grad_norm": 1.7499480700532453, "learning_rate": 5.555471463494579e-06, "loss": 0.5746, "step": 15682 }, { "epoch": 0.48066078215030034, "grad_norm": 1.6218779011941686, "learning_rate": 5.554978215192211e-06, "loss": 0.6751, "step": 15683 }, { "epoch": 0.48069143067304154, "grad_norm": 1.9277919492571702, "learning_rate": 5.554484961421499e-06, "loss": 0.6448, "step": 15684 }, { "epoch": 0.48072207919578275, "grad_norm": 0.908737019473093, "learning_rate": 5.553991702187307e-06, "loss": 0.4323, "step": 15685 }, { "epoch": 0.48075272771852395, "grad_norm": 1.606974359449174, "learning_rate": 5.55349843749449e-06, "loss": 0.6326, "step": 15686 }, { "epoch": 0.48078337624126516, "grad_norm": 1.5664645980077871, "learning_rate": 5.553005167347914e-06, "loss": 0.6535, "step": 15687 }, { "epoch": 0.48081402476400636, "grad_norm": 1.628066331539518, "learning_rate": 5.552511891752435e-06, "loss": 0.585, "step": 15688 }, { "epoch": 0.48084467328674757, "grad_norm": 1.736239206831339, "learning_rate": 5.552018610712917e-06, "loss": 0.6818, "step": 15689 }, { "epoch": 0.4808753218094888, "grad_norm": 1.7010228473774307, "learning_rate": 5.551525324234216e-06, "loss": 0.5928, "step": 15690 }, { "epoch": 0.48090597033223, "grad_norm": 1.749445759473527, "learning_rate": 5.5510320323211975e-06, "loss": 0.7007, "step": 15691 }, { "epoch": 0.4809366188549712, "grad_norm": 0.8210066207354734, "learning_rate": 5.5505387349787175e-06, "loss": 0.4465, "step": 15692 }, { "epoch": 0.4809672673777124, "grad_norm": 1.6506873496371013, "learning_rate": 5.5500454322116395e-06, "loss": 0.6628, "step": 15693 }, { "epoch": 0.4809979159004536, "grad_norm": 1.66081034638066, "learning_rate": 5.54955212402482e-06, "loss": 0.632, "step": 15694 }, { "epoch": 0.4810285644231948, "grad_norm": 1.7191272999794986, "learning_rate": 5.549058810423128e-06, "loss": 0.6428, "step": 15695 }, { "epoch": 0.481059212945936, "grad_norm": 1.8105141102118385, "learning_rate": 5.548565491411415e-06, "loss": 0.6739, "step": 15696 }, { "epoch": 0.4810898614686772, "grad_norm": 1.732919940804733, "learning_rate": 5.548072166994548e-06, "loss": 0.5267, "step": 15697 }, { "epoch": 0.4811205099914184, "grad_norm": 1.6129740405100514, "learning_rate": 5.547578837177384e-06, "loss": 0.565, "step": 15698 }, { "epoch": 0.4811511585141596, "grad_norm": 1.7837162012239036, "learning_rate": 5.547085501964787e-06, "loss": 0.7198, "step": 15699 }, { "epoch": 0.48118180703690083, "grad_norm": 1.6732227855921689, "learning_rate": 5.5465921613616155e-06, "loss": 0.7071, "step": 15700 }, { "epoch": 0.48121245555964204, "grad_norm": 1.5797230973453518, "learning_rate": 5.546098815372732e-06, "loss": 0.5632, "step": 15701 }, { "epoch": 0.48124310408238324, "grad_norm": 1.5625075672476478, "learning_rate": 5.545605464002998e-06, "loss": 0.6107, "step": 15702 }, { "epoch": 0.48127375260512445, "grad_norm": 1.756252929824732, "learning_rate": 5.545112107257273e-06, "loss": 0.7046, "step": 15703 }, { "epoch": 0.48130440112786566, "grad_norm": 1.7759183022670042, "learning_rate": 5.54461874514042e-06, "loss": 0.5369, "step": 15704 }, { "epoch": 0.48133504965060686, "grad_norm": 1.517983997492629, "learning_rate": 5.544125377657297e-06, "loss": 0.5906, "step": 15705 }, { "epoch": 0.48136569817334807, "grad_norm": 0.8364172873272396, "learning_rate": 5.543632004812769e-06, "loss": 0.4453, "step": 15706 }, { "epoch": 0.4813963466960893, "grad_norm": 1.4815223215658022, "learning_rate": 5.543138626611696e-06, "loss": 0.5562, "step": 15707 }, { "epoch": 0.4814269952188305, "grad_norm": 1.5587935425421886, "learning_rate": 5.542645243058938e-06, "loss": 0.5691, "step": 15708 }, { "epoch": 0.4814576437415717, "grad_norm": 1.771298611789703, "learning_rate": 5.5421518541593575e-06, "loss": 0.592, "step": 15709 }, { "epoch": 0.48148829226431283, "grad_norm": 1.7871349608241458, "learning_rate": 5.541658459917817e-06, "loss": 0.646, "step": 15710 }, { "epoch": 0.48151894078705404, "grad_norm": 1.527110684125726, "learning_rate": 5.541165060339178e-06, "loss": 0.5994, "step": 15711 }, { "epoch": 0.48154958930979525, "grad_norm": 1.7490862895027737, "learning_rate": 5.540671655428298e-06, "loss": 0.713, "step": 15712 }, { "epoch": 0.48158023783253645, "grad_norm": 1.8471865143939057, "learning_rate": 5.540178245190044e-06, "loss": 0.5353, "step": 15713 }, { "epoch": 0.48161088635527766, "grad_norm": 1.8029415031189278, "learning_rate": 5.539684829629276e-06, "loss": 0.5805, "step": 15714 }, { "epoch": 0.48164153487801886, "grad_norm": 1.823031300165881, "learning_rate": 5.5391914087508545e-06, "loss": 0.6395, "step": 15715 }, { "epoch": 0.48167218340076007, "grad_norm": 1.6724483159721795, "learning_rate": 5.538697982559642e-06, "loss": 0.6313, "step": 15716 }, { "epoch": 0.4817028319235013, "grad_norm": 1.6895754410839905, "learning_rate": 5.538204551060501e-06, "loss": 0.7391, "step": 15717 }, { "epoch": 0.4817334804462425, "grad_norm": 1.5655801170910624, "learning_rate": 5.537711114258293e-06, "loss": 0.6269, "step": 15718 }, { "epoch": 0.4817641289689837, "grad_norm": 1.588743233978677, "learning_rate": 5.53721767215788e-06, "loss": 0.6351, "step": 15719 }, { "epoch": 0.4817947774917249, "grad_norm": 1.8121021549651097, "learning_rate": 5.536724224764122e-06, "loss": 0.6503, "step": 15720 }, { "epoch": 0.4818254260144661, "grad_norm": 1.6655457753382825, "learning_rate": 5.536230772081884e-06, "loss": 0.6636, "step": 15721 }, { "epoch": 0.4818560745372073, "grad_norm": 1.9145003713671265, "learning_rate": 5.535737314116027e-06, "loss": 0.7231, "step": 15722 }, { "epoch": 0.4818867230599485, "grad_norm": 1.7486131555829871, "learning_rate": 5.535243850871414e-06, "loss": 0.6465, "step": 15723 }, { "epoch": 0.4819173715826897, "grad_norm": 1.7407693481532756, "learning_rate": 5.534750382352905e-06, "loss": 0.5925, "step": 15724 }, { "epoch": 0.4819480201054309, "grad_norm": 1.617721719638258, "learning_rate": 5.534256908565365e-06, "loss": 0.6894, "step": 15725 }, { "epoch": 0.4819786686281721, "grad_norm": 1.7360017220037387, "learning_rate": 5.533763429513655e-06, "loss": 0.6572, "step": 15726 }, { "epoch": 0.48200931715091333, "grad_norm": 1.7702227968404223, "learning_rate": 5.5332699452026354e-06, "loss": 0.6429, "step": 15727 }, { "epoch": 0.48203996567365454, "grad_norm": 2.047048825707372, "learning_rate": 5.5327764556371725e-06, "loss": 0.6396, "step": 15728 }, { "epoch": 0.48207061419639574, "grad_norm": 1.5474973923581898, "learning_rate": 5.5322829608221255e-06, "loss": 0.4987, "step": 15729 }, { "epoch": 0.48210126271913695, "grad_norm": 1.5860342703598005, "learning_rate": 5.53178946076236e-06, "loss": 0.5841, "step": 15730 }, { "epoch": 0.48213191124187815, "grad_norm": 0.8250639715961207, "learning_rate": 5.531295955462735e-06, "loss": 0.4369, "step": 15731 }, { "epoch": 0.48216255976461936, "grad_norm": 1.768897879196321, "learning_rate": 5.5308024449281165e-06, "loss": 0.7244, "step": 15732 }, { "epoch": 0.48219320828736056, "grad_norm": 0.7643307347820756, "learning_rate": 5.530308929163364e-06, "loss": 0.4254, "step": 15733 }, { "epoch": 0.48222385681010177, "grad_norm": 1.6282836730270842, "learning_rate": 5.5298154081733436e-06, "loss": 0.603, "step": 15734 }, { "epoch": 0.482254505332843, "grad_norm": 1.7041881762421296, "learning_rate": 5.529321881962916e-06, "loss": 0.6942, "step": 15735 }, { "epoch": 0.4822851538555842, "grad_norm": 1.7772028585663668, "learning_rate": 5.528828350536944e-06, "loss": 0.5818, "step": 15736 }, { "epoch": 0.4823158023783254, "grad_norm": 1.9044673609268215, "learning_rate": 5.528334813900291e-06, "loss": 0.6331, "step": 15737 }, { "epoch": 0.4823464509010666, "grad_norm": 2.0857069491265925, "learning_rate": 5.52784127205782e-06, "loss": 0.668, "step": 15738 }, { "epoch": 0.4823770994238078, "grad_norm": 1.6348438230984723, "learning_rate": 5.527347725014395e-06, "loss": 0.6454, "step": 15739 }, { "epoch": 0.482407747946549, "grad_norm": 1.6346792336882154, "learning_rate": 5.526854172774877e-06, "loss": 0.6153, "step": 15740 }, { "epoch": 0.48243839646929015, "grad_norm": 2.476922618825902, "learning_rate": 5.526360615344129e-06, "loss": 0.6716, "step": 15741 }, { "epoch": 0.48246904499203136, "grad_norm": 1.6094738699581448, "learning_rate": 5.525867052727016e-06, "loss": 0.6179, "step": 15742 }, { "epoch": 0.48249969351477257, "grad_norm": 0.9884607034878936, "learning_rate": 5.525373484928401e-06, "loss": 0.446, "step": 15743 }, { "epoch": 0.48253034203751377, "grad_norm": 1.8675965971991688, "learning_rate": 5.524879911953146e-06, "loss": 0.6517, "step": 15744 }, { "epoch": 0.482560990560255, "grad_norm": 1.8234499165274192, "learning_rate": 5.5243863338061165e-06, "loss": 0.6543, "step": 15745 }, { "epoch": 0.4825916390829962, "grad_norm": 1.8032267488638418, "learning_rate": 5.523892750492171e-06, "loss": 0.585, "step": 15746 }, { "epoch": 0.4826222876057374, "grad_norm": 1.7406267087408411, "learning_rate": 5.523399162016179e-06, "loss": 0.6785, "step": 15747 }, { "epoch": 0.4826529361284786, "grad_norm": 1.9789592756408731, "learning_rate": 5.5229055683829995e-06, "loss": 0.7249, "step": 15748 }, { "epoch": 0.4826835846512198, "grad_norm": 0.7865988370369688, "learning_rate": 5.5224119695975e-06, "loss": 0.4294, "step": 15749 }, { "epoch": 0.482714233173961, "grad_norm": 1.888914294901852, "learning_rate": 5.521918365664539e-06, "loss": 0.64, "step": 15750 }, { "epoch": 0.4827448816967022, "grad_norm": 1.502687095260885, "learning_rate": 5.521424756588984e-06, "loss": 0.5596, "step": 15751 }, { "epoch": 0.4827755302194434, "grad_norm": 0.8058231273231562, "learning_rate": 5.520931142375697e-06, "loss": 0.4482, "step": 15752 }, { "epoch": 0.4828061787421846, "grad_norm": 1.856301406609348, "learning_rate": 5.520437523029542e-06, "loss": 0.6213, "step": 15753 }, { "epoch": 0.4828368272649258, "grad_norm": 1.5644298241925338, "learning_rate": 5.519943898555384e-06, "loss": 0.6286, "step": 15754 }, { "epoch": 0.48286747578766703, "grad_norm": 1.7776665576972581, "learning_rate": 5.519450268958084e-06, "loss": 0.5402, "step": 15755 }, { "epoch": 0.48289812431040824, "grad_norm": 1.7113964397099866, "learning_rate": 5.518956634242509e-06, "loss": 0.7002, "step": 15756 }, { "epoch": 0.48292877283314944, "grad_norm": 1.8380171679280373, "learning_rate": 5.518462994413522e-06, "loss": 0.7081, "step": 15757 }, { "epoch": 0.48295942135589065, "grad_norm": 1.6171859209968722, "learning_rate": 5.517969349475987e-06, "loss": 0.6644, "step": 15758 }, { "epoch": 0.48299006987863186, "grad_norm": 1.664054165748235, "learning_rate": 5.517475699434764e-06, "loss": 0.6756, "step": 15759 }, { "epoch": 0.48302071840137306, "grad_norm": 1.8112640251777914, "learning_rate": 5.5169820442947255e-06, "loss": 0.6961, "step": 15760 }, { "epoch": 0.48305136692411427, "grad_norm": 1.754345311987408, "learning_rate": 5.516488384060726e-06, "loss": 0.6547, "step": 15761 }, { "epoch": 0.4830820154468555, "grad_norm": 1.7921746968847914, "learning_rate": 5.515994718737637e-06, "loss": 0.6658, "step": 15762 }, { "epoch": 0.4831126639695967, "grad_norm": 1.730679944317756, "learning_rate": 5.515501048330319e-06, "loss": 0.7058, "step": 15763 }, { "epoch": 0.4831433124923379, "grad_norm": 1.783756215660874, "learning_rate": 5.515007372843637e-06, "loss": 0.629, "step": 15764 }, { "epoch": 0.4831739610150791, "grad_norm": 1.7707686087997623, "learning_rate": 5.514513692282457e-06, "loss": 0.7019, "step": 15765 }, { "epoch": 0.4832046095378203, "grad_norm": 1.874678090895228, "learning_rate": 5.514020006651641e-06, "loss": 0.5898, "step": 15766 }, { "epoch": 0.4832352580605615, "grad_norm": 1.7764049444892451, "learning_rate": 5.513526315956053e-06, "loss": 0.5777, "step": 15767 }, { "epoch": 0.4832659065833027, "grad_norm": 2.1810427054640433, "learning_rate": 5.513032620200561e-06, "loss": 0.7454, "step": 15768 }, { "epoch": 0.4832965551060439, "grad_norm": 2.0823551486465814, "learning_rate": 5.512538919390027e-06, "loss": 0.6817, "step": 15769 }, { "epoch": 0.4833272036287851, "grad_norm": 1.7061741872987906, "learning_rate": 5.512045213529315e-06, "loss": 0.615, "step": 15770 }, { "epoch": 0.4833578521515263, "grad_norm": 1.6347063841806138, "learning_rate": 5.51155150262329e-06, "loss": 0.6552, "step": 15771 }, { "epoch": 0.4833885006742675, "grad_norm": 1.5225217572139853, "learning_rate": 5.511057786676819e-06, "loss": 0.6168, "step": 15772 }, { "epoch": 0.4834191491970087, "grad_norm": 1.7035696872442374, "learning_rate": 5.510564065694764e-06, "loss": 0.5762, "step": 15773 }, { "epoch": 0.4834497977197499, "grad_norm": 1.7138656204805665, "learning_rate": 5.5100703396819895e-06, "loss": 0.678, "step": 15774 }, { "epoch": 0.4834804462424911, "grad_norm": 1.7000833794993866, "learning_rate": 5.5095766086433635e-06, "loss": 0.5524, "step": 15775 }, { "epoch": 0.4835110947652323, "grad_norm": 1.8168034254578596, "learning_rate": 5.509082872583747e-06, "loss": 0.6699, "step": 15776 }, { "epoch": 0.4835417432879735, "grad_norm": 1.7893173836878606, "learning_rate": 5.508589131508009e-06, "loss": 0.8067, "step": 15777 }, { "epoch": 0.4835723918107147, "grad_norm": 1.5199823166085844, "learning_rate": 5.50809538542101e-06, "loss": 0.5743, "step": 15778 }, { "epoch": 0.4836030403334559, "grad_norm": 1.842548848890921, "learning_rate": 5.507601634327617e-06, "loss": 0.5957, "step": 15779 }, { "epoch": 0.4836336888561971, "grad_norm": 1.5806339286510611, "learning_rate": 5.507107878232697e-06, "loss": 0.6016, "step": 15780 }, { "epoch": 0.4836643373789383, "grad_norm": 1.749140025393735, "learning_rate": 5.506614117141112e-06, "loss": 0.7345, "step": 15781 }, { "epoch": 0.48369498590167953, "grad_norm": 1.7843826303378618, "learning_rate": 5.506120351057729e-06, "loss": 0.6311, "step": 15782 }, { "epoch": 0.48372563442442074, "grad_norm": 1.7463151495531235, "learning_rate": 5.505626579987411e-06, "loss": 0.6703, "step": 15783 }, { "epoch": 0.48375628294716194, "grad_norm": 1.662253820850021, "learning_rate": 5.505132803935028e-06, "loss": 0.6183, "step": 15784 }, { "epoch": 0.48378693146990315, "grad_norm": 1.7450983500904893, "learning_rate": 5.50463902290544e-06, "loss": 0.6858, "step": 15785 }, { "epoch": 0.48381757999264435, "grad_norm": 1.9569918876747279, "learning_rate": 5.504145236903515e-06, "loss": 0.5883, "step": 15786 }, { "epoch": 0.48384822851538556, "grad_norm": 1.6392844719930482, "learning_rate": 5.503651445934119e-06, "loss": 0.626, "step": 15787 }, { "epoch": 0.48387887703812676, "grad_norm": 1.621231502563741, "learning_rate": 5.5031576500021155e-06, "loss": 0.5376, "step": 15788 }, { "epoch": 0.48390952556086797, "grad_norm": 2.521543868676282, "learning_rate": 5.502663849112371e-06, "loss": 0.6764, "step": 15789 }, { "epoch": 0.4839401740836092, "grad_norm": 1.6234751497911686, "learning_rate": 5.5021700432697515e-06, "loss": 0.667, "step": 15790 }, { "epoch": 0.4839708226063504, "grad_norm": 1.5914305778134634, "learning_rate": 5.501676232479122e-06, "loss": 0.5526, "step": 15791 }, { "epoch": 0.4840014711290916, "grad_norm": 1.6984275651400977, "learning_rate": 5.501182416745347e-06, "loss": 0.622, "step": 15792 }, { "epoch": 0.4840321196518328, "grad_norm": 1.532457132253177, "learning_rate": 5.500688596073295e-06, "loss": 0.5531, "step": 15793 }, { "epoch": 0.484062768174574, "grad_norm": 1.8374947837572344, "learning_rate": 5.50019477046783e-06, "loss": 0.5437, "step": 15794 }, { "epoch": 0.4840934166973152, "grad_norm": 1.7555205167729906, "learning_rate": 5.4997009399338176e-06, "loss": 0.6915, "step": 15795 }, { "epoch": 0.4841240652200564, "grad_norm": 1.8513561165146322, "learning_rate": 5.499207104476123e-06, "loss": 0.6869, "step": 15796 }, { "epoch": 0.4841547137427976, "grad_norm": 0.8537889153312361, "learning_rate": 5.498713264099615e-06, "loss": 0.4143, "step": 15797 }, { "epoch": 0.4841853622655388, "grad_norm": 0.8645193143232978, "learning_rate": 5.4982194188091545e-06, "loss": 0.4437, "step": 15798 }, { "epoch": 0.48421601078828, "grad_norm": 1.9108915761601621, "learning_rate": 5.497725568609614e-06, "loss": 0.6652, "step": 15799 }, { "epoch": 0.48424665931102123, "grad_norm": 1.840607247028583, "learning_rate": 5.497231713505854e-06, "loss": 0.6192, "step": 15800 }, { "epoch": 0.48427730783376244, "grad_norm": 1.928926318303481, "learning_rate": 5.496737853502744e-06, "loss": 0.6463, "step": 15801 }, { "epoch": 0.48430795635650364, "grad_norm": 1.6096526966862361, "learning_rate": 5.496243988605147e-06, "loss": 0.6197, "step": 15802 }, { "epoch": 0.4843386048792448, "grad_norm": 1.644744795846705, "learning_rate": 5.4957501188179345e-06, "loss": 0.609, "step": 15803 }, { "epoch": 0.484369253401986, "grad_norm": 1.6806800793393761, "learning_rate": 5.495256244145966e-06, "loss": 0.6219, "step": 15804 }, { "epoch": 0.4843999019247272, "grad_norm": 1.7257901513143548, "learning_rate": 5.494762364594112e-06, "loss": 0.61, "step": 15805 }, { "epoch": 0.4844305504474684, "grad_norm": 1.6905190816814086, "learning_rate": 5.494268480167237e-06, "loss": 0.6924, "step": 15806 }, { "epoch": 0.4844611989702096, "grad_norm": 1.7445337622854593, "learning_rate": 5.493774590870209e-06, "loss": 0.7126, "step": 15807 }, { "epoch": 0.4844918474929508, "grad_norm": 1.6415179192024376, "learning_rate": 5.493280696707894e-06, "loss": 0.6783, "step": 15808 }, { "epoch": 0.48452249601569203, "grad_norm": 1.8832467787910394, "learning_rate": 5.492786797685157e-06, "loss": 0.6558, "step": 15809 }, { "epoch": 0.48455314453843323, "grad_norm": 1.8403822814738715, "learning_rate": 5.492292893806866e-06, "loss": 0.7642, "step": 15810 }, { "epoch": 0.48458379306117444, "grad_norm": 1.5837516355339354, "learning_rate": 5.491798985077889e-06, "loss": 0.578, "step": 15811 }, { "epoch": 0.48461444158391564, "grad_norm": 1.8462837351626893, "learning_rate": 5.491305071503089e-06, "loss": 0.6394, "step": 15812 }, { "epoch": 0.48464509010665685, "grad_norm": 1.7126741499041715, "learning_rate": 5.490811153087334e-06, "loss": 0.5951, "step": 15813 }, { "epoch": 0.48467573862939806, "grad_norm": 1.7357205727207865, "learning_rate": 5.490317229835493e-06, "loss": 0.5249, "step": 15814 }, { "epoch": 0.48470638715213926, "grad_norm": 0.9837731954658124, "learning_rate": 5.48982330175243e-06, "loss": 0.4266, "step": 15815 }, { "epoch": 0.48473703567488047, "grad_norm": 1.8194366823797812, "learning_rate": 5.489329368843012e-06, "loss": 0.6241, "step": 15816 }, { "epoch": 0.4847676841976217, "grad_norm": 1.8133548985680086, "learning_rate": 5.488835431112106e-06, "loss": 0.7067, "step": 15817 }, { "epoch": 0.4847983327203629, "grad_norm": 1.874752511921787, "learning_rate": 5.488341488564582e-06, "loss": 0.7419, "step": 15818 }, { "epoch": 0.4848289812431041, "grad_norm": 1.6175655834001557, "learning_rate": 5.487847541205302e-06, "loss": 0.5054, "step": 15819 }, { "epoch": 0.4848596297658453, "grad_norm": 1.7208762587744784, "learning_rate": 5.487353589039136e-06, "loss": 0.6402, "step": 15820 }, { "epoch": 0.4848902782885865, "grad_norm": 1.7248384389452769, "learning_rate": 5.48685963207095e-06, "loss": 0.6433, "step": 15821 }, { "epoch": 0.4849209268113277, "grad_norm": 1.862604383078436, "learning_rate": 5.486365670305612e-06, "loss": 0.6722, "step": 15822 }, { "epoch": 0.4849515753340689, "grad_norm": 1.7971028712538837, "learning_rate": 5.485871703747989e-06, "loss": 0.68, "step": 15823 }, { "epoch": 0.4849822238568101, "grad_norm": 1.7258512992093968, "learning_rate": 5.4853777324029464e-06, "loss": 0.6096, "step": 15824 }, { "epoch": 0.4850128723795513, "grad_norm": 1.5036005444821274, "learning_rate": 5.484883756275354e-06, "loss": 0.5654, "step": 15825 }, { "epoch": 0.4850435209022925, "grad_norm": 0.8145530183506257, "learning_rate": 5.484389775370078e-06, "loss": 0.4136, "step": 15826 }, { "epoch": 0.48507416942503373, "grad_norm": 1.6689942486093186, "learning_rate": 5.483895789691985e-06, "loss": 0.6382, "step": 15827 }, { "epoch": 0.48510481794777494, "grad_norm": 1.7553717860897073, "learning_rate": 5.483401799245943e-06, "loss": 0.7121, "step": 15828 }, { "epoch": 0.48513546647051614, "grad_norm": 2.0523394401440083, "learning_rate": 5.48290780403682e-06, "loss": 0.6997, "step": 15829 }, { "epoch": 0.48516611499325735, "grad_norm": 1.6497751592462793, "learning_rate": 5.482413804069483e-06, "loss": 0.7412, "step": 15830 }, { "epoch": 0.48519676351599855, "grad_norm": 1.7298323945642273, "learning_rate": 5.481919799348799e-06, "loss": 0.6127, "step": 15831 }, { "epoch": 0.48522741203873976, "grad_norm": 1.75170141082604, "learning_rate": 5.481425789879635e-06, "loss": 0.6741, "step": 15832 }, { "epoch": 0.48525806056148096, "grad_norm": 1.735024214032782, "learning_rate": 5.48093177566686e-06, "loss": 0.6045, "step": 15833 }, { "epoch": 0.4852887090842221, "grad_norm": 1.749643099513447, "learning_rate": 5.4804377567153424e-06, "loss": 0.5504, "step": 15834 }, { "epoch": 0.4853193576069633, "grad_norm": 1.6155719929494723, "learning_rate": 5.479943733029947e-06, "loss": 0.7198, "step": 15835 }, { "epoch": 0.4853500061297045, "grad_norm": 1.9215128438976798, "learning_rate": 5.479449704615543e-06, "loss": 0.6572, "step": 15836 }, { "epoch": 0.48538065465244573, "grad_norm": 1.69127731750518, "learning_rate": 5.478955671477e-06, "loss": 0.6227, "step": 15837 }, { "epoch": 0.48541130317518694, "grad_norm": 1.6627628478381191, "learning_rate": 5.478461633619185e-06, "loss": 0.6585, "step": 15838 }, { "epoch": 0.48544195169792814, "grad_norm": 0.9276451612854842, "learning_rate": 5.477967591046962e-06, "loss": 0.4522, "step": 15839 }, { "epoch": 0.48547260022066935, "grad_norm": 1.8563994043554366, "learning_rate": 5.4774735437652036e-06, "loss": 0.666, "step": 15840 }, { "epoch": 0.48550324874341055, "grad_norm": 1.6466433392335036, "learning_rate": 5.476979491778777e-06, "loss": 0.6353, "step": 15841 }, { "epoch": 0.48553389726615176, "grad_norm": 1.9481663083505238, "learning_rate": 5.476485435092549e-06, "loss": 0.6126, "step": 15842 }, { "epoch": 0.48556454578889297, "grad_norm": 1.7298376869564505, "learning_rate": 5.475991373711387e-06, "loss": 0.7016, "step": 15843 }, { "epoch": 0.48559519431163417, "grad_norm": 1.7773665807867653, "learning_rate": 5.47549730764016e-06, "loss": 0.7614, "step": 15844 }, { "epoch": 0.4856258428343754, "grad_norm": 0.8278728440350146, "learning_rate": 5.475003236883738e-06, "loss": 0.4459, "step": 15845 }, { "epoch": 0.4856564913571166, "grad_norm": 1.6376403580956016, "learning_rate": 5.474509161446987e-06, "loss": 0.635, "step": 15846 }, { "epoch": 0.4856871398798578, "grad_norm": 1.4568916293594392, "learning_rate": 5.474015081334776e-06, "loss": 0.5738, "step": 15847 }, { "epoch": 0.485717788402599, "grad_norm": 0.802305018686493, "learning_rate": 5.473520996551972e-06, "loss": 0.4524, "step": 15848 }, { "epoch": 0.4857484369253402, "grad_norm": 1.9021007151698157, "learning_rate": 5.473026907103446e-06, "loss": 0.7241, "step": 15849 }, { "epoch": 0.4857790854480814, "grad_norm": 1.6378518327930318, "learning_rate": 5.472532812994063e-06, "loss": 0.5859, "step": 15850 }, { "epoch": 0.4858097339708226, "grad_norm": 0.8214033596139939, "learning_rate": 5.472038714228695e-06, "loss": 0.46, "step": 15851 }, { "epoch": 0.4858403824935638, "grad_norm": 1.662657490461652, "learning_rate": 5.471544610812207e-06, "loss": 0.6031, "step": 15852 }, { "epoch": 0.485871031016305, "grad_norm": 1.8472131932896148, "learning_rate": 5.471050502749472e-06, "loss": 0.6514, "step": 15853 }, { "epoch": 0.4859016795390462, "grad_norm": 1.8653922752800227, "learning_rate": 5.470556390045354e-06, "loss": 0.6863, "step": 15854 }, { "epoch": 0.48593232806178743, "grad_norm": 1.7763660935728718, "learning_rate": 5.470062272704724e-06, "loss": 0.6898, "step": 15855 }, { "epoch": 0.48596297658452864, "grad_norm": 1.5287597976613845, "learning_rate": 5.46956815073245e-06, "loss": 0.6279, "step": 15856 }, { "epoch": 0.48599362510726984, "grad_norm": 1.6045676775871989, "learning_rate": 5.469074024133401e-06, "loss": 0.6017, "step": 15857 }, { "epoch": 0.48602427363001105, "grad_norm": 1.9129988712133343, "learning_rate": 5.468579892912446e-06, "loss": 0.6482, "step": 15858 }, { "epoch": 0.48605492215275226, "grad_norm": 2.1725399213942125, "learning_rate": 5.468085757074453e-06, "loss": 0.7017, "step": 15859 }, { "epoch": 0.48608557067549346, "grad_norm": 2.439728230545245, "learning_rate": 5.4675916166242904e-06, "loss": 0.7275, "step": 15860 }, { "epoch": 0.48611621919823467, "grad_norm": 1.8745688060613153, "learning_rate": 5.467097471566829e-06, "loss": 0.6086, "step": 15861 }, { "epoch": 0.4861468677209759, "grad_norm": 1.7084149287661865, "learning_rate": 5.466603321906937e-06, "loss": 0.5571, "step": 15862 }, { "epoch": 0.4861775162437171, "grad_norm": 1.6491435345104652, "learning_rate": 5.466109167649483e-06, "loss": 0.6173, "step": 15863 }, { "epoch": 0.4862081647664583, "grad_norm": 0.8256161821620547, "learning_rate": 5.465615008799336e-06, "loss": 0.4493, "step": 15864 }, { "epoch": 0.48623881328919943, "grad_norm": 1.567837625292595, "learning_rate": 5.4651208453613634e-06, "loss": 0.5505, "step": 15865 }, { "epoch": 0.48626946181194064, "grad_norm": 1.7622252653707438, "learning_rate": 5.464626677340438e-06, "loss": 0.5646, "step": 15866 }, { "epoch": 0.48630011033468185, "grad_norm": 1.5173831755645715, "learning_rate": 5.464132504741426e-06, "loss": 0.5452, "step": 15867 }, { "epoch": 0.48633075885742305, "grad_norm": 1.6963516904794318, "learning_rate": 5.4636383275692e-06, "loss": 0.605, "step": 15868 }, { "epoch": 0.48636140738016426, "grad_norm": 1.7444697549313877, "learning_rate": 5.463144145828624e-06, "loss": 0.6637, "step": 15869 }, { "epoch": 0.48639205590290546, "grad_norm": 1.80190504252318, "learning_rate": 5.462649959524572e-06, "loss": 0.6954, "step": 15870 }, { "epoch": 0.48642270442564667, "grad_norm": 1.5610954647613677, "learning_rate": 5.46215576866191e-06, "loss": 0.5908, "step": 15871 }, { "epoch": 0.4864533529483879, "grad_norm": 0.7627889710219808, "learning_rate": 5.461661573245512e-06, "loss": 0.4294, "step": 15872 }, { "epoch": 0.4864840014711291, "grad_norm": 1.744480816016109, "learning_rate": 5.4611673732802405e-06, "loss": 0.6744, "step": 15873 }, { "epoch": 0.4865146499938703, "grad_norm": 1.562278503320459, "learning_rate": 5.460673168770971e-06, "loss": 0.6708, "step": 15874 }, { "epoch": 0.4865452985166115, "grad_norm": 1.6252648919238706, "learning_rate": 5.460178959722571e-06, "loss": 0.6723, "step": 15875 }, { "epoch": 0.4865759470393527, "grad_norm": 1.7570307379881436, "learning_rate": 5.4596847461399095e-06, "loss": 0.6438, "step": 15876 }, { "epoch": 0.4866065955620939, "grad_norm": 0.7500224078376408, "learning_rate": 5.459190528027857e-06, "loss": 0.4167, "step": 15877 }, { "epoch": 0.4866372440848351, "grad_norm": 1.9758614921422475, "learning_rate": 5.458696305391281e-06, "loss": 0.567, "step": 15878 }, { "epoch": 0.4866678926075763, "grad_norm": 0.8010232619056361, "learning_rate": 5.458202078235056e-06, "loss": 0.4461, "step": 15879 }, { "epoch": 0.4866985411303175, "grad_norm": 1.8436355131526034, "learning_rate": 5.457707846564046e-06, "loss": 0.6554, "step": 15880 }, { "epoch": 0.4867291896530587, "grad_norm": 1.7870749051404426, "learning_rate": 5.457213610383125e-06, "loss": 0.7122, "step": 15881 }, { "epoch": 0.48675983817579993, "grad_norm": 0.7569330563704816, "learning_rate": 5.456719369697161e-06, "loss": 0.4317, "step": 15882 }, { "epoch": 0.48679048669854114, "grad_norm": 0.7497853383306794, "learning_rate": 5.456225124511024e-06, "loss": 0.4491, "step": 15883 }, { "epoch": 0.48682113522128234, "grad_norm": 1.5209422706516091, "learning_rate": 5.455730874829584e-06, "loss": 0.674, "step": 15884 }, { "epoch": 0.48685178374402355, "grad_norm": 1.6182220812855779, "learning_rate": 5.455236620657712e-06, "loss": 0.6608, "step": 15885 }, { "epoch": 0.48688243226676475, "grad_norm": 1.7770233790383998, "learning_rate": 5.454742362000276e-06, "loss": 0.6623, "step": 15886 }, { "epoch": 0.48691308078950596, "grad_norm": 1.834756556532752, "learning_rate": 5.454248098862147e-06, "loss": 0.645, "step": 15887 }, { "epoch": 0.48694372931224716, "grad_norm": 1.7684385592241725, "learning_rate": 5.453753831248196e-06, "loss": 0.6358, "step": 15888 }, { "epoch": 0.48697437783498837, "grad_norm": 1.8001070118342257, "learning_rate": 5.453259559163293e-06, "loss": 0.6844, "step": 15889 }, { "epoch": 0.4870050263577296, "grad_norm": 1.809110666177827, "learning_rate": 5.4527652826123055e-06, "loss": 0.572, "step": 15890 }, { "epoch": 0.4870356748804708, "grad_norm": 0.7981752931626935, "learning_rate": 5.452271001600108e-06, "loss": 0.4421, "step": 15891 }, { "epoch": 0.487066323403212, "grad_norm": 1.6254815845557773, "learning_rate": 5.451776716131569e-06, "loss": 0.5988, "step": 15892 }, { "epoch": 0.4870969719259532, "grad_norm": 1.7096008144654025, "learning_rate": 5.451282426211555e-06, "loss": 0.6446, "step": 15893 }, { "epoch": 0.4871276204486944, "grad_norm": 1.5979126863835176, "learning_rate": 5.450788131844943e-06, "loss": 0.6913, "step": 15894 }, { "epoch": 0.4871582689714356, "grad_norm": 1.6495768303640006, "learning_rate": 5.4502938330365996e-06, "loss": 0.5461, "step": 15895 }, { "epoch": 0.48718891749417675, "grad_norm": 1.838392952020897, "learning_rate": 5.449799529791395e-06, "loss": 0.5897, "step": 15896 }, { "epoch": 0.48721956601691796, "grad_norm": 1.5682457615367986, "learning_rate": 5.4493052221142005e-06, "loss": 0.6257, "step": 15897 }, { "epoch": 0.48725021453965917, "grad_norm": 1.7447493088649655, "learning_rate": 5.448810910009888e-06, "loss": 0.6219, "step": 15898 }, { "epoch": 0.48728086306240037, "grad_norm": 1.7136590030758523, "learning_rate": 5.448316593483325e-06, "loss": 0.5777, "step": 15899 }, { "epoch": 0.4873115115851416, "grad_norm": 0.7748411029111291, "learning_rate": 5.4478222725393856e-06, "loss": 0.4303, "step": 15900 }, { "epoch": 0.4873421601078828, "grad_norm": 0.7934004542186871, "learning_rate": 5.4473279471829364e-06, "loss": 0.4462, "step": 15901 }, { "epoch": 0.487372808630624, "grad_norm": 1.726836941117933, "learning_rate": 5.446833617418853e-06, "loss": 0.72, "step": 15902 }, { "epoch": 0.4874034571533652, "grad_norm": 1.8382959157912835, "learning_rate": 5.4463392832520035e-06, "loss": 0.6679, "step": 15903 }, { "epoch": 0.4874341056761064, "grad_norm": 1.6866892650450573, "learning_rate": 5.445844944687256e-06, "loss": 0.5487, "step": 15904 }, { "epoch": 0.4874647541988476, "grad_norm": 2.0474613286016496, "learning_rate": 5.445350601729488e-06, "loss": 0.6213, "step": 15905 }, { "epoch": 0.4874954027215888, "grad_norm": 1.5947120747748555, "learning_rate": 5.444856254383564e-06, "loss": 0.6086, "step": 15906 }, { "epoch": 0.48752605124433, "grad_norm": 1.5499811347610248, "learning_rate": 5.444361902654359e-06, "loss": 0.5746, "step": 15907 }, { "epoch": 0.4875566997670712, "grad_norm": 0.7834321235357506, "learning_rate": 5.443867546546741e-06, "loss": 0.4313, "step": 15908 }, { "epoch": 0.48758734828981243, "grad_norm": 1.903829478499705, "learning_rate": 5.443373186065583e-06, "loss": 0.6287, "step": 15909 }, { "epoch": 0.48761799681255363, "grad_norm": 1.6927144232039082, "learning_rate": 5.4428788212157555e-06, "loss": 0.6306, "step": 15910 }, { "epoch": 0.48764864533529484, "grad_norm": 1.8477806607768306, "learning_rate": 5.442384452002132e-06, "loss": 0.6887, "step": 15911 }, { "epoch": 0.48767929385803604, "grad_norm": 1.694305015917879, "learning_rate": 5.441890078429578e-06, "loss": 0.686, "step": 15912 }, { "epoch": 0.48770994238077725, "grad_norm": 1.6732771072353212, "learning_rate": 5.441395700502969e-06, "loss": 0.5992, "step": 15913 }, { "epoch": 0.48774059090351846, "grad_norm": 1.743028585735475, "learning_rate": 5.4409013182271766e-06, "loss": 0.6603, "step": 15914 }, { "epoch": 0.48777123942625966, "grad_norm": 1.8305633632497957, "learning_rate": 5.44040693160707e-06, "loss": 0.63, "step": 15915 }, { "epoch": 0.48780188794900087, "grad_norm": 1.5608319213197446, "learning_rate": 5.43991254064752e-06, "loss": 0.6611, "step": 15916 }, { "epoch": 0.4878325364717421, "grad_norm": 1.641818834990967, "learning_rate": 5.4394181453534e-06, "loss": 0.5847, "step": 15917 }, { "epoch": 0.4878631849944833, "grad_norm": 1.6987923332251307, "learning_rate": 5.438923745729581e-06, "loss": 0.6784, "step": 15918 }, { "epoch": 0.4878938335172245, "grad_norm": 1.7238635369279272, "learning_rate": 5.438429341780932e-06, "loss": 0.6796, "step": 15919 }, { "epoch": 0.4879244820399657, "grad_norm": 2.0299021696425457, "learning_rate": 5.437934933512329e-06, "loss": 0.6821, "step": 15920 }, { "epoch": 0.4879551305627069, "grad_norm": 1.848852220255245, "learning_rate": 5.43744052092864e-06, "loss": 0.6682, "step": 15921 }, { "epoch": 0.4879857790854481, "grad_norm": 1.8455683687178643, "learning_rate": 5.4369461040347385e-06, "loss": 0.6736, "step": 15922 }, { "epoch": 0.4880164276081893, "grad_norm": 1.7930856294457118, "learning_rate": 5.436451682835494e-06, "loss": 0.6888, "step": 15923 }, { "epoch": 0.4880470761309305, "grad_norm": 0.8170567640925451, "learning_rate": 5.43595725733578e-06, "loss": 0.4299, "step": 15924 }, { "epoch": 0.4880777246536717, "grad_norm": 1.6707729143075412, "learning_rate": 5.435462827540466e-06, "loss": 0.6848, "step": 15925 }, { "epoch": 0.4881083731764129, "grad_norm": 1.5427232280913783, "learning_rate": 5.4349683934544294e-06, "loss": 0.559, "step": 15926 }, { "epoch": 0.4881390216991541, "grad_norm": 1.8026631929036199, "learning_rate": 5.434473955082534e-06, "loss": 0.678, "step": 15927 }, { "epoch": 0.4881696702218953, "grad_norm": 1.9606710053803056, "learning_rate": 5.433979512429658e-06, "loss": 0.7419, "step": 15928 }, { "epoch": 0.4882003187446365, "grad_norm": 1.6079437085374444, "learning_rate": 5.4334850655006686e-06, "loss": 0.605, "step": 15929 }, { "epoch": 0.4882309672673777, "grad_norm": 1.778705201191175, "learning_rate": 5.432990614300442e-06, "loss": 0.6654, "step": 15930 }, { "epoch": 0.4882616157901189, "grad_norm": 0.8038164856513702, "learning_rate": 5.432496158833846e-06, "loss": 0.4428, "step": 15931 }, { "epoch": 0.4882922643128601, "grad_norm": 1.6777293318578073, "learning_rate": 5.432001699105756e-06, "loss": 0.6611, "step": 15932 }, { "epoch": 0.4883229128356013, "grad_norm": 1.6696499051236282, "learning_rate": 5.431507235121043e-06, "loss": 0.5527, "step": 15933 }, { "epoch": 0.4883535613583425, "grad_norm": 1.7582977099858415, "learning_rate": 5.4310127668845795e-06, "loss": 0.5486, "step": 15934 }, { "epoch": 0.4883842098810837, "grad_norm": 0.7762509320473416, "learning_rate": 5.430518294401236e-06, "loss": 0.4401, "step": 15935 }, { "epoch": 0.4884148584038249, "grad_norm": 1.6843520801730472, "learning_rate": 5.430023817675883e-06, "loss": 0.7351, "step": 15936 }, { "epoch": 0.48844550692656613, "grad_norm": 1.616965368936947, "learning_rate": 5.429529336713399e-06, "loss": 0.5215, "step": 15937 }, { "epoch": 0.48847615544930734, "grad_norm": 1.7400478045822831, "learning_rate": 5.429034851518652e-06, "loss": 0.6101, "step": 15938 }, { "epoch": 0.48850680397204854, "grad_norm": 1.8176519939433256, "learning_rate": 5.428540362096514e-06, "loss": 0.5578, "step": 15939 }, { "epoch": 0.48853745249478975, "grad_norm": 0.7609319065509222, "learning_rate": 5.428045868451858e-06, "loss": 0.432, "step": 15940 }, { "epoch": 0.48856810101753095, "grad_norm": 1.6146928116864079, "learning_rate": 5.427551370589558e-06, "loss": 0.6435, "step": 15941 }, { "epoch": 0.48859874954027216, "grad_norm": 1.7160173715957534, "learning_rate": 5.4270568685144835e-06, "loss": 0.7386, "step": 15942 }, { "epoch": 0.48862939806301336, "grad_norm": 1.7324266275561582, "learning_rate": 5.426562362231509e-06, "loss": 0.7779, "step": 15943 }, { "epoch": 0.48866004658575457, "grad_norm": 1.7602577176126, "learning_rate": 5.426067851745504e-06, "loss": 0.6693, "step": 15944 }, { "epoch": 0.4886906951084958, "grad_norm": 1.8604500442230678, "learning_rate": 5.425573337061346e-06, "loss": 0.691, "step": 15945 }, { "epoch": 0.488721343631237, "grad_norm": 1.8610814165948866, "learning_rate": 5.425078818183905e-06, "loss": 0.734, "step": 15946 }, { "epoch": 0.4887519921539782, "grad_norm": 1.7312926239517163, "learning_rate": 5.424584295118053e-06, "loss": 0.5341, "step": 15947 }, { "epoch": 0.4887826406767194, "grad_norm": 1.8319683981250958, "learning_rate": 5.424089767868663e-06, "loss": 0.6464, "step": 15948 }, { "epoch": 0.4888132891994606, "grad_norm": 1.5849428002785775, "learning_rate": 5.42359523644061e-06, "loss": 0.6217, "step": 15949 }, { "epoch": 0.4888439377222018, "grad_norm": 0.7853999893322102, "learning_rate": 5.423100700838763e-06, "loss": 0.441, "step": 15950 }, { "epoch": 0.488874586244943, "grad_norm": 1.6845846357912346, "learning_rate": 5.422606161067996e-06, "loss": 0.5684, "step": 15951 }, { "epoch": 0.4889052347676842, "grad_norm": 0.815694623432629, "learning_rate": 5.4221116171331835e-06, "loss": 0.4558, "step": 15952 }, { "epoch": 0.4889358832904254, "grad_norm": 1.8434628612526822, "learning_rate": 5.421617069039198e-06, "loss": 0.6748, "step": 15953 }, { "epoch": 0.4889665318131666, "grad_norm": 1.9106277803258338, "learning_rate": 5.42112251679091e-06, "loss": 0.688, "step": 15954 }, { "epoch": 0.48899718033590783, "grad_norm": 1.5903342041517, "learning_rate": 5.420627960393194e-06, "loss": 0.6405, "step": 15955 }, { "epoch": 0.48902782885864904, "grad_norm": 1.8035425334761825, "learning_rate": 5.420133399850924e-06, "loss": 0.6113, "step": 15956 }, { "epoch": 0.48905847738139024, "grad_norm": 1.752982958347368, "learning_rate": 5.419638835168972e-06, "loss": 0.7399, "step": 15957 }, { "epoch": 0.4890891259041314, "grad_norm": 1.6772626637376242, "learning_rate": 5.419144266352211e-06, "loss": 0.6448, "step": 15958 }, { "epoch": 0.4891197744268726, "grad_norm": 1.645041657870058, "learning_rate": 5.418649693405514e-06, "loss": 0.6998, "step": 15959 }, { "epoch": 0.4891504229496138, "grad_norm": 1.7897498912352388, "learning_rate": 5.418155116333755e-06, "loss": 0.7522, "step": 15960 }, { "epoch": 0.489181071472355, "grad_norm": 1.8377067137372993, "learning_rate": 5.417660535141806e-06, "loss": 0.6649, "step": 15961 }, { "epoch": 0.4892117199950962, "grad_norm": 1.5898289559549548, "learning_rate": 5.417165949834542e-06, "loss": 0.6629, "step": 15962 }, { "epoch": 0.4892423685178374, "grad_norm": 1.8255926225626322, "learning_rate": 5.416671360416834e-06, "loss": 0.6841, "step": 15963 }, { "epoch": 0.48927301704057863, "grad_norm": 1.6704032130925537, "learning_rate": 5.416176766893556e-06, "loss": 0.6073, "step": 15964 }, { "epoch": 0.48930366556331983, "grad_norm": 0.8935215806942955, "learning_rate": 5.415682169269585e-06, "loss": 0.4455, "step": 15965 }, { "epoch": 0.48933431408606104, "grad_norm": 1.6834448072126187, "learning_rate": 5.415187567549788e-06, "loss": 0.6152, "step": 15966 }, { "epoch": 0.48936496260880225, "grad_norm": 1.6789270620668142, "learning_rate": 5.414692961739043e-06, "loss": 0.5241, "step": 15967 }, { "epoch": 0.48939561113154345, "grad_norm": 1.7203283863201497, "learning_rate": 5.414198351842223e-06, "loss": 0.6706, "step": 15968 }, { "epoch": 0.48942625965428466, "grad_norm": 1.7136495636500557, "learning_rate": 5.413703737864199e-06, "loss": 0.4707, "step": 15969 }, { "epoch": 0.48945690817702586, "grad_norm": 1.557638669657221, "learning_rate": 5.4132091198098455e-06, "loss": 0.6225, "step": 15970 }, { "epoch": 0.48948755669976707, "grad_norm": 0.7831794614898636, "learning_rate": 5.412714497684039e-06, "loss": 0.4403, "step": 15971 }, { "epoch": 0.4895182052225083, "grad_norm": 1.4378596789324445, "learning_rate": 5.4122198714916495e-06, "loss": 0.642, "step": 15972 }, { "epoch": 0.4895488537452495, "grad_norm": 1.6653277922835494, "learning_rate": 5.411725241237552e-06, "loss": 0.7114, "step": 15973 }, { "epoch": 0.4895795022679907, "grad_norm": 2.107600928378901, "learning_rate": 5.411230606926622e-06, "loss": 0.5767, "step": 15974 }, { "epoch": 0.4896101507907319, "grad_norm": 1.652311136317443, "learning_rate": 5.41073596856373e-06, "loss": 0.6579, "step": 15975 }, { "epoch": 0.4896407993134731, "grad_norm": 0.8280815574880565, "learning_rate": 5.410241326153753e-06, "loss": 0.4501, "step": 15976 }, { "epoch": 0.4896714478362143, "grad_norm": 0.8053330229678843, "learning_rate": 5.4097466797015615e-06, "loss": 0.4537, "step": 15977 }, { "epoch": 0.4897020963589555, "grad_norm": 1.4106326851417166, "learning_rate": 5.409252029212032e-06, "loss": 0.5589, "step": 15978 }, { "epoch": 0.4897327448816967, "grad_norm": 1.775716224132647, "learning_rate": 5.408757374690037e-06, "loss": 0.6485, "step": 15979 }, { "epoch": 0.4897633934044379, "grad_norm": 1.6957070674070704, "learning_rate": 5.408262716140452e-06, "loss": 0.5961, "step": 15980 }, { "epoch": 0.4897940419271791, "grad_norm": 0.7466283966918306, "learning_rate": 5.407768053568148e-06, "loss": 0.4354, "step": 15981 }, { "epoch": 0.48982469044992033, "grad_norm": 1.900793852858367, "learning_rate": 5.407273386978003e-06, "loss": 0.6067, "step": 15982 }, { "epoch": 0.48985533897266154, "grad_norm": 1.7482828629950364, "learning_rate": 5.406778716374888e-06, "loss": 0.6481, "step": 15983 }, { "epoch": 0.48988598749540274, "grad_norm": 1.7846122483409768, "learning_rate": 5.40628404176368e-06, "loss": 0.7174, "step": 15984 }, { "epoch": 0.48991663601814395, "grad_norm": 0.7698328751693427, "learning_rate": 5.405789363149251e-06, "loss": 0.4276, "step": 15985 }, { "epoch": 0.48994728454088515, "grad_norm": 1.7471986472868044, "learning_rate": 5.405294680536475e-06, "loss": 0.7246, "step": 15986 }, { "epoch": 0.48997793306362636, "grad_norm": 1.8703240311843046, "learning_rate": 5.404799993930226e-06, "loss": 0.6271, "step": 15987 }, { "epoch": 0.49000858158636756, "grad_norm": 1.4754579734686835, "learning_rate": 5.404305303335379e-06, "loss": 0.5178, "step": 15988 }, { "epoch": 0.4900392301091087, "grad_norm": 1.728683137167847, "learning_rate": 5.40381060875681e-06, "loss": 0.6787, "step": 15989 }, { "epoch": 0.4900698786318499, "grad_norm": 1.5216025736532788, "learning_rate": 5.403315910199389e-06, "loss": 0.6292, "step": 15990 }, { "epoch": 0.4901005271545911, "grad_norm": 1.6564766423189548, "learning_rate": 5.402821207667998e-06, "loss": 0.6573, "step": 15991 }, { "epoch": 0.49013117567733233, "grad_norm": 0.7680996193859738, "learning_rate": 5.402326501167502e-06, "loss": 0.4246, "step": 15992 }, { "epoch": 0.49016182420007354, "grad_norm": 1.7315093730732536, "learning_rate": 5.4018317907027816e-06, "loss": 0.6393, "step": 15993 }, { "epoch": 0.49019247272281474, "grad_norm": 1.6291323254620302, "learning_rate": 5.401337076278709e-06, "loss": 0.6642, "step": 15994 }, { "epoch": 0.49022312124555595, "grad_norm": 0.7887975702927075, "learning_rate": 5.400842357900161e-06, "loss": 0.4582, "step": 15995 }, { "epoch": 0.49025376976829715, "grad_norm": 1.8345960695237402, "learning_rate": 5.40034763557201e-06, "loss": 0.7408, "step": 15996 }, { "epoch": 0.49028441829103836, "grad_norm": 1.794952758133336, "learning_rate": 5.399852909299131e-06, "loss": 0.5652, "step": 15997 }, { "epoch": 0.49031506681377957, "grad_norm": 1.9740920944230156, "learning_rate": 5.399358179086399e-06, "loss": 0.7445, "step": 15998 }, { "epoch": 0.49034571533652077, "grad_norm": 0.7579875559224953, "learning_rate": 5.398863444938689e-06, "loss": 0.4265, "step": 15999 }, { "epoch": 0.490376363859262, "grad_norm": 1.8352396626227823, "learning_rate": 5.398368706860876e-06, "loss": 0.6542, "step": 16000 }, { "epoch": 0.4904070123820032, "grad_norm": 1.6371739466022601, "learning_rate": 5.397873964857833e-06, "loss": 0.6618, "step": 16001 }, { "epoch": 0.4904376609047444, "grad_norm": 1.5605585907247619, "learning_rate": 5.3973792189344366e-06, "loss": 0.6159, "step": 16002 }, { "epoch": 0.4904683094274856, "grad_norm": 1.4936440723665492, "learning_rate": 5.396884469095562e-06, "loss": 0.5975, "step": 16003 }, { "epoch": 0.4904989579502268, "grad_norm": 1.7544472661210855, "learning_rate": 5.396389715346082e-06, "loss": 0.6936, "step": 16004 }, { "epoch": 0.490529606472968, "grad_norm": 1.8474032799500357, "learning_rate": 5.395894957690871e-06, "loss": 0.6535, "step": 16005 }, { "epoch": 0.4905602549957092, "grad_norm": 1.6407636042769167, "learning_rate": 5.395400196134809e-06, "loss": 0.6112, "step": 16006 }, { "epoch": 0.4905909035184504, "grad_norm": 1.6700211152501354, "learning_rate": 5.394905430682766e-06, "loss": 0.6364, "step": 16007 }, { "epoch": 0.4906215520411916, "grad_norm": 1.6214377827981339, "learning_rate": 5.3944106613396196e-06, "loss": 0.6287, "step": 16008 }, { "epoch": 0.4906522005639328, "grad_norm": 1.6134488800145381, "learning_rate": 5.393915888110242e-06, "loss": 0.6411, "step": 16009 }, { "epoch": 0.49068284908667403, "grad_norm": 1.6236974189278595, "learning_rate": 5.393421110999513e-06, "loss": 0.4927, "step": 16010 }, { "epoch": 0.49071349760941524, "grad_norm": 1.6128371606605174, "learning_rate": 5.392926330012305e-06, "loss": 0.6431, "step": 16011 }, { "epoch": 0.49074414613215644, "grad_norm": 0.8091348662292467, "learning_rate": 5.3924315451534915e-06, "loss": 0.4242, "step": 16012 }, { "epoch": 0.49077479465489765, "grad_norm": 1.770134107906551, "learning_rate": 5.39193675642795e-06, "loss": 0.6531, "step": 16013 }, { "epoch": 0.49080544317763886, "grad_norm": 1.8579220141748045, "learning_rate": 5.391441963840556e-06, "loss": 0.5375, "step": 16014 }, { "epoch": 0.49083609170038006, "grad_norm": 1.7990255141905103, "learning_rate": 5.3909471673961844e-06, "loss": 0.6976, "step": 16015 }, { "epoch": 0.49086674022312127, "grad_norm": 1.6333552340274575, "learning_rate": 5.3904523670997085e-06, "loss": 0.6664, "step": 16016 }, { "epoch": 0.4908973887458625, "grad_norm": 0.7546947411598194, "learning_rate": 5.389957562956007e-06, "loss": 0.4285, "step": 16017 }, { "epoch": 0.4909280372686037, "grad_norm": 1.816489287804029, "learning_rate": 5.389462754969955e-06, "loss": 0.6273, "step": 16018 }, { "epoch": 0.4909586857913449, "grad_norm": 0.8130943191924145, "learning_rate": 5.388967943146426e-06, "loss": 0.4368, "step": 16019 }, { "epoch": 0.49098933431408603, "grad_norm": 1.9569358679172675, "learning_rate": 5.388473127490295e-06, "loss": 0.6866, "step": 16020 }, { "epoch": 0.49101998283682724, "grad_norm": 1.672319848170703, "learning_rate": 5.3879783080064396e-06, "loss": 0.6097, "step": 16021 }, { "epoch": 0.49105063135956845, "grad_norm": 1.9590703849362394, "learning_rate": 5.387483484699736e-06, "loss": 0.7043, "step": 16022 }, { "epoch": 0.49108127988230965, "grad_norm": 0.7889595999560701, "learning_rate": 5.3869886575750575e-06, "loss": 0.427, "step": 16023 }, { "epoch": 0.49111192840505086, "grad_norm": 1.9511339172896986, "learning_rate": 5.386493826637279e-06, "loss": 0.6915, "step": 16024 }, { "epoch": 0.49114257692779206, "grad_norm": 0.8112900322252339, "learning_rate": 5.38599899189128e-06, "loss": 0.4275, "step": 16025 }, { "epoch": 0.49117322545053327, "grad_norm": 0.7430861606981914, "learning_rate": 5.385504153341934e-06, "loss": 0.4236, "step": 16026 }, { "epoch": 0.4912038739732745, "grad_norm": 1.681345020823609, "learning_rate": 5.385009310994116e-06, "loss": 0.539, "step": 16027 }, { "epoch": 0.4912345224960157, "grad_norm": 1.5994445816060003, "learning_rate": 5.384514464852704e-06, "loss": 0.6116, "step": 16028 }, { "epoch": 0.4912651710187569, "grad_norm": 1.611921414361951, "learning_rate": 5.384019614922572e-06, "loss": 0.6542, "step": 16029 }, { "epoch": 0.4912958195414981, "grad_norm": 1.8466687271414737, "learning_rate": 5.383524761208597e-06, "loss": 0.7131, "step": 16030 }, { "epoch": 0.4913264680642393, "grad_norm": 2.0646195971475527, "learning_rate": 5.383029903715653e-06, "loss": 0.6577, "step": 16031 }, { "epoch": 0.4913571165869805, "grad_norm": 1.8656741721691141, "learning_rate": 5.382535042448619e-06, "loss": 0.5957, "step": 16032 }, { "epoch": 0.4913877651097217, "grad_norm": 1.6717273227547487, "learning_rate": 5.382040177412368e-06, "loss": 0.686, "step": 16033 }, { "epoch": 0.4914184136324629, "grad_norm": 2.003326509187147, "learning_rate": 5.3815453086117785e-06, "loss": 0.6871, "step": 16034 }, { "epoch": 0.4914490621552041, "grad_norm": 0.8165762897805785, "learning_rate": 5.381050436051724e-06, "loss": 0.4259, "step": 16035 }, { "epoch": 0.4914797106779453, "grad_norm": 1.7072064151189057, "learning_rate": 5.380555559737084e-06, "loss": 0.6925, "step": 16036 }, { "epoch": 0.49151035920068653, "grad_norm": 0.8206073643743199, "learning_rate": 5.38006067967273e-06, "loss": 0.4243, "step": 16037 }, { "epoch": 0.49154100772342774, "grad_norm": 1.6809350083066834, "learning_rate": 5.379565795863545e-06, "loss": 0.6601, "step": 16038 }, { "epoch": 0.49157165624616894, "grad_norm": 1.5803265686626526, "learning_rate": 5.379070908314398e-06, "loss": 0.6334, "step": 16039 }, { "epoch": 0.49160230476891015, "grad_norm": 0.7658406531084696, "learning_rate": 5.378576017030168e-06, "loss": 0.4269, "step": 16040 }, { "epoch": 0.49163295329165135, "grad_norm": 0.7526138500017153, "learning_rate": 5.378081122015733e-06, "loss": 0.4155, "step": 16041 }, { "epoch": 0.49166360181439256, "grad_norm": 0.7953775991945798, "learning_rate": 5.377586223275968e-06, "loss": 0.4303, "step": 16042 }, { "epoch": 0.49169425033713376, "grad_norm": 1.7600185999560458, "learning_rate": 5.377091320815748e-06, "loss": 0.6338, "step": 16043 }, { "epoch": 0.49172489885987497, "grad_norm": 1.703220533195655, "learning_rate": 5.376596414639952e-06, "loss": 0.6554, "step": 16044 }, { "epoch": 0.4917555473826162, "grad_norm": 0.7663600909153662, "learning_rate": 5.376101504753456e-06, "loss": 0.4391, "step": 16045 }, { "epoch": 0.4917861959053574, "grad_norm": 0.7782005332987554, "learning_rate": 5.375606591161133e-06, "loss": 0.4213, "step": 16046 }, { "epoch": 0.4918168444280986, "grad_norm": 1.6825657099136764, "learning_rate": 5.375111673867865e-06, "loss": 0.6656, "step": 16047 }, { "epoch": 0.4918474929508398, "grad_norm": 1.6593616059323208, "learning_rate": 5.374616752878523e-06, "loss": 0.6304, "step": 16048 }, { "epoch": 0.491878141473581, "grad_norm": 0.7747663472179278, "learning_rate": 5.374121828197989e-06, "loss": 0.4474, "step": 16049 }, { "epoch": 0.4919087899963222, "grad_norm": 0.8119737204774655, "learning_rate": 5.373626899831135e-06, "loss": 0.4354, "step": 16050 }, { "epoch": 0.49193943851906335, "grad_norm": 0.7543659433467145, "learning_rate": 5.37313196778284e-06, "loss": 0.46, "step": 16051 }, { "epoch": 0.49197008704180456, "grad_norm": 1.7954686254910708, "learning_rate": 5.37263703205798e-06, "loss": 0.6194, "step": 16052 }, { "epoch": 0.49200073556454577, "grad_norm": 2.0014178131917046, "learning_rate": 5.372142092661432e-06, "loss": 0.7568, "step": 16053 }, { "epoch": 0.49203138408728697, "grad_norm": 1.603748694529132, "learning_rate": 5.371647149598074e-06, "loss": 0.6062, "step": 16054 }, { "epoch": 0.4920620326100282, "grad_norm": 0.7785704815564803, "learning_rate": 5.371152202872781e-06, "loss": 0.4445, "step": 16055 }, { "epoch": 0.4920926811327694, "grad_norm": 1.7549128789447235, "learning_rate": 5.370657252490429e-06, "loss": 0.6274, "step": 16056 }, { "epoch": 0.4921233296555106, "grad_norm": 1.8435648008064067, "learning_rate": 5.370162298455898e-06, "loss": 0.6508, "step": 16057 }, { "epoch": 0.4921539781782518, "grad_norm": 1.56616426488381, "learning_rate": 5.369667340774062e-06, "loss": 0.6143, "step": 16058 }, { "epoch": 0.492184626700993, "grad_norm": 1.6458535727519454, "learning_rate": 5.369172379449798e-06, "loss": 0.7135, "step": 16059 }, { "epoch": 0.4922152752237342, "grad_norm": 1.6776617768159565, "learning_rate": 5.368677414487987e-06, "loss": 0.6985, "step": 16060 }, { "epoch": 0.4922459237464754, "grad_norm": 1.8117728413725709, "learning_rate": 5.3681824458935015e-06, "loss": 0.6852, "step": 16061 }, { "epoch": 0.4922765722692166, "grad_norm": 1.643628148292832, "learning_rate": 5.367687473671221e-06, "loss": 0.6272, "step": 16062 }, { "epoch": 0.4923072207919578, "grad_norm": 1.5670415253227081, "learning_rate": 5.36719249782602e-06, "loss": 0.6483, "step": 16063 }, { "epoch": 0.49233786931469903, "grad_norm": 1.4940893680460785, "learning_rate": 5.366697518362779e-06, "loss": 0.6272, "step": 16064 }, { "epoch": 0.49236851783744023, "grad_norm": 1.7421343919491972, "learning_rate": 5.366202535286373e-06, "loss": 0.6653, "step": 16065 }, { "epoch": 0.49239916636018144, "grad_norm": 2.3406061955133732, "learning_rate": 5.3657075486016805e-06, "loss": 0.5123, "step": 16066 }, { "epoch": 0.49242981488292265, "grad_norm": 0.7637094793062287, "learning_rate": 5.365212558313576e-06, "loss": 0.4205, "step": 16067 }, { "epoch": 0.49246046340566385, "grad_norm": 1.9845261150718128, "learning_rate": 5.3647175644269404e-06, "loss": 0.6694, "step": 16068 }, { "epoch": 0.49249111192840506, "grad_norm": 1.7550682064986203, "learning_rate": 5.364222566946649e-06, "loss": 0.6801, "step": 16069 }, { "epoch": 0.49252176045114626, "grad_norm": 1.6974308664777373, "learning_rate": 5.363727565877579e-06, "loss": 0.6752, "step": 16070 }, { "epoch": 0.49255240897388747, "grad_norm": 0.7477107699947779, "learning_rate": 5.363232561224608e-06, "loss": 0.4299, "step": 16071 }, { "epoch": 0.4925830574966287, "grad_norm": 1.7714686130595305, "learning_rate": 5.362737552992615e-06, "loss": 0.5888, "step": 16072 }, { "epoch": 0.4926137060193699, "grad_norm": 1.7385271470463806, "learning_rate": 5.362242541186475e-06, "loss": 0.6124, "step": 16073 }, { "epoch": 0.4926443545421111, "grad_norm": 1.6574013866080144, "learning_rate": 5.361747525811066e-06, "loss": 0.6037, "step": 16074 }, { "epoch": 0.4926750030648523, "grad_norm": 1.577270620231314, "learning_rate": 5.3612525068712675e-06, "loss": 0.542, "step": 16075 }, { "epoch": 0.4927056515875935, "grad_norm": 1.7492528795476323, "learning_rate": 5.360757484371956e-06, "loss": 0.7102, "step": 16076 }, { "epoch": 0.4927363001103347, "grad_norm": 1.8141515128698509, "learning_rate": 5.360262458318008e-06, "loss": 0.6261, "step": 16077 }, { "epoch": 0.4927669486330759, "grad_norm": 1.805449629825261, "learning_rate": 5.359767428714299e-06, "loss": 0.7024, "step": 16078 }, { "epoch": 0.4927975971558171, "grad_norm": 1.8396061700180693, "learning_rate": 5.359272395565713e-06, "loss": 0.7657, "step": 16079 }, { "epoch": 0.4928282456785583, "grad_norm": 1.6338698605059552, "learning_rate": 5.358777358877124e-06, "loss": 0.6725, "step": 16080 }, { "epoch": 0.4928588942012995, "grad_norm": 1.6037737943619195, "learning_rate": 5.358282318653409e-06, "loss": 0.5275, "step": 16081 }, { "epoch": 0.4928895427240407, "grad_norm": 1.847724701612787, "learning_rate": 5.3577872748994465e-06, "loss": 0.5967, "step": 16082 }, { "epoch": 0.4929201912467819, "grad_norm": 1.5709591406458567, "learning_rate": 5.357292227620115e-06, "loss": 0.5635, "step": 16083 }, { "epoch": 0.4929508397695231, "grad_norm": 0.8023495991307018, "learning_rate": 5.356797176820291e-06, "loss": 0.4278, "step": 16084 }, { "epoch": 0.4929814882922643, "grad_norm": 1.6384652022164499, "learning_rate": 5.3563021225048525e-06, "loss": 0.6545, "step": 16085 }, { "epoch": 0.4930121368150055, "grad_norm": 1.895882311942351, "learning_rate": 5.35580706467868e-06, "loss": 0.7035, "step": 16086 }, { "epoch": 0.4930427853377467, "grad_norm": 1.6962408982455934, "learning_rate": 5.355312003346648e-06, "loss": 0.6736, "step": 16087 }, { "epoch": 0.4930734338604879, "grad_norm": 1.6752623617084637, "learning_rate": 5.354816938513638e-06, "loss": 0.6023, "step": 16088 }, { "epoch": 0.4931040823832291, "grad_norm": 0.8257996723600691, "learning_rate": 5.354321870184522e-06, "loss": 0.4358, "step": 16089 }, { "epoch": 0.4931347309059703, "grad_norm": 1.7224151595334953, "learning_rate": 5.3538267983641855e-06, "loss": 0.6016, "step": 16090 }, { "epoch": 0.4931653794287115, "grad_norm": 1.6218723957159369, "learning_rate": 5.353331723057501e-06, "loss": 0.6189, "step": 16091 }, { "epoch": 0.49319602795145273, "grad_norm": 1.6965035974744578, "learning_rate": 5.35283664426935e-06, "loss": 0.562, "step": 16092 }, { "epoch": 0.49322667647419394, "grad_norm": 2.025777452606229, "learning_rate": 5.352341562004608e-06, "loss": 0.5523, "step": 16093 }, { "epoch": 0.49325732499693514, "grad_norm": 1.7122617154478668, "learning_rate": 5.351846476268157e-06, "loss": 0.711, "step": 16094 }, { "epoch": 0.49328797351967635, "grad_norm": 1.7492718136280472, "learning_rate": 5.35135138706487e-06, "loss": 0.6293, "step": 16095 }, { "epoch": 0.49331862204241755, "grad_norm": 1.6450832027007465, "learning_rate": 5.3508562943996275e-06, "loss": 0.6207, "step": 16096 }, { "epoch": 0.49334927056515876, "grad_norm": 1.592709184826643, "learning_rate": 5.35036119827731e-06, "loss": 0.738, "step": 16097 }, { "epoch": 0.49337991908789997, "grad_norm": 1.8266120830799573, "learning_rate": 5.349866098702792e-06, "loss": 0.687, "step": 16098 }, { "epoch": 0.49341056761064117, "grad_norm": 1.6562999484237644, "learning_rate": 5.349370995680957e-06, "loss": 0.6494, "step": 16099 }, { "epoch": 0.4934412161333824, "grad_norm": 1.7903513001659743, "learning_rate": 5.3488758892166785e-06, "loss": 0.6334, "step": 16100 }, { "epoch": 0.4934718646561236, "grad_norm": 1.72033900506453, "learning_rate": 5.3483807793148355e-06, "loss": 0.5864, "step": 16101 }, { "epoch": 0.4935025131788648, "grad_norm": 0.8449237529045672, "learning_rate": 5.347885665980308e-06, "loss": 0.444, "step": 16102 }, { "epoch": 0.493533161701606, "grad_norm": 1.8415143303075792, "learning_rate": 5.347390549217976e-06, "loss": 0.6208, "step": 16103 }, { "epoch": 0.4935638102243472, "grad_norm": 1.758567592894372, "learning_rate": 5.346895429032714e-06, "loss": 0.5669, "step": 16104 }, { "epoch": 0.4935944587470884, "grad_norm": 0.7820506252407078, "learning_rate": 5.346400305429403e-06, "loss": 0.4371, "step": 16105 }, { "epoch": 0.4936251072698296, "grad_norm": 0.7585942154671667, "learning_rate": 5.34590517841292e-06, "loss": 0.4303, "step": 16106 }, { "epoch": 0.4936557557925708, "grad_norm": 1.5223223136800024, "learning_rate": 5.345410047988148e-06, "loss": 0.5048, "step": 16107 }, { "epoch": 0.493686404315312, "grad_norm": 1.8456920624253488, "learning_rate": 5.34491491415996e-06, "loss": 0.5949, "step": 16108 }, { "epoch": 0.4937170528380532, "grad_norm": 1.8608814739146342, "learning_rate": 5.344419776933237e-06, "loss": 0.5769, "step": 16109 }, { "epoch": 0.49374770136079443, "grad_norm": 1.8645847542253176, "learning_rate": 5.343924636312858e-06, "loss": 0.5884, "step": 16110 }, { "epoch": 0.49377834988353564, "grad_norm": 1.8273493248223822, "learning_rate": 5.343429492303702e-06, "loss": 0.6908, "step": 16111 }, { "epoch": 0.49380899840627684, "grad_norm": 1.5365324086708663, "learning_rate": 5.342934344910648e-06, "loss": 0.6089, "step": 16112 }, { "epoch": 0.493839646929018, "grad_norm": 1.8261958989629672, "learning_rate": 5.3424391941385724e-06, "loss": 0.6138, "step": 16113 }, { "epoch": 0.4938702954517592, "grad_norm": 0.8507387982940463, "learning_rate": 5.341944039992357e-06, "loss": 0.4485, "step": 16114 }, { "epoch": 0.4939009439745004, "grad_norm": 0.7919326049805275, "learning_rate": 5.34144888247688e-06, "loss": 0.4564, "step": 16115 }, { "epoch": 0.4939315924972416, "grad_norm": 1.7096240266069145, "learning_rate": 5.340953721597019e-06, "loss": 0.7025, "step": 16116 }, { "epoch": 0.4939622410199828, "grad_norm": 1.874086685661637, "learning_rate": 5.340458557357653e-06, "loss": 0.5857, "step": 16117 }, { "epoch": 0.493992889542724, "grad_norm": 1.676411800845107, "learning_rate": 5.339963389763663e-06, "loss": 0.5827, "step": 16118 }, { "epoch": 0.49402353806546523, "grad_norm": 1.7010345239292484, "learning_rate": 5.339468218819926e-06, "loss": 0.6038, "step": 16119 }, { "epoch": 0.49405418658820643, "grad_norm": 1.5461111317603349, "learning_rate": 5.338973044531323e-06, "loss": 0.5705, "step": 16120 }, { "epoch": 0.49408483511094764, "grad_norm": 1.7168873143514527, "learning_rate": 5.33847786690273e-06, "loss": 0.5741, "step": 16121 }, { "epoch": 0.49411548363368885, "grad_norm": 1.9585186254912619, "learning_rate": 5.337982685939029e-06, "loss": 0.6281, "step": 16122 }, { "epoch": 0.49414613215643005, "grad_norm": 1.6753766543350317, "learning_rate": 5.337487501645099e-06, "loss": 0.535, "step": 16123 }, { "epoch": 0.49417678067917126, "grad_norm": 1.73926828742593, "learning_rate": 5.3369923140258165e-06, "loss": 0.6772, "step": 16124 }, { "epoch": 0.49420742920191246, "grad_norm": 1.6744237066173218, "learning_rate": 5.336497123086063e-06, "loss": 0.6424, "step": 16125 }, { "epoch": 0.49423807772465367, "grad_norm": 1.739840410241923, "learning_rate": 5.336001928830719e-06, "loss": 0.6284, "step": 16126 }, { "epoch": 0.4942687262473949, "grad_norm": 1.8017811406263042, "learning_rate": 5.3355067312646605e-06, "loss": 0.5964, "step": 16127 }, { "epoch": 0.4942993747701361, "grad_norm": 1.8155654579925051, "learning_rate": 5.335011530392767e-06, "loss": 0.6425, "step": 16128 }, { "epoch": 0.4943300232928773, "grad_norm": 1.7995735017755548, "learning_rate": 5.334516326219921e-06, "loss": 0.7261, "step": 16129 }, { "epoch": 0.4943606718156185, "grad_norm": 1.6865349304016821, "learning_rate": 5.334021118751e-06, "loss": 0.7361, "step": 16130 }, { "epoch": 0.4943913203383597, "grad_norm": 1.657318445262679, "learning_rate": 5.3335259079908845e-06, "loss": 0.5254, "step": 16131 }, { "epoch": 0.4944219688611009, "grad_norm": 1.6897935390057186, "learning_rate": 5.33303069394445e-06, "loss": 0.6309, "step": 16132 }, { "epoch": 0.4944526173838421, "grad_norm": 2.0220340857851005, "learning_rate": 5.33253547661658e-06, "loss": 0.6675, "step": 16133 }, { "epoch": 0.4944832659065833, "grad_norm": 1.742923156425449, "learning_rate": 5.332040256012154e-06, "loss": 0.6368, "step": 16134 }, { "epoch": 0.4945139144293245, "grad_norm": 1.8299956365878243, "learning_rate": 5.331545032136049e-06, "loss": 0.617, "step": 16135 }, { "epoch": 0.4945445629520657, "grad_norm": 1.683319225850707, "learning_rate": 5.331049804993147e-06, "loss": 0.6885, "step": 16136 }, { "epoch": 0.49457521147480693, "grad_norm": 1.8889825190185927, "learning_rate": 5.330554574588327e-06, "loss": 0.7122, "step": 16137 }, { "epoch": 0.49460585999754814, "grad_norm": 1.7776190105939396, "learning_rate": 5.330059340926466e-06, "loss": 0.6996, "step": 16138 }, { "epoch": 0.49463650852028934, "grad_norm": 1.5858780518470335, "learning_rate": 5.329564104012448e-06, "loss": 0.6117, "step": 16139 }, { "epoch": 0.49466715704303055, "grad_norm": 0.8304596776468546, "learning_rate": 5.32906886385115e-06, "loss": 0.4489, "step": 16140 }, { "epoch": 0.49469780556577175, "grad_norm": 1.6674749541351133, "learning_rate": 5.328573620447452e-06, "loss": 0.5991, "step": 16141 }, { "epoch": 0.49472845408851296, "grad_norm": 1.690890285465648, "learning_rate": 5.328078373806235e-06, "loss": 0.5536, "step": 16142 }, { "epoch": 0.49475910261125416, "grad_norm": 1.7327810974324842, "learning_rate": 5.327583123932376e-06, "loss": 0.5985, "step": 16143 }, { "epoch": 0.4947897511339953, "grad_norm": 0.8183580116624664, "learning_rate": 5.327087870830757e-06, "loss": 0.4386, "step": 16144 }, { "epoch": 0.4948203996567365, "grad_norm": 1.6485506247209176, "learning_rate": 5.3265926145062585e-06, "loss": 0.6152, "step": 16145 }, { "epoch": 0.4948510481794777, "grad_norm": 1.557531114862725, "learning_rate": 5.326097354963759e-06, "loss": 0.6382, "step": 16146 }, { "epoch": 0.49488169670221893, "grad_norm": 1.7555343476759528, "learning_rate": 5.325602092208139e-06, "loss": 0.6882, "step": 16147 }, { "epoch": 0.49491234522496014, "grad_norm": 1.6297461909751965, "learning_rate": 5.325106826244278e-06, "loss": 0.5432, "step": 16148 }, { "epoch": 0.49494299374770134, "grad_norm": 1.5056530114780466, "learning_rate": 5.324611557077057e-06, "loss": 0.5581, "step": 16149 }, { "epoch": 0.49497364227044255, "grad_norm": 1.6549291043364578, "learning_rate": 5.324116284711355e-06, "loss": 0.6232, "step": 16150 }, { "epoch": 0.49500429079318375, "grad_norm": 1.5794959077069513, "learning_rate": 5.323621009152051e-06, "loss": 0.5635, "step": 16151 }, { "epoch": 0.49503493931592496, "grad_norm": 1.6133686537495864, "learning_rate": 5.323125730404029e-06, "loss": 0.5869, "step": 16152 }, { "epoch": 0.49506558783866617, "grad_norm": 1.72088771615478, "learning_rate": 5.322630448472165e-06, "loss": 0.6065, "step": 16153 }, { "epoch": 0.49509623636140737, "grad_norm": 1.4770866896563803, "learning_rate": 5.322135163361339e-06, "loss": 0.6449, "step": 16154 }, { "epoch": 0.4951268848841486, "grad_norm": 1.509101474892127, "learning_rate": 5.321639875076435e-06, "loss": 0.621, "step": 16155 }, { "epoch": 0.4951575334068898, "grad_norm": 0.9209557293979038, "learning_rate": 5.32114458362233e-06, "loss": 0.4145, "step": 16156 }, { "epoch": 0.495188181929631, "grad_norm": 1.7355348289044947, "learning_rate": 5.320649289003906e-06, "loss": 0.6542, "step": 16157 }, { "epoch": 0.4952188304523722, "grad_norm": 1.7643293475657649, "learning_rate": 5.3201539912260426e-06, "loss": 0.6746, "step": 16158 }, { "epoch": 0.4952494789751134, "grad_norm": 1.662570495906049, "learning_rate": 5.31965869029362e-06, "loss": 0.6277, "step": 16159 }, { "epoch": 0.4952801274978546, "grad_norm": 0.8394003373892022, "learning_rate": 5.319163386211517e-06, "loss": 0.4255, "step": 16160 }, { "epoch": 0.4953107760205958, "grad_norm": 1.8864210202635516, "learning_rate": 5.318668078984618e-06, "loss": 0.6711, "step": 16161 }, { "epoch": 0.495341424543337, "grad_norm": 1.7275420536381418, "learning_rate": 5.318172768617798e-06, "loss": 0.6595, "step": 16162 }, { "epoch": 0.4953720730660782, "grad_norm": 1.875916715623536, "learning_rate": 5.317677455115943e-06, "loss": 0.6342, "step": 16163 }, { "epoch": 0.49540272158881943, "grad_norm": 1.770625012265086, "learning_rate": 5.3171821384839284e-06, "loss": 0.6685, "step": 16164 }, { "epoch": 0.49543337011156063, "grad_norm": 2.0676067015392805, "learning_rate": 5.316686818726639e-06, "loss": 0.5896, "step": 16165 }, { "epoch": 0.49546401863430184, "grad_norm": 1.7257031049954823, "learning_rate": 5.3161914958489525e-06, "loss": 0.647, "step": 16166 }, { "epoch": 0.49549466715704304, "grad_norm": 1.8106321701864991, "learning_rate": 5.3156961698557495e-06, "loss": 0.6189, "step": 16167 }, { "epoch": 0.49552531567978425, "grad_norm": 1.8035464175017997, "learning_rate": 5.315200840751912e-06, "loss": 0.6261, "step": 16168 }, { "epoch": 0.49555596420252546, "grad_norm": 1.5712568598212209, "learning_rate": 5.314705508542321e-06, "loss": 0.6496, "step": 16169 }, { "epoch": 0.49558661272526666, "grad_norm": 0.8209638044908621, "learning_rate": 5.314210173231855e-06, "loss": 0.4421, "step": 16170 }, { "epoch": 0.49561726124800787, "grad_norm": 1.612611032282978, "learning_rate": 5.313714834825395e-06, "loss": 0.6723, "step": 16171 }, { "epoch": 0.4956479097707491, "grad_norm": 1.7551779682344304, "learning_rate": 5.313219493327826e-06, "loss": 0.6736, "step": 16172 }, { "epoch": 0.4956785582934903, "grad_norm": 1.8345278738951796, "learning_rate": 5.312724148744022e-06, "loss": 0.6942, "step": 16173 }, { "epoch": 0.4957092068162315, "grad_norm": 1.560907074380846, "learning_rate": 5.312228801078867e-06, "loss": 0.6287, "step": 16174 }, { "epoch": 0.49573985533897263, "grad_norm": 1.7392768381872878, "learning_rate": 5.311733450337242e-06, "loss": 0.7522, "step": 16175 }, { "epoch": 0.49577050386171384, "grad_norm": 1.5029904089714354, "learning_rate": 5.311238096524027e-06, "loss": 0.6901, "step": 16176 }, { "epoch": 0.49580115238445505, "grad_norm": 1.5447338638097288, "learning_rate": 5.310742739644106e-06, "loss": 0.6415, "step": 16177 }, { "epoch": 0.49583180090719625, "grad_norm": 1.695844799278974, "learning_rate": 5.310247379702356e-06, "loss": 0.5341, "step": 16178 }, { "epoch": 0.49586244942993746, "grad_norm": 1.769239966859743, "learning_rate": 5.309752016703657e-06, "loss": 0.6164, "step": 16179 }, { "epoch": 0.49589309795267866, "grad_norm": 1.7504371953086837, "learning_rate": 5.309256650652894e-06, "loss": 0.6406, "step": 16180 }, { "epoch": 0.49592374647541987, "grad_norm": 1.6429172009580546, "learning_rate": 5.3087612815549476e-06, "loss": 0.6942, "step": 16181 }, { "epoch": 0.4959543949981611, "grad_norm": 1.6822166067989301, "learning_rate": 5.308265909414694e-06, "loss": 0.6394, "step": 16182 }, { "epoch": 0.4959850435209023, "grad_norm": 1.8553854800087966, "learning_rate": 5.3077705342370204e-06, "loss": 0.7206, "step": 16183 }, { "epoch": 0.4960156920436435, "grad_norm": 1.6674787367669293, "learning_rate": 5.307275156026804e-06, "loss": 0.6675, "step": 16184 }, { "epoch": 0.4960463405663847, "grad_norm": 1.8334103009925462, "learning_rate": 5.306779774788926e-06, "loss": 0.6957, "step": 16185 }, { "epoch": 0.4960769890891259, "grad_norm": 0.8119664043884982, "learning_rate": 5.306284390528269e-06, "loss": 0.4395, "step": 16186 }, { "epoch": 0.4961076376118671, "grad_norm": 1.6640080831687534, "learning_rate": 5.305789003249714e-06, "loss": 0.654, "step": 16187 }, { "epoch": 0.4961382861346083, "grad_norm": 1.8682878902423141, "learning_rate": 5.305293612958141e-06, "loss": 0.7101, "step": 16188 }, { "epoch": 0.4961689346573495, "grad_norm": 1.7571001029791942, "learning_rate": 5.304798219658433e-06, "loss": 0.6778, "step": 16189 }, { "epoch": 0.4961995831800907, "grad_norm": 1.831878486024662, "learning_rate": 5.304302823355468e-06, "loss": 0.7178, "step": 16190 }, { "epoch": 0.4962302317028319, "grad_norm": 1.818354053696709, "learning_rate": 5.303807424054131e-06, "loss": 0.6431, "step": 16191 }, { "epoch": 0.49626088022557313, "grad_norm": 2.005465493661631, "learning_rate": 5.303312021759302e-06, "loss": 0.7799, "step": 16192 }, { "epoch": 0.49629152874831434, "grad_norm": 1.7675363482898265, "learning_rate": 5.302816616475861e-06, "loss": 0.604, "step": 16193 }, { "epoch": 0.49632217727105554, "grad_norm": 1.716426175801718, "learning_rate": 5.302321208208692e-06, "loss": 0.6508, "step": 16194 }, { "epoch": 0.49635282579379675, "grad_norm": 0.7972839268053123, "learning_rate": 5.301825796962672e-06, "loss": 0.4653, "step": 16195 }, { "epoch": 0.49638347431653795, "grad_norm": 1.6739983135889211, "learning_rate": 5.3013303827426885e-06, "loss": 0.651, "step": 16196 }, { "epoch": 0.49641412283927916, "grad_norm": 0.7680385314093521, "learning_rate": 5.300834965553617e-06, "loss": 0.4339, "step": 16197 }, { "epoch": 0.49644477136202037, "grad_norm": 0.7915307905164682, "learning_rate": 5.300339545400344e-06, "loss": 0.4438, "step": 16198 }, { "epoch": 0.49647541988476157, "grad_norm": 1.7703923123806267, "learning_rate": 5.2998441222877475e-06, "loss": 0.718, "step": 16199 }, { "epoch": 0.4965060684075028, "grad_norm": 0.7690494188980301, "learning_rate": 5.2993486962207095e-06, "loss": 0.4615, "step": 16200 }, { "epoch": 0.496536716930244, "grad_norm": 1.7950803477735118, "learning_rate": 5.298853267204111e-06, "loss": 0.7, "step": 16201 }, { "epoch": 0.4965673654529852, "grad_norm": 1.840365279950168, "learning_rate": 5.298357835242838e-06, "loss": 0.6415, "step": 16202 }, { "epoch": 0.4965980139757264, "grad_norm": 1.4608339818560758, "learning_rate": 5.297862400341768e-06, "loss": 0.5663, "step": 16203 }, { "epoch": 0.4966286624984676, "grad_norm": 1.4069978828142073, "learning_rate": 5.2973669625057825e-06, "loss": 0.5058, "step": 16204 }, { "epoch": 0.4966593110212088, "grad_norm": 1.9520320704779057, "learning_rate": 5.296871521739763e-06, "loss": 0.646, "step": 16205 }, { "epoch": 0.49668995954394995, "grad_norm": 1.6122230248968379, "learning_rate": 5.296376078048595e-06, "loss": 0.5713, "step": 16206 }, { "epoch": 0.49672060806669116, "grad_norm": 1.8517604359538629, "learning_rate": 5.295880631437158e-06, "loss": 0.6668, "step": 16207 }, { "epoch": 0.49675125658943237, "grad_norm": 1.796154948994176, "learning_rate": 5.2953851819103305e-06, "loss": 0.6152, "step": 16208 }, { "epoch": 0.49678190511217357, "grad_norm": 1.5440177565700564, "learning_rate": 5.2948897294729995e-06, "loss": 0.6492, "step": 16209 }, { "epoch": 0.4968125536349148, "grad_norm": 1.9489949259313937, "learning_rate": 5.294394274130044e-06, "loss": 0.6266, "step": 16210 }, { "epoch": 0.496843202157656, "grad_norm": 1.7831261087492747, "learning_rate": 5.293898815886347e-06, "loss": 0.6687, "step": 16211 }, { "epoch": 0.4968738506803972, "grad_norm": 1.8334078195838506, "learning_rate": 5.293403354746789e-06, "loss": 0.6943, "step": 16212 }, { "epoch": 0.4969044992031384, "grad_norm": 2.123253820739389, "learning_rate": 5.292907890716252e-06, "loss": 0.6984, "step": 16213 }, { "epoch": 0.4969351477258796, "grad_norm": 1.9978680966328148, "learning_rate": 5.292412423799619e-06, "loss": 0.7318, "step": 16214 }, { "epoch": 0.4969657962486208, "grad_norm": 0.919874092658123, "learning_rate": 5.291916954001773e-06, "loss": 0.4447, "step": 16215 }, { "epoch": 0.496996444771362, "grad_norm": 1.812503962233617, "learning_rate": 5.2914214813275935e-06, "loss": 0.7352, "step": 16216 }, { "epoch": 0.4970270932941032, "grad_norm": 0.8717694223571926, "learning_rate": 5.290926005781964e-06, "loss": 0.4255, "step": 16217 }, { "epoch": 0.4970577418168444, "grad_norm": 1.7891434508945134, "learning_rate": 5.290430527369764e-06, "loss": 0.6048, "step": 16218 }, { "epoch": 0.49708839033958563, "grad_norm": 1.7905600502992196, "learning_rate": 5.28993504609588e-06, "loss": 0.703, "step": 16219 }, { "epoch": 0.49711903886232683, "grad_norm": 1.793874606883862, "learning_rate": 5.289439561965192e-06, "loss": 0.6611, "step": 16220 }, { "epoch": 0.49714968738506804, "grad_norm": 1.6234893246014805, "learning_rate": 5.28894407498258e-06, "loss": 0.6318, "step": 16221 }, { "epoch": 0.49718033590780925, "grad_norm": 1.580442383981767, "learning_rate": 5.28844858515293e-06, "loss": 0.6184, "step": 16222 }, { "epoch": 0.49721098443055045, "grad_norm": 0.8350527760856444, "learning_rate": 5.287953092481122e-06, "loss": 0.4357, "step": 16223 }, { "epoch": 0.49724163295329166, "grad_norm": 1.700320671032362, "learning_rate": 5.287457596972039e-06, "loss": 0.6289, "step": 16224 }, { "epoch": 0.49727228147603286, "grad_norm": 1.769881324416044, "learning_rate": 5.28696209863056e-06, "loss": 0.6022, "step": 16225 }, { "epoch": 0.49730292999877407, "grad_norm": 1.788719610764909, "learning_rate": 5.286466597461574e-06, "loss": 0.6665, "step": 16226 }, { "epoch": 0.4973335785215153, "grad_norm": 1.824438730953216, "learning_rate": 5.285971093469956e-06, "loss": 0.612, "step": 16227 }, { "epoch": 0.4973642270442565, "grad_norm": 0.8097451778139932, "learning_rate": 5.285475586660593e-06, "loss": 0.413, "step": 16228 }, { "epoch": 0.4973948755669977, "grad_norm": 1.7968016562304905, "learning_rate": 5.284980077038365e-06, "loss": 0.5431, "step": 16229 }, { "epoch": 0.4974255240897389, "grad_norm": 2.002233256316254, "learning_rate": 5.284484564608158e-06, "loss": 0.6203, "step": 16230 }, { "epoch": 0.4974561726124801, "grad_norm": 1.7073010477759207, "learning_rate": 5.2839890493748495e-06, "loss": 0.6811, "step": 16231 }, { "epoch": 0.4974868211352213, "grad_norm": 1.6150762709998148, "learning_rate": 5.283493531343324e-06, "loss": 0.5472, "step": 16232 }, { "epoch": 0.4975174696579625, "grad_norm": 1.5958402866539139, "learning_rate": 5.282998010518465e-06, "loss": 0.6107, "step": 16233 }, { "epoch": 0.4975481181807037, "grad_norm": 1.6903615160656917, "learning_rate": 5.282502486905154e-06, "loss": 0.6662, "step": 16234 }, { "epoch": 0.4975787667034449, "grad_norm": 1.767014846876855, "learning_rate": 5.282006960508275e-06, "loss": 0.7521, "step": 16235 }, { "epoch": 0.4976094152261861, "grad_norm": 0.8454458262549763, "learning_rate": 5.281511431332707e-06, "loss": 0.422, "step": 16236 }, { "epoch": 0.4976400637489273, "grad_norm": 1.9094308214094078, "learning_rate": 5.281015899383336e-06, "loss": 0.7358, "step": 16237 }, { "epoch": 0.4976707122716685, "grad_norm": 1.6448684113136975, "learning_rate": 5.280520364665044e-06, "loss": 0.6635, "step": 16238 }, { "epoch": 0.4977013607944097, "grad_norm": 1.7222719072113675, "learning_rate": 5.2800248271827124e-06, "loss": 0.6148, "step": 16239 }, { "epoch": 0.4977320093171509, "grad_norm": 1.8100061231442117, "learning_rate": 5.279529286941224e-06, "loss": 0.6559, "step": 16240 }, { "epoch": 0.4977626578398921, "grad_norm": 1.7384075191448602, "learning_rate": 5.279033743945463e-06, "loss": 0.6291, "step": 16241 }, { "epoch": 0.4977933063626333, "grad_norm": 1.720311553237497, "learning_rate": 5.27853819820031e-06, "loss": 0.6188, "step": 16242 }, { "epoch": 0.4978239548853745, "grad_norm": 1.8045505104989654, "learning_rate": 5.278042649710651e-06, "loss": 0.6069, "step": 16243 }, { "epoch": 0.4978546034081157, "grad_norm": 1.6628289077077685, "learning_rate": 5.277547098481364e-06, "loss": 0.6235, "step": 16244 }, { "epoch": 0.4978852519308569, "grad_norm": 1.6206118828382994, "learning_rate": 5.277051544517337e-06, "loss": 0.5787, "step": 16245 }, { "epoch": 0.4979159004535981, "grad_norm": 1.837455715115055, "learning_rate": 5.276555987823448e-06, "loss": 0.6938, "step": 16246 }, { "epoch": 0.49794654897633933, "grad_norm": 1.6948273196895736, "learning_rate": 5.276060428404582e-06, "loss": 0.6434, "step": 16247 }, { "epoch": 0.49797719749908054, "grad_norm": 1.7000129518177407, "learning_rate": 5.275564866265624e-06, "loss": 0.6892, "step": 16248 }, { "epoch": 0.49800784602182174, "grad_norm": 1.77846245291664, "learning_rate": 5.275069301411454e-06, "loss": 0.6229, "step": 16249 }, { "epoch": 0.49803849454456295, "grad_norm": 1.6688412467240554, "learning_rate": 5.274573733846956e-06, "loss": 0.6232, "step": 16250 }, { "epoch": 0.49806914306730415, "grad_norm": 0.7980025072903154, "learning_rate": 5.274078163577011e-06, "loss": 0.42, "step": 16251 }, { "epoch": 0.49809979159004536, "grad_norm": 1.521336437218647, "learning_rate": 5.2735825906065065e-06, "loss": 0.6144, "step": 16252 }, { "epoch": 0.49813044011278657, "grad_norm": 1.7716418736111956, "learning_rate": 5.273087014940321e-06, "loss": 0.6745, "step": 16253 }, { "epoch": 0.49816108863552777, "grad_norm": 0.7905851258965905, "learning_rate": 5.27259143658334e-06, "loss": 0.4363, "step": 16254 }, { "epoch": 0.498191737158269, "grad_norm": 1.5557504153925894, "learning_rate": 5.272095855540444e-06, "loss": 0.6408, "step": 16255 }, { "epoch": 0.4982223856810102, "grad_norm": 1.7963213155596618, "learning_rate": 5.271600271816521e-06, "loss": 0.5913, "step": 16256 }, { "epoch": 0.4982530342037514, "grad_norm": 0.7790407052117992, "learning_rate": 5.271104685416449e-06, "loss": 0.4346, "step": 16257 }, { "epoch": 0.4982836827264926, "grad_norm": 1.6110818940469427, "learning_rate": 5.270609096345114e-06, "loss": 0.6725, "step": 16258 }, { "epoch": 0.4983143312492338, "grad_norm": 1.8265561375613275, "learning_rate": 5.270113504607397e-06, "loss": 0.6801, "step": 16259 }, { "epoch": 0.498344979771975, "grad_norm": 1.6646586729173434, "learning_rate": 5.269617910208183e-06, "loss": 0.6108, "step": 16260 }, { "epoch": 0.4983756282947162, "grad_norm": 1.686067223342425, "learning_rate": 5.269122313152356e-06, "loss": 0.6495, "step": 16261 }, { "epoch": 0.4984062768174574, "grad_norm": 1.7748592639676952, "learning_rate": 5.268626713444797e-06, "loss": 0.6169, "step": 16262 }, { "epoch": 0.4984369253401986, "grad_norm": 1.6198817000523027, "learning_rate": 5.26813111109039e-06, "loss": 0.5902, "step": 16263 }, { "epoch": 0.49846757386293983, "grad_norm": 0.7860097356646888, "learning_rate": 5.267635506094019e-06, "loss": 0.4214, "step": 16264 }, { "epoch": 0.49849822238568103, "grad_norm": 0.8191357742909876, "learning_rate": 5.267139898460568e-06, "loss": 0.4262, "step": 16265 }, { "epoch": 0.49852887090842224, "grad_norm": 1.908414770195076, "learning_rate": 5.266644288194918e-06, "loss": 0.7291, "step": 16266 }, { "epoch": 0.49855951943116344, "grad_norm": 1.76886958000415, "learning_rate": 5.266148675301953e-06, "loss": 0.6264, "step": 16267 }, { "epoch": 0.4985901679539046, "grad_norm": 0.7627568303602125, "learning_rate": 5.265653059786558e-06, "loss": 0.4263, "step": 16268 }, { "epoch": 0.4986208164766458, "grad_norm": 1.6841948377035079, "learning_rate": 5.265157441653616e-06, "loss": 0.7191, "step": 16269 }, { "epoch": 0.498651464999387, "grad_norm": 1.5603145520628245, "learning_rate": 5.264661820908008e-06, "loss": 0.6346, "step": 16270 }, { "epoch": 0.4986821135221282, "grad_norm": 1.840343568434805, "learning_rate": 5.264166197554621e-06, "loss": 0.6464, "step": 16271 }, { "epoch": 0.4987127620448694, "grad_norm": 1.859546966377986, "learning_rate": 5.263670571598335e-06, "loss": 0.5968, "step": 16272 }, { "epoch": 0.4987434105676106, "grad_norm": 1.7147258379655788, "learning_rate": 5.263174943044037e-06, "loss": 0.6938, "step": 16273 }, { "epoch": 0.49877405909035183, "grad_norm": 1.621776423814195, "learning_rate": 5.262679311896609e-06, "loss": 0.5585, "step": 16274 }, { "epoch": 0.49880470761309303, "grad_norm": 1.6731265003059677, "learning_rate": 5.262183678160935e-06, "loss": 0.6914, "step": 16275 }, { "epoch": 0.49883535613583424, "grad_norm": 1.703191286198811, "learning_rate": 5.261688041841897e-06, "loss": 0.5998, "step": 16276 }, { "epoch": 0.49886600465857545, "grad_norm": 0.8639307050473861, "learning_rate": 5.26119240294438e-06, "loss": 0.4623, "step": 16277 }, { "epoch": 0.49889665318131665, "grad_norm": 1.6972000764700064, "learning_rate": 5.260696761473268e-06, "loss": 0.6294, "step": 16278 }, { "epoch": 0.49892730170405786, "grad_norm": 1.5310949750416512, "learning_rate": 5.260201117433441e-06, "loss": 0.515, "step": 16279 }, { "epoch": 0.49895795022679906, "grad_norm": 1.4901175947335066, "learning_rate": 5.25970547082979e-06, "loss": 0.6177, "step": 16280 }, { "epoch": 0.49898859874954027, "grad_norm": 1.8768603831651067, "learning_rate": 5.259209821667193e-06, "loss": 0.5895, "step": 16281 }, { "epoch": 0.4990192472722815, "grad_norm": 1.8674696908807167, "learning_rate": 5.2587141699505355e-06, "loss": 0.7497, "step": 16282 }, { "epoch": 0.4990498957950227, "grad_norm": 1.7737205111573622, "learning_rate": 5.2582185156847e-06, "loss": 0.5776, "step": 16283 }, { "epoch": 0.4990805443177639, "grad_norm": 1.7946524073136, "learning_rate": 5.2577228588745736e-06, "loss": 0.6157, "step": 16284 }, { "epoch": 0.4991111928405051, "grad_norm": 1.813336946980399, "learning_rate": 5.257227199525035e-06, "loss": 0.699, "step": 16285 }, { "epoch": 0.4991418413632463, "grad_norm": 1.7007717789448482, "learning_rate": 5.256731537640973e-06, "loss": 0.649, "step": 16286 }, { "epoch": 0.4991724898859875, "grad_norm": 1.612380028412785, "learning_rate": 5.256235873227268e-06, "loss": 0.5839, "step": 16287 }, { "epoch": 0.4992031384087287, "grad_norm": 1.7615788705834585, "learning_rate": 5.255740206288808e-06, "loss": 0.6956, "step": 16288 }, { "epoch": 0.4992337869314699, "grad_norm": 1.9208165932561232, "learning_rate": 5.255244536830472e-06, "loss": 0.6514, "step": 16289 }, { "epoch": 0.4992644354542111, "grad_norm": 1.7206759729800838, "learning_rate": 5.254748864857147e-06, "loss": 0.6156, "step": 16290 }, { "epoch": 0.4992950839769523, "grad_norm": 1.7918312037134048, "learning_rate": 5.254253190373716e-06, "loss": 0.6189, "step": 16291 }, { "epoch": 0.49932573249969353, "grad_norm": 3.75120288104131, "learning_rate": 5.253757513385064e-06, "loss": 0.6748, "step": 16292 }, { "epoch": 0.49935638102243474, "grad_norm": 1.8889172895671205, "learning_rate": 5.253261833896074e-06, "loss": 0.6507, "step": 16293 }, { "epoch": 0.49938702954517594, "grad_norm": 0.8121156518107633, "learning_rate": 5.252766151911629e-06, "loss": 0.4404, "step": 16294 }, { "epoch": 0.49941767806791715, "grad_norm": 1.799940445593437, "learning_rate": 5.252270467436615e-06, "loss": 0.705, "step": 16295 }, { "epoch": 0.49944832659065835, "grad_norm": 1.5711672431608186, "learning_rate": 5.251774780475916e-06, "loss": 0.5993, "step": 16296 }, { "epoch": 0.49947897511339956, "grad_norm": 1.824715967088055, "learning_rate": 5.251279091034417e-06, "loss": 0.6737, "step": 16297 }, { "epoch": 0.49950962363614076, "grad_norm": 1.6604012975519888, "learning_rate": 5.250783399116998e-06, "loss": 0.5614, "step": 16298 }, { "epoch": 0.4995402721588819, "grad_norm": 1.5899715253850586, "learning_rate": 5.25028770472855e-06, "loss": 0.6012, "step": 16299 }, { "epoch": 0.4995709206816231, "grad_norm": 1.6880487577160896, "learning_rate": 5.24979200787395e-06, "loss": 0.6059, "step": 16300 }, { "epoch": 0.4996015692043643, "grad_norm": 1.6857333305320914, "learning_rate": 5.249296308558086e-06, "loss": 0.6195, "step": 16301 }, { "epoch": 0.49963221772710553, "grad_norm": 1.6958205771900965, "learning_rate": 5.248800606785842e-06, "loss": 0.6395, "step": 16302 }, { "epoch": 0.49966286624984674, "grad_norm": 1.7037810207950388, "learning_rate": 5.2483049025621025e-06, "loss": 0.6467, "step": 16303 }, { "epoch": 0.49969351477258794, "grad_norm": 1.6769534437608078, "learning_rate": 5.24780919589175e-06, "loss": 0.5895, "step": 16304 }, { "epoch": 0.49972416329532915, "grad_norm": 1.8124097444123612, "learning_rate": 5.247313486779671e-06, "loss": 0.5557, "step": 16305 }, { "epoch": 0.49975481181807035, "grad_norm": 1.843210857369111, "learning_rate": 5.246817775230748e-06, "loss": 0.6138, "step": 16306 }, { "epoch": 0.49978546034081156, "grad_norm": 1.6187795158968563, "learning_rate": 5.2463220612498675e-06, "loss": 0.649, "step": 16307 }, { "epoch": 0.49981610886355277, "grad_norm": 1.6126184957535543, "learning_rate": 5.245826344841912e-06, "loss": 0.6016, "step": 16308 }, { "epoch": 0.49984675738629397, "grad_norm": 2.0045313036592507, "learning_rate": 5.2453306260117665e-06, "loss": 0.7202, "step": 16309 }, { "epoch": 0.4998774059090352, "grad_norm": 1.5120911188937776, "learning_rate": 5.2448349047643165e-06, "loss": 0.5785, "step": 16310 }, { "epoch": 0.4999080544317764, "grad_norm": 1.9371628488922454, "learning_rate": 5.244339181104446e-06, "loss": 0.6586, "step": 16311 }, { "epoch": 0.4999387029545176, "grad_norm": 1.6392489978148133, "learning_rate": 5.243843455037038e-06, "loss": 0.5062, "step": 16312 }, { "epoch": 0.4999693514772588, "grad_norm": 1.7071353056238667, "learning_rate": 5.243347726566977e-06, "loss": 0.6652, "step": 16313 }, { "epoch": 0.5, "grad_norm": 1.7085496659286128, "learning_rate": 5.242851995699149e-06, "loss": 0.6943, "step": 16314 }, { "epoch": 0.5000306485227412, "grad_norm": 1.4232604348469646, "learning_rate": 5.2423562624384394e-06, "loss": 0.5121, "step": 16315 }, { "epoch": 0.5000612970454824, "grad_norm": 0.8034202790709951, "learning_rate": 5.24186052678973e-06, "loss": 0.4203, "step": 16316 }, { "epoch": 0.5000919455682236, "grad_norm": 0.7712192049248275, "learning_rate": 5.241364788757907e-06, "loss": 0.4398, "step": 16317 }, { "epoch": 0.5001225940909648, "grad_norm": 1.7398843801418735, "learning_rate": 5.240869048347857e-06, "loss": 0.8038, "step": 16318 }, { "epoch": 0.500153242613706, "grad_norm": 0.8007505981024847, "learning_rate": 5.240373305564463e-06, "loss": 0.4529, "step": 16319 }, { "epoch": 0.5001838911364472, "grad_norm": 0.8187166920887079, "learning_rate": 5.239877560412606e-06, "loss": 0.4558, "step": 16320 }, { "epoch": 0.5002145396591884, "grad_norm": 1.7358015678308318, "learning_rate": 5.239381812897176e-06, "loss": 0.626, "step": 16321 }, { "epoch": 0.5002451881819296, "grad_norm": 1.8374229936663662, "learning_rate": 5.238886063023055e-06, "loss": 0.6259, "step": 16322 }, { "epoch": 0.5002758367046708, "grad_norm": 0.7929078909328924, "learning_rate": 5.2383903107951305e-06, "loss": 0.44, "step": 16323 }, { "epoch": 0.5003064852274121, "grad_norm": 1.8592195605745958, "learning_rate": 5.2378945562182825e-06, "loss": 0.6532, "step": 16324 }, { "epoch": 0.5003371337501532, "grad_norm": 1.7577750151745173, "learning_rate": 5.2373987992974005e-06, "loss": 0.5914, "step": 16325 }, { "epoch": 0.5003677822728945, "grad_norm": 0.8121926722166212, "learning_rate": 5.236903040037366e-06, "loss": 0.4404, "step": 16326 }, { "epoch": 0.5003984307956356, "grad_norm": 0.7357677652080362, "learning_rate": 5.236407278443068e-06, "loss": 0.4284, "step": 16327 }, { "epoch": 0.5004290793183769, "grad_norm": 2.185834114512189, "learning_rate": 5.235911514519385e-06, "loss": 0.6035, "step": 16328 }, { "epoch": 0.500459727841118, "grad_norm": 1.968677390757535, "learning_rate": 5.235415748271208e-06, "loss": 0.6599, "step": 16329 }, { "epoch": 0.5004903763638593, "grad_norm": 1.7463360558518084, "learning_rate": 5.234919979703419e-06, "loss": 0.6499, "step": 16330 }, { "epoch": 0.5005210248866004, "grad_norm": 1.6938498870460819, "learning_rate": 5.234424208820902e-06, "loss": 0.6597, "step": 16331 }, { "epoch": 0.5005516734093417, "grad_norm": 1.625344724512027, "learning_rate": 5.233928435628543e-06, "loss": 0.5517, "step": 16332 }, { "epoch": 0.5005823219320829, "grad_norm": 1.8206913524669353, "learning_rate": 5.233432660131228e-06, "loss": 0.7007, "step": 16333 }, { "epoch": 0.5006129704548241, "grad_norm": 0.8636490727381161, "learning_rate": 5.232936882333844e-06, "loss": 0.4532, "step": 16334 }, { "epoch": 0.5006436189775653, "grad_norm": 1.9464114763207305, "learning_rate": 5.232441102241269e-06, "loss": 0.6595, "step": 16335 }, { "epoch": 0.5006742675003065, "grad_norm": 1.7446098239383134, "learning_rate": 5.231945319858395e-06, "loss": 0.6882, "step": 16336 }, { "epoch": 0.5007049160230477, "grad_norm": 1.6209058553054676, "learning_rate": 5.231449535190103e-06, "loss": 0.5224, "step": 16337 }, { "epoch": 0.5007355645457889, "grad_norm": 1.613797266678913, "learning_rate": 5.230953748241282e-06, "loss": 0.6035, "step": 16338 }, { "epoch": 0.5007662130685301, "grad_norm": 2.0417044462908356, "learning_rate": 5.230457959016812e-06, "loss": 0.724, "step": 16339 }, { "epoch": 0.5007968615912713, "grad_norm": 1.699747156963151, "learning_rate": 5.229962167521582e-06, "loss": 0.645, "step": 16340 }, { "epoch": 0.5008275101140125, "grad_norm": 0.7916076353534405, "learning_rate": 5.229466373760474e-06, "loss": 0.4343, "step": 16341 }, { "epoch": 0.5008581586367538, "grad_norm": 1.713436246116799, "learning_rate": 5.228970577738377e-06, "loss": 0.6589, "step": 16342 }, { "epoch": 0.5008888071594949, "grad_norm": 1.8107658326149483, "learning_rate": 5.2284747794601745e-06, "loss": 0.6173, "step": 16343 }, { "epoch": 0.5009194556822362, "grad_norm": 1.8952091913890687, "learning_rate": 5.2279789789307515e-06, "loss": 0.5981, "step": 16344 }, { "epoch": 0.5009501042049773, "grad_norm": 0.769785475040978, "learning_rate": 5.227483176154991e-06, "loss": 0.4494, "step": 16345 }, { "epoch": 0.5009807527277185, "grad_norm": 1.830190590187198, "learning_rate": 5.226987371137784e-06, "loss": 0.6045, "step": 16346 }, { "epoch": 0.5010114012504597, "grad_norm": 1.6993455690968213, "learning_rate": 5.226491563884011e-06, "loss": 0.517, "step": 16347 }, { "epoch": 0.5010420497732009, "grad_norm": 1.8241267647276642, "learning_rate": 5.225995754398557e-06, "loss": 0.6577, "step": 16348 }, { "epoch": 0.5010726982959421, "grad_norm": 1.8443171506669793, "learning_rate": 5.22549994268631e-06, "loss": 0.8224, "step": 16349 }, { "epoch": 0.5011033468186833, "grad_norm": 0.8046175597249243, "learning_rate": 5.225004128752156e-06, "loss": 0.4213, "step": 16350 }, { "epoch": 0.5011339953414246, "grad_norm": 1.7521672300941715, "learning_rate": 5.224508312600978e-06, "loss": 0.5898, "step": 16351 }, { "epoch": 0.5011646438641657, "grad_norm": 1.7794519363756232, "learning_rate": 5.224012494237661e-06, "loss": 0.6366, "step": 16352 }, { "epoch": 0.501195292386907, "grad_norm": 1.7394510358593496, "learning_rate": 5.2235166736670925e-06, "loss": 0.6632, "step": 16353 }, { "epoch": 0.5012259409096481, "grad_norm": 1.6803937730349394, "learning_rate": 5.2230208508941575e-06, "loss": 0.61, "step": 16354 }, { "epoch": 0.5012565894323894, "grad_norm": 1.9458492904751878, "learning_rate": 5.22252502592374e-06, "loss": 0.6297, "step": 16355 }, { "epoch": 0.5012872379551305, "grad_norm": 1.4628792149300358, "learning_rate": 5.222029198760725e-06, "loss": 0.6371, "step": 16356 }, { "epoch": 0.5013178864778718, "grad_norm": 1.6957565882799441, "learning_rate": 5.221533369410002e-06, "loss": 0.6267, "step": 16357 }, { "epoch": 0.5013485350006129, "grad_norm": 1.6303688467977027, "learning_rate": 5.221037537876454e-06, "loss": 0.6511, "step": 16358 }, { "epoch": 0.5013791835233542, "grad_norm": 1.5959910067231247, "learning_rate": 5.2205417041649655e-06, "loss": 0.6516, "step": 16359 }, { "epoch": 0.5014098320460953, "grad_norm": 1.816700526335924, "learning_rate": 5.220045868280424e-06, "loss": 0.658, "step": 16360 }, { "epoch": 0.5014404805688366, "grad_norm": 1.824679980470203, "learning_rate": 5.219550030227714e-06, "loss": 0.6632, "step": 16361 }, { "epoch": 0.5014711290915778, "grad_norm": 0.8472591312979034, "learning_rate": 5.219054190011721e-06, "loss": 0.4346, "step": 16362 }, { "epoch": 0.501501777614319, "grad_norm": 1.7141144040859, "learning_rate": 5.2185583476373306e-06, "loss": 0.5925, "step": 16363 }, { "epoch": 0.5015324261370602, "grad_norm": 1.687023972751839, "learning_rate": 5.218062503109429e-06, "loss": 0.6641, "step": 16364 }, { "epoch": 0.5015630746598014, "grad_norm": 1.833332475335243, "learning_rate": 5.217566656432903e-06, "loss": 0.6644, "step": 16365 }, { "epoch": 0.5015937231825426, "grad_norm": 1.7227173312105346, "learning_rate": 5.217070807612636e-06, "loss": 0.5759, "step": 16366 }, { "epoch": 0.5016243717052838, "grad_norm": 1.81961316429812, "learning_rate": 5.216574956653515e-06, "loss": 0.6126, "step": 16367 }, { "epoch": 0.501655020228025, "grad_norm": 1.833671521069984, "learning_rate": 5.216079103560425e-06, "loss": 0.5978, "step": 16368 }, { "epoch": 0.5016856687507663, "grad_norm": 1.6595428438714865, "learning_rate": 5.215583248338254e-06, "loss": 0.6174, "step": 16369 }, { "epoch": 0.5017163172735074, "grad_norm": 1.8215476850085126, "learning_rate": 5.215087390991885e-06, "loss": 0.7372, "step": 16370 }, { "epoch": 0.5017469657962487, "grad_norm": 1.8834038316734116, "learning_rate": 5.214591531526204e-06, "loss": 0.6347, "step": 16371 }, { "epoch": 0.5017776143189898, "grad_norm": 1.70171923400506, "learning_rate": 5.2140956699460986e-06, "loss": 0.653, "step": 16372 }, { "epoch": 0.5018082628417311, "grad_norm": 1.8131182977295943, "learning_rate": 5.213599806256455e-06, "loss": 0.6385, "step": 16373 }, { "epoch": 0.5018389113644722, "grad_norm": 1.8445262151564548, "learning_rate": 5.213103940462155e-06, "loss": 0.7024, "step": 16374 }, { "epoch": 0.5018695598872135, "grad_norm": 1.5404185292782906, "learning_rate": 5.212608072568089e-06, "loss": 0.6583, "step": 16375 }, { "epoch": 0.5019002084099546, "grad_norm": 0.8659058955799628, "learning_rate": 5.2121122025791415e-06, "loss": 0.4379, "step": 16376 }, { "epoch": 0.5019308569326958, "grad_norm": 1.9665325580161315, "learning_rate": 5.2116163305002e-06, "loss": 0.585, "step": 16377 }, { "epoch": 0.501961505455437, "grad_norm": 1.699035975494023, "learning_rate": 5.211120456336145e-06, "loss": 0.6566, "step": 16378 }, { "epoch": 0.5019921539781782, "grad_norm": 1.747303270034652, "learning_rate": 5.210624580091869e-06, "loss": 0.6185, "step": 16379 }, { "epoch": 0.5020228025009195, "grad_norm": 1.8880806024529684, "learning_rate": 5.210128701772254e-06, "loss": 0.6059, "step": 16380 }, { "epoch": 0.5020534510236606, "grad_norm": 0.7524720484572776, "learning_rate": 5.209632821382187e-06, "loss": 0.4345, "step": 16381 }, { "epoch": 0.5020840995464019, "grad_norm": 1.6260139446263293, "learning_rate": 5.209136938926553e-06, "loss": 0.7231, "step": 16382 }, { "epoch": 0.502114748069143, "grad_norm": 1.7830771813515192, "learning_rate": 5.2086410544102405e-06, "loss": 0.6429, "step": 16383 }, { "epoch": 0.5021453965918843, "grad_norm": 1.6420524639987784, "learning_rate": 5.208145167838134e-06, "loss": 0.6396, "step": 16384 }, { "epoch": 0.5021760451146254, "grad_norm": 1.7723835195834188, "learning_rate": 5.20764927921512e-06, "loss": 0.6103, "step": 16385 }, { "epoch": 0.5022066936373667, "grad_norm": 0.7892216086320889, "learning_rate": 5.207153388546085e-06, "loss": 0.4468, "step": 16386 }, { "epoch": 0.5022373421601078, "grad_norm": 1.6889049752006338, "learning_rate": 5.206657495835914e-06, "loss": 0.6158, "step": 16387 }, { "epoch": 0.5022679906828491, "grad_norm": 0.8267107630666807, "learning_rate": 5.206161601089495e-06, "loss": 0.4456, "step": 16388 }, { "epoch": 0.5022986392055903, "grad_norm": 1.6490568154923917, "learning_rate": 5.2056657043117124e-06, "loss": 0.6303, "step": 16389 }, { "epoch": 0.5023292877283315, "grad_norm": 1.6611740002475412, "learning_rate": 5.2051698055074526e-06, "loss": 0.6517, "step": 16390 }, { "epoch": 0.5023599362510727, "grad_norm": 1.5905615473700974, "learning_rate": 5.204673904681601e-06, "loss": 0.5918, "step": 16391 }, { "epoch": 0.5023905847738139, "grad_norm": 1.7401483363759334, "learning_rate": 5.204178001839049e-06, "loss": 0.6542, "step": 16392 }, { "epoch": 0.5024212332965551, "grad_norm": 1.7309457650908167, "learning_rate": 5.203682096984674e-06, "loss": 0.6441, "step": 16393 }, { "epoch": 0.5024518818192963, "grad_norm": 1.647256329175675, "learning_rate": 5.203186190123371e-06, "loss": 0.5689, "step": 16394 }, { "epoch": 0.5024825303420375, "grad_norm": 1.7766902691821342, "learning_rate": 5.20269028126002e-06, "loss": 0.7493, "step": 16395 }, { "epoch": 0.5025131788647788, "grad_norm": 0.800668979858291, "learning_rate": 5.202194370399511e-06, "loss": 0.4176, "step": 16396 }, { "epoch": 0.5025438273875199, "grad_norm": 1.7030691888110956, "learning_rate": 5.201698457546729e-06, "loss": 0.7288, "step": 16397 }, { "epoch": 0.5025744759102612, "grad_norm": 1.7868735328712786, "learning_rate": 5.2012025427065606e-06, "loss": 0.7233, "step": 16398 }, { "epoch": 0.5026051244330023, "grad_norm": 0.7977131654290214, "learning_rate": 5.200706625883891e-06, "loss": 0.4355, "step": 16399 }, { "epoch": 0.5026357729557436, "grad_norm": 0.787530818590338, "learning_rate": 5.2002107070836095e-06, "loss": 0.4279, "step": 16400 }, { "epoch": 0.5026664214784847, "grad_norm": 1.930972509686876, "learning_rate": 5.199714786310599e-06, "loss": 0.6357, "step": 16401 }, { "epoch": 0.502697070001226, "grad_norm": 0.8255140472709614, "learning_rate": 5.199218863569748e-06, "loss": 0.4229, "step": 16402 }, { "epoch": 0.5027277185239671, "grad_norm": 1.6794701153258933, "learning_rate": 5.198722938865944e-06, "loss": 0.6377, "step": 16403 }, { "epoch": 0.5027583670467084, "grad_norm": 1.6400125683556792, "learning_rate": 5.19822701220407e-06, "loss": 0.6586, "step": 16404 }, { "epoch": 0.5027890155694495, "grad_norm": 1.983444478888393, "learning_rate": 5.197731083589016e-06, "loss": 0.7031, "step": 16405 }, { "epoch": 0.5028196640921908, "grad_norm": 1.6521482577120876, "learning_rate": 5.197235153025666e-06, "loss": 0.6127, "step": 16406 }, { "epoch": 0.502850312614932, "grad_norm": 1.6124288008394059, "learning_rate": 5.1967392205189094e-06, "loss": 0.6463, "step": 16407 }, { "epoch": 0.5028809611376731, "grad_norm": 1.58039096942022, "learning_rate": 5.196243286073629e-06, "loss": 0.6802, "step": 16408 }, { "epoch": 0.5029116096604144, "grad_norm": 1.6194907007739434, "learning_rate": 5.195747349694714e-06, "loss": 0.5906, "step": 16409 }, { "epoch": 0.5029422581831555, "grad_norm": 1.9389613849782321, "learning_rate": 5.195251411387049e-06, "loss": 0.6762, "step": 16410 }, { "epoch": 0.5029729067058968, "grad_norm": 1.7690152689793615, "learning_rate": 5.1947554711555235e-06, "loss": 0.6773, "step": 16411 }, { "epoch": 0.5030035552286379, "grad_norm": 1.5593080065750222, "learning_rate": 5.1942595290050225e-06, "loss": 0.6313, "step": 16412 }, { "epoch": 0.5030342037513792, "grad_norm": 0.8512457984447679, "learning_rate": 5.193763584940431e-06, "loss": 0.4399, "step": 16413 }, { "epoch": 0.5030648522741203, "grad_norm": 1.9279154881714364, "learning_rate": 5.1932676389666395e-06, "loss": 0.6361, "step": 16414 }, { "epoch": 0.5030955007968616, "grad_norm": 1.8774186697265347, "learning_rate": 5.1927716910885314e-06, "loss": 0.723, "step": 16415 }, { "epoch": 0.5031261493196028, "grad_norm": 0.8087053711140475, "learning_rate": 5.192275741310995e-06, "loss": 0.4313, "step": 16416 }, { "epoch": 0.503156797842344, "grad_norm": 1.9502507527510404, "learning_rate": 5.1917797896389155e-06, "loss": 0.6677, "step": 16417 }, { "epoch": 0.5031874463650852, "grad_norm": 1.6090226935990428, "learning_rate": 5.191283836077181e-06, "loss": 0.6047, "step": 16418 }, { "epoch": 0.5032180948878264, "grad_norm": 0.7877908220975133, "learning_rate": 5.190787880630679e-06, "loss": 0.452, "step": 16419 }, { "epoch": 0.5032487434105676, "grad_norm": 0.790970469270689, "learning_rate": 5.190291923304295e-06, "loss": 0.4513, "step": 16420 }, { "epoch": 0.5032793919333088, "grad_norm": 0.7728051879143654, "learning_rate": 5.189795964102915e-06, "loss": 0.4541, "step": 16421 }, { "epoch": 0.50331004045605, "grad_norm": 1.6572003334527796, "learning_rate": 5.189300003031426e-06, "loss": 0.6064, "step": 16422 }, { "epoch": 0.5033406889787913, "grad_norm": 1.919210952821336, "learning_rate": 5.188804040094718e-06, "loss": 0.7982, "step": 16423 }, { "epoch": 0.5033713375015324, "grad_norm": 0.7657940120104079, "learning_rate": 5.188308075297674e-06, "loss": 0.4394, "step": 16424 }, { "epoch": 0.5034019860242737, "grad_norm": 1.7008733426553317, "learning_rate": 5.1878121086451824e-06, "loss": 0.7197, "step": 16425 }, { "epoch": 0.5034326345470148, "grad_norm": 1.5992639449604167, "learning_rate": 5.187316140142131e-06, "loss": 0.6781, "step": 16426 }, { "epoch": 0.5034632830697561, "grad_norm": 1.688827071153518, "learning_rate": 5.1868201697934054e-06, "loss": 0.5985, "step": 16427 }, { "epoch": 0.5034939315924972, "grad_norm": 0.7851086686139455, "learning_rate": 5.1863241976038915e-06, "loss": 0.4331, "step": 16428 }, { "epoch": 0.5035245801152385, "grad_norm": 1.561314192078714, "learning_rate": 5.185828223578479e-06, "loss": 0.6257, "step": 16429 }, { "epoch": 0.5035552286379796, "grad_norm": 0.7721637322230792, "learning_rate": 5.185332247722053e-06, "loss": 0.4286, "step": 16430 }, { "epoch": 0.5035858771607209, "grad_norm": 1.6504057400589631, "learning_rate": 5.184836270039503e-06, "loss": 0.5756, "step": 16431 }, { "epoch": 0.503616525683462, "grad_norm": 1.625534162062772, "learning_rate": 5.184340290535711e-06, "loss": 0.6172, "step": 16432 }, { "epoch": 0.5036471742062033, "grad_norm": 1.7106699371645537, "learning_rate": 5.183844309215567e-06, "loss": 0.6772, "step": 16433 }, { "epoch": 0.5036778227289445, "grad_norm": 0.8159812131705697, "learning_rate": 5.18334832608396e-06, "loss": 0.4702, "step": 16434 }, { "epoch": 0.5037084712516857, "grad_norm": 1.7912311245333268, "learning_rate": 5.182852341145774e-06, "loss": 0.6018, "step": 16435 }, { "epoch": 0.5037391197744269, "grad_norm": 1.843528858684504, "learning_rate": 5.182356354405896e-06, "loss": 0.6271, "step": 16436 }, { "epoch": 0.5037697682971681, "grad_norm": 1.733952764597927, "learning_rate": 5.1818603658692155e-06, "loss": 0.5423, "step": 16437 }, { "epoch": 0.5038004168199093, "grad_norm": 1.5686446874669024, "learning_rate": 5.18136437554062e-06, "loss": 0.6242, "step": 16438 }, { "epoch": 0.5038310653426504, "grad_norm": 1.7615272989767325, "learning_rate": 5.18086838342499e-06, "loss": 0.6448, "step": 16439 }, { "epoch": 0.5038617138653917, "grad_norm": 1.8145377991078273, "learning_rate": 5.180372389527221e-06, "loss": 0.6398, "step": 16440 }, { "epoch": 0.5038923623881328, "grad_norm": 1.5525283844844067, "learning_rate": 5.179876393852198e-06, "loss": 0.5832, "step": 16441 }, { "epoch": 0.5039230109108741, "grad_norm": 1.7952425055601344, "learning_rate": 5.179380396404805e-06, "loss": 0.6491, "step": 16442 }, { "epoch": 0.5039536594336153, "grad_norm": 1.7639816597976385, "learning_rate": 5.178884397189931e-06, "loss": 0.701, "step": 16443 }, { "epoch": 0.5039843079563565, "grad_norm": 1.912288024355431, "learning_rate": 5.178388396212462e-06, "loss": 0.7148, "step": 16444 }, { "epoch": 0.5040149564790977, "grad_norm": 1.7049496906843082, "learning_rate": 5.1778923934772885e-06, "loss": 0.6396, "step": 16445 }, { "epoch": 0.5040456050018389, "grad_norm": 1.9240747464869432, "learning_rate": 5.177396388989296e-06, "loss": 0.6788, "step": 16446 }, { "epoch": 0.5040762535245801, "grad_norm": 1.9648843308003117, "learning_rate": 5.176900382753369e-06, "loss": 0.6798, "step": 16447 }, { "epoch": 0.5041069020473213, "grad_norm": 1.1195803483547244, "learning_rate": 5.1764043747744e-06, "loss": 0.42, "step": 16448 }, { "epoch": 0.5041375505700625, "grad_norm": 1.5766218216585426, "learning_rate": 5.175908365057272e-06, "loss": 0.6079, "step": 16449 }, { "epoch": 0.5041681990928037, "grad_norm": 1.7417569850769459, "learning_rate": 5.175412353606876e-06, "loss": 0.6609, "step": 16450 }, { "epoch": 0.5041988476155449, "grad_norm": 1.847910230371017, "learning_rate": 5.1749163404280945e-06, "loss": 0.6747, "step": 16451 }, { "epoch": 0.5042294961382862, "grad_norm": 1.7757876791010692, "learning_rate": 5.1744203255258185e-06, "loss": 0.5758, "step": 16452 }, { "epoch": 0.5042601446610273, "grad_norm": 1.899517307078199, "learning_rate": 5.173924308904934e-06, "loss": 0.5631, "step": 16453 }, { "epoch": 0.5042907931837686, "grad_norm": 0.7669006664090152, "learning_rate": 5.1734282905703295e-06, "loss": 0.4045, "step": 16454 }, { "epoch": 0.5043214417065097, "grad_norm": 1.7276354525079387, "learning_rate": 5.172932270526891e-06, "loss": 0.6248, "step": 16455 }, { "epoch": 0.504352090229251, "grad_norm": 1.8096953881860876, "learning_rate": 5.172436248779507e-06, "loss": 0.7555, "step": 16456 }, { "epoch": 0.5043827387519921, "grad_norm": 1.665913137031637, "learning_rate": 5.171940225333065e-06, "loss": 0.6167, "step": 16457 }, { "epoch": 0.5044133872747334, "grad_norm": 1.8271846458256975, "learning_rate": 5.171444200192451e-06, "loss": 0.684, "step": 16458 }, { "epoch": 0.5044440357974745, "grad_norm": 1.862076035316359, "learning_rate": 5.170948173362555e-06, "loss": 0.5654, "step": 16459 }, { "epoch": 0.5044746843202158, "grad_norm": 1.7865794599192861, "learning_rate": 5.17045214484826e-06, "loss": 0.6177, "step": 16460 }, { "epoch": 0.504505332842957, "grad_norm": 1.6102993650358641, "learning_rate": 5.1699561146544595e-06, "loss": 0.5879, "step": 16461 }, { "epoch": 0.5045359813656982, "grad_norm": 1.7245204123909585, "learning_rate": 5.1694600827860365e-06, "loss": 0.6653, "step": 16462 }, { "epoch": 0.5045666298884394, "grad_norm": 1.5264327135337878, "learning_rate": 5.16896404924788e-06, "loss": 0.5736, "step": 16463 }, { "epoch": 0.5045972784111806, "grad_norm": 1.6164775157317615, "learning_rate": 5.1684680140448775e-06, "loss": 0.5964, "step": 16464 }, { "epoch": 0.5046279269339218, "grad_norm": 2.347423555381885, "learning_rate": 5.167971977181916e-06, "loss": 0.6315, "step": 16465 }, { "epoch": 0.504658575456663, "grad_norm": 1.6661450017207857, "learning_rate": 5.167475938663885e-06, "loss": 0.6191, "step": 16466 }, { "epoch": 0.5046892239794042, "grad_norm": 1.6814004359985797, "learning_rate": 5.16697989849567e-06, "loss": 0.5969, "step": 16467 }, { "epoch": 0.5047198725021455, "grad_norm": 1.7023741821388505, "learning_rate": 5.166483856682158e-06, "loss": 0.6462, "step": 16468 }, { "epoch": 0.5047505210248866, "grad_norm": 0.8355568840966475, "learning_rate": 5.1659878132282406e-06, "loss": 0.445, "step": 16469 }, { "epoch": 0.5047811695476278, "grad_norm": 2.009294029112707, "learning_rate": 5.165491768138801e-06, "loss": 0.6506, "step": 16470 }, { "epoch": 0.504811818070369, "grad_norm": 1.8208208997537734, "learning_rate": 5.164995721418729e-06, "loss": 0.5737, "step": 16471 }, { "epoch": 0.5048424665931102, "grad_norm": 1.7161904547849451, "learning_rate": 5.164499673072913e-06, "loss": 0.6674, "step": 16472 }, { "epoch": 0.5048731151158514, "grad_norm": 2.066929894258744, "learning_rate": 5.164003623106238e-06, "loss": 0.6584, "step": 16473 }, { "epoch": 0.5049037636385926, "grad_norm": 1.5447745459653626, "learning_rate": 5.163507571523595e-06, "loss": 0.6246, "step": 16474 }, { "epoch": 0.5049344121613338, "grad_norm": 1.8441282266235328, "learning_rate": 5.163011518329868e-06, "loss": 0.6573, "step": 16475 }, { "epoch": 0.504965060684075, "grad_norm": 1.5766287402655723, "learning_rate": 5.162515463529949e-06, "loss": 0.661, "step": 16476 }, { "epoch": 0.5049957092068162, "grad_norm": 1.5978239886238539, "learning_rate": 5.162019407128722e-06, "loss": 0.6292, "step": 16477 }, { "epoch": 0.5050263577295574, "grad_norm": 1.6378858075792189, "learning_rate": 5.161523349131078e-06, "loss": 0.6603, "step": 16478 }, { "epoch": 0.5050570062522987, "grad_norm": 1.865585045673601, "learning_rate": 5.1610272895419e-06, "loss": 0.6631, "step": 16479 }, { "epoch": 0.5050876547750398, "grad_norm": 1.7170592252475723, "learning_rate": 5.160531228366081e-06, "loss": 0.7054, "step": 16480 }, { "epoch": 0.5051183032977811, "grad_norm": 1.5433191609909485, "learning_rate": 5.160035165608508e-06, "loss": 0.6071, "step": 16481 }, { "epoch": 0.5051489518205222, "grad_norm": 0.7863732404403623, "learning_rate": 5.159539101274065e-06, "loss": 0.4189, "step": 16482 }, { "epoch": 0.5051796003432635, "grad_norm": 1.6014702013848106, "learning_rate": 5.159043035367643e-06, "loss": 0.6212, "step": 16483 }, { "epoch": 0.5052102488660046, "grad_norm": 2.0119903213204076, "learning_rate": 5.158546967894131e-06, "loss": 0.6744, "step": 16484 }, { "epoch": 0.5052408973887459, "grad_norm": 1.948202892119166, "learning_rate": 5.158050898858415e-06, "loss": 0.6646, "step": 16485 }, { "epoch": 0.505271545911487, "grad_norm": 1.5207390850910099, "learning_rate": 5.157554828265381e-06, "loss": 0.6818, "step": 16486 }, { "epoch": 0.5053021944342283, "grad_norm": 1.6425177848313863, "learning_rate": 5.15705875611992e-06, "loss": 0.656, "step": 16487 }, { "epoch": 0.5053328429569695, "grad_norm": 1.6983397849315847, "learning_rate": 5.156562682426919e-06, "loss": 0.5788, "step": 16488 }, { "epoch": 0.5053634914797107, "grad_norm": 0.7815899895609385, "learning_rate": 5.156066607191266e-06, "loss": 0.4431, "step": 16489 }, { "epoch": 0.5053941400024519, "grad_norm": 1.689937729965657, "learning_rate": 5.155570530417848e-06, "loss": 0.6512, "step": 16490 }, { "epoch": 0.5054247885251931, "grad_norm": 1.7412087800748157, "learning_rate": 5.155074452111555e-06, "loss": 0.6531, "step": 16491 }, { "epoch": 0.5054554370479343, "grad_norm": 2.0361935926581656, "learning_rate": 5.1545783722772725e-06, "loss": 0.6177, "step": 16492 }, { "epoch": 0.5054860855706755, "grad_norm": 1.8999023804416704, "learning_rate": 5.154082290919891e-06, "loss": 0.5519, "step": 16493 }, { "epoch": 0.5055167340934167, "grad_norm": 1.6748936782607546, "learning_rate": 5.153586208044296e-06, "loss": 0.6906, "step": 16494 }, { "epoch": 0.505547382616158, "grad_norm": 1.6496813462314315, "learning_rate": 5.153090123655378e-06, "loss": 0.6852, "step": 16495 }, { "epoch": 0.5055780311388991, "grad_norm": 1.7971226965583647, "learning_rate": 5.152594037758023e-06, "loss": 0.606, "step": 16496 }, { "epoch": 0.5056086796616404, "grad_norm": 1.8899865983745279, "learning_rate": 5.152097950357119e-06, "loss": 0.6078, "step": 16497 }, { "epoch": 0.5056393281843815, "grad_norm": 1.8569161112508512, "learning_rate": 5.151601861457557e-06, "loss": 0.6251, "step": 16498 }, { "epoch": 0.5056699767071228, "grad_norm": 1.9052389916576613, "learning_rate": 5.151105771064221e-06, "loss": 0.6563, "step": 16499 }, { "epoch": 0.5057006252298639, "grad_norm": 1.9734729181821575, "learning_rate": 5.150609679182004e-06, "loss": 0.6477, "step": 16500 }, { "epoch": 0.5057312737526051, "grad_norm": 1.8978409683743318, "learning_rate": 5.150113585815788e-06, "loss": 0.6725, "step": 16501 }, { "epoch": 0.5057619222753463, "grad_norm": 1.7306185248130053, "learning_rate": 5.149617490970466e-06, "loss": 0.6571, "step": 16502 }, { "epoch": 0.5057925707980875, "grad_norm": 0.8406443690366311, "learning_rate": 5.149121394650924e-06, "loss": 0.4602, "step": 16503 }, { "epoch": 0.5058232193208287, "grad_norm": 0.8228719623863187, "learning_rate": 5.148625296862053e-06, "loss": 0.446, "step": 16504 }, { "epoch": 0.5058538678435699, "grad_norm": 1.7073988594679055, "learning_rate": 5.148129197608737e-06, "loss": 0.5933, "step": 16505 }, { "epoch": 0.5058845163663112, "grad_norm": 1.887921234869021, "learning_rate": 5.147633096895866e-06, "loss": 0.6482, "step": 16506 }, { "epoch": 0.5059151648890523, "grad_norm": 1.948955293320806, "learning_rate": 5.14713699472833e-06, "loss": 0.7271, "step": 16507 }, { "epoch": 0.5059458134117936, "grad_norm": 1.8452443499105233, "learning_rate": 5.146640891111013e-06, "loss": 0.7399, "step": 16508 }, { "epoch": 0.5059764619345347, "grad_norm": 0.7992083949327303, "learning_rate": 5.146144786048808e-06, "loss": 0.4609, "step": 16509 }, { "epoch": 0.506007110457276, "grad_norm": 1.9596159717576735, "learning_rate": 5.145648679546598e-06, "loss": 0.6105, "step": 16510 }, { "epoch": 0.5060377589800171, "grad_norm": 1.6542685006920215, "learning_rate": 5.145152571609279e-06, "loss": 0.61, "step": 16511 }, { "epoch": 0.5060684075027584, "grad_norm": 1.6129870380282274, "learning_rate": 5.14465646224173e-06, "loss": 0.6301, "step": 16512 }, { "epoch": 0.5060990560254995, "grad_norm": 1.6200020496269343, "learning_rate": 5.144160351448847e-06, "loss": 0.5586, "step": 16513 }, { "epoch": 0.5061297045482408, "grad_norm": 1.8755837001892885, "learning_rate": 5.143664239235513e-06, "loss": 0.709, "step": 16514 }, { "epoch": 0.506160353070982, "grad_norm": 1.6184928387260251, "learning_rate": 5.143168125606621e-06, "loss": 0.6707, "step": 16515 }, { "epoch": 0.5061910015937232, "grad_norm": 1.5611143836863175, "learning_rate": 5.1426720105670545e-06, "loss": 0.548, "step": 16516 }, { "epoch": 0.5062216501164644, "grad_norm": 1.6777668021235337, "learning_rate": 5.142175894121706e-06, "loss": 0.6266, "step": 16517 }, { "epoch": 0.5062522986392056, "grad_norm": 1.6380846453427689, "learning_rate": 5.14167977627546e-06, "loss": 0.6052, "step": 16518 }, { "epoch": 0.5062829471619468, "grad_norm": 1.5977901020439138, "learning_rate": 5.141183657033208e-06, "loss": 0.6433, "step": 16519 }, { "epoch": 0.506313595684688, "grad_norm": 1.6583143964478475, "learning_rate": 5.140687536399838e-06, "loss": 0.5115, "step": 16520 }, { "epoch": 0.5063442442074292, "grad_norm": 1.8997875549038241, "learning_rate": 5.140191414380236e-06, "loss": 0.6529, "step": 16521 }, { "epoch": 0.5063748927301704, "grad_norm": 1.6862302256917647, "learning_rate": 5.139695290979293e-06, "loss": 0.6413, "step": 16522 }, { "epoch": 0.5064055412529116, "grad_norm": 1.7755934924127905, "learning_rate": 5.139199166201897e-06, "loss": 0.6806, "step": 16523 }, { "epoch": 0.5064361897756529, "grad_norm": 1.628391254984528, "learning_rate": 5.138703040052936e-06, "loss": 0.6969, "step": 16524 }, { "epoch": 0.506466838298394, "grad_norm": 1.9424973806416195, "learning_rate": 5.138206912537297e-06, "loss": 0.6746, "step": 16525 }, { "epoch": 0.5064974868211353, "grad_norm": 2.001181916495822, "learning_rate": 5.1377107836598715e-06, "loss": 0.6834, "step": 16526 }, { "epoch": 0.5065281353438764, "grad_norm": 1.50796224152637, "learning_rate": 5.137214653425546e-06, "loss": 0.575, "step": 16527 }, { "epoch": 0.5065587838666177, "grad_norm": 1.9290156972633083, "learning_rate": 5.13671852183921e-06, "loss": 0.7115, "step": 16528 }, { "epoch": 0.5065894323893588, "grad_norm": 1.6585229457871986, "learning_rate": 5.13622238890575e-06, "loss": 0.5905, "step": 16529 }, { "epoch": 0.5066200809121001, "grad_norm": 1.6876171383178384, "learning_rate": 5.1357262546300565e-06, "loss": 0.6206, "step": 16530 }, { "epoch": 0.5066507294348412, "grad_norm": 1.4615693505853247, "learning_rate": 5.135230119017019e-06, "loss": 0.5717, "step": 16531 }, { "epoch": 0.5066813779575824, "grad_norm": 1.6804395487927641, "learning_rate": 5.134733982071523e-06, "loss": 0.6441, "step": 16532 }, { "epoch": 0.5067120264803237, "grad_norm": 1.7259429053251878, "learning_rate": 5.134237843798457e-06, "loss": 0.639, "step": 16533 }, { "epoch": 0.5067426750030648, "grad_norm": 1.9869901019522667, "learning_rate": 5.133741704202714e-06, "loss": 0.6095, "step": 16534 }, { "epoch": 0.5067733235258061, "grad_norm": 1.7910956649758425, "learning_rate": 5.13324556328918e-06, "loss": 0.712, "step": 16535 }, { "epoch": 0.5068039720485472, "grad_norm": 1.880427796750579, "learning_rate": 5.13274942106274e-06, "loss": 0.6772, "step": 16536 }, { "epoch": 0.5068346205712885, "grad_norm": 1.8901723823205918, "learning_rate": 5.13225327752829e-06, "loss": 0.5224, "step": 16537 }, { "epoch": 0.5068652690940296, "grad_norm": 0.8756519717001764, "learning_rate": 5.131757132690713e-06, "loss": 0.4397, "step": 16538 }, { "epoch": 0.5068959176167709, "grad_norm": 1.6715913969719296, "learning_rate": 5.131260986554899e-06, "loss": 0.6127, "step": 16539 }, { "epoch": 0.506926566139512, "grad_norm": 1.6856992032584381, "learning_rate": 5.130764839125736e-06, "loss": 0.605, "step": 16540 }, { "epoch": 0.5069572146622533, "grad_norm": 0.8344946313080327, "learning_rate": 5.130268690408114e-06, "loss": 0.4526, "step": 16541 }, { "epoch": 0.5069878631849944, "grad_norm": 1.6505724495729683, "learning_rate": 5.1297725404069234e-06, "loss": 0.6138, "step": 16542 }, { "epoch": 0.5070185117077357, "grad_norm": 1.7268680748169492, "learning_rate": 5.129276389127049e-06, "loss": 0.6141, "step": 16543 }, { "epoch": 0.5070491602304769, "grad_norm": 0.8339183101179434, "learning_rate": 5.128780236573381e-06, "loss": 0.4427, "step": 16544 }, { "epoch": 0.5070798087532181, "grad_norm": 1.79679996170508, "learning_rate": 5.1282840827508085e-06, "loss": 0.5738, "step": 16545 }, { "epoch": 0.5071104572759593, "grad_norm": 0.8473451249088759, "learning_rate": 5.1277879276642206e-06, "loss": 0.4314, "step": 16546 }, { "epoch": 0.5071411057987005, "grad_norm": 1.9353112632246114, "learning_rate": 5.1272917713185055e-06, "loss": 0.7055, "step": 16547 }, { "epoch": 0.5071717543214417, "grad_norm": 1.556253293908934, "learning_rate": 5.1267956137185514e-06, "loss": 0.6741, "step": 16548 }, { "epoch": 0.5072024028441829, "grad_norm": 0.8324071054569203, "learning_rate": 5.126299454869248e-06, "loss": 0.4488, "step": 16549 }, { "epoch": 0.5072330513669241, "grad_norm": 1.8067353680768719, "learning_rate": 5.1258032947754845e-06, "loss": 0.6194, "step": 16550 }, { "epoch": 0.5072636998896654, "grad_norm": 1.7662907609904506, "learning_rate": 5.125307133442148e-06, "loss": 0.5531, "step": 16551 }, { "epoch": 0.5072943484124065, "grad_norm": 1.6043539277939718, "learning_rate": 5.124810970874129e-06, "loss": 0.6193, "step": 16552 }, { "epoch": 0.5073249969351478, "grad_norm": 1.765744368690706, "learning_rate": 5.124314807076314e-06, "loss": 0.6178, "step": 16553 }, { "epoch": 0.5073556454578889, "grad_norm": 1.7738083975688614, "learning_rate": 5.1238186420535965e-06, "loss": 0.6691, "step": 16554 }, { "epoch": 0.5073862939806302, "grad_norm": 1.6847843516466485, "learning_rate": 5.123322475810859e-06, "loss": 0.6585, "step": 16555 }, { "epoch": 0.5074169425033713, "grad_norm": 1.9554465817331617, "learning_rate": 5.122826308352995e-06, "loss": 0.6937, "step": 16556 }, { "epoch": 0.5074475910261126, "grad_norm": 1.5818666403321944, "learning_rate": 5.122330139684892e-06, "loss": 0.6204, "step": 16557 }, { "epoch": 0.5074782395488537, "grad_norm": 0.7928578901864669, "learning_rate": 5.1218339698114395e-06, "loss": 0.4396, "step": 16558 }, { "epoch": 0.507508888071595, "grad_norm": 0.7935888084713261, "learning_rate": 5.121337798737523e-06, "loss": 0.4264, "step": 16559 }, { "epoch": 0.5075395365943361, "grad_norm": 1.5382674406139831, "learning_rate": 5.1208416264680376e-06, "loss": 0.5624, "step": 16560 }, { "epoch": 0.5075701851170774, "grad_norm": 1.6539417807122114, "learning_rate": 5.120345453007867e-06, "loss": 0.5946, "step": 16561 }, { "epoch": 0.5076008336398186, "grad_norm": 1.5339180494977012, "learning_rate": 5.1198492783619015e-06, "loss": 0.6232, "step": 16562 }, { "epoch": 0.5076314821625597, "grad_norm": 0.8310511447819374, "learning_rate": 5.119353102535031e-06, "loss": 0.4219, "step": 16563 }, { "epoch": 0.507662130685301, "grad_norm": 1.7692949832014848, "learning_rate": 5.118856925532144e-06, "loss": 0.7074, "step": 16564 }, { "epoch": 0.5076927792080421, "grad_norm": 1.4709249398646456, "learning_rate": 5.11836074735813e-06, "loss": 0.643, "step": 16565 }, { "epoch": 0.5077234277307834, "grad_norm": 1.9576575214570655, "learning_rate": 5.117864568017875e-06, "loss": 0.6749, "step": 16566 }, { "epoch": 0.5077540762535245, "grad_norm": 2.055552537843354, "learning_rate": 5.117368387516272e-06, "loss": 0.6839, "step": 16567 }, { "epoch": 0.5077847247762658, "grad_norm": 1.6322853624318365, "learning_rate": 5.116872205858207e-06, "loss": 0.6135, "step": 16568 }, { "epoch": 0.507815373299007, "grad_norm": 1.6417880635793942, "learning_rate": 5.116376023048573e-06, "loss": 0.6697, "step": 16569 }, { "epoch": 0.5078460218217482, "grad_norm": 0.8082503119717082, "learning_rate": 5.115879839092253e-06, "loss": 0.4551, "step": 16570 }, { "epoch": 0.5078766703444894, "grad_norm": 1.8715230939690712, "learning_rate": 5.115383653994141e-06, "loss": 0.528, "step": 16571 }, { "epoch": 0.5079073188672306, "grad_norm": 1.639533431693074, "learning_rate": 5.114887467759123e-06, "loss": 0.5898, "step": 16572 }, { "epoch": 0.5079379673899718, "grad_norm": 1.5424226973031185, "learning_rate": 5.114391280392092e-06, "loss": 0.663, "step": 16573 }, { "epoch": 0.507968615912713, "grad_norm": 0.795055979961058, "learning_rate": 5.1138950918979315e-06, "loss": 0.4298, "step": 16574 }, { "epoch": 0.5079992644354542, "grad_norm": 0.7607218036512566, "learning_rate": 5.113398902281536e-06, "loss": 0.4308, "step": 16575 }, { "epoch": 0.5080299129581954, "grad_norm": 0.8007539860986522, "learning_rate": 5.112902711547789e-06, "loss": 0.4265, "step": 16576 }, { "epoch": 0.5080605614809366, "grad_norm": 1.5671908145126856, "learning_rate": 5.112406519701586e-06, "loss": 0.588, "step": 16577 }, { "epoch": 0.5080912100036779, "grad_norm": 1.9290030401197347, "learning_rate": 5.111910326747811e-06, "loss": 0.7106, "step": 16578 }, { "epoch": 0.508121858526419, "grad_norm": 1.5511219307937947, "learning_rate": 5.111414132691355e-06, "loss": 0.5985, "step": 16579 }, { "epoch": 0.5081525070491603, "grad_norm": 1.6444393538888173, "learning_rate": 5.110917937537108e-06, "loss": 0.6138, "step": 16580 }, { "epoch": 0.5081831555719014, "grad_norm": 1.6516313543048207, "learning_rate": 5.110421741289957e-06, "loss": 0.7043, "step": 16581 }, { "epoch": 0.5082138040946427, "grad_norm": 1.5938358308092428, "learning_rate": 5.109925543954793e-06, "loss": 0.6161, "step": 16582 }, { "epoch": 0.5082444526173838, "grad_norm": 1.8268019354864102, "learning_rate": 5.109429345536504e-06, "loss": 0.6348, "step": 16583 }, { "epoch": 0.5082751011401251, "grad_norm": 1.6933117944895244, "learning_rate": 5.108933146039981e-06, "loss": 0.6669, "step": 16584 }, { "epoch": 0.5083057496628662, "grad_norm": 1.7440116404825006, "learning_rate": 5.108436945470111e-06, "loss": 0.6934, "step": 16585 }, { "epoch": 0.5083363981856075, "grad_norm": 1.7925062880183893, "learning_rate": 5.107940743831784e-06, "loss": 0.6785, "step": 16586 }, { "epoch": 0.5083670467083486, "grad_norm": 1.6974423589882115, "learning_rate": 5.107444541129889e-06, "loss": 0.705, "step": 16587 }, { "epoch": 0.5083976952310899, "grad_norm": 0.890895075248693, "learning_rate": 5.106948337369315e-06, "loss": 0.4303, "step": 16588 }, { "epoch": 0.5084283437538311, "grad_norm": 1.673266533487511, "learning_rate": 5.106452132554953e-06, "loss": 0.706, "step": 16589 }, { "epoch": 0.5084589922765723, "grad_norm": 1.6869038484320233, "learning_rate": 5.10595592669169e-06, "loss": 0.6374, "step": 16590 }, { "epoch": 0.5084896407993135, "grad_norm": 1.7188855951982582, "learning_rate": 5.105459719784416e-06, "loss": 0.5973, "step": 16591 }, { "epoch": 0.5085202893220547, "grad_norm": 1.872573629480021, "learning_rate": 5.104963511838021e-06, "loss": 0.625, "step": 16592 }, { "epoch": 0.5085509378447959, "grad_norm": 1.8770167603271255, "learning_rate": 5.104467302857393e-06, "loss": 0.6694, "step": 16593 }, { "epoch": 0.508581586367537, "grad_norm": 1.6295323104599158, "learning_rate": 5.103971092847422e-06, "loss": 0.7595, "step": 16594 }, { "epoch": 0.5086122348902783, "grad_norm": 2.1127233408424617, "learning_rate": 5.103474881812998e-06, "loss": 0.7081, "step": 16595 }, { "epoch": 0.5086428834130194, "grad_norm": 1.766917463341789, "learning_rate": 5.102978669759009e-06, "loss": 0.6215, "step": 16596 }, { "epoch": 0.5086735319357607, "grad_norm": 1.7416576662328043, "learning_rate": 5.102482456690345e-06, "loss": 0.663, "step": 16597 }, { "epoch": 0.5087041804585019, "grad_norm": 1.6376289579244854, "learning_rate": 5.101986242611895e-06, "loss": 0.6222, "step": 16598 }, { "epoch": 0.5087348289812431, "grad_norm": 1.7150034732445698, "learning_rate": 5.101490027528548e-06, "loss": 0.6569, "step": 16599 }, { "epoch": 0.5087654775039843, "grad_norm": 1.9963710839911188, "learning_rate": 5.100993811445195e-06, "loss": 0.6207, "step": 16600 }, { "epoch": 0.5087961260267255, "grad_norm": 1.7164991747831064, "learning_rate": 5.100497594366724e-06, "loss": 0.7528, "step": 16601 }, { "epoch": 0.5088267745494667, "grad_norm": 1.7771327015421947, "learning_rate": 5.100001376298023e-06, "loss": 0.6336, "step": 16602 }, { "epoch": 0.5088574230722079, "grad_norm": 0.8252116208123415, "learning_rate": 5.099505157243984e-06, "loss": 0.4252, "step": 16603 }, { "epoch": 0.5088880715949491, "grad_norm": 2.0454714699404866, "learning_rate": 5.099008937209495e-06, "loss": 0.6498, "step": 16604 }, { "epoch": 0.5089187201176903, "grad_norm": 1.578317503498672, "learning_rate": 5.098512716199445e-06, "loss": 0.5945, "step": 16605 }, { "epoch": 0.5089493686404315, "grad_norm": 0.8122556790039557, "learning_rate": 5.098016494218725e-06, "loss": 0.4403, "step": 16606 }, { "epoch": 0.5089800171631728, "grad_norm": 1.6009318073707135, "learning_rate": 5.097520271272223e-06, "loss": 0.6706, "step": 16607 }, { "epoch": 0.5090106656859139, "grad_norm": 1.7763312873508732, "learning_rate": 5.097024047364829e-06, "loss": 0.6678, "step": 16608 }, { "epoch": 0.5090413142086552, "grad_norm": 1.615911367238483, "learning_rate": 5.096527822501431e-06, "loss": 0.6557, "step": 16609 }, { "epoch": 0.5090719627313963, "grad_norm": 1.9047688909353881, "learning_rate": 5.0960315966869215e-06, "loss": 0.7186, "step": 16610 }, { "epoch": 0.5091026112541376, "grad_norm": 1.6577068912301882, "learning_rate": 5.095535369926188e-06, "loss": 0.6349, "step": 16611 }, { "epoch": 0.5091332597768787, "grad_norm": 0.7684380086842336, "learning_rate": 5.09503914222412e-06, "loss": 0.4349, "step": 16612 }, { "epoch": 0.50916390829962, "grad_norm": 1.5120200632142837, "learning_rate": 5.094542913585605e-06, "loss": 0.5806, "step": 16613 }, { "epoch": 0.5091945568223611, "grad_norm": 2.0412922169943672, "learning_rate": 5.094046684015536e-06, "loss": 0.6901, "step": 16614 }, { "epoch": 0.5092252053451024, "grad_norm": 2.089075678030547, "learning_rate": 5.0935504535188005e-06, "loss": 0.6694, "step": 16615 }, { "epoch": 0.5092558538678436, "grad_norm": 1.6779574584113024, "learning_rate": 5.09305422210029e-06, "loss": 0.6836, "step": 16616 }, { "epoch": 0.5092865023905848, "grad_norm": 1.6859163046075056, "learning_rate": 5.09255798976489e-06, "loss": 0.6414, "step": 16617 }, { "epoch": 0.509317150913326, "grad_norm": 1.6202194963499454, "learning_rate": 5.092061756517494e-06, "loss": 0.6384, "step": 16618 }, { "epoch": 0.5093477994360672, "grad_norm": 1.7366789329398986, "learning_rate": 5.09156552236299e-06, "loss": 0.6602, "step": 16619 }, { "epoch": 0.5093784479588084, "grad_norm": 1.797772328087729, "learning_rate": 5.091069287306266e-06, "loss": 0.6867, "step": 16620 }, { "epoch": 0.5094090964815496, "grad_norm": 1.88535543845707, "learning_rate": 5.090573051352215e-06, "loss": 0.6992, "step": 16621 }, { "epoch": 0.5094397450042908, "grad_norm": 1.738989543568604, "learning_rate": 5.0900768145057224e-06, "loss": 0.6221, "step": 16622 }, { "epoch": 0.509470393527032, "grad_norm": 0.7959536547606337, "learning_rate": 5.089580576771682e-06, "loss": 0.4433, "step": 16623 }, { "epoch": 0.5095010420497732, "grad_norm": 2.105915670859422, "learning_rate": 5.089084338154981e-06, "loss": 0.6709, "step": 16624 }, { "epoch": 0.5095316905725144, "grad_norm": 1.925676717397161, "learning_rate": 5.088588098660508e-06, "loss": 0.7061, "step": 16625 }, { "epoch": 0.5095623390952556, "grad_norm": 1.712737425267269, "learning_rate": 5.088091858293153e-06, "loss": 0.6286, "step": 16626 }, { "epoch": 0.5095929876179968, "grad_norm": 1.9058372356259903, "learning_rate": 5.08759561705781e-06, "loss": 0.6182, "step": 16627 }, { "epoch": 0.509623636140738, "grad_norm": 0.787540705046771, "learning_rate": 5.087099374959362e-06, "loss": 0.4305, "step": 16628 }, { "epoch": 0.5096542846634792, "grad_norm": 0.7774440912231363, "learning_rate": 5.086603132002702e-06, "loss": 0.4386, "step": 16629 }, { "epoch": 0.5096849331862204, "grad_norm": 0.7855677519019597, "learning_rate": 5.0861068881927185e-06, "loss": 0.4326, "step": 16630 }, { "epoch": 0.5097155817089616, "grad_norm": 1.7953346806997177, "learning_rate": 5.085610643534305e-06, "loss": 0.5832, "step": 16631 }, { "epoch": 0.5097462302317028, "grad_norm": 1.920384367520524, "learning_rate": 5.0851143980323445e-06, "loss": 0.6072, "step": 16632 }, { "epoch": 0.509776878754444, "grad_norm": 2.047628865616506, "learning_rate": 5.084618151691733e-06, "loss": 0.6591, "step": 16633 }, { "epoch": 0.5098075272771853, "grad_norm": 1.7296103994352794, "learning_rate": 5.084121904517354e-06, "loss": 0.6765, "step": 16634 }, { "epoch": 0.5098381757999264, "grad_norm": 1.7279915165446573, "learning_rate": 5.083625656514101e-06, "loss": 0.6116, "step": 16635 }, { "epoch": 0.5098688243226677, "grad_norm": 0.8048913592943445, "learning_rate": 5.083129407686865e-06, "loss": 0.4361, "step": 16636 }, { "epoch": 0.5098994728454088, "grad_norm": 1.6142868345620867, "learning_rate": 5.082633158040532e-06, "loss": 0.5841, "step": 16637 }, { "epoch": 0.5099301213681501, "grad_norm": 1.9058185283286786, "learning_rate": 5.082136907579995e-06, "loss": 0.7384, "step": 16638 }, { "epoch": 0.5099607698908912, "grad_norm": 1.7582084946518337, "learning_rate": 5.081640656310141e-06, "loss": 0.6551, "step": 16639 }, { "epoch": 0.5099914184136325, "grad_norm": 2.042596754512278, "learning_rate": 5.081144404235861e-06, "loss": 0.5952, "step": 16640 }, { "epoch": 0.5100220669363736, "grad_norm": 1.670862687646357, "learning_rate": 5.080648151362043e-06, "loss": 0.623, "step": 16641 }, { "epoch": 0.5100527154591149, "grad_norm": 1.6051717380065846, "learning_rate": 5.080151897693581e-06, "loss": 0.5758, "step": 16642 }, { "epoch": 0.510083363981856, "grad_norm": 0.8323571743652639, "learning_rate": 5.079655643235358e-06, "loss": 0.4428, "step": 16643 }, { "epoch": 0.5101140125045973, "grad_norm": 1.9860084266980464, "learning_rate": 5.079159387992271e-06, "loss": 0.6733, "step": 16644 }, { "epoch": 0.5101446610273385, "grad_norm": 1.6021040975817178, "learning_rate": 5.0786631319692034e-06, "loss": 0.5792, "step": 16645 }, { "epoch": 0.5101753095500797, "grad_norm": 1.6569002863063784, "learning_rate": 5.07816687517105e-06, "loss": 0.6671, "step": 16646 }, { "epoch": 0.5102059580728209, "grad_norm": 1.722738663565071, "learning_rate": 5.077670617602698e-06, "loss": 0.6325, "step": 16647 }, { "epoch": 0.5102366065955621, "grad_norm": 1.826350485433878, "learning_rate": 5.0771743592690356e-06, "loss": 0.6794, "step": 16648 }, { "epoch": 0.5102672551183033, "grad_norm": 1.764768102809877, "learning_rate": 5.076678100174958e-06, "loss": 0.6873, "step": 16649 }, { "epoch": 0.5102979036410445, "grad_norm": 0.7727740071744215, "learning_rate": 5.0761818403253496e-06, "loss": 0.4342, "step": 16650 }, { "epoch": 0.5103285521637857, "grad_norm": 1.6984275512209068, "learning_rate": 5.0756855797251015e-06, "loss": 0.5989, "step": 16651 }, { "epoch": 0.510359200686527, "grad_norm": 1.7245040303151982, "learning_rate": 5.0751893183791046e-06, "loss": 0.6946, "step": 16652 }, { "epoch": 0.5103898492092681, "grad_norm": 1.7542849475563307, "learning_rate": 5.074693056292248e-06, "loss": 0.6186, "step": 16653 }, { "epoch": 0.5104204977320094, "grad_norm": 1.9360052077787713, "learning_rate": 5.0741967934694224e-06, "loss": 0.7045, "step": 16654 }, { "epoch": 0.5104511462547505, "grad_norm": 0.7850153195581879, "learning_rate": 5.073700529915516e-06, "loss": 0.424, "step": 16655 }, { "epoch": 0.5104817947774917, "grad_norm": 1.7041433312238679, "learning_rate": 5.073204265635418e-06, "loss": 0.5961, "step": 16656 }, { "epoch": 0.5105124433002329, "grad_norm": 1.8132667382703833, "learning_rate": 5.072708000634023e-06, "loss": 0.596, "step": 16657 }, { "epoch": 0.5105430918229741, "grad_norm": 1.771185586738061, "learning_rate": 5.072211734916215e-06, "loss": 0.8201, "step": 16658 }, { "epoch": 0.5105737403457153, "grad_norm": 1.831947735760908, "learning_rate": 5.071715468486887e-06, "loss": 0.5798, "step": 16659 }, { "epoch": 0.5106043888684565, "grad_norm": 1.9024385755426036, "learning_rate": 5.071219201350928e-06, "loss": 0.6805, "step": 16660 }, { "epoch": 0.5106350373911978, "grad_norm": 1.8063815317172558, "learning_rate": 5.070722933513228e-06, "loss": 0.6671, "step": 16661 }, { "epoch": 0.5106656859139389, "grad_norm": 1.8279155296261653, "learning_rate": 5.070226664978677e-06, "loss": 0.669, "step": 16662 }, { "epoch": 0.5106963344366802, "grad_norm": 1.8865738798772875, "learning_rate": 5.069730395752164e-06, "loss": 0.6944, "step": 16663 }, { "epoch": 0.5107269829594213, "grad_norm": 1.8650065715243809, "learning_rate": 5.06923412583858e-06, "loss": 0.5712, "step": 16664 }, { "epoch": 0.5107576314821626, "grad_norm": 1.8949602276012965, "learning_rate": 5.068737855242816e-06, "loss": 0.7681, "step": 16665 }, { "epoch": 0.5107882800049037, "grad_norm": 1.665156179196561, "learning_rate": 5.0682415839697585e-06, "loss": 0.61, "step": 16666 }, { "epoch": 0.510818928527645, "grad_norm": 1.7719173059695073, "learning_rate": 5.0677453120242995e-06, "loss": 0.6876, "step": 16667 }, { "epoch": 0.5108495770503861, "grad_norm": 0.8308416845123301, "learning_rate": 5.067249039411329e-06, "loss": 0.4291, "step": 16668 }, { "epoch": 0.5108802255731274, "grad_norm": 1.5653981009876003, "learning_rate": 5.0667527661357365e-06, "loss": 0.596, "step": 16669 }, { "epoch": 0.5109108740958686, "grad_norm": 1.7540540114839285, "learning_rate": 5.0662564922024115e-06, "loss": 0.6921, "step": 16670 }, { "epoch": 0.5109415226186098, "grad_norm": 0.7976576219284609, "learning_rate": 5.065760217616243e-06, "loss": 0.4473, "step": 16671 }, { "epoch": 0.510972171141351, "grad_norm": 1.7752676760218316, "learning_rate": 5.065263942382125e-06, "loss": 0.5376, "step": 16672 }, { "epoch": 0.5110028196640922, "grad_norm": 1.6558367149712905, "learning_rate": 5.064767666504944e-06, "loss": 0.6602, "step": 16673 }, { "epoch": 0.5110334681868334, "grad_norm": 1.5500348794221321, "learning_rate": 5.064271389989589e-06, "loss": 0.6661, "step": 16674 }, { "epoch": 0.5110641167095746, "grad_norm": 1.9123511918461595, "learning_rate": 5.063775112840953e-06, "loss": 0.7058, "step": 16675 }, { "epoch": 0.5110947652323158, "grad_norm": 0.783679078355533, "learning_rate": 5.063278835063923e-06, "loss": 0.4464, "step": 16676 }, { "epoch": 0.511125413755057, "grad_norm": 1.8493430201114467, "learning_rate": 5.062782556663393e-06, "loss": 0.6477, "step": 16677 }, { "epoch": 0.5111560622777982, "grad_norm": 1.779986715713368, "learning_rate": 5.062286277644248e-06, "loss": 0.6622, "step": 16678 }, { "epoch": 0.5111867108005395, "grad_norm": 1.6333065915302722, "learning_rate": 5.0617899980113815e-06, "loss": 0.6999, "step": 16679 }, { "epoch": 0.5112173593232806, "grad_norm": 1.6176284463590915, "learning_rate": 5.061293717769682e-06, "loss": 0.5551, "step": 16680 }, { "epoch": 0.5112480078460219, "grad_norm": 1.7155259408521952, "learning_rate": 5.060797436924041e-06, "loss": 0.6665, "step": 16681 }, { "epoch": 0.511278656368763, "grad_norm": 1.5772693088365102, "learning_rate": 5.060301155479346e-06, "loss": 0.6291, "step": 16682 }, { "epoch": 0.5113093048915043, "grad_norm": 0.8537961193343604, "learning_rate": 5.059804873440488e-06, "loss": 0.4337, "step": 16683 }, { "epoch": 0.5113399534142454, "grad_norm": 0.8519380647951952, "learning_rate": 5.059308590812357e-06, "loss": 0.4601, "step": 16684 }, { "epoch": 0.5113706019369867, "grad_norm": 1.6683983461629002, "learning_rate": 5.058812307599846e-06, "loss": 0.6806, "step": 16685 }, { "epoch": 0.5114012504597278, "grad_norm": 1.6724514764665523, "learning_rate": 5.05831602380784e-06, "loss": 0.7253, "step": 16686 }, { "epoch": 0.511431898982469, "grad_norm": 2.0025827563695384, "learning_rate": 5.057819739441231e-06, "loss": 0.5922, "step": 16687 }, { "epoch": 0.5114625475052103, "grad_norm": 1.6840213153216583, "learning_rate": 5.057323454504911e-06, "loss": 0.5626, "step": 16688 }, { "epoch": 0.5114931960279514, "grad_norm": 0.9286339747896172, "learning_rate": 5.056827169003766e-06, "loss": 0.4297, "step": 16689 }, { "epoch": 0.5115238445506927, "grad_norm": 0.7262810023402565, "learning_rate": 5.05633088294269e-06, "loss": 0.404, "step": 16690 }, { "epoch": 0.5115544930734338, "grad_norm": 1.740734196821161, "learning_rate": 5.055834596326571e-06, "loss": 0.6063, "step": 16691 }, { "epoch": 0.5115851415961751, "grad_norm": 1.769630723250134, "learning_rate": 5.055338309160301e-06, "loss": 0.6273, "step": 16692 }, { "epoch": 0.5116157901189162, "grad_norm": 1.4718689878196287, "learning_rate": 5.054842021448766e-06, "loss": 0.5981, "step": 16693 }, { "epoch": 0.5116464386416575, "grad_norm": 1.52203863068357, "learning_rate": 5.05434573319686e-06, "loss": 0.5737, "step": 16694 }, { "epoch": 0.5116770871643986, "grad_norm": 1.6655993847450528, "learning_rate": 5.05384944440947e-06, "loss": 0.6246, "step": 16695 }, { "epoch": 0.5117077356871399, "grad_norm": 0.7799954119040073, "learning_rate": 5.053353155091491e-06, "loss": 0.4275, "step": 16696 }, { "epoch": 0.511738384209881, "grad_norm": 1.6574652498600435, "learning_rate": 5.052856865247806e-06, "loss": 0.5996, "step": 16697 }, { "epoch": 0.5117690327326223, "grad_norm": 1.806700721068525, "learning_rate": 5.05236057488331e-06, "loss": 0.5808, "step": 16698 }, { "epoch": 0.5117996812553635, "grad_norm": 1.7325659816124204, "learning_rate": 5.051864284002892e-06, "loss": 0.6429, "step": 16699 }, { "epoch": 0.5118303297781047, "grad_norm": 1.8678996883749153, "learning_rate": 5.051367992611442e-06, "loss": 0.6917, "step": 16700 }, { "epoch": 0.5118609783008459, "grad_norm": 1.7399624684884967, "learning_rate": 5.050871700713851e-06, "loss": 0.6464, "step": 16701 }, { "epoch": 0.5118916268235871, "grad_norm": 1.790926613895976, "learning_rate": 5.050375408315006e-06, "loss": 0.6584, "step": 16702 }, { "epoch": 0.5119222753463283, "grad_norm": 1.7969859326063136, "learning_rate": 5.049879115419801e-06, "loss": 0.6152, "step": 16703 }, { "epoch": 0.5119529238690695, "grad_norm": 1.7665936480993718, "learning_rate": 5.049382822033124e-06, "loss": 0.6468, "step": 16704 }, { "epoch": 0.5119835723918107, "grad_norm": 1.9021180126592463, "learning_rate": 5.048886528159865e-06, "loss": 0.6254, "step": 16705 }, { "epoch": 0.512014220914552, "grad_norm": 1.836767818091365, "learning_rate": 5.048390233804914e-06, "loss": 0.6252, "step": 16706 }, { "epoch": 0.5120448694372931, "grad_norm": 1.896850295335641, "learning_rate": 5.047893938973163e-06, "loss": 0.6939, "step": 16707 }, { "epoch": 0.5120755179600344, "grad_norm": 1.8088054514254945, "learning_rate": 5.0473976436695e-06, "loss": 0.6643, "step": 16708 }, { "epoch": 0.5121061664827755, "grad_norm": 0.8484643176129377, "learning_rate": 5.046901347898816e-06, "loss": 0.4316, "step": 16709 }, { "epoch": 0.5121368150055168, "grad_norm": 1.8815086346555165, "learning_rate": 5.046405051666001e-06, "loss": 0.7031, "step": 16710 }, { "epoch": 0.5121674635282579, "grad_norm": 1.8344637635243364, "learning_rate": 5.045908754975946e-06, "loss": 0.5773, "step": 16711 }, { "epoch": 0.5121981120509992, "grad_norm": 1.6820896017032565, "learning_rate": 5.0454124578335395e-06, "loss": 0.6393, "step": 16712 }, { "epoch": 0.5122287605737403, "grad_norm": 2.120392257302482, "learning_rate": 5.044916160243673e-06, "loss": 0.6876, "step": 16713 }, { "epoch": 0.5122594090964816, "grad_norm": 1.813141000836346, "learning_rate": 5.044419862211234e-06, "loss": 0.7223, "step": 16714 }, { "epoch": 0.5122900576192227, "grad_norm": 1.4207924160602077, "learning_rate": 5.043923563741117e-06, "loss": 0.7115, "step": 16715 }, { "epoch": 0.512320706141964, "grad_norm": 1.6797579156074673, "learning_rate": 5.043427264838209e-06, "loss": 0.6595, "step": 16716 }, { "epoch": 0.5123513546647052, "grad_norm": 1.5186138140983008, "learning_rate": 5.0429309655074e-06, "loss": 0.5844, "step": 16717 }, { "epoch": 0.5123820031874463, "grad_norm": 1.8961736556658266, "learning_rate": 5.042434665753582e-06, "loss": 0.7173, "step": 16718 }, { "epoch": 0.5124126517101876, "grad_norm": 1.626272101835465, "learning_rate": 5.041938365581646e-06, "loss": 0.5468, "step": 16719 }, { "epoch": 0.5124433002329287, "grad_norm": 1.3929187277709718, "learning_rate": 5.041442064996479e-06, "loss": 0.5786, "step": 16720 }, { "epoch": 0.51247394875567, "grad_norm": 1.7240589633728656, "learning_rate": 5.040945764002972e-06, "loss": 0.6657, "step": 16721 }, { "epoch": 0.5125045972784111, "grad_norm": 1.7060234804966457, "learning_rate": 5.0404494626060175e-06, "loss": 0.6184, "step": 16722 }, { "epoch": 0.5125352458011524, "grad_norm": 1.7393922881269246, "learning_rate": 5.039953160810504e-06, "loss": 0.6391, "step": 16723 }, { "epoch": 0.5125658943238935, "grad_norm": 1.6003798675373873, "learning_rate": 5.039456858621322e-06, "loss": 0.6356, "step": 16724 }, { "epoch": 0.5125965428466348, "grad_norm": 1.7147772003661779, "learning_rate": 5.03896055604336e-06, "loss": 0.607, "step": 16725 }, { "epoch": 0.512627191369376, "grad_norm": 1.8800477708655865, "learning_rate": 5.038464253081511e-06, "loss": 0.7048, "step": 16726 }, { "epoch": 0.5126578398921172, "grad_norm": 1.570328128331986, "learning_rate": 5.037967949740663e-06, "loss": 0.5976, "step": 16727 }, { "epoch": 0.5126884884148584, "grad_norm": 1.5667093340572957, "learning_rate": 5.037471646025707e-06, "loss": 0.5368, "step": 16728 }, { "epoch": 0.5127191369375996, "grad_norm": 0.8216373685711106, "learning_rate": 5.0369753419415335e-06, "loss": 0.4149, "step": 16729 }, { "epoch": 0.5127497854603408, "grad_norm": 1.7618709787575615, "learning_rate": 5.036479037493034e-06, "loss": 0.5916, "step": 16730 }, { "epoch": 0.512780433983082, "grad_norm": 0.7791855760265488, "learning_rate": 5.035982732685095e-06, "loss": 0.4379, "step": 16731 }, { "epoch": 0.5128110825058232, "grad_norm": 1.8964985526247733, "learning_rate": 5.03548642752261e-06, "loss": 0.6243, "step": 16732 }, { "epoch": 0.5128417310285645, "grad_norm": 1.6017012739429086, "learning_rate": 5.034990122010468e-06, "loss": 0.5842, "step": 16733 }, { "epoch": 0.5128723795513056, "grad_norm": 1.6030317385252228, "learning_rate": 5.034493816153558e-06, "loss": 0.6113, "step": 16734 }, { "epoch": 0.5129030280740469, "grad_norm": 0.8025091368748026, "learning_rate": 5.033997509956775e-06, "loss": 0.4304, "step": 16735 }, { "epoch": 0.512933676596788, "grad_norm": 1.9685189574750683, "learning_rate": 5.0335012034250034e-06, "loss": 0.6671, "step": 16736 }, { "epoch": 0.5129643251195293, "grad_norm": 1.8793286593363046, "learning_rate": 5.033004896563136e-06, "loss": 0.7047, "step": 16737 }, { "epoch": 0.5129949736422704, "grad_norm": 1.7730555881408558, "learning_rate": 5.0325085893760625e-06, "loss": 0.7382, "step": 16738 }, { "epoch": 0.5130256221650117, "grad_norm": 0.7776382692731904, "learning_rate": 5.032012281868676e-06, "loss": 0.4465, "step": 16739 }, { "epoch": 0.5130562706877528, "grad_norm": 1.747695737361174, "learning_rate": 5.031515974045861e-06, "loss": 0.6192, "step": 16740 }, { "epoch": 0.5130869192104941, "grad_norm": 1.7639315398066115, "learning_rate": 5.031019665912513e-06, "loss": 0.6307, "step": 16741 }, { "epoch": 0.5131175677332352, "grad_norm": 1.663788343360232, "learning_rate": 5.0305233574735205e-06, "loss": 0.7053, "step": 16742 }, { "epoch": 0.5131482162559765, "grad_norm": 0.7734895196948302, "learning_rate": 5.030027048733772e-06, "loss": 0.4356, "step": 16743 }, { "epoch": 0.5131788647787177, "grad_norm": 1.7869702621406827, "learning_rate": 5.029530739698161e-06, "loss": 0.5934, "step": 16744 }, { "epoch": 0.5132095133014589, "grad_norm": 0.7795696575753962, "learning_rate": 5.029034430371574e-06, "loss": 0.4514, "step": 16745 }, { "epoch": 0.5132401618242001, "grad_norm": 1.7284357318942734, "learning_rate": 5.0285381207589055e-06, "loss": 0.6413, "step": 16746 }, { "epoch": 0.5132708103469413, "grad_norm": 1.8361409853838824, "learning_rate": 5.028041810865042e-06, "loss": 0.5938, "step": 16747 }, { "epoch": 0.5133014588696825, "grad_norm": 1.6842727699108009, "learning_rate": 5.027545500694877e-06, "loss": 0.5726, "step": 16748 }, { "epoch": 0.5133321073924236, "grad_norm": 1.7633849337403136, "learning_rate": 5.027049190253297e-06, "loss": 0.5612, "step": 16749 }, { "epoch": 0.5133627559151649, "grad_norm": 1.6891998353196989, "learning_rate": 5.026552879545197e-06, "loss": 0.6487, "step": 16750 }, { "epoch": 0.513393404437906, "grad_norm": 0.8305478455846715, "learning_rate": 5.026056568575462e-06, "loss": 0.4497, "step": 16751 }, { "epoch": 0.5134240529606473, "grad_norm": 0.8019127049567744, "learning_rate": 5.025560257348987e-06, "loss": 0.4571, "step": 16752 }, { "epoch": 0.5134547014833885, "grad_norm": 1.801382933585202, "learning_rate": 5.025063945870659e-06, "loss": 0.6042, "step": 16753 }, { "epoch": 0.5134853500061297, "grad_norm": 1.7742925247902974, "learning_rate": 5.02456763414537e-06, "loss": 0.6519, "step": 16754 }, { "epoch": 0.5135159985288709, "grad_norm": 1.6439945843920953, "learning_rate": 5.024071322178011e-06, "loss": 0.6316, "step": 16755 }, { "epoch": 0.5135466470516121, "grad_norm": 1.5722252387298017, "learning_rate": 5.0235750099734695e-06, "loss": 0.574, "step": 16756 }, { "epoch": 0.5135772955743533, "grad_norm": 1.786779758888415, "learning_rate": 5.023078697536637e-06, "loss": 0.6708, "step": 16757 }, { "epoch": 0.5136079440970945, "grad_norm": 1.7454431608550334, "learning_rate": 5.0225823848724054e-06, "loss": 0.6624, "step": 16758 }, { "epoch": 0.5136385926198357, "grad_norm": 1.778499933947951, "learning_rate": 5.022086071985663e-06, "loss": 0.6062, "step": 16759 }, { "epoch": 0.513669241142577, "grad_norm": 1.3891536891920129, "learning_rate": 5.021589758881301e-06, "loss": 0.5903, "step": 16760 }, { "epoch": 0.5136998896653181, "grad_norm": 1.902986628138991, "learning_rate": 5.02109344556421e-06, "loss": 0.664, "step": 16761 }, { "epoch": 0.5137305381880594, "grad_norm": 2.050241162056447, "learning_rate": 5.020597132039281e-06, "loss": 0.6689, "step": 16762 }, { "epoch": 0.5137611867108005, "grad_norm": 2.3819396794366017, "learning_rate": 5.020100818311402e-06, "loss": 0.6082, "step": 16763 }, { "epoch": 0.5137918352335418, "grad_norm": 1.8423839589409516, "learning_rate": 5.019604504385464e-06, "loss": 0.7397, "step": 16764 }, { "epoch": 0.5138224837562829, "grad_norm": 1.8243980028869868, "learning_rate": 5.019108190266358e-06, "loss": 0.6366, "step": 16765 }, { "epoch": 0.5138531322790242, "grad_norm": 1.8108750322953673, "learning_rate": 5.018611875958974e-06, "loss": 0.645, "step": 16766 }, { "epoch": 0.5138837808017653, "grad_norm": 1.7697368991722777, "learning_rate": 5.0181155614682045e-06, "loss": 0.6219, "step": 16767 }, { "epoch": 0.5139144293245066, "grad_norm": 1.761686672371292, "learning_rate": 5.017619246798935e-06, "loss": 0.6209, "step": 16768 }, { "epoch": 0.5139450778472477, "grad_norm": 1.5453974770303638, "learning_rate": 5.017122931956059e-06, "loss": 0.5393, "step": 16769 }, { "epoch": 0.513975726369989, "grad_norm": 1.6204529827747167, "learning_rate": 5.0166266169444675e-06, "loss": 0.6279, "step": 16770 }, { "epoch": 0.5140063748927302, "grad_norm": 1.6870014749558886, "learning_rate": 5.016130301769049e-06, "loss": 0.6161, "step": 16771 }, { "epoch": 0.5140370234154714, "grad_norm": 1.7959726108937533, "learning_rate": 5.015633986434695e-06, "loss": 0.7309, "step": 16772 }, { "epoch": 0.5140676719382126, "grad_norm": 2.0028285542927073, "learning_rate": 5.015137670946295e-06, "loss": 0.6375, "step": 16773 }, { "epoch": 0.5140983204609538, "grad_norm": 1.6635870584260324, "learning_rate": 5.014641355308739e-06, "loss": 0.6739, "step": 16774 }, { "epoch": 0.514128968983695, "grad_norm": 1.8129805843104227, "learning_rate": 5.0141450395269184e-06, "loss": 0.6619, "step": 16775 }, { "epoch": 0.5141596175064362, "grad_norm": 1.679200869892332, "learning_rate": 5.013648723605724e-06, "loss": 0.5954, "step": 16776 }, { "epoch": 0.5141902660291774, "grad_norm": 1.62374099503961, "learning_rate": 5.013152407550045e-06, "loss": 0.5057, "step": 16777 }, { "epoch": 0.5142209145519187, "grad_norm": 1.54335791604734, "learning_rate": 5.012656091364772e-06, "loss": 0.5635, "step": 16778 }, { "epoch": 0.5142515630746598, "grad_norm": 1.7022645203522246, "learning_rate": 5.012159775054793e-06, "loss": 0.5672, "step": 16779 }, { "epoch": 0.514282211597401, "grad_norm": 1.5113274834705837, "learning_rate": 5.011663458625004e-06, "loss": 0.4871, "step": 16780 }, { "epoch": 0.5143128601201422, "grad_norm": 1.689261912624616, "learning_rate": 5.01116714208029e-06, "loss": 0.5824, "step": 16781 }, { "epoch": 0.5143435086428834, "grad_norm": 1.7148732775549822, "learning_rate": 5.010670825425543e-06, "loss": 0.5885, "step": 16782 }, { "epoch": 0.5143741571656246, "grad_norm": 1.0183560186039746, "learning_rate": 5.010174508665654e-06, "loss": 0.442, "step": 16783 }, { "epoch": 0.5144048056883658, "grad_norm": 1.6365315870098895, "learning_rate": 5.009678191805514e-06, "loss": 0.6141, "step": 16784 }, { "epoch": 0.514435454211107, "grad_norm": 1.6849489133354507, "learning_rate": 5.009181874850012e-06, "loss": 0.5981, "step": 16785 }, { "epoch": 0.5144661027338482, "grad_norm": 1.8960059724476888, "learning_rate": 5.008685557804038e-06, "loss": 0.7661, "step": 16786 }, { "epoch": 0.5144967512565894, "grad_norm": 1.7700218834564643, "learning_rate": 5.008189240672483e-06, "loss": 0.6488, "step": 16787 }, { "epoch": 0.5145273997793306, "grad_norm": 1.809875651751608, "learning_rate": 5.007692923460236e-06, "loss": 0.6912, "step": 16788 }, { "epoch": 0.5145580483020719, "grad_norm": 1.703211372662847, "learning_rate": 5.007196606172192e-06, "loss": 0.6577, "step": 16789 }, { "epoch": 0.514588696824813, "grad_norm": 1.9524893827861125, "learning_rate": 5.0067002888132355e-06, "loss": 0.6462, "step": 16790 }, { "epoch": 0.5146193453475543, "grad_norm": 1.7660517450134408, "learning_rate": 5.00620397138826e-06, "loss": 0.7219, "step": 16791 }, { "epoch": 0.5146499938702954, "grad_norm": 1.65300778228227, "learning_rate": 5.005707653902155e-06, "loss": 0.5249, "step": 16792 }, { "epoch": 0.5146806423930367, "grad_norm": 1.559381592785846, "learning_rate": 5.005211336359812e-06, "loss": 0.5726, "step": 16793 }, { "epoch": 0.5147112909157778, "grad_norm": 1.68856451024282, "learning_rate": 5.004715018766119e-06, "loss": 0.6416, "step": 16794 }, { "epoch": 0.5147419394385191, "grad_norm": 1.5141813358205498, "learning_rate": 5.004218701125968e-06, "loss": 0.5836, "step": 16795 }, { "epoch": 0.5147725879612602, "grad_norm": 1.7515316985033822, "learning_rate": 5.00372238344425e-06, "loss": 0.5466, "step": 16796 }, { "epoch": 0.5148032364840015, "grad_norm": 1.6442378079429838, "learning_rate": 5.003226065725853e-06, "loss": 0.677, "step": 16797 }, { "epoch": 0.5148338850067427, "grad_norm": 1.6262850690280355, "learning_rate": 5.00272974797567e-06, "loss": 0.5338, "step": 16798 }, { "epoch": 0.5148645335294839, "grad_norm": 1.7228265883193716, "learning_rate": 5.002233430198588e-06, "loss": 0.6744, "step": 16799 }, { "epoch": 0.5148951820522251, "grad_norm": 1.9657140393862493, "learning_rate": 5.001737112399503e-06, "loss": 0.639, "step": 16800 }, { "epoch": 0.5149258305749663, "grad_norm": 1.8081693710701554, "learning_rate": 5.001240794583298e-06, "loss": 0.6028, "step": 16801 }, { "epoch": 0.5149564790977075, "grad_norm": 1.7444426038881085, "learning_rate": 5.000744476754871e-06, "loss": 0.7551, "step": 16802 }, { "epoch": 0.5149871276204487, "grad_norm": 1.7071269093820365, "learning_rate": 5.000248158919106e-06, "loss": 0.5789, "step": 16803 }, { "epoch": 0.5150177761431899, "grad_norm": 1.7751548575123934, "learning_rate": 4.999751841080895e-06, "loss": 0.66, "step": 16804 }, { "epoch": 0.5150484246659311, "grad_norm": 1.7560833557342863, "learning_rate": 4.999255523245132e-06, "loss": 0.7171, "step": 16805 }, { "epoch": 0.5150790731886723, "grad_norm": 1.6672691272845472, "learning_rate": 4.9987592054167026e-06, "loss": 0.7323, "step": 16806 }, { "epoch": 0.5151097217114136, "grad_norm": 1.7449511989401012, "learning_rate": 4.998262887600499e-06, "loss": 0.5476, "step": 16807 }, { "epoch": 0.5151403702341547, "grad_norm": 1.7780887856705785, "learning_rate": 4.997766569801413e-06, "loss": 0.6577, "step": 16808 }, { "epoch": 0.515171018756896, "grad_norm": 1.7192919721004363, "learning_rate": 4.997270252024331e-06, "loss": 0.6771, "step": 16809 }, { "epoch": 0.5152016672796371, "grad_norm": 1.7172850620798432, "learning_rate": 4.996773934274148e-06, "loss": 0.5776, "step": 16810 }, { "epoch": 0.5152323158023783, "grad_norm": 1.8283193483055933, "learning_rate": 4.996277616555753e-06, "loss": 0.6851, "step": 16811 }, { "epoch": 0.5152629643251195, "grad_norm": 1.0189498377245694, "learning_rate": 4.995781298874033e-06, "loss": 0.4453, "step": 16812 }, { "epoch": 0.5152936128478607, "grad_norm": 1.6152804703757748, "learning_rate": 4.995284981233883e-06, "loss": 0.6511, "step": 16813 }, { "epoch": 0.5153242613706019, "grad_norm": 1.8861994538611366, "learning_rate": 4.99478866364019e-06, "loss": 0.6149, "step": 16814 }, { "epoch": 0.5153549098933431, "grad_norm": 1.819535403584399, "learning_rate": 4.994292346097846e-06, "loss": 0.615, "step": 16815 }, { "epoch": 0.5153855584160844, "grad_norm": 1.5750288868742364, "learning_rate": 4.9937960286117415e-06, "loss": 0.5871, "step": 16816 }, { "epoch": 0.5154162069388255, "grad_norm": 1.8377756384464083, "learning_rate": 4.993299711186768e-06, "loss": 0.5545, "step": 16817 }, { "epoch": 0.5154468554615668, "grad_norm": 1.8124052726149937, "learning_rate": 4.99280339382781e-06, "loss": 0.5864, "step": 16818 }, { "epoch": 0.5154775039843079, "grad_norm": 1.732338704741199, "learning_rate": 4.992307076539765e-06, "loss": 0.6094, "step": 16819 }, { "epoch": 0.5155081525070492, "grad_norm": 1.7556154467995984, "learning_rate": 4.991810759327518e-06, "loss": 0.6754, "step": 16820 }, { "epoch": 0.5155388010297903, "grad_norm": 0.7959342686898984, "learning_rate": 4.991314442195964e-06, "loss": 0.4377, "step": 16821 }, { "epoch": 0.5155694495525316, "grad_norm": 1.5335239143502757, "learning_rate": 4.990818125149991e-06, "loss": 0.5769, "step": 16822 }, { "epoch": 0.5156000980752727, "grad_norm": 0.8015793539696515, "learning_rate": 4.990321808194488e-06, "loss": 0.4379, "step": 16823 }, { "epoch": 0.515630746598014, "grad_norm": 1.4284303128274503, "learning_rate": 4.989825491334347e-06, "loss": 0.605, "step": 16824 }, { "epoch": 0.5156613951207552, "grad_norm": 0.7520845987203412, "learning_rate": 4.9893291745744586e-06, "loss": 0.4497, "step": 16825 }, { "epoch": 0.5156920436434964, "grad_norm": 1.8450896569542177, "learning_rate": 4.988832857919711e-06, "loss": 0.6665, "step": 16826 }, { "epoch": 0.5157226921662376, "grad_norm": 1.7938764480515146, "learning_rate": 4.988336541374998e-06, "loss": 0.5979, "step": 16827 }, { "epoch": 0.5157533406889788, "grad_norm": 0.7555575575177971, "learning_rate": 4.987840224945207e-06, "loss": 0.4367, "step": 16828 }, { "epoch": 0.51578398921172, "grad_norm": 1.8741092962466228, "learning_rate": 4.987343908635231e-06, "loss": 0.6315, "step": 16829 }, { "epoch": 0.5158146377344612, "grad_norm": 1.801666603470692, "learning_rate": 4.986847592449958e-06, "loss": 0.735, "step": 16830 }, { "epoch": 0.5158452862572024, "grad_norm": 1.8027728559211564, "learning_rate": 4.986351276394277e-06, "loss": 0.6628, "step": 16831 }, { "epoch": 0.5158759347799436, "grad_norm": 1.7915078770641693, "learning_rate": 4.985854960473083e-06, "loss": 0.7225, "step": 16832 }, { "epoch": 0.5159065833026848, "grad_norm": 1.6470047449399408, "learning_rate": 4.985358644691263e-06, "loss": 0.6252, "step": 16833 }, { "epoch": 0.5159372318254261, "grad_norm": 1.8579488701753963, "learning_rate": 4.9848623290537065e-06, "loss": 0.7197, "step": 16834 }, { "epoch": 0.5159678803481672, "grad_norm": 1.8920835331385712, "learning_rate": 4.984366013565307e-06, "loss": 0.6769, "step": 16835 }, { "epoch": 0.5159985288709085, "grad_norm": 2.031154996380277, "learning_rate": 4.983869698230952e-06, "loss": 0.6689, "step": 16836 }, { "epoch": 0.5160291773936496, "grad_norm": 1.637382306297846, "learning_rate": 4.983373383055533e-06, "loss": 0.6417, "step": 16837 }, { "epoch": 0.5160598259163909, "grad_norm": 1.7483302521249937, "learning_rate": 4.982877068043942e-06, "loss": 0.6602, "step": 16838 }, { "epoch": 0.516090474439132, "grad_norm": 1.8823956933649746, "learning_rate": 4.982380753201066e-06, "loss": 0.5932, "step": 16839 }, { "epoch": 0.5161211229618733, "grad_norm": 1.5222710297466922, "learning_rate": 4.981884438531798e-06, "loss": 0.6266, "step": 16840 }, { "epoch": 0.5161517714846144, "grad_norm": 1.6655458403791918, "learning_rate": 4.981388124041028e-06, "loss": 0.5854, "step": 16841 }, { "epoch": 0.5161824200073556, "grad_norm": 1.587456151792444, "learning_rate": 4.980891809733643e-06, "loss": 0.6538, "step": 16842 }, { "epoch": 0.5162130685300969, "grad_norm": 0.8365586305731375, "learning_rate": 4.980395495614538e-06, "loss": 0.4312, "step": 16843 }, { "epoch": 0.516243717052838, "grad_norm": 0.8378747979895028, "learning_rate": 4.9798991816886e-06, "loss": 0.4269, "step": 16844 }, { "epoch": 0.5162743655755793, "grad_norm": 1.4963144562793105, "learning_rate": 4.979402867960721e-06, "loss": 0.6119, "step": 16845 }, { "epoch": 0.5163050140983204, "grad_norm": 1.544742380112405, "learning_rate": 4.978906554435791e-06, "loss": 0.5714, "step": 16846 }, { "epoch": 0.5163356626210617, "grad_norm": 1.7100793363169235, "learning_rate": 4.978410241118699e-06, "loss": 0.6495, "step": 16847 }, { "epoch": 0.5163663111438028, "grad_norm": 1.7089078329816965, "learning_rate": 4.977913928014338e-06, "loss": 0.6483, "step": 16848 }, { "epoch": 0.5163969596665441, "grad_norm": 0.8192593219844878, "learning_rate": 4.977417615127596e-06, "loss": 0.4313, "step": 16849 }, { "epoch": 0.5164276081892852, "grad_norm": 0.8234860721617654, "learning_rate": 4.976921302463364e-06, "loss": 0.4559, "step": 16850 }, { "epoch": 0.5164582567120265, "grad_norm": 1.7815085624780902, "learning_rate": 4.976424990026532e-06, "loss": 0.5876, "step": 16851 }, { "epoch": 0.5164889052347676, "grad_norm": 0.7767211468647158, "learning_rate": 4.975928677821992e-06, "loss": 0.4457, "step": 16852 }, { "epoch": 0.5165195537575089, "grad_norm": 1.7831188491767305, "learning_rate": 4.975432365854631e-06, "loss": 0.7124, "step": 16853 }, { "epoch": 0.5165502022802501, "grad_norm": 1.6061108144902712, "learning_rate": 4.974936054129343e-06, "loss": 0.6722, "step": 16854 }, { "epoch": 0.5165808508029913, "grad_norm": 1.659981202209019, "learning_rate": 4.974439742651014e-06, "loss": 0.5711, "step": 16855 }, { "epoch": 0.5166114993257325, "grad_norm": 1.6772849713898936, "learning_rate": 4.9739434314245385e-06, "loss": 0.5922, "step": 16856 }, { "epoch": 0.5166421478484737, "grad_norm": 1.662060810709716, "learning_rate": 4.973447120454805e-06, "loss": 0.6788, "step": 16857 }, { "epoch": 0.5166727963712149, "grad_norm": 1.5650362078382387, "learning_rate": 4.972950809746703e-06, "loss": 0.6421, "step": 16858 }, { "epoch": 0.5167034448939561, "grad_norm": 0.8022992644019968, "learning_rate": 4.972454499305125e-06, "loss": 0.4307, "step": 16859 }, { "epoch": 0.5167340934166973, "grad_norm": 1.6773647391912567, "learning_rate": 4.97195818913496e-06, "loss": 0.6837, "step": 16860 }, { "epoch": 0.5167647419394386, "grad_norm": 1.6034791655845648, "learning_rate": 4.971461879241095e-06, "loss": 0.5688, "step": 16861 }, { "epoch": 0.5167953904621797, "grad_norm": 1.5726923290527615, "learning_rate": 4.970965569628428e-06, "loss": 0.5897, "step": 16862 }, { "epoch": 0.516826038984921, "grad_norm": 1.812984816911528, "learning_rate": 4.97046926030184e-06, "loss": 0.6265, "step": 16863 }, { "epoch": 0.5168566875076621, "grad_norm": 1.727578524569593, "learning_rate": 4.96997295126623e-06, "loss": 0.6233, "step": 16864 }, { "epoch": 0.5168873360304034, "grad_norm": 1.7077181817945997, "learning_rate": 4.969476642526482e-06, "loss": 0.5832, "step": 16865 }, { "epoch": 0.5169179845531445, "grad_norm": 1.7269807959390826, "learning_rate": 4.9689803340874886e-06, "loss": 0.6379, "step": 16866 }, { "epoch": 0.5169486330758858, "grad_norm": 1.6509495396775626, "learning_rate": 4.96848402595414e-06, "loss": 0.5856, "step": 16867 }, { "epoch": 0.5169792815986269, "grad_norm": 1.8038852233091276, "learning_rate": 4.967987718131327e-06, "loss": 0.6254, "step": 16868 }, { "epoch": 0.5170099301213682, "grad_norm": 0.8224450160205817, "learning_rate": 4.967491410623938e-06, "loss": 0.4452, "step": 16869 }, { "epoch": 0.5170405786441093, "grad_norm": 2.0687276428165706, "learning_rate": 4.9669951034368655e-06, "loss": 0.6292, "step": 16870 }, { "epoch": 0.5170712271668506, "grad_norm": 0.8373893536023814, "learning_rate": 4.966498796575e-06, "loss": 0.449, "step": 16871 }, { "epoch": 0.5171018756895918, "grad_norm": 1.7646238809920483, "learning_rate": 4.9660024900432265e-06, "loss": 0.6846, "step": 16872 }, { "epoch": 0.5171325242123329, "grad_norm": 1.7595468331045838, "learning_rate": 4.965506183846443e-06, "loss": 0.632, "step": 16873 }, { "epoch": 0.5171631727350742, "grad_norm": 1.7567077188357234, "learning_rate": 4.965009877989532e-06, "loss": 0.6725, "step": 16874 }, { "epoch": 0.5171938212578153, "grad_norm": 1.7553916548883073, "learning_rate": 4.964513572477392e-06, "loss": 0.6497, "step": 16875 }, { "epoch": 0.5172244697805566, "grad_norm": 1.5977031664152423, "learning_rate": 4.964017267314907e-06, "loss": 0.6862, "step": 16876 }, { "epoch": 0.5172551183032977, "grad_norm": 1.7547623536841053, "learning_rate": 4.963520962506968e-06, "loss": 0.6293, "step": 16877 }, { "epoch": 0.517285766826039, "grad_norm": 2.0182395712863226, "learning_rate": 4.963024658058467e-06, "loss": 0.6764, "step": 16878 }, { "epoch": 0.5173164153487801, "grad_norm": 1.9477659869900796, "learning_rate": 4.962528353974295e-06, "loss": 0.6247, "step": 16879 }, { "epoch": 0.5173470638715214, "grad_norm": 0.7731345170515894, "learning_rate": 4.962032050259339e-06, "loss": 0.4101, "step": 16880 }, { "epoch": 0.5173777123942626, "grad_norm": 0.8173393629135701, "learning_rate": 4.961535746918491e-06, "loss": 0.4549, "step": 16881 }, { "epoch": 0.5174083609170038, "grad_norm": 1.7283787025622366, "learning_rate": 4.96103944395664e-06, "loss": 0.593, "step": 16882 }, { "epoch": 0.517439009439745, "grad_norm": 1.6489820067584284, "learning_rate": 4.9605431413786795e-06, "loss": 0.5761, "step": 16883 }, { "epoch": 0.5174696579624862, "grad_norm": 1.8780766887381897, "learning_rate": 4.960046839189498e-06, "loss": 0.7073, "step": 16884 }, { "epoch": 0.5175003064852274, "grad_norm": 1.8268830231731863, "learning_rate": 4.9595505373939825e-06, "loss": 0.5998, "step": 16885 }, { "epoch": 0.5175309550079686, "grad_norm": 1.8718339580678296, "learning_rate": 4.959054235997029e-06, "loss": 0.661, "step": 16886 }, { "epoch": 0.5175616035307098, "grad_norm": 1.8193711475529633, "learning_rate": 4.958557935003523e-06, "loss": 0.6248, "step": 16887 }, { "epoch": 0.517592252053451, "grad_norm": 0.9160463213104721, "learning_rate": 4.958061634418356e-06, "loss": 0.4496, "step": 16888 }, { "epoch": 0.5176229005761922, "grad_norm": 2.206094660782556, "learning_rate": 4.957565334246418e-06, "loss": 0.7393, "step": 16889 }, { "epoch": 0.5176535490989335, "grad_norm": 1.7636842820370255, "learning_rate": 4.9570690344926e-06, "loss": 0.6658, "step": 16890 }, { "epoch": 0.5176841976216746, "grad_norm": 1.5810495431341802, "learning_rate": 4.956572735161793e-06, "loss": 0.7259, "step": 16891 }, { "epoch": 0.5177148461444159, "grad_norm": 1.6410053767799577, "learning_rate": 4.956076436258885e-06, "loss": 0.706, "step": 16892 }, { "epoch": 0.517745494667157, "grad_norm": 1.73543764158164, "learning_rate": 4.955580137788766e-06, "loss": 0.7492, "step": 16893 }, { "epoch": 0.5177761431898983, "grad_norm": 1.690570419899828, "learning_rate": 4.95508383975633e-06, "loss": 0.6748, "step": 16894 }, { "epoch": 0.5178067917126394, "grad_norm": 1.584928422208863, "learning_rate": 4.954587542166464e-06, "loss": 0.6054, "step": 16895 }, { "epoch": 0.5178374402353807, "grad_norm": 1.742615524881434, "learning_rate": 4.954091245024055e-06, "loss": 0.6387, "step": 16896 }, { "epoch": 0.5178680887581218, "grad_norm": 1.8683364985266493, "learning_rate": 4.9535949483340005e-06, "loss": 0.6637, "step": 16897 }, { "epoch": 0.5178987372808631, "grad_norm": 1.6361374887693365, "learning_rate": 4.9530986521011855e-06, "loss": 0.6355, "step": 16898 }, { "epoch": 0.5179293858036043, "grad_norm": 1.7084640038431604, "learning_rate": 4.952602356330501e-06, "loss": 0.6597, "step": 16899 }, { "epoch": 0.5179600343263455, "grad_norm": 1.8876326107192634, "learning_rate": 4.9521060610268385e-06, "loss": 0.7178, "step": 16900 }, { "epoch": 0.5179906828490867, "grad_norm": 1.8505098798079178, "learning_rate": 4.951609766195086e-06, "loss": 0.6858, "step": 16901 }, { "epoch": 0.5180213313718279, "grad_norm": 1.7511575811722757, "learning_rate": 4.951113471840136e-06, "loss": 0.6196, "step": 16902 }, { "epoch": 0.5180519798945691, "grad_norm": 1.56039059983696, "learning_rate": 4.9506171779668776e-06, "loss": 0.542, "step": 16903 }, { "epoch": 0.5180826284173102, "grad_norm": 0.7565857055906011, "learning_rate": 4.9501208845802e-06, "loss": 0.4416, "step": 16904 }, { "epoch": 0.5181132769400515, "grad_norm": 1.6568503807694643, "learning_rate": 4.9496245916849955e-06, "loss": 0.6379, "step": 16905 }, { "epoch": 0.5181439254627926, "grad_norm": 0.7775028818185872, "learning_rate": 4.9491282992861515e-06, "loss": 0.4116, "step": 16906 }, { "epoch": 0.5181745739855339, "grad_norm": 1.8915640781701175, "learning_rate": 4.948632007388559e-06, "loss": 0.6079, "step": 16907 }, { "epoch": 0.518205222508275, "grad_norm": 1.575382459207478, "learning_rate": 4.9481357159971096e-06, "loss": 0.5967, "step": 16908 }, { "epoch": 0.5182358710310163, "grad_norm": 1.5512602824602462, "learning_rate": 4.947639425116691e-06, "loss": 0.6122, "step": 16909 }, { "epoch": 0.5182665195537575, "grad_norm": 0.8090771895309409, "learning_rate": 4.947143134752195e-06, "loss": 0.4301, "step": 16910 }, { "epoch": 0.5182971680764987, "grad_norm": 1.6695598890951422, "learning_rate": 4.946646844908513e-06, "loss": 0.6512, "step": 16911 }, { "epoch": 0.5183278165992399, "grad_norm": 1.7518357944635403, "learning_rate": 4.946150555590531e-06, "loss": 0.6756, "step": 16912 }, { "epoch": 0.5183584651219811, "grad_norm": 1.630818011692094, "learning_rate": 4.945654266803141e-06, "loss": 0.6518, "step": 16913 }, { "epoch": 0.5183891136447223, "grad_norm": 1.7267739746688098, "learning_rate": 4.945157978551237e-06, "loss": 0.6191, "step": 16914 }, { "epoch": 0.5184197621674635, "grad_norm": 1.4915259279929538, "learning_rate": 4.944661690839701e-06, "loss": 0.5889, "step": 16915 }, { "epoch": 0.5184504106902047, "grad_norm": 1.9399412926028674, "learning_rate": 4.944165403673431e-06, "loss": 0.6273, "step": 16916 }, { "epoch": 0.518481059212946, "grad_norm": 1.8190302128303675, "learning_rate": 4.94366911705731e-06, "loss": 0.5623, "step": 16917 }, { "epoch": 0.5185117077356871, "grad_norm": 1.4882206573126098, "learning_rate": 4.9431728309962355e-06, "loss": 0.5568, "step": 16918 }, { "epoch": 0.5185423562584284, "grad_norm": 1.8542676601905688, "learning_rate": 4.942676545495092e-06, "loss": 0.5801, "step": 16919 }, { "epoch": 0.5185730047811695, "grad_norm": 1.880383958160186, "learning_rate": 4.94218026055877e-06, "loss": 0.7101, "step": 16920 }, { "epoch": 0.5186036533039108, "grad_norm": 1.7412675829916018, "learning_rate": 4.941683976192162e-06, "loss": 0.6432, "step": 16921 }, { "epoch": 0.5186343018266519, "grad_norm": 2.1247090269854283, "learning_rate": 4.941187692400157e-06, "loss": 0.6419, "step": 16922 }, { "epoch": 0.5186649503493932, "grad_norm": 1.917376636341976, "learning_rate": 4.940691409187643e-06, "loss": 0.7008, "step": 16923 }, { "epoch": 0.5186955988721343, "grad_norm": 1.8857827823152158, "learning_rate": 4.940195126559514e-06, "loss": 0.5912, "step": 16924 }, { "epoch": 0.5187262473948756, "grad_norm": 1.7134333506264554, "learning_rate": 4.9396988445206575e-06, "loss": 0.7135, "step": 16925 }, { "epoch": 0.5187568959176168, "grad_norm": 1.8922396565414825, "learning_rate": 4.93920256307596e-06, "loss": 0.6563, "step": 16926 }, { "epoch": 0.518787544440358, "grad_norm": 1.5709854663019318, "learning_rate": 4.93870628223032e-06, "loss": 0.5601, "step": 16927 }, { "epoch": 0.5188181929630992, "grad_norm": 1.6681457693578687, "learning_rate": 4.9382100019886185e-06, "loss": 0.6474, "step": 16928 }, { "epoch": 0.5188488414858404, "grad_norm": 1.7160266653960097, "learning_rate": 4.937713722355754e-06, "loss": 0.629, "step": 16929 }, { "epoch": 0.5188794900085816, "grad_norm": 1.4948352903625366, "learning_rate": 4.937217443336609e-06, "loss": 0.568, "step": 16930 }, { "epoch": 0.5189101385313228, "grad_norm": 1.5824361903753192, "learning_rate": 4.936721164936077e-06, "loss": 0.5792, "step": 16931 }, { "epoch": 0.518940787054064, "grad_norm": 1.6363847116072543, "learning_rate": 4.936224887159049e-06, "loss": 0.6641, "step": 16932 }, { "epoch": 0.5189714355768053, "grad_norm": 1.8941626796456101, "learning_rate": 4.935728610010412e-06, "loss": 0.6981, "step": 16933 }, { "epoch": 0.5190020840995464, "grad_norm": 1.8106200992892043, "learning_rate": 4.935232333495058e-06, "loss": 0.6548, "step": 16934 }, { "epoch": 0.5190327326222876, "grad_norm": 1.7149286104811057, "learning_rate": 4.934736057617877e-06, "loss": 0.6713, "step": 16935 }, { "epoch": 0.5190633811450288, "grad_norm": 1.7727897033304172, "learning_rate": 4.9342397823837565e-06, "loss": 0.583, "step": 16936 }, { "epoch": 0.51909402966777, "grad_norm": 1.46052384846951, "learning_rate": 4.93374350779759e-06, "loss": 0.5636, "step": 16937 }, { "epoch": 0.5191246781905112, "grad_norm": 1.852587043371929, "learning_rate": 4.933247233864267e-06, "loss": 0.6641, "step": 16938 }, { "epoch": 0.5191553267132524, "grad_norm": 1.8671484940741503, "learning_rate": 4.932750960588672e-06, "loss": 0.6135, "step": 16939 }, { "epoch": 0.5191859752359936, "grad_norm": 1.640900123135185, "learning_rate": 4.932254687975703e-06, "loss": 0.706, "step": 16940 }, { "epoch": 0.5192166237587348, "grad_norm": 1.62184115191935, "learning_rate": 4.931758416030244e-06, "loss": 0.633, "step": 16941 }, { "epoch": 0.519247272281476, "grad_norm": 1.7824767646102269, "learning_rate": 4.931262144757186e-06, "loss": 0.6946, "step": 16942 }, { "epoch": 0.5192779208042172, "grad_norm": 1.6550657027367572, "learning_rate": 4.9307658741614216e-06, "loss": 0.5596, "step": 16943 }, { "epoch": 0.5193085693269585, "grad_norm": 1.5243600206309897, "learning_rate": 4.930269604247836e-06, "loss": 0.6192, "step": 16944 }, { "epoch": 0.5193392178496996, "grad_norm": 0.8933675509720981, "learning_rate": 4.929773335021324e-06, "loss": 0.4382, "step": 16945 }, { "epoch": 0.5193698663724409, "grad_norm": 1.7220643861082068, "learning_rate": 4.929277066486774e-06, "loss": 0.7221, "step": 16946 }, { "epoch": 0.519400514895182, "grad_norm": 1.7715419130537096, "learning_rate": 4.928780798649073e-06, "loss": 0.6519, "step": 16947 }, { "epoch": 0.5194311634179233, "grad_norm": 1.7754084625604056, "learning_rate": 4.928284531513114e-06, "loss": 0.6567, "step": 16948 }, { "epoch": 0.5194618119406644, "grad_norm": 1.778344230284368, "learning_rate": 4.927788265083787e-06, "loss": 0.6392, "step": 16949 }, { "epoch": 0.5194924604634057, "grad_norm": 1.7437840332755496, "learning_rate": 4.927291999365979e-06, "loss": 0.7318, "step": 16950 }, { "epoch": 0.5195231089861468, "grad_norm": 1.6209376045306012, "learning_rate": 4.9267957343645824e-06, "loss": 0.6388, "step": 16951 }, { "epoch": 0.5195537575088881, "grad_norm": 1.6743395605666906, "learning_rate": 4.926299470084486e-06, "loss": 0.569, "step": 16952 }, { "epoch": 0.5195844060316293, "grad_norm": 1.6902294358063024, "learning_rate": 4.925803206530579e-06, "loss": 0.6388, "step": 16953 }, { "epoch": 0.5196150545543705, "grad_norm": 1.6380549542209486, "learning_rate": 4.925306943707753e-06, "loss": 0.6653, "step": 16954 }, { "epoch": 0.5196457030771117, "grad_norm": 1.7777642997850966, "learning_rate": 4.924810681620896e-06, "loss": 0.6937, "step": 16955 }, { "epoch": 0.5196763515998529, "grad_norm": 1.646764569098974, "learning_rate": 4.924314420274899e-06, "loss": 0.606, "step": 16956 }, { "epoch": 0.5197070001225941, "grad_norm": 1.784709998098849, "learning_rate": 4.923818159674653e-06, "loss": 0.5884, "step": 16957 }, { "epoch": 0.5197376486453353, "grad_norm": 0.8406116974917864, "learning_rate": 4.923321899825043e-06, "loss": 0.4326, "step": 16958 }, { "epoch": 0.5197682971680765, "grad_norm": 1.8895004000180284, "learning_rate": 4.922825640730965e-06, "loss": 0.7034, "step": 16959 }, { "epoch": 0.5197989456908177, "grad_norm": 0.8105089317301916, "learning_rate": 4.9223293823973045e-06, "loss": 0.4327, "step": 16960 }, { "epoch": 0.5198295942135589, "grad_norm": 1.7706559234180637, "learning_rate": 4.921833124828952e-06, "loss": 0.6451, "step": 16961 }, { "epoch": 0.5198602427363002, "grad_norm": 1.6992840258529105, "learning_rate": 4.921336868030797e-06, "loss": 0.6574, "step": 16962 }, { "epoch": 0.5198908912590413, "grad_norm": 1.6238294855884947, "learning_rate": 4.920840612007731e-06, "loss": 0.6546, "step": 16963 }, { "epoch": 0.5199215397817826, "grad_norm": 1.6439756537366395, "learning_rate": 4.9203443567646434e-06, "loss": 0.6158, "step": 16964 }, { "epoch": 0.5199521883045237, "grad_norm": 1.6955385320353187, "learning_rate": 4.919848102306422e-06, "loss": 0.5116, "step": 16965 }, { "epoch": 0.5199828368272649, "grad_norm": 1.7910006501872733, "learning_rate": 4.919351848637958e-06, "loss": 0.6458, "step": 16966 }, { "epoch": 0.5200134853500061, "grad_norm": 1.7997525628018416, "learning_rate": 4.918855595764141e-06, "loss": 0.6301, "step": 16967 }, { "epoch": 0.5200441338727473, "grad_norm": 1.6865933205455803, "learning_rate": 4.9183593436898615e-06, "loss": 0.6644, "step": 16968 }, { "epoch": 0.5200747823954885, "grad_norm": 2.4812009815670195, "learning_rate": 4.917863092420006e-06, "loss": 0.6819, "step": 16969 }, { "epoch": 0.5201054309182297, "grad_norm": 1.6583746967497688, "learning_rate": 4.91736684195947e-06, "loss": 0.5759, "step": 16970 }, { "epoch": 0.520136079440971, "grad_norm": 1.7031935961048175, "learning_rate": 4.916870592313135e-06, "loss": 0.7058, "step": 16971 }, { "epoch": 0.5201667279637121, "grad_norm": 1.3827902970051071, "learning_rate": 4.9163743434858994e-06, "loss": 0.6677, "step": 16972 }, { "epoch": 0.5201973764864534, "grad_norm": 1.8109147973588795, "learning_rate": 4.9158780954826475e-06, "loss": 0.5792, "step": 16973 }, { "epoch": 0.5202280250091945, "grad_norm": 1.8531461981848825, "learning_rate": 4.915381848308269e-06, "loss": 0.6117, "step": 16974 }, { "epoch": 0.5202586735319358, "grad_norm": 1.6373835147927698, "learning_rate": 4.914885601967656e-06, "loss": 0.6056, "step": 16975 }, { "epoch": 0.5202893220546769, "grad_norm": 1.5922841190959631, "learning_rate": 4.914389356465698e-06, "loss": 0.6572, "step": 16976 }, { "epoch": 0.5203199705774182, "grad_norm": 1.7524763948035587, "learning_rate": 4.913893111807281e-06, "loss": 0.6447, "step": 16977 }, { "epoch": 0.5203506191001593, "grad_norm": 1.9102653946138992, "learning_rate": 4.913396867997299e-06, "loss": 0.627, "step": 16978 }, { "epoch": 0.5203812676229006, "grad_norm": 1.5481181945376665, "learning_rate": 4.912900625040641e-06, "loss": 0.6171, "step": 16979 }, { "epoch": 0.5204119161456418, "grad_norm": 1.7976346910196521, "learning_rate": 4.912404382942191e-06, "loss": 0.5742, "step": 16980 }, { "epoch": 0.520442564668383, "grad_norm": 1.06725352017923, "learning_rate": 4.9119081417068474e-06, "loss": 0.4324, "step": 16981 }, { "epoch": 0.5204732131911242, "grad_norm": 0.9676789237873205, "learning_rate": 4.9114119013394925e-06, "loss": 0.4332, "step": 16982 }, { "epoch": 0.5205038617138654, "grad_norm": 1.875049387779561, "learning_rate": 4.910915661845021e-06, "loss": 0.5978, "step": 16983 }, { "epoch": 0.5205345102366066, "grad_norm": 1.6341496827638953, "learning_rate": 4.91041942322832e-06, "loss": 0.5874, "step": 16984 }, { "epoch": 0.5205651587593478, "grad_norm": 1.7936292576128097, "learning_rate": 4.909923185494278e-06, "loss": 0.6438, "step": 16985 }, { "epoch": 0.520595807282089, "grad_norm": 1.9894908650925682, "learning_rate": 4.909426948647787e-06, "loss": 0.6055, "step": 16986 }, { "epoch": 0.5206264558048302, "grad_norm": 1.9210438657612634, "learning_rate": 4.908930712693735e-06, "loss": 0.6723, "step": 16987 }, { "epoch": 0.5206571043275714, "grad_norm": 1.7273503041623284, "learning_rate": 4.908434477637011e-06, "loss": 0.6371, "step": 16988 }, { "epoch": 0.5206877528503127, "grad_norm": 1.6981500377548027, "learning_rate": 4.907938243482507e-06, "loss": 0.6735, "step": 16989 }, { "epoch": 0.5207184013730538, "grad_norm": 1.6974214406103398, "learning_rate": 4.90744201023511e-06, "loss": 0.6279, "step": 16990 }, { "epoch": 0.5207490498957951, "grad_norm": 1.924236073083775, "learning_rate": 4.906945777899712e-06, "loss": 0.683, "step": 16991 }, { "epoch": 0.5207796984185362, "grad_norm": 1.7998773463865687, "learning_rate": 4.906449546481201e-06, "loss": 0.598, "step": 16992 }, { "epoch": 0.5208103469412775, "grad_norm": 1.8012896246732764, "learning_rate": 4.905953315984465e-06, "loss": 0.6499, "step": 16993 }, { "epoch": 0.5208409954640186, "grad_norm": 1.70209347177317, "learning_rate": 4.905457086414397e-06, "loss": 0.6835, "step": 16994 }, { "epoch": 0.5208716439867599, "grad_norm": 1.172476393120044, "learning_rate": 4.904960857775883e-06, "loss": 0.4429, "step": 16995 }, { "epoch": 0.520902292509501, "grad_norm": 1.8146425099183057, "learning_rate": 4.904464630073814e-06, "loss": 0.6476, "step": 16996 }, { "epoch": 0.5209329410322422, "grad_norm": 1.807797119539539, "learning_rate": 4.90396840331308e-06, "loss": 0.6071, "step": 16997 }, { "epoch": 0.5209635895549835, "grad_norm": 1.7186103281517107, "learning_rate": 4.90347217749857e-06, "loss": 0.5784, "step": 16998 }, { "epoch": 0.5209942380777246, "grad_norm": 1.658683011160559, "learning_rate": 4.9029759526351726e-06, "loss": 0.6516, "step": 16999 }, { "epoch": 0.5210248866004659, "grad_norm": 1.7330237133058386, "learning_rate": 4.902479728727778e-06, "loss": 0.6084, "step": 17000 }, { "epoch": 0.521055535123207, "grad_norm": 0.7965315745129088, "learning_rate": 4.901983505781276e-06, "loss": 0.4459, "step": 17001 }, { "epoch": 0.5210861836459483, "grad_norm": 1.8523551376340945, "learning_rate": 4.901487283800556e-06, "loss": 0.5662, "step": 17002 }, { "epoch": 0.5211168321686894, "grad_norm": 1.7862190517852343, "learning_rate": 4.900991062790507e-06, "loss": 0.6753, "step": 17003 }, { "epoch": 0.5211474806914307, "grad_norm": 1.5883991100988089, "learning_rate": 4.900494842756017e-06, "loss": 0.6585, "step": 17004 }, { "epoch": 0.5211781292141718, "grad_norm": 1.586064625567306, "learning_rate": 4.899998623701979e-06, "loss": 0.5331, "step": 17005 }, { "epoch": 0.5212087777369131, "grad_norm": 1.8280799795098563, "learning_rate": 4.899502405633279e-06, "loss": 0.6215, "step": 17006 }, { "epoch": 0.5212394262596542, "grad_norm": 2.1704948815682674, "learning_rate": 4.899006188554807e-06, "loss": 0.6351, "step": 17007 }, { "epoch": 0.5212700747823955, "grad_norm": 1.7574471722911922, "learning_rate": 4.898509972471453e-06, "loss": 0.5806, "step": 17008 }, { "epoch": 0.5213007233051367, "grad_norm": 1.765858404344238, "learning_rate": 4.898013757388106e-06, "loss": 0.6428, "step": 17009 }, { "epoch": 0.5213313718278779, "grad_norm": 0.871862504518672, "learning_rate": 4.897517543309656e-06, "loss": 0.439, "step": 17010 }, { "epoch": 0.5213620203506191, "grad_norm": 1.7067487874332927, "learning_rate": 4.897021330240993e-06, "loss": 0.6182, "step": 17011 }, { "epoch": 0.5213926688733603, "grad_norm": 1.9188754999508932, "learning_rate": 4.896525118187002e-06, "loss": 0.6375, "step": 17012 }, { "epoch": 0.5214233173961015, "grad_norm": 1.6338089479838578, "learning_rate": 4.896028907152579e-06, "loss": 0.539, "step": 17013 }, { "epoch": 0.5214539659188427, "grad_norm": 1.7051061908923555, "learning_rate": 4.8955326971426085e-06, "loss": 0.6169, "step": 17014 }, { "epoch": 0.5214846144415839, "grad_norm": 1.619131840388474, "learning_rate": 4.895036488161981e-06, "loss": 0.7052, "step": 17015 }, { "epoch": 0.5215152629643252, "grad_norm": 1.8307494709606906, "learning_rate": 4.894540280215586e-06, "loss": 0.6744, "step": 17016 }, { "epoch": 0.5215459114870663, "grad_norm": 1.75408867207646, "learning_rate": 4.894044073308311e-06, "loss": 0.6043, "step": 17017 }, { "epoch": 0.5215765600098076, "grad_norm": 1.5343640208590235, "learning_rate": 4.893547867445049e-06, "loss": 0.6034, "step": 17018 }, { "epoch": 0.5216072085325487, "grad_norm": 2.005032456448723, "learning_rate": 4.893051662630686e-06, "loss": 0.562, "step": 17019 }, { "epoch": 0.52163785705529, "grad_norm": 1.5633696164366582, "learning_rate": 4.892555458870112e-06, "loss": 0.5683, "step": 17020 }, { "epoch": 0.5216685055780311, "grad_norm": 1.69980321503174, "learning_rate": 4.892059256168217e-06, "loss": 0.6215, "step": 17021 }, { "epoch": 0.5216991541007724, "grad_norm": 1.8333220345002965, "learning_rate": 4.891563054529892e-06, "loss": 0.7097, "step": 17022 }, { "epoch": 0.5217298026235135, "grad_norm": 1.8459481119755226, "learning_rate": 4.89106685396002e-06, "loss": 0.6974, "step": 17023 }, { "epoch": 0.5217604511462548, "grad_norm": 0.8760926579581677, "learning_rate": 4.890570654463497e-06, "loss": 0.4484, "step": 17024 }, { "epoch": 0.521791099668996, "grad_norm": 1.7020822454475906, "learning_rate": 4.890074456045209e-06, "loss": 0.7302, "step": 17025 }, { "epoch": 0.5218217481917372, "grad_norm": 2.0983877441906253, "learning_rate": 4.8895782587100434e-06, "loss": 0.7605, "step": 17026 }, { "epoch": 0.5218523967144784, "grad_norm": 0.7861760790469434, "learning_rate": 4.889082062462894e-06, "loss": 0.4284, "step": 17027 }, { "epoch": 0.5218830452372195, "grad_norm": 1.672417787533036, "learning_rate": 4.888585867308646e-06, "loss": 0.5644, "step": 17028 }, { "epoch": 0.5219136937599608, "grad_norm": 0.7881033297392507, "learning_rate": 4.8880896732521905e-06, "loss": 0.4364, "step": 17029 }, { "epoch": 0.5219443422827019, "grad_norm": 1.720186180390703, "learning_rate": 4.887593480298416e-06, "loss": 0.5592, "step": 17030 }, { "epoch": 0.5219749908054432, "grad_norm": 1.632510456219012, "learning_rate": 4.88709728845221e-06, "loss": 0.6143, "step": 17031 }, { "epoch": 0.5220056393281843, "grad_norm": 1.6644353719821199, "learning_rate": 4.886601097718466e-06, "loss": 0.6572, "step": 17032 }, { "epoch": 0.5220362878509256, "grad_norm": 1.7349109720375673, "learning_rate": 4.886104908102071e-06, "loss": 0.6786, "step": 17033 }, { "epoch": 0.5220669363736667, "grad_norm": 1.7975205622528903, "learning_rate": 4.885608719607909e-06, "loss": 0.6578, "step": 17034 }, { "epoch": 0.522097584896408, "grad_norm": 1.674332272762426, "learning_rate": 4.8851125322408786e-06, "loss": 0.6269, "step": 17035 }, { "epoch": 0.5221282334191492, "grad_norm": 0.9036243602069247, "learning_rate": 4.884616346005859e-06, "loss": 0.4535, "step": 17036 }, { "epoch": 0.5221588819418904, "grad_norm": 1.703224627793162, "learning_rate": 4.884120160907749e-06, "loss": 0.6559, "step": 17037 }, { "epoch": 0.5221895304646316, "grad_norm": 1.6221360467605745, "learning_rate": 4.8836239769514305e-06, "loss": 0.6161, "step": 17038 }, { "epoch": 0.5222201789873728, "grad_norm": 1.7270171409897994, "learning_rate": 4.883127794141794e-06, "loss": 0.6896, "step": 17039 }, { "epoch": 0.522250827510114, "grad_norm": 0.8270425069983794, "learning_rate": 4.88263161248373e-06, "loss": 0.4474, "step": 17040 }, { "epoch": 0.5222814760328552, "grad_norm": 1.7110806504547709, "learning_rate": 4.882135431982127e-06, "loss": 0.692, "step": 17041 }, { "epoch": 0.5223121245555964, "grad_norm": 1.6720971430577531, "learning_rate": 4.881639252641872e-06, "loss": 0.6577, "step": 17042 }, { "epoch": 0.5223427730783377, "grad_norm": 0.7767060766257146, "learning_rate": 4.881143074467857e-06, "loss": 0.4118, "step": 17043 }, { "epoch": 0.5223734216010788, "grad_norm": 1.5786850247438298, "learning_rate": 4.88064689746497e-06, "loss": 0.6438, "step": 17044 }, { "epoch": 0.5224040701238201, "grad_norm": 1.5818658392728036, "learning_rate": 4.8801507216381e-06, "loss": 0.5506, "step": 17045 }, { "epoch": 0.5224347186465612, "grad_norm": 1.7146417189228107, "learning_rate": 4.879654546992136e-06, "loss": 0.7042, "step": 17046 }, { "epoch": 0.5224653671693025, "grad_norm": 1.752115686803171, "learning_rate": 4.879158373531964e-06, "loss": 0.6482, "step": 17047 }, { "epoch": 0.5224960156920436, "grad_norm": 1.7534179361700273, "learning_rate": 4.878662201262478e-06, "loss": 0.7135, "step": 17048 }, { "epoch": 0.5225266642147849, "grad_norm": 1.5656321366911845, "learning_rate": 4.878166030188564e-06, "loss": 0.507, "step": 17049 }, { "epoch": 0.522557312737526, "grad_norm": 1.8220782700615172, "learning_rate": 4.877669860315109e-06, "loss": 0.6682, "step": 17050 }, { "epoch": 0.5225879612602673, "grad_norm": 1.5914949559408034, "learning_rate": 4.877173691647006e-06, "loss": 0.6344, "step": 17051 }, { "epoch": 0.5226186097830084, "grad_norm": 1.734461469228005, "learning_rate": 4.876677524189144e-06, "loss": 0.7412, "step": 17052 }, { "epoch": 0.5226492583057497, "grad_norm": 1.6348906631641804, "learning_rate": 4.876181357946406e-06, "loss": 0.6866, "step": 17053 }, { "epoch": 0.5226799068284909, "grad_norm": 1.6910516968289573, "learning_rate": 4.875685192923688e-06, "loss": 0.5792, "step": 17054 }, { "epoch": 0.5227105553512321, "grad_norm": 1.7790227854397607, "learning_rate": 4.875189029125872e-06, "loss": 0.6883, "step": 17055 }, { "epoch": 0.5227412038739733, "grad_norm": 1.7457968582398515, "learning_rate": 4.8746928665578545e-06, "loss": 0.6557, "step": 17056 }, { "epoch": 0.5227718523967145, "grad_norm": 3.6371806710095784, "learning_rate": 4.874196705224518e-06, "loss": 0.6332, "step": 17057 }, { "epoch": 0.5228025009194557, "grad_norm": 1.82654687838075, "learning_rate": 4.8737005451307524e-06, "loss": 0.5508, "step": 17058 }, { "epoch": 0.5228331494421968, "grad_norm": 1.6980754065307655, "learning_rate": 4.87320438628145e-06, "loss": 0.681, "step": 17059 }, { "epoch": 0.5228637979649381, "grad_norm": 1.6727651078998442, "learning_rate": 4.872708228681497e-06, "loss": 0.6037, "step": 17060 }, { "epoch": 0.5228944464876792, "grad_norm": 1.7096193293590223, "learning_rate": 4.872212072335781e-06, "loss": 0.5938, "step": 17061 }, { "epoch": 0.5229250950104205, "grad_norm": 1.7384597552957244, "learning_rate": 4.871715917249193e-06, "loss": 0.6763, "step": 17062 }, { "epoch": 0.5229557435331617, "grad_norm": 1.5457490075342322, "learning_rate": 4.87121976342662e-06, "loss": 0.5938, "step": 17063 }, { "epoch": 0.5229863920559029, "grad_norm": 1.765384526532429, "learning_rate": 4.870723610872953e-06, "loss": 0.6644, "step": 17064 }, { "epoch": 0.5230170405786441, "grad_norm": 1.9441641521602462, "learning_rate": 4.87022745959308e-06, "loss": 0.5993, "step": 17065 }, { "epoch": 0.5230476891013853, "grad_norm": 1.6738861716657556, "learning_rate": 4.8697313095918856e-06, "loss": 0.6636, "step": 17066 }, { "epoch": 0.5230783376241265, "grad_norm": 0.8880871644601603, "learning_rate": 4.8692351608742654e-06, "loss": 0.4356, "step": 17067 }, { "epoch": 0.5231089861468677, "grad_norm": 1.8185481019155614, "learning_rate": 4.868739013445104e-06, "loss": 0.6021, "step": 17068 }, { "epoch": 0.5231396346696089, "grad_norm": 1.5704372621431872, "learning_rate": 4.868242867309289e-06, "loss": 0.5956, "step": 17069 }, { "epoch": 0.5231702831923501, "grad_norm": 1.741981138894768, "learning_rate": 4.8677467224717125e-06, "loss": 0.6472, "step": 17070 }, { "epoch": 0.5232009317150913, "grad_norm": 1.8912408142454404, "learning_rate": 4.86725057893726e-06, "loss": 0.6731, "step": 17071 }, { "epoch": 0.5232315802378326, "grad_norm": 0.7972190645111145, "learning_rate": 4.8667544367108225e-06, "loss": 0.4427, "step": 17072 }, { "epoch": 0.5232622287605737, "grad_norm": 1.7817018949629047, "learning_rate": 4.866258295797287e-06, "loss": 0.6273, "step": 17073 }, { "epoch": 0.523292877283315, "grad_norm": 2.0626439360129805, "learning_rate": 4.865762156201543e-06, "loss": 0.723, "step": 17074 }, { "epoch": 0.5233235258060561, "grad_norm": 1.4495203735309783, "learning_rate": 4.865266017928478e-06, "loss": 0.6418, "step": 17075 }, { "epoch": 0.5233541743287974, "grad_norm": 1.9086884666018467, "learning_rate": 4.864769880982984e-06, "loss": 0.6525, "step": 17076 }, { "epoch": 0.5233848228515385, "grad_norm": 0.8582215729123063, "learning_rate": 4.8642737453699434e-06, "loss": 0.4593, "step": 17077 }, { "epoch": 0.5234154713742798, "grad_norm": 1.6456139399498477, "learning_rate": 4.863777611094251e-06, "loss": 0.5283, "step": 17078 }, { "epoch": 0.523446119897021, "grad_norm": 1.6949837367378313, "learning_rate": 4.863281478160793e-06, "loss": 0.6119, "step": 17079 }, { "epoch": 0.5234767684197622, "grad_norm": 1.5559098817790988, "learning_rate": 4.8627853465744555e-06, "loss": 0.5491, "step": 17080 }, { "epoch": 0.5235074169425034, "grad_norm": 1.7506674151080455, "learning_rate": 4.862289216340129e-06, "loss": 0.6898, "step": 17081 }, { "epoch": 0.5235380654652446, "grad_norm": 1.7437919827675303, "learning_rate": 4.861793087462703e-06, "loss": 0.7485, "step": 17082 }, { "epoch": 0.5235687139879858, "grad_norm": 1.6794634060081068, "learning_rate": 4.861296959947066e-06, "loss": 0.6009, "step": 17083 }, { "epoch": 0.523599362510727, "grad_norm": 1.5451356252903896, "learning_rate": 4.860800833798104e-06, "loss": 0.5576, "step": 17084 }, { "epoch": 0.5236300110334682, "grad_norm": 1.6894846831666308, "learning_rate": 4.860304709020707e-06, "loss": 0.5901, "step": 17085 }, { "epoch": 0.5236606595562094, "grad_norm": 1.6001408317693697, "learning_rate": 4.8598085856197644e-06, "loss": 0.6181, "step": 17086 }, { "epoch": 0.5236913080789506, "grad_norm": 1.718248327060134, "learning_rate": 4.859312463600165e-06, "loss": 0.6349, "step": 17087 }, { "epoch": 0.5237219566016919, "grad_norm": 1.9773619735117494, "learning_rate": 4.8588163429667926e-06, "loss": 0.6263, "step": 17088 }, { "epoch": 0.523752605124433, "grad_norm": 1.7262271205693918, "learning_rate": 4.858320223724542e-06, "loss": 0.7219, "step": 17089 }, { "epoch": 0.5237832536471742, "grad_norm": 1.689127607930628, "learning_rate": 4.857824105878296e-06, "loss": 0.6185, "step": 17090 }, { "epoch": 0.5238139021699154, "grad_norm": 1.690519035681251, "learning_rate": 4.857327989432946e-06, "loss": 0.7174, "step": 17091 }, { "epoch": 0.5238445506926566, "grad_norm": 1.5693398036364816, "learning_rate": 4.856831874393382e-06, "loss": 0.5804, "step": 17092 }, { "epoch": 0.5238751992153978, "grad_norm": 1.737404562004694, "learning_rate": 4.856335760764488e-06, "loss": 0.6456, "step": 17093 }, { "epoch": 0.523905847738139, "grad_norm": 1.5646416624762218, "learning_rate": 4.855839648551154e-06, "loss": 0.6257, "step": 17094 }, { "epoch": 0.5239364962608802, "grad_norm": 1.7763059916731716, "learning_rate": 4.8553435377582704e-06, "loss": 0.5636, "step": 17095 }, { "epoch": 0.5239671447836214, "grad_norm": 1.7631208600547137, "learning_rate": 4.854847428390723e-06, "loss": 0.6525, "step": 17096 }, { "epoch": 0.5239977933063626, "grad_norm": 1.7832688618474113, "learning_rate": 4.854351320453402e-06, "loss": 0.7332, "step": 17097 }, { "epoch": 0.5240284418291038, "grad_norm": 1.6506457185362275, "learning_rate": 4.853855213951193e-06, "loss": 0.5974, "step": 17098 }, { "epoch": 0.5240590903518451, "grad_norm": 1.595681378365227, "learning_rate": 4.8533591088889885e-06, "loss": 0.5742, "step": 17099 }, { "epoch": 0.5240897388745862, "grad_norm": 1.8454627808166426, "learning_rate": 4.852863005271673e-06, "loss": 0.6724, "step": 17100 }, { "epoch": 0.5241203873973275, "grad_norm": 1.6609261359809686, "learning_rate": 4.8523669031041345e-06, "loss": 0.6089, "step": 17101 }, { "epoch": 0.5241510359200686, "grad_norm": 1.6249474378448918, "learning_rate": 4.851870802391264e-06, "loss": 0.5306, "step": 17102 }, { "epoch": 0.5241816844428099, "grad_norm": 0.9053065289305129, "learning_rate": 4.851374703137949e-06, "loss": 0.457, "step": 17103 }, { "epoch": 0.524212332965551, "grad_norm": 1.6245926281614902, "learning_rate": 4.850878605349076e-06, "loss": 0.6224, "step": 17104 }, { "epoch": 0.5242429814882923, "grad_norm": 1.6806781834888036, "learning_rate": 4.8503825090295345e-06, "loss": 0.5842, "step": 17105 }, { "epoch": 0.5242736300110334, "grad_norm": 1.7712685894132418, "learning_rate": 4.8498864141842136e-06, "loss": 0.5676, "step": 17106 }, { "epoch": 0.5243042785337747, "grad_norm": 0.8266488900246675, "learning_rate": 4.849390320817998e-06, "loss": 0.4362, "step": 17107 }, { "epoch": 0.5243349270565159, "grad_norm": 1.5913070388961013, "learning_rate": 4.84889422893578e-06, "loss": 0.6171, "step": 17108 }, { "epoch": 0.5243655755792571, "grad_norm": 1.9916119038612572, "learning_rate": 4.848398138542444e-06, "loss": 0.6929, "step": 17109 }, { "epoch": 0.5243962241019983, "grad_norm": 1.7267276635931346, "learning_rate": 4.847902049642882e-06, "loss": 0.5995, "step": 17110 }, { "epoch": 0.5244268726247395, "grad_norm": 1.7218402056813835, "learning_rate": 4.847405962241979e-06, "loss": 0.6409, "step": 17111 }, { "epoch": 0.5244575211474807, "grad_norm": 1.7407402283883886, "learning_rate": 4.846909876344624e-06, "loss": 0.5992, "step": 17112 }, { "epoch": 0.5244881696702219, "grad_norm": 1.64874140995681, "learning_rate": 4.846413791955706e-06, "loss": 0.7112, "step": 17113 }, { "epoch": 0.5245188181929631, "grad_norm": 0.807139467443859, "learning_rate": 4.845917709080111e-06, "loss": 0.4313, "step": 17114 }, { "epoch": 0.5245494667157043, "grad_norm": 1.8651488293266465, "learning_rate": 4.845421627722728e-06, "loss": 0.6373, "step": 17115 }, { "epoch": 0.5245801152384455, "grad_norm": 0.7916988821247838, "learning_rate": 4.8449255478884465e-06, "loss": 0.4237, "step": 17116 }, { "epoch": 0.5246107637611868, "grad_norm": 1.711239815996429, "learning_rate": 4.844429469582153e-06, "loss": 0.6025, "step": 17117 }, { "epoch": 0.5246414122839279, "grad_norm": 1.5869358071936313, "learning_rate": 4.843933392808735e-06, "loss": 0.6621, "step": 17118 }, { "epoch": 0.5246720608066692, "grad_norm": 1.7842641535578783, "learning_rate": 4.843437317573083e-06, "loss": 0.6061, "step": 17119 }, { "epoch": 0.5247027093294103, "grad_norm": 0.7953345819892482, "learning_rate": 4.84294124388008e-06, "loss": 0.4244, "step": 17120 }, { "epoch": 0.5247333578521515, "grad_norm": 1.845213644839652, "learning_rate": 4.842445171734621e-06, "loss": 0.6404, "step": 17121 }, { "epoch": 0.5247640063748927, "grad_norm": 1.7292590873595455, "learning_rate": 4.841949101141588e-06, "loss": 0.5994, "step": 17122 }, { "epoch": 0.5247946548976339, "grad_norm": 1.7021214325504308, "learning_rate": 4.841453032105871e-06, "loss": 0.6141, "step": 17123 }, { "epoch": 0.5248253034203751, "grad_norm": 1.778158646691678, "learning_rate": 4.840956964632358e-06, "loss": 0.6443, "step": 17124 }, { "epoch": 0.5248559519431163, "grad_norm": 1.6261328677563838, "learning_rate": 4.840460898725935e-06, "loss": 0.6227, "step": 17125 }, { "epoch": 0.5248866004658576, "grad_norm": 1.9376292890107263, "learning_rate": 4.839964834391494e-06, "loss": 0.7415, "step": 17126 }, { "epoch": 0.5249172489885987, "grad_norm": 1.8617963270217008, "learning_rate": 4.83946877163392e-06, "loss": 0.7643, "step": 17127 }, { "epoch": 0.52494789751134, "grad_norm": 1.6479371044029174, "learning_rate": 4.8389727104581e-06, "loss": 0.5998, "step": 17128 }, { "epoch": 0.5249785460340811, "grad_norm": 1.6877157100841063, "learning_rate": 4.838476650868924e-06, "loss": 0.5528, "step": 17129 }, { "epoch": 0.5250091945568224, "grad_norm": 1.6878401112465242, "learning_rate": 4.83798059287128e-06, "loss": 0.5417, "step": 17130 }, { "epoch": 0.5250398430795635, "grad_norm": 0.7831747133725416, "learning_rate": 4.837484536470052e-06, "loss": 0.4112, "step": 17131 }, { "epoch": 0.5250704916023048, "grad_norm": 1.7471344736977412, "learning_rate": 4.836988481670133e-06, "loss": 0.667, "step": 17132 }, { "epoch": 0.5251011401250459, "grad_norm": 1.7778452136819562, "learning_rate": 4.836492428476407e-06, "loss": 0.6611, "step": 17133 }, { "epoch": 0.5251317886477872, "grad_norm": 1.7260945201031328, "learning_rate": 4.835996376893763e-06, "loss": 0.6066, "step": 17134 }, { "epoch": 0.5251624371705284, "grad_norm": 1.8380271648123656, "learning_rate": 4.835500326927089e-06, "loss": 0.6683, "step": 17135 }, { "epoch": 0.5251930856932696, "grad_norm": 1.5853829483615471, "learning_rate": 4.835004278581271e-06, "loss": 0.6537, "step": 17136 }, { "epoch": 0.5252237342160108, "grad_norm": 1.7695596964673834, "learning_rate": 4.8345082318611995e-06, "loss": 0.6467, "step": 17137 }, { "epoch": 0.525254382738752, "grad_norm": 1.6777004908049418, "learning_rate": 4.834012186771761e-06, "loss": 0.6171, "step": 17138 }, { "epoch": 0.5252850312614932, "grad_norm": 1.9067987299153926, "learning_rate": 4.833516143317841e-06, "loss": 0.7037, "step": 17139 }, { "epoch": 0.5253156797842344, "grad_norm": 0.8056202906208232, "learning_rate": 4.833020101504331e-06, "loss": 0.423, "step": 17140 }, { "epoch": 0.5253463283069756, "grad_norm": 1.706420846834696, "learning_rate": 4.832524061336118e-06, "loss": 0.5924, "step": 17141 }, { "epoch": 0.5253769768297168, "grad_norm": 0.81406978763941, "learning_rate": 4.832028022818085e-06, "loss": 0.422, "step": 17142 }, { "epoch": 0.525407625352458, "grad_norm": 1.5582440757106253, "learning_rate": 4.831531985955124e-06, "loss": 0.642, "step": 17143 }, { "epoch": 0.5254382738751993, "grad_norm": 1.503745926711847, "learning_rate": 4.8310359507521205e-06, "loss": 0.5779, "step": 17144 }, { "epoch": 0.5254689223979404, "grad_norm": 2.233660561697853, "learning_rate": 4.830539917213965e-06, "loss": 0.5773, "step": 17145 }, { "epoch": 0.5254995709206817, "grad_norm": 1.530078692292291, "learning_rate": 4.830043885345543e-06, "loss": 0.5203, "step": 17146 }, { "epoch": 0.5255302194434228, "grad_norm": 0.7942454546406149, "learning_rate": 4.82954785515174e-06, "loss": 0.4302, "step": 17147 }, { "epoch": 0.5255608679661641, "grad_norm": 2.0296809158167335, "learning_rate": 4.829051826637447e-06, "loss": 0.6322, "step": 17148 }, { "epoch": 0.5255915164889052, "grad_norm": 1.738029253042746, "learning_rate": 4.828555799807552e-06, "loss": 0.6903, "step": 17149 }, { "epoch": 0.5256221650116465, "grad_norm": 1.7678465862971313, "learning_rate": 4.828059774666936e-06, "loss": 0.599, "step": 17150 }, { "epoch": 0.5256528135343876, "grad_norm": 1.735563544151993, "learning_rate": 4.827563751220495e-06, "loss": 0.6993, "step": 17151 }, { "epoch": 0.5256834620571288, "grad_norm": 1.73029548958537, "learning_rate": 4.827067729473111e-06, "loss": 0.6098, "step": 17152 }, { "epoch": 0.52571411057987, "grad_norm": 1.7539600459425346, "learning_rate": 4.826571709429673e-06, "loss": 0.6264, "step": 17153 }, { "epoch": 0.5257447591026112, "grad_norm": 1.7499220988923991, "learning_rate": 4.826075691095068e-06, "loss": 0.6751, "step": 17154 }, { "epoch": 0.5257754076253525, "grad_norm": 1.6674189856372772, "learning_rate": 4.825579674474183e-06, "loss": 0.6204, "step": 17155 }, { "epoch": 0.5258060561480936, "grad_norm": 1.856615989711433, "learning_rate": 4.825083659571907e-06, "loss": 0.6761, "step": 17156 }, { "epoch": 0.5258367046708349, "grad_norm": 1.712933251244837, "learning_rate": 4.824587646393127e-06, "loss": 0.6359, "step": 17157 }, { "epoch": 0.525867353193576, "grad_norm": 0.7717226670886485, "learning_rate": 4.824091634942728e-06, "loss": 0.4392, "step": 17158 }, { "epoch": 0.5258980017163173, "grad_norm": 1.7474548943278343, "learning_rate": 4.823595625225602e-06, "loss": 0.6131, "step": 17159 }, { "epoch": 0.5259286502390584, "grad_norm": 1.4539476670238116, "learning_rate": 4.8230996172466325e-06, "loss": 0.641, "step": 17160 }, { "epoch": 0.5259592987617997, "grad_norm": 1.8233237726731177, "learning_rate": 4.8226036110107055e-06, "loss": 0.6432, "step": 17161 }, { "epoch": 0.5259899472845408, "grad_norm": 1.8285494626863437, "learning_rate": 4.822107606522713e-06, "loss": 0.6551, "step": 17162 }, { "epoch": 0.5260205958072821, "grad_norm": 1.6122956138315412, "learning_rate": 4.821611603787538e-06, "loss": 0.6213, "step": 17163 }, { "epoch": 0.5260512443300233, "grad_norm": 1.6223657692445321, "learning_rate": 4.821115602810072e-06, "loss": 0.5187, "step": 17164 }, { "epoch": 0.5260818928527645, "grad_norm": 1.8015720009948046, "learning_rate": 4.8206196035951975e-06, "loss": 0.5842, "step": 17165 }, { "epoch": 0.5261125413755057, "grad_norm": 1.5379970564411394, "learning_rate": 4.820123606147804e-06, "loss": 0.6024, "step": 17166 }, { "epoch": 0.5261431898982469, "grad_norm": 1.5986733140818317, "learning_rate": 4.8196276104727795e-06, "loss": 0.5382, "step": 17167 }, { "epoch": 0.5261738384209881, "grad_norm": 1.750120289650125, "learning_rate": 4.8191316165750105e-06, "loss": 0.6306, "step": 17168 }, { "epoch": 0.5262044869437293, "grad_norm": 1.7825014049278702, "learning_rate": 4.818635624459383e-06, "loss": 0.702, "step": 17169 }, { "epoch": 0.5262351354664705, "grad_norm": 1.6027114555117885, "learning_rate": 4.818139634130785e-06, "loss": 0.7004, "step": 17170 }, { "epoch": 0.5262657839892118, "grad_norm": 2.026083699633362, "learning_rate": 4.817643645594104e-06, "loss": 0.7008, "step": 17171 }, { "epoch": 0.5262964325119529, "grad_norm": 1.586960862822858, "learning_rate": 4.817147658854227e-06, "loss": 0.6083, "step": 17172 }, { "epoch": 0.5263270810346942, "grad_norm": 1.6336131791063735, "learning_rate": 4.816651673916043e-06, "loss": 0.6107, "step": 17173 }, { "epoch": 0.5263577295574353, "grad_norm": 1.8016804330540437, "learning_rate": 4.816155690784433e-06, "loss": 0.6859, "step": 17174 }, { "epoch": 0.5263883780801766, "grad_norm": 1.7709248575826808, "learning_rate": 4.815659709464291e-06, "loss": 0.7495, "step": 17175 }, { "epoch": 0.5264190266029177, "grad_norm": 1.7520757175741535, "learning_rate": 4.8151637299605e-06, "loss": 0.599, "step": 17176 }, { "epoch": 0.526449675125659, "grad_norm": 1.6142858403883846, "learning_rate": 4.814667752277948e-06, "loss": 0.6275, "step": 17177 }, { "epoch": 0.5264803236484001, "grad_norm": 1.790179590647761, "learning_rate": 4.814171776421521e-06, "loss": 0.639, "step": 17178 }, { "epoch": 0.5265109721711414, "grad_norm": 1.7067881007484789, "learning_rate": 4.813675802396109e-06, "loss": 0.563, "step": 17179 }, { "epoch": 0.5265416206938826, "grad_norm": 1.8248770304789035, "learning_rate": 4.813179830206595e-06, "loss": 0.6625, "step": 17180 }, { "epoch": 0.5265722692166238, "grad_norm": 1.851950377854255, "learning_rate": 4.81268385985787e-06, "loss": 0.5963, "step": 17181 }, { "epoch": 0.526602917739365, "grad_norm": 1.7555870894656678, "learning_rate": 4.8121878913548175e-06, "loss": 0.6658, "step": 17182 }, { "epoch": 0.5266335662621061, "grad_norm": 1.474687578149039, "learning_rate": 4.811691924702327e-06, "loss": 0.6254, "step": 17183 }, { "epoch": 0.5266642147848474, "grad_norm": 1.7221195133169622, "learning_rate": 4.811195959905284e-06, "loss": 0.633, "step": 17184 }, { "epoch": 0.5266948633075885, "grad_norm": 1.7366335763742569, "learning_rate": 4.810699996968573e-06, "loss": 0.6502, "step": 17185 }, { "epoch": 0.5267255118303298, "grad_norm": 1.812002497541649, "learning_rate": 4.810204035897087e-06, "loss": 0.6727, "step": 17186 }, { "epoch": 0.5267561603530709, "grad_norm": 0.853726006548383, "learning_rate": 4.809708076695708e-06, "loss": 0.427, "step": 17187 }, { "epoch": 0.5267868088758122, "grad_norm": 1.8384216668622864, "learning_rate": 4.8092121193693225e-06, "loss": 0.6605, "step": 17188 }, { "epoch": 0.5268174573985533, "grad_norm": 1.7414530334723433, "learning_rate": 4.80871616392282e-06, "loss": 0.646, "step": 17189 }, { "epoch": 0.5268481059212946, "grad_norm": 1.675690771260797, "learning_rate": 4.8082202103610844e-06, "loss": 0.5839, "step": 17190 }, { "epoch": 0.5268787544440358, "grad_norm": 1.6331095483442852, "learning_rate": 4.807724258689006e-06, "loss": 0.6279, "step": 17191 }, { "epoch": 0.526909402966777, "grad_norm": 1.801610252329306, "learning_rate": 4.807228308911469e-06, "loss": 0.7115, "step": 17192 }, { "epoch": 0.5269400514895182, "grad_norm": 0.7868744373444476, "learning_rate": 4.806732361033361e-06, "loss": 0.4483, "step": 17193 }, { "epoch": 0.5269707000122594, "grad_norm": 1.7158987049444174, "learning_rate": 4.8062364150595704e-06, "loss": 0.5975, "step": 17194 }, { "epoch": 0.5270013485350006, "grad_norm": 1.4138613292425384, "learning_rate": 4.80574047099498e-06, "loss": 0.5137, "step": 17195 }, { "epoch": 0.5270319970577418, "grad_norm": 1.6098086066223996, "learning_rate": 4.805244528844477e-06, "loss": 0.6087, "step": 17196 }, { "epoch": 0.527062645580483, "grad_norm": 0.7848175023566697, "learning_rate": 4.8047485886129516e-06, "loss": 0.4448, "step": 17197 }, { "epoch": 0.5270932941032243, "grad_norm": 0.768995854891381, "learning_rate": 4.8042526503052875e-06, "loss": 0.4297, "step": 17198 }, { "epoch": 0.5271239426259654, "grad_norm": 1.6146732422161552, "learning_rate": 4.803756713926373e-06, "loss": 0.6352, "step": 17199 }, { "epoch": 0.5271545911487067, "grad_norm": 1.6582042504612244, "learning_rate": 4.803260779481093e-06, "loss": 0.6703, "step": 17200 }, { "epoch": 0.5271852396714478, "grad_norm": 0.7588305034737717, "learning_rate": 4.802764846974334e-06, "loss": 0.444, "step": 17201 }, { "epoch": 0.5272158881941891, "grad_norm": 1.6117881249106338, "learning_rate": 4.8022689164109855e-06, "loss": 0.5731, "step": 17202 }, { "epoch": 0.5272465367169302, "grad_norm": 1.5517049220402739, "learning_rate": 4.801772987795932e-06, "loss": 0.661, "step": 17203 }, { "epoch": 0.5272771852396715, "grad_norm": 1.8719403349860624, "learning_rate": 4.801277061134057e-06, "loss": 0.6799, "step": 17204 }, { "epoch": 0.5273078337624126, "grad_norm": 1.73707006610965, "learning_rate": 4.800781136430254e-06, "loss": 0.5652, "step": 17205 }, { "epoch": 0.5273384822851539, "grad_norm": 1.5491633411256756, "learning_rate": 4.800285213689403e-06, "loss": 0.6948, "step": 17206 }, { "epoch": 0.527369130807895, "grad_norm": 1.7186945872038482, "learning_rate": 4.799789292916392e-06, "loss": 0.6216, "step": 17207 }, { "epoch": 0.5273997793306363, "grad_norm": 1.695614892653983, "learning_rate": 4.79929337411611e-06, "loss": 0.6391, "step": 17208 }, { "epoch": 0.5274304278533775, "grad_norm": 1.6671875284868296, "learning_rate": 4.798797457293441e-06, "loss": 0.6439, "step": 17209 }, { "epoch": 0.5274610763761187, "grad_norm": 1.6545055712022458, "learning_rate": 4.798301542453273e-06, "loss": 0.6111, "step": 17210 }, { "epoch": 0.5274917248988599, "grad_norm": 1.9945788443576267, "learning_rate": 4.7978056296004905e-06, "loss": 0.6328, "step": 17211 }, { "epoch": 0.5275223734216011, "grad_norm": 1.9436594005771344, "learning_rate": 4.797309718739981e-06, "loss": 0.6533, "step": 17212 }, { "epoch": 0.5275530219443423, "grad_norm": 1.7890725964680083, "learning_rate": 4.796813809876631e-06, "loss": 0.6425, "step": 17213 }, { "epoch": 0.5275836704670834, "grad_norm": 0.9204319404143607, "learning_rate": 4.7963179030153275e-06, "loss": 0.4536, "step": 17214 }, { "epoch": 0.5276143189898247, "grad_norm": 1.785641258888243, "learning_rate": 4.7958219981609535e-06, "loss": 0.6453, "step": 17215 }, { "epoch": 0.5276449675125658, "grad_norm": 1.7680720383291342, "learning_rate": 4.7953260953184e-06, "loss": 0.7418, "step": 17216 }, { "epoch": 0.5276756160353071, "grad_norm": 2.1320505483650463, "learning_rate": 4.794830194492548e-06, "loss": 0.7467, "step": 17217 }, { "epoch": 0.5277062645580483, "grad_norm": 1.6840482300551434, "learning_rate": 4.79433429568829e-06, "loss": 0.5998, "step": 17218 }, { "epoch": 0.5277369130807895, "grad_norm": 1.8290067106437784, "learning_rate": 4.7938383989105065e-06, "loss": 0.6398, "step": 17219 }, { "epoch": 0.5277675616035307, "grad_norm": 1.732989315698536, "learning_rate": 4.793342504164086e-06, "loss": 0.6908, "step": 17220 }, { "epoch": 0.5277982101262719, "grad_norm": 1.6322539008798695, "learning_rate": 4.792846611453916e-06, "loss": 0.6907, "step": 17221 }, { "epoch": 0.5278288586490131, "grad_norm": 1.5827517869705778, "learning_rate": 4.792350720784881e-06, "loss": 0.6149, "step": 17222 }, { "epoch": 0.5278595071717543, "grad_norm": 1.8476533022041348, "learning_rate": 4.791854832161867e-06, "loss": 0.6813, "step": 17223 }, { "epoch": 0.5278901556944955, "grad_norm": 1.7090230308213228, "learning_rate": 4.79135894558976e-06, "loss": 0.6978, "step": 17224 }, { "epoch": 0.5279208042172367, "grad_norm": 1.8528096613738392, "learning_rate": 4.790863061073447e-06, "loss": 0.6738, "step": 17225 }, { "epoch": 0.5279514527399779, "grad_norm": 1.8955910760635117, "learning_rate": 4.790367178617815e-06, "loss": 0.5973, "step": 17226 }, { "epoch": 0.5279821012627192, "grad_norm": 1.626529933362141, "learning_rate": 4.789871298227749e-06, "loss": 0.6088, "step": 17227 }, { "epoch": 0.5280127497854603, "grad_norm": 1.7181974120120118, "learning_rate": 4.789375419908132e-06, "loss": 0.7504, "step": 17228 }, { "epoch": 0.5280433983082016, "grad_norm": 1.6023713474284769, "learning_rate": 4.788879543663856e-06, "loss": 0.6209, "step": 17229 }, { "epoch": 0.5280740468309427, "grad_norm": 1.9257321143820183, "learning_rate": 4.788383669499803e-06, "loss": 0.7341, "step": 17230 }, { "epoch": 0.528104695353684, "grad_norm": 1.6388267736852218, "learning_rate": 4.7878877974208585e-06, "loss": 0.6304, "step": 17231 }, { "epoch": 0.5281353438764251, "grad_norm": 0.8355628602810383, "learning_rate": 4.7873919274319116e-06, "loss": 0.4426, "step": 17232 }, { "epoch": 0.5281659923991664, "grad_norm": 1.7686750311664399, "learning_rate": 4.7868960595378465e-06, "loss": 0.6069, "step": 17233 }, { "epoch": 0.5281966409219075, "grad_norm": 1.7660057880937554, "learning_rate": 4.7864001937435465e-06, "loss": 0.5426, "step": 17234 }, { "epoch": 0.5282272894446488, "grad_norm": 1.5635934702774634, "learning_rate": 4.785904330053902e-06, "loss": 0.6657, "step": 17235 }, { "epoch": 0.52825793796739, "grad_norm": 1.783283255279513, "learning_rate": 4.785408468473796e-06, "loss": 0.6319, "step": 17236 }, { "epoch": 0.5282885864901312, "grad_norm": 1.976854199068246, "learning_rate": 4.784912609008116e-06, "loss": 0.7026, "step": 17237 }, { "epoch": 0.5283192350128724, "grad_norm": 1.6218324155779908, "learning_rate": 4.784416751661749e-06, "loss": 0.6478, "step": 17238 }, { "epoch": 0.5283498835356136, "grad_norm": 1.6033860425834292, "learning_rate": 4.783920896439576e-06, "loss": 0.5818, "step": 17239 }, { "epoch": 0.5283805320583548, "grad_norm": 1.6671168844609632, "learning_rate": 4.7834250433464866e-06, "loss": 0.5622, "step": 17240 }, { "epoch": 0.528411180581096, "grad_norm": 1.7074672979711438, "learning_rate": 4.782929192387366e-06, "loss": 0.5361, "step": 17241 }, { "epoch": 0.5284418291038372, "grad_norm": 1.6176187457773716, "learning_rate": 4.782433343567099e-06, "loss": 0.5527, "step": 17242 }, { "epoch": 0.5284724776265785, "grad_norm": 1.6583588853768034, "learning_rate": 4.7819374968905725e-06, "loss": 0.6126, "step": 17243 }, { "epoch": 0.5285031261493196, "grad_norm": 0.7975195078231418, "learning_rate": 4.78144165236267e-06, "loss": 0.4344, "step": 17244 }, { "epoch": 0.5285337746720608, "grad_norm": 0.7509303862454239, "learning_rate": 4.780945809988281e-06, "loss": 0.4126, "step": 17245 }, { "epoch": 0.528564423194802, "grad_norm": 1.6544242382648242, "learning_rate": 4.780449969772289e-06, "loss": 0.7066, "step": 17246 }, { "epoch": 0.5285950717175432, "grad_norm": 1.8486776806908434, "learning_rate": 4.779954131719577e-06, "loss": 0.6146, "step": 17247 }, { "epoch": 0.5286257202402844, "grad_norm": 0.7687319353798829, "learning_rate": 4.779458295835037e-06, "loss": 0.4406, "step": 17248 }, { "epoch": 0.5286563687630256, "grad_norm": 1.871691491095665, "learning_rate": 4.778962462123549e-06, "loss": 0.6812, "step": 17249 }, { "epoch": 0.5286870172857668, "grad_norm": 1.6845053095250593, "learning_rate": 4.7784666305899995e-06, "loss": 0.5572, "step": 17250 }, { "epoch": 0.528717665808508, "grad_norm": 1.8520043427621828, "learning_rate": 4.777970801239276e-06, "loss": 0.6121, "step": 17251 }, { "epoch": 0.5287483143312492, "grad_norm": 1.3969640391993392, "learning_rate": 4.777474974076261e-06, "loss": 0.6084, "step": 17252 }, { "epoch": 0.5287789628539904, "grad_norm": 1.6840116783855337, "learning_rate": 4.776979149105845e-06, "loss": 0.6691, "step": 17253 }, { "epoch": 0.5288096113767317, "grad_norm": 1.7264314035830175, "learning_rate": 4.776483326332909e-06, "loss": 0.7001, "step": 17254 }, { "epoch": 0.5288402598994728, "grad_norm": 1.6175882359229714, "learning_rate": 4.775987505762339e-06, "loss": 0.5588, "step": 17255 }, { "epoch": 0.5288709084222141, "grad_norm": 1.8956011464951619, "learning_rate": 4.775491687399024e-06, "loss": 0.7083, "step": 17256 }, { "epoch": 0.5289015569449552, "grad_norm": 1.5561925127979281, "learning_rate": 4.774995871247847e-06, "loss": 0.5948, "step": 17257 }, { "epoch": 0.5289322054676965, "grad_norm": 1.7006201674374801, "learning_rate": 4.77450005731369e-06, "loss": 0.5277, "step": 17258 }, { "epoch": 0.5289628539904376, "grad_norm": 1.5978000820953848, "learning_rate": 4.774004245601444e-06, "loss": 0.645, "step": 17259 }, { "epoch": 0.5289935025131789, "grad_norm": 1.5898610182881117, "learning_rate": 4.773508436115992e-06, "loss": 0.6029, "step": 17260 }, { "epoch": 0.52902415103592, "grad_norm": 0.837955642563936, "learning_rate": 4.773012628862218e-06, "loss": 0.4293, "step": 17261 }, { "epoch": 0.5290547995586613, "grad_norm": 1.7306541026147857, "learning_rate": 4.7725168238450096e-06, "loss": 0.6075, "step": 17262 }, { "epoch": 0.5290854480814025, "grad_norm": 1.8434289764427656, "learning_rate": 4.772021021069249e-06, "loss": 0.6469, "step": 17263 }, { "epoch": 0.5291160966041437, "grad_norm": 0.8155201705705245, "learning_rate": 4.771525220539826e-06, "loss": 0.4197, "step": 17264 }, { "epoch": 0.5291467451268849, "grad_norm": 1.613456636109744, "learning_rate": 4.771029422261624e-06, "loss": 0.5367, "step": 17265 }, { "epoch": 0.5291773936496261, "grad_norm": 1.5714594387811749, "learning_rate": 4.770533626239526e-06, "loss": 0.5993, "step": 17266 }, { "epoch": 0.5292080421723673, "grad_norm": 1.9392410478405497, "learning_rate": 4.7700378324784195e-06, "loss": 0.6741, "step": 17267 }, { "epoch": 0.5292386906951085, "grad_norm": 1.687661185874196, "learning_rate": 4.769542040983191e-06, "loss": 0.5867, "step": 17268 }, { "epoch": 0.5292693392178497, "grad_norm": 0.7956364828317848, "learning_rate": 4.76904625175872e-06, "loss": 0.4391, "step": 17269 }, { "epoch": 0.529299987740591, "grad_norm": 1.8481733931300461, "learning_rate": 4.7685504648098984e-06, "loss": 0.6456, "step": 17270 }, { "epoch": 0.5293306362633321, "grad_norm": 1.651676826079063, "learning_rate": 4.768054680141605e-06, "loss": 0.6816, "step": 17271 }, { "epoch": 0.5293612847860734, "grad_norm": 1.4729384296963288, "learning_rate": 4.767558897758732e-06, "loss": 0.6267, "step": 17272 }, { "epoch": 0.5293919333088145, "grad_norm": 0.7830221536932322, "learning_rate": 4.767063117666159e-06, "loss": 0.4181, "step": 17273 }, { "epoch": 0.5294225818315558, "grad_norm": 1.4408752682092214, "learning_rate": 4.766567339868772e-06, "loss": 0.5856, "step": 17274 }, { "epoch": 0.5294532303542969, "grad_norm": 1.5585574867967098, "learning_rate": 4.766071564371458e-06, "loss": 0.5904, "step": 17275 }, { "epoch": 0.5294838788770381, "grad_norm": 1.651319164765854, "learning_rate": 4.7655757911791e-06, "loss": 0.6009, "step": 17276 }, { "epoch": 0.5295145273997793, "grad_norm": 1.7590451585123954, "learning_rate": 4.765080020296583e-06, "loss": 0.6616, "step": 17277 }, { "epoch": 0.5295451759225205, "grad_norm": 1.9199398829602519, "learning_rate": 4.764584251728794e-06, "loss": 0.6193, "step": 17278 }, { "epoch": 0.5295758244452617, "grad_norm": 1.8596489406111945, "learning_rate": 4.764088485480615e-06, "loss": 0.6134, "step": 17279 }, { "epoch": 0.5296064729680029, "grad_norm": 0.7894409275315628, "learning_rate": 4.763592721556934e-06, "loss": 0.447, "step": 17280 }, { "epoch": 0.5296371214907442, "grad_norm": 1.788530637335222, "learning_rate": 4.763096959962635e-06, "loss": 0.5588, "step": 17281 }, { "epoch": 0.5296677700134853, "grad_norm": 1.7348902528998993, "learning_rate": 4.7626012007026e-06, "loss": 0.5903, "step": 17282 }, { "epoch": 0.5296984185362266, "grad_norm": 1.7182126787869396, "learning_rate": 4.762105443781719e-06, "loss": 0.6108, "step": 17283 }, { "epoch": 0.5297290670589677, "grad_norm": 1.7030567623163886, "learning_rate": 4.761609689204872e-06, "loss": 0.672, "step": 17284 }, { "epoch": 0.529759715581709, "grad_norm": 1.7495629465588356, "learning_rate": 4.7611139369769455e-06, "loss": 0.6362, "step": 17285 }, { "epoch": 0.5297903641044501, "grad_norm": 1.8767808781165431, "learning_rate": 4.760618187102825e-06, "loss": 0.7226, "step": 17286 }, { "epoch": 0.5298210126271914, "grad_norm": 1.8523885344987623, "learning_rate": 4.7601224395873955e-06, "loss": 0.66, "step": 17287 }, { "epoch": 0.5298516611499325, "grad_norm": 1.5786533841662505, "learning_rate": 4.75962669443554e-06, "loss": 0.6895, "step": 17288 }, { "epoch": 0.5298823096726738, "grad_norm": 1.7007152732064195, "learning_rate": 4.759130951652144e-06, "loss": 0.6005, "step": 17289 }, { "epoch": 0.529912958195415, "grad_norm": 0.7643042567541958, "learning_rate": 4.758635211242092e-06, "loss": 0.4214, "step": 17290 }, { "epoch": 0.5299436067181562, "grad_norm": 1.9143212053048484, "learning_rate": 4.7581394732102714e-06, "loss": 0.5246, "step": 17291 }, { "epoch": 0.5299742552408974, "grad_norm": 1.670531176956011, "learning_rate": 4.757643737561563e-06, "loss": 0.4754, "step": 17292 }, { "epoch": 0.5300049037636386, "grad_norm": 0.7946409515079994, "learning_rate": 4.757148004300852e-06, "loss": 0.4526, "step": 17293 }, { "epoch": 0.5300355522863798, "grad_norm": 0.8065203509276236, "learning_rate": 4.756652273433025e-06, "loss": 0.4354, "step": 17294 }, { "epoch": 0.530066200809121, "grad_norm": 1.6931048577276477, "learning_rate": 4.756156544962966e-06, "loss": 0.5456, "step": 17295 }, { "epoch": 0.5300968493318622, "grad_norm": 1.6968635321023553, "learning_rate": 4.755660818895557e-06, "loss": 0.7213, "step": 17296 }, { "epoch": 0.5301274978546034, "grad_norm": 2.010371036913974, "learning_rate": 4.755165095235685e-06, "loss": 0.6324, "step": 17297 }, { "epoch": 0.5301581463773446, "grad_norm": 1.4147544506074008, "learning_rate": 4.7546693739882335e-06, "loss": 0.5975, "step": 17298 }, { "epoch": 0.5301887949000859, "grad_norm": 1.6396368788547724, "learning_rate": 4.7541736551580885e-06, "loss": 0.633, "step": 17299 }, { "epoch": 0.530219443422827, "grad_norm": 1.666758350066823, "learning_rate": 4.753677938750135e-06, "loss": 0.6311, "step": 17300 }, { "epoch": 0.5302500919455683, "grad_norm": 1.7739046172153572, "learning_rate": 4.753182224769252e-06, "loss": 0.6042, "step": 17301 }, { "epoch": 0.5302807404683094, "grad_norm": 1.7348513295345827, "learning_rate": 4.752686513220331e-06, "loss": 0.7174, "step": 17302 }, { "epoch": 0.5303113889910507, "grad_norm": 0.833745379375176, "learning_rate": 4.752190804108252e-06, "loss": 0.4531, "step": 17303 }, { "epoch": 0.5303420375137918, "grad_norm": 0.7806270803157008, "learning_rate": 4.751695097437899e-06, "loss": 0.4387, "step": 17304 }, { "epoch": 0.5303726860365331, "grad_norm": 1.790174128184844, "learning_rate": 4.75119939321416e-06, "loss": 0.66, "step": 17305 }, { "epoch": 0.5304033345592742, "grad_norm": 0.7883534153533815, "learning_rate": 4.750703691441915e-06, "loss": 0.4267, "step": 17306 }, { "epoch": 0.5304339830820154, "grad_norm": 1.858341676105463, "learning_rate": 4.750207992126051e-06, "loss": 0.7125, "step": 17307 }, { "epoch": 0.5304646316047567, "grad_norm": 1.6716993272649117, "learning_rate": 4.749712295271453e-06, "loss": 0.6115, "step": 17308 }, { "epoch": 0.5304952801274978, "grad_norm": 1.860477275440261, "learning_rate": 4.749216600883002e-06, "loss": 0.6962, "step": 17309 }, { "epoch": 0.5305259286502391, "grad_norm": 1.7039607192730066, "learning_rate": 4.748720908965584e-06, "loss": 0.6242, "step": 17310 }, { "epoch": 0.5305565771729802, "grad_norm": 1.6111812937975867, "learning_rate": 4.748225219524085e-06, "loss": 0.5207, "step": 17311 }, { "epoch": 0.5305872256957215, "grad_norm": 1.662487718425246, "learning_rate": 4.747729532563384e-06, "loss": 0.6178, "step": 17312 }, { "epoch": 0.5306178742184626, "grad_norm": 1.800616336527116, "learning_rate": 4.747233848088373e-06, "loss": 0.6744, "step": 17313 }, { "epoch": 0.5306485227412039, "grad_norm": 1.7453617605470468, "learning_rate": 4.746738166103929e-06, "loss": 0.6317, "step": 17314 }, { "epoch": 0.530679171263945, "grad_norm": 1.7820022573319692, "learning_rate": 4.746242486614938e-06, "loss": 0.5814, "step": 17315 }, { "epoch": 0.5307098197866863, "grad_norm": 2.01022850204285, "learning_rate": 4.745746809626286e-06, "loss": 0.7112, "step": 17316 }, { "epoch": 0.5307404683094274, "grad_norm": 0.9240377632547886, "learning_rate": 4.745251135142854e-06, "loss": 0.4307, "step": 17317 }, { "epoch": 0.5307711168321687, "grad_norm": 1.6281654156093992, "learning_rate": 4.744755463169529e-06, "loss": 0.5953, "step": 17318 }, { "epoch": 0.5308017653549099, "grad_norm": 1.7389066552614156, "learning_rate": 4.744259793711195e-06, "loss": 0.6269, "step": 17319 }, { "epoch": 0.5308324138776511, "grad_norm": 1.7756814829324075, "learning_rate": 4.743764126772732e-06, "loss": 0.6232, "step": 17320 }, { "epoch": 0.5308630624003923, "grad_norm": 1.7512484742826546, "learning_rate": 4.743268462359029e-06, "loss": 0.6754, "step": 17321 }, { "epoch": 0.5308937109231335, "grad_norm": 1.61357078430452, "learning_rate": 4.742772800474967e-06, "loss": 0.6053, "step": 17322 }, { "epoch": 0.5309243594458747, "grad_norm": 1.559182473194278, "learning_rate": 4.742277141125428e-06, "loss": 0.5594, "step": 17323 }, { "epoch": 0.5309550079686159, "grad_norm": 1.6067152217764857, "learning_rate": 4.741781484315302e-06, "loss": 0.6821, "step": 17324 }, { "epoch": 0.5309856564913571, "grad_norm": 1.7917843615236915, "learning_rate": 4.741285830049465e-06, "loss": 0.6084, "step": 17325 }, { "epoch": 0.5310163050140984, "grad_norm": 1.6150977333252372, "learning_rate": 4.7407901783328096e-06, "loss": 0.6064, "step": 17326 }, { "epoch": 0.5310469535368395, "grad_norm": 1.8747437687135688, "learning_rate": 4.740294529170212e-06, "loss": 0.6551, "step": 17327 }, { "epoch": 0.5310776020595808, "grad_norm": 1.9158381908188231, "learning_rate": 4.739798882566558e-06, "loss": 0.6977, "step": 17328 }, { "epoch": 0.5311082505823219, "grad_norm": 1.7358481106776238, "learning_rate": 4.739303238526735e-06, "loss": 0.6859, "step": 17329 }, { "epoch": 0.5311388991050632, "grad_norm": 1.7490067603125963, "learning_rate": 4.738807597055623e-06, "loss": 0.7022, "step": 17330 }, { "epoch": 0.5311695476278043, "grad_norm": 1.7801993594601209, "learning_rate": 4.738311958158104e-06, "loss": 0.6664, "step": 17331 }, { "epoch": 0.5312001961505456, "grad_norm": 1.8581312312450577, "learning_rate": 4.7378163218390674e-06, "loss": 0.6838, "step": 17332 }, { "epoch": 0.5312308446732867, "grad_norm": 1.6285573917468785, "learning_rate": 4.737320688103394e-06, "loss": 0.7062, "step": 17333 }, { "epoch": 0.531261493196028, "grad_norm": 1.9562388786785843, "learning_rate": 4.736825056955964e-06, "loss": 0.6773, "step": 17334 }, { "epoch": 0.5312921417187692, "grad_norm": 1.5012379085111924, "learning_rate": 4.736329428401667e-06, "loss": 0.5545, "step": 17335 }, { "epoch": 0.5313227902415104, "grad_norm": 1.6104223935291704, "learning_rate": 4.735833802445381e-06, "loss": 0.6133, "step": 17336 }, { "epoch": 0.5313534387642516, "grad_norm": 1.9447647394972383, "learning_rate": 4.735338179091994e-06, "loss": 0.6939, "step": 17337 }, { "epoch": 0.5313840872869927, "grad_norm": 1.739667733959929, "learning_rate": 4.734842558346387e-06, "loss": 0.6083, "step": 17338 }, { "epoch": 0.531414735809734, "grad_norm": 0.8177238246054879, "learning_rate": 4.734346940213443e-06, "loss": 0.4485, "step": 17339 }, { "epoch": 0.5314453843324751, "grad_norm": 1.755282246787942, "learning_rate": 4.733851324698048e-06, "loss": 0.5504, "step": 17340 }, { "epoch": 0.5314760328552164, "grad_norm": 1.6443370176714054, "learning_rate": 4.733355711805085e-06, "loss": 0.6545, "step": 17341 }, { "epoch": 0.5315066813779575, "grad_norm": 1.8413756430387385, "learning_rate": 4.732860101539434e-06, "loss": 0.6756, "step": 17342 }, { "epoch": 0.5315373299006988, "grad_norm": 1.6017954834102575, "learning_rate": 4.732364493905983e-06, "loss": 0.618, "step": 17343 }, { "epoch": 0.53156797842344, "grad_norm": 1.6218330038856283, "learning_rate": 4.73186888890961e-06, "loss": 0.5864, "step": 17344 }, { "epoch": 0.5315986269461812, "grad_norm": 1.6766392469466636, "learning_rate": 4.731373286555205e-06, "loss": 0.6341, "step": 17345 }, { "epoch": 0.5316292754689224, "grad_norm": 0.7865121432313098, "learning_rate": 4.730877686847647e-06, "loss": 0.4325, "step": 17346 }, { "epoch": 0.5316599239916636, "grad_norm": 1.6055371076046627, "learning_rate": 4.730382089791818e-06, "loss": 0.6541, "step": 17347 }, { "epoch": 0.5316905725144048, "grad_norm": 0.7882927694190845, "learning_rate": 4.729886495392604e-06, "loss": 0.4599, "step": 17348 }, { "epoch": 0.531721221037146, "grad_norm": 1.9228042899864932, "learning_rate": 4.729390903654888e-06, "loss": 0.6556, "step": 17349 }, { "epoch": 0.5317518695598872, "grad_norm": 1.6809297701837864, "learning_rate": 4.728895314583553e-06, "loss": 0.6058, "step": 17350 }, { "epoch": 0.5317825180826284, "grad_norm": 1.8199639887737653, "learning_rate": 4.728399728183481e-06, "loss": 0.6225, "step": 17351 }, { "epoch": 0.5318131666053696, "grad_norm": 1.6193562502522914, "learning_rate": 4.727904144459556e-06, "loss": 0.6659, "step": 17352 }, { "epoch": 0.5318438151281109, "grad_norm": 1.6845345544070387, "learning_rate": 4.727408563416661e-06, "loss": 0.6249, "step": 17353 }, { "epoch": 0.531874463650852, "grad_norm": 1.5193356822826136, "learning_rate": 4.7269129850596815e-06, "loss": 0.6522, "step": 17354 }, { "epoch": 0.5319051121735933, "grad_norm": 0.7875476957111055, "learning_rate": 4.726417409393494e-06, "loss": 0.4231, "step": 17355 }, { "epoch": 0.5319357606963344, "grad_norm": 1.8008035698900495, "learning_rate": 4.7259218364229896e-06, "loss": 0.6555, "step": 17356 }, { "epoch": 0.5319664092190757, "grad_norm": 1.938149068686533, "learning_rate": 4.725426266153046e-06, "loss": 0.7016, "step": 17357 }, { "epoch": 0.5319970577418168, "grad_norm": 1.8535081134615932, "learning_rate": 4.724930698588548e-06, "loss": 0.6424, "step": 17358 }, { "epoch": 0.5320277062645581, "grad_norm": 1.7962530021981367, "learning_rate": 4.724435133734377e-06, "loss": 0.5268, "step": 17359 }, { "epoch": 0.5320583547872992, "grad_norm": 1.666529499492938, "learning_rate": 4.723939571595419e-06, "loss": 0.659, "step": 17360 }, { "epoch": 0.5320890033100405, "grad_norm": 1.6625246630491226, "learning_rate": 4.7234440121765525e-06, "loss": 0.5415, "step": 17361 }, { "epoch": 0.5321196518327816, "grad_norm": 0.7812775593935175, "learning_rate": 4.722948455482665e-06, "loss": 0.4155, "step": 17362 }, { "epoch": 0.5321503003555229, "grad_norm": 1.7164224140798499, "learning_rate": 4.722452901518636e-06, "loss": 0.6736, "step": 17363 }, { "epoch": 0.5321809488782641, "grad_norm": 1.7489995863654606, "learning_rate": 4.721957350289351e-06, "loss": 0.6588, "step": 17364 }, { "epoch": 0.5322115974010053, "grad_norm": 1.6673207778020314, "learning_rate": 4.721461801799692e-06, "loss": 0.5176, "step": 17365 }, { "epoch": 0.5322422459237465, "grad_norm": 1.9295135705185664, "learning_rate": 4.720966256054538e-06, "loss": 0.6055, "step": 17366 }, { "epoch": 0.5322728944464877, "grad_norm": 0.8453705304029162, "learning_rate": 4.720470713058777e-06, "loss": 0.4598, "step": 17367 }, { "epoch": 0.5323035429692289, "grad_norm": 1.862749101430272, "learning_rate": 4.71997517281729e-06, "loss": 0.6289, "step": 17368 }, { "epoch": 0.53233419149197, "grad_norm": 1.5752993177258843, "learning_rate": 4.719479635334958e-06, "loss": 0.5676, "step": 17369 }, { "epoch": 0.5323648400147113, "grad_norm": 1.7780947997798453, "learning_rate": 4.718984100616665e-06, "loss": 0.5891, "step": 17370 }, { "epoch": 0.5323954885374524, "grad_norm": 1.7488745183070764, "learning_rate": 4.718488568667294e-06, "loss": 0.6543, "step": 17371 }, { "epoch": 0.5324261370601937, "grad_norm": 1.57252868329563, "learning_rate": 4.7179930394917274e-06, "loss": 0.6066, "step": 17372 }, { "epoch": 0.5324567855829349, "grad_norm": 1.785267869353525, "learning_rate": 4.717497513094847e-06, "loss": 0.6635, "step": 17373 }, { "epoch": 0.5324874341056761, "grad_norm": 1.919343062907238, "learning_rate": 4.717001989481536e-06, "loss": 0.6804, "step": 17374 }, { "epoch": 0.5325180826284173, "grad_norm": 0.7766154425214823, "learning_rate": 4.716506468656677e-06, "loss": 0.4208, "step": 17375 }, { "epoch": 0.5325487311511585, "grad_norm": 1.7776887988087653, "learning_rate": 4.716010950625153e-06, "loss": 0.6594, "step": 17376 }, { "epoch": 0.5325793796738997, "grad_norm": 1.5590940555787751, "learning_rate": 4.7155154353918436e-06, "loss": 0.5618, "step": 17377 }, { "epoch": 0.5326100281966409, "grad_norm": 2.0968936640465388, "learning_rate": 4.715019922961636e-06, "loss": 0.7284, "step": 17378 }, { "epoch": 0.5326406767193821, "grad_norm": 1.6275810923852276, "learning_rate": 4.7145244133394085e-06, "loss": 0.7255, "step": 17379 }, { "epoch": 0.5326713252421234, "grad_norm": 0.80166551437377, "learning_rate": 4.714028906530046e-06, "loss": 0.442, "step": 17380 }, { "epoch": 0.5327019737648645, "grad_norm": 0.7751135902909128, "learning_rate": 4.713533402538429e-06, "loss": 0.4236, "step": 17381 }, { "epoch": 0.5327326222876058, "grad_norm": 1.7599711159665565, "learning_rate": 4.71303790136944e-06, "loss": 0.6749, "step": 17382 }, { "epoch": 0.5327632708103469, "grad_norm": 0.807103217825373, "learning_rate": 4.712542403027963e-06, "loss": 0.4346, "step": 17383 }, { "epoch": 0.5327939193330882, "grad_norm": 1.7552099863913764, "learning_rate": 4.71204690751888e-06, "loss": 0.5393, "step": 17384 }, { "epoch": 0.5328245678558293, "grad_norm": 1.6069148771053636, "learning_rate": 4.71155141484707e-06, "loss": 0.6222, "step": 17385 }, { "epoch": 0.5328552163785706, "grad_norm": 0.811925758811521, "learning_rate": 4.711055925017421e-06, "loss": 0.4381, "step": 17386 }, { "epoch": 0.5328858649013117, "grad_norm": 1.5931454560327925, "learning_rate": 4.710560438034811e-06, "loss": 0.5208, "step": 17387 }, { "epoch": 0.532916513424053, "grad_norm": 1.6311021522335705, "learning_rate": 4.710064953904121e-06, "loss": 0.548, "step": 17388 }, { "epoch": 0.5329471619467941, "grad_norm": 1.6307841395936935, "learning_rate": 4.7095694726302365e-06, "loss": 0.5733, "step": 17389 }, { "epoch": 0.5329778104695354, "grad_norm": 1.8118281487018577, "learning_rate": 4.709073994218038e-06, "loss": 0.6615, "step": 17390 }, { "epoch": 0.5330084589922766, "grad_norm": 1.4712879990338006, "learning_rate": 4.708578518672408e-06, "loss": 0.5896, "step": 17391 }, { "epoch": 0.5330391075150178, "grad_norm": 1.65539411916748, "learning_rate": 4.708083045998229e-06, "loss": 0.6249, "step": 17392 }, { "epoch": 0.533069756037759, "grad_norm": 3.86392957132895, "learning_rate": 4.707587576200381e-06, "loss": 0.6376, "step": 17393 }, { "epoch": 0.5331004045605002, "grad_norm": 1.6778635959080694, "learning_rate": 4.707092109283749e-06, "loss": 0.5486, "step": 17394 }, { "epoch": 0.5331310530832414, "grad_norm": 1.921019520590209, "learning_rate": 4.706596645253214e-06, "loss": 0.6537, "step": 17395 }, { "epoch": 0.5331617016059826, "grad_norm": 1.651423589237798, "learning_rate": 4.706101184113655e-06, "loss": 0.6331, "step": 17396 }, { "epoch": 0.5331923501287238, "grad_norm": 1.5827359696947825, "learning_rate": 4.7056057258699585e-06, "loss": 0.5865, "step": 17397 }, { "epoch": 0.533222998651465, "grad_norm": 1.7352272739975976, "learning_rate": 4.7051102705270004e-06, "loss": 0.6355, "step": 17398 }, { "epoch": 0.5332536471742062, "grad_norm": 1.3631896387907119, "learning_rate": 4.70461481808967e-06, "loss": 0.5957, "step": 17399 }, { "epoch": 0.5332842956969474, "grad_norm": 1.7547896271096028, "learning_rate": 4.704119368562845e-06, "loss": 0.6797, "step": 17400 }, { "epoch": 0.5333149442196886, "grad_norm": 1.7107684067427555, "learning_rate": 4.703623921951406e-06, "loss": 0.6353, "step": 17401 }, { "epoch": 0.5333455927424298, "grad_norm": 1.8343553151659104, "learning_rate": 4.703128478260237e-06, "loss": 0.5933, "step": 17402 }, { "epoch": 0.533376241265171, "grad_norm": 1.7228409511752263, "learning_rate": 4.70263303749422e-06, "loss": 0.6433, "step": 17403 }, { "epoch": 0.5334068897879122, "grad_norm": 0.8965781895634278, "learning_rate": 4.702137599658234e-06, "loss": 0.4192, "step": 17404 }, { "epoch": 0.5334375383106534, "grad_norm": 1.7633118103942416, "learning_rate": 4.701642164757164e-06, "loss": 0.6808, "step": 17405 }, { "epoch": 0.5334681868333946, "grad_norm": 1.9085685602146683, "learning_rate": 4.7011467327958886e-06, "loss": 0.6755, "step": 17406 }, { "epoch": 0.5334988353561358, "grad_norm": 1.9143283875381887, "learning_rate": 4.700651303779291e-06, "loss": 0.6435, "step": 17407 }, { "epoch": 0.533529483878877, "grad_norm": 1.7785454056349488, "learning_rate": 4.700155877712256e-06, "loss": 0.6725, "step": 17408 }, { "epoch": 0.5335601324016183, "grad_norm": 1.7862301101239575, "learning_rate": 4.699660454599657e-06, "loss": 0.6526, "step": 17409 }, { "epoch": 0.5335907809243594, "grad_norm": 0.8216241891188147, "learning_rate": 4.699165034446384e-06, "loss": 0.4451, "step": 17410 }, { "epoch": 0.5336214294471007, "grad_norm": 1.7216055143924445, "learning_rate": 4.698669617257314e-06, "loss": 0.6387, "step": 17411 }, { "epoch": 0.5336520779698418, "grad_norm": 1.8037427382173488, "learning_rate": 4.698174203037328e-06, "loss": 0.5352, "step": 17412 }, { "epoch": 0.5336827264925831, "grad_norm": 1.6585639964118144, "learning_rate": 4.697678791791311e-06, "loss": 0.61, "step": 17413 }, { "epoch": 0.5337133750153242, "grad_norm": 1.75961674562892, "learning_rate": 4.697183383524141e-06, "loss": 0.6232, "step": 17414 }, { "epoch": 0.5337440235380655, "grad_norm": 1.7254250942639184, "learning_rate": 4.696687978240699e-06, "loss": 0.6973, "step": 17415 }, { "epoch": 0.5337746720608066, "grad_norm": 1.965535205643349, "learning_rate": 4.69619257594587e-06, "loss": 0.6848, "step": 17416 }, { "epoch": 0.5338053205835479, "grad_norm": 1.5685902534491583, "learning_rate": 4.695697176644532e-06, "loss": 0.5247, "step": 17417 }, { "epoch": 0.533835969106289, "grad_norm": 1.659117683356685, "learning_rate": 4.695201780341569e-06, "loss": 0.5845, "step": 17418 }, { "epoch": 0.5338666176290303, "grad_norm": 0.8321450019374609, "learning_rate": 4.694706387041861e-06, "loss": 0.4209, "step": 17419 }, { "epoch": 0.5338972661517715, "grad_norm": 1.9280753857137967, "learning_rate": 4.694210996750287e-06, "loss": 0.7321, "step": 17420 }, { "epoch": 0.5339279146745127, "grad_norm": 1.7037347934850866, "learning_rate": 4.693715609471733e-06, "loss": 0.5363, "step": 17421 }, { "epoch": 0.5339585631972539, "grad_norm": 1.7794590857256791, "learning_rate": 4.693220225211076e-06, "loss": 0.7235, "step": 17422 }, { "epoch": 0.5339892117199951, "grad_norm": 1.5656455912730742, "learning_rate": 4.692724843973198e-06, "loss": 0.6121, "step": 17423 }, { "epoch": 0.5340198602427363, "grad_norm": 0.8523990110085031, "learning_rate": 4.692229465762982e-06, "loss": 0.4183, "step": 17424 }, { "epoch": 0.5340505087654775, "grad_norm": 2.006781146918783, "learning_rate": 4.691734090585306e-06, "loss": 0.6604, "step": 17425 }, { "epoch": 0.5340811572882187, "grad_norm": 2.047552398178804, "learning_rate": 4.691238718445055e-06, "loss": 0.6279, "step": 17426 }, { "epoch": 0.53411180581096, "grad_norm": 0.7784702801663548, "learning_rate": 4.690743349347107e-06, "loss": 0.4438, "step": 17427 }, { "epoch": 0.5341424543337011, "grad_norm": 1.7683459008514772, "learning_rate": 4.690247983296343e-06, "loss": 0.5391, "step": 17428 }, { "epoch": 0.5341731028564424, "grad_norm": 1.8808039605569389, "learning_rate": 4.689752620297647e-06, "loss": 0.5806, "step": 17429 }, { "epoch": 0.5342037513791835, "grad_norm": 1.6915865834666528, "learning_rate": 4.6892572603558975e-06, "loss": 0.6233, "step": 17430 }, { "epoch": 0.5342343999019247, "grad_norm": 1.745113051310851, "learning_rate": 4.6887619034759735e-06, "loss": 0.5535, "step": 17431 }, { "epoch": 0.5342650484246659, "grad_norm": 1.8540900819387875, "learning_rate": 4.68826654966276e-06, "loss": 0.6082, "step": 17432 }, { "epoch": 0.5342956969474071, "grad_norm": 1.668333741919333, "learning_rate": 4.687771198921134e-06, "loss": 0.5627, "step": 17433 }, { "epoch": 0.5343263454701483, "grad_norm": 0.820795075249107, "learning_rate": 4.68727585125598e-06, "loss": 0.4185, "step": 17434 }, { "epoch": 0.5343569939928895, "grad_norm": 1.7699058753051515, "learning_rate": 4.686780506672177e-06, "loss": 0.627, "step": 17435 }, { "epoch": 0.5343876425156308, "grad_norm": 0.8533906321372862, "learning_rate": 4.686285165174605e-06, "loss": 0.4547, "step": 17436 }, { "epoch": 0.5344182910383719, "grad_norm": 1.7358494544047864, "learning_rate": 4.6857898267681465e-06, "loss": 0.6803, "step": 17437 }, { "epoch": 0.5344489395611132, "grad_norm": 1.9964383418018095, "learning_rate": 4.685294491457682e-06, "loss": 0.7346, "step": 17438 }, { "epoch": 0.5344795880838543, "grad_norm": 1.9421560347131803, "learning_rate": 4.684799159248088e-06, "loss": 0.6518, "step": 17439 }, { "epoch": 0.5345102366065956, "grad_norm": 1.7669441418484868, "learning_rate": 4.684303830144252e-06, "loss": 0.6721, "step": 17440 }, { "epoch": 0.5345408851293367, "grad_norm": 1.8963572988035078, "learning_rate": 4.683808504151051e-06, "loss": 0.6547, "step": 17441 }, { "epoch": 0.534571533652078, "grad_norm": 1.7718825634277051, "learning_rate": 4.683313181273363e-06, "loss": 0.615, "step": 17442 }, { "epoch": 0.5346021821748191, "grad_norm": 0.7837113321562795, "learning_rate": 4.682817861516073e-06, "loss": 0.4063, "step": 17443 }, { "epoch": 0.5346328306975604, "grad_norm": 1.9606521514471704, "learning_rate": 4.682322544884059e-06, "loss": 0.7004, "step": 17444 }, { "epoch": 0.5346634792203016, "grad_norm": 2.060733504784079, "learning_rate": 4.681827231382203e-06, "loss": 0.6663, "step": 17445 }, { "epoch": 0.5346941277430428, "grad_norm": 0.7887008172007915, "learning_rate": 4.681331921015385e-06, "loss": 0.4302, "step": 17446 }, { "epoch": 0.534724776265784, "grad_norm": 1.640639041540056, "learning_rate": 4.680836613788483e-06, "loss": 0.5087, "step": 17447 }, { "epoch": 0.5347554247885252, "grad_norm": 1.5049863910641401, "learning_rate": 4.680341309706382e-06, "loss": 0.5506, "step": 17448 }, { "epoch": 0.5347860733112664, "grad_norm": 1.4818873629340832, "learning_rate": 4.679846008773961e-06, "loss": 0.552, "step": 17449 }, { "epoch": 0.5348167218340076, "grad_norm": 1.5015109994487552, "learning_rate": 4.679350710996094e-06, "loss": 0.639, "step": 17450 }, { "epoch": 0.5348473703567488, "grad_norm": 1.857116234969272, "learning_rate": 4.678855416377672e-06, "loss": 0.71, "step": 17451 }, { "epoch": 0.53487801887949, "grad_norm": 1.7887488950489252, "learning_rate": 4.6783601249235655e-06, "loss": 0.6726, "step": 17452 }, { "epoch": 0.5349086674022312, "grad_norm": 1.7690231001653263, "learning_rate": 4.677864836638662e-06, "loss": 0.6544, "step": 17453 }, { "epoch": 0.5349393159249725, "grad_norm": 1.8243269843805365, "learning_rate": 4.6773695515278376e-06, "loss": 0.6377, "step": 17454 }, { "epoch": 0.5349699644477136, "grad_norm": 1.5440934894483838, "learning_rate": 4.676874269595973e-06, "loss": 0.643, "step": 17455 }, { "epoch": 0.5350006129704549, "grad_norm": 1.7061081918491148, "learning_rate": 4.6763789908479495e-06, "loss": 0.5334, "step": 17456 }, { "epoch": 0.535031261493196, "grad_norm": 1.7598536954058113, "learning_rate": 4.6758837152886475e-06, "loss": 0.6666, "step": 17457 }, { "epoch": 0.5350619100159373, "grad_norm": 1.9435858580289669, "learning_rate": 4.675388442922944e-06, "loss": 0.6548, "step": 17458 }, { "epoch": 0.5350925585386784, "grad_norm": 1.7188050258801022, "learning_rate": 4.674893173755723e-06, "loss": 0.5775, "step": 17459 }, { "epoch": 0.5351232070614197, "grad_norm": 1.624583850774213, "learning_rate": 4.674397907791861e-06, "loss": 0.594, "step": 17460 }, { "epoch": 0.5351538555841608, "grad_norm": 1.5383826608176432, "learning_rate": 4.673902645036242e-06, "loss": 0.5194, "step": 17461 }, { "epoch": 0.535184504106902, "grad_norm": 1.866535091473537, "learning_rate": 4.673407385493743e-06, "loss": 0.7335, "step": 17462 }, { "epoch": 0.5352151526296433, "grad_norm": 1.736551778702997, "learning_rate": 4.672912129169242e-06, "loss": 0.6074, "step": 17463 }, { "epoch": 0.5352458011523844, "grad_norm": 1.8693085953318962, "learning_rate": 4.672416876067626e-06, "loss": 0.6149, "step": 17464 }, { "epoch": 0.5352764496751257, "grad_norm": 0.7968505713467741, "learning_rate": 4.6719216261937685e-06, "loss": 0.4272, "step": 17465 }, { "epoch": 0.5353070981978668, "grad_norm": 1.6960538273968202, "learning_rate": 4.671426379552549e-06, "loss": 0.6388, "step": 17466 }, { "epoch": 0.5353377467206081, "grad_norm": 0.79897595154646, "learning_rate": 4.6709311361488515e-06, "loss": 0.4372, "step": 17467 }, { "epoch": 0.5353683952433492, "grad_norm": 1.7191674005873434, "learning_rate": 4.670435895987554e-06, "loss": 0.541, "step": 17468 }, { "epoch": 0.5353990437660905, "grad_norm": 1.7976442919707867, "learning_rate": 4.6699406590735345e-06, "loss": 0.6613, "step": 17469 }, { "epoch": 0.5354296922888316, "grad_norm": 0.7877296379709249, "learning_rate": 4.669445425411675e-06, "loss": 0.4365, "step": 17470 }, { "epoch": 0.5354603408115729, "grad_norm": 1.8919733766804185, "learning_rate": 4.668950195006854e-06, "loss": 0.6162, "step": 17471 }, { "epoch": 0.535490989334314, "grad_norm": 1.7099961845969132, "learning_rate": 4.668454967863952e-06, "loss": 0.6342, "step": 17472 }, { "epoch": 0.5355216378570553, "grad_norm": 1.5904102591297422, "learning_rate": 4.667959743987848e-06, "loss": 0.619, "step": 17473 }, { "epoch": 0.5355522863797965, "grad_norm": 1.87597906891774, "learning_rate": 4.6674645233834196e-06, "loss": 0.6146, "step": 17474 }, { "epoch": 0.5355829349025377, "grad_norm": 0.7587581558725377, "learning_rate": 4.666969306055552e-06, "loss": 0.4171, "step": 17475 }, { "epoch": 0.5356135834252789, "grad_norm": 1.5377594542052784, "learning_rate": 4.666474092009119e-06, "loss": 0.6, "step": 17476 }, { "epoch": 0.5356442319480201, "grad_norm": 1.6031128770066834, "learning_rate": 4.665978881249001e-06, "loss": 0.6076, "step": 17477 }, { "epoch": 0.5356748804707613, "grad_norm": 1.7641091832266138, "learning_rate": 4.66548367378008e-06, "loss": 0.7032, "step": 17478 }, { "epoch": 0.5357055289935025, "grad_norm": 1.5253355317447215, "learning_rate": 4.664988469607233e-06, "loss": 0.5674, "step": 17479 }, { "epoch": 0.5357361775162437, "grad_norm": 1.701736460638395, "learning_rate": 4.664493268735341e-06, "loss": 0.6537, "step": 17480 }, { "epoch": 0.535766826038985, "grad_norm": 1.6364806008632202, "learning_rate": 4.663998071169283e-06, "loss": 0.6339, "step": 17481 }, { "epoch": 0.5357974745617261, "grad_norm": 1.6165709039944625, "learning_rate": 4.663502876913937e-06, "loss": 0.5736, "step": 17482 }, { "epoch": 0.5358281230844674, "grad_norm": 1.7850996953329834, "learning_rate": 4.663007685974185e-06, "loss": 0.6591, "step": 17483 }, { "epoch": 0.5358587716072085, "grad_norm": 1.6340938024165512, "learning_rate": 4.662512498354904e-06, "loss": 0.6413, "step": 17484 }, { "epoch": 0.5358894201299498, "grad_norm": 1.9765376399019996, "learning_rate": 4.662017314060972e-06, "loss": 0.6973, "step": 17485 }, { "epoch": 0.5359200686526909, "grad_norm": 1.5360247835886294, "learning_rate": 4.6615221330972715e-06, "loss": 0.6936, "step": 17486 }, { "epoch": 0.5359507171754322, "grad_norm": 1.8488541310938362, "learning_rate": 4.661026955468678e-06, "loss": 0.7267, "step": 17487 }, { "epoch": 0.5359813656981733, "grad_norm": 1.7119325689041593, "learning_rate": 4.660531781180075e-06, "loss": 0.7226, "step": 17488 }, { "epoch": 0.5360120142209146, "grad_norm": 1.821243587766829, "learning_rate": 4.660036610236339e-06, "loss": 0.546, "step": 17489 }, { "epoch": 0.5360426627436558, "grad_norm": 1.8165130256794393, "learning_rate": 4.659541442642348e-06, "loss": 0.6414, "step": 17490 }, { "epoch": 0.536073311266397, "grad_norm": 1.4128958850810973, "learning_rate": 4.659046278402982e-06, "loss": 0.627, "step": 17491 }, { "epoch": 0.5361039597891382, "grad_norm": 1.9643769848359478, "learning_rate": 4.658551117523123e-06, "loss": 0.643, "step": 17492 }, { "epoch": 0.5361346083118793, "grad_norm": 1.9253533876941342, "learning_rate": 4.6580559600076435e-06, "loss": 0.6025, "step": 17493 }, { "epoch": 0.5361652568346206, "grad_norm": 1.7844004145722765, "learning_rate": 4.657560805861429e-06, "loss": 0.6714, "step": 17494 }, { "epoch": 0.5361959053573617, "grad_norm": 1.7171344160168804, "learning_rate": 4.657065655089355e-06, "loss": 0.6764, "step": 17495 }, { "epoch": 0.536226553880103, "grad_norm": 1.7611496052980515, "learning_rate": 4.6565705076962995e-06, "loss": 0.6188, "step": 17496 }, { "epoch": 0.5362572024028441, "grad_norm": 2.129702622380898, "learning_rate": 4.6560753636871435e-06, "loss": 0.7117, "step": 17497 }, { "epoch": 0.5362878509255854, "grad_norm": 1.6608176235250323, "learning_rate": 4.655580223066764e-06, "loss": 0.5524, "step": 17498 }, { "epoch": 0.5363184994483265, "grad_norm": 1.8774057818609087, "learning_rate": 4.655085085840042e-06, "loss": 0.6407, "step": 17499 }, { "epoch": 0.5363491479710678, "grad_norm": 1.5862069561443715, "learning_rate": 4.6545899520118545e-06, "loss": 0.524, "step": 17500 }, { "epoch": 0.536379796493809, "grad_norm": 1.8360700104213774, "learning_rate": 4.654094821587079e-06, "loss": 0.7467, "step": 17501 }, { "epoch": 0.5364104450165502, "grad_norm": 1.6772064500183315, "learning_rate": 4.653599694570598e-06, "loss": 0.6356, "step": 17502 }, { "epoch": 0.5364410935392914, "grad_norm": 1.6423352373633942, "learning_rate": 4.6531045709672886e-06, "loss": 0.6036, "step": 17503 }, { "epoch": 0.5364717420620326, "grad_norm": 1.7828317970788472, "learning_rate": 4.652609450782026e-06, "loss": 0.6791, "step": 17504 }, { "epoch": 0.5365023905847738, "grad_norm": 1.7878776261221654, "learning_rate": 4.652114334019693e-06, "loss": 0.5877, "step": 17505 }, { "epoch": 0.536533039107515, "grad_norm": 1.6242212295318736, "learning_rate": 4.6516192206851644e-06, "loss": 0.6609, "step": 17506 }, { "epoch": 0.5365636876302562, "grad_norm": 1.6283773690681793, "learning_rate": 4.651124110783324e-06, "loss": 0.599, "step": 17507 }, { "epoch": 0.5365943361529975, "grad_norm": 1.7634986474847194, "learning_rate": 4.650629004319046e-06, "loss": 0.5907, "step": 17508 }, { "epoch": 0.5366249846757386, "grad_norm": 1.7633620916825672, "learning_rate": 4.6501339012972076e-06, "loss": 0.5555, "step": 17509 }, { "epoch": 0.5366556331984799, "grad_norm": 1.6470221138802814, "learning_rate": 4.649638801722691e-06, "loss": 0.6183, "step": 17510 }, { "epoch": 0.536686281721221, "grad_norm": 1.8033389836160474, "learning_rate": 4.649143705600373e-06, "loss": 0.5461, "step": 17511 }, { "epoch": 0.5367169302439623, "grad_norm": 1.8078899533108668, "learning_rate": 4.648648612935131e-06, "loss": 0.6786, "step": 17512 }, { "epoch": 0.5367475787667034, "grad_norm": 1.6415963294332685, "learning_rate": 4.648153523731846e-06, "loss": 0.6331, "step": 17513 }, { "epoch": 0.5367782272894447, "grad_norm": 1.7334936364871543, "learning_rate": 4.647658437995394e-06, "loss": 0.6428, "step": 17514 }, { "epoch": 0.5368088758121858, "grad_norm": 1.8599611414607589, "learning_rate": 4.647163355730651e-06, "loss": 0.576, "step": 17515 }, { "epoch": 0.5368395243349271, "grad_norm": 1.5856218074649338, "learning_rate": 4.6466682769425e-06, "loss": 0.5068, "step": 17516 }, { "epoch": 0.5368701728576682, "grad_norm": 1.923896620151049, "learning_rate": 4.646173201635815e-06, "loss": 0.6534, "step": 17517 }, { "epoch": 0.5369008213804095, "grad_norm": 1.59436844953436, "learning_rate": 4.6456781298154784e-06, "loss": 0.5715, "step": 17518 }, { "epoch": 0.5369314699031507, "grad_norm": 1.7731316097057306, "learning_rate": 4.645183061486365e-06, "loss": 0.6155, "step": 17519 }, { "epoch": 0.5369621184258919, "grad_norm": 1.8518999781946077, "learning_rate": 4.644687996653353e-06, "loss": 0.6675, "step": 17520 }, { "epoch": 0.5369927669486331, "grad_norm": 1.9426094579747684, "learning_rate": 4.644192935321322e-06, "loss": 0.5915, "step": 17521 }, { "epoch": 0.5370234154713743, "grad_norm": 0.8197038764504234, "learning_rate": 4.643697877495148e-06, "loss": 0.4332, "step": 17522 }, { "epoch": 0.5370540639941155, "grad_norm": 1.7723641202421343, "learning_rate": 4.6432028231797095e-06, "loss": 0.6636, "step": 17523 }, { "epoch": 0.5370847125168566, "grad_norm": 1.7951272004977603, "learning_rate": 4.642707772379887e-06, "loss": 0.6707, "step": 17524 }, { "epoch": 0.5371153610395979, "grad_norm": 1.7628178535629493, "learning_rate": 4.642212725100554e-06, "loss": 0.6888, "step": 17525 }, { "epoch": 0.537146009562339, "grad_norm": 1.7069182026507492, "learning_rate": 4.641717681346592e-06, "loss": 0.6658, "step": 17526 }, { "epoch": 0.5371766580850803, "grad_norm": 1.7655484777497246, "learning_rate": 4.641222641122878e-06, "loss": 0.6182, "step": 17527 }, { "epoch": 0.5372073066078215, "grad_norm": 1.8358988222958716, "learning_rate": 4.640727604434288e-06, "loss": 0.6891, "step": 17528 }, { "epoch": 0.5372379551305627, "grad_norm": 1.7324311592918684, "learning_rate": 4.640232571285701e-06, "loss": 0.6729, "step": 17529 }, { "epoch": 0.5372686036533039, "grad_norm": 1.9353915224622003, "learning_rate": 4.639737541681995e-06, "loss": 0.6051, "step": 17530 }, { "epoch": 0.5372992521760451, "grad_norm": 0.7953096366952886, "learning_rate": 4.639242515628046e-06, "loss": 0.4384, "step": 17531 }, { "epoch": 0.5373299006987863, "grad_norm": 1.7161721120275237, "learning_rate": 4.638747493128733e-06, "loss": 0.5593, "step": 17532 }, { "epoch": 0.5373605492215275, "grad_norm": 1.7572442734177502, "learning_rate": 4.638252474188934e-06, "loss": 0.6576, "step": 17533 }, { "epoch": 0.5373911977442687, "grad_norm": 1.9016698226438857, "learning_rate": 4.637757458813526e-06, "loss": 0.5807, "step": 17534 }, { "epoch": 0.53742184626701, "grad_norm": 1.9198658031142983, "learning_rate": 4.637262447007387e-06, "loss": 0.6403, "step": 17535 }, { "epoch": 0.5374524947897511, "grad_norm": 1.7009507523470109, "learning_rate": 4.636767438775392e-06, "loss": 0.5994, "step": 17536 }, { "epoch": 0.5374831433124924, "grad_norm": 1.813481717278424, "learning_rate": 4.636272434122422e-06, "loss": 0.689, "step": 17537 }, { "epoch": 0.5375137918352335, "grad_norm": 1.6616503820981718, "learning_rate": 4.635777433053353e-06, "loss": 0.5629, "step": 17538 }, { "epoch": 0.5375444403579748, "grad_norm": 1.601579859527588, "learning_rate": 4.63528243557306e-06, "loss": 0.5659, "step": 17539 }, { "epoch": 0.5375750888807159, "grad_norm": 1.566825768530324, "learning_rate": 4.634787441686425e-06, "loss": 0.6269, "step": 17540 }, { "epoch": 0.5376057374034572, "grad_norm": 1.6865376891208697, "learning_rate": 4.634292451398322e-06, "loss": 0.6025, "step": 17541 }, { "epoch": 0.5376363859261983, "grad_norm": 0.7787034671389703, "learning_rate": 4.633797464713628e-06, "loss": 0.4215, "step": 17542 }, { "epoch": 0.5376670344489396, "grad_norm": 1.6153887920808847, "learning_rate": 4.633302481637222e-06, "loss": 0.6452, "step": 17543 }, { "epoch": 0.5376976829716807, "grad_norm": 2.003379299834303, "learning_rate": 4.63280750217398e-06, "loss": 0.7262, "step": 17544 }, { "epoch": 0.537728331494422, "grad_norm": 1.7055866969321853, "learning_rate": 4.6323125263287805e-06, "loss": 0.5946, "step": 17545 }, { "epoch": 0.5377589800171632, "grad_norm": 1.7929354510509643, "learning_rate": 4.631817554106501e-06, "loss": 0.7037, "step": 17546 }, { "epoch": 0.5377896285399044, "grad_norm": 0.7571217668281346, "learning_rate": 4.631322585512014e-06, "loss": 0.4146, "step": 17547 }, { "epoch": 0.5378202770626456, "grad_norm": 1.6845245521779615, "learning_rate": 4.6308276205502026e-06, "loss": 0.5777, "step": 17548 }, { "epoch": 0.5378509255853868, "grad_norm": 1.5058862868817973, "learning_rate": 4.6303326592259405e-06, "loss": 0.4617, "step": 17549 }, { "epoch": 0.537881574108128, "grad_norm": 1.7204142710283536, "learning_rate": 4.629837701544104e-06, "loss": 0.5361, "step": 17550 }, { "epoch": 0.5379122226308692, "grad_norm": 1.892926279214769, "learning_rate": 4.6293427475095725e-06, "loss": 0.6448, "step": 17551 }, { "epoch": 0.5379428711536104, "grad_norm": 1.5826965107483093, "learning_rate": 4.62884779712722e-06, "loss": 0.6525, "step": 17552 }, { "epoch": 0.5379735196763517, "grad_norm": 1.8252805427709768, "learning_rate": 4.628352850401928e-06, "loss": 0.6138, "step": 17553 }, { "epoch": 0.5380041681990928, "grad_norm": 1.711546374259232, "learning_rate": 4.6278579073385685e-06, "loss": 0.6115, "step": 17554 }, { "epoch": 0.538034816721834, "grad_norm": 1.8645625511794803, "learning_rate": 4.627362967942021e-06, "loss": 0.682, "step": 17555 }, { "epoch": 0.5380654652445752, "grad_norm": 1.6121102806121255, "learning_rate": 4.626868032217161e-06, "loss": 0.5575, "step": 17556 }, { "epoch": 0.5380961137673164, "grad_norm": 1.7662489518095745, "learning_rate": 4.6263731001688676e-06, "loss": 0.5843, "step": 17557 }, { "epoch": 0.5381267622900576, "grad_norm": 2.027681562737041, "learning_rate": 4.625878171802012e-06, "loss": 0.7012, "step": 17558 }, { "epoch": 0.5381574108127988, "grad_norm": 1.7696810544355521, "learning_rate": 4.625383247121479e-06, "loss": 0.7027, "step": 17559 }, { "epoch": 0.53818805933554, "grad_norm": 1.6085792698348345, "learning_rate": 4.624888326132136e-06, "loss": 0.685, "step": 17560 }, { "epoch": 0.5382187078582812, "grad_norm": 1.8941846194087497, "learning_rate": 4.624393408838868e-06, "loss": 0.6353, "step": 17561 }, { "epoch": 0.5382493563810224, "grad_norm": 1.844048185542162, "learning_rate": 4.623898495246547e-06, "loss": 0.6826, "step": 17562 }, { "epoch": 0.5382800049037636, "grad_norm": 0.830198673082686, "learning_rate": 4.6234035853600486e-06, "loss": 0.4562, "step": 17563 }, { "epoch": 0.5383106534265049, "grad_norm": 1.669581943558097, "learning_rate": 4.622908679184253e-06, "loss": 0.6015, "step": 17564 }, { "epoch": 0.538341301949246, "grad_norm": 2.0535649449069764, "learning_rate": 4.6224137767240344e-06, "loss": 0.5386, "step": 17565 }, { "epoch": 0.5383719504719873, "grad_norm": 1.617730494024874, "learning_rate": 4.621918877984268e-06, "loss": 0.6435, "step": 17566 }, { "epoch": 0.5384025989947284, "grad_norm": 1.758104235713165, "learning_rate": 4.621423982969833e-06, "loss": 0.633, "step": 17567 }, { "epoch": 0.5384332475174697, "grad_norm": 1.5642357264534503, "learning_rate": 4.620929091685605e-06, "loss": 0.6164, "step": 17568 }, { "epoch": 0.5384638960402108, "grad_norm": 2.1425240227995506, "learning_rate": 4.620434204136457e-06, "loss": 0.6765, "step": 17569 }, { "epoch": 0.5384945445629521, "grad_norm": 1.8446214949804751, "learning_rate": 4.619939320327271e-06, "loss": 0.6728, "step": 17570 }, { "epoch": 0.5385251930856932, "grad_norm": 1.9072995959112091, "learning_rate": 4.619444440262918e-06, "loss": 0.6672, "step": 17571 }, { "epoch": 0.5385558416084345, "grad_norm": 1.6997148976526117, "learning_rate": 4.618949563948277e-06, "loss": 0.7333, "step": 17572 }, { "epoch": 0.5385864901311757, "grad_norm": 0.8118490176043107, "learning_rate": 4.618454691388224e-06, "loss": 0.4252, "step": 17573 }, { "epoch": 0.5386171386539169, "grad_norm": 1.7678023475562967, "learning_rate": 4.617959822587633e-06, "loss": 0.6672, "step": 17574 }, { "epoch": 0.5386477871766581, "grad_norm": 0.8365517008767226, "learning_rate": 4.617464957551383e-06, "loss": 0.4334, "step": 17575 }, { "epoch": 0.5386784356993993, "grad_norm": 1.9812689914142947, "learning_rate": 4.616970096284348e-06, "loss": 0.6874, "step": 17576 }, { "epoch": 0.5387090842221405, "grad_norm": 1.8269668405387463, "learning_rate": 4.616475238791405e-06, "loss": 0.6667, "step": 17577 }, { "epoch": 0.5387397327448817, "grad_norm": 2.0602429063014474, "learning_rate": 4.61598038507743e-06, "loss": 0.7186, "step": 17578 }, { "epoch": 0.5387703812676229, "grad_norm": 1.837986856200391, "learning_rate": 4.615485535147296e-06, "loss": 0.657, "step": 17579 }, { "epoch": 0.5388010297903641, "grad_norm": 1.7227763637405138, "learning_rate": 4.6149906890058855e-06, "loss": 0.5887, "step": 17580 }, { "epoch": 0.5388316783131053, "grad_norm": 0.7617413155753756, "learning_rate": 4.614495846658068e-06, "loss": 0.4199, "step": 17581 }, { "epoch": 0.5388623268358466, "grad_norm": 0.8222344697515642, "learning_rate": 4.614001008108721e-06, "loss": 0.4361, "step": 17582 }, { "epoch": 0.5388929753585877, "grad_norm": 1.6740708253868053, "learning_rate": 4.613506173362722e-06, "loss": 0.6694, "step": 17583 }, { "epoch": 0.538923623881329, "grad_norm": 1.6372386271201653, "learning_rate": 4.613011342424945e-06, "loss": 0.5919, "step": 17584 }, { "epoch": 0.5389542724040701, "grad_norm": 1.7316131004982889, "learning_rate": 4.612516515300266e-06, "loss": 0.6867, "step": 17585 }, { "epoch": 0.5389849209268113, "grad_norm": 1.6498884138828251, "learning_rate": 4.612021691993561e-06, "loss": 0.6502, "step": 17586 }, { "epoch": 0.5390155694495525, "grad_norm": 1.707395833914051, "learning_rate": 4.611526872509705e-06, "loss": 0.6814, "step": 17587 }, { "epoch": 0.5390462179722937, "grad_norm": 1.7381626987687764, "learning_rate": 4.611032056853575e-06, "loss": 0.6685, "step": 17588 }, { "epoch": 0.539076866495035, "grad_norm": 0.7539755395751385, "learning_rate": 4.610537245030048e-06, "loss": 0.405, "step": 17589 }, { "epoch": 0.5391075150177761, "grad_norm": 1.9417382504316334, "learning_rate": 4.610042437043993e-06, "loss": 0.6605, "step": 17590 }, { "epoch": 0.5391381635405174, "grad_norm": 0.7718212915858734, "learning_rate": 4.609547632900292e-06, "loss": 0.438, "step": 17591 }, { "epoch": 0.5391688120632585, "grad_norm": 1.9754201819387798, "learning_rate": 4.609052832603818e-06, "loss": 0.6997, "step": 17592 }, { "epoch": 0.5391994605859998, "grad_norm": 1.6190644775435916, "learning_rate": 4.608558036159445e-06, "loss": 0.5825, "step": 17593 }, { "epoch": 0.5392301091087409, "grad_norm": 1.9650608834941805, "learning_rate": 4.608063243572051e-06, "loss": 0.6804, "step": 17594 }, { "epoch": 0.5392607576314822, "grad_norm": 1.4890727587803767, "learning_rate": 4.60756845484651e-06, "loss": 0.6177, "step": 17595 }, { "epoch": 0.5392914061542233, "grad_norm": 1.69009044304811, "learning_rate": 4.607073669987698e-06, "loss": 0.6627, "step": 17596 }, { "epoch": 0.5393220546769646, "grad_norm": 0.8021892575134817, "learning_rate": 4.606578889000489e-06, "loss": 0.4287, "step": 17597 }, { "epoch": 0.5393527031997057, "grad_norm": 1.6983851360429785, "learning_rate": 4.606084111889758e-06, "loss": 0.5965, "step": 17598 }, { "epoch": 0.539383351722447, "grad_norm": 1.6167262129315196, "learning_rate": 4.605589338660382e-06, "loss": 0.5229, "step": 17599 }, { "epoch": 0.5394140002451882, "grad_norm": 1.7025145208114847, "learning_rate": 4.605094569317236e-06, "loss": 0.5699, "step": 17600 }, { "epoch": 0.5394446487679294, "grad_norm": 1.8366056249121303, "learning_rate": 4.604599803865192e-06, "loss": 0.59, "step": 17601 }, { "epoch": 0.5394752972906706, "grad_norm": 1.8427729283263359, "learning_rate": 4.60410504230913e-06, "loss": 0.6141, "step": 17602 }, { "epoch": 0.5395059458134118, "grad_norm": 0.8122929303995454, "learning_rate": 4.603610284653921e-06, "loss": 0.441, "step": 17603 }, { "epoch": 0.539536594336153, "grad_norm": 2.0380254810317457, "learning_rate": 4.603115530904441e-06, "loss": 0.6441, "step": 17604 }, { "epoch": 0.5395672428588942, "grad_norm": 1.7519598277305464, "learning_rate": 4.602620781065565e-06, "loss": 0.6501, "step": 17605 }, { "epoch": 0.5395978913816354, "grad_norm": 0.8069288284807751, "learning_rate": 4.602126035142168e-06, "loss": 0.4353, "step": 17606 }, { "epoch": 0.5396285399043766, "grad_norm": 1.8653625391536162, "learning_rate": 4.601631293139126e-06, "loss": 0.6604, "step": 17607 }, { "epoch": 0.5396591884271178, "grad_norm": 1.6663182749105099, "learning_rate": 4.601136555061312e-06, "loss": 0.6223, "step": 17608 }, { "epoch": 0.5396898369498591, "grad_norm": 1.6763593148805533, "learning_rate": 4.600641820913601e-06, "loss": 0.6167, "step": 17609 }, { "epoch": 0.5397204854726002, "grad_norm": 1.6382956368722918, "learning_rate": 4.60014709070087e-06, "loss": 0.644, "step": 17610 }, { "epoch": 0.5397511339953415, "grad_norm": 1.4546480767527832, "learning_rate": 4.599652364427993e-06, "loss": 0.5174, "step": 17611 }, { "epoch": 0.5397817825180826, "grad_norm": 1.6607607020820372, "learning_rate": 4.59915764209984e-06, "loss": 0.5818, "step": 17612 }, { "epoch": 0.5398124310408239, "grad_norm": 1.8882570302008985, "learning_rate": 4.598662923721293e-06, "loss": 0.5741, "step": 17613 }, { "epoch": 0.539843079563565, "grad_norm": 1.6322820927504615, "learning_rate": 4.598168209297218e-06, "loss": 0.5604, "step": 17614 }, { "epoch": 0.5398737280863063, "grad_norm": 1.853404273483041, "learning_rate": 4.5976734988325e-06, "loss": 0.6492, "step": 17615 }, { "epoch": 0.5399043766090474, "grad_norm": 1.749047736904495, "learning_rate": 4.5971787923320055e-06, "loss": 0.6735, "step": 17616 }, { "epoch": 0.5399350251317886, "grad_norm": 1.6272244465125114, "learning_rate": 4.59668408980061e-06, "loss": 0.6869, "step": 17617 }, { "epoch": 0.5399656736545299, "grad_norm": 1.6830356429540214, "learning_rate": 4.596189391243192e-06, "loss": 0.5992, "step": 17618 }, { "epoch": 0.539996322177271, "grad_norm": 1.747424875684164, "learning_rate": 4.595694696664622e-06, "loss": 0.6045, "step": 17619 }, { "epoch": 0.5400269707000123, "grad_norm": 1.5136068708402437, "learning_rate": 4.595200006069775e-06, "loss": 0.6991, "step": 17620 }, { "epoch": 0.5400576192227534, "grad_norm": 1.5665415784500456, "learning_rate": 4.5947053194635275e-06, "loss": 0.6305, "step": 17621 }, { "epoch": 0.5400882677454947, "grad_norm": 0.8510747002605299, "learning_rate": 4.594210636850752e-06, "loss": 0.4117, "step": 17622 }, { "epoch": 0.5401189162682358, "grad_norm": 0.8413596459777912, "learning_rate": 4.593715958236322e-06, "loss": 0.4346, "step": 17623 }, { "epoch": 0.5401495647909771, "grad_norm": 1.920039172146737, "learning_rate": 4.593221283625113e-06, "loss": 0.7484, "step": 17624 }, { "epoch": 0.5401802133137182, "grad_norm": 1.7548456350558261, "learning_rate": 4.592726613021997e-06, "loss": 0.638, "step": 17625 }, { "epoch": 0.5402108618364595, "grad_norm": 1.8449324372000444, "learning_rate": 4.5922319464318524e-06, "loss": 0.7039, "step": 17626 }, { "epoch": 0.5402415103592006, "grad_norm": 1.802542276565198, "learning_rate": 4.59173728385955e-06, "loss": 0.6364, "step": 17627 }, { "epoch": 0.5402721588819419, "grad_norm": 1.643725752212191, "learning_rate": 4.591242625309963e-06, "loss": 0.6416, "step": 17628 }, { "epoch": 0.5403028074046831, "grad_norm": 0.84760985989571, "learning_rate": 4.5907479707879694e-06, "loss": 0.4239, "step": 17629 }, { "epoch": 0.5403334559274243, "grad_norm": 1.8300249813916136, "learning_rate": 4.59025332029844e-06, "loss": 0.6501, "step": 17630 }, { "epoch": 0.5403641044501655, "grad_norm": 0.8458453978573444, "learning_rate": 4.589758673846249e-06, "loss": 0.4457, "step": 17631 }, { "epoch": 0.5403947529729067, "grad_norm": 1.779474044068733, "learning_rate": 4.589264031436272e-06, "loss": 0.6201, "step": 17632 }, { "epoch": 0.5404254014956479, "grad_norm": 1.803896001402324, "learning_rate": 4.588769393073379e-06, "loss": 0.6029, "step": 17633 }, { "epoch": 0.5404560500183891, "grad_norm": 0.7688540676230105, "learning_rate": 4.588274758762449e-06, "loss": 0.4159, "step": 17634 }, { "epoch": 0.5404866985411303, "grad_norm": 1.5515604185396306, "learning_rate": 4.587780128508352e-06, "loss": 0.5932, "step": 17635 }, { "epoch": 0.5405173470638716, "grad_norm": 1.6904193233354707, "learning_rate": 4.587285502315963e-06, "loss": 0.6037, "step": 17636 }, { "epoch": 0.5405479955866127, "grad_norm": 1.5508868954267974, "learning_rate": 4.586790880190155e-06, "loss": 0.572, "step": 17637 }, { "epoch": 0.540578644109354, "grad_norm": 1.6366509247393741, "learning_rate": 4.586296262135804e-06, "loss": 0.5614, "step": 17638 }, { "epoch": 0.5406092926320951, "grad_norm": 1.8052982593092142, "learning_rate": 4.58580164815778e-06, "loss": 0.6676, "step": 17639 }, { "epoch": 0.5406399411548364, "grad_norm": 0.8654964407717722, "learning_rate": 4.5853070382609584e-06, "loss": 0.4433, "step": 17640 }, { "epoch": 0.5406705896775775, "grad_norm": 1.5879520377885867, "learning_rate": 4.584812432450212e-06, "loss": 0.6214, "step": 17641 }, { "epoch": 0.5407012382003188, "grad_norm": 1.6249013707326456, "learning_rate": 4.584317830730417e-06, "loss": 0.6059, "step": 17642 }, { "epoch": 0.5407318867230599, "grad_norm": 1.900926058089687, "learning_rate": 4.583823233106445e-06, "loss": 0.6797, "step": 17643 }, { "epoch": 0.5407625352458012, "grad_norm": 1.5820046512235821, "learning_rate": 4.583328639583166e-06, "loss": 0.5501, "step": 17644 }, { "epoch": 0.5407931837685424, "grad_norm": 2.083161397365613, "learning_rate": 4.58283405016546e-06, "loss": 0.6476, "step": 17645 }, { "epoch": 0.5408238322912836, "grad_norm": 1.671333491442986, "learning_rate": 4.582339464858195e-06, "loss": 0.5462, "step": 17646 }, { "epoch": 0.5408544808140248, "grad_norm": 1.532197060603869, "learning_rate": 4.581844883666246e-06, "loss": 0.5914, "step": 17647 }, { "epoch": 0.5408851293367659, "grad_norm": 1.6471351011722661, "learning_rate": 4.581350306594487e-06, "loss": 0.614, "step": 17648 }, { "epoch": 0.5409157778595072, "grad_norm": 1.6142025286353607, "learning_rate": 4.580855733647791e-06, "loss": 0.6123, "step": 17649 }, { "epoch": 0.5409464263822483, "grad_norm": 1.8245377410349914, "learning_rate": 4.5803611648310295e-06, "loss": 0.677, "step": 17650 }, { "epoch": 0.5409770749049896, "grad_norm": 1.610996475444641, "learning_rate": 4.579866600149077e-06, "loss": 0.6409, "step": 17651 }, { "epoch": 0.5410077234277307, "grad_norm": 1.9198730319542496, "learning_rate": 4.579372039606806e-06, "loss": 0.7084, "step": 17652 }, { "epoch": 0.541038371950472, "grad_norm": 1.845562613303968, "learning_rate": 4.578877483209091e-06, "loss": 0.5968, "step": 17653 }, { "epoch": 0.5410690204732131, "grad_norm": 1.8642463029950775, "learning_rate": 4.578382930960805e-06, "loss": 0.5921, "step": 17654 }, { "epoch": 0.5410996689959544, "grad_norm": 1.926048269462034, "learning_rate": 4.5778883828668165e-06, "loss": 0.7613, "step": 17655 }, { "epoch": 0.5411303175186956, "grad_norm": 1.745050066980814, "learning_rate": 4.577393838932006e-06, "loss": 0.7213, "step": 17656 }, { "epoch": 0.5411609660414368, "grad_norm": 2.046749051212574, "learning_rate": 4.576899299161239e-06, "loss": 0.574, "step": 17657 }, { "epoch": 0.541191614564178, "grad_norm": 1.704244457282487, "learning_rate": 4.576404763559392e-06, "loss": 0.6423, "step": 17658 }, { "epoch": 0.5412222630869192, "grad_norm": 1.7362915295233061, "learning_rate": 4.575910232131338e-06, "loss": 0.5706, "step": 17659 }, { "epoch": 0.5412529116096604, "grad_norm": 2.5682852712724644, "learning_rate": 4.575415704881947e-06, "loss": 0.5566, "step": 17660 }, { "epoch": 0.5412835601324016, "grad_norm": 1.7151689906236192, "learning_rate": 4.5749211818160964e-06, "loss": 0.5542, "step": 17661 }, { "epoch": 0.5413142086551428, "grad_norm": 0.8218532004310775, "learning_rate": 4.574426662938655e-06, "loss": 0.4395, "step": 17662 }, { "epoch": 0.541344857177884, "grad_norm": 0.8226924372252994, "learning_rate": 4.573932148254496e-06, "loss": 0.4341, "step": 17663 }, { "epoch": 0.5413755057006252, "grad_norm": 1.9284633242911315, "learning_rate": 4.573437637768493e-06, "loss": 0.5817, "step": 17664 }, { "epoch": 0.5414061542233665, "grad_norm": 1.6033323616235038, "learning_rate": 4.57294313148552e-06, "loss": 0.5475, "step": 17665 }, { "epoch": 0.5414368027461076, "grad_norm": 0.7395819894560498, "learning_rate": 4.572448629410444e-06, "loss": 0.4237, "step": 17666 }, { "epoch": 0.5414674512688489, "grad_norm": 1.7511253015307648, "learning_rate": 4.571954131548144e-06, "loss": 0.6203, "step": 17667 }, { "epoch": 0.54149809979159, "grad_norm": 1.7821488044446898, "learning_rate": 4.571459637903489e-06, "loss": 0.592, "step": 17668 }, { "epoch": 0.5415287483143313, "grad_norm": 0.7548903277764495, "learning_rate": 4.57096514848135e-06, "loss": 0.4291, "step": 17669 }, { "epoch": 0.5415593968370724, "grad_norm": 1.5674317127161344, "learning_rate": 4.570470663286603e-06, "loss": 0.5845, "step": 17670 }, { "epoch": 0.5415900453598137, "grad_norm": 1.6614465122646258, "learning_rate": 4.569976182324116e-06, "loss": 0.6663, "step": 17671 }, { "epoch": 0.5416206938825548, "grad_norm": 1.6346949506100015, "learning_rate": 4.569481705598766e-06, "loss": 0.665, "step": 17672 }, { "epoch": 0.5416513424052961, "grad_norm": 1.7378742118833246, "learning_rate": 4.568987233115423e-06, "loss": 0.6446, "step": 17673 }, { "epoch": 0.5416819909280373, "grad_norm": 0.7967893385189685, "learning_rate": 4.568492764878958e-06, "loss": 0.436, "step": 17674 }, { "epoch": 0.5417126394507785, "grad_norm": 1.6243462388896752, "learning_rate": 4.567998300894245e-06, "loss": 0.5336, "step": 17675 }, { "epoch": 0.5417432879735197, "grad_norm": 1.4976830342854692, "learning_rate": 4.567503841166155e-06, "loss": 0.5898, "step": 17676 }, { "epoch": 0.5417739364962609, "grad_norm": 0.8149854375377007, "learning_rate": 4.56700938569956e-06, "loss": 0.4467, "step": 17677 }, { "epoch": 0.5418045850190021, "grad_norm": 1.784888445689272, "learning_rate": 4.566514934499333e-06, "loss": 0.6371, "step": 17678 }, { "epoch": 0.5418352335417432, "grad_norm": 0.791000054416554, "learning_rate": 4.566020487570344e-06, "loss": 0.4344, "step": 17679 }, { "epoch": 0.5418658820644845, "grad_norm": 0.7919366502065541, "learning_rate": 4.565526044917467e-06, "loss": 0.4323, "step": 17680 }, { "epoch": 0.5418965305872256, "grad_norm": 1.607695619405609, "learning_rate": 4.565031606545574e-06, "loss": 0.593, "step": 17681 }, { "epoch": 0.5419271791099669, "grad_norm": 1.8915467980014762, "learning_rate": 4.564537172459533e-06, "loss": 0.6468, "step": 17682 }, { "epoch": 0.5419578276327081, "grad_norm": 1.8995501434892683, "learning_rate": 4.564042742664221e-06, "loss": 0.5775, "step": 17683 }, { "epoch": 0.5419884761554493, "grad_norm": 1.729199033181408, "learning_rate": 4.563548317164509e-06, "loss": 0.6322, "step": 17684 }, { "epoch": 0.5420191246781905, "grad_norm": 0.7472899651763578, "learning_rate": 4.563053895965263e-06, "loss": 0.4316, "step": 17685 }, { "epoch": 0.5420497732009317, "grad_norm": 0.7653618975033067, "learning_rate": 4.562559479071362e-06, "loss": 0.4298, "step": 17686 }, { "epoch": 0.5420804217236729, "grad_norm": 1.7200489089867528, "learning_rate": 4.562065066487672e-06, "loss": 0.6377, "step": 17687 }, { "epoch": 0.5421110702464141, "grad_norm": 1.919456705124247, "learning_rate": 4.561570658219069e-06, "loss": 0.5621, "step": 17688 }, { "epoch": 0.5421417187691553, "grad_norm": 1.789498328220634, "learning_rate": 4.561076254270422e-06, "loss": 0.7298, "step": 17689 }, { "epoch": 0.5421723672918966, "grad_norm": 1.6163430559447385, "learning_rate": 4.560581854646602e-06, "loss": 0.697, "step": 17690 }, { "epoch": 0.5422030158146377, "grad_norm": 0.7528295582423489, "learning_rate": 4.560087459352482e-06, "loss": 0.4255, "step": 17691 }, { "epoch": 0.542233664337379, "grad_norm": 1.635806447265174, "learning_rate": 4.559593068392933e-06, "loss": 0.7233, "step": 17692 }, { "epoch": 0.5422643128601201, "grad_norm": 0.7453117964427001, "learning_rate": 4.559098681772826e-06, "loss": 0.4326, "step": 17693 }, { "epoch": 0.5422949613828614, "grad_norm": 1.9392253328872127, "learning_rate": 4.558604299497032e-06, "loss": 0.6495, "step": 17694 }, { "epoch": 0.5423256099056025, "grad_norm": 1.6216712775967965, "learning_rate": 4.558109921570425e-06, "loss": 0.5789, "step": 17695 }, { "epoch": 0.5423562584283438, "grad_norm": 1.7083230583708844, "learning_rate": 4.55761554799787e-06, "loss": 0.6163, "step": 17696 }, { "epoch": 0.5423869069510849, "grad_norm": 1.4745365964016377, "learning_rate": 4.557121178784246e-06, "loss": 0.6032, "step": 17697 }, { "epoch": 0.5424175554738262, "grad_norm": 1.9769656935374627, "learning_rate": 4.556626813934417e-06, "loss": 0.76, "step": 17698 }, { "epoch": 0.5424482039965673, "grad_norm": 1.8408664532289456, "learning_rate": 4.55613245345326e-06, "loss": 0.6547, "step": 17699 }, { "epoch": 0.5424788525193086, "grad_norm": 2.017258163515001, "learning_rate": 4.555638097345644e-06, "loss": 0.7954, "step": 17700 }, { "epoch": 0.5425095010420498, "grad_norm": 1.5771825239637463, "learning_rate": 4.555143745616437e-06, "loss": 0.558, "step": 17701 }, { "epoch": 0.542540149564791, "grad_norm": 1.5377720532409196, "learning_rate": 4.554649398270515e-06, "loss": 0.7183, "step": 17702 }, { "epoch": 0.5425707980875322, "grad_norm": 1.6864092832240816, "learning_rate": 4.5541550553127445e-06, "loss": 0.642, "step": 17703 }, { "epoch": 0.5426014466102734, "grad_norm": 0.8129662582627311, "learning_rate": 4.553660716747998e-06, "loss": 0.4309, "step": 17704 }, { "epoch": 0.5426320951330146, "grad_norm": 1.643457922473451, "learning_rate": 4.5531663825811486e-06, "loss": 0.6953, "step": 17705 }, { "epoch": 0.5426627436557558, "grad_norm": 1.6790247209942524, "learning_rate": 4.5526720528170635e-06, "loss": 0.6521, "step": 17706 }, { "epoch": 0.542693392178497, "grad_norm": 0.7885802499598368, "learning_rate": 4.552177727460616e-06, "loss": 0.4334, "step": 17707 }, { "epoch": 0.5427240407012383, "grad_norm": 1.9897194363011719, "learning_rate": 4.551683406516677e-06, "loss": 0.6925, "step": 17708 }, { "epoch": 0.5427546892239794, "grad_norm": 1.6417376733917681, "learning_rate": 4.551189089990113e-06, "loss": 0.6186, "step": 17709 }, { "epoch": 0.5427853377467206, "grad_norm": 1.7767864205641597, "learning_rate": 4.550694777885801e-06, "loss": 0.5758, "step": 17710 }, { "epoch": 0.5428159862694618, "grad_norm": 0.8208620156887023, "learning_rate": 4.5502004702086076e-06, "loss": 0.4536, "step": 17711 }, { "epoch": 0.542846634792203, "grad_norm": 1.7480246771752055, "learning_rate": 4.549706166963402e-06, "loss": 0.6504, "step": 17712 }, { "epoch": 0.5428772833149442, "grad_norm": 1.5277585472716297, "learning_rate": 4.549211868155059e-06, "loss": 0.6305, "step": 17713 }, { "epoch": 0.5429079318376854, "grad_norm": 1.741557855401053, "learning_rate": 4.548717573788445e-06, "loss": 0.5862, "step": 17714 }, { "epoch": 0.5429385803604266, "grad_norm": 1.6587569435153602, "learning_rate": 4.548223283868433e-06, "loss": 0.6431, "step": 17715 }, { "epoch": 0.5429692288831678, "grad_norm": 1.5840105584153017, "learning_rate": 4.547728998399894e-06, "loss": 0.6644, "step": 17716 }, { "epoch": 0.542999877405909, "grad_norm": 1.7112338391954547, "learning_rate": 4.547234717387694e-06, "loss": 0.5567, "step": 17717 }, { "epoch": 0.5430305259286502, "grad_norm": 1.6526737771617686, "learning_rate": 4.546740440836709e-06, "loss": 0.6194, "step": 17718 }, { "epoch": 0.5430611744513915, "grad_norm": 0.8005817321315349, "learning_rate": 4.546246168751806e-06, "loss": 0.4389, "step": 17719 }, { "epoch": 0.5430918229741326, "grad_norm": 0.7889826237809647, "learning_rate": 4.545751901137853e-06, "loss": 0.4423, "step": 17720 }, { "epoch": 0.5431224714968739, "grad_norm": 1.7031415738401445, "learning_rate": 4.545257637999726e-06, "loss": 0.5726, "step": 17721 }, { "epoch": 0.543153120019615, "grad_norm": 1.9030953099172105, "learning_rate": 4.544763379342291e-06, "loss": 0.5909, "step": 17722 }, { "epoch": 0.5431837685423563, "grad_norm": 1.8432190045343297, "learning_rate": 4.544269125170417e-06, "loss": 0.6218, "step": 17723 }, { "epoch": 0.5432144170650974, "grad_norm": 1.8136666518691704, "learning_rate": 4.543774875488978e-06, "loss": 0.5553, "step": 17724 }, { "epoch": 0.5432450655878387, "grad_norm": 1.8094399349317667, "learning_rate": 4.54328063030284e-06, "loss": 0.6848, "step": 17725 }, { "epoch": 0.5432757141105798, "grad_norm": 1.7001537972808813, "learning_rate": 4.542786389616876e-06, "loss": 0.6143, "step": 17726 }, { "epoch": 0.5433063626333211, "grad_norm": 1.9733683566527267, "learning_rate": 4.542292153435956e-06, "loss": 0.5914, "step": 17727 }, { "epoch": 0.5433370111560623, "grad_norm": 1.5592892391943707, "learning_rate": 4.541797921764945e-06, "loss": 0.663, "step": 17728 }, { "epoch": 0.5433676596788035, "grad_norm": 2.1144256810068622, "learning_rate": 4.5413036946087195e-06, "loss": 0.7197, "step": 17729 }, { "epoch": 0.5433983082015447, "grad_norm": 0.838147891945538, "learning_rate": 4.540809471972146e-06, "loss": 0.4382, "step": 17730 }, { "epoch": 0.5434289567242859, "grad_norm": 1.786210689512305, "learning_rate": 4.540315253860092e-06, "loss": 0.647, "step": 17731 }, { "epoch": 0.5434596052470271, "grad_norm": 1.6604030523167796, "learning_rate": 4.5398210402774315e-06, "loss": 0.628, "step": 17732 }, { "epoch": 0.5434902537697683, "grad_norm": 0.7907591791453016, "learning_rate": 4.5393268312290304e-06, "loss": 0.3897, "step": 17733 }, { "epoch": 0.5435209022925095, "grad_norm": 1.8015430482632553, "learning_rate": 4.53883262671976e-06, "loss": 0.774, "step": 17734 }, { "epoch": 0.5435515508152508, "grad_norm": 1.9445042614360954, "learning_rate": 4.538338426754491e-06, "loss": 0.7507, "step": 17735 }, { "epoch": 0.5435821993379919, "grad_norm": 1.8054009219267504, "learning_rate": 4.537844231338091e-06, "loss": 0.5381, "step": 17736 }, { "epoch": 0.5436128478607332, "grad_norm": 0.789775594921309, "learning_rate": 4.53735004047543e-06, "loss": 0.4244, "step": 17737 }, { "epoch": 0.5436434963834743, "grad_norm": 2.0525862018075025, "learning_rate": 4.536855854171378e-06, "loss": 0.773, "step": 17738 }, { "epoch": 0.5436741449062156, "grad_norm": 1.8010504131351226, "learning_rate": 4.536361672430802e-06, "loss": 0.6334, "step": 17739 }, { "epoch": 0.5437047934289567, "grad_norm": 1.6595741489686038, "learning_rate": 4.535867495258576e-06, "loss": 0.6306, "step": 17740 }, { "epoch": 0.5437354419516979, "grad_norm": 1.749773070384078, "learning_rate": 4.535373322659563e-06, "loss": 0.6273, "step": 17741 }, { "epoch": 0.5437660904744391, "grad_norm": 1.7712893665090237, "learning_rate": 4.534879154638637e-06, "loss": 0.61, "step": 17742 }, { "epoch": 0.5437967389971803, "grad_norm": 1.6682396330507823, "learning_rate": 4.534384991200667e-06, "loss": 0.6669, "step": 17743 }, { "epoch": 0.5438273875199215, "grad_norm": 1.7866080511035853, "learning_rate": 4.533890832350519e-06, "loss": 0.6837, "step": 17744 }, { "epoch": 0.5438580360426627, "grad_norm": 1.7678232918048749, "learning_rate": 4.533396678093065e-06, "loss": 0.7423, "step": 17745 }, { "epoch": 0.543888684565404, "grad_norm": 1.8654023339915968, "learning_rate": 4.532902528433173e-06, "loss": 0.6231, "step": 17746 }, { "epoch": 0.5439193330881451, "grad_norm": 1.7618082516569078, "learning_rate": 4.53240838337571e-06, "loss": 0.7428, "step": 17747 }, { "epoch": 0.5439499816108864, "grad_norm": 1.88214088364251, "learning_rate": 4.531914242925548e-06, "loss": 0.6546, "step": 17748 }, { "epoch": 0.5439806301336275, "grad_norm": 1.5549247706266358, "learning_rate": 4.531420107087557e-06, "loss": 0.6162, "step": 17749 }, { "epoch": 0.5440112786563688, "grad_norm": 1.7969118656444631, "learning_rate": 4.530925975866599e-06, "loss": 0.6255, "step": 17750 }, { "epoch": 0.5440419271791099, "grad_norm": 1.895764012229568, "learning_rate": 4.530431849267552e-06, "loss": 0.5838, "step": 17751 }, { "epoch": 0.5440725757018512, "grad_norm": 0.8530756769434482, "learning_rate": 4.529937727295276e-06, "loss": 0.4302, "step": 17752 }, { "epoch": 0.5441032242245923, "grad_norm": 0.8065621605087043, "learning_rate": 4.529443609954647e-06, "loss": 0.415, "step": 17753 }, { "epoch": 0.5441338727473336, "grad_norm": 1.6027471193016103, "learning_rate": 4.52894949725053e-06, "loss": 0.6383, "step": 17754 }, { "epoch": 0.5441645212700748, "grad_norm": 1.6655112986859137, "learning_rate": 4.528455389187792e-06, "loss": 0.6715, "step": 17755 }, { "epoch": 0.544195169792816, "grad_norm": 1.6664141226969442, "learning_rate": 4.527961285771306e-06, "loss": 0.6181, "step": 17756 }, { "epoch": 0.5442258183155572, "grad_norm": 1.6941680878704755, "learning_rate": 4.527467187005938e-06, "loss": 0.6016, "step": 17757 }, { "epoch": 0.5442564668382984, "grad_norm": 1.8842036362105707, "learning_rate": 4.526973092896556e-06, "loss": 0.5957, "step": 17758 }, { "epoch": 0.5442871153610396, "grad_norm": 1.835780720966295, "learning_rate": 4.526479003448029e-06, "loss": 0.6796, "step": 17759 }, { "epoch": 0.5443177638837808, "grad_norm": 1.7228081604703158, "learning_rate": 4.525984918665225e-06, "loss": 0.6086, "step": 17760 }, { "epoch": 0.544348412406522, "grad_norm": 1.8617677996487387, "learning_rate": 4.5254908385530135e-06, "loss": 0.6252, "step": 17761 }, { "epoch": 0.5443790609292632, "grad_norm": 1.7314200137706983, "learning_rate": 4.524996763116265e-06, "loss": 0.61, "step": 17762 }, { "epoch": 0.5444097094520044, "grad_norm": 1.9979647040854245, "learning_rate": 4.5245026923598396e-06, "loss": 0.6339, "step": 17763 }, { "epoch": 0.5444403579747457, "grad_norm": 1.8252543120648126, "learning_rate": 4.5240086262886154e-06, "loss": 0.6487, "step": 17764 }, { "epoch": 0.5444710064974868, "grad_norm": 1.8247773792153628, "learning_rate": 4.523514564907454e-06, "loss": 0.6219, "step": 17765 }, { "epoch": 0.5445016550202281, "grad_norm": 1.632783606684848, "learning_rate": 4.523020508221225e-06, "loss": 0.7046, "step": 17766 }, { "epoch": 0.5445323035429692, "grad_norm": 1.6787779343262474, "learning_rate": 4.522526456234797e-06, "loss": 0.6198, "step": 17767 }, { "epoch": 0.5445629520657105, "grad_norm": 1.6615006582913383, "learning_rate": 4.522032408953039e-06, "loss": 0.5636, "step": 17768 }, { "epoch": 0.5445936005884516, "grad_norm": 1.9748363331869687, "learning_rate": 4.521538366380818e-06, "loss": 0.7284, "step": 17769 }, { "epoch": 0.5446242491111929, "grad_norm": 1.7106081174989, "learning_rate": 4.521044328523001e-06, "loss": 0.6409, "step": 17770 }, { "epoch": 0.544654897633934, "grad_norm": 1.7192666800639604, "learning_rate": 4.520550295384457e-06, "loss": 0.6977, "step": 17771 }, { "epoch": 0.5446855461566752, "grad_norm": 1.52093255603134, "learning_rate": 4.520056266970055e-06, "loss": 0.6979, "step": 17772 }, { "epoch": 0.5447161946794165, "grad_norm": 1.8138973890293582, "learning_rate": 4.51956224328466e-06, "loss": 0.6484, "step": 17773 }, { "epoch": 0.5447468432021576, "grad_norm": 1.7222890585247963, "learning_rate": 4.519068224333141e-06, "loss": 0.6722, "step": 17774 }, { "epoch": 0.5447774917248989, "grad_norm": 0.9588630905175688, "learning_rate": 4.518574210120366e-06, "loss": 0.45, "step": 17775 }, { "epoch": 0.54480814024764, "grad_norm": 1.9787086371838394, "learning_rate": 4.518080200651203e-06, "loss": 0.6768, "step": 17776 }, { "epoch": 0.5448387887703813, "grad_norm": 1.585928838061218, "learning_rate": 4.517586195930519e-06, "loss": 0.5581, "step": 17777 }, { "epoch": 0.5448694372931224, "grad_norm": 1.5993050589734183, "learning_rate": 4.517092195963182e-06, "loss": 0.5388, "step": 17778 }, { "epoch": 0.5449000858158637, "grad_norm": 1.7324081396686866, "learning_rate": 4.516598200754057e-06, "loss": 0.645, "step": 17779 }, { "epoch": 0.5449307343386048, "grad_norm": 1.6586288117655505, "learning_rate": 4.5161042103080156e-06, "loss": 0.7008, "step": 17780 }, { "epoch": 0.5449613828613461, "grad_norm": 2.1349196353926043, "learning_rate": 4.515610224629924e-06, "loss": 0.626, "step": 17781 }, { "epoch": 0.5449920313840872, "grad_norm": 1.6598988423105183, "learning_rate": 4.515116243724646e-06, "loss": 0.6273, "step": 17782 }, { "epoch": 0.5450226799068285, "grad_norm": 0.792466104162375, "learning_rate": 4.514622267597054e-06, "loss": 0.4421, "step": 17783 }, { "epoch": 0.5450533284295697, "grad_norm": 1.638488619230122, "learning_rate": 4.514128296252013e-06, "loss": 0.5989, "step": 17784 }, { "epoch": 0.5450839769523109, "grad_norm": 1.883723562592468, "learning_rate": 4.513634329694389e-06, "loss": 0.678, "step": 17785 }, { "epoch": 0.5451146254750521, "grad_norm": 1.6536608790528744, "learning_rate": 4.513140367929052e-06, "loss": 0.5596, "step": 17786 }, { "epoch": 0.5451452739977933, "grad_norm": 1.5713940430097284, "learning_rate": 4.512646410960865e-06, "loss": 0.6412, "step": 17787 }, { "epoch": 0.5451759225205345, "grad_norm": 1.520120924458014, "learning_rate": 4.5121524587947e-06, "loss": 0.5611, "step": 17788 }, { "epoch": 0.5452065710432757, "grad_norm": 1.6786102963478278, "learning_rate": 4.51165851143542e-06, "loss": 0.6464, "step": 17789 }, { "epoch": 0.5452372195660169, "grad_norm": 0.8393061520720417, "learning_rate": 4.511164568887894e-06, "loss": 0.4279, "step": 17790 }, { "epoch": 0.5452678680887582, "grad_norm": 1.7376312706554387, "learning_rate": 4.510670631156989e-06, "loss": 0.5397, "step": 17791 }, { "epoch": 0.5452985166114993, "grad_norm": 1.7196695256253878, "learning_rate": 4.510176698247573e-06, "loss": 0.6921, "step": 17792 }, { "epoch": 0.5453291651342406, "grad_norm": 1.8963361590777985, "learning_rate": 4.509682770164508e-06, "loss": 0.6587, "step": 17793 }, { "epoch": 0.5453598136569817, "grad_norm": 1.7559280386015312, "learning_rate": 4.509188846912667e-06, "loss": 0.5807, "step": 17794 }, { "epoch": 0.545390462179723, "grad_norm": 1.7047729732024286, "learning_rate": 4.508694928496911e-06, "loss": 0.604, "step": 17795 }, { "epoch": 0.5454211107024641, "grad_norm": 1.6499704791601377, "learning_rate": 4.508201014922113e-06, "loss": 0.5512, "step": 17796 }, { "epoch": 0.5454517592252054, "grad_norm": 1.8426198776638434, "learning_rate": 4.507707106193135e-06, "loss": 0.6435, "step": 17797 }, { "epoch": 0.5454824077479465, "grad_norm": 1.8041826385284072, "learning_rate": 4.507213202314843e-06, "loss": 0.62, "step": 17798 }, { "epoch": 0.5455130562706878, "grad_norm": 1.9304506822107634, "learning_rate": 4.506719303292107e-06, "loss": 0.6956, "step": 17799 }, { "epoch": 0.545543704793429, "grad_norm": 0.7899803220013999, "learning_rate": 4.506225409129792e-06, "loss": 0.4404, "step": 17800 }, { "epoch": 0.5455743533161702, "grad_norm": 1.8774465921993697, "learning_rate": 4.505731519832763e-06, "loss": 0.6453, "step": 17801 }, { "epoch": 0.5456050018389114, "grad_norm": 1.843167074273254, "learning_rate": 4.50523763540589e-06, "loss": 0.6312, "step": 17802 }, { "epoch": 0.5456356503616525, "grad_norm": 1.556755528762679, "learning_rate": 4.504743755854037e-06, "loss": 0.5902, "step": 17803 }, { "epoch": 0.5456662988843938, "grad_norm": 1.6558341723294365, "learning_rate": 4.504249881182067e-06, "loss": 0.5711, "step": 17804 }, { "epoch": 0.5456969474071349, "grad_norm": 1.8199634059012322, "learning_rate": 4.503756011394854e-06, "loss": 0.649, "step": 17805 }, { "epoch": 0.5457275959298762, "grad_norm": 1.6788278261141358, "learning_rate": 4.503262146497256e-06, "loss": 0.6004, "step": 17806 }, { "epoch": 0.5457582444526173, "grad_norm": 1.718230920848199, "learning_rate": 4.502768286494148e-06, "loss": 0.5875, "step": 17807 }, { "epoch": 0.5457888929753586, "grad_norm": 2.0646065786848506, "learning_rate": 4.502274431390388e-06, "loss": 0.6791, "step": 17808 }, { "epoch": 0.5458195414980997, "grad_norm": 1.7162308505913537, "learning_rate": 4.501780581190845e-06, "loss": 0.7025, "step": 17809 }, { "epoch": 0.545850190020841, "grad_norm": 1.6481990511929037, "learning_rate": 4.501286735900388e-06, "loss": 0.5562, "step": 17810 }, { "epoch": 0.5458808385435822, "grad_norm": 1.7137096964365028, "learning_rate": 4.500792895523879e-06, "loss": 0.5609, "step": 17811 }, { "epoch": 0.5459114870663234, "grad_norm": 2.046077292934159, "learning_rate": 4.500299060066184e-06, "loss": 0.625, "step": 17812 }, { "epoch": 0.5459421355890646, "grad_norm": 1.8844937717103665, "learning_rate": 4.499805229532172e-06, "loss": 0.6788, "step": 17813 }, { "epoch": 0.5459727841118058, "grad_norm": 0.7971268122699962, "learning_rate": 4.499311403926705e-06, "loss": 0.4233, "step": 17814 }, { "epoch": 0.546003432634547, "grad_norm": 1.7985990667085219, "learning_rate": 4.498817583254654e-06, "loss": 0.6505, "step": 17815 }, { "epoch": 0.5460340811572882, "grad_norm": 0.7738222276951698, "learning_rate": 4.4983237675208805e-06, "loss": 0.4302, "step": 17816 }, { "epoch": 0.5460647296800294, "grad_norm": 1.5314339840387026, "learning_rate": 4.49782995673025e-06, "loss": 0.6691, "step": 17817 }, { "epoch": 0.5460953782027707, "grad_norm": 0.8185506416133536, "learning_rate": 4.4973361508876306e-06, "loss": 0.4375, "step": 17818 }, { "epoch": 0.5461260267255118, "grad_norm": 1.563202698727265, "learning_rate": 4.496842349997886e-06, "loss": 0.5837, "step": 17819 }, { "epoch": 0.5461566752482531, "grad_norm": 2.0697781366766104, "learning_rate": 4.4963485540658824e-06, "loss": 0.6246, "step": 17820 }, { "epoch": 0.5461873237709942, "grad_norm": 1.5716404066724656, "learning_rate": 4.495854763096486e-06, "loss": 0.5764, "step": 17821 }, { "epoch": 0.5462179722937355, "grad_norm": 1.6603846723249014, "learning_rate": 4.495360977094561e-06, "loss": 0.7, "step": 17822 }, { "epoch": 0.5462486208164766, "grad_norm": 1.7043724543402061, "learning_rate": 4.494867196064973e-06, "loss": 0.6035, "step": 17823 }, { "epoch": 0.5462792693392179, "grad_norm": 1.9045721755173792, "learning_rate": 4.49437342001259e-06, "loss": 0.757, "step": 17824 }, { "epoch": 0.546309917861959, "grad_norm": 1.6738454730814825, "learning_rate": 4.493879648942272e-06, "loss": 0.5089, "step": 17825 }, { "epoch": 0.5463405663847003, "grad_norm": 0.8066208582912912, "learning_rate": 4.49338588285889e-06, "loss": 0.4115, "step": 17826 }, { "epoch": 0.5463712149074414, "grad_norm": 1.7475544469454962, "learning_rate": 4.4928921217673055e-06, "loss": 0.6331, "step": 17827 }, { "epoch": 0.5464018634301827, "grad_norm": 1.8922510957770566, "learning_rate": 4.492398365672384e-06, "loss": 0.5959, "step": 17828 }, { "epoch": 0.5464325119529239, "grad_norm": 1.7284573028638384, "learning_rate": 4.491904614578992e-06, "loss": 0.6331, "step": 17829 }, { "epoch": 0.5464631604756651, "grad_norm": 1.7425657130760417, "learning_rate": 4.491410868491994e-06, "loss": 0.6266, "step": 17830 }, { "epoch": 0.5464938089984063, "grad_norm": 0.8246620483131503, "learning_rate": 4.490917127416254e-06, "loss": 0.4132, "step": 17831 }, { "epoch": 0.5465244575211475, "grad_norm": 1.6058445650680966, "learning_rate": 4.490423391356638e-06, "loss": 0.6671, "step": 17832 }, { "epoch": 0.5465551060438887, "grad_norm": 1.6128298158684966, "learning_rate": 4.4899296603180105e-06, "loss": 0.6562, "step": 17833 }, { "epoch": 0.5465857545666298, "grad_norm": 0.7846272436596664, "learning_rate": 4.4894359343052375e-06, "loss": 0.4255, "step": 17834 }, { "epoch": 0.5466164030893711, "grad_norm": 1.9389186257845177, "learning_rate": 4.488942213323184e-06, "loss": 0.7027, "step": 17835 }, { "epoch": 0.5466470516121122, "grad_norm": 1.708837872729404, "learning_rate": 4.48844849737671e-06, "loss": 0.6528, "step": 17836 }, { "epoch": 0.5466777001348535, "grad_norm": 1.6327624859255359, "learning_rate": 4.487954786470687e-06, "loss": 0.5702, "step": 17837 }, { "epoch": 0.5467083486575947, "grad_norm": 2.189999013555777, "learning_rate": 4.487461080609976e-06, "loss": 0.6397, "step": 17838 }, { "epoch": 0.5467389971803359, "grad_norm": 1.7796481109698037, "learning_rate": 4.486967379799441e-06, "loss": 0.6446, "step": 17839 }, { "epoch": 0.5467696457030771, "grad_norm": 1.5908358732993875, "learning_rate": 4.486473684043948e-06, "loss": 0.6457, "step": 17840 }, { "epoch": 0.5468002942258183, "grad_norm": 1.885135002154278, "learning_rate": 4.485979993348361e-06, "loss": 0.7336, "step": 17841 }, { "epoch": 0.5468309427485595, "grad_norm": 1.8496966558033114, "learning_rate": 4.4854863077175445e-06, "loss": 0.6195, "step": 17842 }, { "epoch": 0.5468615912713007, "grad_norm": 1.6874327143379266, "learning_rate": 4.484992627156365e-06, "loss": 0.6848, "step": 17843 }, { "epoch": 0.5468922397940419, "grad_norm": 1.8300914121600682, "learning_rate": 4.484498951669682e-06, "loss": 0.6536, "step": 17844 }, { "epoch": 0.5469228883167832, "grad_norm": 1.7563303431481032, "learning_rate": 4.484005281262364e-06, "loss": 0.5966, "step": 17845 }, { "epoch": 0.5469535368395243, "grad_norm": 1.6102132734577737, "learning_rate": 4.483511615939276e-06, "loss": 0.5412, "step": 17846 }, { "epoch": 0.5469841853622656, "grad_norm": 2.01614117431148, "learning_rate": 4.483017955705277e-06, "loss": 0.6162, "step": 17847 }, { "epoch": 0.5470148338850067, "grad_norm": 0.8127636918586179, "learning_rate": 4.4825243005652364e-06, "loss": 0.4214, "step": 17848 }, { "epoch": 0.547045482407748, "grad_norm": 1.4111184101943224, "learning_rate": 4.4820306505240166e-06, "loss": 0.5528, "step": 17849 }, { "epoch": 0.5470761309304891, "grad_norm": 1.8856124068695215, "learning_rate": 4.4815370055864785e-06, "loss": 0.6527, "step": 17850 }, { "epoch": 0.5471067794532304, "grad_norm": 0.8032053263521748, "learning_rate": 4.481043365757492e-06, "loss": 0.427, "step": 17851 }, { "epoch": 0.5471374279759715, "grad_norm": 1.8065046008129875, "learning_rate": 4.480549731041915e-06, "loss": 0.5741, "step": 17852 }, { "epoch": 0.5471680764987128, "grad_norm": 0.784273967732316, "learning_rate": 4.480056101444617e-06, "loss": 0.4265, "step": 17853 }, { "epoch": 0.547198725021454, "grad_norm": 1.7527015202958853, "learning_rate": 4.4795624769704585e-06, "loss": 0.6465, "step": 17854 }, { "epoch": 0.5472293735441952, "grad_norm": 1.7607068417288256, "learning_rate": 4.479068857624304e-06, "loss": 0.5861, "step": 17855 }, { "epoch": 0.5472600220669364, "grad_norm": 1.7873425650318986, "learning_rate": 4.478575243411017e-06, "loss": 0.5862, "step": 17856 }, { "epoch": 0.5472906705896776, "grad_norm": 2.2480173859460133, "learning_rate": 4.478081634335464e-06, "loss": 0.6617, "step": 17857 }, { "epoch": 0.5473213191124188, "grad_norm": 1.6889112357432807, "learning_rate": 4.477588030402502e-06, "loss": 0.6377, "step": 17858 }, { "epoch": 0.54735196763516, "grad_norm": 1.7847914650002121, "learning_rate": 4.477094431617001e-06, "loss": 0.532, "step": 17859 }, { "epoch": 0.5473826161579012, "grad_norm": 1.7356697194923485, "learning_rate": 4.476600837983821e-06, "loss": 0.628, "step": 17860 }, { "epoch": 0.5474132646806424, "grad_norm": 0.7674764893558116, "learning_rate": 4.47610724950783e-06, "loss": 0.43, "step": 17861 }, { "epoch": 0.5474439132033836, "grad_norm": 0.7877247278023424, "learning_rate": 4.475613666193887e-06, "loss": 0.431, "step": 17862 }, { "epoch": 0.5474745617261249, "grad_norm": 1.8103976862148379, "learning_rate": 4.475120088046855e-06, "loss": 0.6253, "step": 17863 }, { "epoch": 0.547505210248866, "grad_norm": 1.748705988605216, "learning_rate": 4.4746265150716e-06, "loss": 0.6096, "step": 17864 }, { "epoch": 0.5475358587716072, "grad_norm": 0.7939035826177573, "learning_rate": 4.474132947272985e-06, "loss": 0.4074, "step": 17865 }, { "epoch": 0.5475665072943484, "grad_norm": 2.029956462943008, "learning_rate": 4.4736393846558716e-06, "loss": 0.7549, "step": 17866 }, { "epoch": 0.5475971558170896, "grad_norm": 1.824093275024584, "learning_rate": 4.473145827225125e-06, "loss": 0.6962, "step": 17867 }, { "epoch": 0.5476278043398308, "grad_norm": 1.6387549624112348, "learning_rate": 4.472652274985606e-06, "loss": 0.6775, "step": 17868 }, { "epoch": 0.547658452862572, "grad_norm": 1.8242994368844856, "learning_rate": 4.472158727942181e-06, "loss": 0.5506, "step": 17869 }, { "epoch": 0.5476891013853132, "grad_norm": 1.754087859845289, "learning_rate": 4.471665186099711e-06, "loss": 0.7038, "step": 17870 }, { "epoch": 0.5477197499080544, "grad_norm": 1.6111727454990283, "learning_rate": 4.471171649463057e-06, "loss": 0.5891, "step": 17871 }, { "epoch": 0.5477503984307956, "grad_norm": 1.7773609599496902, "learning_rate": 4.470678118037086e-06, "loss": 0.7114, "step": 17872 }, { "epoch": 0.5477810469535368, "grad_norm": 1.7650060888927661, "learning_rate": 4.470184591826658e-06, "loss": 0.6471, "step": 17873 }, { "epoch": 0.5478116954762781, "grad_norm": 1.947347196821568, "learning_rate": 4.469691070836637e-06, "loss": 0.6679, "step": 17874 }, { "epoch": 0.5478423439990192, "grad_norm": 1.9249979282500886, "learning_rate": 4.469197555071886e-06, "loss": 0.7288, "step": 17875 }, { "epoch": 0.5478729925217605, "grad_norm": 1.8420801691553095, "learning_rate": 4.468704044537268e-06, "loss": 0.6742, "step": 17876 }, { "epoch": 0.5479036410445016, "grad_norm": 1.6461112444628527, "learning_rate": 4.468210539237642e-06, "loss": 0.7129, "step": 17877 }, { "epoch": 0.5479342895672429, "grad_norm": 1.6523324314234955, "learning_rate": 4.467717039177877e-06, "loss": 0.545, "step": 17878 }, { "epoch": 0.547964938089984, "grad_norm": 1.5956710944390393, "learning_rate": 4.467223544362828e-06, "loss": 0.638, "step": 17879 }, { "epoch": 0.5479955866127253, "grad_norm": 1.517677650059064, "learning_rate": 4.466730054797366e-06, "loss": 0.6115, "step": 17880 }, { "epoch": 0.5480262351354664, "grad_norm": 1.5275687942146734, "learning_rate": 4.466236570486348e-06, "loss": 0.5852, "step": 17881 }, { "epoch": 0.5480568836582077, "grad_norm": 1.541282286455491, "learning_rate": 4.465743091434636e-06, "loss": 0.6306, "step": 17882 }, { "epoch": 0.5480875321809489, "grad_norm": 1.7095639985979048, "learning_rate": 4.465249617647096e-06, "loss": 0.6139, "step": 17883 }, { "epoch": 0.5481181807036901, "grad_norm": 0.8914079842924535, "learning_rate": 4.464756149128588e-06, "loss": 0.4372, "step": 17884 }, { "epoch": 0.5481488292264313, "grad_norm": 1.9410554783449945, "learning_rate": 4.4642626858839736e-06, "loss": 0.6619, "step": 17885 }, { "epoch": 0.5481794777491725, "grad_norm": 1.80716400351385, "learning_rate": 4.4637692279181174e-06, "loss": 0.7176, "step": 17886 }, { "epoch": 0.5482101262719137, "grad_norm": 1.6801142762166401, "learning_rate": 4.463275775235878e-06, "loss": 0.6111, "step": 17887 }, { "epoch": 0.5482407747946549, "grad_norm": 0.8101868359495488, "learning_rate": 4.462782327842122e-06, "loss": 0.4506, "step": 17888 }, { "epoch": 0.5482714233173961, "grad_norm": 1.832106005988081, "learning_rate": 4.462288885741709e-06, "loss": 0.6897, "step": 17889 }, { "epoch": 0.5483020718401374, "grad_norm": 1.5408846140711068, "learning_rate": 4.461795448939499e-06, "loss": 0.5342, "step": 17890 }, { "epoch": 0.5483327203628785, "grad_norm": 0.7928786686470796, "learning_rate": 4.46130201744036e-06, "loss": 0.4445, "step": 17891 }, { "epoch": 0.5483633688856198, "grad_norm": 2.0761456495653743, "learning_rate": 4.460808591249147e-06, "loss": 0.6532, "step": 17892 }, { "epoch": 0.5483940174083609, "grad_norm": 0.7595034527694549, "learning_rate": 4.460315170370725e-06, "loss": 0.4403, "step": 17893 }, { "epoch": 0.5484246659311022, "grad_norm": 1.9107449055144206, "learning_rate": 4.4598217548099575e-06, "loss": 0.6738, "step": 17894 }, { "epoch": 0.5484553144538433, "grad_norm": 1.6814722428805917, "learning_rate": 4.459328344571702e-06, "loss": 0.6943, "step": 17895 }, { "epoch": 0.5484859629765845, "grad_norm": 1.6028619500384829, "learning_rate": 4.458834939660824e-06, "loss": 0.6123, "step": 17896 }, { "epoch": 0.5485166114993257, "grad_norm": 1.730769431936031, "learning_rate": 4.458341540082185e-06, "loss": 0.6896, "step": 17897 }, { "epoch": 0.5485472600220669, "grad_norm": 0.871068221148961, "learning_rate": 4.4578481458406425e-06, "loss": 0.4465, "step": 17898 }, { "epoch": 0.5485779085448081, "grad_norm": 2.061006708421774, "learning_rate": 4.4573547569410634e-06, "loss": 0.6171, "step": 17899 }, { "epoch": 0.5486085570675493, "grad_norm": 1.6041904005462333, "learning_rate": 4.456861373388307e-06, "loss": 0.5534, "step": 17900 }, { "epoch": 0.5486392055902906, "grad_norm": 1.7904341919791467, "learning_rate": 4.456367995187231e-06, "loss": 0.5812, "step": 17901 }, { "epoch": 0.5486698541130317, "grad_norm": 1.7921675199292513, "learning_rate": 4.455874622342705e-06, "loss": 0.6327, "step": 17902 }, { "epoch": 0.548700502635773, "grad_norm": 1.7619497942639952, "learning_rate": 4.455381254859583e-06, "loss": 0.6731, "step": 17903 }, { "epoch": 0.5487311511585141, "grad_norm": 1.5899091754905792, "learning_rate": 4.454887892742728e-06, "loss": 0.6085, "step": 17904 }, { "epoch": 0.5487617996812554, "grad_norm": 1.6662831381441112, "learning_rate": 4.454394535997004e-06, "loss": 0.6064, "step": 17905 }, { "epoch": 0.5487924482039965, "grad_norm": 1.6880894101128707, "learning_rate": 4.4539011846272684e-06, "loss": 0.5804, "step": 17906 }, { "epoch": 0.5488230967267378, "grad_norm": 1.6794937572946822, "learning_rate": 4.453407838638385e-06, "loss": 0.6493, "step": 17907 }, { "epoch": 0.5488537452494789, "grad_norm": 1.7382315700837023, "learning_rate": 4.452914498035215e-06, "loss": 0.6727, "step": 17908 }, { "epoch": 0.5488843937722202, "grad_norm": 1.6228061556528894, "learning_rate": 4.452421162822616e-06, "loss": 0.565, "step": 17909 }, { "epoch": 0.5489150422949614, "grad_norm": 1.933514962391402, "learning_rate": 4.451927833005453e-06, "loss": 0.6188, "step": 17910 }, { "epoch": 0.5489456908177026, "grad_norm": 0.8176756018300521, "learning_rate": 4.451434508588587e-06, "loss": 0.4544, "step": 17911 }, { "epoch": 0.5489763393404438, "grad_norm": 1.9292227481170303, "learning_rate": 4.450941189576874e-06, "loss": 0.6311, "step": 17912 }, { "epoch": 0.549006987863185, "grad_norm": 1.7401583856863663, "learning_rate": 4.4504478759751805e-06, "loss": 0.5476, "step": 17913 }, { "epoch": 0.5490376363859262, "grad_norm": 1.5458374340573073, "learning_rate": 4.449954567788363e-06, "loss": 0.5411, "step": 17914 }, { "epoch": 0.5490682849086674, "grad_norm": 1.6174063489459078, "learning_rate": 4.449461265021284e-06, "loss": 0.6367, "step": 17915 }, { "epoch": 0.5490989334314086, "grad_norm": 0.7773053635079572, "learning_rate": 4.448967967678805e-06, "loss": 0.4224, "step": 17916 }, { "epoch": 0.5491295819541498, "grad_norm": 1.8093254975460031, "learning_rate": 4.448474675765783e-06, "loss": 0.6044, "step": 17917 }, { "epoch": 0.549160230476891, "grad_norm": 1.7765536577156686, "learning_rate": 4.447981389287085e-06, "loss": 0.5397, "step": 17918 }, { "epoch": 0.5491908789996323, "grad_norm": 1.7535048347456137, "learning_rate": 4.4474881082475655e-06, "loss": 0.7285, "step": 17919 }, { "epoch": 0.5492215275223734, "grad_norm": 1.6250347624048456, "learning_rate": 4.4469948326520865e-06, "loss": 0.6206, "step": 17920 }, { "epoch": 0.5492521760451147, "grad_norm": 1.6170420499550548, "learning_rate": 4.446501562505511e-06, "loss": 0.6691, "step": 17921 }, { "epoch": 0.5492828245678558, "grad_norm": 1.830960953090353, "learning_rate": 4.446008297812694e-06, "loss": 0.6462, "step": 17922 }, { "epoch": 0.5493134730905971, "grad_norm": 1.5835732682236707, "learning_rate": 4.445515038578502e-06, "loss": 0.6175, "step": 17923 }, { "epoch": 0.5493441216133382, "grad_norm": 1.742995883705481, "learning_rate": 4.445021784807792e-06, "loss": 0.6532, "step": 17924 }, { "epoch": 0.5493747701360795, "grad_norm": 1.721503880598056, "learning_rate": 4.444528536505423e-06, "loss": 0.5906, "step": 17925 }, { "epoch": 0.5494054186588206, "grad_norm": 1.5803502712076893, "learning_rate": 4.444035293676257e-06, "loss": 0.6232, "step": 17926 }, { "epoch": 0.5494360671815618, "grad_norm": 1.6907443237955497, "learning_rate": 4.443542056325153e-06, "loss": 0.5325, "step": 17927 }, { "epoch": 0.549466715704303, "grad_norm": 0.775566496574197, "learning_rate": 4.4430488244569715e-06, "loss": 0.4237, "step": 17928 }, { "epoch": 0.5494973642270442, "grad_norm": 1.7863018153339614, "learning_rate": 4.442555598076573e-06, "loss": 0.7076, "step": 17929 }, { "epoch": 0.5495280127497855, "grad_norm": 1.7369191102113102, "learning_rate": 4.442062377188818e-06, "loss": 0.5986, "step": 17930 }, { "epoch": 0.5495586612725266, "grad_norm": 1.5257958600038661, "learning_rate": 4.441569161798562e-06, "loss": 0.6482, "step": 17931 }, { "epoch": 0.5495893097952679, "grad_norm": 1.66350009473373, "learning_rate": 4.441075951910671e-06, "loss": 0.5939, "step": 17932 }, { "epoch": 0.549619958318009, "grad_norm": 2.0973095807914217, "learning_rate": 4.440582747529998e-06, "loss": 0.73, "step": 17933 }, { "epoch": 0.5496506068407503, "grad_norm": 1.6904568613873237, "learning_rate": 4.440089548661411e-06, "loss": 0.5942, "step": 17934 }, { "epoch": 0.5496812553634914, "grad_norm": 1.7227055282533055, "learning_rate": 4.439596355309763e-06, "loss": 0.6357, "step": 17935 }, { "epoch": 0.5497119038862327, "grad_norm": 0.7665167650083545, "learning_rate": 4.439103167479913e-06, "loss": 0.4199, "step": 17936 }, { "epoch": 0.5497425524089739, "grad_norm": 1.8154904757560197, "learning_rate": 4.438609985176726e-06, "loss": 0.6934, "step": 17937 }, { "epoch": 0.5497732009317151, "grad_norm": 1.736306858434117, "learning_rate": 4.438116808405058e-06, "loss": 0.6587, "step": 17938 }, { "epoch": 0.5498038494544563, "grad_norm": 1.8695445313694035, "learning_rate": 4.437623637169768e-06, "loss": 0.6584, "step": 17939 }, { "epoch": 0.5498344979771975, "grad_norm": 1.667168476879837, "learning_rate": 4.437130471475717e-06, "loss": 0.5953, "step": 17940 }, { "epoch": 0.5498651464999387, "grad_norm": 2.094331581074104, "learning_rate": 4.4366373113277615e-06, "loss": 0.6577, "step": 17941 }, { "epoch": 0.5498957950226799, "grad_norm": 1.7383464565863287, "learning_rate": 4.436144156730765e-06, "loss": 0.5599, "step": 17942 }, { "epoch": 0.5499264435454211, "grad_norm": 1.7261381705588017, "learning_rate": 4.435651007689585e-06, "loss": 0.6614, "step": 17943 }, { "epoch": 0.5499570920681623, "grad_norm": 0.7632268291695954, "learning_rate": 4.435157864209077e-06, "loss": 0.4267, "step": 17944 }, { "epoch": 0.5499877405909035, "grad_norm": 1.6552176846444178, "learning_rate": 4.434664726294106e-06, "loss": 0.6745, "step": 17945 }, { "epoch": 0.5500183891136448, "grad_norm": 0.810717681722344, "learning_rate": 4.434171593949527e-06, "loss": 0.4477, "step": 17946 }, { "epoch": 0.5500490376363859, "grad_norm": 1.6538426921989047, "learning_rate": 4.4336784671802e-06, "loss": 0.6403, "step": 17947 }, { "epoch": 0.5500796861591272, "grad_norm": 1.8119856218444022, "learning_rate": 4.433185345990984e-06, "loss": 0.6496, "step": 17948 }, { "epoch": 0.5501103346818683, "grad_norm": 1.893737439649935, "learning_rate": 4.432692230386737e-06, "loss": 0.5811, "step": 17949 }, { "epoch": 0.5501409832046096, "grad_norm": 1.6677034408437406, "learning_rate": 4.432199120372319e-06, "loss": 0.6372, "step": 17950 }, { "epoch": 0.5501716317273507, "grad_norm": 3.062897137890888, "learning_rate": 4.431706015952589e-06, "loss": 0.5388, "step": 17951 }, { "epoch": 0.550202280250092, "grad_norm": 1.8949702049869308, "learning_rate": 4.431212917132404e-06, "loss": 0.6394, "step": 17952 }, { "epoch": 0.5502329287728331, "grad_norm": 1.6160305375486936, "learning_rate": 4.4307198239166245e-06, "loss": 0.6076, "step": 17953 }, { "epoch": 0.5502635772955744, "grad_norm": 1.6447587039202087, "learning_rate": 4.43022673631011e-06, "loss": 0.6596, "step": 17954 }, { "epoch": 0.5502942258183156, "grad_norm": 1.7953644087158582, "learning_rate": 4.429733654317713e-06, "loss": 0.5973, "step": 17955 }, { "epoch": 0.5503248743410568, "grad_norm": 1.7172043858301307, "learning_rate": 4.4292405779443e-06, "loss": 0.5914, "step": 17956 }, { "epoch": 0.550355522863798, "grad_norm": 0.7917629537346013, "learning_rate": 4.428747507194725e-06, "loss": 0.4334, "step": 17957 }, { "epoch": 0.5503861713865391, "grad_norm": 1.7590232112704611, "learning_rate": 4.428254442073845e-06, "loss": 0.6044, "step": 17958 }, { "epoch": 0.5504168199092804, "grad_norm": 2.045179691527631, "learning_rate": 4.427761382586522e-06, "loss": 0.6742, "step": 17959 }, { "epoch": 0.5504474684320215, "grad_norm": 0.7668518293413231, "learning_rate": 4.427268328737611e-06, "loss": 0.4258, "step": 17960 }, { "epoch": 0.5504781169547628, "grad_norm": 1.7919385931050853, "learning_rate": 4.426775280531973e-06, "loss": 0.6729, "step": 17961 }, { "epoch": 0.5505087654775039, "grad_norm": 0.7747451471348215, "learning_rate": 4.426282237974465e-06, "loss": 0.4307, "step": 17962 }, { "epoch": 0.5505394140002452, "grad_norm": 1.503238269286537, "learning_rate": 4.425789201069943e-06, "loss": 0.5962, "step": 17963 }, { "epoch": 0.5505700625229863, "grad_norm": 1.6099341191253966, "learning_rate": 4.425296169823269e-06, "loss": 0.6195, "step": 17964 }, { "epoch": 0.5506007110457276, "grad_norm": 1.6536215858578365, "learning_rate": 4.4248031442392995e-06, "loss": 0.5709, "step": 17965 }, { "epoch": 0.5506313595684688, "grad_norm": 1.7665659011894783, "learning_rate": 4.42431012432289e-06, "loss": 0.6079, "step": 17966 }, { "epoch": 0.55066200809121, "grad_norm": 1.705545722065179, "learning_rate": 4.423817110078901e-06, "loss": 0.587, "step": 17967 }, { "epoch": 0.5506926566139512, "grad_norm": 1.7391169750906967, "learning_rate": 4.423324101512188e-06, "loss": 0.5973, "step": 17968 }, { "epoch": 0.5507233051366924, "grad_norm": 1.8100239213853566, "learning_rate": 4.422831098627611e-06, "loss": 0.5913, "step": 17969 }, { "epoch": 0.5507539536594336, "grad_norm": 1.5345631807668128, "learning_rate": 4.4223381014300285e-06, "loss": 0.5308, "step": 17970 }, { "epoch": 0.5507846021821748, "grad_norm": 1.9743950061266524, "learning_rate": 4.421845109924294e-06, "loss": 0.6289, "step": 17971 }, { "epoch": 0.550815250704916, "grad_norm": 1.7530439838331588, "learning_rate": 4.421352124115269e-06, "loss": 0.7094, "step": 17972 }, { "epoch": 0.5508458992276573, "grad_norm": 1.5996374440959775, "learning_rate": 4.42085914400781e-06, "loss": 0.6524, "step": 17973 }, { "epoch": 0.5508765477503984, "grad_norm": 1.7234343735298727, "learning_rate": 4.420366169606772e-06, "loss": 0.6187, "step": 17974 }, { "epoch": 0.5509071962731397, "grad_norm": 1.7312656558146842, "learning_rate": 4.4198732009170165e-06, "loss": 0.6762, "step": 17975 }, { "epoch": 0.5509378447958808, "grad_norm": 1.7599712022924139, "learning_rate": 4.419380237943396e-06, "loss": 0.6639, "step": 17976 }, { "epoch": 0.5509684933186221, "grad_norm": 1.9338838382969554, "learning_rate": 4.418887280690774e-06, "loss": 0.636, "step": 17977 }, { "epoch": 0.5509991418413632, "grad_norm": 1.657899166877422, "learning_rate": 4.418394329164003e-06, "loss": 0.6347, "step": 17978 }, { "epoch": 0.5510297903641045, "grad_norm": 1.5651064762341609, "learning_rate": 4.417901383367941e-06, "loss": 0.5915, "step": 17979 }, { "epoch": 0.5510604388868456, "grad_norm": 1.696680732579792, "learning_rate": 4.417408443307446e-06, "loss": 0.5418, "step": 17980 }, { "epoch": 0.5510910874095869, "grad_norm": 1.823770305539363, "learning_rate": 4.416915508987375e-06, "loss": 0.6965, "step": 17981 }, { "epoch": 0.551121735932328, "grad_norm": 1.5014123208306576, "learning_rate": 4.416422580412584e-06, "loss": 0.6119, "step": 17982 }, { "epoch": 0.5511523844550693, "grad_norm": 1.592546707919526, "learning_rate": 4.4159296575879305e-06, "loss": 0.5995, "step": 17983 }, { "epoch": 0.5511830329778105, "grad_norm": 1.58159457247724, "learning_rate": 4.415436740518273e-06, "loss": 0.6197, "step": 17984 }, { "epoch": 0.5512136815005517, "grad_norm": 1.8993923696311081, "learning_rate": 4.4149438292084645e-06, "loss": 0.6804, "step": 17985 }, { "epoch": 0.5512443300232929, "grad_norm": 1.7428593215538044, "learning_rate": 4.414450923663367e-06, "loss": 0.7027, "step": 17986 }, { "epoch": 0.5512749785460341, "grad_norm": 1.6108311633498023, "learning_rate": 4.413958023887831e-06, "loss": 0.6502, "step": 17987 }, { "epoch": 0.5513056270687753, "grad_norm": 1.6721022464863176, "learning_rate": 4.413465129886719e-06, "loss": 0.6698, "step": 17988 }, { "epoch": 0.5513362755915164, "grad_norm": 1.654019288554884, "learning_rate": 4.412972241664885e-06, "loss": 0.6587, "step": 17989 }, { "epoch": 0.5513669241142577, "grad_norm": 1.8632958038778995, "learning_rate": 4.412479359227185e-06, "loss": 0.6116, "step": 17990 }, { "epoch": 0.5513975726369988, "grad_norm": 0.9663066404037907, "learning_rate": 4.411986482578476e-06, "loss": 0.4126, "step": 17991 }, { "epoch": 0.5514282211597401, "grad_norm": 1.5513547750556518, "learning_rate": 4.411493611723616e-06, "loss": 0.6187, "step": 17992 }, { "epoch": 0.5514588696824813, "grad_norm": 1.7251309557132093, "learning_rate": 4.4110007466674575e-06, "loss": 0.6697, "step": 17993 }, { "epoch": 0.5514895182052225, "grad_norm": 1.8775803914169507, "learning_rate": 4.410507887414861e-06, "loss": 0.611, "step": 17994 }, { "epoch": 0.5515201667279637, "grad_norm": 0.8208928876614926, "learning_rate": 4.410015033970681e-06, "loss": 0.4339, "step": 17995 }, { "epoch": 0.5515508152507049, "grad_norm": 1.6767736610126909, "learning_rate": 4.409522186339774e-06, "loss": 0.6677, "step": 17996 }, { "epoch": 0.5515814637734461, "grad_norm": 1.8067475857965467, "learning_rate": 4.409029344526997e-06, "loss": 0.6328, "step": 17997 }, { "epoch": 0.5516121122961873, "grad_norm": 0.7513976192911566, "learning_rate": 4.408536508537202e-06, "loss": 0.401, "step": 17998 }, { "epoch": 0.5516427608189285, "grad_norm": 0.8612918029009913, "learning_rate": 4.408043678375251e-06, "loss": 0.4345, "step": 17999 }, { "epoch": 0.5516734093416698, "grad_norm": 1.9276025915496127, "learning_rate": 4.407550854045996e-06, "loss": 0.5788, "step": 18000 }, { "epoch": 0.5517040578644109, "grad_norm": 1.6801933634720045, "learning_rate": 4.407058035554294e-06, "loss": 0.661, "step": 18001 }, { "epoch": 0.5517347063871522, "grad_norm": 1.805471441901074, "learning_rate": 4.406565222905002e-06, "loss": 0.6257, "step": 18002 }, { "epoch": 0.5517653549098933, "grad_norm": 1.9090455761575689, "learning_rate": 4.406072416102974e-06, "loss": 0.6695, "step": 18003 }, { "epoch": 0.5517960034326346, "grad_norm": 2.0397858928527572, "learning_rate": 4.405579615153065e-06, "loss": 0.6809, "step": 18004 }, { "epoch": 0.5518266519553757, "grad_norm": 1.7748289695219508, "learning_rate": 4.405086820060133e-06, "loss": 0.682, "step": 18005 }, { "epoch": 0.551857300478117, "grad_norm": 1.8038516255738837, "learning_rate": 4.4045940308290325e-06, "loss": 0.5446, "step": 18006 }, { "epoch": 0.5518879490008581, "grad_norm": 1.8315118262110084, "learning_rate": 4.40410124746462e-06, "loss": 0.7629, "step": 18007 }, { "epoch": 0.5519185975235994, "grad_norm": 1.9878614193948, "learning_rate": 4.4036084699717515e-06, "loss": 0.6042, "step": 18008 }, { "epoch": 0.5519492460463405, "grad_norm": 0.7668585229805419, "learning_rate": 4.40311569835528e-06, "loss": 0.4228, "step": 18009 }, { "epoch": 0.5519798945690818, "grad_norm": 0.7996556012526216, "learning_rate": 4.402622932620063e-06, "loss": 0.4347, "step": 18010 }, { "epoch": 0.552010543091823, "grad_norm": 1.7878549262931607, "learning_rate": 4.4021301727709545e-06, "loss": 0.5536, "step": 18011 }, { "epoch": 0.5520411916145642, "grad_norm": 1.625266848576819, "learning_rate": 4.401637418812809e-06, "loss": 0.6324, "step": 18012 }, { "epoch": 0.5520718401373054, "grad_norm": 1.7654767335581616, "learning_rate": 4.401144670750485e-06, "loss": 0.7159, "step": 18013 }, { "epoch": 0.5521024886600466, "grad_norm": 0.7896605566900536, "learning_rate": 4.400651928588835e-06, "loss": 0.4233, "step": 18014 }, { "epoch": 0.5521331371827878, "grad_norm": 1.849599224143912, "learning_rate": 4.4001591923327146e-06, "loss": 0.7093, "step": 18015 }, { "epoch": 0.552163785705529, "grad_norm": 0.7933532786160507, "learning_rate": 4.399666461986982e-06, "loss": 0.4429, "step": 18016 }, { "epoch": 0.5521944342282702, "grad_norm": 1.787778089589185, "learning_rate": 4.399173737556485e-06, "loss": 0.6131, "step": 18017 }, { "epoch": 0.5522250827510115, "grad_norm": 1.828272274971419, "learning_rate": 4.398681019046086e-06, "loss": 0.7102, "step": 18018 }, { "epoch": 0.5522557312737526, "grad_norm": 1.6682273670469403, "learning_rate": 4.398188306460635e-06, "loss": 0.6434, "step": 18019 }, { "epoch": 0.5522863797964938, "grad_norm": 1.7123293077675756, "learning_rate": 4.39769559980499e-06, "loss": 0.5658, "step": 18020 }, { "epoch": 0.552317028319235, "grad_norm": 1.8122104467792393, "learning_rate": 4.397202899084003e-06, "loss": 0.5789, "step": 18021 }, { "epoch": 0.5523476768419762, "grad_norm": 1.7669543471237563, "learning_rate": 4.3967102043025305e-06, "loss": 0.642, "step": 18022 }, { "epoch": 0.5523783253647174, "grad_norm": 1.863540030232622, "learning_rate": 4.3962175154654264e-06, "loss": 0.6232, "step": 18023 }, { "epoch": 0.5524089738874586, "grad_norm": 1.7653061195916748, "learning_rate": 4.395724832577547e-06, "loss": 0.6642, "step": 18024 }, { "epoch": 0.5524396224101998, "grad_norm": 0.7975834346817162, "learning_rate": 4.395232155643744e-06, "loss": 0.4127, "step": 18025 }, { "epoch": 0.552470270932941, "grad_norm": 1.7202929624992025, "learning_rate": 4.394739484668874e-06, "loss": 0.57, "step": 18026 }, { "epoch": 0.5525009194556822, "grad_norm": 0.7825363527352863, "learning_rate": 4.394246819657792e-06, "loss": 0.4223, "step": 18027 }, { "epoch": 0.5525315679784234, "grad_norm": 1.826671713864539, "learning_rate": 4.393754160615348e-06, "loss": 0.6568, "step": 18028 }, { "epoch": 0.5525622165011647, "grad_norm": 1.6445335932604634, "learning_rate": 4.3932615075464025e-06, "loss": 0.6509, "step": 18029 }, { "epoch": 0.5525928650239058, "grad_norm": 0.7864245415868775, "learning_rate": 4.392768860455805e-06, "loss": 0.4104, "step": 18030 }, { "epoch": 0.5526235135466471, "grad_norm": 1.5000907330233428, "learning_rate": 4.392276219348411e-06, "loss": 0.6281, "step": 18031 }, { "epoch": 0.5526541620693882, "grad_norm": 1.6587463394625528, "learning_rate": 4.391783584229076e-06, "loss": 0.6247, "step": 18032 }, { "epoch": 0.5526848105921295, "grad_norm": 1.6835374580821036, "learning_rate": 4.391290955102651e-06, "loss": 0.5978, "step": 18033 }, { "epoch": 0.5527154591148706, "grad_norm": 0.7434822951524487, "learning_rate": 4.390798331973994e-06, "loss": 0.4057, "step": 18034 }, { "epoch": 0.5527461076376119, "grad_norm": 1.7545742924771277, "learning_rate": 4.390305714847956e-06, "loss": 0.6082, "step": 18035 }, { "epoch": 0.552776756160353, "grad_norm": 1.8237654996833168, "learning_rate": 4.389813103729392e-06, "loss": 0.675, "step": 18036 }, { "epoch": 0.5528074046830943, "grad_norm": 1.882119125027646, "learning_rate": 4.3893204986231554e-06, "loss": 0.5387, "step": 18037 }, { "epoch": 0.5528380532058355, "grad_norm": 1.7274238984868069, "learning_rate": 4.388827899534102e-06, "loss": 0.6411, "step": 18038 }, { "epoch": 0.5528687017285767, "grad_norm": 1.7298672876520855, "learning_rate": 4.388335306467079e-06, "loss": 0.5552, "step": 18039 }, { "epoch": 0.5528993502513179, "grad_norm": 1.7181919675883437, "learning_rate": 4.3878427194269506e-06, "loss": 0.728, "step": 18040 }, { "epoch": 0.5529299987740591, "grad_norm": 1.858744281653967, "learning_rate": 4.387350138418559e-06, "loss": 0.594, "step": 18041 }, { "epoch": 0.5529606472968003, "grad_norm": 1.886103263800503, "learning_rate": 4.386857563446767e-06, "loss": 0.6911, "step": 18042 }, { "epoch": 0.5529912958195415, "grad_norm": 0.8990880355088942, "learning_rate": 4.386364994516424e-06, "loss": 0.4308, "step": 18043 }, { "epoch": 0.5530219443422827, "grad_norm": 0.8178086510483132, "learning_rate": 4.385872431632382e-06, "loss": 0.4219, "step": 18044 }, { "epoch": 0.553052592865024, "grad_norm": 1.8507939128514528, "learning_rate": 4.3853798747994975e-06, "loss": 0.6415, "step": 18045 }, { "epoch": 0.5530832413877651, "grad_norm": 1.719383127394268, "learning_rate": 4.384887324022622e-06, "loss": 0.5919, "step": 18046 }, { "epoch": 0.5531138899105064, "grad_norm": 0.7677352185276447, "learning_rate": 4.384394779306609e-06, "loss": 0.4163, "step": 18047 }, { "epoch": 0.5531445384332475, "grad_norm": 1.6283042723945553, "learning_rate": 4.383902240656312e-06, "loss": 0.588, "step": 18048 }, { "epoch": 0.5531751869559888, "grad_norm": 1.8692666829007294, "learning_rate": 4.383409708076582e-06, "loss": 0.6343, "step": 18049 }, { "epoch": 0.5532058354787299, "grad_norm": 0.7505561889301743, "learning_rate": 4.382917181572276e-06, "loss": 0.4128, "step": 18050 }, { "epoch": 0.5532364840014711, "grad_norm": 1.9011337009898306, "learning_rate": 4.382424661148245e-06, "loss": 0.6289, "step": 18051 }, { "epoch": 0.5532671325242123, "grad_norm": 1.6346756962162046, "learning_rate": 4.38193214680934e-06, "loss": 0.5981, "step": 18052 }, { "epoch": 0.5532977810469535, "grad_norm": 1.6718677711913295, "learning_rate": 4.381439638560418e-06, "loss": 0.7384, "step": 18053 }, { "epoch": 0.5533284295696947, "grad_norm": 1.8155973795264426, "learning_rate": 4.380947136406329e-06, "loss": 0.5678, "step": 18054 }, { "epoch": 0.5533590780924359, "grad_norm": 1.9164873579150337, "learning_rate": 4.380454640351924e-06, "loss": 0.6377, "step": 18055 }, { "epoch": 0.5533897266151772, "grad_norm": 1.6345254119082497, "learning_rate": 4.379962150402061e-06, "loss": 0.5429, "step": 18056 }, { "epoch": 0.5534203751379183, "grad_norm": 1.9099799254359608, "learning_rate": 4.379469666561588e-06, "loss": 0.5899, "step": 18057 }, { "epoch": 0.5534510236606596, "grad_norm": 1.8139201457501735, "learning_rate": 4.378977188835358e-06, "loss": 0.6161, "step": 18058 }, { "epoch": 0.5534816721834007, "grad_norm": 1.8221850718801176, "learning_rate": 4.378484717228226e-06, "loss": 0.6674, "step": 18059 }, { "epoch": 0.553512320706142, "grad_norm": 0.7882828333446491, "learning_rate": 4.377992251745043e-06, "loss": 0.4065, "step": 18060 }, { "epoch": 0.5535429692288831, "grad_norm": 1.5841515262456538, "learning_rate": 4.377499792390663e-06, "loss": 0.6431, "step": 18061 }, { "epoch": 0.5535736177516244, "grad_norm": 1.9112532256358334, "learning_rate": 4.377007339169935e-06, "loss": 0.5539, "step": 18062 }, { "epoch": 0.5536042662743655, "grad_norm": 1.8379531310915123, "learning_rate": 4.376514892087713e-06, "loss": 0.632, "step": 18063 }, { "epoch": 0.5536349147971068, "grad_norm": 0.7953353759079034, "learning_rate": 4.37602245114885e-06, "loss": 0.449, "step": 18064 }, { "epoch": 0.553665563319848, "grad_norm": 1.7163255966177933, "learning_rate": 4.375530016358198e-06, "loss": 0.6164, "step": 18065 }, { "epoch": 0.5536962118425892, "grad_norm": 1.9745769999609288, "learning_rate": 4.375037587720606e-06, "loss": 0.7054, "step": 18066 }, { "epoch": 0.5537268603653304, "grad_norm": 1.8966467234334916, "learning_rate": 4.374545165240931e-06, "loss": 0.6441, "step": 18067 }, { "epoch": 0.5537575088880716, "grad_norm": 1.84587622301921, "learning_rate": 4.374052748924022e-06, "loss": 0.701, "step": 18068 }, { "epoch": 0.5537881574108128, "grad_norm": 0.7549580015137799, "learning_rate": 4.373560338774731e-06, "loss": 0.4273, "step": 18069 }, { "epoch": 0.553818805933554, "grad_norm": 1.7100156983607064, "learning_rate": 4.3730679347979114e-06, "loss": 0.6885, "step": 18070 }, { "epoch": 0.5538494544562952, "grad_norm": 1.6773546339858123, "learning_rate": 4.372575536998411e-06, "loss": 0.5297, "step": 18071 }, { "epoch": 0.5538801029790364, "grad_norm": 0.7834298510216331, "learning_rate": 4.372083145381087e-06, "loss": 0.3887, "step": 18072 }, { "epoch": 0.5539107515017776, "grad_norm": 1.7606381157613862, "learning_rate": 4.371590759950789e-06, "loss": 0.5686, "step": 18073 }, { "epoch": 0.5539414000245189, "grad_norm": 1.7633683365296868, "learning_rate": 4.371098380712366e-06, "loss": 0.677, "step": 18074 }, { "epoch": 0.55397204854726, "grad_norm": 0.8349658155451529, "learning_rate": 4.370606007670673e-06, "loss": 0.4181, "step": 18075 }, { "epoch": 0.5540026970700013, "grad_norm": 2.534582390268363, "learning_rate": 4.3701136408305575e-06, "loss": 0.6421, "step": 18076 }, { "epoch": 0.5540333455927424, "grad_norm": 1.7667417207283613, "learning_rate": 4.3696212801968765e-06, "loss": 0.6332, "step": 18077 }, { "epoch": 0.5540639941154837, "grad_norm": 1.5325313511336653, "learning_rate": 4.369128925774477e-06, "loss": 0.5609, "step": 18078 }, { "epoch": 0.5540946426382248, "grad_norm": 1.577530026319836, "learning_rate": 4.368636577568211e-06, "loss": 0.5807, "step": 18079 }, { "epoch": 0.5541252911609661, "grad_norm": 1.8258934301853833, "learning_rate": 4.368144235582931e-06, "loss": 0.6346, "step": 18080 }, { "epoch": 0.5541559396837072, "grad_norm": 1.8828498988183946, "learning_rate": 4.367651899823489e-06, "loss": 0.6726, "step": 18081 }, { "epoch": 0.5541865882064484, "grad_norm": 1.8236501461808488, "learning_rate": 4.367159570294731e-06, "loss": 0.6599, "step": 18082 }, { "epoch": 0.5542172367291897, "grad_norm": 1.7675868185141503, "learning_rate": 4.366667247001516e-06, "loss": 0.6626, "step": 18083 }, { "epoch": 0.5542478852519308, "grad_norm": 1.7423401674383647, "learning_rate": 4.3661749299486886e-06, "loss": 0.6471, "step": 18084 }, { "epoch": 0.5542785337746721, "grad_norm": 1.954617924408033, "learning_rate": 4.3656826191411014e-06, "loss": 0.6396, "step": 18085 }, { "epoch": 0.5543091822974132, "grad_norm": 0.8755825224813867, "learning_rate": 4.365190314583606e-06, "loss": 0.4246, "step": 18086 }, { "epoch": 0.5543398308201545, "grad_norm": 1.8685813533454363, "learning_rate": 4.364698016281052e-06, "loss": 0.6717, "step": 18087 }, { "epoch": 0.5543704793428956, "grad_norm": 1.5820127171551148, "learning_rate": 4.364205724238292e-06, "loss": 0.6209, "step": 18088 }, { "epoch": 0.5544011278656369, "grad_norm": 0.7954903293509484, "learning_rate": 4.363713438460176e-06, "loss": 0.4192, "step": 18089 }, { "epoch": 0.554431776388378, "grad_norm": 1.6596025875048788, "learning_rate": 4.3632211589515525e-06, "loss": 0.6012, "step": 18090 }, { "epoch": 0.5544624249111193, "grad_norm": 1.6338846190801706, "learning_rate": 4.362728885717275e-06, "loss": 0.6528, "step": 18091 }, { "epoch": 0.5544930734338605, "grad_norm": 0.7647129505111321, "learning_rate": 4.362236618762195e-06, "loss": 0.4145, "step": 18092 }, { "epoch": 0.5545237219566017, "grad_norm": 1.6993550352875257, "learning_rate": 4.361744358091157e-06, "loss": 0.6644, "step": 18093 }, { "epoch": 0.5545543704793429, "grad_norm": 1.7616654523396866, "learning_rate": 4.361252103709018e-06, "loss": 0.5864, "step": 18094 }, { "epoch": 0.5545850190020841, "grad_norm": 1.7805207286816098, "learning_rate": 4.360759855620622e-06, "loss": 0.6406, "step": 18095 }, { "epoch": 0.5546156675248253, "grad_norm": 1.7093329015526082, "learning_rate": 4.360267613830827e-06, "loss": 0.6812, "step": 18096 }, { "epoch": 0.5546463160475665, "grad_norm": 1.9683971635114847, "learning_rate": 4.359775378344478e-06, "loss": 0.6052, "step": 18097 }, { "epoch": 0.5546769645703077, "grad_norm": 1.7462694649723836, "learning_rate": 4.359283149166423e-06, "loss": 0.6878, "step": 18098 }, { "epoch": 0.554707613093049, "grad_norm": 1.665311672069378, "learning_rate": 4.358790926301518e-06, "loss": 0.5876, "step": 18099 }, { "epoch": 0.5547382616157901, "grad_norm": 1.7031086629349712, "learning_rate": 4.3582987097546095e-06, "loss": 0.5431, "step": 18100 }, { "epoch": 0.5547689101385314, "grad_norm": 1.527675950771673, "learning_rate": 4.357806499530547e-06, "loss": 0.5893, "step": 18101 }, { "epoch": 0.5547995586612725, "grad_norm": 1.8519594536980188, "learning_rate": 4.357314295634182e-06, "loss": 0.632, "step": 18102 }, { "epoch": 0.5548302071840138, "grad_norm": 1.9164449846410951, "learning_rate": 4.356822098070362e-06, "loss": 0.7052, "step": 18103 }, { "epoch": 0.5548608557067549, "grad_norm": 1.595291742289142, "learning_rate": 4.356329906843941e-06, "loss": 0.6369, "step": 18104 }, { "epoch": 0.5548915042294962, "grad_norm": 0.9126585788476878, "learning_rate": 4.355837721959766e-06, "loss": 0.4582, "step": 18105 }, { "epoch": 0.5549221527522373, "grad_norm": 1.5875263551039132, "learning_rate": 4.355345543422686e-06, "loss": 0.6327, "step": 18106 }, { "epoch": 0.5549528012749786, "grad_norm": 1.6889426060132848, "learning_rate": 4.354853371237551e-06, "loss": 0.5422, "step": 18107 }, { "epoch": 0.5549834497977197, "grad_norm": 0.8501194803078754, "learning_rate": 4.354361205409212e-06, "loss": 0.4219, "step": 18108 }, { "epoch": 0.555014098320461, "grad_norm": 0.8278790076650129, "learning_rate": 4.353869045942515e-06, "loss": 0.4316, "step": 18109 }, { "epoch": 0.5550447468432022, "grad_norm": 0.8625919780056834, "learning_rate": 4.353376892842313e-06, "loss": 0.4159, "step": 18110 }, { "epoch": 0.5550753953659434, "grad_norm": 1.6617109031653352, "learning_rate": 4.352884746113454e-06, "loss": 0.5862, "step": 18111 }, { "epoch": 0.5551060438886846, "grad_norm": 1.7639755096767407, "learning_rate": 4.3523926057607866e-06, "loss": 0.622, "step": 18112 }, { "epoch": 0.5551366924114257, "grad_norm": 0.7775630592063527, "learning_rate": 4.351900471789162e-06, "loss": 0.4173, "step": 18113 }, { "epoch": 0.555167340934167, "grad_norm": 1.5117165626324283, "learning_rate": 4.351408344203425e-06, "loss": 0.5276, "step": 18114 }, { "epoch": 0.5551979894569081, "grad_norm": 1.7507839893413182, "learning_rate": 4.350916223008431e-06, "loss": 0.6397, "step": 18115 }, { "epoch": 0.5552286379796494, "grad_norm": 1.6148981776286087, "learning_rate": 4.350424108209024e-06, "loss": 0.4938, "step": 18116 }, { "epoch": 0.5552592865023905, "grad_norm": 1.6077777073952555, "learning_rate": 4.349931999810053e-06, "loss": 0.4928, "step": 18117 }, { "epoch": 0.5552899350251318, "grad_norm": 1.739826296399786, "learning_rate": 4.349439897816371e-06, "loss": 0.715, "step": 18118 }, { "epoch": 0.555320583547873, "grad_norm": 2.149103311111994, "learning_rate": 4.348947802232823e-06, "loss": 0.6383, "step": 18119 }, { "epoch": 0.5553512320706142, "grad_norm": 1.7725033271960777, "learning_rate": 4.348455713064257e-06, "loss": 0.7183, "step": 18120 }, { "epoch": 0.5553818805933554, "grad_norm": 1.8054733196568442, "learning_rate": 4.347963630315526e-06, "loss": 0.6566, "step": 18121 }, { "epoch": 0.5554125291160966, "grad_norm": 1.76861585507201, "learning_rate": 4.347471553991475e-06, "loss": 0.5892, "step": 18122 }, { "epoch": 0.5554431776388378, "grad_norm": 1.7210083485877332, "learning_rate": 4.346979484096954e-06, "loss": 0.6238, "step": 18123 }, { "epoch": 0.555473826161579, "grad_norm": 1.7528677419677954, "learning_rate": 4.346487420636812e-06, "loss": 0.552, "step": 18124 }, { "epoch": 0.5555044746843202, "grad_norm": 0.8567810874509721, "learning_rate": 4.345995363615894e-06, "loss": 0.4346, "step": 18125 }, { "epoch": 0.5555351232070614, "grad_norm": 1.6089224643446387, "learning_rate": 4.345503313039056e-06, "loss": 0.5546, "step": 18126 }, { "epoch": 0.5555657717298026, "grad_norm": 1.7586590636397301, "learning_rate": 4.345011268911138e-06, "loss": 0.5978, "step": 18127 }, { "epoch": 0.5555964202525439, "grad_norm": 0.8225388257050933, "learning_rate": 4.344519231236991e-06, "loss": 0.4444, "step": 18128 }, { "epoch": 0.555627068775285, "grad_norm": 0.8223994936163471, "learning_rate": 4.344027200021465e-06, "loss": 0.4372, "step": 18129 }, { "epoch": 0.5556577172980263, "grad_norm": 1.7580994602189852, "learning_rate": 4.343535175269406e-06, "loss": 0.6089, "step": 18130 }, { "epoch": 0.5556883658207674, "grad_norm": 1.674267680031877, "learning_rate": 4.3430431569856635e-06, "loss": 0.5568, "step": 18131 }, { "epoch": 0.5557190143435087, "grad_norm": 1.8113383958607614, "learning_rate": 4.342551145175085e-06, "loss": 0.5911, "step": 18132 }, { "epoch": 0.5557496628662498, "grad_norm": 1.7727048992146854, "learning_rate": 4.342059139842517e-06, "loss": 0.5721, "step": 18133 }, { "epoch": 0.5557803113889911, "grad_norm": 1.7470013266095339, "learning_rate": 4.34156714099281e-06, "loss": 0.6891, "step": 18134 }, { "epoch": 0.5558109599117322, "grad_norm": 1.758341039690562, "learning_rate": 4.3410751486308116e-06, "loss": 0.6154, "step": 18135 }, { "epoch": 0.5558416084344735, "grad_norm": 1.6982860208229766, "learning_rate": 4.340583162761365e-06, "loss": 0.5978, "step": 18136 }, { "epoch": 0.5558722569572146, "grad_norm": 1.790847954967096, "learning_rate": 4.340091183389324e-06, "loss": 0.6347, "step": 18137 }, { "epoch": 0.5559029054799559, "grad_norm": 1.8884436431926144, "learning_rate": 4.339599210519533e-06, "loss": 0.7412, "step": 18138 }, { "epoch": 0.5559335540026971, "grad_norm": 1.8478512684629964, "learning_rate": 4.3391072441568384e-06, "loss": 0.6703, "step": 18139 }, { "epoch": 0.5559642025254383, "grad_norm": 1.7337975430574442, "learning_rate": 4.338615284306091e-06, "loss": 0.6565, "step": 18140 }, { "epoch": 0.5559948510481795, "grad_norm": 0.8082860417345223, "learning_rate": 4.338123330972135e-06, "loss": 0.4125, "step": 18141 }, { "epoch": 0.5560254995709207, "grad_norm": 1.6202321855368647, "learning_rate": 4.337631384159819e-06, "loss": 0.5057, "step": 18142 }, { "epoch": 0.5560561480936619, "grad_norm": 1.7424562641648897, "learning_rate": 4.337139443873992e-06, "loss": 0.5769, "step": 18143 }, { "epoch": 0.556086796616403, "grad_norm": 1.6144002704154936, "learning_rate": 4.3366475101194984e-06, "loss": 0.6759, "step": 18144 }, { "epoch": 0.5561174451391443, "grad_norm": 1.427549515785848, "learning_rate": 4.336155582901187e-06, "loss": 0.5024, "step": 18145 }, { "epoch": 0.5561480936618854, "grad_norm": 2.010285227350116, "learning_rate": 4.335663662223907e-06, "loss": 0.7088, "step": 18146 }, { "epoch": 0.5561787421846267, "grad_norm": 0.8071548549393178, "learning_rate": 4.3351717480924986e-06, "loss": 0.46, "step": 18147 }, { "epoch": 0.5562093907073679, "grad_norm": 1.7403211295488743, "learning_rate": 4.334679840511816e-06, "loss": 0.6233, "step": 18148 }, { "epoch": 0.5562400392301091, "grad_norm": 1.8231852389195702, "learning_rate": 4.3341879394867e-06, "loss": 0.7125, "step": 18149 }, { "epoch": 0.5562706877528503, "grad_norm": 0.7663845736425469, "learning_rate": 4.333696045022005e-06, "loss": 0.4161, "step": 18150 }, { "epoch": 0.5563013362755915, "grad_norm": 1.8138411238089014, "learning_rate": 4.333204157122571e-06, "loss": 0.6013, "step": 18151 }, { "epoch": 0.5563319847983327, "grad_norm": 1.6646924315766454, "learning_rate": 4.332712275793246e-06, "loss": 0.5179, "step": 18152 }, { "epoch": 0.5563626333210739, "grad_norm": 1.7113492234833838, "learning_rate": 4.332220401038879e-06, "loss": 0.6069, "step": 18153 }, { "epoch": 0.5563932818438151, "grad_norm": 1.658254247162296, "learning_rate": 4.331728532864315e-06, "loss": 0.621, "step": 18154 }, { "epoch": 0.5564239303665564, "grad_norm": 1.6754812347224382, "learning_rate": 4.3312366712744e-06, "loss": 0.5305, "step": 18155 }, { "epoch": 0.5564545788892975, "grad_norm": 1.9784871407978897, "learning_rate": 4.330744816273983e-06, "loss": 0.7179, "step": 18156 }, { "epoch": 0.5564852274120388, "grad_norm": 1.6563595741720578, "learning_rate": 4.330252967867906e-06, "loss": 0.6427, "step": 18157 }, { "epoch": 0.5565158759347799, "grad_norm": 1.8773662394931567, "learning_rate": 4.329761126061021e-06, "loss": 0.6373, "step": 18158 }, { "epoch": 0.5565465244575212, "grad_norm": 0.7993402488394595, "learning_rate": 4.329269290858171e-06, "loss": 0.4455, "step": 18159 }, { "epoch": 0.5565771729802623, "grad_norm": 1.6658616253021101, "learning_rate": 4.3287774622641995e-06, "loss": 0.6239, "step": 18160 }, { "epoch": 0.5566078215030036, "grad_norm": 1.5092601041921054, "learning_rate": 4.328285640283957e-06, "loss": 0.5584, "step": 18161 }, { "epoch": 0.5566384700257447, "grad_norm": 1.981855055807902, "learning_rate": 4.327793824922288e-06, "loss": 0.6427, "step": 18162 }, { "epoch": 0.556669118548486, "grad_norm": 0.7815716599096074, "learning_rate": 4.327302016184037e-06, "loss": 0.4393, "step": 18163 }, { "epoch": 0.5566997670712271, "grad_norm": 1.8067743017458604, "learning_rate": 4.326810214074053e-06, "loss": 0.6449, "step": 18164 }, { "epoch": 0.5567304155939684, "grad_norm": 1.9077435767622228, "learning_rate": 4.326318418597181e-06, "loss": 0.6603, "step": 18165 }, { "epoch": 0.5567610641167096, "grad_norm": 1.638484502732098, "learning_rate": 4.325826629758263e-06, "loss": 0.674, "step": 18166 }, { "epoch": 0.5567917126394508, "grad_norm": 1.9489895751238193, "learning_rate": 4.325334847562151e-06, "loss": 0.634, "step": 18167 }, { "epoch": 0.556822361162192, "grad_norm": 1.6006391806565843, "learning_rate": 4.324843072013684e-06, "loss": 0.5814, "step": 18168 }, { "epoch": 0.5568530096849332, "grad_norm": 1.9295230581824923, "learning_rate": 4.324351303117714e-06, "loss": 0.6021, "step": 18169 }, { "epoch": 0.5568836582076744, "grad_norm": 0.7632752239142216, "learning_rate": 4.3238595408790825e-06, "loss": 0.4241, "step": 18170 }, { "epoch": 0.5569143067304156, "grad_norm": 0.7975467334515438, "learning_rate": 4.323367785302634e-06, "loss": 0.4335, "step": 18171 }, { "epoch": 0.5569449552531568, "grad_norm": 1.681949263864094, "learning_rate": 4.3228760363932186e-06, "loss": 0.5432, "step": 18172 }, { "epoch": 0.556975603775898, "grad_norm": 1.740128325630817, "learning_rate": 4.322384294155678e-06, "loss": 0.6554, "step": 18173 }, { "epoch": 0.5570062522986392, "grad_norm": 0.7324400560136118, "learning_rate": 4.321892558594857e-06, "loss": 0.4206, "step": 18174 }, { "epoch": 0.5570369008213804, "grad_norm": 2.053925304574529, "learning_rate": 4.321400829715604e-06, "loss": 0.7348, "step": 18175 }, { "epoch": 0.5570675493441216, "grad_norm": 1.8391529647037108, "learning_rate": 4.3209091075227605e-06, "loss": 0.7132, "step": 18176 }, { "epoch": 0.5570981978668628, "grad_norm": 1.5477755053115254, "learning_rate": 4.320417392021175e-06, "loss": 0.5756, "step": 18177 }, { "epoch": 0.557128846389604, "grad_norm": 1.7122409475321987, "learning_rate": 4.319925683215691e-06, "loss": 0.6593, "step": 18178 }, { "epoch": 0.5571594949123452, "grad_norm": 1.7102954731286215, "learning_rate": 4.319433981111151e-06, "loss": 0.6393, "step": 18179 }, { "epoch": 0.5571901434350864, "grad_norm": 1.7253658147816584, "learning_rate": 4.318942285712404e-06, "loss": 0.6336, "step": 18180 }, { "epoch": 0.5572207919578276, "grad_norm": 1.5947748614819008, "learning_rate": 4.3184505970242926e-06, "loss": 0.6229, "step": 18181 }, { "epoch": 0.5572514404805688, "grad_norm": 1.7518764737595398, "learning_rate": 4.317958915051661e-06, "loss": 0.6887, "step": 18182 }, { "epoch": 0.55728208900331, "grad_norm": 1.6464466868903223, "learning_rate": 4.317467239799355e-06, "loss": 0.6171, "step": 18183 }, { "epoch": 0.5573127375260513, "grad_norm": 1.742090916112131, "learning_rate": 4.31697557127222e-06, "loss": 0.6794, "step": 18184 }, { "epoch": 0.5573433860487924, "grad_norm": 1.6563087738355686, "learning_rate": 4.316483909475098e-06, "loss": 0.6299, "step": 18185 }, { "epoch": 0.5573740345715337, "grad_norm": 1.5837049334527946, "learning_rate": 4.315992254412836e-06, "loss": 0.5742, "step": 18186 }, { "epoch": 0.5574046830942748, "grad_norm": 1.5328613764090715, "learning_rate": 4.315500606090276e-06, "loss": 0.6272, "step": 18187 }, { "epoch": 0.5574353316170161, "grad_norm": 0.8334696181623037, "learning_rate": 4.315008964512265e-06, "loss": 0.4171, "step": 18188 }, { "epoch": 0.5574659801397572, "grad_norm": 0.8616883923097378, "learning_rate": 4.3145173296836475e-06, "loss": 0.4321, "step": 18189 }, { "epoch": 0.5574966286624985, "grad_norm": 0.8141938935811389, "learning_rate": 4.314025701609262e-06, "loss": 0.425, "step": 18190 }, { "epoch": 0.5575272771852396, "grad_norm": 1.841581076079749, "learning_rate": 4.3135340802939605e-06, "loss": 0.6292, "step": 18191 }, { "epoch": 0.5575579257079809, "grad_norm": 0.7604096238462, "learning_rate": 4.313042465742582e-06, "loss": 0.4081, "step": 18192 }, { "epoch": 0.5575885742307221, "grad_norm": 1.8461274251865876, "learning_rate": 4.31255085795997e-06, "loss": 0.6542, "step": 18193 }, { "epoch": 0.5576192227534633, "grad_norm": 0.8657464940140652, "learning_rate": 4.312059256950973e-06, "loss": 0.4214, "step": 18194 }, { "epoch": 0.5576498712762045, "grad_norm": 1.6323000524634934, "learning_rate": 4.3115676627204305e-06, "loss": 0.6454, "step": 18195 }, { "epoch": 0.5576805197989457, "grad_norm": 2.0212797740494715, "learning_rate": 4.311076075273189e-06, "loss": 0.6005, "step": 18196 }, { "epoch": 0.5577111683216869, "grad_norm": 1.7594960783100821, "learning_rate": 4.310584494614091e-06, "loss": 0.6954, "step": 18197 }, { "epoch": 0.5577418168444281, "grad_norm": 1.7572071145933317, "learning_rate": 4.310092920747979e-06, "loss": 0.6691, "step": 18198 }, { "epoch": 0.5577724653671693, "grad_norm": 1.9137425297984312, "learning_rate": 4.3096013536797e-06, "loss": 0.5264, "step": 18199 }, { "epoch": 0.5578031138899106, "grad_norm": 1.6687860524205502, "learning_rate": 4.309109793414096e-06, "loss": 0.6282, "step": 18200 }, { "epoch": 0.5578337624126517, "grad_norm": 1.7561524143409744, "learning_rate": 4.308618239956006e-06, "loss": 0.7043, "step": 18201 }, { "epoch": 0.557864410935393, "grad_norm": 0.827190322844107, "learning_rate": 4.308126693310281e-06, "loss": 0.421, "step": 18202 }, { "epoch": 0.5578950594581341, "grad_norm": 1.802653164367574, "learning_rate": 4.307635153481759e-06, "loss": 0.6978, "step": 18203 }, { "epoch": 0.5579257079808754, "grad_norm": 1.7110174402916845, "learning_rate": 4.307143620475287e-06, "loss": 0.6341, "step": 18204 }, { "epoch": 0.5579563565036165, "grad_norm": 1.8155268095183494, "learning_rate": 4.306652094295705e-06, "loss": 0.6548, "step": 18205 }, { "epoch": 0.5579870050263577, "grad_norm": 0.8624619116237059, "learning_rate": 4.306160574947856e-06, "loss": 0.4512, "step": 18206 }, { "epoch": 0.5580176535490989, "grad_norm": 1.7673158504777466, "learning_rate": 4.305669062436586e-06, "loss": 0.6209, "step": 18207 }, { "epoch": 0.5580483020718401, "grad_norm": 1.9166957551965549, "learning_rate": 4.305177556766736e-06, "loss": 0.6357, "step": 18208 }, { "epoch": 0.5580789505945813, "grad_norm": 1.6273459166187048, "learning_rate": 4.3046860579431485e-06, "loss": 0.6834, "step": 18209 }, { "epoch": 0.5581095991173225, "grad_norm": 1.854297253691532, "learning_rate": 4.30419456597067e-06, "loss": 0.6488, "step": 18210 }, { "epoch": 0.5581402476400638, "grad_norm": 2.177098064441358, "learning_rate": 4.303703080854138e-06, "loss": 0.6078, "step": 18211 }, { "epoch": 0.5581708961628049, "grad_norm": 1.808427810625711, "learning_rate": 4.3032116025983975e-06, "loss": 0.6365, "step": 18212 }, { "epoch": 0.5582015446855462, "grad_norm": 1.5772504058565677, "learning_rate": 4.302720131208292e-06, "loss": 0.5218, "step": 18213 }, { "epoch": 0.5582321932082873, "grad_norm": 0.8102777136019015, "learning_rate": 4.302228666688663e-06, "loss": 0.4325, "step": 18214 }, { "epoch": 0.5582628417310286, "grad_norm": 1.916863421921085, "learning_rate": 4.3017372090443545e-06, "loss": 0.6115, "step": 18215 }, { "epoch": 0.5582934902537697, "grad_norm": 1.6716891403149299, "learning_rate": 4.3012457582802076e-06, "loss": 0.472, "step": 18216 }, { "epoch": 0.558324138776511, "grad_norm": 0.8000982991022793, "learning_rate": 4.300754314401064e-06, "loss": 0.4226, "step": 18217 }, { "epoch": 0.5583547872992521, "grad_norm": 1.532066073612832, "learning_rate": 4.300262877411767e-06, "loss": 0.5629, "step": 18218 }, { "epoch": 0.5583854358219934, "grad_norm": 1.6469285108284124, "learning_rate": 4.299771447317162e-06, "loss": 0.6048, "step": 18219 }, { "epoch": 0.5584160843447346, "grad_norm": 1.6697007840103992, "learning_rate": 4.299280024122084e-06, "loss": 0.6451, "step": 18220 }, { "epoch": 0.5584467328674758, "grad_norm": 1.7735965092590515, "learning_rate": 4.298788607831382e-06, "loss": 0.6544, "step": 18221 }, { "epoch": 0.558477381390217, "grad_norm": 1.7580214915442545, "learning_rate": 4.2982971984498924e-06, "loss": 0.6313, "step": 18222 }, { "epoch": 0.5585080299129582, "grad_norm": 1.7955074827093684, "learning_rate": 4.297805795982464e-06, "loss": 0.5953, "step": 18223 }, { "epoch": 0.5585386784356994, "grad_norm": 1.7687288577730584, "learning_rate": 4.2973144004339325e-06, "loss": 0.6711, "step": 18224 }, { "epoch": 0.5585693269584406, "grad_norm": 1.7134020542985393, "learning_rate": 4.296823011809142e-06, "loss": 0.7034, "step": 18225 }, { "epoch": 0.5585999754811818, "grad_norm": 1.763558065979225, "learning_rate": 4.2963316301129345e-06, "loss": 0.661, "step": 18226 }, { "epoch": 0.558630624003923, "grad_norm": 1.6098691138213215, "learning_rate": 4.295840255350151e-06, "loss": 0.6282, "step": 18227 }, { "epoch": 0.5586612725266642, "grad_norm": 1.819467883840178, "learning_rate": 4.295348887525633e-06, "loss": 0.6691, "step": 18228 }, { "epoch": 0.5586919210494055, "grad_norm": 1.7232561463964167, "learning_rate": 4.294857526644225e-06, "loss": 0.58, "step": 18229 }, { "epoch": 0.5587225695721466, "grad_norm": 1.973755419411058, "learning_rate": 4.294366172710764e-06, "loss": 0.6818, "step": 18230 }, { "epoch": 0.5587532180948879, "grad_norm": 1.91378868013843, "learning_rate": 4.293874825730095e-06, "loss": 0.5916, "step": 18231 }, { "epoch": 0.558783866617629, "grad_norm": 1.7754878855544414, "learning_rate": 4.293383485707059e-06, "loss": 0.5812, "step": 18232 }, { "epoch": 0.5588145151403703, "grad_norm": 2.206454463870113, "learning_rate": 4.292892152646493e-06, "loss": 0.7403, "step": 18233 }, { "epoch": 0.5588451636631114, "grad_norm": 1.7273086799634774, "learning_rate": 4.292400826553245e-06, "loss": 0.5779, "step": 18234 }, { "epoch": 0.5588758121858527, "grad_norm": 1.6012889286971348, "learning_rate": 4.291909507432151e-06, "loss": 0.6467, "step": 18235 }, { "epoch": 0.5589064607085938, "grad_norm": 2.2101341521943736, "learning_rate": 4.291418195288053e-06, "loss": 0.6936, "step": 18236 }, { "epoch": 0.558937109231335, "grad_norm": 1.8532168012200856, "learning_rate": 4.290926890125794e-06, "loss": 0.7509, "step": 18237 }, { "epoch": 0.5589677577540763, "grad_norm": 1.6060339270547654, "learning_rate": 4.290435591950215e-06, "loss": 0.5984, "step": 18238 }, { "epoch": 0.5589984062768174, "grad_norm": 1.7708824173788507, "learning_rate": 4.289944300766153e-06, "loss": 0.6675, "step": 18239 }, { "epoch": 0.5590290547995587, "grad_norm": 1.739239857525158, "learning_rate": 4.289453016578453e-06, "loss": 0.5807, "step": 18240 }, { "epoch": 0.5590597033222998, "grad_norm": 1.6639312317794535, "learning_rate": 4.288961739391953e-06, "loss": 0.5859, "step": 18241 }, { "epoch": 0.5590903518450411, "grad_norm": 0.8425177875965819, "learning_rate": 4.2884704692114965e-06, "loss": 0.4321, "step": 18242 }, { "epoch": 0.5591210003677822, "grad_norm": 1.8566325308227511, "learning_rate": 4.287979206041923e-06, "loss": 0.6469, "step": 18243 }, { "epoch": 0.5591516488905235, "grad_norm": 0.830291333683026, "learning_rate": 4.287487949888069e-06, "loss": 0.4158, "step": 18244 }, { "epoch": 0.5591822974132646, "grad_norm": 1.8209368977757487, "learning_rate": 4.286996700754783e-06, "loss": 0.6447, "step": 18245 }, { "epoch": 0.5592129459360059, "grad_norm": 1.8115211974341345, "learning_rate": 4.286505458646899e-06, "loss": 0.6304, "step": 18246 }, { "epoch": 0.559243594458747, "grad_norm": 1.7447624757025266, "learning_rate": 4.286014223569258e-06, "loss": 0.6714, "step": 18247 }, { "epoch": 0.5592742429814883, "grad_norm": 1.5792811317166957, "learning_rate": 4.285522995526703e-06, "loss": 0.6478, "step": 18248 }, { "epoch": 0.5593048915042295, "grad_norm": 0.7888279172824352, "learning_rate": 4.285031774524072e-06, "loss": 0.4253, "step": 18249 }, { "epoch": 0.5593355400269707, "grad_norm": 1.7523924685181238, "learning_rate": 4.284540560566207e-06, "loss": 0.5784, "step": 18250 }, { "epoch": 0.5593661885497119, "grad_norm": 1.7011559146679114, "learning_rate": 4.284049353657946e-06, "loss": 0.6307, "step": 18251 }, { "epoch": 0.5593968370724531, "grad_norm": 1.5613858212079106, "learning_rate": 4.28355815380413e-06, "loss": 0.542, "step": 18252 }, { "epoch": 0.5594274855951943, "grad_norm": 1.7633779496527795, "learning_rate": 4.283066961009599e-06, "loss": 0.6991, "step": 18253 }, { "epoch": 0.5594581341179355, "grad_norm": 1.5985714362733818, "learning_rate": 4.282575775279194e-06, "loss": 0.5875, "step": 18254 }, { "epoch": 0.5594887826406767, "grad_norm": 1.9756623288757447, "learning_rate": 4.282084596617752e-06, "loss": 0.602, "step": 18255 }, { "epoch": 0.559519431163418, "grad_norm": 1.7726845840603358, "learning_rate": 4.281593425030114e-06, "loss": 0.6773, "step": 18256 }, { "epoch": 0.5595500796861591, "grad_norm": 1.7280410336334477, "learning_rate": 4.281102260521119e-06, "loss": 0.5273, "step": 18257 }, { "epoch": 0.5595807282089004, "grad_norm": 1.9195354088216923, "learning_rate": 4.280611103095609e-06, "loss": 0.6859, "step": 18258 }, { "epoch": 0.5596113767316415, "grad_norm": 1.8163723203618833, "learning_rate": 4.280119952758422e-06, "loss": 0.6519, "step": 18259 }, { "epoch": 0.5596420252543828, "grad_norm": 1.643940080491509, "learning_rate": 4.279628809514395e-06, "loss": 0.584, "step": 18260 }, { "epoch": 0.5596726737771239, "grad_norm": 1.7126262810693318, "learning_rate": 4.279137673368371e-06, "loss": 0.5575, "step": 18261 }, { "epoch": 0.5597033222998652, "grad_norm": 1.7448847030204635, "learning_rate": 4.27864654432519e-06, "loss": 0.639, "step": 18262 }, { "epoch": 0.5597339708226063, "grad_norm": 2.0485779553683736, "learning_rate": 4.278155422389685e-06, "loss": 0.6412, "step": 18263 }, { "epoch": 0.5597646193453476, "grad_norm": 1.557340546068751, "learning_rate": 4.277664307566703e-06, "loss": 0.6657, "step": 18264 }, { "epoch": 0.5597952678680888, "grad_norm": 1.8992871336818722, "learning_rate": 4.277173199861079e-06, "loss": 0.6608, "step": 18265 }, { "epoch": 0.55982591639083, "grad_norm": 1.814599466872143, "learning_rate": 4.27668209927765e-06, "loss": 0.6781, "step": 18266 }, { "epoch": 0.5598565649135712, "grad_norm": 1.6782371920294197, "learning_rate": 4.2761910058212595e-06, "loss": 0.6115, "step": 18267 }, { "epoch": 0.5598872134363123, "grad_norm": 1.7468173937283942, "learning_rate": 4.275699919496742e-06, "loss": 0.6713, "step": 18268 }, { "epoch": 0.5599178619590536, "grad_norm": 1.7966261829743484, "learning_rate": 4.275208840308941e-06, "loss": 0.713, "step": 18269 }, { "epoch": 0.5599485104817947, "grad_norm": 0.8152900317103857, "learning_rate": 4.274717768262692e-06, "loss": 0.4506, "step": 18270 }, { "epoch": 0.559979159004536, "grad_norm": 0.798134758673108, "learning_rate": 4.274226703362833e-06, "loss": 0.4128, "step": 18271 }, { "epoch": 0.5600098075272771, "grad_norm": 1.7174773311001468, "learning_rate": 4.273735645614206e-06, "loss": 0.6506, "step": 18272 }, { "epoch": 0.5600404560500184, "grad_norm": 1.7488535544618395, "learning_rate": 4.273244595021648e-06, "loss": 0.6847, "step": 18273 }, { "epoch": 0.5600711045727595, "grad_norm": 1.5757435554003856, "learning_rate": 4.272753551589993e-06, "loss": 0.6457, "step": 18274 }, { "epoch": 0.5601017530955008, "grad_norm": 1.7831378897501755, "learning_rate": 4.272262515324088e-06, "loss": 0.6406, "step": 18275 }, { "epoch": 0.560132401618242, "grad_norm": 1.4767467725064283, "learning_rate": 4.271771486228762e-06, "loss": 0.5448, "step": 18276 }, { "epoch": 0.5601630501409832, "grad_norm": 0.7767069492215278, "learning_rate": 4.2712804643088625e-06, "loss": 0.4235, "step": 18277 }, { "epoch": 0.5601936986637244, "grad_norm": 1.7750347698856626, "learning_rate": 4.2707894495692205e-06, "loss": 0.61, "step": 18278 }, { "epoch": 0.5602243471864656, "grad_norm": 1.7041010515159773, "learning_rate": 4.270298442014677e-06, "loss": 0.6517, "step": 18279 }, { "epoch": 0.5602549957092068, "grad_norm": 1.6641810930488696, "learning_rate": 4.26980744165007e-06, "loss": 0.5669, "step": 18280 }, { "epoch": 0.560285644231948, "grad_norm": 1.6521742597252653, "learning_rate": 4.269316448480237e-06, "loss": 0.6964, "step": 18281 }, { "epoch": 0.5603162927546892, "grad_norm": 1.907828125691995, "learning_rate": 4.268825462510015e-06, "loss": 0.6514, "step": 18282 }, { "epoch": 0.5603469412774305, "grad_norm": 1.8256853077921529, "learning_rate": 4.268334483744244e-06, "loss": 0.6585, "step": 18283 }, { "epoch": 0.5603775898001716, "grad_norm": 1.8383346989959168, "learning_rate": 4.26784351218776e-06, "loss": 0.6709, "step": 18284 }, { "epoch": 0.5604082383229129, "grad_norm": 1.778974982773008, "learning_rate": 4.267352547845401e-06, "loss": 0.6546, "step": 18285 }, { "epoch": 0.560438886845654, "grad_norm": 1.9785379977423698, "learning_rate": 4.266861590722007e-06, "loss": 0.6544, "step": 18286 }, { "epoch": 0.5604695353683953, "grad_norm": 1.7473932235111507, "learning_rate": 4.2663706408224094e-06, "loss": 0.6929, "step": 18287 }, { "epoch": 0.5605001838911364, "grad_norm": 1.815715681516648, "learning_rate": 4.265879698151453e-06, "loss": 0.6158, "step": 18288 }, { "epoch": 0.5605308324138777, "grad_norm": 1.8345003830614477, "learning_rate": 4.26538876271397e-06, "loss": 0.6276, "step": 18289 }, { "epoch": 0.5605614809366188, "grad_norm": 1.7864029405649786, "learning_rate": 4.2648978345147995e-06, "loss": 0.6614, "step": 18290 }, { "epoch": 0.5605921294593601, "grad_norm": 1.5427565006141695, "learning_rate": 4.264406913558779e-06, "loss": 0.6137, "step": 18291 }, { "epoch": 0.5606227779821013, "grad_norm": 0.7955752315978383, "learning_rate": 4.263915999850746e-06, "loss": 0.4119, "step": 18292 }, { "epoch": 0.5606534265048425, "grad_norm": 0.8100853174172666, "learning_rate": 4.263425093395536e-06, "loss": 0.4154, "step": 18293 }, { "epoch": 0.5606840750275837, "grad_norm": 0.8112030628372098, "learning_rate": 4.2629341941979885e-06, "loss": 0.4176, "step": 18294 }, { "epoch": 0.5607147235503249, "grad_norm": 1.7341120334110602, "learning_rate": 4.262443302262937e-06, "loss": 0.665, "step": 18295 }, { "epoch": 0.5607453720730661, "grad_norm": 1.766433211826748, "learning_rate": 4.261952417595222e-06, "loss": 0.6642, "step": 18296 }, { "epoch": 0.5607760205958073, "grad_norm": 1.8986710329526992, "learning_rate": 4.261461540199679e-06, "loss": 0.6402, "step": 18297 }, { "epoch": 0.5608066691185485, "grad_norm": 1.629868764721668, "learning_rate": 4.2609706700811424e-06, "loss": 0.5279, "step": 18298 }, { "epoch": 0.5608373176412896, "grad_norm": 1.5347551054007962, "learning_rate": 4.260479807244452e-06, "loss": 0.5903, "step": 18299 }, { "epoch": 0.5608679661640309, "grad_norm": 0.8163657944195541, "learning_rate": 4.2599889516944435e-06, "loss": 0.4199, "step": 18300 }, { "epoch": 0.560898614686772, "grad_norm": 1.6678117835362352, "learning_rate": 4.259498103435953e-06, "loss": 0.5814, "step": 18301 }, { "epoch": 0.5609292632095133, "grad_norm": 1.7837791416539743, "learning_rate": 4.259007262473817e-06, "loss": 0.5747, "step": 18302 }, { "epoch": 0.5609599117322545, "grad_norm": 0.8087422538848582, "learning_rate": 4.258516428812871e-06, "loss": 0.4248, "step": 18303 }, { "epoch": 0.5609905602549957, "grad_norm": 1.5851559423491854, "learning_rate": 4.258025602457954e-06, "loss": 0.6361, "step": 18304 }, { "epoch": 0.5610212087777369, "grad_norm": 1.6889585136547536, "learning_rate": 4.2575347834139e-06, "loss": 0.5812, "step": 18305 }, { "epoch": 0.5610518573004781, "grad_norm": 1.5562937770232357, "learning_rate": 4.257043971685545e-06, "loss": 0.5547, "step": 18306 }, { "epoch": 0.5610825058232193, "grad_norm": 1.9950247491129558, "learning_rate": 4.256553167277729e-06, "loss": 0.6392, "step": 18307 }, { "epoch": 0.5611131543459605, "grad_norm": 1.8521890301954071, "learning_rate": 4.256062370195282e-06, "loss": 0.6618, "step": 18308 }, { "epoch": 0.5611438028687017, "grad_norm": 1.6375715019889767, "learning_rate": 4.2555715804430425e-06, "loss": 0.5987, "step": 18309 }, { "epoch": 0.561174451391443, "grad_norm": 1.7825671500997222, "learning_rate": 4.255080798025848e-06, "loss": 0.6443, "step": 18310 }, { "epoch": 0.5612050999141841, "grad_norm": 0.807678633767612, "learning_rate": 4.2545900229485315e-06, "loss": 0.4519, "step": 18311 }, { "epoch": 0.5612357484369254, "grad_norm": 1.853140868200417, "learning_rate": 4.254099255215931e-06, "loss": 0.6366, "step": 18312 }, { "epoch": 0.5612663969596665, "grad_norm": 1.6466999442976455, "learning_rate": 4.253608494832882e-06, "loss": 0.534, "step": 18313 }, { "epoch": 0.5612970454824078, "grad_norm": 1.7274825115040355, "learning_rate": 4.253117741804219e-06, "loss": 0.6094, "step": 18314 }, { "epoch": 0.5613276940051489, "grad_norm": 1.6994993032903898, "learning_rate": 4.252626996134778e-06, "loss": 0.6106, "step": 18315 }, { "epoch": 0.5613583425278902, "grad_norm": 1.7205394393192148, "learning_rate": 4.252136257829396e-06, "loss": 0.6803, "step": 18316 }, { "epoch": 0.5613889910506313, "grad_norm": 0.847567827739303, "learning_rate": 4.251645526892903e-06, "loss": 0.4426, "step": 18317 }, { "epoch": 0.5614196395733726, "grad_norm": 0.8258509121309311, "learning_rate": 4.251154803330142e-06, "loss": 0.4274, "step": 18318 }, { "epoch": 0.5614502880961137, "grad_norm": 1.8098150516902793, "learning_rate": 4.250664087145943e-06, "loss": 0.622, "step": 18319 }, { "epoch": 0.561480936618855, "grad_norm": 1.6859722671224044, "learning_rate": 4.250173378345141e-06, "loss": 0.6373, "step": 18320 }, { "epoch": 0.5615115851415962, "grad_norm": 1.7852446581828165, "learning_rate": 4.2496826769325735e-06, "loss": 0.5931, "step": 18321 }, { "epoch": 0.5615422336643374, "grad_norm": 1.741119211311145, "learning_rate": 4.249191982913074e-06, "loss": 0.6224, "step": 18322 }, { "epoch": 0.5615728821870786, "grad_norm": 1.7257595426255816, "learning_rate": 4.248701296291479e-06, "loss": 0.6651, "step": 18323 }, { "epoch": 0.5616035307098198, "grad_norm": 1.5868547417518595, "learning_rate": 4.248210617072623e-06, "loss": 0.5883, "step": 18324 }, { "epoch": 0.561634179232561, "grad_norm": 1.6165287995839959, "learning_rate": 4.247719945261338e-06, "loss": 0.6549, "step": 18325 }, { "epoch": 0.5616648277553022, "grad_norm": 1.7116372783762877, "learning_rate": 4.247229280862463e-06, "loss": 0.5985, "step": 18326 }, { "epoch": 0.5616954762780434, "grad_norm": 1.9344143123907522, "learning_rate": 4.246738623880831e-06, "loss": 0.6746, "step": 18327 }, { "epoch": 0.5617261248007847, "grad_norm": 1.8217815630188265, "learning_rate": 4.246247974321273e-06, "loss": 0.7045, "step": 18328 }, { "epoch": 0.5617567733235258, "grad_norm": 1.9821401934363105, "learning_rate": 4.245757332188629e-06, "loss": 0.6229, "step": 18329 }, { "epoch": 0.561787421846267, "grad_norm": 1.947583137378476, "learning_rate": 4.245266697487729e-06, "loss": 0.6714, "step": 18330 }, { "epoch": 0.5618180703690082, "grad_norm": 2.219514319732754, "learning_rate": 4.244776070223412e-06, "loss": 0.6932, "step": 18331 }, { "epoch": 0.5618487188917494, "grad_norm": 1.8322268775372945, "learning_rate": 4.244285450400508e-06, "loss": 0.704, "step": 18332 }, { "epoch": 0.5618793674144906, "grad_norm": 1.7677161654782334, "learning_rate": 4.2437948380238525e-06, "loss": 0.6283, "step": 18333 }, { "epoch": 0.5619100159372318, "grad_norm": 0.8774018805203285, "learning_rate": 4.2433042330982805e-06, "loss": 0.4312, "step": 18334 }, { "epoch": 0.561940664459973, "grad_norm": 1.6604494131378522, "learning_rate": 4.242813635628626e-06, "loss": 0.5265, "step": 18335 }, { "epoch": 0.5619713129827142, "grad_norm": 0.809984554177966, "learning_rate": 4.242323045619721e-06, "loss": 0.4256, "step": 18336 }, { "epoch": 0.5620019615054554, "grad_norm": 1.6954889760771024, "learning_rate": 4.241832463076402e-06, "loss": 0.6437, "step": 18337 }, { "epoch": 0.5620326100281966, "grad_norm": 1.9864574780690778, "learning_rate": 4.241341888003501e-06, "loss": 0.6729, "step": 18338 }, { "epoch": 0.5620632585509379, "grad_norm": 1.8152693975452208, "learning_rate": 4.240851320405853e-06, "loss": 0.6283, "step": 18339 }, { "epoch": 0.562093907073679, "grad_norm": 1.5713599885520109, "learning_rate": 4.240360760288293e-06, "loss": 0.5351, "step": 18340 }, { "epoch": 0.5621245555964203, "grad_norm": 1.727970596414256, "learning_rate": 4.239870207655648e-06, "loss": 0.6249, "step": 18341 }, { "epoch": 0.5621552041191614, "grad_norm": 1.7553934193827883, "learning_rate": 4.239379662512761e-06, "loss": 0.5954, "step": 18342 }, { "epoch": 0.5621858526419027, "grad_norm": 1.7683614199777598, "learning_rate": 4.238889124864461e-06, "loss": 0.6463, "step": 18343 }, { "epoch": 0.5622165011646438, "grad_norm": 1.9552537407125008, "learning_rate": 4.238398594715577e-06, "loss": 0.7527, "step": 18344 }, { "epoch": 0.5622471496873851, "grad_norm": 1.879937567908243, "learning_rate": 4.23790807207095e-06, "loss": 0.6279, "step": 18345 }, { "epoch": 0.5622777982101262, "grad_norm": 1.7153084996628756, "learning_rate": 4.237417556935409e-06, "loss": 0.5969, "step": 18346 }, { "epoch": 0.5623084467328675, "grad_norm": 1.741222893170807, "learning_rate": 4.236927049313786e-06, "loss": 0.5689, "step": 18347 }, { "epoch": 0.5623390952556087, "grad_norm": 1.7472109661661885, "learning_rate": 4.236436549210918e-06, "loss": 0.5417, "step": 18348 }, { "epoch": 0.5623697437783499, "grad_norm": 1.6976859473772754, "learning_rate": 4.235946056631635e-06, "loss": 0.6397, "step": 18349 }, { "epoch": 0.5624003923010911, "grad_norm": 1.6397755913925942, "learning_rate": 4.2354555715807735e-06, "loss": 0.6092, "step": 18350 }, { "epoch": 0.5624310408238323, "grad_norm": 1.930934148540748, "learning_rate": 4.2349650940631615e-06, "loss": 0.6327, "step": 18351 }, { "epoch": 0.5624616893465735, "grad_norm": 1.5407398804617871, "learning_rate": 4.2344746240836345e-06, "loss": 0.6316, "step": 18352 }, { "epoch": 0.5624923378693147, "grad_norm": 1.687765350765832, "learning_rate": 4.233984161647025e-06, "loss": 0.6444, "step": 18353 }, { "epoch": 0.5625229863920559, "grad_norm": 1.7487674157437285, "learning_rate": 4.233493706758166e-06, "loss": 0.6435, "step": 18354 }, { "epoch": 0.5625536349147972, "grad_norm": 0.8909633665164679, "learning_rate": 4.2330032594218885e-06, "loss": 0.4416, "step": 18355 }, { "epoch": 0.5625842834375383, "grad_norm": 1.968967697149699, "learning_rate": 4.2325128196430265e-06, "loss": 0.6611, "step": 18356 }, { "epoch": 0.5626149319602796, "grad_norm": 1.7302859109414206, "learning_rate": 4.232022387426412e-06, "loss": 0.6364, "step": 18357 }, { "epoch": 0.5626455804830207, "grad_norm": 1.737331494033916, "learning_rate": 4.231531962776878e-06, "loss": 0.5933, "step": 18358 }, { "epoch": 0.562676229005762, "grad_norm": 1.6259121232004805, "learning_rate": 4.231041545699257e-06, "loss": 0.5748, "step": 18359 }, { "epoch": 0.5627068775285031, "grad_norm": 1.7533595253699736, "learning_rate": 4.230551136198377e-06, "loss": 0.6605, "step": 18360 }, { "epoch": 0.5627375260512444, "grad_norm": 1.9296325537076333, "learning_rate": 4.230060734279078e-06, "loss": 0.6184, "step": 18361 }, { "epoch": 0.5627681745739855, "grad_norm": 1.5887432339366023, "learning_rate": 4.229570339946186e-06, "loss": 0.6695, "step": 18362 }, { "epoch": 0.5627988230967267, "grad_norm": 1.6320226335029584, "learning_rate": 4.229079953204533e-06, "loss": 0.6055, "step": 18363 }, { "epoch": 0.562829471619468, "grad_norm": 1.754263435042526, "learning_rate": 4.228589574058954e-06, "loss": 0.7074, "step": 18364 }, { "epoch": 0.5628601201422091, "grad_norm": 0.8202697492370459, "learning_rate": 4.228099202514279e-06, "loss": 0.4023, "step": 18365 }, { "epoch": 0.5628907686649504, "grad_norm": 1.7114050623941004, "learning_rate": 4.2276088385753396e-06, "loss": 0.6612, "step": 18366 }, { "epoch": 0.5629214171876915, "grad_norm": 1.8944007158540186, "learning_rate": 4.227118482246968e-06, "loss": 0.6147, "step": 18367 }, { "epoch": 0.5629520657104328, "grad_norm": 1.5985358785527777, "learning_rate": 4.226628133533996e-06, "loss": 0.6284, "step": 18368 }, { "epoch": 0.5629827142331739, "grad_norm": 1.40353294889712, "learning_rate": 4.226137792441254e-06, "loss": 0.6082, "step": 18369 }, { "epoch": 0.5630133627559152, "grad_norm": 1.5342929094417475, "learning_rate": 4.225647458973578e-06, "loss": 0.6634, "step": 18370 }, { "epoch": 0.5630440112786563, "grad_norm": 1.5920800111875328, "learning_rate": 4.22515713313579e-06, "loss": 0.5863, "step": 18371 }, { "epoch": 0.5630746598013976, "grad_norm": 1.8256536666871517, "learning_rate": 4.224666814932731e-06, "loss": 0.6889, "step": 18372 }, { "epoch": 0.5631053083241387, "grad_norm": 2.0145398974698288, "learning_rate": 4.224176504369228e-06, "loss": 0.6183, "step": 18373 }, { "epoch": 0.56313595684688, "grad_norm": 1.7770669831601027, "learning_rate": 4.223686201450111e-06, "loss": 0.6362, "step": 18374 }, { "epoch": 0.5631666053696212, "grad_norm": 1.6041706418589101, "learning_rate": 4.223195906180213e-06, "loss": 0.6275, "step": 18375 }, { "epoch": 0.5631972538923624, "grad_norm": 1.8378334080628465, "learning_rate": 4.222705618564364e-06, "loss": 0.6602, "step": 18376 }, { "epoch": 0.5632279024151036, "grad_norm": 1.7448477784513514, "learning_rate": 4.222215338607396e-06, "loss": 0.6082, "step": 18377 }, { "epoch": 0.5632585509378448, "grad_norm": 1.976925924189923, "learning_rate": 4.22172506631414e-06, "loss": 0.6639, "step": 18378 }, { "epoch": 0.563289199460586, "grad_norm": 1.9183160121011018, "learning_rate": 4.221234801689424e-06, "loss": 0.6884, "step": 18379 }, { "epoch": 0.5633198479833272, "grad_norm": 1.5209154154140958, "learning_rate": 4.220744544738082e-06, "loss": 0.5845, "step": 18380 }, { "epoch": 0.5633504965060684, "grad_norm": 1.9925436359424313, "learning_rate": 4.220254295464945e-06, "loss": 0.7535, "step": 18381 }, { "epoch": 0.5633811450288096, "grad_norm": 0.8097446467611503, "learning_rate": 4.219764053874838e-06, "loss": 0.444, "step": 18382 }, { "epoch": 0.5634117935515508, "grad_norm": 1.5735352555189237, "learning_rate": 4.2192738199726e-06, "loss": 0.5818, "step": 18383 }, { "epoch": 0.5634424420742921, "grad_norm": 1.730392135392516, "learning_rate": 4.2187835937630524e-06, "loss": 0.6492, "step": 18384 }, { "epoch": 0.5634730905970332, "grad_norm": 1.6342946399289462, "learning_rate": 4.218293375251034e-06, "loss": 0.5965, "step": 18385 }, { "epoch": 0.5635037391197745, "grad_norm": 1.731234903604976, "learning_rate": 4.217803164441369e-06, "loss": 0.6138, "step": 18386 }, { "epoch": 0.5635343876425156, "grad_norm": 1.7779233853065517, "learning_rate": 4.217312961338889e-06, "loss": 0.54, "step": 18387 }, { "epoch": 0.5635650361652569, "grad_norm": 0.8146274892814486, "learning_rate": 4.216822765948425e-06, "loss": 0.42, "step": 18388 }, { "epoch": 0.563595684687998, "grad_norm": 1.9945741259707648, "learning_rate": 4.216332578274808e-06, "loss": 0.716, "step": 18389 }, { "epoch": 0.5636263332107393, "grad_norm": 0.7694779526600913, "learning_rate": 4.215842398322865e-06, "loss": 0.425, "step": 18390 }, { "epoch": 0.5636569817334804, "grad_norm": 0.7666726886408015, "learning_rate": 4.215352226097428e-06, "loss": 0.419, "step": 18391 }, { "epoch": 0.5636876302562217, "grad_norm": 1.5422678748060343, "learning_rate": 4.214862061603328e-06, "loss": 0.6614, "step": 18392 }, { "epoch": 0.5637182787789629, "grad_norm": 1.7363121821384788, "learning_rate": 4.214371904845389e-06, "loss": 0.6224, "step": 18393 }, { "epoch": 0.563748927301704, "grad_norm": 1.7145836337896838, "learning_rate": 4.213881755828449e-06, "loss": 0.591, "step": 18394 }, { "epoch": 0.5637795758244453, "grad_norm": 1.6594662365229849, "learning_rate": 4.2133916145573295e-06, "loss": 0.5301, "step": 18395 }, { "epoch": 0.5638102243471864, "grad_norm": 1.541784685854649, "learning_rate": 4.212901481036866e-06, "loss": 0.6139, "step": 18396 }, { "epoch": 0.5638408728699277, "grad_norm": 1.719826162418137, "learning_rate": 4.212411355271885e-06, "loss": 0.631, "step": 18397 }, { "epoch": 0.5638715213926688, "grad_norm": 1.7197241459511066, "learning_rate": 4.211921237267216e-06, "loss": 0.6726, "step": 18398 }, { "epoch": 0.5639021699154101, "grad_norm": 1.7710479848804186, "learning_rate": 4.2114311270276895e-06, "loss": 0.6324, "step": 18399 }, { "epoch": 0.5639328184381512, "grad_norm": 1.8188538929913012, "learning_rate": 4.210941024558133e-06, "loss": 0.5729, "step": 18400 }, { "epoch": 0.5639634669608925, "grad_norm": 1.4960231688487038, "learning_rate": 4.210450929863376e-06, "loss": 0.7019, "step": 18401 }, { "epoch": 0.5639941154836337, "grad_norm": 1.743877312929451, "learning_rate": 4.20996084294825e-06, "loss": 0.6821, "step": 18402 }, { "epoch": 0.5640247640063749, "grad_norm": 1.758052025433625, "learning_rate": 4.20947076381758e-06, "loss": 0.5739, "step": 18403 }, { "epoch": 0.5640554125291161, "grad_norm": 1.6748529620470525, "learning_rate": 4.208980692476199e-06, "loss": 0.5324, "step": 18404 }, { "epoch": 0.5640860610518573, "grad_norm": 0.8302257733201689, "learning_rate": 4.2084906289289325e-06, "loss": 0.4225, "step": 18405 }, { "epoch": 0.5641167095745985, "grad_norm": 1.8026213901407007, "learning_rate": 4.20800057318061e-06, "loss": 0.6254, "step": 18406 }, { "epoch": 0.5641473580973397, "grad_norm": 1.61834849071561, "learning_rate": 4.20751052523606e-06, "loss": 0.595, "step": 18407 }, { "epoch": 0.5641780066200809, "grad_norm": 1.6935964764358835, "learning_rate": 4.207020485100113e-06, "loss": 0.6539, "step": 18408 }, { "epoch": 0.5642086551428221, "grad_norm": 1.8258016242979105, "learning_rate": 4.206530452777594e-06, "loss": 0.559, "step": 18409 }, { "epoch": 0.5642393036655633, "grad_norm": 1.6674887418066886, "learning_rate": 4.206040428273336e-06, "loss": 0.5923, "step": 18410 }, { "epoch": 0.5642699521883046, "grad_norm": 0.8285356001638499, "learning_rate": 4.205550411592162e-06, "loss": 0.4237, "step": 18411 }, { "epoch": 0.5643006007110457, "grad_norm": 1.7127354859503237, "learning_rate": 4.205060402738905e-06, "loss": 0.6078, "step": 18412 }, { "epoch": 0.564331249233787, "grad_norm": 1.943075944742401, "learning_rate": 4.204570401718392e-06, "loss": 0.662, "step": 18413 }, { "epoch": 0.5643618977565281, "grad_norm": 1.5807499824789475, "learning_rate": 4.204080408535448e-06, "loss": 0.6166, "step": 18414 }, { "epoch": 0.5643925462792694, "grad_norm": 1.7459323423609217, "learning_rate": 4.203590423194905e-06, "loss": 0.5884, "step": 18415 }, { "epoch": 0.5644231948020105, "grad_norm": 1.6338490268346122, "learning_rate": 4.20310044570159e-06, "loss": 0.6072, "step": 18416 }, { "epoch": 0.5644538433247518, "grad_norm": 1.7442989721690587, "learning_rate": 4.202610476060328e-06, "loss": 0.5801, "step": 18417 }, { "epoch": 0.5644844918474929, "grad_norm": 1.586391665838328, "learning_rate": 4.202120514275951e-06, "loss": 0.6152, "step": 18418 }, { "epoch": 0.5645151403702342, "grad_norm": 0.8294097290579179, "learning_rate": 4.2016305603532835e-06, "loss": 0.4405, "step": 18419 }, { "epoch": 0.5645457888929754, "grad_norm": 1.7212691255513188, "learning_rate": 4.201140614297155e-06, "loss": 0.6868, "step": 18420 }, { "epoch": 0.5645764374157166, "grad_norm": 0.7958992882745027, "learning_rate": 4.200650676112392e-06, "loss": 0.4145, "step": 18421 }, { "epoch": 0.5646070859384578, "grad_norm": 1.754076476163791, "learning_rate": 4.200160745803821e-06, "loss": 0.697, "step": 18422 }, { "epoch": 0.564637734461199, "grad_norm": 0.7776735044189058, "learning_rate": 4.199670823376273e-06, "loss": 0.4043, "step": 18423 }, { "epoch": 0.5646683829839402, "grad_norm": 1.6764987886102634, "learning_rate": 4.199180908834573e-06, "loss": 0.4869, "step": 18424 }, { "epoch": 0.5646990315066813, "grad_norm": 1.57335217661521, "learning_rate": 4.198691002183547e-06, "loss": 0.5672, "step": 18425 }, { "epoch": 0.5647296800294226, "grad_norm": 1.894165131811522, "learning_rate": 4.198201103428025e-06, "loss": 0.6869, "step": 18426 }, { "epoch": 0.5647603285521637, "grad_norm": 1.8706128894033212, "learning_rate": 4.197711212572834e-06, "loss": 0.5509, "step": 18427 }, { "epoch": 0.564790977074905, "grad_norm": 1.627812860936387, "learning_rate": 4.197221329622796e-06, "loss": 0.5373, "step": 18428 }, { "epoch": 0.5648216255976461, "grad_norm": 1.7000140950812457, "learning_rate": 4.196731454582744e-06, "loss": 0.5982, "step": 18429 }, { "epoch": 0.5648522741203874, "grad_norm": 2.3543958952124275, "learning_rate": 4.196241587457501e-06, "loss": 0.6059, "step": 18430 }, { "epoch": 0.5648829226431286, "grad_norm": 1.842540907196089, "learning_rate": 4.1957517282518965e-06, "loss": 0.6341, "step": 18431 }, { "epoch": 0.5649135711658698, "grad_norm": 1.7499442415847635, "learning_rate": 4.195261876970756e-06, "loss": 0.6531, "step": 18432 }, { "epoch": 0.564944219688611, "grad_norm": 1.8851209733413312, "learning_rate": 4.1947720336189055e-06, "loss": 0.6771, "step": 18433 }, { "epoch": 0.5649748682113522, "grad_norm": 1.8517393888170457, "learning_rate": 4.1942821982011735e-06, "loss": 0.6691, "step": 18434 }, { "epoch": 0.5650055167340934, "grad_norm": 1.640188167385473, "learning_rate": 4.193792370722386e-06, "loss": 0.576, "step": 18435 }, { "epoch": 0.5650361652568346, "grad_norm": 1.707768763746691, "learning_rate": 4.193302551187364e-06, "loss": 0.5964, "step": 18436 }, { "epoch": 0.5650668137795758, "grad_norm": 1.7597817691814746, "learning_rate": 4.192812739600942e-06, "loss": 0.6198, "step": 18437 }, { "epoch": 0.565097462302317, "grad_norm": 2.0051647271942454, "learning_rate": 4.1923229359679405e-06, "loss": 0.6175, "step": 18438 }, { "epoch": 0.5651281108250582, "grad_norm": 1.9891887152693313, "learning_rate": 4.191833140293191e-06, "loss": 0.7114, "step": 18439 }, { "epoch": 0.5651587593477995, "grad_norm": 1.6400863802892522, "learning_rate": 4.191343352581514e-06, "loss": 0.6274, "step": 18440 }, { "epoch": 0.5651894078705406, "grad_norm": 2.099707245183428, "learning_rate": 4.190853572837737e-06, "loss": 0.7407, "step": 18441 }, { "epoch": 0.5652200563932819, "grad_norm": 1.8814029615568622, "learning_rate": 4.1903638010666895e-06, "loss": 0.5954, "step": 18442 }, { "epoch": 0.565250704916023, "grad_norm": 1.7074280097054055, "learning_rate": 4.189874037273193e-06, "loss": 0.6191, "step": 18443 }, { "epoch": 0.5652813534387643, "grad_norm": 1.859470077413823, "learning_rate": 4.189384281462074e-06, "loss": 0.7267, "step": 18444 }, { "epoch": 0.5653120019615054, "grad_norm": 1.6449990707060338, "learning_rate": 4.188894533638161e-06, "loss": 0.5999, "step": 18445 }, { "epoch": 0.5653426504842467, "grad_norm": 1.6249168535895762, "learning_rate": 4.1884047938062774e-06, "loss": 0.5733, "step": 18446 }, { "epoch": 0.5653732990069879, "grad_norm": 0.841678488054031, "learning_rate": 4.187915061971248e-06, "loss": 0.4189, "step": 18447 }, { "epoch": 0.5654039475297291, "grad_norm": 1.8311260333966801, "learning_rate": 4.1874253381379e-06, "loss": 0.6456, "step": 18448 }, { "epoch": 0.5654345960524703, "grad_norm": 1.673110920438966, "learning_rate": 4.186935622311057e-06, "loss": 0.6836, "step": 18449 }, { "epoch": 0.5654652445752115, "grad_norm": 2.0507955116461503, "learning_rate": 4.186445914495546e-06, "loss": 0.6307, "step": 18450 }, { "epoch": 0.5654958930979527, "grad_norm": 1.7322790751182353, "learning_rate": 4.1859562146961925e-06, "loss": 0.677, "step": 18451 }, { "epoch": 0.5655265416206939, "grad_norm": 1.6255794366065521, "learning_rate": 4.185466522917819e-06, "loss": 0.4794, "step": 18452 }, { "epoch": 0.5655571901434351, "grad_norm": 1.6308463157864492, "learning_rate": 4.184976839165254e-06, "loss": 0.6603, "step": 18453 }, { "epoch": 0.5655878386661763, "grad_norm": 1.966379789684128, "learning_rate": 4.1844871634433206e-06, "loss": 0.6873, "step": 18454 }, { "epoch": 0.5656184871889175, "grad_norm": 1.4071739931848384, "learning_rate": 4.183997495756841e-06, "loss": 0.4775, "step": 18455 }, { "epoch": 0.5656491357116586, "grad_norm": 1.7593794802008296, "learning_rate": 4.183507836110646e-06, "loss": 0.6342, "step": 18456 }, { "epoch": 0.5656797842343999, "grad_norm": 1.9323084184453339, "learning_rate": 4.183018184509555e-06, "loss": 0.6262, "step": 18457 }, { "epoch": 0.5657104327571411, "grad_norm": 1.8614571943256075, "learning_rate": 4.182528540958397e-06, "loss": 0.6755, "step": 18458 }, { "epoch": 0.5657410812798823, "grad_norm": 1.7572579862783384, "learning_rate": 4.182038905461994e-06, "loss": 0.529, "step": 18459 }, { "epoch": 0.5657717298026235, "grad_norm": 1.6737158998784052, "learning_rate": 4.1815492780251695e-06, "loss": 0.7136, "step": 18460 }, { "epoch": 0.5658023783253647, "grad_norm": 1.6300658964681936, "learning_rate": 4.181059658652751e-06, "loss": 0.5364, "step": 18461 }, { "epoch": 0.5658330268481059, "grad_norm": 1.7879368679892287, "learning_rate": 4.18057004734956e-06, "loss": 0.5705, "step": 18462 }, { "epoch": 0.5658636753708471, "grad_norm": 1.6681289976694293, "learning_rate": 4.180080444120422e-06, "loss": 0.6059, "step": 18463 }, { "epoch": 0.5658943238935883, "grad_norm": 1.915159440979533, "learning_rate": 4.179590848970162e-06, "loss": 0.6808, "step": 18464 }, { "epoch": 0.5659249724163296, "grad_norm": 1.6457967826779791, "learning_rate": 4.179101261903602e-06, "loss": 0.5251, "step": 18465 }, { "epoch": 0.5659556209390707, "grad_norm": 1.7016254788622758, "learning_rate": 4.178611682925569e-06, "loss": 0.5288, "step": 18466 }, { "epoch": 0.565986269461812, "grad_norm": 1.536991133801106, "learning_rate": 4.178122112040886e-06, "loss": 0.564, "step": 18467 }, { "epoch": 0.5660169179845531, "grad_norm": 1.5950893575140144, "learning_rate": 4.177632549254372e-06, "loss": 0.7046, "step": 18468 }, { "epoch": 0.5660475665072944, "grad_norm": 1.7205904701073194, "learning_rate": 4.177142994570859e-06, "loss": 0.5907, "step": 18469 }, { "epoch": 0.5660782150300355, "grad_norm": 1.7913386985603188, "learning_rate": 4.176653447995165e-06, "loss": 0.7187, "step": 18470 }, { "epoch": 0.5661088635527768, "grad_norm": 1.6232426349204512, "learning_rate": 4.176163909532115e-06, "loss": 0.6301, "step": 18471 }, { "epoch": 0.5661395120755179, "grad_norm": 1.6489718919664818, "learning_rate": 4.175674379186534e-06, "loss": 0.6173, "step": 18472 }, { "epoch": 0.5661701605982592, "grad_norm": 1.5790888836542822, "learning_rate": 4.175184856963243e-06, "loss": 0.5785, "step": 18473 }, { "epoch": 0.5662008091210003, "grad_norm": 1.9708751381366716, "learning_rate": 4.174695342867066e-06, "loss": 0.6404, "step": 18474 }, { "epoch": 0.5662314576437416, "grad_norm": 0.8612561357129158, "learning_rate": 4.174205836902828e-06, "loss": 0.441, "step": 18475 }, { "epoch": 0.5662621061664828, "grad_norm": 1.9299447143911408, "learning_rate": 4.173716339075351e-06, "loss": 0.79, "step": 18476 }, { "epoch": 0.566292754689224, "grad_norm": 1.7430642724101202, "learning_rate": 4.1732268493894586e-06, "loss": 0.6535, "step": 18477 }, { "epoch": 0.5663234032119652, "grad_norm": 1.6892321888361925, "learning_rate": 4.172737367849975e-06, "loss": 0.6155, "step": 18478 }, { "epoch": 0.5663540517347064, "grad_norm": 1.8988707954412156, "learning_rate": 4.1722478944617184e-06, "loss": 0.6182, "step": 18479 }, { "epoch": 0.5663847002574476, "grad_norm": 0.8055932728243945, "learning_rate": 4.171758429229518e-06, "loss": 0.4329, "step": 18480 }, { "epoch": 0.5664153487801888, "grad_norm": 1.79862045584604, "learning_rate": 4.171268972158193e-06, "loss": 0.6701, "step": 18481 }, { "epoch": 0.56644599730293, "grad_norm": 1.7833032647530247, "learning_rate": 4.170779523252565e-06, "loss": 0.6524, "step": 18482 }, { "epoch": 0.5664766458256713, "grad_norm": 0.7826807382953315, "learning_rate": 4.170290082517461e-06, "loss": 0.435, "step": 18483 }, { "epoch": 0.5665072943484124, "grad_norm": 0.795142724874591, "learning_rate": 4.169800649957699e-06, "loss": 0.4381, "step": 18484 }, { "epoch": 0.5665379428711537, "grad_norm": 1.47939314718036, "learning_rate": 4.1693112255781055e-06, "loss": 0.5619, "step": 18485 }, { "epoch": 0.5665685913938948, "grad_norm": 0.7850037713915324, "learning_rate": 4.1688218093835005e-06, "loss": 0.4273, "step": 18486 }, { "epoch": 0.566599239916636, "grad_norm": 1.763864923833146, "learning_rate": 4.1683324013787056e-06, "loss": 0.6555, "step": 18487 }, { "epoch": 0.5666298884393772, "grad_norm": 1.7224811111705958, "learning_rate": 4.167843001568545e-06, "loss": 0.6526, "step": 18488 }, { "epoch": 0.5666605369621184, "grad_norm": 2.0029322965225074, "learning_rate": 4.1673536099578425e-06, "loss": 0.6537, "step": 18489 }, { "epoch": 0.5666911854848596, "grad_norm": 1.6351287753103911, "learning_rate": 4.1668642265514145e-06, "loss": 0.5934, "step": 18490 }, { "epoch": 0.5667218340076008, "grad_norm": 1.959065068867312, "learning_rate": 4.166374851354089e-06, "loss": 0.6696, "step": 18491 }, { "epoch": 0.566752482530342, "grad_norm": 1.8173079019062586, "learning_rate": 4.165885484370684e-06, "loss": 0.7317, "step": 18492 }, { "epoch": 0.5667831310530832, "grad_norm": 1.6528681226982944, "learning_rate": 4.1653961256060235e-06, "loss": 0.556, "step": 18493 }, { "epoch": 0.5668137795758245, "grad_norm": 1.8383760035285661, "learning_rate": 4.164906775064929e-06, "loss": 0.6216, "step": 18494 }, { "epoch": 0.5668444280985656, "grad_norm": 1.556404190771897, "learning_rate": 4.1644174327522195e-06, "loss": 0.5722, "step": 18495 }, { "epoch": 0.5668750766213069, "grad_norm": 1.6807317735027123, "learning_rate": 4.1639280986727205e-06, "loss": 0.6275, "step": 18496 }, { "epoch": 0.566905725144048, "grad_norm": 1.7136572790579383, "learning_rate": 4.163438772831251e-06, "loss": 0.646, "step": 18497 }, { "epoch": 0.5669363736667893, "grad_norm": 0.8777459403878255, "learning_rate": 4.162949455232634e-06, "loss": 0.4326, "step": 18498 }, { "epoch": 0.5669670221895304, "grad_norm": 0.8317451026158588, "learning_rate": 4.162460145881691e-06, "loss": 0.4418, "step": 18499 }, { "epoch": 0.5669976707122717, "grad_norm": 1.772135779796484, "learning_rate": 4.161970844783242e-06, "loss": 0.6717, "step": 18500 }, { "epoch": 0.5670283192350128, "grad_norm": 1.670808214526764, "learning_rate": 4.161481551942107e-06, "loss": 0.6156, "step": 18501 }, { "epoch": 0.5670589677577541, "grad_norm": 0.7854945406419863, "learning_rate": 4.16099226736311e-06, "loss": 0.4404, "step": 18502 }, { "epoch": 0.5670896162804953, "grad_norm": 1.960556566771591, "learning_rate": 4.160502991051071e-06, "loss": 0.652, "step": 18503 }, { "epoch": 0.5671202648032365, "grad_norm": 1.5218104028521324, "learning_rate": 4.1600137230108106e-06, "loss": 0.5856, "step": 18504 }, { "epoch": 0.5671509133259777, "grad_norm": 1.69055046279009, "learning_rate": 4.159524463247151e-06, "loss": 0.5834, "step": 18505 }, { "epoch": 0.5671815618487189, "grad_norm": 2.1819311270979767, "learning_rate": 4.159035211764909e-06, "loss": 0.5491, "step": 18506 }, { "epoch": 0.5672122103714601, "grad_norm": 1.610704516858351, "learning_rate": 4.1585459685689105e-06, "loss": 0.7265, "step": 18507 }, { "epoch": 0.5672428588942013, "grad_norm": 1.6400043682196894, "learning_rate": 4.158056733663975e-06, "loss": 0.5646, "step": 18508 }, { "epoch": 0.5672735074169425, "grad_norm": 1.5524654039900425, "learning_rate": 4.157567507054919e-06, "loss": 0.6029, "step": 18509 }, { "epoch": 0.5673041559396838, "grad_norm": 1.8256811827512271, "learning_rate": 4.1570782887465685e-06, "loss": 0.703, "step": 18510 }, { "epoch": 0.5673348044624249, "grad_norm": 1.9196417577641625, "learning_rate": 4.156589078743738e-06, "loss": 0.7266, "step": 18511 }, { "epoch": 0.5673654529851662, "grad_norm": 1.626910248693126, "learning_rate": 4.156099877051254e-06, "loss": 0.5774, "step": 18512 }, { "epoch": 0.5673961015079073, "grad_norm": 1.7244990745830426, "learning_rate": 4.155610683673934e-06, "loss": 0.5817, "step": 18513 }, { "epoch": 0.5674267500306486, "grad_norm": 1.5964214517690258, "learning_rate": 4.155121498616596e-06, "loss": 0.5869, "step": 18514 }, { "epoch": 0.5674573985533897, "grad_norm": 0.7979922432096651, "learning_rate": 4.154632321884063e-06, "loss": 0.4279, "step": 18515 }, { "epoch": 0.567488047076131, "grad_norm": 1.5978680698585392, "learning_rate": 4.154143153481155e-06, "loss": 0.5737, "step": 18516 }, { "epoch": 0.5675186955988721, "grad_norm": 1.6534092889052256, "learning_rate": 4.15365399341269e-06, "loss": 0.5481, "step": 18517 }, { "epoch": 0.5675493441216133, "grad_norm": 1.696266706997799, "learning_rate": 4.153164841683488e-06, "loss": 0.573, "step": 18518 }, { "epoch": 0.5675799926443545, "grad_norm": 1.6155355952790942, "learning_rate": 4.152675698298371e-06, "loss": 0.6975, "step": 18519 }, { "epoch": 0.5676106411670957, "grad_norm": 1.8312039782377982, "learning_rate": 4.152186563262155e-06, "loss": 0.5857, "step": 18520 }, { "epoch": 0.567641289689837, "grad_norm": 1.6960501013468383, "learning_rate": 4.1516974365796645e-06, "loss": 0.6012, "step": 18521 }, { "epoch": 0.5676719382125781, "grad_norm": 1.5728422895886023, "learning_rate": 4.151208318255713e-06, "loss": 0.575, "step": 18522 }, { "epoch": 0.5677025867353194, "grad_norm": 1.9531013509605983, "learning_rate": 4.150719208295127e-06, "loss": 0.6006, "step": 18523 }, { "epoch": 0.5677332352580605, "grad_norm": 1.7403120524300126, "learning_rate": 4.15023010670272e-06, "loss": 0.5986, "step": 18524 }, { "epoch": 0.5677638837808018, "grad_norm": 0.8187195570980701, "learning_rate": 4.149741013483312e-06, "loss": 0.4215, "step": 18525 }, { "epoch": 0.5677945323035429, "grad_norm": 1.6141463290650244, "learning_rate": 4.149251928641725e-06, "loss": 0.561, "step": 18526 }, { "epoch": 0.5678251808262842, "grad_norm": 1.8743547773306866, "learning_rate": 4.1487628521827765e-06, "loss": 0.6412, "step": 18527 }, { "epoch": 0.5678558293490253, "grad_norm": 1.5163433366859398, "learning_rate": 4.1482737841112835e-06, "loss": 0.6368, "step": 18528 }, { "epoch": 0.5678864778717666, "grad_norm": 1.8706740867836473, "learning_rate": 4.1477847244320685e-06, "loss": 0.6678, "step": 18529 }, { "epoch": 0.5679171263945078, "grad_norm": 0.7657609576284323, "learning_rate": 4.147295673149947e-06, "loss": 0.4221, "step": 18530 }, { "epoch": 0.567947774917249, "grad_norm": 1.9405321628464185, "learning_rate": 4.146806630269741e-06, "loss": 0.6303, "step": 18531 }, { "epoch": 0.5679784234399902, "grad_norm": 1.8658070338414858, "learning_rate": 4.1463175957962686e-06, "loss": 0.7404, "step": 18532 }, { "epoch": 0.5680090719627314, "grad_norm": 2.6750980859651436, "learning_rate": 4.1458285697343445e-06, "loss": 0.6486, "step": 18533 }, { "epoch": 0.5680397204854726, "grad_norm": 1.875733472545887, "learning_rate": 4.145339552088793e-06, "loss": 0.6627, "step": 18534 }, { "epoch": 0.5680703690082138, "grad_norm": 0.8052280985029562, "learning_rate": 4.144850542864428e-06, "loss": 0.4248, "step": 18535 }, { "epoch": 0.568101017530955, "grad_norm": 1.631039791668866, "learning_rate": 4.144361542066069e-06, "loss": 0.5846, "step": 18536 }, { "epoch": 0.5681316660536962, "grad_norm": 1.626255662442959, "learning_rate": 4.143872549698535e-06, "loss": 0.581, "step": 18537 }, { "epoch": 0.5681623145764374, "grad_norm": 1.5857248378994728, "learning_rate": 4.143383565766643e-06, "loss": 0.6814, "step": 18538 }, { "epoch": 0.5681929630991787, "grad_norm": 1.6430800571016468, "learning_rate": 4.1428945902752135e-06, "loss": 0.4875, "step": 18539 }, { "epoch": 0.5682236116219198, "grad_norm": 1.63680735153756, "learning_rate": 4.142405623229062e-06, "loss": 0.606, "step": 18540 }, { "epoch": 0.5682542601446611, "grad_norm": 1.8588366473636262, "learning_rate": 4.141916664633008e-06, "loss": 0.6959, "step": 18541 }, { "epoch": 0.5682849086674022, "grad_norm": 1.6557987075615828, "learning_rate": 4.141427714491868e-06, "loss": 0.5911, "step": 18542 }, { "epoch": 0.5683155571901435, "grad_norm": 1.8119721505337743, "learning_rate": 4.1409387728104615e-06, "loss": 0.5486, "step": 18543 }, { "epoch": 0.5683462057128846, "grad_norm": 1.7067931095152462, "learning_rate": 4.1404498395936035e-06, "loss": 0.6134, "step": 18544 }, { "epoch": 0.5683768542356259, "grad_norm": 1.665432315394693, "learning_rate": 4.1399609148461135e-06, "loss": 0.6571, "step": 18545 }, { "epoch": 0.568407502758367, "grad_norm": 1.7937394507062123, "learning_rate": 4.13947199857281e-06, "loss": 0.6118, "step": 18546 }, { "epoch": 0.5684381512811083, "grad_norm": 1.4833073987338619, "learning_rate": 4.138983090778507e-06, "loss": 0.6402, "step": 18547 }, { "epoch": 0.5684687998038495, "grad_norm": 1.5767976356004394, "learning_rate": 4.1384941914680256e-06, "loss": 0.5544, "step": 18548 }, { "epoch": 0.5684994483265906, "grad_norm": 0.8263073217916601, "learning_rate": 4.1380053006461804e-06, "loss": 0.4119, "step": 18549 }, { "epoch": 0.5685300968493319, "grad_norm": 1.6746732764028334, "learning_rate": 4.13751641831779e-06, "loss": 0.7232, "step": 18550 }, { "epoch": 0.568560745372073, "grad_norm": 1.728151579484166, "learning_rate": 4.137027544487672e-06, "loss": 0.6724, "step": 18551 }, { "epoch": 0.5685913938948143, "grad_norm": 1.701039492968913, "learning_rate": 4.136538679160639e-06, "loss": 0.6587, "step": 18552 }, { "epoch": 0.5686220424175554, "grad_norm": 1.667351787076564, "learning_rate": 4.136049822341516e-06, "loss": 0.616, "step": 18553 }, { "epoch": 0.5686526909402967, "grad_norm": 1.6351765194127996, "learning_rate": 4.135560974035112e-06, "loss": 0.5343, "step": 18554 }, { "epoch": 0.5686833394630378, "grad_norm": 1.91047516241696, "learning_rate": 4.135072134246247e-06, "loss": 0.6285, "step": 18555 }, { "epoch": 0.5687139879857791, "grad_norm": 1.748361631644768, "learning_rate": 4.134583302979739e-06, "loss": 0.6049, "step": 18556 }, { "epoch": 0.5687446365085203, "grad_norm": 0.838022722913728, "learning_rate": 4.134094480240402e-06, "loss": 0.4175, "step": 18557 }, { "epoch": 0.5687752850312615, "grad_norm": 1.7781341482993427, "learning_rate": 4.1336056660330535e-06, "loss": 0.6843, "step": 18558 }, { "epoch": 0.5688059335540027, "grad_norm": 0.7715621361189842, "learning_rate": 4.133116860362511e-06, "loss": 0.4435, "step": 18559 }, { "epoch": 0.5688365820767439, "grad_norm": 1.6765307097666806, "learning_rate": 4.132628063233589e-06, "loss": 0.5958, "step": 18560 }, { "epoch": 0.5688672305994851, "grad_norm": 0.818048583348214, "learning_rate": 4.132139274651105e-06, "loss": 0.4177, "step": 18561 }, { "epoch": 0.5688978791222263, "grad_norm": 1.829509009813729, "learning_rate": 4.131650494619876e-06, "loss": 0.6773, "step": 18562 }, { "epoch": 0.5689285276449675, "grad_norm": 1.7596078386033398, "learning_rate": 4.1311617231447136e-06, "loss": 0.6441, "step": 18563 }, { "epoch": 0.5689591761677087, "grad_norm": 0.776021902730361, "learning_rate": 4.130672960230441e-06, "loss": 0.4473, "step": 18564 }, { "epoch": 0.5689898246904499, "grad_norm": 0.7359641450716095, "learning_rate": 4.130184205881866e-06, "loss": 0.3974, "step": 18565 }, { "epoch": 0.5690204732131912, "grad_norm": 1.7890931613419367, "learning_rate": 4.129695460103813e-06, "loss": 0.6144, "step": 18566 }, { "epoch": 0.5690511217359323, "grad_norm": 1.733657080471215, "learning_rate": 4.12920672290109e-06, "loss": 0.6194, "step": 18567 }, { "epoch": 0.5690817702586736, "grad_norm": 1.9138777577306758, "learning_rate": 4.128717994278517e-06, "loss": 0.5919, "step": 18568 }, { "epoch": 0.5691124187814147, "grad_norm": 1.66805825968978, "learning_rate": 4.1282292742409095e-06, "loss": 0.5614, "step": 18569 }, { "epoch": 0.569143067304156, "grad_norm": 1.9660722249852072, "learning_rate": 4.127740562793081e-06, "loss": 0.5864, "step": 18570 }, { "epoch": 0.5691737158268971, "grad_norm": 1.8295431662133623, "learning_rate": 4.127251859939847e-06, "loss": 0.6276, "step": 18571 }, { "epoch": 0.5692043643496384, "grad_norm": 1.7612093734412313, "learning_rate": 4.126763165686025e-06, "loss": 0.6991, "step": 18572 }, { "epoch": 0.5692350128723795, "grad_norm": 1.6254338130125883, "learning_rate": 4.12627448003643e-06, "loss": 0.616, "step": 18573 }, { "epoch": 0.5692656613951208, "grad_norm": 0.7874544628574089, "learning_rate": 4.125785802995874e-06, "loss": 0.3953, "step": 18574 }, { "epoch": 0.569296309917862, "grad_norm": 1.9380386968049983, "learning_rate": 4.125297134569176e-06, "loss": 0.6929, "step": 18575 }, { "epoch": 0.5693269584406032, "grad_norm": 1.8052037531052851, "learning_rate": 4.124808474761146e-06, "loss": 0.6557, "step": 18576 }, { "epoch": 0.5693576069633444, "grad_norm": 1.8933916102225576, "learning_rate": 4.124319823576606e-06, "loss": 0.6769, "step": 18577 }, { "epoch": 0.5693882554860856, "grad_norm": 1.7807648779726006, "learning_rate": 4.123831181020365e-06, "loss": 0.5826, "step": 18578 }, { "epoch": 0.5694189040088268, "grad_norm": 1.755162590584831, "learning_rate": 4.123342547097239e-06, "loss": 0.6187, "step": 18579 }, { "epoch": 0.5694495525315679, "grad_norm": 1.7205669199864377, "learning_rate": 4.122853921812044e-06, "loss": 0.5946, "step": 18580 }, { "epoch": 0.5694802010543092, "grad_norm": 2.1175355210299576, "learning_rate": 4.122365305169593e-06, "loss": 0.6302, "step": 18581 }, { "epoch": 0.5695108495770503, "grad_norm": 1.6604523279031627, "learning_rate": 4.121876697174701e-06, "loss": 0.5674, "step": 18582 }, { "epoch": 0.5695414980997916, "grad_norm": 1.7289202308237535, "learning_rate": 4.121388097832184e-06, "loss": 0.6102, "step": 18583 }, { "epoch": 0.5695721466225327, "grad_norm": 1.857535242446626, "learning_rate": 4.120899507146853e-06, "loss": 0.7218, "step": 18584 }, { "epoch": 0.569602795145274, "grad_norm": 1.663717800714552, "learning_rate": 4.1204109251235255e-06, "loss": 0.5194, "step": 18585 }, { "epoch": 0.5696334436680152, "grad_norm": 1.7729555974613824, "learning_rate": 4.119922351767016e-06, "loss": 0.6252, "step": 18586 }, { "epoch": 0.5696640921907564, "grad_norm": 0.7803993481270006, "learning_rate": 4.119433787082133e-06, "loss": 0.4126, "step": 18587 }, { "epoch": 0.5696947407134976, "grad_norm": 1.899876151089557, "learning_rate": 4.118945231073697e-06, "loss": 0.6341, "step": 18588 }, { "epoch": 0.5697253892362388, "grad_norm": 1.844957168173005, "learning_rate": 4.118456683746518e-06, "loss": 0.6667, "step": 18589 }, { "epoch": 0.56975603775898, "grad_norm": 1.8535975697667118, "learning_rate": 4.11796814510541e-06, "loss": 0.6055, "step": 18590 }, { "epoch": 0.5697866862817212, "grad_norm": 1.6463371562402958, "learning_rate": 4.1174796151551885e-06, "loss": 0.6859, "step": 18591 }, { "epoch": 0.5698173348044624, "grad_norm": 1.5099879240142295, "learning_rate": 4.116991093900665e-06, "loss": 0.5933, "step": 18592 }, { "epoch": 0.5698479833272037, "grad_norm": 1.8364099202741495, "learning_rate": 4.116502581346655e-06, "loss": 0.5906, "step": 18593 }, { "epoch": 0.5698786318499448, "grad_norm": 1.6739175278561387, "learning_rate": 4.116014077497972e-06, "loss": 0.7129, "step": 18594 }, { "epoch": 0.5699092803726861, "grad_norm": 1.585219100490649, "learning_rate": 4.115525582359427e-06, "loss": 0.5781, "step": 18595 }, { "epoch": 0.5699399288954272, "grad_norm": 1.883351914602563, "learning_rate": 4.115037095935837e-06, "loss": 0.6836, "step": 18596 }, { "epoch": 0.5699705774181685, "grad_norm": 1.5471005745779802, "learning_rate": 4.114548618232012e-06, "loss": 0.7564, "step": 18597 }, { "epoch": 0.5700012259409096, "grad_norm": 1.7887748329763793, "learning_rate": 4.114060149252764e-06, "loss": 0.6309, "step": 18598 }, { "epoch": 0.5700318744636509, "grad_norm": 1.7452256461823232, "learning_rate": 4.1135716890029096e-06, "loss": 0.656, "step": 18599 }, { "epoch": 0.570062522986392, "grad_norm": 1.4824993461192022, "learning_rate": 4.1130832374872605e-06, "loss": 0.5737, "step": 18600 }, { "epoch": 0.5700931715091333, "grad_norm": 1.7796466929820551, "learning_rate": 4.112594794710628e-06, "loss": 0.5936, "step": 18601 }, { "epoch": 0.5701238200318745, "grad_norm": 1.6610481744387382, "learning_rate": 4.1121063606778264e-06, "loss": 0.5607, "step": 18602 }, { "epoch": 0.5701544685546157, "grad_norm": 1.8073390852819766, "learning_rate": 4.111617935393668e-06, "loss": 0.4776, "step": 18603 }, { "epoch": 0.5701851170773569, "grad_norm": 1.918119811963361, "learning_rate": 4.1111295188629665e-06, "loss": 0.6625, "step": 18604 }, { "epoch": 0.5702157656000981, "grad_norm": 1.5492108708765895, "learning_rate": 4.110641111090533e-06, "loss": 0.5309, "step": 18605 }, { "epoch": 0.5702464141228393, "grad_norm": 1.6563041946594248, "learning_rate": 4.110152712081178e-06, "loss": 0.6097, "step": 18606 }, { "epoch": 0.5702770626455805, "grad_norm": 1.8388635560654674, "learning_rate": 4.109664321839719e-06, "loss": 0.6601, "step": 18607 }, { "epoch": 0.5703077111683217, "grad_norm": 1.7908717910128769, "learning_rate": 4.109175940370965e-06, "loss": 0.6642, "step": 18608 }, { "epoch": 0.570338359691063, "grad_norm": 0.787129178002681, "learning_rate": 4.108687567679726e-06, "loss": 0.4357, "step": 18609 }, { "epoch": 0.5703690082138041, "grad_norm": 1.7171707487331567, "learning_rate": 4.108199203770818e-06, "loss": 0.6621, "step": 18610 }, { "epoch": 0.5703996567365452, "grad_norm": 1.5615793123092196, "learning_rate": 4.10771084864905e-06, "loss": 0.6691, "step": 18611 }, { "epoch": 0.5704303052592865, "grad_norm": 0.7710149079625077, "learning_rate": 4.107222502319237e-06, "loss": 0.4068, "step": 18612 }, { "epoch": 0.5704609537820277, "grad_norm": 2.0728022069479235, "learning_rate": 4.106734164786189e-06, "loss": 0.6536, "step": 18613 }, { "epoch": 0.5704916023047689, "grad_norm": 1.7605756815069533, "learning_rate": 4.106245836054717e-06, "loss": 0.6534, "step": 18614 }, { "epoch": 0.5705222508275101, "grad_norm": 1.8005183445996882, "learning_rate": 4.1057575161296335e-06, "loss": 0.6426, "step": 18615 }, { "epoch": 0.5705528993502513, "grad_norm": 1.6360424276610577, "learning_rate": 4.105269205015753e-06, "loss": 0.5845, "step": 18616 }, { "epoch": 0.5705835478729925, "grad_norm": 1.5891007162516206, "learning_rate": 4.104780902717879e-06, "loss": 0.657, "step": 18617 }, { "epoch": 0.5706141963957337, "grad_norm": 1.5696590714035852, "learning_rate": 4.104292609240831e-06, "loss": 0.4967, "step": 18618 }, { "epoch": 0.5706448449184749, "grad_norm": 1.8416077247698197, "learning_rate": 4.103804324589414e-06, "loss": 0.5771, "step": 18619 }, { "epoch": 0.5706754934412162, "grad_norm": 1.7630024889468823, "learning_rate": 4.103316048768447e-06, "loss": 0.5799, "step": 18620 }, { "epoch": 0.5707061419639573, "grad_norm": 0.8234059670603251, "learning_rate": 4.102827781782734e-06, "loss": 0.4322, "step": 18621 }, { "epoch": 0.5707367904866986, "grad_norm": 1.70367152746702, "learning_rate": 4.102339523637087e-06, "loss": 0.6603, "step": 18622 }, { "epoch": 0.5707674390094397, "grad_norm": 1.8554888964062604, "learning_rate": 4.10185127433632e-06, "loss": 0.6151, "step": 18623 }, { "epoch": 0.570798087532181, "grad_norm": 1.744213840562847, "learning_rate": 4.101363033885242e-06, "loss": 0.5655, "step": 18624 }, { "epoch": 0.5708287360549221, "grad_norm": 1.889698752458755, "learning_rate": 4.100874802288664e-06, "loss": 0.6652, "step": 18625 }, { "epoch": 0.5708593845776634, "grad_norm": 1.7596080005162658, "learning_rate": 4.100386579551397e-06, "loss": 0.6123, "step": 18626 }, { "epoch": 0.5708900331004045, "grad_norm": 0.8249509422807069, "learning_rate": 4.099898365678252e-06, "loss": 0.4164, "step": 18627 }, { "epoch": 0.5709206816231458, "grad_norm": 2.02962750178488, "learning_rate": 4.099410160674035e-06, "loss": 0.6648, "step": 18628 }, { "epoch": 0.570951330145887, "grad_norm": 1.5251375610682025, "learning_rate": 4.098921964543563e-06, "loss": 0.5153, "step": 18629 }, { "epoch": 0.5709819786686282, "grad_norm": 1.6563885446690039, "learning_rate": 4.0984337772916415e-06, "loss": 0.5666, "step": 18630 }, { "epoch": 0.5710126271913694, "grad_norm": 1.8936918153650193, "learning_rate": 4.097945598923085e-06, "loss": 0.6674, "step": 18631 }, { "epoch": 0.5710432757141106, "grad_norm": 1.752757047843011, "learning_rate": 4.0974574294427016e-06, "loss": 0.6802, "step": 18632 }, { "epoch": 0.5710739242368518, "grad_norm": 0.7502620426223233, "learning_rate": 4.096969268855299e-06, "loss": 0.4062, "step": 18633 }, { "epoch": 0.571104572759593, "grad_norm": 1.6453804793594082, "learning_rate": 4.09648111716569e-06, "loss": 0.6119, "step": 18634 }, { "epoch": 0.5711352212823342, "grad_norm": 1.579005788587555, "learning_rate": 4.095992974378684e-06, "loss": 0.6547, "step": 18635 }, { "epoch": 0.5711658698050754, "grad_norm": 1.7043241206562918, "learning_rate": 4.09550484049909e-06, "loss": 0.6575, "step": 18636 }, { "epoch": 0.5711965183278166, "grad_norm": 1.6655313213192817, "learning_rate": 4.0950167155317185e-06, "loss": 0.5153, "step": 18637 }, { "epoch": 0.5712271668505579, "grad_norm": 1.5641606636873036, "learning_rate": 4.094528599481379e-06, "loss": 0.5836, "step": 18638 }, { "epoch": 0.571257815373299, "grad_norm": 1.6841573829859378, "learning_rate": 4.0940404923528824e-06, "loss": 0.5688, "step": 18639 }, { "epoch": 0.5712884638960403, "grad_norm": 1.8300882506256957, "learning_rate": 4.093552394151034e-06, "loss": 0.6288, "step": 18640 }, { "epoch": 0.5713191124187814, "grad_norm": 1.8363298629448734, "learning_rate": 4.0930643048806465e-06, "loss": 0.6501, "step": 18641 }, { "epoch": 0.5713497609415226, "grad_norm": 1.699812879177453, "learning_rate": 4.092576224546529e-06, "loss": 0.5223, "step": 18642 }, { "epoch": 0.5713804094642638, "grad_norm": 1.5344616696820645, "learning_rate": 4.092088153153491e-06, "loss": 0.54, "step": 18643 }, { "epoch": 0.571411057987005, "grad_norm": 1.6906095383612127, "learning_rate": 4.091600090706338e-06, "loss": 0.58, "step": 18644 }, { "epoch": 0.5714417065097462, "grad_norm": 1.5646967319711846, "learning_rate": 4.091112037209885e-06, "loss": 0.5844, "step": 18645 }, { "epoch": 0.5714723550324874, "grad_norm": 1.8110991869603046, "learning_rate": 4.090623992668934e-06, "loss": 0.5549, "step": 18646 }, { "epoch": 0.5715030035552287, "grad_norm": 1.7390293259357401, "learning_rate": 4.0901359570883006e-06, "loss": 0.6386, "step": 18647 }, { "epoch": 0.5715336520779698, "grad_norm": 1.6445043668223127, "learning_rate": 4.089647930472791e-06, "loss": 0.5685, "step": 18648 }, { "epoch": 0.5715643006007111, "grad_norm": 1.529435637947826, "learning_rate": 4.089159912827209e-06, "loss": 0.5815, "step": 18649 }, { "epoch": 0.5715949491234522, "grad_norm": 1.5848806900928287, "learning_rate": 4.088671904156372e-06, "loss": 0.5754, "step": 18650 }, { "epoch": 0.5716255976461935, "grad_norm": 1.8439035970609934, "learning_rate": 4.088183904465081e-06, "loss": 0.7485, "step": 18651 }, { "epoch": 0.5716562461689346, "grad_norm": 1.8723362703957536, "learning_rate": 4.0876959137581484e-06, "loss": 0.6014, "step": 18652 }, { "epoch": 0.5716868946916759, "grad_norm": 1.7246858548403594, "learning_rate": 4.0872079320403814e-06, "loss": 0.6221, "step": 18653 }, { "epoch": 0.571717543214417, "grad_norm": 0.8334995663366631, "learning_rate": 4.086719959316588e-06, "loss": 0.423, "step": 18654 }, { "epoch": 0.5717481917371583, "grad_norm": 1.709454355407097, "learning_rate": 4.0862319955915755e-06, "loss": 0.6693, "step": 18655 }, { "epoch": 0.5717788402598994, "grad_norm": 0.7896206068414731, "learning_rate": 4.085744040870155e-06, "loss": 0.425, "step": 18656 }, { "epoch": 0.5718094887826407, "grad_norm": 0.7881313996489739, "learning_rate": 4.08525609515713e-06, "loss": 0.3937, "step": 18657 }, { "epoch": 0.5718401373053819, "grad_norm": 1.8687240013053477, "learning_rate": 4.084768158457313e-06, "loss": 0.5956, "step": 18658 }, { "epoch": 0.5718707858281231, "grad_norm": 2.1115516383793977, "learning_rate": 4.08428023077551e-06, "loss": 0.6692, "step": 18659 }, { "epoch": 0.5719014343508643, "grad_norm": 1.5318405145424496, "learning_rate": 4.0837923121165245e-06, "loss": 0.5791, "step": 18660 }, { "epoch": 0.5719320828736055, "grad_norm": 0.7743359141488005, "learning_rate": 4.083304402485171e-06, "loss": 0.4346, "step": 18661 }, { "epoch": 0.5719627313963467, "grad_norm": 1.8553516080382448, "learning_rate": 4.082816501886254e-06, "loss": 0.5723, "step": 18662 }, { "epoch": 0.5719933799190879, "grad_norm": 2.2166757359041833, "learning_rate": 4.082328610324579e-06, "loss": 0.7438, "step": 18663 }, { "epoch": 0.5720240284418291, "grad_norm": 1.723288761716145, "learning_rate": 4.0818407278049545e-06, "loss": 0.5696, "step": 18664 }, { "epoch": 0.5720546769645704, "grad_norm": 1.7641122616585758, "learning_rate": 4.081352854332189e-06, "loss": 0.6614, "step": 18665 }, { "epoch": 0.5720853254873115, "grad_norm": 1.741745902178236, "learning_rate": 4.08086498991109e-06, "loss": 0.7036, "step": 18666 }, { "epoch": 0.5721159740100528, "grad_norm": 1.7719677363233668, "learning_rate": 4.080377134546462e-06, "loss": 0.6758, "step": 18667 }, { "epoch": 0.5721466225327939, "grad_norm": 1.7122085921035786, "learning_rate": 4.0798892882431135e-06, "loss": 0.6315, "step": 18668 }, { "epoch": 0.5721772710555352, "grad_norm": 2.1529752825124824, "learning_rate": 4.079401451005852e-06, "loss": 0.6625, "step": 18669 }, { "epoch": 0.5722079195782763, "grad_norm": 1.877386372821195, "learning_rate": 4.078913622839485e-06, "loss": 0.6403, "step": 18670 }, { "epoch": 0.5722385681010176, "grad_norm": 1.8752050640801847, "learning_rate": 4.078425803748813e-06, "loss": 0.668, "step": 18671 }, { "epoch": 0.5722692166237587, "grad_norm": 0.7896589399158996, "learning_rate": 4.077937993738652e-06, "loss": 0.4327, "step": 18672 }, { "epoch": 0.5722998651464999, "grad_norm": 1.6862927302293986, "learning_rate": 4.0774501928138005e-06, "loss": 0.6176, "step": 18673 }, { "epoch": 0.5723305136692411, "grad_norm": 1.846129811321577, "learning_rate": 4.076962400979071e-06, "loss": 0.6899, "step": 18674 }, { "epoch": 0.5723611621919823, "grad_norm": 0.825350706263423, "learning_rate": 4.076474618239266e-06, "loss": 0.4397, "step": 18675 }, { "epoch": 0.5723918107147236, "grad_norm": 1.9763691252990974, "learning_rate": 4.0759868445991925e-06, "loss": 0.5824, "step": 18676 }, { "epoch": 0.5724224592374647, "grad_norm": 1.8205705228603557, "learning_rate": 4.075499080063658e-06, "loss": 0.6471, "step": 18677 }, { "epoch": 0.572453107760206, "grad_norm": 1.873774748932699, "learning_rate": 4.075011324637468e-06, "loss": 0.7267, "step": 18678 }, { "epoch": 0.5724837562829471, "grad_norm": 1.7269222559711754, "learning_rate": 4.074523578325426e-06, "loss": 0.5923, "step": 18679 }, { "epoch": 0.5725144048056884, "grad_norm": 1.7647163658627654, "learning_rate": 4.0740358411323415e-06, "loss": 0.6634, "step": 18680 }, { "epoch": 0.5725450533284295, "grad_norm": 1.8929572939783104, "learning_rate": 4.07354811306302e-06, "loss": 0.615, "step": 18681 }, { "epoch": 0.5725757018511708, "grad_norm": 1.662468894449907, "learning_rate": 4.0730603941222626e-06, "loss": 0.5828, "step": 18682 }, { "epoch": 0.5726063503739119, "grad_norm": 1.7960994581248517, "learning_rate": 4.072572684314881e-06, "loss": 0.6025, "step": 18683 }, { "epoch": 0.5726369988966532, "grad_norm": 0.7905620902658769, "learning_rate": 4.072084983645677e-06, "loss": 0.4223, "step": 18684 }, { "epoch": 0.5726676474193944, "grad_norm": 1.5483437969425105, "learning_rate": 4.071597292119457e-06, "loss": 0.4813, "step": 18685 }, { "epoch": 0.5726982959421356, "grad_norm": 1.8351772795937435, "learning_rate": 4.071109609741027e-06, "loss": 0.6328, "step": 18686 }, { "epoch": 0.5727289444648768, "grad_norm": 1.763433429969457, "learning_rate": 4.070621936515191e-06, "loss": 0.6433, "step": 18687 }, { "epoch": 0.572759592987618, "grad_norm": 1.6552416974485247, "learning_rate": 4.070134272446755e-06, "loss": 0.633, "step": 18688 }, { "epoch": 0.5727902415103592, "grad_norm": 1.7640577498541117, "learning_rate": 4.069646617540525e-06, "loss": 0.5807, "step": 18689 }, { "epoch": 0.5728208900331004, "grad_norm": 0.8290137194711901, "learning_rate": 4.069158971801304e-06, "loss": 0.4168, "step": 18690 }, { "epoch": 0.5728515385558416, "grad_norm": 1.8189824191557986, "learning_rate": 4.068671335233898e-06, "loss": 0.6635, "step": 18691 }, { "epoch": 0.5728821870785828, "grad_norm": 0.76617566572778, "learning_rate": 4.06818370784311e-06, "loss": 0.3894, "step": 18692 }, { "epoch": 0.572912835601324, "grad_norm": 1.9836323443483623, "learning_rate": 4.067696089633749e-06, "loss": 0.7082, "step": 18693 }, { "epoch": 0.5729434841240653, "grad_norm": 1.6827927765563768, "learning_rate": 4.067208480610617e-06, "loss": 0.6034, "step": 18694 }, { "epoch": 0.5729741326468064, "grad_norm": 1.824871723623736, "learning_rate": 4.066720880778516e-06, "loss": 0.7104, "step": 18695 }, { "epoch": 0.5730047811695477, "grad_norm": 0.8025110399544803, "learning_rate": 4.0662332901422545e-06, "loss": 0.4216, "step": 18696 }, { "epoch": 0.5730354296922888, "grad_norm": 1.7504411735451002, "learning_rate": 4.065745708706636e-06, "loss": 0.7122, "step": 18697 }, { "epoch": 0.5730660782150301, "grad_norm": 1.6446683151174097, "learning_rate": 4.065258136476462e-06, "loss": 0.6054, "step": 18698 }, { "epoch": 0.5730967267377712, "grad_norm": 1.5285239231822807, "learning_rate": 4.06477057345654e-06, "loss": 0.5402, "step": 18699 }, { "epoch": 0.5731273752605125, "grad_norm": 1.5963737321509834, "learning_rate": 4.064283019651674e-06, "loss": 0.6218, "step": 18700 }, { "epoch": 0.5731580237832536, "grad_norm": 0.805323681258647, "learning_rate": 4.063795475066664e-06, "loss": 0.4093, "step": 18701 }, { "epoch": 0.5731886723059949, "grad_norm": 2.008287026060275, "learning_rate": 4.063307939706319e-06, "loss": 0.6506, "step": 18702 }, { "epoch": 0.5732193208287361, "grad_norm": 1.910081852401554, "learning_rate": 4.062820413575438e-06, "loss": 0.5956, "step": 18703 }, { "epoch": 0.5732499693514772, "grad_norm": 1.8366462791804528, "learning_rate": 4.062332896678831e-06, "loss": 0.5587, "step": 18704 }, { "epoch": 0.5732806178742185, "grad_norm": 1.7222125536571526, "learning_rate": 4.061845389021296e-06, "loss": 0.5732, "step": 18705 }, { "epoch": 0.5733112663969596, "grad_norm": 0.8157828891369032, "learning_rate": 4.061357890607638e-06, "loss": 0.4208, "step": 18706 }, { "epoch": 0.5733419149197009, "grad_norm": 0.8118762383375977, "learning_rate": 4.060870401442661e-06, "loss": 0.4168, "step": 18707 }, { "epoch": 0.573372563442442, "grad_norm": 0.7806788446472324, "learning_rate": 4.060382921531169e-06, "loss": 0.4102, "step": 18708 }, { "epoch": 0.5734032119651833, "grad_norm": 1.7949516178656049, "learning_rate": 4.059895450877963e-06, "loss": 0.608, "step": 18709 }, { "epoch": 0.5734338604879244, "grad_norm": 1.8024467108659505, "learning_rate": 4.059407989487849e-06, "loss": 0.5743, "step": 18710 }, { "epoch": 0.5734645090106657, "grad_norm": 1.770031165000167, "learning_rate": 4.058920537365627e-06, "loss": 0.5438, "step": 18711 }, { "epoch": 0.5734951575334069, "grad_norm": 1.664151686977408, "learning_rate": 4.058433094516105e-06, "loss": 0.5768, "step": 18712 }, { "epoch": 0.5735258060561481, "grad_norm": 1.5092561226021963, "learning_rate": 4.057945660944081e-06, "loss": 0.6035, "step": 18713 }, { "epoch": 0.5735564545788893, "grad_norm": 0.789461214641325, "learning_rate": 4.057458236654358e-06, "loss": 0.4034, "step": 18714 }, { "epoch": 0.5735871031016305, "grad_norm": 1.719734869234221, "learning_rate": 4.056970821651742e-06, "loss": 0.6605, "step": 18715 }, { "epoch": 0.5736177516243717, "grad_norm": 1.7408256004847011, "learning_rate": 4.056483415941033e-06, "loss": 0.5956, "step": 18716 }, { "epoch": 0.5736484001471129, "grad_norm": 1.91402499410698, "learning_rate": 4.055996019527034e-06, "loss": 0.6917, "step": 18717 }, { "epoch": 0.5736790486698541, "grad_norm": 1.8692938655361395, "learning_rate": 4.0555086324145484e-06, "loss": 0.6182, "step": 18718 }, { "epoch": 0.5737096971925953, "grad_norm": 1.944004142390059, "learning_rate": 4.055021254608377e-06, "loss": 0.6256, "step": 18719 }, { "epoch": 0.5737403457153365, "grad_norm": 1.8508757276827237, "learning_rate": 4.054533886113324e-06, "loss": 0.7149, "step": 18720 }, { "epoch": 0.5737709942380778, "grad_norm": 1.463326392047493, "learning_rate": 4.05404652693419e-06, "loss": 0.6424, "step": 18721 }, { "epoch": 0.5738016427608189, "grad_norm": 1.8278241237495534, "learning_rate": 4.053559177075777e-06, "loss": 0.6918, "step": 18722 }, { "epoch": 0.5738322912835602, "grad_norm": 1.5976505126184313, "learning_rate": 4.053071836542889e-06, "loss": 0.621, "step": 18723 }, { "epoch": 0.5738629398063013, "grad_norm": 1.7113964489546594, "learning_rate": 4.052584505340327e-06, "loss": 0.5422, "step": 18724 }, { "epoch": 0.5738935883290426, "grad_norm": 1.6084583343236036, "learning_rate": 4.052097183472889e-06, "loss": 0.5619, "step": 18725 }, { "epoch": 0.5739242368517837, "grad_norm": 1.6519187816288259, "learning_rate": 4.0516098709453835e-06, "loss": 0.5796, "step": 18726 }, { "epoch": 0.573954885374525, "grad_norm": 1.5577615271906118, "learning_rate": 4.051122567762608e-06, "loss": 0.6027, "step": 18727 }, { "epoch": 0.5739855338972661, "grad_norm": 1.6240326087361183, "learning_rate": 4.050635273929362e-06, "loss": 0.5856, "step": 18728 }, { "epoch": 0.5740161824200074, "grad_norm": 1.7286066950158263, "learning_rate": 4.050147989450452e-06, "loss": 0.6409, "step": 18729 }, { "epoch": 0.5740468309427486, "grad_norm": 1.8708992044496071, "learning_rate": 4.049660714330676e-06, "loss": 0.6234, "step": 18730 }, { "epoch": 0.5740774794654898, "grad_norm": 1.6255318091932165, "learning_rate": 4.049173448574836e-06, "loss": 0.566, "step": 18731 }, { "epoch": 0.574108127988231, "grad_norm": 1.6011466140225143, "learning_rate": 4.0486861921877345e-06, "loss": 0.5991, "step": 18732 }, { "epoch": 0.5741387765109722, "grad_norm": 0.839412958917161, "learning_rate": 4.048198945174169e-06, "loss": 0.4358, "step": 18733 }, { "epoch": 0.5741694250337134, "grad_norm": 1.792624200330832, "learning_rate": 4.047711707538945e-06, "loss": 0.6701, "step": 18734 }, { "epoch": 0.5742000735564545, "grad_norm": 1.6185640228234337, "learning_rate": 4.047224479286862e-06, "loss": 0.6302, "step": 18735 }, { "epoch": 0.5742307220791958, "grad_norm": 1.70191482038724, "learning_rate": 4.0467372604227175e-06, "loss": 0.5748, "step": 18736 }, { "epoch": 0.5742613706019369, "grad_norm": 1.7842790524802312, "learning_rate": 4.046250050951316e-06, "loss": 0.7107, "step": 18737 }, { "epoch": 0.5742920191246782, "grad_norm": 1.760155953957919, "learning_rate": 4.045762850877456e-06, "loss": 0.596, "step": 18738 }, { "epoch": 0.5743226676474193, "grad_norm": 1.934191369022422, "learning_rate": 4.045275660205939e-06, "loss": 0.6476, "step": 18739 }, { "epoch": 0.5743533161701606, "grad_norm": 1.7679356064250196, "learning_rate": 4.044788478941566e-06, "loss": 0.6807, "step": 18740 }, { "epoch": 0.5743839646929018, "grad_norm": 1.8478204458742797, "learning_rate": 4.044301307089134e-06, "loss": 0.6081, "step": 18741 }, { "epoch": 0.574414613215643, "grad_norm": 1.5936610524891521, "learning_rate": 4.043814144653449e-06, "loss": 0.5206, "step": 18742 }, { "epoch": 0.5744452617383842, "grad_norm": 0.7680211368828929, "learning_rate": 4.043326991639308e-06, "loss": 0.4138, "step": 18743 }, { "epoch": 0.5744759102611254, "grad_norm": 0.7575317221481799, "learning_rate": 4.0428398480515074e-06, "loss": 0.4107, "step": 18744 }, { "epoch": 0.5745065587838666, "grad_norm": 1.8225002594596313, "learning_rate": 4.042352713894854e-06, "loss": 0.6941, "step": 18745 }, { "epoch": 0.5745372073066078, "grad_norm": 0.778559543753558, "learning_rate": 4.041865589174141e-06, "loss": 0.429, "step": 18746 }, { "epoch": 0.574567855829349, "grad_norm": 1.8025600115027338, "learning_rate": 4.0413784738941755e-06, "loss": 0.65, "step": 18747 }, { "epoch": 0.5745985043520903, "grad_norm": 1.8281054738909857, "learning_rate": 4.040891368059752e-06, "loss": 0.6594, "step": 18748 }, { "epoch": 0.5746291528748314, "grad_norm": 1.8624541359479954, "learning_rate": 4.040404271675669e-06, "loss": 0.6109, "step": 18749 }, { "epoch": 0.5746598013975727, "grad_norm": 1.8007735188241254, "learning_rate": 4.03991718474673e-06, "loss": 0.5501, "step": 18750 }, { "epoch": 0.5746904499203138, "grad_norm": 1.6816636673543641, "learning_rate": 4.0394301072777335e-06, "loss": 0.5733, "step": 18751 }, { "epoch": 0.5747210984430551, "grad_norm": 1.7242987611445153, "learning_rate": 4.038943039273476e-06, "loss": 0.5732, "step": 18752 }, { "epoch": 0.5747517469657962, "grad_norm": 0.816958333956805, "learning_rate": 4.038455980738759e-06, "loss": 0.4384, "step": 18753 }, { "epoch": 0.5747823954885375, "grad_norm": 1.8898301581921482, "learning_rate": 4.037968931678383e-06, "loss": 0.6561, "step": 18754 }, { "epoch": 0.5748130440112786, "grad_norm": 1.8417120218517797, "learning_rate": 4.037481892097143e-06, "loss": 0.5978, "step": 18755 }, { "epoch": 0.5748436925340199, "grad_norm": 0.8246687738439876, "learning_rate": 4.036994861999842e-06, "loss": 0.4206, "step": 18756 }, { "epoch": 0.574874341056761, "grad_norm": 1.5998239886914687, "learning_rate": 4.036507841391274e-06, "loss": 0.5926, "step": 18757 }, { "epoch": 0.5749049895795023, "grad_norm": 1.6785739108807767, "learning_rate": 4.036020830276245e-06, "loss": 0.6634, "step": 18758 }, { "epoch": 0.5749356381022435, "grad_norm": 1.6341034921333364, "learning_rate": 4.0355338286595465e-06, "loss": 0.5834, "step": 18759 }, { "epoch": 0.5749662866249847, "grad_norm": 1.7601490288499255, "learning_rate": 4.035046836545981e-06, "loss": 0.6196, "step": 18760 }, { "epoch": 0.5749969351477259, "grad_norm": 0.8186845874273554, "learning_rate": 4.034559853940346e-06, "loss": 0.4279, "step": 18761 }, { "epoch": 0.5750275836704671, "grad_norm": 1.95427398047557, "learning_rate": 4.0340728808474395e-06, "loss": 0.6562, "step": 18762 }, { "epoch": 0.5750582321932083, "grad_norm": 1.918999520398539, "learning_rate": 4.03358591727206e-06, "loss": 0.6375, "step": 18763 }, { "epoch": 0.5750888807159495, "grad_norm": 1.6760534671228133, "learning_rate": 4.033098963219006e-06, "loss": 0.6231, "step": 18764 }, { "epoch": 0.5751195292386907, "grad_norm": 1.8502662041340296, "learning_rate": 4.032612018693073e-06, "loss": 0.6401, "step": 18765 }, { "epoch": 0.5751501777614318, "grad_norm": 1.7878819080559125, "learning_rate": 4.032125083699064e-06, "loss": 0.6196, "step": 18766 }, { "epoch": 0.5751808262841731, "grad_norm": 1.685749397027347, "learning_rate": 4.031638158241775e-06, "loss": 0.6336, "step": 18767 }, { "epoch": 0.5752114748069143, "grad_norm": 1.9657401338420843, "learning_rate": 4.031151242326e-06, "loss": 0.656, "step": 18768 }, { "epoch": 0.5752421233296555, "grad_norm": 1.8286723694854807, "learning_rate": 4.0306643359565426e-06, "loss": 0.6199, "step": 18769 }, { "epoch": 0.5752727718523967, "grad_norm": 1.74555995960298, "learning_rate": 4.030177439138197e-06, "loss": 0.6352, "step": 18770 }, { "epoch": 0.5753034203751379, "grad_norm": 0.8167388316389214, "learning_rate": 4.029690551875759e-06, "loss": 0.4395, "step": 18771 }, { "epoch": 0.5753340688978791, "grad_norm": 1.8871980408871425, "learning_rate": 4.02920367417403e-06, "loss": 0.5942, "step": 18772 }, { "epoch": 0.5753647174206203, "grad_norm": 1.9286754220988105, "learning_rate": 4.028716806037804e-06, "loss": 0.6458, "step": 18773 }, { "epoch": 0.5753953659433615, "grad_norm": 1.69275046537425, "learning_rate": 4.028229947471881e-06, "loss": 0.5918, "step": 18774 }, { "epoch": 0.5754260144661028, "grad_norm": 0.7934624682021001, "learning_rate": 4.027743098481058e-06, "loss": 0.4121, "step": 18775 }, { "epoch": 0.5754566629888439, "grad_norm": 1.7968130844188737, "learning_rate": 4.0272562590701295e-06, "loss": 0.5899, "step": 18776 }, { "epoch": 0.5754873115115852, "grad_norm": 1.7590298871566967, "learning_rate": 4.026769429243894e-06, "loss": 0.6318, "step": 18777 }, { "epoch": 0.5755179600343263, "grad_norm": 1.6711442992132148, "learning_rate": 4.0262826090071505e-06, "loss": 0.5662, "step": 18778 }, { "epoch": 0.5755486085570676, "grad_norm": 1.6458043419976494, "learning_rate": 4.02579579836469e-06, "loss": 0.6601, "step": 18779 }, { "epoch": 0.5755792570798087, "grad_norm": 1.9642507292801148, "learning_rate": 4.025308997321316e-06, "loss": 0.6147, "step": 18780 }, { "epoch": 0.57560990560255, "grad_norm": 1.6639832140944404, "learning_rate": 4.0248222058818206e-06, "loss": 0.6106, "step": 18781 }, { "epoch": 0.5756405541252911, "grad_norm": 1.7162774626176582, "learning_rate": 4.024335424051001e-06, "loss": 0.5519, "step": 18782 }, { "epoch": 0.5756712026480324, "grad_norm": 2.0304138897400645, "learning_rate": 4.023848651833655e-06, "loss": 0.6895, "step": 18783 }, { "epoch": 0.5757018511707735, "grad_norm": 2.0240378740826346, "learning_rate": 4.023361889234576e-06, "loss": 0.5485, "step": 18784 }, { "epoch": 0.5757324996935148, "grad_norm": 1.8006528397561425, "learning_rate": 4.022875136258564e-06, "loss": 0.5835, "step": 18785 }, { "epoch": 0.575763148216256, "grad_norm": 1.7657548579209716, "learning_rate": 4.022388392910413e-06, "loss": 0.6598, "step": 18786 }, { "epoch": 0.5757937967389972, "grad_norm": 1.744336304415083, "learning_rate": 4.021901659194919e-06, "loss": 0.6202, "step": 18787 }, { "epoch": 0.5758244452617384, "grad_norm": 1.938512717158902, "learning_rate": 4.02141493511688e-06, "loss": 0.698, "step": 18788 }, { "epoch": 0.5758550937844796, "grad_norm": 1.926026765789517, "learning_rate": 4.020928220681089e-06, "loss": 0.4908, "step": 18789 }, { "epoch": 0.5758857423072208, "grad_norm": 1.858311152211975, "learning_rate": 4.020441515892341e-06, "loss": 0.5935, "step": 18790 }, { "epoch": 0.575916390829962, "grad_norm": 2.0067330397502112, "learning_rate": 4.019954820755435e-06, "loss": 0.7382, "step": 18791 }, { "epoch": 0.5759470393527032, "grad_norm": 1.9005732015624506, "learning_rate": 4.019468135275164e-06, "loss": 0.5518, "step": 18792 }, { "epoch": 0.5759776878754445, "grad_norm": 1.908529439903129, "learning_rate": 4.018981459456325e-06, "loss": 0.6467, "step": 18793 }, { "epoch": 0.5760083363981856, "grad_norm": 1.7868835495999924, "learning_rate": 4.018494793303714e-06, "loss": 0.7088, "step": 18794 }, { "epoch": 0.5760389849209269, "grad_norm": 1.8134203033102543, "learning_rate": 4.018008136822122e-06, "loss": 0.6108, "step": 18795 }, { "epoch": 0.576069633443668, "grad_norm": 1.7862059004319037, "learning_rate": 4.0175214900163485e-06, "loss": 0.5909, "step": 18796 }, { "epoch": 0.5761002819664092, "grad_norm": 0.7506017518537921, "learning_rate": 4.017034852891189e-06, "loss": 0.4052, "step": 18797 }, { "epoch": 0.5761309304891504, "grad_norm": 1.9117699493480926, "learning_rate": 4.0165482254514325e-06, "loss": 0.6803, "step": 18798 }, { "epoch": 0.5761615790118916, "grad_norm": 2.22428111040531, "learning_rate": 4.0160616077018826e-06, "loss": 0.6788, "step": 18799 }, { "epoch": 0.5761922275346328, "grad_norm": 1.831250623647471, "learning_rate": 4.015574999647324e-06, "loss": 0.5643, "step": 18800 }, { "epoch": 0.576222876057374, "grad_norm": 1.614724546030611, "learning_rate": 4.0150884012925614e-06, "loss": 0.5678, "step": 18801 }, { "epoch": 0.5762535245801153, "grad_norm": 1.5548051948200137, "learning_rate": 4.014601812642384e-06, "loss": 0.628, "step": 18802 }, { "epoch": 0.5762841731028564, "grad_norm": 1.6913456982298878, "learning_rate": 4.0141152337015854e-06, "loss": 0.587, "step": 18803 }, { "epoch": 0.5763148216255977, "grad_norm": 1.7298713897036564, "learning_rate": 4.013628664474963e-06, "loss": 0.6554, "step": 18804 }, { "epoch": 0.5763454701483388, "grad_norm": 1.6408702410170206, "learning_rate": 4.01314210496731e-06, "loss": 0.6008, "step": 18805 }, { "epoch": 0.5763761186710801, "grad_norm": 1.8415176889363312, "learning_rate": 4.012655555183419e-06, "loss": 0.6207, "step": 18806 }, { "epoch": 0.5764067671938212, "grad_norm": 1.6881946945799429, "learning_rate": 4.012169015128086e-06, "loss": 0.6138, "step": 18807 }, { "epoch": 0.5764374157165625, "grad_norm": 1.6548358707230286, "learning_rate": 4.0116824848061065e-06, "loss": 0.5996, "step": 18808 }, { "epoch": 0.5764680642393036, "grad_norm": 1.6635133969882183, "learning_rate": 4.011195964222268e-06, "loss": 0.5054, "step": 18809 }, { "epoch": 0.5764987127620449, "grad_norm": 1.7226462558609414, "learning_rate": 4.010709453381373e-06, "loss": 0.5462, "step": 18810 }, { "epoch": 0.576529361284786, "grad_norm": 1.707283636991415, "learning_rate": 4.010222952288207e-06, "loss": 0.5481, "step": 18811 }, { "epoch": 0.5765600098075273, "grad_norm": 2.038061896093011, "learning_rate": 4.009736460947571e-06, "loss": 0.6471, "step": 18812 }, { "epoch": 0.5765906583302685, "grad_norm": 1.7452004714577414, "learning_rate": 4.009249979364254e-06, "loss": 0.5374, "step": 18813 }, { "epoch": 0.5766213068530097, "grad_norm": 1.7193234071964985, "learning_rate": 4.008763507543048e-06, "loss": 0.5899, "step": 18814 }, { "epoch": 0.5766519553757509, "grad_norm": 1.8686455101828245, "learning_rate": 4.0082770454887514e-06, "loss": 0.6112, "step": 18815 }, { "epoch": 0.5766826038984921, "grad_norm": 2.5717038872496736, "learning_rate": 4.007790593206154e-06, "loss": 0.6429, "step": 18816 }, { "epoch": 0.5767132524212333, "grad_norm": 1.8247478114906734, "learning_rate": 4.00730415070005e-06, "loss": 0.6853, "step": 18817 }, { "epoch": 0.5767439009439745, "grad_norm": 1.5935680865399757, "learning_rate": 4.006817717975232e-06, "loss": 0.6644, "step": 18818 }, { "epoch": 0.5767745494667157, "grad_norm": 1.863094555058133, "learning_rate": 4.0063312950364925e-06, "loss": 0.6506, "step": 18819 }, { "epoch": 0.576805197989457, "grad_norm": 1.6753770166934348, "learning_rate": 4.005844881888626e-06, "loss": 0.6238, "step": 18820 }, { "epoch": 0.5768358465121981, "grad_norm": 1.626497222418724, "learning_rate": 4.005358478536425e-06, "loss": 0.5629, "step": 18821 }, { "epoch": 0.5768664950349394, "grad_norm": 1.826012440472572, "learning_rate": 4.004872084984679e-06, "loss": 0.6147, "step": 18822 }, { "epoch": 0.5768971435576805, "grad_norm": 1.723789799331234, "learning_rate": 4.0043857012381855e-06, "loss": 0.6067, "step": 18823 }, { "epoch": 0.5769277920804218, "grad_norm": 1.7058360509704875, "learning_rate": 4.003899327301733e-06, "loss": 0.6887, "step": 18824 }, { "epoch": 0.5769584406031629, "grad_norm": 1.754389458807407, "learning_rate": 4.003412963180115e-06, "loss": 0.599, "step": 18825 }, { "epoch": 0.5769890891259042, "grad_norm": 1.6704626756797678, "learning_rate": 4.002926608878125e-06, "loss": 0.5116, "step": 18826 }, { "epoch": 0.5770197376486453, "grad_norm": 1.8724305655084976, "learning_rate": 4.002440264400553e-06, "loss": 0.6719, "step": 18827 }, { "epoch": 0.5770503861713865, "grad_norm": 1.9402414587902919, "learning_rate": 4.001953929752193e-06, "loss": 0.6608, "step": 18828 }, { "epoch": 0.5770810346941277, "grad_norm": 0.8118793650515453, "learning_rate": 4.001467604937837e-06, "loss": 0.4166, "step": 18829 }, { "epoch": 0.5771116832168689, "grad_norm": 1.6498627345178734, "learning_rate": 4.000981289962275e-06, "loss": 0.5923, "step": 18830 }, { "epoch": 0.5771423317396102, "grad_norm": 1.7718235283609307, "learning_rate": 4.000494984830301e-06, "loss": 0.6244, "step": 18831 }, { "epoch": 0.5771729802623513, "grad_norm": 1.503583252731151, "learning_rate": 4.000008689546707e-06, "loss": 0.6064, "step": 18832 }, { "epoch": 0.5772036287850926, "grad_norm": 1.7910299731948298, "learning_rate": 3.99952240411628e-06, "loss": 0.6763, "step": 18833 }, { "epoch": 0.5772342773078337, "grad_norm": 1.7890077840071472, "learning_rate": 3.999036128543817e-06, "loss": 0.687, "step": 18834 }, { "epoch": 0.577264925830575, "grad_norm": 1.8138604410574317, "learning_rate": 3.998549862834106e-06, "loss": 0.6705, "step": 18835 }, { "epoch": 0.5772955743533161, "grad_norm": 1.5088078262215032, "learning_rate": 3.998063606991939e-06, "loss": 0.5461, "step": 18836 }, { "epoch": 0.5773262228760574, "grad_norm": 1.8097723071655516, "learning_rate": 3.997577361022109e-06, "loss": 0.6104, "step": 18837 }, { "epoch": 0.5773568713987985, "grad_norm": 1.7884042007503766, "learning_rate": 3.997091124929404e-06, "loss": 0.6262, "step": 18838 }, { "epoch": 0.5773875199215398, "grad_norm": 1.66992991684294, "learning_rate": 3.996604898718618e-06, "loss": 0.6604, "step": 18839 }, { "epoch": 0.577418168444281, "grad_norm": 1.6110986127004108, "learning_rate": 3.996118682394542e-06, "loss": 0.5528, "step": 18840 }, { "epoch": 0.5774488169670222, "grad_norm": 1.681586078645647, "learning_rate": 3.995632475961962e-06, "loss": 0.5821, "step": 18841 }, { "epoch": 0.5774794654897634, "grad_norm": 1.8359156453868568, "learning_rate": 3.995146279425676e-06, "loss": 0.6672, "step": 18842 }, { "epoch": 0.5775101140125046, "grad_norm": 1.8895487247597575, "learning_rate": 3.9946600927904695e-06, "loss": 0.7191, "step": 18843 }, { "epoch": 0.5775407625352458, "grad_norm": 1.6965280514636176, "learning_rate": 3.994173916061133e-06, "loss": 0.6819, "step": 18844 }, { "epoch": 0.577571411057987, "grad_norm": 1.6507067122827022, "learning_rate": 3.993687749242459e-06, "loss": 0.64, "step": 18845 }, { "epoch": 0.5776020595807282, "grad_norm": 1.7351365952598463, "learning_rate": 3.993201592339237e-06, "loss": 0.6322, "step": 18846 }, { "epoch": 0.5776327081034694, "grad_norm": 1.7650906851710664, "learning_rate": 3.992715445356258e-06, "loss": 0.6278, "step": 18847 }, { "epoch": 0.5776633566262106, "grad_norm": 1.9237215912710173, "learning_rate": 3.992229308298311e-06, "loss": 0.5953, "step": 18848 }, { "epoch": 0.5776940051489519, "grad_norm": 1.7092703038003898, "learning_rate": 3.991743181170186e-06, "loss": 0.6331, "step": 18849 }, { "epoch": 0.577724653671693, "grad_norm": 2.104430457021574, "learning_rate": 3.991257063976673e-06, "loss": 0.6567, "step": 18850 }, { "epoch": 0.5777553021944343, "grad_norm": 1.6883585614620156, "learning_rate": 3.990770956722565e-06, "loss": 0.529, "step": 18851 }, { "epoch": 0.5777859507171754, "grad_norm": 1.7531566251966921, "learning_rate": 3.990284859412646e-06, "loss": 0.6282, "step": 18852 }, { "epoch": 0.5778165992399167, "grad_norm": 1.7358590795945619, "learning_rate": 3.989798772051711e-06, "loss": 0.6198, "step": 18853 }, { "epoch": 0.5778472477626578, "grad_norm": 1.523766674195304, "learning_rate": 3.9893126946445435e-06, "loss": 0.5816, "step": 18854 }, { "epoch": 0.5778778962853991, "grad_norm": 0.8588223162208546, "learning_rate": 3.98882662719594e-06, "loss": 0.4466, "step": 18855 }, { "epoch": 0.5779085448081402, "grad_norm": 1.978976516697107, "learning_rate": 3.988340569710686e-06, "loss": 0.7348, "step": 18856 }, { "epoch": 0.5779391933308815, "grad_norm": 1.7450573137124346, "learning_rate": 3.98785452219357e-06, "loss": 0.6239, "step": 18857 }, { "epoch": 0.5779698418536227, "grad_norm": 1.82460750607765, "learning_rate": 3.9873684846493835e-06, "loss": 0.541, "step": 18858 }, { "epoch": 0.5780004903763638, "grad_norm": 1.861186087931137, "learning_rate": 3.986882457082914e-06, "loss": 0.6032, "step": 18859 }, { "epoch": 0.5780311388991051, "grad_norm": 1.5671644231177526, "learning_rate": 3.98639643949895e-06, "loss": 0.5927, "step": 18860 }, { "epoch": 0.5780617874218462, "grad_norm": 2.168612657449596, "learning_rate": 3.985910431902282e-06, "loss": 0.5801, "step": 18861 }, { "epoch": 0.5780924359445875, "grad_norm": 0.7861206352383585, "learning_rate": 3.985424434297699e-06, "loss": 0.3928, "step": 18862 }, { "epoch": 0.5781230844673286, "grad_norm": 1.8660160058221575, "learning_rate": 3.984938446689987e-06, "loss": 0.5626, "step": 18863 }, { "epoch": 0.5781537329900699, "grad_norm": 2.066246335215465, "learning_rate": 3.9844524690839376e-06, "loss": 0.6742, "step": 18864 }, { "epoch": 0.578184381512811, "grad_norm": 1.597497419857301, "learning_rate": 3.983966501484336e-06, "loss": 0.6327, "step": 18865 }, { "epoch": 0.5782150300355523, "grad_norm": 1.7757359138480784, "learning_rate": 3.983480543895974e-06, "loss": 0.6239, "step": 18866 }, { "epoch": 0.5782456785582935, "grad_norm": 1.69086767862164, "learning_rate": 3.982994596323638e-06, "loss": 0.5705, "step": 18867 }, { "epoch": 0.5782763270810347, "grad_norm": 1.9365794261065634, "learning_rate": 3.982508658772116e-06, "loss": 0.6799, "step": 18868 }, { "epoch": 0.5783069756037759, "grad_norm": 1.7964754393901103, "learning_rate": 3.982022731246197e-06, "loss": 0.5969, "step": 18869 }, { "epoch": 0.5783376241265171, "grad_norm": 1.9462620461075495, "learning_rate": 3.981536813750668e-06, "loss": 0.7289, "step": 18870 }, { "epoch": 0.5783682726492583, "grad_norm": 0.7765631044105008, "learning_rate": 3.981050906290317e-06, "loss": 0.4487, "step": 18871 }, { "epoch": 0.5783989211719995, "grad_norm": 1.6333190079465705, "learning_rate": 3.980565008869933e-06, "loss": 0.6476, "step": 18872 }, { "epoch": 0.5784295696947407, "grad_norm": 1.671719752634486, "learning_rate": 3.9800791214943015e-06, "loss": 0.534, "step": 18873 }, { "epoch": 0.578460218217482, "grad_norm": 1.89647043579649, "learning_rate": 3.979593244168214e-06, "loss": 0.6003, "step": 18874 }, { "epoch": 0.5784908667402231, "grad_norm": 1.709818679140625, "learning_rate": 3.979107376896454e-06, "loss": 0.6732, "step": 18875 }, { "epoch": 0.5785215152629644, "grad_norm": 1.832021172193617, "learning_rate": 3.978621519683808e-06, "loss": 0.5495, "step": 18876 }, { "epoch": 0.5785521637857055, "grad_norm": 1.6312292175190104, "learning_rate": 3.97813567253507e-06, "loss": 0.5674, "step": 18877 }, { "epoch": 0.5785828123084468, "grad_norm": 1.6846159321521135, "learning_rate": 3.97764983545502e-06, "loss": 0.5808, "step": 18878 }, { "epoch": 0.5786134608311879, "grad_norm": 1.6274705613228861, "learning_rate": 3.977164008448447e-06, "loss": 0.5952, "step": 18879 }, { "epoch": 0.5786441093539292, "grad_norm": 1.549273642572994, "learning_rate": 3.976678191520141e-06, "loss": 0.6127, "step": 18880 }, { "epoch": 0.5786747578766703, "grad_norm": 1.7278249059770732, "learning_rate": 3.976192384674884e-06, "loss": 0.5924, "step": 18881 }, { "epoch": 0.5787054063994116, "grad_norm": 1.7711056986323643, "learning_rate": 3.9757065879174665e-06, "loss": 0.7084, "step": 18882 }, { "epoch": 0.5787360549221527, "grad_norm": 1.5230219464319181, "learning_rate": 3.975220801252674e-06, "loss": 0.5534, "step": 18883 }, { "epoch": 0.578766703444894, "grad_norm": 1.7532240799666567, "learning_rate": 3.974735024685293e-06, "loss": 0.7277, "step": 18884 }, { "epoch": 0.5787973519676352, "grad_norm": 1.814303048979287, "learning_rate": 3.974249258220112e-06, "loss": 0.6046, "step": 18885 }, { "epoch": 0.5788280004903764, "grad_norm": 1.7130448615705853, "learning_rate": 3.973763501861914e-06, "loss": 0.5473, "step": 18886 }, { "epoch": 0.5788586490131176, "grad_norm": 1.9178930600722335, "learning_rate": 3.973277755615486e-06, "loss": 0.5864, "step": 18887 }, { "epoch": 0.5788892975358588, "grad_norm": 1.756196373356356, "learning_rate": 3.972792019485616e-06, "loss": 0.5726, "step": 18888 }, { "epoch": 0.5789199460586, "grad_norm": 1.7857119890954702, "learning_rate": 3.9723062934770895e-06, "loss": 0.5216, "step": 18889 }, { "epoch": 0.5789505945813411, "grad_norm": 1.9364107395076746, "learning_rate": 3.97182057759469e-06, "loss": 0.6251, "step": 18890 }, { "epoch": 0.5789812431040824, "grad_norm": 1.6829742582751677, "learning_rate": 3.971334871843207e-06, "loss": 0.6935, "step": 18891 }, { "epoch": 0.5790118916268235, "grad_norm": 1.9221703675047463, "learning_rate": 3.970849176227424e-06, "loss": 0.6706, "step": 18892 }, { "epoch": 0.5790425401495648, "grad_norm": 1.9098125261278227, "learning_rate": 3.9703634907521285e-06, "loss": 0.6693, "step": 18893 }, { "epoch": 0.579073188672306, "grad_norm": 1.7750040151755104, "learning_rate": 3.969877815422106e-06, "loss": 0.6533, "step": 18894 }, { "epoch": 0.5791038371950472, "grad_norm": 1.9093406480955073, "learning_rate": 3.969392150242136e-06, "loss": 0.6081, "step": 18895 }, { "epoch": 0.5791344857177884, "grad_norm": 1.712471603843466, "learning_rate": 3.968906495217014e-06, "loss": 0.6249, "step": 18896 }, { "epoch": 0.5791651342405296, "grad_norm": 1.5864554960967796, "learning_rate": 3.968420850351519e-06, "loss": 0.6333, "step": 18897 }, { "epoch": 0.5791957827632708, "grad_norm": 0.7966617739676499, "learning_rate": 3.967935215650436e-06, "loss": 0.4107, "step": 18898 }, { "epoch": 0.579226431286012, "grad_norm": 0.7948834689095741, "learning_rate": 3.967449591118552e-06, "loss": 0.4324, "step": 18899 }, { "epoch": 0.5792570798087532, "grad_norm": 1.7839555100794902, "learning_rate": 3.966963976760651e-06, "loss": 0.6108, "step": 18900 }, { "epoch": 0.5792877283314944, "grad_norm": 1.824351835644095, "learning_rate": 3.966478372581518e-06, "loss": 0.5777, "step": 18901 }, { "epoch": 0.5793183768542356, "grad_norm": 1.6951830624341737, "learning_rate": 3.965992778585939e-06, "loss": 0.6029, "step": 18902 }, { "epoch": 0.5793490253769769, "grad_norm": 1.829333066310494, "learning_rate": 3.965507194778697e-06, "loss": 0.6547, "step": 18903 }, { "epoch": 0.579379673899718, "grad_norm": 1.4148570494587638, "learning_rate": 3.965021621164577e-06, "loss": 0.5545, "step": 18904 }, { "epoch": 0.5794103224224593, "grad_norm": 1.5533061642102701, "learning_rate": 3.964536057748366e-06, "loss": 0.5486, "step": 18905 }, { "epoch": 0.5794409709452004, "grad_norm": 1.6479168279452092, "learning_rate": 3.964050504534844e-06, "loss": 0.5253, "step": 18906 }, { "epoch": 0.5794716194679417, "grad_norm": 1.7071731270405726, "learning_rate": 3.963564961528798e-06, "loss": 0.5753, "step": 18907 }, { "epoch": 0.5795022679906828, "grad_norm": 1.6923949672333296, "learning_rate": 3.9630794287350126e-06, "loss": 0.6171, "step": 18908 }, { "epoch": 0.5795329165134241, "grad_norm": 1.491922534649056, "learning_rate": 3.962593906158269e-06, "loss": 0.5758, "step": 18909 }, { "epoch": 0.5795635650361652, "grad_norm": 1.7405652207790416, "learning_rate": 3.962108393803354e-06, "loss": 0.6504, "step": 18910 }, { "epoch": 0.5795942135589065, "grad_norm": 1.6488155415742385, "learning_rate": 3.96162289167505e-06, "loss": 0.5667, "step": 18911 }, { "epoch": 0.5796248620816477, "grad_norm": 1.7993934709429706, "learning_rate": 3.961137399778142e-06, "loss": 0.5666, "step": 18912 }, { "epoch": 0.5796555106043889, "grad_norm": 1.6354525310688761, "learning_rate": 3.960651918117413e-06, "loss": 0.7352, "step": 18913 }, { "epoch": 0.5796861591271301, "grad_norm": 1.7510168881817234, "learning_rate": 3.960166446697645e-06, "loss": 0.6283, "step": 18914 }, { "epoch": 0.5797168076498713, "grad_norm": 1.8861553120080479, "learning_rate": 3.959680985523625e-06, "loss": 0.5966, "step": 18915 }, { "epoch": 0.5797474561726125, "grad_norm": 1.821716876216917, "learning_rate": 3.959195534600136e-06, "loss": 0.6673, "step": 18916 }, { "epoch": 0.5797781046953537, "grad_norm": 1.7661404972048478, "learning_rate": 3.958710093931956e-06, "loss": 0.7014, "step": 18917 }, { "epoch": 0.5798087532180949, "grad_norm": 1.8021924343413203, "learning_rate": 3.9582246635238745e-06, "loss": 0.6557, "step": 18918 }, { "epoch": 0.5798394017408361, "grad_norm": 1.9903088641900617, "learning_rate": 3.957739243380669e-06, "loss": 0.6835, "step": 18919 }, { "epoch": 0.5798700502635773, "grad_norm": 1.8509626663336216, "learning_rate": 3.957253833507129e-06, "loss": 0.6396, "step": 18920 }, { "epoch": 0.5799006987863184, "grad_norm": 2.1411508186682555, "learning_rate": 3.956768433908031e-06, "loss": 0.7176, "step": 18921 }, { "epoch": 0.5799313473090597, "grad_norm": 1.6101372409809136, "learning_rate": 3.9562830445881615e-06, "loss": 0.6361, "step": 18922 }, { "epoch": 0.5799619958318009, "grad_norm": 1.6943193102028438, "learning_rate": 3.9557976655523025e-06, "loss": 0.6199, "step": 18923 }, { "epoch": 0.5799926443545421, "grad_norm": 1.7423047906985119, "learning_rate": 3.955312296805237e-06, "loss": 0.6132, "step": 18924 }, { "epoch": 0.5800232928772833, "grad_norm": 1.8318771276553298, "learning_rate": 3.954826938351745e-06, "loss": 0.5536, "step": 18925 }, { "epoch": 0.5800539414000245, "grad_norm": 1.6038515912091178, "learning_rate": 3.9543415901966115e-06, "loss": 0.6306, "step": 18926 }, { "epoch": 0.5800845899227657, "grad_norm": 1.8578342056113053, "learning_rate": 3.953856252344617e-06, "loss": 0.6762, "step": 18927 }, { "epoch": 0.5801152384455069, "grad_norm": 1.8483329862136157, "learning_rate": 3.953370924800546e-06, "loss": 0.6801, "step": 18928 }, { "epoch": 0.5801458869682481, "grad_norm": 1.6686383767990085, "learning_rate": 3.952885607569179e-06, "loss": 0.6678, "step": 18929 }, { "epoch": 0.5801765354909894, "grad_norm": 1.7656261804779392, "learning_rate": 3.952400300655297e-06, "loss": 0.6875, "step": 18930 }, { "epoch": 0.5802071840137305, "grad_norm": 1.5909969105549584, "learning_rate": 3.951915004063683e-06, "loss": 0.5331, "step": 18931 }, { "epoch": 0.5802378325364718, "grad_norm": 1.5893098571882533, "learning_rate": 3.95142971779912e-06, "loss": 0.5348, "step": 18932 }, { "epoch": 0.5802684810592129, "grad_norm": 1.7220476894821328, "learning_rate": 3.950944441866386e-06, "loss": 0.6324, "step": 18933 }, { "epoch": 0.5802991295819542, "grad_norm": 1.8199679306624506, "learning_rate": 3.950459176270267e-06, "loss": 0.5975, "step": 18934 }, { "epoch": 0.5803297781046953, "grad_norm": 1.7550451596493255, "learning_rate": 3.9499739210155405e-06, "loss": 0.5862, "step": 18935 }, { "epoch": 0.5803604266274366, "grad_norm": 1.683519084411721, "learning_rate": 3.94948867610699e-06, "loss": 0.6315, "step": 18936 }, { "epoch": 0.5803910751501777, "grad_norm": 1.6571037683640675, "learning_rate": 3.949003441549398e-06, "loss": 0.531, "step": 18937 }, { "epoch": 0.580421723672919, "grad_norm": 1.7217206362050412, "learning_rate": 3.948518217347541e-06, "loss": 0.6289, "step": 18938 }, { "epoch": 0.5804523721956601, "grad_norm": 1.6816871757867495, "learning_rate": 3.948033003506206e-06, "loss": 0.6215, "step": 18939 }, { "epoch": 0.5804830207184014, "grad_norm": 1.7926007341689418, "learning_rate": 3.94754780003017e-06, "loss": 0.697, "step": 18940 }, { "epoch": 0.5805136692411426, "grad_norm": 1.7423379082724322, "learning_rate": 3.9470626069242145e-06, "loss": 0.6857, "step": 18941 }, { "epoch": 0.5805443177638838, "grad_norm": 1.7164636768227348, "learning_rate": 3.946577424193121e-06, "loss": 0.6832, "step": 18942 }, { "epoch": 0.580574966286625, "grad_norm": 2.0339340560483463, "learning_rate": 3.94609225184167e-06, "loss": 0.6545, "step": 18943 }, { "epoch": 0.5806056148093662, "grad_norm": 2.08725966643522, "learning_rate": 3.945607089874639e-06, "loss": 0.6896, "step": 18944 }, { "epoch": 0.5806362633321074, "grad_norm": 1.6059571277138458, "learning_rate": 3.945121938296814e-06, "loss": 0.56, "step": 18945 }, { "epoch": 0.5806669118548486, "grad_norm": 1.8228131711728284, "learning_rate": 3.94463679711297e-06, "loss": 0.7179, "step": 18946 }, { "epoch": 0.5806975603775898, "grad_norm": 1.7407176349735027, "learning_rate": 3.9441516663278925e-06, "loss": 0.6896, "step": 18947 }, { "epoch": 0.580728208900331, "grad_norm": 1.6064493269984337, "learning_rate": 3.943666545946359e-06, "loss": 0.5515, "step": 18948 }, { "epoch": 0.5807588574230722, "grad_norm": 1.714959072568832, "learning_rate": 3.9431814359731455e-06, "loss": 0.5866, "step": 18949 }, { "epoch": 0.5807895059458135, "grad_norm": 1.8612277439558915, "learning_rate": 3.942696336413039e-06, "loss": 0.5918, "step": 18950 }, { "epoch": 0.5808201544685546, "grad_norm": 1.618283113714242, "learning_rate": 3.942211247270816e-06, "loss": 0.628, "step": 18951 }, { "epoch": 0.5808508029912958, "grad_norm": 0.9296375168979066, "learning_rate": 3.941726168551254e-06, "loss": 0.4395, "step": 18952 }, { "epoch": 0.580881451514037, "grad_norm": 1.7990686166190917, "learning_rate": 3.941241100259136e-06, "loss": 0.6429, "step": 18953 }, { "epoch": 0.5809121000367782, "grad_norm": 1.7817424002485185, "learning_rate": 3.9407560423992405e-06, "loss": 0.5909, "step": 18954 }, { "epoch": 0.5809427485595194, "grad_norm": 2.051169567877565, "learning_rate": 3.940270994976347e-06, "loss": 0.6747, "step": 18955 }, { "epoch": 0.5809733970822606, "grad_norm": 1.712565279869514, "learning_rate": 3.939785957995234e-06, "loss": 0.6054, "step": 18956 }, { "epoch": 0.5810040456050019, "grad_norm": 1.8112524515372512, "learning_rate": 3.9393009314606815e-06, "loss": 0.7009, "step": 18957 }, { "epoch": 0.581034694127743, "grad_norm": 1.7676011805528935, "learning_rate": 3.938815915377468e-06, "loss": 0.666, "step": 18958 }, { "epoch": 0.5810653426504843, "grad_norm": 1.8279322846900108, "learning_rate": 3.938330909750374e-06, "loss": 0.617, "step": 18959 }, { "epoch": 0.5810959911732254, "grad_norm": 0.7857405557853585, "learning_rate": 3.937845914584175e-06, "loss": 0.4322, "step": 18960 }, { "epoch": 0.5811266396959667, "grad_norm": 0.8201070028413023, "learning_rate": 3.937360929883654e-06, "loss": 0.4347, "step": 18961 }, { "epoch": 0.5811572882187078, "grad_norm": 2.0785707160066673, "learning_rate": 3.936875955653587e-06, "loss": 0.5944, "step": 18962 }, { "epoch": 0.5811879367414491, "grad_norm": 0.7945370341150008, "learning_rate": 3.936390991898752e-06, "loss": 0.4396, "step": 18963 }, { "epoch": 0.5812185852641902, "grad_norm": 1.8262066902551133, "learning_rate": 3.93590603862393e-06, "loss": 0.7126, "step": 18964 }, { "epoch": 0.5812492337869315, "grad_norm": 1.6752441368246713, "learning_rate": 3.935421095833898e-06, "loss": 0.5776, "step": 18965 }, { "epoch": 0.5812798823096726, "grad_norm": 1.6386393377652047, "learning_rate": 3.934936163533434e-06, "loss": 0.683, "step": 18966 }, { "epoch": 0.5813105308324139, "grad_norm": 0.794950500586596, "learning_rate": 3.9344512417273165e-06, "loss": 0.4493, "step": 18967 }, { "epoch": 0.5813411793551551, "grad_norm": 0.7534718819887313, "learning_rate": 3.9339663304203236e-06, "loss": 0.4041, "step": 18968 }, { "epoch": 0.5813718278778963, "grad_norm": 1.907941995819433, "learning_rate": 3.933481429617233e-06, "loss": 0.6273, "step": 18969 }, { "epoch": 0.5814024764006375, "grad_norm": 1.8078618287458867, "learning_rate": 3.932996539322825e-06, "loss": 0.6509, "step": 18970 }, { "epoch": 0.5814331249233787, "grad_norm": 1.7710398188428047, "learning_rate": 3.932511659541871e-06, "loss": 0.5655, "step": 18971 }, { "epoch": 0.5814637734461199, "grad_norm": 1.8163495947917998, "learning_rate": 3.9320267902791564e-06, "loss": 0.6023, "step": 18972 }, { "epoch": 0.5814944219688611, "grad_norm": 1.7424848751571191, "learning_rate": 3.9315419315394525e-06, "loss": 0.5815, "step": 18973 }, { "epoch": 0.5815250704916023, "grad_norm": 1.8328089253884483, "learning_rate": 3.931057083327541e-06, "loss": 0.687, "step": 18974 }, { "epoch": 0.5815557190143436, "grad_norm": 1.8816598796056114, "learning_rate": 3.930572245648197e-06, "loss": 0.5768, "step": 18975 }, { "epoch": 0.5815863675370847, "grad_norm": 1.6417642021728935, "learning_rate": 3.930087418506198e-06, "loss": 0.5329, "step": 18976 }, { "epoch": 0.581617016059826, "grad_norm": 1.836011381045137, "learning_rate": 3.929602601906322e-06, "loss": 0.6696, "step": 18977 }, { "epoch": 0.5816476645825671, "grad_norm": 1.7237855257339252, "learning_rate": 3.929117795853345e-06, "loss": 0.6409, "step": 18978 }, { "epoch": 0.5816783131053084, "grad_norm": 1.7592285067940048, "learning_rate": 3.928633000352043e-06, "loss": 0.6253, "step": 18979 }, { "epoch": 0.5817089616280495, "grad_norm": 1.7756387977951695, "learning_rate": 3.928148215407197e-06, "loss": 0.6185, "step": 18980 }, { "epoch": 0.5817396101507908, "grad_norm": 1.9670500997492555, "learning_rate": 3.927663441023578e-06, "loss": 0.694, "step": 18981 }, { "epoch": 0.5817702586735319, "grad_norm": 1.8649931855812394, "learning_rate": 3.927178677205969e-06, "loss": 0.5483, "step": 18982 }, { "epoch": 0.5818009071962731, "grad_norm": 1.9448310504913866, "learning_rate": 3.92669392395914e-06, "loss": 0.6272, "step": 18983 }, { "epoch": 0.5818315557190143, "grad_norm": 2.3609351070907945, "learning_rate": 3.926209181287871e-06, "loss": 0.6978, "step": 18984 }, { "epoch": 0.5818622042417555, "grad_norm": 1.6097655470584809, "learning_rate": 3.925724449196938e-06, "loss": 0.7157, "step": 18985 }, { "epoch": 0.5818928527644968, "grad_norm": 1.4736734744857567, "learning_rate": 3.925239727691118e-06, "loss": 0.6238, "step": 18986 }, { "epoch": 0.5819235012872379, "grad_norm": 1.8059927523732147, "learning_rate": 3.924755016775184e-06, "loss": 0.7346, "step": 18987 }, { "epoch": 0.5819541498099792, "grad_norm": 1.7659653627652325, "learning_rate": 3.924270316453915e-06, "loss": 0.6443, "step": 18988 }, { "epoch": 0.5819847983327203, "grad_norm": 1.7854434259547105, "learning_rate": 3.923785626732087e-06, "loss": 0.665, "step": 18989 }, { "epoch": 0.5820154468554616, "grad_norm": 1.8207192080921868, "learning_rate": 3.923300947614471e-06, "loss": 0.6332, "step": 18990 }, { "epoch": 0.5820460953782027, "grad_norm": 2.0825756933882897, "learning_rate": 3.92281627910585e-06, "loss": 0.6218, "step": 18991 }, { "epoch": 0.582076743900944, "grad_norm": 0.8988885712481619, "learning_rate": 3.922331621210992e-06, "loss": 0.4269, "step": 18992 }, { "epoch": 0.5821073924236851, "grad_norm": 1.537076650842733, "learning_rate": 3.92184697393468e-06, "loss": 0.5427, "step": 18993 }, { "epoch": 0.5821380409464264, "grad_norm": 1.626133623380086, "learning_rate": 3.9213623372816845e-06, "loss": 0.6718, "step": 18994 }, { "epoch": 0.5821686894691676, "grad_norm": 1.825522415757323, "learning_rate": 3.920877711256781e-06, "loss": 0.6508, "step": 18995 }, { "epoch": 0.5821993379919088, "grad_norm": 1.6798020491439924, "learning_rate": 3.920393095864746e-06, "loss": 0.6266, "step": 18996 }, { "epoch": 0.58222998651465, "grad_norm": 1.6960636082625118, "learning_rate": 3.919908491110354e-06, "loss": 0.6863, "step": 18997 }, { "epoch": 0.5822606350373912, "grad_norm": 1.6693298606472757, "learning_rate": 3.9194238969983795e-06, "loss": 0.5614, "step": 18998 }, { "epoch": 0.5822912835601324, "grad_norm": 1.659123067181568, "learning_rate": 3.918939313533598e-06, "loss": 0.6417, "step": 18999 }, { "epoch": 0.5823219320828736, "grad_norm": 1.7791612635294491, "learning_rate": 3.918454740720784e-06, "loss": 0.7293, "step": 19000 }, { "epoch": 0.5823525806056148, "grad_norm": 1.8457408233513295, "learning_rate": 3.917970178564713e-06, "loss": 0.5991, "step": 19001 }, { "epoch": 0.582383229128356, "grad_norm": 1.6043414141971357, "learning_rate": 3.91748562707016e-06, "loss": 0.665, "step": 19002 }, { "epoch": 0.5824138776510972, "grad_norm": 1.7834367702367142, "learning_rate": 3.917001086241895e-06, "loss": 0.6219, "step": 19003 }, { "epoch": 0.5824445261738385, "grad_norm": 1.8269862528275989, "learning_rate": 3.916516556084697e-06, "loss": 0.633, "step": 19004 }, { "epoch": 0.5824751746965796, "grad_norm": 1.7511560768928653, "learning_rate": 3.916032036603339e-06, "loss": 0.6169, "step": 19005 }, { "epoch": 0.5825058232193209, "grad_norm": 1.7527580383357315, "learning_rate": 3.9155475278025935e-06, "loss": 0.6366, "step": 19006 }, { "epoch": 0.582536471742062, "grad_norm": 0.8095203370818067, "learning_rate": 3.915063029687236e-06, "loss": 0.4221, "step": 19007 }, { "epoch": 0.5825671202648033, "grad_norm": 1.7187474235136098, "learning_rate": 3.91457854226204e-06, "loss": 0.6643, "step": 19008 }, { "epoch": 0.5825977687875444, "grad_norm": 0.9935878996378328, "learning_rate": 3.9140940655317795e-06, "loss": 0.4207, "step": 19009 }, { "epoch": 0.5826284173102857, "grad_norm": 1.854675784116305, "learning_rate": 3.913609599501228e-06, "loss": 0.668, "step": 19010 }, { "epoch": 0.5826590658330268, "grad_norm": 1.749701308996094, "learning_rate": 3.913125144175159e-06, "loss": 0.6829, "step": 19011 }, { "epoch": 0.5826897143557681, "grad_norm": 1.9476589580454977, "learning_rate": 3.912640699558346e-06, "loss": 0.6308, "step": 19012 }, { "epoch": 0.5827203628785093, "grad_norm": 1.8732180983831437, "learning_rate": 3.912156265655564e-06, "loss": 0.7338, "step": 19013 }, { "epoch": 0.5827510114012504, "grad_norm": 0.8211163453143457, "learning_rate": 3.9116718424715825e-06, "loss": 0.4486, "step": 19014 }, { "epoch": 0.5827816599239917, "grad_norm": 1.7576559109235022, "learning_rate": 3.9111874300111786e-06, "loss": 0.5993, "step": 19015 }, { "epoch": 0.5828123084467328, "grad_norm": 2.0920108185626773, "learning_rate": 3.910703028279123e-06, "loss": 0.7087, "step": 19016 }, { "epoch": 0.5828429569694741, "grad_norm": 1.83688019991271, "learning_rate": 3.9102186372801875e-06, "loss": 0.6169, "step": 19017 }, { "epoch": 0.5828736054922152, "grad_norm": 1.7059458707438926, "learning_rate": 3.909734257019148e-06, "loss": 0.5438, "step": 19018 }, { "epoch": 0.5829042540149565, "grad_norm": 1.8060720898906983, "learning_rate": 3.909249887500775e-06, "loss": 0.6263, "step": 19019 }, { "epoch": 0.5829349025376976, "grad_norm": 1.7525056618569008, "learning_rate": 3.9087655287298435e-06, "loss": 0.6524, "step": 19020 }, { "epoch": 0.5829655510604389, "grad_norm": 1.6773799180300824, "learning_rate": 3.908281180711123e-06, "loss": 0.5429, "step": 19021 }, { "epoch": 0.58299619958318, "grad_norm": 1.8213896938169312, "learning_rate": 3.907796843449387e-06, "loss": 0.6842, "step": 19022 }, { "epoch": 0.5830268481059213, "grad_norm": 1.8541847269601517, "learning_rate": 3.9073125169494095e-06, "loss": 0.6462, "step": 19023 }, { "epoch": 0.5830574966286625, "grad_norm": 0.8481856433855895, "learning_rate": 3.906828201215963e-06, "loss": 0.4649, "step": 19024 }, { "epoch": 0.5830881451514037, "grad_norm": 1.639288674910339, "learning_rate": 3.9063438962538145e-06, "loss": 0.5977, "step": 19025 }, { "epoch": 0.5831187936741449, "grad_norm": 0.7721991095750141, "learning_rate": 3.9058596020677406e-06, "loss": 0.4249, "step": 19026 }, { "epoch": 0.5831494421968861, "grad_norm": 1.5664261737336251, "learning_rate": 3.9053753186625114e-06, "loss": 0.5733, "step": 19027 }, { "epoch": 0.5831800907196273, "grad_norm": 1.9018485515298784, "learning_rate": 3.9048910460429e-06, "loss": 0.6742, "step": 19028 }, { "epoch": 0.5832107392423685, "grad_norm": 1.811478313478255, "learning_rate": 3.904406784213678e-06, "loss": 0.549, "step": 19029 }, { "epoch": 0.5832413877651097, "grad_norm": 0.7900645692208692, "learning_rate": 3.9039225331796145e-06, "loss": 0.4345, "step": 19030 }, { "epoch": 0.583272036287851, "grad_norm": 1.4624915784707855, "learning_rate": 3.903438292945485e-06, "loss": 0.5723, "step": 19031 }, { "epoch": 0.5833026848105921, "grad_norm": 1.8157839565385732, "learning_rate": 3.902954063516058e-06, "loss": 0.6238, "step": 19032 }, { "epoch": 0.5833333333333334, "grad_norm": 1.4289217114135895, "learning_rate": 3.902469844896103e-06, "loss": 0.6881, "step": 19033 }, { "epoch": 0.5833639818560745, "grad_norm": 1.9218828495130387, "learning_rate": 3.901985637090397e-06, "loss": 0.6545, "step": 19034 }, { "epoch": 0.5833946303788158, "grad_norm": 1.7958567012438478, "learning_rate": 3.901501440103706e-06, "loss": 0.6204, "step": 19035 }, { "epoch": 0.5834252789015569, "grad_norm": 1.6598499113064569, "learning_rate": 3.9010172539408006e-06, "loss": 0.5682, "step": 19036 }, { "epoch": 0.5834559274242982, "grad_norm": 2.0154696544986606, "learning_rate": 3.9005330786064545e-06, "loss": 0.6036, "step": 19037 }, { "epoch": 0.5834865759470393, "grad_norm": 0.8015038788601558, "learning_rate": 3.900048914105436e-06, "loss": 0.4269, "step": 19038 }, { "epoch": 0.5835172244697806, "grad_norm": 1.6467464945672057, "learning_rate": 3.89956476044252e-06, "loss": 0.5428, "step": 19039 }, { "epoch": 0.5835478729925218, "grad_norm": 1.6100500019131039, "learning_rate": 3.899080617622472e-06, "loss": 0.6165, "step": 19040 }, { "epoch": 0.583578521515263, "grad_norm": 0.8456980573191815, "learning_rate": 3.898596485650065e-06, "loss": 0.4305, "step": 19041 }, { "epoch": 0.5836091700380042, "grad_norm": 1.7809563573836222, "learning_rate": 3.898112364530068e-06, "loss": 0.702, "step": 19042 }, { "epoch": 0.5836398185607454, "grad_norm": 1.691913713102105, "learning_rate": 3.897628254267254e-06, "loss": 0.6283, "step": 19043 }, { "epoch": 0.5836704670834866, "grad_norm": 2.019311329045685, "learning_rate": 3.897144154866387e-06, "loss": 0.69, "step": 19044 }, { "epoch": 0.5837011156062277, "grad_norm": 1.6920574766592458, "learning_rate": 3.896660066332244e-06, "loss": 0.6265, "step": 19045 }, { "epoch": 0.583731764128969, "grad_norm": 1.845344096089537, "learning_rate": 3.896175988669589e-06, "loss": 0.6846, "step": 19046 }, { "epoch": 0.5837624126517101, "grad_norm": 0.8307499445958542, "learning_rate": 3.8956919218831975e-06, "loss": 0.427, "step": 19047 }, { "epoch": 0.5837930611744514, "grad_norm": 1.7649861961537905, "learning_rate": 3.895207865977835e-06, "loss": 0.4619, "step": 19048 }, { "epoch": 0.5838237096971925, "grad_norm": 1.7123353520005238, "learning_rate": 3.89472382095827e-06, "loss": 0.6877, "step": 19049 }, { "epoch": 0.5838543582199338, "grad_norm": 1.7676386422787558, "learning_rate": 3.894239786829277e-06, "loss": 0.6438, "step": 19050 }, { "epoch": 0.583885006742675, "grad_norm": 1.8813387597556632, "learning_rate": 3.8937557635956205e-06, "loss": 0.5687, "step": 19051 }, { "epoch": 0.5839156552654162, "grad_norm": 1.546753817294541, "learning_rate": 3.893271751262071e-06, "loss": 0.5217, "step": 19052 }, { "epoch": 0.5839463037881574, "grad_norm": 1.6604227416492767, "learning_rate": 3.8927877498334e-06, "loss": 0.5965, "step": 19053 }, { "epoch": 0.5839769523108986, "grad_norm": 1.6560348549447712, "learning_rate": 3.892303759314372e-06, "loss": 0.6195, "step": 19054 }, { "epoch": 0.5840076008336398, "grad_norm": 1.7276616348288223, "learning_rate": 3.891819779709761e-06, "loss": 0.5992, "step": 19055 }, { "epoch": 0.584038249356381, "grad_norm": 1.4422897427053138, "learning_rate": 3.8913358110243335e-06, "loss": 0.6791, "step": 19056 }, { "epoch": 0.5840688978791222, "grad_norm": 1.5549036185935667, "learning_rate": 3.890851853262855e-06, "loss": 0.6247, "step": 19057 }, { "epoch": 0.5840995464018635, "grad_norm": 1.7635789406537519, "learning_rate": 3.8903679064301e-06, "loss": 0.7603, "step": 19058 }, { "epoch": 0.5841301949246046, "grad_norm": 0.8224265813680806, "learning_rate": 3.889883970530833e-06, "loss": 0.4312, "step": 19059 }, { "epoch": 0.5841608434473459, "grad_norm": 1.5632312103732697, "learning_rate": 3.889400045569822e-06, "loss": 0.6244, "step": 19060 }, { "epoch": 0.584191491970087, "grad_norm": 1.7892167032538062, "learning_rate": 3.888916131551837e-06, "loss": 0.7539, "step": 19061 }, { "epoch": 0.5842221404928283, "grad_norm": 1.6801003633815743, "learning_rate": 3.888432228481647e-06, "loss": 0.6073, "step": 19062 }, { "epoch": 0.5842527890155694, "grad_norm": 1.7624568449311695, "learning_rate": 3.887948336364017e-06, "loss": 0.6405, "step": 19063 }, { "epoch": 0.5842834375383107, "grad_norm": 1.7515592919225325, "learning_rate": 3.887464455203717e-06, "loss": 0.6961, "step": 19064 }, { "epoch": 0.5843140860610518, "grad_norm": 1.7278621714664857, "learning_rate": 3.886980585005515e-06, "loss": 0.6297, "step": 19065 }, { "epoch": 0.5843447345837931, "grad_norm": 1.7489856707670306, "learning_rate": 3.886496725774178e-06, "loss": 0.5934, "step": 19066 }, { "epoch": 0.5843753831065343, "grad_norm": 1.6286708768840386, "learning_rate": 3.886012877514475e-06, "loss": 0.6383, "step": 19067 }, { "epoch": 0.5844060316292755, "grad_norm": 1.7874983187320745, "learning_rate": 3.885529040231168e-06, "loss": 0.6658, "step": 19068 }, { "epoch": 0.5844366801520167, "grad_norm": 1.7331434547760391, "learning_rate": 3.885045213929032e-06, "loss": 0.5992, "step": 19069 }, { "epoch": 0.5844673286747579, "grad_norm": 1.6439281012212776, "learning_rate": 3.884561398612831e-06, "loss": 0.6323, "step": 19070 }, { "epoch": 0.5844979771974991, "grad_norm": 0.81396529884095, "learning_rate": 3.88407759428733e-06, "loss": 0.4328, "step": 19071 }, { "epoch": 0.5845286257202403, "grad_norm": 1.8062007306998118, "learning_rate": 3.883593800957299e-06, "loss": 0.6817, "step": 19072 }, { "epoch": 0.5845592742429815, "grad_norm": 1.8999660125954059, "learning_rate": 3.883110018627503e-06, "loss": 0.6133, "step": 19073 }, { "epoch": 0.5845899227657227, "grad_norm": 1.8639478673698895, "learning_rate": 3.88262624730271e-06, "loss": 0.6106, "step": 19074 }, { "epoch": 0.5846205712884639, "grad_norm": 1.7417376046447892, "learning_rate": 3.882142486987688e-06, "loss": 0.6495, "step": 19075 }, { "epoch": 0.584651219811205, "grad_norm": 1.6471156881472413, "learning_rate": 3.8816587376872e-06, "loss": 0.5887, "step": 19076 }, { "epoch": 0.5846818683339463, "grad_norm": 0.7776041669778733, "learning_rate": 3.881174999406017e-06, "loss": 0.427, "step": 19077 }, { "epoch": 0.5847125168566875, "grad_norm": 1.9150302634393517, "learning_rate": 3.880691272148902e-06, "loss": 0.6187, "step": 19078 }, { "epoch": 0.5847431653794287, "grad_norm": 1.6445013562119843, "learning_rate": 3.880207555920621e-06, "loss": 0.5908, "step": 19079 }, { "epoch": 0.5847738139021699, "grad_norm": 1.669280587909245, "learning_rate": 3.879723850725943e-06, "loss": 0.6264, "step": 19080 }, { "epoch": 0.5848044624249111, "grad_norm": 1.6733845594063874, "learning_rate": 3.879240156569631e-06, "loss": 0.5664, "step": 19081 }, { "epoch": 0.5848351109476523, "grad_norm": 1.674183260791591, "learning_rate": 3.878756473456453e-06, "loss": 0.646, "step": 19082 }, { "epoch": 0.5848657594703935, "grad_norm": 2.021749954527491, "learning_rate": 3.878272801391176e-06, "loss": 0.597, "step": 19083 }, { "epoch": 0.5848964079931347, "grad_norm": 1.8119265722470828, "learning_rate": 3.877789140378561e-06, "loss": 0.6454, "step": 19084 }, { "epoch": 0.584927056515876, "grad_norm": 1.8473042119590966, "learning_rate": 3.87730549042338e-06, "loss": 0.6554, "step": 19085 }, { "epoch": 0.5849577050386171, "grad_norm": 1.8167708276683314, "learning_rate": 3.876821851530395e-06, "loss": 0.6061, "step": 19086 }, { "epoch": 0.5849883535613584, "grad_norm": 1.7134489163717652, "learning_rate": 3.876338223704368e-06, "loss": 0.6264, "step": 19087 }, { "epoch": 0.5850190020840995, "grad_norm": 1.5649914237003548, "learning_rate": 3.875854606950072e-06, "loss": 0.6327, "step": 19088 }, { "epoch": 0.5850496506068408, "grad_norm": 1.7763513422342634, "learning_rate": 3.875371001272266e-06, "loss": 0.6194, "step": 19089 }, { "epoch": 0.5850802991295819, "grad_norm": 0.8190171725391355, "learning_rate": 3.874887406675718e-06, "loss": 0.4468, "step": 19090 }, { "epoch": 0.5851109476523232, "grad_norm": 1.7740418214422442, "learning_rate": 3.874403823165192e-06, "loss": 0.6368, "step": 19091 }, { "epoch": 0.5851415961750643, "grad_norm": 1.5230206112596942, "learning_rate": 3.873920250745453e-06, "loss": 0.6166, "step": 19092 }, { "epoch": 0.5851722446978056, "grad_norm": 1.7912074639989188, "learning_rate": 3.873436689421266e-06, "loss": 0.6161, "step": 19093 }, { "epoch": 0.5852028932205467, "grad_norm": 1.697716544092059, "learning_rate": 3.872953139197397e-06, "loss": 0.634, "step": 19094 }, { "epoch": 0.585233541743288, "grad_norm": 0.7623605590652176, "learning_rate": 3.872469600078607e-06, "loss": 0.4231, "step": 19095 }, { "epoch": 0.5852641902660292, "grad_norm": 1.7086980629645572, "learning_rate": 3.871986072069663e-06, "loss": 0.5972, "step": 19096 }, { "epoch": 0.5852948387887704, "grad_norm": 1.7146673137192567, "learning_rate": 3.871502555175331e-06, "loss": 0.696, "step": 19097 }, { "epoch": 0.5853254873115116, "grad_norm": 1.771668645286199, "learning_rate": 3.8710190494003694e-06, "loss": 0.574, "step": 19098 }, { "epoch": 0.5853561358342528, "grad_norm": 1.4964500682602553, "learning_rate": 3.870535554749549e-06, "loss": 0.511, "step": 19099 }, { "epoch": 0.585386784356994, "grad_norm": 1.8332933641819644, "learning_rate": 3.870052071227628e-06, "loss": 0.6096, "step": 19100 }, { "epoch": 0.5854174328797352, "grad_norm": 1.8285737822460708, "learning_rate": 3.869568598839376e-06, "loss": 0.676, "step": 19101 }, { "epoch": 0.5854480814024764, "grad_norm": 0.7952217825554627, "learning_rate": 3.869085137589552e-06, "loss": 0.4339, "step": 19102 }, { "epoch": 0.5854787299252177, "grad_norm": 1.568715078482158, "learning_rate": 3.868601687482922e-06, "loss": 0.6105, "step": 19103 }, { "epoch": 0.5855093784479588, "grad_norm": 1.775236569168049, "learning_rate": 3.8681182485242494e-06, "loss": 0.5676, "step": 19104 }, { "epoch": 0.5855400269707001, "grad_norm": 1.8550882434163534, "learning_rate": 3.867634820718297e-06, "loss": 0.5922, "step": 19105 }, { "epoch": 0.5855706754934412, "grad_norm": 1.8562101818500507, "learning_rate": 3.867151404069828e-06, "loss": 0.6054, "step": 19106 }, { "epoch": 0.5856013240161824, "grad_norm": 0.756883882774923, "learning_rate": 3.8666679985836065e-06, "loss": 0.4214, "step": 19107 }, { "epoch": 0.5856319725389236, "grad_norm": 1.8522675691517272, "learning_rate": 3.8661846042643945e-06, "loss": 0.7204, "step": 19108 }, { "epoch": 0.5856626210616648, "grad_norm": 1.8360134652803761, "learning_rate": 3.865701221116957e-06, "loss": 0.6231, "step": 19109 }, { "epoch": 0.585693269584406, "grad_norm": 1.6897051701933128, "learning_rate": 3.865217849146055e-06, "loss": 0.6994, "step": 19110 }, { "epoch": 0.5857239181071472, "grad_norm": 1.7384315790664069, "learning_rate": 3.864734488356451e-06, "loss": 0.7432, "step": 19111 }, { "epoch": 0.5857545666298885, "grad_norm": 1.764278070595234, "learning_rate": 3.864251138752911e-06, "loss": 0.6602, "step": 19112 }, { "epoch": 0.5857852151526296, "grad_norm": 1.8181906527122362, "learning_rate": 3.863767800340193e-06, "loss": 0.5451, "step": 19113 }, { "epoch": 0.5858158636753709, "grad_norm": 1.6341925502295223, "learning_rate": 3.863284473123061e-06, "loss": 0.6311, "step": 19114 }, { "epoch": 0.585846512198112, "grad_norm": 1.8287190633279193, "learning_rate": 3.862801157106279e-06, "loss": 0.5787, "step": 19115 }, { "epoch": 0.5858771607208533, "grad_norm": 1.6647402734935657, "learning_rate": 3.862317852294609e-06, "loss": 0.5776, "step": 19116 }, { "epoch": 0.5859078092435944, "grad_norm": 1.4942520466946005, "learning_rate": 3.8618345586928105e-06, "loss": 0.5638, "step": 19117 }, { "epoch": 0.5859384577663357, "grad_norm": 1.6788064492763333, "learning_rate": 3.861351276305649e-06, "loss": 0.6552, "step": 19118 }, { "epoch": 0.5859691062890768, "grad_norm": 1.7206301175107825, "learning_rate": 3.860868005137883e-06, "loss": 0.6775, "step": 19119 }, { "epoch": 0.5859997548118181, "grad_norm": 1.7710894604000424, "learning_rate": 3.8603847451942776e-06, "loss": 0.5751, "step": 19120 }, { "epoch": 0.5860304033345592, "grad_norm": 1.7863364435399944, "learning_rate": 3.859901496479593e-06, "loss": 0.6481, "step": 19121 }, { "epoch": 0.5860610518573005, "grad_norm": 1.7915044231318877, "learning_rate": 3.85941825899859e-06, "loss": 0.5876, "step": 19122 }, { "epoch": 0.5860917003800417, "grad_norm": 1.6457726894491667, "learning_rate": 3.858935032756031e-06, "loss": 0.5992, "step": 19123 }, { "epoch": 0.5861223489027829, "grad_norm": 1.832298497724581, "learning_rate": 3.858451817756676e-06, "loss": 0.634, "step": 19124 }, { "epoch": 0.5861529974255241, "grad_norm": 1.7490878040510722, "learning_rate": 3.857968614005287e-06, "loss": 0.5795, "step": 19125 }, { "epoch": 0.5861836459482653, "grad_norm": 1.6334321211976417, "learning_rate": 3.857485421506627e-06, "loss": 0.5642, "step": 19126 }, { "epoch": 0.5862142944710065, "grad_norm": 1.4749136967457244, "learning_rate": 3.857002240265454e-06, "loss": 0.5572, "step": 19127 }, { "epoch": 0.5862449429937477, "grad_norm": 1.674553909403125, "learning_rate": 3.856519070286532e-06, "loss": 0.5766, "step": 19128 }, { "epoch": 0.5862755915164889, "grad_norm": 1.5522338214537141, "learning_rate": 3.856035911574621e-06, "loss": 0.5404, "step": 19129 }, { "epoch": 0.5863062400392302, "grad_norm": 1.7786722644460038, "learning_rate": 3.855552764134478e-06, "loss": 0.5711, "step": 19130 }, { "epoch": 0.5863368885619713, "grad_norm": 1.698700843464874, "learning_rate": 3.855069627970869e-06, "loss": 0.5912, "step": 19131 }, { "epoch": 0.5863675370847126, "grad_norm": 1.7052046603717774, "learning_rate": 3.854586503088551e-06, "loss": 0.6144, "step": 19132 }, { "epoch": 0.5863981856074537, "grad_norm": 1.8785138710268667, "learning_rate": 3.854103389492283e-06, "loss": 0.5612, "step": 19133 }, { "epoch": 0.586428834130195, "grad_norm": 1.5348415591679327, "learning_rate": 3.85362028718683e-06, "loss": 0.6261, "step": 19134 }, { "epoch": 0.5864594826529361, "grad_norm": 0.8041708408978139, "learning_rate": 3.853137196176949e-06, "loss": 0.4158, "step": 19135 }, { "epoch": 0.5864901311756774, "grad_norm": 1.7899597632339217, "learning_rate": 3.852654116467401e-06, "loss": 0.5412, "step": 19136 }, { "epoch": 0.5865207796984185, "grad_norm": 2.394897756203924, "learning_rate": 3.852171048062945e-06, "loss": 0.6536, "step": 19137 }, { "epoch": 0.5865514282211597, "grad_norm": 1.726235178533683, "learning_rate": 3.851687990968341e-06, "loss": 0.5971, "step": 19138 }, { "epoch": 0.586582076743901, "grad_norm": 1.7568877108551684, "learning_rate": 3.85120494518835e-06, "loss": 0.6107, "step": 19139 }, { "epoch": 0.5866127252666421, "grad_norm": 1.8145162522819223, "learning_rate": 3.850721910727731e-06, "loss": 0.6103, "step": 19140 }, { "epoch": 0.5866433737893834, "grad_norm": 1.6356384016009309, "learning_rate": 3.850238887591241e-06, "loss": 0.6525, "step": 19141 }, { "epoch": 0.5866740223121245, "grad_norm": 1.6406526975731375, "learning_rate": 3.849755875783644e-06, "loss": 0.4726, "step": 19142 }, { "epoch": 0.5867046708348658, "grad_norm": 1.969992246558081, "learning_rate": 3.849272875309696e-06, "loss": 0.6225, "step": 19143 }, { "epoch": 0.5867353193576069, "grad_norm": 1.6365057375621452, "learning_rate": 3.848789886174155e-06, "loss": 0.6343, "step": 19144 }, { "epoch": 0.5867659678803482, "grad_norm": 0.7870364927738104, "learning_rate": 3.848306908381783e-06, "loss": 0.4522, "step": 19145 }, { "epoch": 0.5867966164030893, "grad_norm": 1.8305593830726477, "learning_rate": 3.847823941937338e-06, "loss": 0.5927, "step": 19146 }, { "epoch": 0.5868272649258306, "grad_norm": 1.7514509541099836, "learning_rate": 3.847340986845578e-06, "loss": 0.6356, "step": 19147 }, { "epoch": 0.5868579134485717, "grad_norm": 1.5663609380071752, "learning_rate": 3.846858043111262e-06, "loss": 0.5778, "step": 19148 }, { "epoch": 0.586888561971313, "grad_norm": 1.7659075349387523, "learning_rate": 3.846375110739149e-06, "loss": 0.6372, "step": 19149 }, { "epoch": 0.5869192104940542, "grad_norm": 1.9386851826822193, "learning_rate": 3.8458921897339975e-06, "loss": 0.6343, "step": 19150 }, { "epoch": 0.5869498590167954, "grad_norm": 1.7955868399887223, "learning_rate": 3.845409280100567e-06, "loss": 0.5336, "step": 19151 }, { "epoch": 0.5869805075395366, "grad_norm": 1.7018786401660488, "learning_rate": 3.84492638184361e-06, "loss": 0.614, "step": 19152 }, { "epoch": 0.5870111560622778, "grad_norm": 1.6070395164967406, "learning_rate": 3.844443494967893e-06, "loss": 0.5546, "step": 19153 }, { "epoch": 0.587041804585019, "grad_norm": 1.9197329970976846, "learning_rate": 3.8439606194781665e-06, "loss": 0.6523, "step": 19154 }, { "epoch": 0.5870724531077602, "grad_norm": 1.7247012705491618, "learning_rate": 3.843477755379195e-06, "loss": 0.6766, "step": 19155 }, { "epoch": 0.5871031016305014, "grad_norm": 1.6940221315563682, "learning_rate": 3.842994902675732e-06, "loss": 0.6801, "step": 19156 }, { "epoch": 0.5871337501532427, "grad_norm": 1.8760604593293495, "learning_rate": 3.842512061372535e-06, "loss": 0.6555, "step": 19157 }, { "epoch": 0.5871643986759838, "grad_norm": 1.9028789159680057, "learning_rate": 3.842029231474364e-06, "loss": 0.6366, "step": 19158 }, { "epoch": 0.5871950471987251, "grad_norm": 1.5770980752667134, "learning_rate": 3.841546412985977e-06, "loss": 0.5139, "step": 19159 }, { "epoch": 0.5872256957214662, "grad_norm": 1.8135998632213115, "learning_rate": 3.841063605912126e-06, "loss": 0.6612, "step": 19160 }, { "epoch": 0.5872563442442075, "grad_norm": 1.7062891524452322, "learning_rate": 3.840580810257574e-06, "loss": 0.6195, "step": 19161 }, { "epoch": 0.5872869927669486, "grad_norm": 1.79242697627435, "learning_rate": 3.840098026027075e-06, "loss": 0.6192, "step": 19162 }, { "epoch": 0.5873176412896899, "grad_norm": 1.6502364017103843, "learning_rate": 3.839615253225387e-06, "loss": 0.683, "step": 19163 }, { "epoch": 0.587348289812431, "grad_norm": 1.6355330736525606, "learning_rate": 3.839132491857269e-06, "loss": 0.6144, "step": 19164 }, { "epoch": 0.5873789383351723, "grad_norm": 1.9106151403686291, "learning_rate": 3.838649741927472e-06, "loss": 0.6704, "step": 19165 }, { "epoch": 0.5874095868579134, "grad_norm": 1.5820238557188144, "learning_rate": 3.838167003440759e-06, "loss": 0.5644, "step": 19166 }, { "epoch": 0.5874402353806547, "grad_norm": 1.8604056600949115, "learning_rate": 3.837684276401883e-06, "loss": 0.7016, "step": 19167 }, { "epoch": 0.5874708839033959, "grad_norm": 1.7465627406506812, "learning_rate": 3.837201560815601e-06, "loss": 0.5973, "step": 19168 }, { "epoch": 0.587501532426137, "grad_norm": 1.7310997063907019, "learning_rate": 3.83671885668667e-06, "loss": 0.6409, "step": 19169 }, { "epoch": 0.5875321809488783, "grad_norm": 1.7478707468495018, "learning_rate": 3.836236164019845e-06, "loss": 0.6659, "step": 19170 }, { "epoch": 0.5875628294716194, "grad_norm": 1.683318510521181, "learning_rate": 3.835753482819883e-06, "loss": 0.5195, "step": 19171 }, { "epoch": 0.5875934779943607, "grad_norm": 1.5804235620767042, "learning_rate": 3.835270813091539e-06, "loss": 0.6156, "step": 19172 }, { "epoch": 0.5876241265171018, "grad_norm": 1.6942944439478813, "learning_rate": 3.834788154839571e-06, "loss": 0.6643, "step": 19173 }, { "epoch": 0.5876547750398431, "grad_norm": 1.7481711769973183, "learning_rate": 3.834305508068734e-06, "loss": 0.5871, "step": 19174 }, { "epoch": 0.5876854235625842, "grad_norm": 0.7724318993133843, "learning_rate": 3.833822872783782e-06, "loss": 0.4267, "step": 19175 }, { "epoch": 0.5877160720853255, "grad_norm": 1.6147674339110811, "learning_rate": 3.833340248989471e-06, "loss": 0.5415, "step": 19176 }, { "epoch": 0.5877467206080667, "grad_norm": 1.670044882870649, "learning_rate": 3.832857636690559e-06, "loss": 0.5686, "step": 19177 }, { "epoch": 0.5877773691308079, "grad_norm": 1.7245139920432466, "learning_rate": 3.832375035891798e-06, "loss": 0.5351, "step": 19178 }, { "epoch": 0.5878080176535491, "grad_norm": 1.7373456888214471, "learning_rate": 3.831892446597944e-06, "loss": 0.5936, "step": 19179 }, { "epoch": 0.5878386661762903, "grad_norm": 1.7571247002347703, "learning_rate": 3.831409868813754e-06, "loss": 0.6877, "step": 19180 }, { "epoch": 0.5878693146990315, "grad_norm": 1.6079852047212344, "learning_rate": 3.83092730254398e-06, "loss": 0.5345, "step": 19181 }, { "epoch": 0.5878999632217727, "grad_norm": 1.7976244048581507, "learning_rate": 3.830444747793379e-06, "loss": 0.5878, "step": 19182 }, { "epoch": 0.5879306117445139, "grad_norm": 1.6716959330152026, "learning_rate": 3.829962204566707e-06, "loss": 0.5949, "step": 19183 }, { "epoch": 0.5879612602672551, "grad_norm": 1.6705227714800213, "learning_rate": 3.829479672868713e-06, "loss": 0.6587, "step": 19184 }, { "epoch": 0.5879919087899963, "grad_norm": 0.7437144468353714, "learning_rate": 3.828997152704159e-06, "loss": 0.4121, "step": 19185 }, { "epoch": 0.5880225573127376, "grad_norm": 1.546784026531024, "learning_rate": 3.828514644077794e-06, "loss": 0.5354, "step": 19186 }, { "epoch": 0.5880532058354787, "grad_norm": 1.6466094875456745, "learning_rate": 3.8280321469943734e-06, "loss": 0.5709, "step": 19187 }, { "epoch": 0.58808385435822, "grad_norm": 1.7173784105349361, "learning_rate": 3.827549661458653e-06, "loss": 0.6406, "step": 19188 }, { "epoch": 0.5881145028809611, "grad_norm": 2.0145399615214603, "learning_rate": 3.827067187475384e-06, "loss": 0.6542, "step": 19189 }, { "epoch": 0.5881451514037024, "grad_norm": 1.6564341221529688, "learning_rate": 3.826584725049325e-06, "loss": 0.6831, "step": 19190 }, { "epoch": 0.5881757999264435, "grad_norm": 1.7267736352675929, "learning_rate": 3.826102274185225e-06, "loss": 0.6182, "step": 19191 }, { "epoch": 0.5882064484491848, "grad_norm": 1.4661872327898864, "learning_rate": 3.82561983488784e-06, "loss": 0.6057, "step": 19192 }, { "epoch": 0.5882370969719259, "grad_norm": 1.6713080736256387, "learning_rate": 3.825137407161923e-06, "loss": 0.6522, "step": 19193 }, { "epoch": 0.5882677454946672, "grad_norm": 1.762250342554031, "learning_rate": 3.8246549910122285e-06, "loss": 0.6458, "step": 19194 }, { "epoch": 0.5882983940174084, "grad_norm": 1.9784959213698168, "learning_rate": 3.824172586443507e-06, "loss": 0.6247, "step": 19195 }, { "epoch": 0.5883290425401496, "grad_norm": 1.6877664138648223, "learning_rate": 3.823690193460517e-06, "loss": 0.6868, "step": 19196 }, { "epoch": 0.5883596910628908, "grad_norm": 1.8392498742756516, "learning_rate": 3.8232078120680075e-06, "loss": 0.6802, "step": 19197 }, { "epoch": 0.588390339585632, "grad_norm": 1.8004975872071303, "learning_rate": 3.822725442270731e-06, "loss": 0.6759, "step": 19198 }, { "epoch": 0.5884209881083732, "grad_norm": 1.5564284751396973, "learning_rate": 3.822243084073443e-06, "loss": 0.577, "step": 19199 }, { "epoch": 0.5884516366311143, "grad_norm": 1.7133076238945129, "learning_rate": 3.821760737480894e-06, "loss": 0.5505, "step": 19200 }, { "epoch": 0.5884822851538556, "grad_norm": 1.5915766913181026, "learning_rate": 3.82127840249784e-06, "loss": 0.6214, "step": 19201 }, { "epoch": 0.5885129336765967, "grad_norm": 1.7479006723881105, "learning_rate": 3.820796079129031e-06, "loss": 0.6186, "step": 19202 }, { "epoch": 0.588543582199338, "grad_norm": 0.8465284852510335, "learning_rate": 3.8203137673792185e-06, "loss": 0.4107, "step": 19203 }, { "epoch": 0.5885742307220792, "grad_norm": 0.809494313444861, "learning_rate": 3.819831467253158e-06, "loss": 0.4188, "step": 19204 }, { "epoch": 0.5886048792448204, "grad_norm": 1.8074179163475164, "learning_rate": 3.8193491787556e-06, "loss": 0.5992, "step": 19205 }, { "epoch": 0.5886355277675616, "grad_norm": 0.8214352548258065, "learning_rate": 3.818866901891295e-06, "loss": 0.4218, "step": 19206 }, { "epoch": 0.5886661762903028, "grad_norm": 1.7525447908489378, "learning_rate": 3.818384636664998e-06, "loss": 0.6702, "step": 19207 }, { "epoch": 0.588696824813044, "grad_norm": 1.7068013942128997, "learning_rate": 3.817902383081458e-06, "loss": 0.5974, "step": 19208 }, { "epoch": 0.5887274733357852, "grad_norm": 1.8920189960509226, "learning_rate": 3.817420141145431e-06, "loss": 0.6431, "step": 19209 }, { "epoch": 0.5887581218585264, "grad_norm": 1.810218061968455, "learning_rate": 3.816937910861663e-06, "loss": 0.6224, "step": 19210 }, { "epoch": 0.5887887703812676, "grad_norm": 1.764277862527994, "learning_rate": 3.81645569223491e-06, "loss": 0.524, "step": 19211 }, { "epoch": 0.5888194189040088, "grad_norm": 1.6723143365002306, "learning_rate": 3.815973485269921e-06, "loss": 0.5564, "step": 19212 }, { "epoch": 0.5888500674267501, "grad_norm": 1.9305639999989765, "learning_rate": 3.815491289971449e-06, "loss": 0.6959, "step": 19213 }, { "epoch": 0.5888807159494912, "grad_norm": 1.707229601795532, "learning_rate": 3.815009106344244e-06, "loss": 0.5881, "step": 19214 }, { "epoch": 0.5889113644722325, "grad_norm": 1.6921483054524116, "learning_rate": 3.814526934393058e-06, "loss": 0.6079, "step": 19215 }, { "epoch": 0.5889420129949736, "grad_norm": 2.035955880336247, "learning_rate": 3.814044774122642e-06, "loss": 0.5578, "step": 19216 }, { "epoch": 0.5889726615177149, "grad_norm": 0.884655457157649, "learning_rate": 3.813562625537743e-06, "loss": 0.4127, "step": 19217 }, { "epoch": 0.589003310040456, "grad_norm": 1.7852122554843906, "learning_rate": 3.8130804886431194e-06, "loss": 0.5246, "step": 19218 }, { "epoch": 0.5890339585631973, "grad_norm": 0.8440823440971991, "learning_rate": 3.8125983634435147e-06, "loss": 0.436, "step": 19219 }, { "epoch": 0.5890646070859384, "grad_norm": 1.5504353291214854, "learning_rate": 3.812116249943683e-06, "loss": 0.6655, "step": 19220 }, { "epoch": 0.5890952556086797, "grad_norm": 1.7734956358554517, "learning_rate": 3.8116341481483738e-06, "loss": 0.5821, "step": 19221 }, { "epoch": 0.5891259041314209, "grad_norm": 1.8072046992161348, "learning_rate": 3.811152058062337e-06, "loss": 0.675, "step": 19222 }, { "epoch": 0.5891565526541621, "grad_norm": 1.805563805186923, "learning_rate": 3.8106699796903236e-06, "loss": 0.6351, "step": 19223 }, { "epoch": 0.5891872011769033, "grad_norm": 1.9164790798092002, "learning_rate": 3.8101879130370827e-06, "loss": 0.5389, "step": 19224 }, { "epoch": 0.5892178496996445, "grad_norm": 1.5833594248075558, "learning_rate": 3.8097058581073644e-06, "loss": 0.5935, "step": 19225 }, { "epoch": 0.5892484982223857, "grad_norm": 1.7837927411789058, "learning_rate": 3.809223814905921e-06, "loss": 0.5192, "step": 19226 }, { "epoch": 0.5892791467451269, "grad_norm": 1.6354745257193832, "learning_rate": 3.8087417834374964e-06, "loss": 0.5368, "step": 19227 }, { "epoch": 0.5893097952678681, "grad_norm": 1.8377480548053153, "learning_rate": 3.8082597637068476e-06, "loss": 0.6091, "step": 19228 }, { "epoch": 0.5893404437906093, "grad_norm": 1.852083960733439, "learning_rate": 3.8077777557187185e-06, "loss": 0.5605, "step": 19229 }, { "epoch": 0.5893710923133505, "grad_norm": 1.7462097212665793, "learning_rate": 3.807295759477859e-06, "loss": 0.711, "step": 19230 }, { "epoch": 0.5894017408360916, "grad_norm": 1.5722762007291458, "learning_rate": 3.8068137749890214e-06, "loss": 0.6355, "step": 19231 }, { "epoch": 0.5894323893588329, "grad_norm": 0.9239295770217948, "learning_rate": 3.8063318022569528e-06, "loss": 0.4289, "step": 19232 }, { "epoch": 0.5894630378815741, "grad_norm": 1.861344996269647, "learning_rate": 3.8058498412864016e-06, "loss": 0.609, "step": 19233 }, { "epoch": 0.5894936864043153, "grad_norm": 1.6919236051642517, "learning_rate": 3.805367892082118e-06, "loss": 0.589, "step": 19234 }, { "epoch": 0.5895243349270565, "grad_norm": 1.7933136358393396, "learning_rate": 3.804885954648849e-06, "loss": 0.6154, "step": 19235 }, { "epoch": 0.5895549834497977, "grad_norm": 1.702271013334686, "learning_rate": 3.804404028991346e-06, "loss": 0.5539, "step": 19236 }, { "epoch": 0.5895856319725389, "grad_norm": 1.6701835961142972, "learning_rate": 3.8039221151143566e-06, "loss": 0.6142, "step": 19237 }, { "epoch": 0.5896162804952801, "grad_norm": 0.773194519144322, "learning_rate": 3.8034402130226255e-06, "loss": 0.4199, "step": 19238 }, { "epoch": 0.5896469290180213, "grad_norm": 1.632948971633312, "learning_rate": 3.8029583227209077e-06, "loss": 0.6342, "step": 19239 }, { "epoch": 0.5896775775407626, "grad_norm": 1.836530085900821, "learning_rate": 3.8024764442139467e-06, "loss": 0.6964, "step": 19240 }, { "epoch": 0.5897082260635037, "grad_norm": 1.4025449988424492, "learning_rate": 3.8019945775064904e-06, "loss": 0.4277, "step": 19241 }, { "epoch": 0.589738874586245, "grad_norm": 1.6241315519234012, "learning_rate": 3.8015127226032888e-06, "loss": 0.4926, "step": 19242 }, { "epoch": 0.5897695231089861, "grad_norm": 1.7996946571903494, "learning_rate": 3.80103087950909e-06, "loss": 0.5817, "step": 19243 }, { "epoch": 0.5898001716317274, "grad_norm": 0.8056153480907179, "learning_rate": 3.800549048228639e-06, "loss": 0.4387, "step": 19244 }, { "epoch": 0.5898308201544685, "grad_norm": 1.7016472489504906, "learning_rate": 3.8000672287666863e-06, "loss": 0.6181, "step": 19245 }, { "epoch": 0.5898614686772098, "grad_norm": 1.6732594353056012, "learning_rate": 3.799585421127977e-06, "loss": 0.6222, "step": 19246 }, { "epoch": 0.5898921171999509, "grad_norm": 1.7481890109648268, "learning_rate": 3.799103625317261e-06, "loss": 0.5269, "step": 19247 }, { "epoch": 0.5899227657226922, "grad_norm": 1.5675949927198205, "learning_rate": 3.7986218413392844e-06, "loss": 0.5873, "step": 19248 }, { "epoch": 0.5899534142454333, "grad_norm": 1.5821587781290785, "learning_rate": 3.798140069198792e-06, "loss": 0.601, "step": 19249 }, { "epoch": 0.5899840627681746, "grad_norm": 1.8932619288717354, "learning_rate": 3.797658308900536e-06, "loss": 0.5311, "step": 19250 }, { "epoch": 0.5900147112909158, "grad_norm": 1.7702128128196843, "learning_rate": 3.797176560449259e-06, "loss": 0.6083, "step": 19251 }, { "epoch": 0.590045359813657, "grad_norm": 1.8524269597807672, "learning_rate": 3.7966948238497083e-06, "loss": 0.6268, "step": 19252 }, { "epoch": 0.5900760083363982, "grad_norm": 1.663403908567589, "learning_rate": 3.7962130991066325e-06, "loss": 0.6454, "step": 19253 }, { "epoch": 0.5901066568591394, "grad_norm": 1.658106714469388, "learning_rate": 3.795731386224776e-06, "loss": 0.6794, "step": 19254 }, { "epoch": 0.5901373053818806, "grad_norm": 1.550876678937309, "learning_rate": 3.795249685208887e-06, "loss": 0.574, "step": 19255 }, { "epoch": 0.5901679539046218, "grad_norm": 1.8457656698677054, "learning_rate": 3.7947679960637113e-06, "loss": 0.6299, "step": 19256 }, { "epoch": 0.590198602427363, "grad_norm": 1.7900508483949396, "learning_rate": 3.794286318793994e-06, "loss": 0.632, "step": 19257 }, { "epoch": 0.5902292509501043, "grad_norm": 1.8987444074365871, "learning_rate": 3.7938046534044826e-06, "loss": 0.6476, "step": 19258 }, { "epoch": 0.5902598994728454, "grad_norm": 1.9131607995723765, "learning_rate": 3.7933229998999237e-06, "loss": 0.6465, "step": 19259 }, { "epoch": 0.5902905479955867, "grad_norm": 1.5533202541970472, "learning_rate": 3.7928413582850594e-06, "loss": 0.5161, "step": 19260 }, { "epoch": 0.5903211965183278, "grad_norm": 1.578724493444264, "learning_rate": 3.7923597285646406e-06, "loss": 0.599, "step": 19261 }, { "epoch": 0.590351845041069, "grad_norm": 1.785887125729285, "learning_rate": 3.7918781107434087e-06, "loss": 0.5546, "step": 19262 }, { "epoch": 0.5903824935638102, "grad_norm": 1.9540282275935486, "learning_rate": 3.7913965048261123e-06, "loss": 0.5946, "step": 19263 }, { "epoch": 0.5904131420865514, "grad_norm": 0.848288582456909, "learning_rate": 3.790914910817495e-06, "loss": 0.4381, "step": 19264 }, { "epoch": 0.5904437906092926, "grad_norm": 2.454595591325256, "learning_rate": 3.790433328722301e-06, "loss": 0.5345, "step": 19265 }, { "epoch": 0.5904744391320338, "grad_norm": 1.684118091062498, "learning_rate": 3.789951758545278e-06, "loss": 0.6734, "step": 19266 }, { "epoch": 0.590505087654775, "grad_norm": 1.6053535040942368, "learning_rate": 3.789470200291171e-06, "loss": 0.5865, "step": 19267 }, { "epoch": 0.5905357361775162, "grad_norm": 0.8190132635894997, "learning_rate": 3.788988653964722e-06, "loss": 0.4423, "step": 19268 }, { "epoch": 0.5905663847002575, "grad_norm": 1.5237906929095897, "learning_rate": 3.7885071195706786e-06, "loss": 0.4836, "step": 19269 }, { "epoch": 0.5905970332229986, "grad_norm": 1.6269639583829303, "learning_rate": 3.7880255971137857e-06, "loss": 0.5964, "step": 19270 }, { "epoch": 0.5906276817457399, "grad_norm": 1.7149035010805669, "learning_rate": 3.7875440865987843e-06, "loss": 0.5809, "step": 19271 }, { "epoch": 0.590658330268481, "grad_norm": 1.8273815534531945, "learning_rate": 3.787062588030423e-06, "loss": 0.5962, "step": 19272 }, { "epoch": 0.5906889787912223, "grad_norm": 1.697807618646677, "learning_rate": 3.7865811014134425e-06, "loss": 0.6083, "step": 19273 }, { "epoch": 0.5907196273139634, "grad_norm": 1.8207980540851834, "learning_rate": 3.7860996267525906e-06, "loss": 0.6376, "step": 19274 }, { "epoch": 0.5907502758367047, "grad_norm": 1.6412640093755666, "learning_rate": 3.7856181640526093e-06, "loss": 0.5362, "step": 19275 }, { "epoch": 0.5907809243594458, "grad_norm": 1.4717838810879358, "learning_rate": 3.7851367133182414e-06, "loss": 0.5401, "step": 19276 }, { "epoch": 0.5908115728821871, "grad_norm": 1.7212963735120994, "learning_rate": 3.784655274554234e-06, "loss": 0.6654, "step": 19277 }, { "epoch": 0.5908422214049283, "grad_norm": 1.7401482756275612, "learning_rate": 3.7841738477653305e-06, "loss": 0.5793, "step": 19278 }, { "epoch": 0.5908728699276695, "grad_norm": 1.7722229957525155, "learning_rate": 3.7836924329562697e-06, "loss": 0.6603, "step": 19279 }, { "epoch": 0.5909035184504107, "grad_norm": 1.69440572298294, "learning_rate": 3.7832110301318013e-06, "loss": 0.5266, "step": 19280 }, { "epoch": 0.5909341669731519, "grad_norm": 0.776926241480539, "learning_rate": 3.7827296392966634e-06, "loss": 0.4119, "step": 19281 }, { "epoch": 0.5909648154958931, "grad_norm": 1.8075047673668887, "learning_rate": 3.7822482604556043e-06, "loss": 0.5946, "step": 19282 }, { "epoch": 0.5909954640186343, "grad_norm": 1.9677055037146216, "learning_rate": 3.7817668936133645e-06, "loss": 0.5767, "step": 19283 }, { "epoch": 0.5910261125413755, "grad_norm": 1.7769968953795177, "learning_rate": 3.7812855387746857e-06, "loss": 0.5686, "step": 19284 }, { "epoch": 0.5910567610641168, "grad_norm": 1.8094909468982705, "learning_rate": 3.780804195944313e-06, "loss": 0.613, "step": 19285 }, { "epoch": 0.5910874095868579, "grad_norm": 0.7892763747065392, "learning_rate": 3.7803228651269887e-06, "loss": 0.4416, "step": 19286 }, { "epoch": 0.5911180581095992, "grad_norm": 1.7989939068169367, "learning_rate": 3.7798415463274544e-06, "loss": 0.5936, "step": 19287 }, { "epoch": 0.5911487066323403, "grad_norm": 1.6175008957120345, "learning_rate": 3.7793602395504546e-06, "loss": 0.546, "step": 19288 }, { "epoch": 0.5911793551550816, "grad_norm": 1.8992274751345488, "learning_rate": 3.7788789448007297e-06, "loss": 0.7286, "step": 19289 }, { "epoch": 0.5912100036778227, "grad_norm": 0.7624708252529574, "learning_rate": 3.7783976620830235e-06, "loss": 0.445, "step": 19290 }, { "epoch": 0.591240652200564, "grad_norm": 1.9844785438405874, "learning_rate": 3.7779163914020795e-06, "loss": 0.5092, "step": 19291 }, { "epoch": 0.5912713007233051, "grad_norm": 0.7721129910915299, "learning_rate": 3.777435132762634e-06, "loss": 0.4235, "step": 19292 }, { "epoch": 0.5913019492460463, "grad_norm": 1.8263512182346473, "learning_rate": 3.7769538861694365e-06, "loss": 0.6714, "step": 19293 }, { "epoch": 0.5913325977687875, "grad_norm": 1.7579447992697321, "learning_rate": 3.7764726516272243e-06, "loss": 0.6017, "step": 19294 }, { "epoch": 0.5913632462915287, "grad_norm": 1.6525320429849422, "learning_rate": 3.7759914291407397e-06, "loss": 0.5456, "step": 19295 }, { "epoch": 0.59139389481427, "grad_norm": 1.926871633746984, "learning_rate": 3.775510218714725e-06, "loss": 0.7048, "step": 19296 }, { "epoch": 0.5914245433370111, "grad_norm": 1.7471088878273509, "learning_rate": 3.7750290203539214e-06, "loss": 0.6486, "step": 19297 }, { "epoch": 0.5914551918597524, "grad_norm": 1.8800451585177342, "learning_rate": 3.7745478340630693e-06, "loss": 0.7215, "step": 19298 }, { "epoch": 0.5914858403824935, "grad_norm": 1.8569609730921108, "learning_rate": 3.774066659846912e-06, "loss": 0.6151, "step": 19299 }, { "epoch": 0.5915164889052348, "grad_norm": 1.7552892061626746, "learning_rate": 3.773585497710189e-06, "loss": 0.6337, "step": 19300 }, { "epoch": 0.5915471374279759, "grad_norm": 1.6587120150738968, "learning_rate": 3.7731043476576424e-06, "loss": 0.5538, "step": 19301 }, { "epoch": 0.5915777859507172, "grad_norm": 1.6954593076869762, "learning_rate": 3.7726232096940134e-06, "loss": 0.6072, "step": 19302 }, { "epoch": 0.5916084344734583, "grad_norm": 1.7902258231288177, "learning_rate": 3.772142083824039e-06, "loss": 0.6602, "step": 19303 }, { "epoch": 0.5916390829961996, "grad_norm": 1.8282908558153073, "learning_rate": 3.7716609700524664e-06, "loss": 0.6088, "step": 19304 }, { "epoch": 0.5916697315189408, "grad_norm": 1.88532329091873, "learning_rate": 3.771179868384031e-06, "loss": 0.5927, "step": 19305 }, { "epoch": 0.591700380041682, "grad_norm": 1.723757119414137, "learning_rate": 3.7706987788234738e-06, "loss": 0.5775, "step": 19306 }, { "epoch": 0.5917310285644232, "grad_norm": 0.7217395486321727, "learning_rate": 3.7702177013755376e-06, "loss": 0.3987, "step": 19307 }, { "epoch": 0.5917616770871644, "grad_norm": 0.8079100948673555, "learning_rate": 3.7697366360449592e-06, "loss": 0.4239, "step": 19308 }, { "epoch": 0.5917923256099056, "grad_norm": 1.7191446641340182, "learning_rate": 3.7692555828364824e-06, "loss": 0.6427, "step": 19309 }, { "epoch": 0.5918229741326468, "grad_norm": 1.6240939514585588, "learning_rate": 3.768774541754845e-06, "loss": 0.6506, "step": 19310 }, { "epoch": 0.591853622655388, "grad_norm": 1.481736695843373, "learning_rate": 3.768293512804786e-06, "loss": 0.5673, "step": 19311 }, { "epoch": 0.5918842711781293, "grad_norm": 1.7608880977350723, "learning_rate": 3.7678124959910466e-06, "loss": 0.6254, "step": 19312 }, { "epoch": 0.5919149197008704, "grad_norm": 1.781407659665013, "learning_rate": 3.767331491318368e-06, "loss": 0.5946, "step": 19313 }, { "epoch": 0.5919455682236117, "grad_norm": 1.7942990490190633, "learning_rate": 3.7668504987914846e-06, "loss": 0.637, "step": 19314 }, { "epoch": 0.5919762167463528, "grad_norm": 1.7230860692185042, "learning_rate": 3.76636951841514e-06, "loss": 0.5762, "step": 19315 }, { "epoch": 0.5920068652690941, "grad_norm": 1.6542784025094648, "learning_rate": 3.7658885501940713e-06, "loss": 0.5482, "step": 19316 }, { "epoch": 0.5920375137918352, "grad_norm": 1.8660337035414605, "learning_rate": 3.765407594133019e-06, "loss": 0.5539, "step": 19317 }, { "epoch": 0.5920681623145765, "grad_norm": 1.8964288632353512, "learning_rate": 3.7649266502367225e-06, "loss": 0.6949, "step": 19318 }, { "epoch": 0.5920988108373176, "grad_norm": 1.7686465020834166, "learning_rate": 3.764445718509918e-06, "loss": 0.6542, "step": 19319 }, { "epoch": 0.5921294593600589, "grad_norm": 1.8637510753383888, "learning_rate": 3.7639647989573474e-06, "loss": 0.6141, "step": 19320 }, { "epoch": 0.5921601078828, "grad_norm": 1.9681420480579368, "learning_rate": 3.7634838915837477e-06, "loss": 0.6311, "step": 19321 }, { "epoch": 0.5921907564055413, "grad_norm": 1.418054115370853, "learning_rate": 3.763002996393857e-06, "loss": 0.5569, "step": 19322 }, { "epoch": 0.5922214049282825, "grad_norm": 1.8475238659878117, "learning_rate": 3.7625221133924156e-06, "loss": 0.6977, "step": 19323 }, { "epoch": 0.5922520534510236, "grad_norm": 0.8396167123114756, "learning_rate": 3.76204124258416e-06, "loss": 0.4012, "step": 19324 }, { "epoch": 0.5922827019737649, "grad_norm": 0.8273984185073905, "learning_rate": 3.7615603839738275e-06, "loss": 0.4124, "step": 19325 }, { "epoch": 0.592313350496506, "grad_norm": 2.230522784708072, "learning_rate": 3.761079537566158e-06, "loss": 0.7452, "step": 19326 }, { "epoch": 0.5923439990192473, "grad_norm": 1.8125470676414863, "learning_rate": 3.7605987033658887e-06, "loss": 0.5719, "step": 19327 }, { "epoch": 0.5923746475419884, "grad_norm": 1.8502552301101496, "learning_rate": 3.760117881377758e-06, "loss": 0.5903, "step": 19328 }, { "epoch": 0.5924052960647297, "grad_norm": 1.5152701214683977, "learning_rate": 3.759637071606503e-06, "loss": 0.5803, "step": 19329 }, { "epoch": 0.5924359445874708, "grad_norm": 1.7033682780801118, "learning_rate": 3.75915627405686e-06, "loss": 0.6397, "step": 19330 }, { "epoch": 0.5924665931102121, "grad_norm": 1.9777494811503555, "learning_rate": 3.758675488733569e-06, "loss": 0.6413, "step": 19331 }, { "epoch": 0.5924972416329533, "grad_norm": 1.827118534559176, "learning_rate": 3.7581947156413673e-06, "loss": 0.6295, "step": 19332 }, { "epoch": 0.5925278901556945, "grad_norm": 1.7459903718491236, "learning_rate": 3.757713954784988e-06, "loss": 0.6641, "step": 19333 }, { "epoch": 0.5925585386784357, "grad_norm": 1.888208391467662, "learning_rate": 3.757233206169173e-06, "loss": 0.6502, "step": 19334 }, { "epoch": 0.5925891872011769, "grad_norm": 1.8161135290443542, "learning_rate": 3.7567524697986547e-06, "loss": 0.6937, "step": 19335 }, { "epoch": 0.5926198357239181, "grad_norm": 0.8024376924127635, "learning_rate": 3.7562717456781755e-06, "loss": 0.3964, "step": 19336 }, { "epoch": 0.5926504842466593, "grad_norm": 1.8356070736744934, "learning_rate": 3.755791033812468e-06, "loss": 0.5483, "step": 19337 }, { "epoch": 0.5926811327694005, "grad_norm": 1.5732967301058283, "learning_rate": 3.755310334206269e-06, "loss": 0.6309, "step": 19338 }, { "epoch": 0.5927117812921417, "grad_norm": 1.8974191427326113, "learning_rate": 3.7548296468643164e-06, "loss": 0.5729, "step": 19339 }, { "epoch": 0.5927424298148829, "grad_norm": 1.5950943291447364, "learning_rate": 3.754348971791346e-06, "loss": 0.6036, "step": 19340 }, { "epoch": 0.5927730783376242, "grad_norm": 1.7499844162801683, "learning_rate": 3.753868308992093e-06, "loss": 0.5051, "step": 19341 }, { "epoch": 0.5928037268603653, "grad_norm": 1.7983131893688238, "learning_rate": 3.7533876584712953e-06, "loss": 0.5952, "step": 19342 }, { "epoch": 0.5928343753831066, "grad_norm": 1.8746713896706733, "learning_rate": 3.7529070202336864e-06, "loss": 0.602, "step": 19343 }, { "epoch": 0.5928650239058477, "grad_norm": 1.9551245033078324, "learning_rate": 3.7524263942840056e-06, "loss": 0.6971, "step": 19344 }, { "epoch": 0.592895672428589, "grad_norm": 1.6730090827865616, "learning_rate": 3.751945780626988e-06, "loss": 0.5672, "step": 19345 }, { "epoch": 0.5929263209513301, "grad_norm": 1.843146505362776, "learning_rate": 3.7514651792673634e-06, "loss": 0.6011, "step": 19346 }, { "epoch": 0.5929569694740714, "grad_norm": 1.832999666097708, "learning_rate": 3.750984590209876e-06, "loss": 0.6645, "step": 19347 }, { "epoch": 0.5929876179968125, "grad_norm": 1.6956394438086133, "learning_rate": 3.7505040134592557e-06, "loss": 0.5464, "step": 19348 }, { "epoch": 0.5930182665195538, "grad_norm": 1.7118570907881323, "learning_rate": 3.750023449020238e-06, "loss": 0.5887, "step": 19349 }, { "epoch": 0.593048915042295, "grad_norm": 0.7836040145788165, "learning_rate": 3.7495428968975606e-06, "loss": 0.3987, "step": 19350 }, { "epoch": 0.5930795635650362, "grad_norm": 1.9763238536457002, "learning_rate": 3.749062357095956e-06, "loss": 0.707, "step": 19351 }, { "epoch": 0.5931102120877774, "grad_norm": 1.6697100091281718, "learning_rate": 3.7485818296201603e-06, "loss": 0.5873, "step": 19352 }, { "epoch": 0.5931408606105186, "grad_norm": 1.8778698722424563, "learning_rate": 3.7481013144749077e-06, "loss": 0.5503, "step": 19353 }, { "epoch": 0.5931715091332598, "grad_norm": 1.8745097449561412, "learning_rate": 3.7476208116649333e-06, "loss": 0.6326, "step": 19354 }, { "epoch": 0.5932021576560009, "grad_norm": 1.8097311437745653, "learning_rate": 3.747140321194972e-06, "loss": 0.6401, "step": 19355 }, { "epoch": 0.5932328061787422, "grad_norm": 1.7572867066654647, "learning_rate": 3.746659843069759e-06, "loss": 0.6642, "step": 19356 }, { "epoch": 0.5932634547014833, "grad_norm": 1.7216867423844742, "learning_rate": 3.7461793772940236e-06, "loss": 0.6534, "step": 19357 }, { "epoch": 0.5932941032242246, "grad_norm": 0.8008896668513972, "learning_rate": 3.745698923872507e-06, "loss": 0.4011, "step": 19358 }, { "epoch": 0.5933247517469658, "grad_norm": 1.8170370605620316, "learning_rate": 3.7452184828099385e-06, "loss": 0.605, "step": 19359 }, { "epoch": 0.593355400269707, "grad_norm": 1.8260952471019474, "learning_rate": 3.744738054111053e-06, "loss": 0.6581, "step": 19360 }, { "epoch": 0.5933860487924482, "grad_norm": 1.8318294818812224, "learning_rate": 3.744257637780585e-06, "loss": 0.5356, "step": 19361 }, { "epoch": 0.5934166973151894, "grad_norm": 1.7769069967607285, "learning_rate": 3.743777233823267e-06, "loss": 0.6411, "step": 19362 }, { "epoch": 0.5934473458379306, "grad_norm": 0.7705117429934734, "learning_rate": 3.743296842243834e-06, "loss": 0.4151, "step": 19363 }, { "epoch": 0.5934779943606718, "grad_norm": 1.8157352143947092, "learning_rate": 3.7428164630470193e-06, "loss": 0.559, "step": 19364 }, { "epoch": 0.593508642883413, "grad_norm": 1.687371817386489, "learning_rate": 3.7423360962375544e-06, "loss": 0.5867, "step": 19365 }, { "epoch": 0.5935392914061542, "grad_norm": 1.7919091804291543, "learning_rate": 3.741855741820176e-06, "loss": 0.6809, "step": 19366 }, { "epoch": 0.5935699399288954, "grad_norm": 1.725722126091299, "learning_rate": 3.741375399799614e-06, "loss": 0.6293, "step": 19367 }, { "epoch": 0.5936005884516367, "grad_norm": 1.6051631065283842, "learning_rate": 3.7408950701806003e-06, "loss": 0.5845, "step": 19368 }, { "epoch": 0.5936312369743778, "grad_norm": 1.8023051760285338, "learning_rate": 3.7404147529678715e-06, "loss": 0.5706, "step": 19369 }, { "epoch": 0.5936618854971191, "grad_norm": 1.5060717457510935, "learning_rate": 3.7399344481661582e-06, "loss": 0.5372, "step": 19370 }, { "epoch": 0.5936925340198602, "grad_norm": 0.7841029628609454, "learning_rate": 3.739454155780192e-06, "loss": 0.4347, "step": 19371 }, { "epoch": 0.5937231825426015, "grad_norm": 0.8552655890969834, "learning_rate": 3.7389738758147075e-06, "loss": 0.4223, "step": 19372 }, { "epoch": 0.5937538310653426, "grad_norm": 1.8480646093888857, "learning_rate": 3.738493608274435e-06, "loss": 0.609, "step": 19373 }, { "epoch": 0.5937844795880839, "grad_norm": 1.5261503658996365, "learning_rate": 3.7380133531641093e-06, "loss": 0.6111, "step": 19374 }, { "epoch": 0.593815128110825, "grad_norm": 1.631241427810733, "learning_rate": 3.7375331104884617e-06, "loss": 0.5379, "step": 19375 }, { "epoch": 0.5938457766335663, "grad_norm": 1.820955194440108, "learning_rate": 3.73705288025222e-06, "loss": 0.5741, "step": 19376 }, { "epoch": 0.5938764251563075, "grad_norm": 0.7935306693491364, "learning_rate": 3.7365726624601228e-06, "loss": 0.4106, "step": 19377 }, { "epoch": 0.5939070736790487, "grad_norm": 1.9973481031953129, "learning_rate": 3.736092457116897e-06, "loss": 0.6775, "step": 19378 }, { "epoch": 0.5939377222017899, "grad_norm": 1.8005425830629411, "learning_rate": 3.7356122642272753e-06, "loss": 0.7311, "step": 19379 }, { "epoch": 0.5939683707245311, "grad_norm": 2.070999871211758, "learning_rate": 3.73513208379599e-06, "loss": 0.6081, "step": 19380 }, { "epoch": 0.5939990192472723, "grad_norm": 0.819191194570169, "learning_rate": 3.7346519158277707e-06, "loss": 0.443, "step": 19381 }, { "epoch": 0.5940296677700135, "grad_norm": 1.908613687415709, "learning_rate": 3.734171760327351e-06, "loss": 0.6209, "step": 19382 }, { "epoch": 0.5940603162927547, "grad_norm": 2.2167309374099915, "learning_rate": 3.7336916172994608e-06, "loss": 0.5588, "step": 19383 }, { "epoch": 0.594090964815496, "grad_norm": 2.057910563131458, "learning_rate": 3.73321148674883e-06, "loss": 0.7374, "step": 19384 }, { "epoch": 0.5941216133382371, "grad_norm": 1.8071951918836335, "learning_rate": 3.7327313686801926e-06, "loss": 0.6053, "step": 19385 }, { "epoch": 0.5941522618609782, "grad_norm": 1.8053160061845401, "learning_rate": 3.732251263098277e-06, "loss": 0.6148, "step": 19386 }, { "epoch": 0.5941829103837195, "grad_norm": 0.8117673477941186, "learning_rate": 3.731771170007811e-06, "loss": 0.4076, "step": 19387 }, { "epoch": 0.5942135589064607, "grad_norm": 0.7836032324033416, "learning_rate": 3.7312910894135324e-06, "loss": 0.4294, "step": 19388 }, { "epoch": 0.5942442074292019, "grad_norm": 1.76382860794999, "learning_rate": 3.730811021320163e-06, "loss": 0.5777, "step": 19389 }, { "epoch": 0.5942748559519431, "grad_norm": 1.5181504659690654, "learning_rate": 3.730330965732441e-06, "loss": 0.6416, "step": 19390 }, { "epoch": 0.5943055044746843, "grad_norm": 2.0177259844469013, "learning_rate": 3.7298509226550916e-06, "loss": 0.6698, "step": 19391 }, { "epoch": 0.5943361529974255, "grad_norm": 1.620306849371045, "learning_rate": 3.729370892092845e-06, "loss": 0.5848, "step": 19392 }, { "epoch": 0.5943668015201667, "grad_norm": 0.7421363855234615, "learning_rate": 3.7288908740504337e-06, "loss": 0.4169, "step": 19393 }, { "epoch": 0.5943974500429079, "grad_norm": 0.7534641313524038, "learning_rate": 3.7284108685325853e-06, "loss": 0.4282, "step": 19394 }, { "epoch": 0.5944280985656492, "grad_norm": 1.8385580861410649, "learning_rate": 3.727930875544029e-06, "loss": 0.6634, "step": 19395 }, { "epoch": 0.5944587470883903, "grad_norm": 1.8041364767526773, "learning_rate": 3.727450895089497e-06, "loss": 0.513, "step": 19396 }, { "epoch": 0.5944893956111316, "grad_norm": 0.7782866442961898, "learning_rate": 3.726970927173717e-06, "loss": 0.4228, "step": 19397 }, { "epoch": 0.5945200441338727, "grad_norm": 1.782413210321628, "learning_rate": 3.7264909718014153e-06, "loss": 0.57, "step": 19398 }, { "epoch": 0.594550692656614, "grad_norm": 1.8377092204227632, "learning_rate": 3.726011028977327e-06, "loss": 0.6644, "step": 19399 }, { "epoch": 0.5945813411793551, "grad_norm": 1.9534148909677505, "learning_rate": 3.725531098706175e-06, "loss": 0.5995, "step": 19400 }, { "epoch": 0.5946119897020964, "grad_norm": 1.8891419536417584, "learning_rate": 3.7250511809926943e-06, "loss": 0.5725, "step": 19401 }, { "epoch": 0.5946426382248375, "grad_norm": 1.6717008204412636, "learning_rate": 3.72457127584161e-06, "loss": 0.6331, "step": 19402 }, { "epoch": 0.5946732867475788, "grad_norm": 1.7667644315208826, "learning_rate": 3.724091383257649e-06, "loss": 0.6444, "step": 19403 }, { "epoch": 0.59470393527032, "grad_norm": 2.1373672617124595, "learning_rate": 3.723611503245544e-06, "loss": 0.6007, "step": 19404 }, { "epoch": 0.5947345837930612, "grad_norm": 1.7051149416163498, "learning_rate": 3.723131635810021e-06, "loss": 0.5272, "step": 19405 }, { "epoch": 0.5947652323158024, "grad_norm": 1.6218339976575555, "learning_rate": 3.7226517809558084e-06, "loss": 0.544, "step": 19406 }, { "epoch": 0.5947958808385436, "grad_norm": 1.5292079220526238, "learning_rate": 3.7221719386876342e-06, "loss": 0.602, "step": 19407 }, { "epoch": 0.5948265293612848, "grad_norm": 1.664706140565794, "learning_rate": 3.721692109010227e-06, "loss": 0.5765, "step": 19408 }, { "epoch": 0.594857177884026, "grad_norm": 1.526975526933511, "learning_rate": 3.7212122919283158e-06, "loss": 0.459, "step": 19409 }, { "epoch": 0.5948878264067672, "grad_norm": 3.6418643585220027, "learning_rate": 3.7207324874466274e-06, "loss": 0.6214, "step": 19410 }, { "epoch": 0.5949184749295084, "grad_norm": 1.6686931902659647, "learning_rate": 3.720252695569887e-06, "loss": 0.5282, "step": 19411 }, { "epoch": 0.5949491234522496, "grad_norm": 1.7042865623880097, "learning_rate": 3.7197729163028252e-06, "loss": 0.6308, "step": 19412 }, { "epoch": 0.5949797719749909, "grad_norm": 1.917769646979922, "learning_rate": 3.7192931496501687e-06, "loss": 0.5404, "step": 19413 }, { "epoch": 0.595010420497732, "grad_norm": 0.8122155017455199, "learning_rate": 3.718813395616644e-06, "loss": 0.4283, "step": 19414 }, { "epoch": 0.5950410690204733, "grad_norm": 1.7046604774520293, "learning_rate": 3.7183336542069792e-06, "loss": 0.5771, "step": 19415 }, { "epoch": 0.5950717175432144, "grad_norm": 1.7658111283686082, "learning_rate": 3.7178539254258992e-06, "loss": 0.617, "step": 19416 }, { "epoch": 0.5951023660659556, "grad_norm": 1.7874528154857434, "learning_rate": 3.7173742092781344e-06, "loss": 0.5829, "step": 19417 }, { "epoch": 0.5951330145886968, "grad_norm": 0.8112184925374429, "learning_rate": 3.7168945057684103e-06, "loss": 0.4552, "step": 19418 }, { "epoch": 0.595163663111438, "grad_norm": 1.9038215708262005, "learning_rate": 3.71641481490145e-06, "loss": 0.6643, "step": 19419 }, { "epoch": 0.5951943116341792, "grad_norm": 1.5043965325069202, "learning_rate": 3.7159351366819863e-06, "loss": 0.5182, "step": 19420 }, { "epoch": 0.5952249601569204, "grad_norm": 1.6273479114148375, "learning_rate": 3.7154554711147405e-06, "loss": 0.7179, "step": 19421 }, { "epoch": 0.5952556086796617, "grad_norm": 1.7862785444070457, "learning_rate": 3.7149758182044405e-06, "loss": 0.6406, "step": 19422 }, { "epoch": 0.5952862572024028, "grad_norm": 1.6721178996194845, "learning_rate": 3.714496177955813e-06, "loss": 0.6202, "step": 19423 }, { "epoch": 0.5953169057251441, "grad_norm": 1.768901043089718, "learning_rate": 3.7140165503735835e-06, "loss": 0.6349, "step": 19424 }, { "epoch": 0.5953475542478852, "grad_norm": 1.5628105777610821, "learning_rate": 3.7135369354624774e-06, "loss": 0.6224, "step": 19425 }, { "epoch": 0.5953782027706265, "grad_norm": 1.8544160331942359, "learning_rate": 3.713057333227222e-06, "loss": 0.6862, "step": 19426 }, { "epoch": 0.5954088512933676, "grad_norm": 1.7442491992121831, "learning_rate": 3.712577743672541e-06, "loss": 0.5895, "step": 19427 }, { "epoch": 0.5954394998161089, "grad_norm": 1.4886151455266854, "learning_rate": 3.7120981668031608e-06, "loss": 0.5852, "step": 19428 }, { "epoch": 0.59547014833885, "grad_norm": 2.0302363302089517, "learning_rate": 3.7116186026238094e-06, "loss": 0.7409, "step": 19429 }, { "epoch": 0.5955007968615913, "grad_norm": 1.9806887551572343, "learning_rate": 3.7111390511392054e-06, "loss": 0.6978, "step": 19430 }, { "epoch": 0.5955314453843324, "grad_norm": 0.8456187834786735, "learning_rate": 3.7106595123540818e-06, "loss": 0.4278, "step": 19431 }, { "epoch": 0.5955620939070737, "grad_norm": 1.786422802779214, "learning_rate": 3.710179986273159e-06, "loss": 0.6404, "step": 19432 }, { "epoch": 0.5955927424298149, "grad_norm": 1.7013507155696659, "learning_rate": 3.709700472901161e-06, "loss": 0.691, "step": 19433 }, { "epoch": 0.5956233909525561, "grad_norm": 1.678471159621965, "learning_rate": 3.7092209722428162e-06, "loss": 0.5585, "step": 19434 }, { "epoch": 0.5956540394752973, "grad_norm": 1.691674893549127, "learning_rate": 3.708741484302846e-06, "loss": 0.5621, "step": 19435 }, { "epoch": 0.5956846879980385, "grad_norm": 1.608581008974358, "learning_rate": 3.708262009085978e-06, "loss": 0.6564, "step": 19436 }, { "epoch": 0.5957153365207797, "grad_norm": 1.7425218492965013, "learning_rate": 3.707782546596934e-06, "loss": 0.594, "step": 19437 }, { "epoch": 0.5957459850435209, "grad_norm": 1.9060232474706262, "learning_rate": 3.7073030968404382e-06, "loss": 0.6147, "step": 19438 }, { "epoch": 0.5957766335662621, "grad_norm": 1.866349455404348, "learning_rate": 3.706823659821217e-06, "loss": 0.5815, "step": 19439 }, { "epoch": 0.5958072820890034, "grad_norm": 1.7783095765201185, "learning_rate": 3.706344235543995e-06, "loss": 0.6041, "step": 19440 }, { "epoch": 0.5958379306117445, "grad_norm": 1.8820697689478365, "learning_rate": 3.7058648240134897e-06, "loss": 0.6807, "step": 19441 }, { "epoch": 0.5958685791344858, "grad_norm": 1.554464412829625, "learning_rate": 3.7053854252344334e-06, "loss": 0.6018, "step": 19442 }, { "epoch": 0.5958992276572269, "grad_norm": 0.8592748345779473, "learning_rate": 3.7049060392115425e-06, "loss": 0.4232, "step": 19443 }, { "epoch": 0.5959298761799682, "grad_norm": 1.780573174656462, "learning_rate": 3.704426665949547e-06, "loss": 0.6131, "step": 19444 }, { "epoch": 0.5959605247027093, "grad_norm": 1.7819848369288476, "learning_rate": 3.7039473054531662e-06, "loss": 0.653, "step": 19445 }, { "epoch": 0.5959911732254506, "grad_norm": 1.7601011531592523, "learning_rate": 3.7034679577271226e-06, "loss": 0.6437, "step": 19446 }, { "epoch": 0.5960218217481917, "grad_norm": 2.0691381707414545, "learning_rate": 3.7029886227761426e-06, "loss": 0.6869, "step": 19447 }, { "epoch": 0.5960524702709329, "grad_norm": 1.750810475421135, "learning_rate": 3.7025093006049467e-06, "loss": 0.6138, "step": 19448 }, { "epoch": 0.5960831187936741, "grad_norm": 0.7591748281596538, "learning_rate": 3.702029991218258e-06, "loss": 0.4227, "step": 19449 }, { "epoch": 0.5961137673164153, "grad_norm": 1.6783361576481786, "learning_rate": 3.7015506946208014e-06, "loss": 0.6522, "step": 19450 }, { "epoch": 0.5961444158391566, "grad_norm": 1.6555305893112175, "learning_rate": 3.7010714108172986e-06, "loss": 0.5764, "step": 19451 }, { "epoch": 0.5961750643618977, "grad_norm": 1.812623005159799, "learning_rate": 3.7005921398124682e-06, "loss": 0.5665, "step": 19452 }, { "epoch": 0.596205712884639, "grad_norm": 1.9565392345865873, "learning_rate": 3.70011288161104e-06, "loss": 0.6412, "step": 19453 }, { "epoch": 0.5962363614073801, "grad_norm": 1.8244872584997862, "learning_rate": 3.6996336362177274e-06, "loss": 0.5913, "step": 19454 }, { "epoch": 0.5962670099301214, "grad_norm": 1.8031919360861384, "learning_rate": 3.699154403637262e-06, "loss": 0.5958, "step": 19455 }, { "epoch": 0.5962976584528625, "grad_norm": 0.7796911206946123, "learning_rate": 3.6986751838743596e-06, "loss": 0.4248, "step": 19456 }, { "epoch": 0.5963283069756038, "grad_norm": 1.6012192262491383, "learning_rate": 3.6981959769337423e-06, "loss": 0.5907, "step": 19457 }, { "epoch": 0.596358955498345, "grad_norm": 1.6181999916742387, "learning_rate": 3.6977167828201344e-06, "loss": 0.581, "step": 19458 }, { "epoch": 0.5963896040210862, "grad_norm": 1.6384525236384193, "learning_rate": 3.6972376015382563e-06, "loss": 0.5699, "step": 19459 }, { "epoch": 0.5964202525438274, "grad_norm": 1.705114825414781, "learning_rate": 3.6967584330928274e-06, "loss": 0.6113, "step": 19460 }, { "epoch": 0.5964509010665686, "grad_norm": 2.7065251558441505, "learning_rate": 3.6962792774885735e-06, "loss": 0.6705, "step": 19461 }, { "epoch": 0.5964815495893098, "grad_norm": 2.0790663195874197, "learning_rate": 3.695800134730212e-06, "loss": 0.7524, "step": 19462 }, { "epoch": 0.596512198112051, "grad_norm": 1.8523508776729654, "learning_rate": 3.695321004822467e-06, "loss": 0.5393, "step": 19463 }, { "epoch": 0.5965428466347922, "grad_norm": 0.8131816324943953, "learning_rate": 3.6948418877700577e-06, "loss": 0.4334, "step": 19464 }, { "epoch": 0.5965734951575334, "grad_norm": 1.798908743559139, "learning_rate": 3.6943627835777035e-06, "loss": 0.5658, "step": 19465 }, { "epoch": 0.5966041436802746, "grad_norm": 1.9577740113800044, "learning_rate": 3.693883692250128e-06, "loss": 0.6452, "step": 19466 }, { "epoch": 0.5966347922030159, "grad_norm": 1.719359391815523, "learning_rate": 3.693404613792051e-06, "loss": 0.6013, "step": 19467 }, { "epoch": 0.596665440725757, "grad_norm": 1.5373970892412774, "learning_rate": 3.6929255482081916e-06, "loss": 0.5817, "step": 19468 }, { "epoch": 0.5966960892484983, "grad_norm": 1.6355994613514018, "learning_rate": 3.692446495503272e-06, "loss": 0.6184, "step": 19469 }, { "epoch": 0.5967267377712394, "grad_norm": 1.7191938464664684, "learning_rate": 3.6919674556820108e-06, "loss": 0.6628, "step": 19470 }, { "epoch": 0.5967573862939807, "grad_norm": 0.7787966082564265, "learning_rate": 3.691488428749129e-06, "loss": 0.4364, "step": 19471 }, { "epoch": 0.5967880348167218, "grad_norm": 1.7650690050957292, "learning_rate": 3.691009414709349e-06, "loss": 0.5687, "step": 19472 }, { "epoch": 0.5968186833394631, "grad_norm": 2.0574091791621028, "learning_rate": 3.6905304135673848e-06, "loss": 0.5681, "step": 19473 }, { "epoch": 0.5968493318622042, "grad_norm": 1.7431562246944232, "learning_rate": 3.6900514253279618e-06, "loss": 0.5635, "step": 19474 }, { "epoch": 0.5968799803849455, "grad_norm": 0.7645495283279021, "learning_rate": 3.689572449995797e-06, "loss": 0.4248, "step": 19475 }, { "epoch": 0.5969106289076866, "grad_norm": 1.7629831014337312, "learning_rate": 3.6890934875756086e-06, "loss": 0.6959, "step": 19476 }, { "epoch": 0.5969412774304279, "grad_norm": 1.6548735730094917, "learning_rate": 3.6886145380721182e-06, "loss": 0.6198, "step": 19477 }, { "epoch": 0.5969719259531691, "grad_norm": 2.031714703645662, "learning_rate": 3.6881356014900447e-06, "loss": 0.5967, "step": 19478 }, { "epoch": 0.5970025744759102, "grad_norm": 1.7785835359824331, "learning_rate": 3.687656677834106e-06, "loss": 0.5911, "step": 19479 }, { "epoch": 0.5970332229986515, "grad_norm": 1.7859733427552267, "learning_rate": 3.6871777671090233e-06, "loss": 0.6085, "step": 19480 }, { "epoch": 0.5970638715213926, "grad_norm": 1.682236366499462, "learning_rate": 3.686698869319512e-06, "loss": 0.6346, "step": 19481 }, { "epoch": 0.5970945200441339, "grad_norm": 1.755455281262762, "learning_rate": 3.686219984470294e-06, "loss": 0.6038, "step": 19482 }, { "epoch": 0.597125168566875, "grad_norm": 1.7076185970640183, "learning_rate": 3.685741112566088e-06, "loss": 0.6226, "step": 19483 }, { "epoch": 0.5971558170896163, "grad_norm": 1.85626289685358, "learning_rate": 3.6852622536116076e-06, "loss": 0.6722, "step": 19484 }, { "epoch": 0.5971864656123574, "grad_norm": 1.652448764447401, "learning_rate": 3.684783407611578e-06, "loss": 0.5859, "step": 19485 }, { "epoch": 0.5972171141350987, "grad_norm": 0.8036232222378359, "learning_rate": 3.684304574570713e-06, "loss": 0.4379, "step": 19486 }, { "epoch": 0.5972477626578399, "grad_norm": 1.9756476982062676, "learning_rate": 3.6838257544937307e-06, "loss": 0.6608, "step": 19487 }, { "epoch": 0.5972784111805811, "grad_norm": 1.8487697617258985, "learning_rate": 3.68334694738535e-06, "loss": 0.539, "step": 19488 }, { "epoch": 0.5973090597033223, "grad_norm": 1.8364078521192653, "learning_rate": 3.6828681532502884e-06, "loss": 0.651, "step": 19489 }, { "epoch": 0.5973397082260635, "grad_norm": 1.9982732484609511, "learning_rate": 3.6823893720932656e-06, "loss": 0.6475, "step": 19490 }, { "epoch": 0.5973703567488047, "grad_norm": 1.5012925291723804, "learning_rate": 3.6819106039189967e-06, "loss": 0.691, "step": 19491 }, { "epoch": 0.5974010052715459, "grad_norm": 1.7004961905727545, "learning_rate": 3.681431848732199e-06, "loss": 0.6925, "step": 19492 }, { "epoch": 0.5974316537942871, "grad_norm": 1.7660467958561292, "learning_rate": 3.6809531065375914e-06, "loss": 0.6418, "step": 19493 }, { "epoch": 0.5974623023170283, "grad_norm": 1.8235592410006878, "learning_rate": 3.680474377339892e-06, "loss": 0.5917, "step": 19494 }, { "epoch": 0.5974929508397695, "grad_norm": 1.8350982413131562, "learning_rate": 3.6799956611438124e-06, "loss": 0.7205, "step": 19495 }, { "epoch": 0.5975235993625108, "grad_norm": 1.7757063558042767, "learning_rate": 3.679516957954077e-06, "loss": 0.6261, "step": 19496 }, { "epoch": 0.5975542478852519, "grad_norm": 1.8305970836882453, "learning_rate": 3.6790382677753954e-06, "loss": 0.6385, "step": 19497 }, { "epoch": 0.5975848964079932, "grad_norm": 1.888675426570334, "learning_rate": 3.6785595906124903e-06, "loss": 0.6577, "step": 19498 }, { "epoch": 0.5976155449307343, "grad_norm": 1.9651496822431553, "learning_rate": 3.678080926470076e-06, "loss": 0.5417, "step": 19499 }, { "epoch": 0.5976461934534756, "grad_norm": 2.081351126106748, "learning_rate": 3.6776022753528664e-06, "loss": 0.5983, "step": 19500 }, { "epoch": 0.5976768419762167, "grad_norm": 2.1327691694528617, "learning_rate": 3.6771236372655817e-06, "loss": 0.6166, "step": 19501 }, { "epoch": 0.597707490498958, "grad_norm": 1.7464270030924123, "learning_rate": 3.6766450122129355e-06, "loss": 0.5303, "step": 19502 }, { "epoch": 0.5977381390216991, "grad_norm": 1.703364475638287, "learning_rate": 3.6761664001996437e-06, "loss": 0.5448, "step": 19503 }, { "epoch": 0.5977687875444404, "grad_norm": 1.8072411762760532, "learning_rate": 3.6756878012304242e-06, "loss": 0.5971, "step": 19504 }, { "epoch": 0.5977994360671816, "grad_norm": 0.7707429145968181, "learning_rate": 3.6752092153099934e-06, "loss": 0.4124, "step": 19505 }, { "epoch": 0.5978300845899228, "grad_norm": 0.8030316801898522, "learning_rate": 3.674730642443061e-06, "loss": 0.4238, "step": 19506 }, { "epoch": 0.597860733112664, "grad_norm": 1.8446976492099925, "learning_rate": 3.6742520826343508e-06, "loss": 0.6729, "step": 19507 }, { "epoch": 0.5978913816354052, "grad_norm": 1.8287484965411216, "learning_rate": 3.673773535888571e-06, "loss": 0.6221, "step": 19508 }, { "epoch": 0.5979220301581464, "grad_norm": 1.72265603521505, "learning_rate": 3.673295002210442e-06, "loss": 0.5973, "step": 19509 }, { "epoch": 0.5979526786808875, "grad_norm": 1.6913610371938328, "learning_rate": 3.672816481604676e-06, "loss": 0.6202, "step": 19510 }, { "epoch": 0.5979833272036288, "grad_norm": 1.7042751093344557, "learning_rate": 3.672337974075988e-06, "loss": 0.658, "step": 19511 }, { "epoch": 0.5980139757263699, "grad_norm": 1.848400053308879, "learning_rate": 3.671859479629094e-06, "loss": 0.7211, "step": 19512 }, { "epoch": 0.5980446242491112, "grad_norm": 1.809497881390554, "learning_rate": 3.671380998268709e-06, "loss": 0.5527, "step": 19513 }, { "epoch": 0.5980752727718524, "grad_norm": 1.6269701761245723, "learning_rate": 3.670902529999546e-06, "loss": 0.575, "step": 19514 }, { "epoch": 0.5981059212945936, "grad_norm": 1.657278319256192, "learning_rate": 3.670424074826322e-06, "loss": 0.5736, "step": 19515 }, { "epoch": 0.5981365698173348, "grad_norm": 1.674043187194475, "learning_rate": 3.6699456327537477e-06, "loss": 0.6048, "step": 19516 }, { "epoch": 0.598167218340076, "grad_norm": 1.7216000568116996, "learning_rate": 3.6694672037865416e-06, "loss": 0.5772, "step": 19517 }, { "epoch": 0.5981978668628172, "grad_norm": 1.648134259936896, "learning_rate": 3.6689887879294146e-06, "loss": 0.5282, "step": 19518 }, { "epoch": 0.5982285153855584, "grad_norm": 1.4602952413313712, "learning_rate": 3.6685103851870808e-06, "loss": 0.5335, "step": 19519 }, { "epoch": 0.5982591639082996, "grad_norm": 0.8055976255317008, "learning_rate": 3.6680319955642556e-06, "loss": 0.4194, "step": 19520 }, { "epoch": 0.5982898124310408, "grad_norm": 1.7025371420095752, "learning_rate": 3.6675536190656525e-06, "loss": 0.6444, "step": 19521 }, { "epoch": 0.598320460953782, "grad_norm": 1.568913491301863, "learning_rate": 3.6670752556959834e-06, "loss": 0.5754, "step": 19522 }, { "epoch": 0.5983511094765233, "grad_norm": 0.866505143457592, "learning_rate": 3.6665969054599633e-06, "loss": 0.4527, "step": 19523 }, { "epoch": 0.5983817579992644, "grad_norm": 1.6450686614334529, "learning_rate": 3.6661185683623047e-06, "loss": 0.5588, "step": 19524 }, { "epoch": 0.5984124065220057, "grad_norm": 2.08273846447409, "learning_rate": 3.665640244407721e-06, "loss": 0.5703, "step": 19525 }, { "epoch": 0.5984430550447468, "grad_norm": 2.122869203096768, "learning_rate": 3.6651619336009275e-06, "loss": 0.6601, "step": 19526 }, { "epoch": 0.5984737035674881, "grad_norm": 2.0897015468518805, "learning_rate": 3.664683635946632e-06, "loss": 0.6319, "step": 19527 }, { "epoch": 0.5985043520902292, "grad_norm": 1.6405153463719035, "learning_rate": 3.664205351449553e-06, "loss": 0.5985, "step": 19528 }, { "epoch": 0.5985350006129705, "grad_norm": 0.787423106736258, "learning_rate": 3.663727080114399e-06, "loss": 0.428, "step": 19529 }, { "epoch": 0.5985656491357116, "grad_norm": 0.8079050994611153, "learning_rate": 3.663248821945884e-06, "loss": 0.4113, "step": 19530 }, { "epoch": 0.5985962976584529, "grad_norm": 0.7840423177979775, "learning_rate": 3.6627705769487204e-06, "loss": 0.4408, "step": 19531 }, { "epoch": 0.598626946181194, "grad_norm": 1.8143258786794134, "learning_rate": 3.662292345127621e-06, "loss": 0.5769, "step": 19532 }, { "epoch": 0.5986575947039353, "grad_norm": 1.9005843264111688, "learning_rate": 3.6618141264872964e-06, "loss": 0.6577, "step": 19533 }, { "epoch": 0.5986882432266765, "grad_norm": 1.6455231698893489, "learning_rate": 3.6613359210324606e-06, "loss": 0.6487, "step": 19534 }, { "epoch": 0.5987188917494177, "grad_norm": 1.7409078850974344, "learning_rate": 3.6608577287678226e-06, "loss": 0.7314, "step": 19535 }, { "epoch": 0.5987495402721589, "grad_norm": 1.6828612388117734, "learning_rate": 3.6603795496980983e-06, "loss": 0.6, "step": 19536 }, { "epoch": 0.5987801887949001, "grad_norm": 1.8354334327660637, "learning_rate": 3.6599013838279975e-06, "loss": 0.6134, "step": 19537 }, { "epoch": 0.5988108373176413, "grad_norm": 1.7998182289757478, "learning_rate": 3.659423231162228e-06, "loss": 0.5779, "step": 19538 }, { "epoch": 0.5988414858403825, "grad_norm": 1.7108049856074548, "learning_rate": 3.658945091705508e-06, "loss": 0.6352, "step": 19539 }, { "epoch": 0.5988721343631237, "grad_norm": 1.874345958673576, "learning_rate": 3.6584669654625436e-06, "loss": 0.5475, "step": 19540 }, { "epoch": 0.5989027828858648, "grad_norm": 1.6458485876274462, "learning_rate": 3.657988852438047e-06, "loss": 0.642, "step": 19541 }, { "epoch": 0.5989334314086061, "grad_norm": 1.773679040678756, "learning_rate": 3.6575107526367297e-06, "loss": 0.5773, "step": 19542 }, { "epoch": 0.5989640799313473, "grad_norm": 0.8289585431166243, "learning_rate": 3.657032666063302e-06, "loss": 0.4247, "step": 19543 }, { "epoch": 0.5989947284540885, "grad_norm": 1.5712652485521228, "learning_rate": 3.6565545927224762e-06, "loss": 0.5849, "step": 19544 }, { "epoch": 0.5990253769768297, "grad_norm": 1.6372123182695812, "learning_rate": 3.6560765326189617e-06, "loss": 0.5286, "step": 19545 }, { "epoch": 0.5990560254995709, "grad_norm": 2.0167437906520784, "learning_rate": 3.655598485757468e-06, "loss": 0.6891, "step": 19546 }, { "epoch": 0.5990866740223121, "grad_norm": 1.9298396983667276, "learning_rate": 3.655120452142707e-06, "loss": 0.7128, "step": 19547 }, { "epoch": 0.5991173225450533, "grad_norm": 1.6270885270333937, "learning_rate": 3.6546424317793893e-06, "loss": 0.5847, "step": 19548 }, { "epoch": 0.5991479710677945, "grad_norm": 1.8099144535813796, "learning_rate": 3.6541644246722212e-06, "loss": 0.5636, "step": 19549 }, { "epoch": 0.5991786195905358, "grad_norm": 0.7960160791393857, "learning_rate": 3.653686430825919e-06, "loss": 0.419, "step": 19550 }, { "epoch": 0.5992092681132769, "grad_norm": 1.7773349340405926, "learning_rate": 3.653208450245187e-06, "loss": 0.5771, "step": 19551 }, { "epoch": 0.5992399166360182, "grad_norm": 1.643455862697796, "learning_rate": 3.6527304829347356e-06, "loss": 0.5729, "step": 19552 }, { "epoch": 0.5992705651587593, "grad_norm": 1.7547746432047413, "learning_rate": 3.652252528899277e-06, "loss": 0.6167, "step": 19553 }, { "epoch": 0.5993012136815006, "grad_norm": 0.7899822226494637, "learning_rate": 3.651774588143518e-06, "loss": 0.4254, "step": 19554 }, { "epoch": 0.5993318622042417, "grad_norm": 1.6177604018516254, "learning_rate": 3.65129666067217e-06, "loss": 0.5882, "step": 19555 }, { "epoch": 0.599362510726983, "grad_norm": 0.7955909059518642, "learning_rate": 3.6508187464899402e-06, "loss": 0.4195, "step": 19556 }, { "epoch": 0.5993931592497241, "grad_norm": 0.7746190086632813, "learning_rate": 3.650340845601539e-06, "loss": 0.4099, "step": 19557 }, { "epoch": 0.5994238077724654, "grad_norm": 1.8510923429234203, "learning_rate": 3.6498629580116747e-06, "loss": 0.535, "step": 19558 }, { "epoch": 0.5994544562952066, "grad_norm": 2.1670440649403364, "learning_rate": 3.6493850837250576e-06, "loss": 0.698, "step": 19559 }, { "epoch": 0.5994851048179478, "grad_norm": 1.684799371402369, "learning_rate": 3.6489072227463924e-06, "loss": 0.6665, "step": 19560 }, { "epoch": 0.599515753340689, "grad_norm": 1.7402967381373031, "learning_rate": 3.648429375080391e-06, "loss": 0.5999, "step": 19561 }, { "epoch": 0.5995464018634302, "grad_norm": 1.6810453332560324, "learning_rate": 3.6479515407317603e-06, "loss": 0.598, "step": 19562 }, { "epoch": 0.5995770503861714, "grad_norm": 1.7666280828258512, "learning_rate": 3.6474737197052094e-06, "loss": 0.5282, "step": 19563 }, { "epoch": 0.5996076989089126, "grad_norm": 1.847198567928944, "learning_rate": 3.6469959120054464e-06, "loss": 0.6382, "step": 19564 }, { "epoch": 0.5996383474316538, "grad_norm": 1.8570566622671876, "learning_rate": 3.6465181176371777e-06, "loss": 0.5628, "step": 19565 }, { "epoch": 0.599668995954395, "grad_norm": 1.7669637801064924, "learning_rate": 3.6460403366051132e-06, "loss": 0.6085, "step": 19566 }, { "epoch": 0.5996996444771362, "grad_norm": 0.7956207509739166, "learning_rate": 3.6455625689139617e-06, "loss": 0.3935, "step": 19567 }, { "epoch": 0.5997302929998775, "grad_norm": 1.90469501010854, "learning_rate": 3.645084814568425e-06, "loss": 0.5622, "step": 19568 }, { "epoch": 0.5997609415226186, "grad_norm": 1.6617421115628106, "learning_rate": 3.6446070735732168e-06, "loss": 0.6589, "step": 19569 }, { "epoch": 0.5997915900453599, "grad_norm": 0.7711721112679057, "learning_rate": 3.6441293459330394e-06, "loss": 0.4355, "step": 19570 }, { "epoch": 0.599822238568101, "grad_norm": 1.8308298511261067, "learning_rate": 3.6436516316526054e-06, "loss": 0.6367, "step": 19571 }, { "epoch": 0.5998528870908422, "grad_norm": 2.076717111360441, "learning_rate": 3.643173930736618e-06, "loss": 0.5904, "step": 19572 }, { "epoch": 0.5998835356135834, "grad_norm": 1.7614544766094955, "learning_rate": 3.642696243189784e-06, "loss": 0.731, "step": 19573 }, { "epoch": 0.5999141841363246, "grad_norm": 0.8084588934913072, "learning_rate": 3.6422185690168123e-06, "loss": 0.4502, "step": 19574 }, { "epoch": 0.5999448326590658, "grad_norm": 0.7787413366839814, "learning_rate": 3.641740908222408e-06, "loss": 0.4248, "step": 19575 }, { "epoch": 0.599975481181807, "grad_norm": 1.8799478307629969, "learning_rate": 3.6412632608112775e-06, "loss": 0.6834, "step": 19576 }, { "epoch": 0.6000061297045483, "grad_norm": 1.9182762048143351, "learning_rate": 3.6407856267881283e-06, "loss": 0.7176, "step": 19577 }, { "epoch": 0.6000367782272894, "grad_norm": 1.5861728394920016, "learning_rate": 3.6403080061576677e-06, "loss": 0.5633, "step": 19578 }, { "epoch": 0.6000674267500307, "grad_norm": 1.7955576475153243, "learning_rate": 3.6398303989245964e-06, "loss": 0.5766, "step": 19579 }, { "epoch": 0.6000980752727718, "grad_norm": 1.7405957470339597, "learning_rate": 3.6393528050936277e-06, "loss": 0.629, "step": 19580 }, { "epoch": 0.6001287237955131, "grad_norm": 1.7495656882071862, "learning_rate": 3.6388752246694613e-06, "loss": 0.632, "step": 19581 }, { "epoch": 0.6001593723182542, "grad_norm": 1.7320220172245469, "learning_rate": 3.638397657656808e-06, "loss": 0.664, "step": 19582 }, { "epoch": 0.6001900208409955, "grad_norm": 1.845071322786074, "learning_rate": 3.63792010406037e-06, "loss": 0.6804, "step": 19583 }, { "epoch": 0.6002206693637366, "grad_norm": 1.542460954685355, "learning_rate": 3.637442563884853e-06, "loss": 0.6021, "step": 19584 }, { "epoch": 0.6002513178864779, "grad_norm": 1.5805575069441862, "learning_rate": 3.636965037134964e-06, "loss": 0.6409, "step": 19585 }, { "epoch": 0.600281966409219, "grad_norm": 1.6942751858752925, "learning_rate": 3.6364875238154073e-06, "loss": 0.559, "step": 19586 }, { "epoch": 0.6003126149319603, "grad_norm": 1.9480557648325953, "learning_rate": 3.6360100239308867e-06, "loss": 0.6376, "step": 19587 }, { "epoch": 0.6003432634547015, "grad_norm": 0.8248888488504038, "learning_rate": 3.6355325374861096e-06, "loss": 0.4198, "step": 19588 }, { "epoch": 0.6003739119774427, "grad_norm": 1.5479326595644376, "learning_rate": 3.635055064485778e-06, "loss": 0.5535, "step": 19589 }, { "epoch": 0.6004045605001839, "grad_norm": 1.7944846340659393, "learning_rate": 3.634577604934599e-06, "loss": 0.627, "step": 19590 }, { "epoch": 0.6004352090229251, "grad_norm": 1.673833495118982, "learning_rate": 3.634100158837278e-06, "loss": 0.6868, "step": 19591 }, { "epoch": 0.6004658575456663, "grad_norm": 1.9069040566285722, "learning_rate": 3.633622726198514e-06, "loss": 0.5816, "step": 19592 }, { "epoch": 0.6004965060684075, "grad_norm": 1.7034467134265787, "learning_rate": 3.6331453070230182e-06, "loss": 0.6299, "step": 19593 }, { "epoch": 0.6005271545911487, "grad_norm": 1.7457711806866332, "learning_rate": 3.6326679013154904e-06, "loss": 0.5723, "step": 19594 }, { "epoch": 0.60055780311389, "grad_norm": 1.6936568626482589, "learning_rate": 3.632190509080634e-06, "loss": 0.6294, "step": 19595 }, { "epoch": 0.6005884516366311, "grad_norm": 2.075467545609608, "learning_rate": 3.631713130323157e-06, "loss": 0.6153, "step": 19596 }, { "epoch": 0.6006191001593724, "grad_norm": 1.9258828368410097, "learning_rate": 3.631235765047758e-06, "loss": 0.5651, "step": 19597 }, { "epoch": 0.6006497486821135, "grad_norm": 1.772358063776085, "learning_rate": 3.6307584132591445e-06, "loss": 0.6663, "step": 19598 }, { "epoch": 0.6006803972048548, "grad_norm": 1.653186333610547, "learning_rate": 3.6302810749620193e-06, "loss": 0.5338, "step": 19599 }, { "epoch": 0.6007110457275959, "grad_norm": 1.6821665753475412, "learning_rate": 3.629803750161084e-06, "loss": 0.5992, "step": 19600 }, { "epoch": 0.6007416942503372, "grad_norm": 1.8744956004829811, "learning_rate": 3.629326438861044e-06, "loss": 0.644, "step": 19601 }, { "epoch": 0.6007723427730783, "grad_norm": 1.9252209324384226, "learning_rate": 3.6288491410666015e-06, "loss": 0.5404, "step": 19602 }, { "epoch": 0.6008029912958195, "grad_norm": 1.6600119357200644, "learning_rate": 3.6283718567824575e-06, "loss": 0.5524, "step": 19603 }, { "epoch": 0.6008336398185607, "grad_norm": 1.7306373537803077, "learning_rate": 3.6278945860133184e-06, "loss": 0.5992, "step": 19604 }, { "epoch": 0.6008642883413019, "grad_norm": 1.8638531193386838, "learning_rate": 3.6274173287638848e-06, "loss": 0.5868, "step": 19605 }, { "epoch": 0.6008949368640432, "grad_norm": 0.7815330151059654, "learning_rate": 3.626940085038858e-06, "loss": 0.4285, "step": 19606 }, { "epoch": 0.6009255853867843, "grad_norm": 1.9563521674731847, "learning_rate": 3.6264628548429427e-06, "loss": 0.6732, "step": 19607 }, { "epoch": 0.6009562339095256, "grad_norm": 1.7027522000747135, "learning_rate": 3.62598563818084e-06, "loss": 0.5899, "step": 19608 }, { "epoch": 0.6009868824322667, "grad_norm": 0.8056973329134985, "learning_rate": 3.6255084350572523e-06, "loss": 0.417, "step": 19609 }, { "epoch": 0.601017530955008, "grad_norm": 1.671414510477594, "learning_rate": 3.6250312454768827e-06, "loss": 0.5602, "step": 19610 }, { "epoch": 0.6010481794777491, "grad_norm": 1.9522190379584807, "learning_rate": 3.6245540694444303e-06, "loss": 0.5891, "step": 19611 }, { "epoch": 0.6010788280004904, "grad_norm": 0.7714396465408897, "learning_rate": 3.6240769069646016e-06, "loss": 0.4448, "step": 19612 }, { "epoch": 0.6011094765232315, "grad_norm": 1.9956993302497696, "learning_rate": 3.6235997580420934e-06, "loss": 0.6545, "step": 19613 }, { "epoch": 0.6011401250459728, "grad_norm": 1.960920080377245, "learning_rate": 3.623122622681608e-06, "loss": 0.6317, "step": 19614 }, { "epoch": 0.601170773568714, "grad_norm": 1.569374170755168, "learning_rate": 3.6226455008878486e-06, "loss": 0.5754, "step": 19615 }, { "epoch": 0.6012014220914552, "grad_norm": 2.0023121022055625, "learning_rate": 3.622168392665515e-06, "loss": 0.5783, "step": 19616 }, { "epoch": 0.6012320706141964, "grad_norm": 1.7341329274634214, "learning_rate": 3.6216912980193094e-06, "loss": 0.6756, "step": 19617 }, { "epoch": 0.6012627191369376, "grad_norm": 1.7997340053685082, "learning_rate": 3.621214216953932e-06, "loss": 0.7087, "step": 19618 }, { "epoch": 0.6012933676596788, "grad_norm": 1.4485490513342782, "learning_rate": 3.620737149474083e-06, "loss": 0.6256, "step": 19619 }, { "epoch": 0.60132401618242, "grad_norm": 1.8734005426140026, "learning_rate": 3.6202600955844642e-06, "loss": 0.6498, "step": 19620 }, { "epoch": 0.6013546647051612, "grad_norm": 2.0867180354741897, "learning_rate": 3.6197830552897773e-06, "loss": 0.5768, "step": 19621 }, { "epoch": 0.6013853132279025, "grad_norm": 2.009549733940646, "learning_rate": 3.619306028594718e-06, "loss": 0.632, "step": 19622 }, { "epoch": 0.6014159617506436, "grad_norm": 1.7249142738305612, "learning_rate": 3.6188290155039925e-06, "loss": 0.6356, "step": 19623 }, { "epoch": 0.6014466102733849, "grad_norm": 1.8511374959351556, "learning_rate": 3.618352016022295e-06, "loss": 0.6337, "step": 19624 }, { "epoch": 0.601477258796126, "grad_norm": 1.6646549045212287, "learning_rate": 3.617875030154332e-06, "loss": 0.5524, "step": 19625 }, { "epoch": 0.6015079073188673, "grad_norm": 1.700687079171754, "learning_rate": 3.6173980579047984e-06, "loss": 0.638, "step": 19626 }, { "epoch": 0.6015385558416084, "grad_norm": 1.5652721696070728, "learning_rate": 3.6169210992783948e-06, "loss": 0.5886, "step": 19627 }, { "epoch": 0.6015692043643497, "grad_norm": 2.045229209194322, "learning_rate": 3.6164441542798225e-06, "loss": 0.615, "step": 19628 }, { "epoch": 0.6015998528870908, "grad_norm": 1.7676069386112998, "learning_rate": 3.61596722291378e-06, "loss": 0.5513, "step": 19629 }, { "epoch": 0.6016305014098321, "grad_norm": 1.7691584187306078, "learning_rate": 3.6154903051849656e-06, "loss": 0.621, "step": 19630 }, { "epoch": 0.6016611499325732, "grad_norm": 1.7395905460695908, "learning_rate": 3.6150134010980796e-06, "loss": 0.6757, "step": 19631 }, { "epoch": 0.6016917984553145, "grad_norm": 1.9850717296980493, "learning_rate": 3.6145365106578235e-06, "loss": 0.6181, "step": 19632 }, { "epoch": 0.6017224469780557, "grad_norm": 1.649290147300558, "learning_rate": 3.614059633868889e-06, "loss": 0.6291, "step": 19633 }, { "epoch": 0.6017530955007968, "grad_norm": 1.7480688617885365, "learning_rate": 3.613582770735984e-06, "loss": 0.7485, "step": 19634 }, { "epoch": 0.6017837440235381, "grad_norm": 0.73928102655028, "learning_rate": 3.6131059212637986e-06, "loss": 0.3929, "step": 19635 }, { "epoch": 0.6018143925462792, "grad_norm": 2.0382627152103896, "learning_rate": 3.612629085457039e-06, "loss": 0.7335, "step": 19636 }, { "epoch": 0.6018450410690205, "grad_norm": 1.6697370837247099, "learning_rate": 3.6121522633203987e-06, "loss": 0.5669, "step": 19637 }, { "epoch": 0.6018756895917616, "grad_norm": 1.9929944267578121, "learning_rate": 3.6116754548585765e-06, "loss": 0.6461, "step": 19638 }, { "epoch": 0.6019063381145029, "grad_norm": 1.7684811857141516, "learning_rate": 3.611198660076273e-06, "loss": 0.6128, "step": 19639 }, { "epoch": 0.601936986637244, "grad_norm": 1.6930938561704758, "learning_rate": 3.610721878978183e-06, "loss": 0.6051, "step": 19640 }, { "epoch": 0.6019676351599853, "grad_norm": 1.954171092034899, "learning_rate": 3.610245111569005e-06, "loss": 0.6289, "step": 19641 }, { "epoch": 0.6019982836827265, "grad_norm": 1.7161236388519625, "learning_rate": 3.609768357853439e-06, "loss": 0.6841, "step": 19642 }, { "epoch": 0.6020289322054677, "grad_norm": 1.7150949070543942, "learning_rate": 3.6092916178361802e-06, "loss": 0.626, "step": 19643 }, { "epoch": 0.6020595807282089, "grad_norm": 1.6363194522995388, "learning_rate": 3.6088148915219277e-06, "loss": 0.6594, "step": 19644 }, { "epoch": 0.6020902292509501, "grad_norm": 1.6328333994084387, "learning_rate": 3.6083381789153792e-06, "loss": 0.5079, "step": 19645 }, { "epoch": 0.6021208777736913, "grad_norm": 0.8264765421511668, "learning_rate": 3.6078614800212273e-06, "loss": 0.4481, "step": 19646 }, { "epoch": 0.6021515262964325, "grad_norm": 1.9901912932920545, "learning_rate": 3.6073847948441756e-06, "loss": 0.5993, "step": 19647 }, { "epoch": 0.6021821748191737, "grad_norm": 0.7625659166099723, "learning_rate": 3.606908123388917e-06, "loss": 0.419, "step": 19648 }, { "epoch": 0.602212823341915, "grad_norm": 1.7094733352446587, "learning_rate": 3.606431465660148e-06, "loss": 0.4882, "step": 19649 }, { "epoch": 0.6022434718646561, "grad_norm": 0.8004481126678565, "learning_rate": 3.605954821662567e-06, "loss": 0.4396, "step": 19650 }, { "epoch": 0.6022741203873974, "grad_norm": 1.7912025365337536, "learning_rate": 3.6054781914008696e-06, "loss": 0.628, "step": 19651 }, { "epoch": 0.6023047689101385, "grad_norm": 1.830373525103728, "learning_rate": 3.6050015748797528e-06, "loss": 0.6409, "step": 19652 }, { "epoch": 0.6023354174328798, "grad_norm": 1.851816905179635, "learning_rate": 3.6045249721039122e-06, "loss": 0.7662, "step": 19653 }, { "epoch": 0.6023660659556209, "grad_norm": 1.8412621879328797, "learning_rate": 3.604048383078044e-06, "loss": 0.6228, "step": 19654 }, { "epoch": 0.6023967144783622, "grad_norm": 1.8676198134668562, "learning_rate": 3.6035718078068455e-06, "loss": 0.6661, "step": 19655 }, { "epoch": 0.6024273630011033, "grad_norm": 1.7436516989679434, "learning_rate": 3.6030952462950103e-06, "loss": 0.5959, "step": 19656 }, { "epoch": 0.6024580115238446, "grad_norm": 1.761253937782661, "learning_rate": 3.6026186985472344e-06, "loss": 0.6995, "step": 19657 }, { "epoch": 0.6024886600465857, "grad_norm": 1.603096980876257, "learning_rate": 3.602142164568214e-06, "loss": 0.5567, "step": 19658 }, { "epoch": 0.602519308569327, "grad_norm": 1.6705337840671295, "learning_rate": 3.6016656443626458e-06, "loss": 0.6494, "step": 19659 }, { "epoch": 0.6025499570920682, "grad_norm": 1.7849609834913847, "learning_rate": 3.6011891379352224e-06, "loss": 0.6553, "step": 19660 }, { "epoch": 0.6025806056148094, "grad_norm": 1.8705210811612425, "learning_rate": 3.600712645290641e-06, "loss": 0.6671, "step": 19661 }, { "epoch": 0.6026112541375506, "grad_norm": 1.883292309487976, "learning_rate": 3.600236166433595e-06, "loss": 0.6837, "step": 19662 }, { "epoch": 0.6026419026602918, "grad_norm": 1.6645800830288469, "learning_rate": 3.5997597013687813e-06, "loss": 0.611, "step": 19663 }, { "epoch": 0.602672551183033, "grad_norm": 1.7548024938254254, "learning_rate": 3.5992832501008943e-06, "loss": 0.7049, "step": 19664 }, { "epoch": 0.6027031997057741, "grad_norm": 1.8523913785822592, "learning_rate": 3.5988068126346254e-06, "loss": 0.593, "step": 19665 }, { "epoch": 0.6027338482285154, "grad_norm": 1.9294933794350957, "learning_rate": 3.5983303889746745e-06, "loss": 0.5916, "step": 19666 }, { "epoch": 0.6027644967512565, "grad_norm": 0.8573929606070763, "learning_rate": 3.597853979125732e-06, "loss": 0.4152, "step": 19667 }, { "epoch": 0.6027951452739978, "grad_norm": 1.853916545391658, "learning_rate": 3.5973775830924907e-06, "loss": 0.5754, "step": 19668 }, { "epoch": 0.602825793796739, "grad_norm": 1.7134820578666983, "learning_rate": 3.5969012008796487e-06, "loss": 0.604, "step": 19669 }, { "epoch": 0.6028564423194802, "grad_norm": 1.7844801558823686, "learning_rate": 3.5964248324918977e-06, "loss": 0.5994, "step": 19670 }, { "epoch": 0.6028870908422214, "grad_norm": 1.7283631709441591, "learning_rate": 3.5959484779339327e-06, "loss": 0.614, "step": 19671 }, { "epoch": 0.6029177393649626, "grad_norm": 1.6395729061005835, "learning_rate": 3.5954721372104464e-06, "loss": 0.6211, "step": 19672 }, { "epoch": 0.6029483878877038, "grad_norm": 1.7699029366067103, "learning_rate": 3.594995810326132e-06, "loss": 0.5768, "step": 19673 }, { "epoch": 0.602979036410445, "grad_norm": 1.69385628841741, "learning_rate": 3.5945194972856834e-06, "loss": 0.5481, "step": 19674 }, { "epoch": 0.6030096849331862, "grad_norm": 1.941221587614188, "learning_rate": 3.594043198093795e-06, "loss": 0.5831, "step": 19675 }, { "epoch": 0.6030403334559274, "grad_norm": 1.7430650746577319, "learning_rate": 3.5935669127551566e-06, "loss": 0.6218, "step": 19676 }, { "epoch": 0.6030709819786686, "grad_norm": 1.7697309937535435, "learning_rate": 3.5930906412744656e-06, "loss": 0.6347, "step": 19677 }, { "epoch": 0.6031016305014099, "grad_norm": 1.7044308342751053, "learning_rate": 3.5926143836564093e-06, "loss": 0.7084, "step": 19678 }, { "epoch": 0.603132279024151, "grad_norm": 1.919956018956643, "learning_rate": 3.5921381399056864e-06, "loss": 0.5979, "step": 19679 }, { "epoch": 0.6031629275468923, "grad_norm": 0.7985562526802865, "learning_rate": 3.591661910026987e-06, "loss": 0.4166, "step": 19680 }, { "epoch": 0.6031935760696334, "grad_norm": 1.8116249013846757, "learning_rate": 3.5911856940250006e-06, "loss": 0.5785, "step": 19681 }, { "epoch": 0.6032242245923747, "grad_norm": 1.5694479162412127, "learning_rate": 3.5907094919044237e-06, "loss": 0.5137, "step": 19682 }, { "epoch": 0.6032548731151158, "grad_norm": 0.7915605716594074, "learning_rate": 3.5902333036699465e-06, "loss": 0.4392, "step": 19683 }, { "epoch": 0.6032855216378571, "grad_norm": 2.164945667676202, "learning_rate": 3.58975712932626e-06, "loss": 0.7328, "step": 19684 }, { "epoch": 0.6033161701605982, "grad_norm": 1.6192170494704483, "learning_rate": 3.5892809688780594e-06, "loss": 0.5896, "step": 19685 }, { "epoch": 0.6033468186833395, "grad_norm": 1.832274972216766, "learning_rate": 3.5888048223300343e-06, "loss": 0.6449, "step": 19686 }, { "epoch": 0.6033774672060807, "grad_norm": 1.6214387962267789, "learning_rate": 3.588328689686874e-06, "loss": 0.5837, "step": 19687 }, { "epoch": 0.6034081157288219, "grad_norm": 1.9888915150553061, "learning_rate": 3.587852570953275e-06, "loss": 0.6305, "step": 19688 }, { "epoch": 0.6034387642515631, "grad_norm": 2.130764950324046, "learning_rate": 3.587376466133923e-06, "loss": 0.6334, "step": 19689 }, { "epoch": 0.6034694127743043, "grad_norm": 0.7702079263369691, "learning_rate": 3.5869003752335152e-06, "loss": 0.4167, "step": 19690 }, { "epoch": 0.6035000612970455, "grad_norm": 1.6066529340701368, "learning_rate": 3.5864242982567386e-06, "loss": 0.6566, "step": 19691 }, { "epoch": 0.6035307098197867, "grad_norm": 0.7454212295118912, "learning_rate": 3.5859482352082837e-06, "loss": 0.3884, "step": 19692 }, { "epoch": 0.6035613583425279, "grad_norm": 0.80484496084411, "learning_rate": 3.5854721860928436e-06, "loss": 0.4272, "step": 19693 }, { "epoch": 0.6035920068652691, "grad_norm": 1.800611152124239, "learning_rate": 3.5849961509151088e-06, "loss": 0.6631, "step": 19694 }, { "epoch": 0.6036226553880103, "grad_norm": 2.0674047000000835, "learning_rate": 3.584520129679767e-06, "loss": 0.613, "step": 19695 }, { "epoch": 0.6036533039107514, "grad_norm": 2.0572694831803284, "learning_rate": 3.5840441223915123e-06, "loss": 0.5585, "step": 19696 }, { "epoch": 0.6036839524334927, "grad_norm": 1.7592688454200323, "learning_rate": 3.5835681290550315e-06, "loss": 0.6259, "step": 19697 }, { "epoch": 0.6037146009562339, "grad_norm": 1.8311508271478292, "learning_rate": 3.5830921496750178e-06, "loss": 0.6608, "step": 19698 }, { "epoch": 0.6037452494789751, "grad_norm": 1.944811393062771, "learning_rate": 3.58261618425616e-06, "loss": 0.7115, "step": 19699 }, { "epoch": 0.6037758980017163, "grad_norm": 1.8344648276571833, "learning_rate": 3.5821402328031463e-06, "loss": 0.6207, "step": 19700 }, { "epoch": 0.6038065465244575, "grad_norm": 1.678154432077264, "learning_rate": 3.5816642953206686e-06, "loss": 0.6239, "step": 19701 }, { "epoch": 0.6038371950471987, "grad_norm": 1.7808139059724897, "learning_rate": 3.5811883718134154e-06, "loss": 0.5785, "step": 19702 }, { "epoch": 0.6038678435699399, "grad_norm": 1.77284711201431, "learning_rate": 3.5807124622860756e-06, "loss": 0.5817, "step": 19703 }, { "epoch": 0.6038984920926811, "grad_norm": 1.6590033817610403, "learning_rate": 3.580236566743339e-06, "loss": 0.6337, "step": 19704 }, { "epoch": 0.6039291406154224, "grad_norm": 1.7857973032908236, "learning_rate": 3.5797606851898946e-06, "loss": 0.5405, "step": 19705 }, { "epoch": 0.6039597891381635, "grad_norm": 0.912939483278073, "learning_rate": 3.5792848176304323e-06, "loss": 0.4161, "step": 19706 }, { "epoch": 0.6039904376609048, "grad_norm": 0.893345300172173, "learning_rate": 3.578808964069641e-06, "loss": 0.4368, "step": 19707 }, { "epoch": 0.6040210861836459, "grad_norm": 2.102986606892131, "learning_rate": 3.578333124512206e-06, "loss": 0.6088, "step": 19708 }, { "epoch": 0.6040517347063872, "grad_norm": 1.768096593862705, "learning_rate": 3.5778572989628215e-06, "loss": 0.6784, "step": 19709 }, { "epoch": 0.6040823832291283, "grad_norm": 1.5913400690707606, "learning_rate": 3.5773814874261716e-06, "loss": 0.691, "step": 19710 }, { "epoch": 0.6041130317518696, "grad_norm": 1.8902625324393276, "learning_rate": 3.5769056899069455e-06, "loss": 0.5915, "step": 19711 }, { "epoch": 0.6041436802746107, "grad_norm": 2.0180781707541713, "learning_rate": 3.576429906409832e-06, "loss": 0.5218, "step": 19712 }, { "epoch": 0.604174328797352, "grad_norm": 2.8353175073698065, "learning_rate": 3.575954136939519e-06, "loss": 0.542, "step": 19713 }, { "epoch": 0.6042049773200932, "grad_norm": 0.879145840288188, "learning_rate": 3.575478381500693e-06, "loss": 0.4306, "step": 19714 }, { "epoch": 0.6042356258428344, "grad_norm": 1.7013775754024953, "learning_rate": 3.575002640098045e-06, "loss": 0.5763, "step": 19715 }, { "epoch": 0.6042662743655756, "grad_norm": 1.9284662708637716, "learning_rate": 3.5745269127362584e-06, "loss": 0.5988, "step": 19716 }, { "epoch": 0.6042969228883168, "grad_norm": 1.8104111773123501, "learning_rate": 3.5740511994200245e-06, "loss": 0.5752, "step": 19717 }, { "epoch": 0.604327571411058, "grad_norm": 0.8165174306997475, "learning_rate": 3.57357550015403e-06, "loss": 0.3986, "step": 19718 }, { "epoch": 0.6043582199337992, "grad_norm": 1.6881550745162843, "learning_rate": 3.573099814942958e-06, "loss": 0.6116, "step": 19719 }, { "epoch": 0.6043888684565404, "grad_norm": 1.726233245613569, "learning_rate": 3.5726241437915014e-06, "loss": 0.609, "step": 19720 }, { "epoch": 0.6044195169792816, "grad_norm": 1.762043002511959, "learning_rate": 3.572148486704344e-06, "loss": 0.6694, "step": 19721 }, { "epoch": 0.6044501655020228, "grad_norm": 1.5576756247517973, "learning_rate": 3.5716728436861715e-06, "loss": 0.5403, "step": 19722 }, { "epoch": 0.6044808140247641, "grad_norm": 1.767253667124356, "learning_rate": 3.5711972147416723e-06, "loss": 0.5532, "step": 19723 }, { "epoch": 0.6045114625475052, "grad_norm": 2.5145380109773146, "learning_rate": 3.570721599875532e-06, "loss": 0.6615, "step": 19724 }, { "epoch": 0.6045421110702465, "grad_norm": 1.6974497068064607, "learning_rate": 3.5702459990924386e-06, "loss": 0.7075, "step": 19725 }, { "epoch": 0.6045727595929876, "grad_norm": 1.954627688517818, "learning_rate": 3.5697704123970767e-06, "loss": 0.6834, "step": 19726 }, { "epoch": 0.6046034081157288, "grad_norm": 1.5229131438441428, "learning_rate": 3.5692948397941322e-06, "loss": 0.6271, "step": 19727 }, { "epoch": 0.60463405663847, "grad_norm": 1.8103074009760673, "learning_rate": 3.5688192812882927e-06, "loss": 0.6308, "step": 19728 }, { "epoch": 0.6046647051612112, "grad_norm": 1.6777768006999532, "learning_rate": 3.5683437368842444e-06, "loss": 0.6862, "step": 19729 }, { "epoch": 0.6046953536839524, "grad_norm": 1.8346466950238915, "learning_rate": 3.5678682065866684e-06, "loss": 0.5888, "step": 19730 }, { "epoch": 0.6047260022066936, "grad_norm": 1.6490069344656637, "learning_rate": 3.567392690400256e-06, "loss": 0.5898, "step": 19731 }, { "epoch": 0.6047566507294349, "grad_norm": 0.7952044747348392, "learning_rate": 3.5669171883296896e-06, "loss": 0.4213, "step": 19732 }, { "epoch": 0.604787299252176, "grad_norm": 1.9576544307741022, "learning_rate": 3.5664417003796524e-06, "loss": 0.6188, "step": 19733 }, { "epoch": 0.6048179477749173, "grad_norm": 1.757276837811021, "learning_rate": 3.5659662265548344e-06, "loss": 0.6351, "step": 19734 }, { "epoch": 0.6048485962976584, "grad_norm": 1.8850898701231413, "learning_rate": 3.5654907668599165e-06, "loss": 0.5579, "step": 19735 }, { "epoch": 0.6048792448203997, "grad_norm": 1.7552842620962523, "learning_rate": 3.5650153212995864e-06, "loss": 0.5773, "step": 19736 }, { "epoch": 0.6049098933431408, "grad_norm": 1.7467318186790384, "learning_rate": 3.564539889878527e-06, "loss": 0.691, "step": 19737 }, { "epoch": 0.6049405418658821, "grad_norm": 1.7875078321772415, "learning_rate": 3.564064472601423e-06, "loss": 0.6192, "step": 19738 }, { "epoch": 0.6049711903886232, "grad_norm": 1.8299542305428944, "learning_rate": 3.5635890694729596e-06, "loss": 0.5967, "step": 19739 }, { "epoch": 0.6050018389113645, "grad_norm": 1.962940550451023, "learning_rate": 3.5631136804978215e-06, "loss": 0.5702, "step": 19740 }, { "epoch": 0.6050324874341056, "grad_norm": 1.6736170586959433, "learning_rate": 3.5626383056806896e-06, "loss": 0.6462, "step": 19741 }, { "epoch": 0.6050631359568469, "grad_norm": 2.022526637126696, "learning_rate": 3.562162945026253e-06, "loss": 0.5562, "step": 19742 }, { "epoch": 0.6050937844795881, "grad_norm": 1.653343175174734, "learning_rate": 3.5616875985391897e-06, "loss": 0.5983, "step": 19743 }, { "epoch": 0.6051244330023293, "grad_norm": 1.8698875764855043, "learning_rate": 3.5612122662241894e-06, "loss": 0.6459, "step": 19744 }, { "epoch": 0.6051550815250705, "grad_norm": 1.6731307186867608, "learning_rate": 3.560736948085932e-06, "loss": 0.5427, "step": 19745 }, { "epoch": 0.6051857300478117, "grad_norm": 1.6465462766396004, "learning_rate": 3.5602616441291003e-06, "loss": 0.541, "step": 19746 }, { "epoch": 0.6052163785705529, "grad_norm": 1.824926649079809, "learning_rate": 3.55978635435838e-06, "loss": 0.6581, "step": 19747 }, { "epoch": 0.6052470270932941, "grad_norm": 1.705531047844107, "learning_rate": 3.5593110787784535e-06, "loss": 0.6805, "step": 19748 }, { "epoch": 0.6052776756160353, "grad_norm": 0.8348001995286175, "learning_rate": 3.558835817394003e-06, "loss": 0.4363, "step": 19749 }, { "epoch": 0.6053083241387766, "grad_norm": 1.9572050379175712, "learning_rate": 3.5583605702097122e-06, "loss": 0.5675, "step": 19750 }, { "epoch": 0.6053389726615177, "grad_norm": 1.708744452920872, "learning_rate": 3.557885337230263e-06, "loss": 0.6234, "step": 19751 }, { "epoch": 0.605369621184259, "grad_norm": 1.7375873229344894, "learning_rate": 3.5574101184603405e-06, "loss": 0.6504, "step": 19752 }, { "epoch": 0.6054002697070001, "grad_norm": 1.6673162990101302, "learning_rate": 3.5569349139046237e-06, "loss": 0.5276, "step": 19753 }, { "epoch": 0.6054309182297414, "grad_norm": 1.7565079008520188, "learning_rate": 3.556459723567796e-06, "loss": 0.6302, "step": 19754 }, { "epoch": 0.6054615667524825, "grad_norm": 2.1349138794473923, "learning_rate": 3.5559845474545406e-06, "loss": 0.615, "step": 19755 }, { "epoch": 0.6054922152752238, "grad_norm": 1.6911068430126388, "learning_rate": 3.5555093855695396e-06, "loss": 0.5895, "step": 19756 }, { "epoch": 0.6055228637979649, "grad_norm": 1.7663177010227167, "learning_rate": 3.5550342379174725e-06, "loss": 0.6391, "step": 19757 }, { "epoch": 0.6055535123207061, "grad_norm": 1.7343150098702893, "learning_rate": 3.5545591045030238e-06, "loss": 0.6186, "step": 19758 }, { "epoch": 0.6055841608434473, "grad_norm": 1.640610467291216, "learning_rate": 3.5540839853308754e-06, "loss": 0.548, "step": 19759 }, { "epoch": 0.6056148093661885, "grad_norm": 1.9539177282861817, "learning_rate": 3.5536088804057044e-06, "loss": 0.6555, "step": 19760 }, { "epoch": 0.6056454578889298, "grad_norm": 1.8980750998235463, "learning_rate": 3.553133789732198e-06, "loss": 0.6449, "step": 19761 }, { "epoch": 0.6056761064116709, "grad_norm": 1.9047643979966, "learning_rate": 3.5526587133150314e-06, "loss": 0.6381, "step": 19762 }, { "epoch": 0.6057067549344122, "grad_norm": 1.8067786316735124, "learning_rate": 3.5521836511588925e-06, "loss": 0.7082, "step": 19763 }, { "epoch": 0.6057374034571533, "grad_norm": 1.7374259523613944, "learning_rate": 3.5517086032684567e-06, "loss": 0.5953, "step": 19764 }, { "epoch": 0.6057680519798946, "grad_norm": 1.8416491828896075, "learning_rate": 3.5512335696484064e-06, "loss": 0.6139, "step": 19765 }, { "epoch": 0.6057987005026357, "grad_norm": 1.7571094812633647, "learning_rate": 3.550758550303423e-06, "loss": 0.6932, "step": 19766 }, { "epoch": 0.605829349025377, "grad_norm": 1.579224234040905, "learning_rate": 3.5502835452381866e-06, "loss": 0.6434, "step": 19767 }, { "epoch": 0.6058599975481181, "grad_norm": 1.5414871892754651, "learning_rate": 3.5498085544573755e-06, "loss": 0.5186, "step": 19768 }, { "epoch": 0.6058906460708594, "grad_norm": 1.9590273209791915, "learning_rate": 3.549333577965674e-06, "loss": 0.6861, "step": 19769 }, { "epoch": 0.6059212945936006, "grad_norm": 0.8373733377750713, "learning_rate": 3.5488586157677586e-06, "loss": 0.4363, "step": 19770 }, { "epoch": 0.6059519431163418, "grad_norm": 1.7070414677348367, "learning_rate": 3.5483836678683108e-06, "loss": 0.5511, "step": 19771 }, { "epoch": 0.605982591639083, "grad_norm": 0.7996604936198745, "learning_rate": 3.547908734272012e-06, "loss": 0.4273, "step": 19772 }, { "epoch": 0.6060132401618242, "grad_norm": 0.7422402720059492, "learning_rate": 3.5474338149835363e-06, "loss": 0.3924, "step": 19773 }, { "epoch": 0.6060438886845654, "grad_norm": 1.7207095456281958, "learning_rate": 3.5469589100075707e-06, "loss": 0.6396, "step": 19774 }, { "epoch": 0.6060745372073066, "grad_norm": 1.6842132415631288, "learning_rate": 3.546484019348789e-06, "loss": 0.6, "step": 19775 }, { "epoch": 0.6061051857300478, "grad_norm": 1.865491028677414, "learning_rate": 3.5460091430118714e-06, "loss": 0.6391, "step": 19776 }, { "epoch": 0.606135834252789, "grad_norm": 1.644081362722796, "learning_rate": 3.5455342810014987e-06, "loss": 0.5892, "step": 19777 }, { "epoch": 0.6061664827755302, "grad_norm": 1.7809942849319176, "learning_rate": 3.5450594333223476e-06, "loss": 0.591, "step": 19778 }, { "epoch": 0.6061971312982715, "grad_norm": 1.8094945288524, "learning_rate": 3.5445845999790994e-06, "loss": 0.6091, "step": 19779 }, { "epoch": 0.6062277798210126, "grad_norm": 1.6272258472791605, "learning_rate": 3.544109780976432e-06, "loss": 0.6597, "step": 19780 }, { "epoch": 0.6062584283437539, "grad_norm": 1.8161833120628241, "learning_rate": 3.543634976319022e-06, "loss": 0.5448, "step": 19781 }, { "epoch": 0.606289076866495, "grad_norm": 1.9104430737718014, "learning_rate": 3.54316018601155e-06, "loss": 0.5799, "step": 19782 }, { "epoch": 0.6063197253892363, "grad_norm": 0.864361279310396, "learning_rate": 3.542685410058695e-06, "loss": 0.4, "step": 19783 }, { "epoch": 0.6063503739119774, "grad_norm": 1.5935203122038029, "learning_rate": 3.5422106484651297e-06, "loss": 0.492, "step": 19784 }, { "epoch": 0.6063810224347187, "grad_norm": 1.6031440418089915, "learning_rate": 3.5417359012355395e-06, "loss": 0.6365, "step": 19785 }, { "epoch": 0.6064116709574598, "grad_norm": 1.847892840615589, "learning_rate": 3.541261168374598e-06, "loss": 0.6912, "step": 19786 }, { "epoch": 0.6064423194802011, "grad_norm": 1.7478959179047509, "learning_rate": 3.5407864498869815e-06, "loss": 0.6796, "step": 19787 }, { "epoch": 0.6064729680029423, "grad_norm": 1.980150079107268, "learning_rate": 3.5403117457773708e-06, "loss": 0.6781, "step": 19788 }, { "epoch": 0.6065036165256834, "grad_norm": 1.9100413899355841, "learning_rate": 3.539837056050441e-06, "loss": 0.6552, "step": 19789 }, { "epoch": 0.6065342650484247, "grad_norm": 1.7328067526245647, "learning_rate": 3.5393623807108714e-06, "loss": 0.4407, "step": 19790 }, { "epoch": 0.6065649135711658, "grad_norm": 0.8343599035369724, "learning_rate": 3.5388877197633378e-06, "loss": 0.4247, "step": 19791 }, { "epoch": 0.6065955620939071, "grad_norm": 0.7839160815157868, "learning_rate": 3.5384130732125165e-06, "loss": 0.3905, "step": 19792 }, { "epoch": 0.6066262106166482, "grad_norm": 1.6899312272741387, "learning_rate": 3.5379384410630858e-06, "loss": 0.5955, "step": 19793 }, { "epoch": 0.6066568591393895, "grad_norm": 0.7685895178232408, "learning_rate": 3.5374638233197233e-06, "loss": 0.4099, "step": 19794 }, { "epoch": 0.6066875076621306, "grad_norm": 1.765403091072089, "learning_rate": 3.5369892199871e-06, "loss": 0.6771, "step": 19795 }, { "epoch": 0.6067181561848719, "grad_norm": 1.6504663797453496, "learning_rate": 3.5365146310699007e-06, "loss": 0.5723, "step": 19796 }, { "epoch": 0.606748804707613, "grad_norm": 1.682112974310743, "learning_rate": 3.536040056572794e-06, "loss": 0.4891, "step": 19797 }, { "epoch": 0.6067794532303543, "grad_norm": 0.7792720194385934, "learning_rate": 3.5355654965004604e-06, "loss": 0.404, "step": 19798 }, { "epoch": 0.6068101017530955, "grad_norm": 1.9122569264816145, "learning_rate": 3.535090950857575e-06, "loss": 0.6001, "step": 19799 }, { "epoch": 0.6068407502758367, "grad_norm": 1.8225283810696677, "learning_rate": 3.534616419648812e-06, "loss": 0.6481, "step": 19800 }, { "epoch": 0.6068713987985779, "grad_norm": 2.0487625223453096, "learning_rate": 3.534141902878849e-06, "loss": 0.6764, "step": 19801 }, { "epoch": 0.6069020473213191, "grad_norm": 1.7627226168554841, "learning_rate": 3.533667400552362e-06, "loss": 0.686, "step": 19802 }, { "epoch": 0.6069326958440603, "grad_norm": 1.8590713536245902, "learning_rate": 3.533192912674023e-06, "loss": 0.6703, "step": 19803 }, { "epoch": 0.6069633443668015, "grad_norm": 1.7631964112198881, "learning_rate": 3.5327184392485124e-06, "loss": 0.6254, "step": 19804 }, { "epoch": 0.6069939928895427, "grad_norm": 2.0159379379428315, "learning_rate": 3.5322439802804993e-06, "loss": 0.6032, "step": 19805 }, { "epoch": 0.607024641412284, "grad_norm": 1.8417101914602838, "learning_rate": 3.5317695357746645e-06, "loss": 0.6468, "step": 19806 }, { "epoch": 0.6070552899350251, "grad_norm": 1.7044849242782691, "learning_rate": 3.5312951057356793e-06, "loss": 0.6524, "step": 19807 }, { "epoch": 0.6070859384577664, "grad_norm": 1.5653782382498687, "learning_rate": 3.5308206901682186e-06, "loss": 0.6329, "step": 19808 }, { "epoch": 0.6071165869805075, "grad_norm": 1.9046017985924415, "learning_rate": 3.530346289076958e-06, "loss": 0.5941, "step": 19809 }, { "epoch": 0.6071472355032488, "grad_norm": 1.6972707762041424, "learning_rate": 3.529871902466572e-06, "loss": 0.5401, "step": 19810 }, { "epoch": 0.6071778840259899, "grad_norm": 1.889569452955055, "learning_rate": 3.5293975303417322e-06, "loss": 0.5713, "step": 19811 }, { "epoch": 0.6072085325487312, "grad_norm": 0.816738595147859, "learning_rate": 3.5289231727071166e-06, "loss": 0.4181, "step": 19812 }, { "epoch": 0.6072391810714723, "grad_norm": 1.878728645662505, "learning_rate": 3.528448829567398e-06, "loss": 0.6558, "step": 19813 }, { "epoch": 0.6072698295942136, "grad_norm": 1.7247100808581406, "learning_rate": 3.527974500927247e-06, "loss": 0.6106, "step": 19814 }, { "epoch": 0.6073004781169548, "grad_norm": 1.9284134484195525, "learning_rate": 3.527500186791343e-06, "loss": 0.7001, "step": 19815 }, { "epoch": 0.607331126639696, "grad_norm": 1.9285391547647186, "learning_rate": 3.5270258871643526e-06, "loss": 0.7526, "step": 19816 }, { "epoch": 0.6073617751624372, "grad_norm": 1.8747985784738452, "learning_rate": 3.5265516020509573e-06, "loss": 0.6092, "step": 19817 }, { "epoch": 0.6073924236851784, "grad_norm": 1.578195501837031, "learning_rate": 3.526077331455824e-06, "loss": 0.5992, "step": 19818 }, { "epoch": 0.6074230722079196, "grad_norm": 1.5584207385150477, "learning_rate": 3.5256030753836267e-06, "loss": 0.6184, "step": 19819 }, { "epoch": 0.6074537207306607, "grad_norm": 1.7001260790743822, "learning_rate": 3.525128833839041e-06, "loss": 0.549, "step": 19820 }, { "epoch": 0.607484369253402, "grad_norm": 1.8762785756083225, "learning_rate": 3.5246546068267382e-06, "loss": 0.5242, "step": 19821 }, { "epoch": 0.6075150177761431, "grad_norm": 0.8397631464301999, "learning_rate": 3.5241803943513907e-06, "loss": 0.4394, "step": 19822 }, { "epoch": 0.6075456662988844, "grad_norm": 1.8568891740747775, "learning_rate": 3.523706196417672e-06, "loss": 0.6122, "step": 19823 }, { "epoch": 0.6075763148216256, "grad_norm": 1.6547677592271766, "learning_rate": 3.523232013030252e-06, "loss": 0.5767, "step": 19824 }, { "epoch": 0.6076069633443668, "grad_norm": 1.6653974990947986, "learning_rate": 3.522757844193807e-06, "loss": 0.5521, "step": 19825 }, { "epoch": 0.607637611867108, "grad_norm": 1.7149178027321768, "learning_rate": 3.5222836899130077e-06, "loss": 0.6067, "step": 19826 }, { "epoch": 0.6076682603898492, "grad_norm": 1.558995923971291, "learning_rate": 3.521809550192522e-06, "loss": 0.5528, "step": 19827 }, { "epoch": 0.6076989089125904, "grad_norm": 1.5862078799449848, "learning_rate": 3.5213354250370278e-06, "loss": 0.6373, "step": 19828 }, { "epoch": 0.6077295574353316, "grad_norm": 1.6679829180558141, "learning_rate": 3.5208613144511934e-06, "loss": 0.6245, "step": 19829 }, { "epoch": 0.6077602059580728, "grad_norm": 1.5328844209050148, "learning_rate": 3.520387218439689e-06, "loss": 0.5664, "step": 19830 }, { "epoch": 0.607790854480814, "grad_norm": 1.7576178951702188, "learning_rate": 3.5199131370071905e-06, "loss": 0.5945, "step": 19831 }, { "epoch": 0.6078215030035552, "grad_norm": 1.7425509670887922, "learning_rate": 3.519439070158365e-06, "loss": 0.6874, "step": 19832 }, { "epoch": 0.6078521515262965, "grad_norm": 0.8087553211577486, "learning_rate": 3.518965017897885e-06, "loss": 0.4253, "step": 19833 }, { "epoch": 0.6078828000490376, "grad_norm": 2.1512339057358907, "learning_rate": 3.5184909802304228e-06, "loss": 0.6956, "step": 19834 }, { "epoch": 0.6079134485717789, "grad_norm": 1.698194853997438, "learning_rate": 3.518016957160647e-06, "loss": 0.633, "step": 19835 }, { "epoch": 0.60794409709452, "grad_norm": 1.7992973390531433, "learning_rate": 3.51754294869323e-06, "loss": 0.6257, "step": 19836 }, { "epoch": 0.6079747456172613, "grad_norm": 1.8991682446926836, "learning_rate": 3.517068954832843e-06, "loss": 0.7446, "step": 19837 }, { "epoch": 0.6080053941400024, "grad_norm": 1.7600309365711333, "learning_rate": 3.516594975584151e-06, "loss": 0.6144, "step": 19838 }, { "epoch": 0.6080360426627437, "grad_norm": 1.8087569855646752, "learning_rate": 3.516121010951832e-06, "loss": 0.5101, "step": 19839 }, { "epoch": 0.6080666911854848, "grad_norm": 1.5616296503886604, "learning_rate": 3.515647060940551e-06, "loss": 0.5991, "step": 19840 }, { "epoch": 0.6080973397082261, "grad_norm": 1.89290610392304, "learning_rate": 3.5151731255549794e-06, "loss": 0.6463, "step": 19841 }, { "epoch": 0.6081279882309673, "grad_norm": 1.912765453764979, "learning_rate": 3.5146992047997864e-06, "loss": 0.5981, "step": 19842 }, { "epoch": 0.6081586367537085, "grad_norm": 1.866503849868564, "learning_rate": 3.514225298679642e-06, "loss": 0.586, "step": 19843 }, { "epoch": 0.6081892852764497, "grad_norm": 1.780013538159988, "learning_rate": 3.513751407199217e-06, "loss": 0.6632, "step": 19844 }, { "epoch": 0.6082199337991909, "grad_norm": 1.8217372815226867, "learning_rate": 3.5132775303631793e-06, "loss": 0.7041, "step": 19845 }, { "epoch": 0.6082505823219321, "grad_norm": 0.8200774827083941, "learning_rate": 3.5128036681761975e-06, "loss": 0.4219, "step": 19846 }, { "epoch": 0.6082812308446733, "grad_norm": 1.7671920434085933, "learning_rate": 3.5123298206429425e-06, "loss": 0.5507, "step": 19847 }, { "epoch": 0.6083118793674145, "grad_norm": 1.9547127411662624, "learning_rate": 3.5118559877680834e-06, "loss": 0.6558, "step": 19848 }, { "epoch": 0.6083425278901557, "grad_norm": 1.5800966823203682, "learning_rate": 3.5113821695562867e-06, "loss": 0.6576, "step": 19849 }, { "epoch": 0.6083731764128969, "grad_norm": 2.001220162416779, "learning_rate": 3.5109083660122233e-06, "loss": 0.5534, "step": 19850 }, { "epoch": 0.608403824935638, "grad_norm": 2.0051408058109415, "learning_rate": 3.510434577140559e-06, "loss": 0.6398, "step": 19851 }, { "epoch": 0.6084344734583793, "grad_norm": 0.7807772613739978, "learning_rate": 3.5099608029459653e-06, "loss": 0.4302, "step": 19852 }, { "epoch": 0.6084651219811205, "grad_norm": 1.9909854056090026, "learning_rate": 3.5094870434331093e-06, "loss": 0.6092, "step": 19853 }, { "epoch": 0.6084957705038617, "grad_norm": 1.8100190810020773, "learning_rate": 3.5090132986066572e-06, "loss": 0.6256, "step": 19854 }, { "epoch": 0.6085264190266029, "grad_norm": 1.9079610348385176, "learning_rate": 3.50853956847128e-06, "loss": 0.6136, "step": 19855 }, { "epoch": 0.6085570675493441, "grad_norm": 0.788923251456102, "learning_rate": 3.508065853031645e-06, "loss": 0.4224, "step": 19856 }, { "epoch": 0.6085877160720853, "grad_norm": 1.8834590988064142, "learning_rate": 3.507592152292416e-06, "loss": 0.6532, "step": 19857 }, { "epoch": 0.6086183645948265, "grad_norm": 1.8913315812897915, "learning_rate": 3.5071184662582664e-06, "loss": 0.6287, "step": 19858 }, { "epoch": 0.6086490131175677, "grad_norm": 2.0198441375274165, "learning_rate": 3.5066447949338573e-06, "loss": 0.643, "step": 19859 }, { "epoch": 0.608679661640309, "grad_norm": 1.749678772057237, "learning_rate": 3.5061711383238623e-06, "loss": 0.7076, "step": 19860 }, { "epoch": 0.6087103101630501, "grad_norm": 1.7512533073227199, "learning_rate": 3.5056974964329443e-06, "loss": 0.6596, "step": 19861 }, { "epoch": 0.6087409586857914, "grad_norm": 1.770775768360001, "learning_rate": 3.50522386926577e-06, "loss": 0.591, "step": 19862 }, { "epoch": 0.6087716072085325, "grad_norm": 0.7483094330874355, "learning_rate": 3.5047502568270085e-06, "loss": 0.4233, "step": 19863 }, { "epoch": 0.6088022557312738, "grad_norm": 0.7642096561018992, "learning_rate": 3.504276659121325e-06, "loss": 0.3945, "step": 19864 }, { "epoch": 0.6088329042540149, "grad_norm": 1.8880253453428506, "learning_rate": 3.5038030761533858e-06, "loss": 0.6634, "step": 19865 }, { "epoch": 0.6088635527767562, "grad_norm": 1.6202578433774417, "learning_rate": 3.5033295079278585e-06, "loss": 0.5434, "step": 19866 }, { "epoch": 0.6088942012994973, "grad_norm": 1.881760298172005, "learning_rate": 3.5028559544494095e-06, "loss": 0.5651, "step": 19867 }, { "epoch": 0.6089248498222386, "grad_norm": 1.824633023744843, "learning_rate": 3.5023824157227003e-06, "loss": 0.5818, "step": 19868 }, { "epoch": 0.6089554983449798, "grad_norm": 0.7841202913566347, "learning_rate": 3.501908891752404e-06, "loss": 0.4092, "step": 19869 }, { "epoch": 0.608986146867721, "grad_norm": 1.8313787207076386, "learning_rate": 3.5014353825431796e-06, "loss": 0.7043, "step": 19870 }, { "epoch": 0.6090167953904622, "grad_norm": 1.7431600584855098, "learning_rate": 3.5009618880996986e-06, "loss": 0.603, "step": 19871 }, { "epoch": 0.6090474439132034, "grad_norm": 1.9675497862217022, "learning_rate": 3.5004884084266235e-06, "loss": 0.6539, "step": 19872 }, { "epoch": 0.6090780924359446, "grad_norm": 1.679792700256641, "learning_rate": 3.5000149435286172e-06, "loss": 0.6678, "step": 19873 }, { "epoch": 0.6091087409586858, "grad_norm": 1.7563381830886322, "learning_rate": 3.49954149341035e-06, "loss": 0.6765, "step": 19874 }, { "epoch": 0.609139389481427, "grad_norm": 1.6752940303745747, "learning_rate": 3.4990680580764837e-06, "loss": 0.5855, "step": 19875 }, { "epoch": 0.6091700380041682, "grad_norm": 1.6066336306292253, "learning_rate": 3.4985946375316828e-06, "loss": 0.5804, "step": 19876 }, { "epoch": 0.6092006865269094, "grad_norm": 1.658640494476141, "learning_rate": 3.4981212317806133e-06, "loss": 0.6445, "step": 19877 }, { "epoch": 0.6092313350496507, "grad_norm": 1.7047676603108541, "learning_rate": 3.49764784082794e-06, "loss": 0.6486, "step": 19878 }, { "epoch": 0.6092619835723918, "grad_norm": 0.831856462934535, "learning_rate": 3.4971744646783267e-06, "loss": 0.4365, "step": 19879 }, { "epoch": 0.6092926320951331, "grad_norm": 1.5833089103476226, "learning_rate": 3.49670110333644e-06, "loss": 0.6065, "step": 19880 }, { "epoch": 0.6093232806178742, "grad_norm": 1.8510959802298952, "learning_rate": 3.496227756806938e-06, "loss": 0.7035, "step": 19881 }, { "epoch": 0.6093539291406154, "grad_norm": 1.9195591538779482, "learning_rate": 3.495754425094493e-06, "loss": 0.6472, "step": 19882 }, { "epoch": 0.6093845776633566, "grad_norm": 1.880418633044292, "learning_rate": 3.4952811082037626e-06, "loss": 0.5369, "step": 19883 }, { "epoch": 0.6094152261860978, "grad_norm": 1.7750952972561227, "learning_rate": 3.4948078061394116e-06, "loss": 0.6375, "step": 19884 }, { "epoch": 0.609445874708839, "grad_norm": 2.0130049987325287, "learning_rate": 3.4943345189061052e-06, "loss": 0.653, "step": 19885 }, { "epoch": 0.6094765232315802, "grad_norm": 1.6265635259484266, "learning_rate": 3.493861246508506e-06, "loss": 0.4815, "step": 19886 }, { "epoch": 0.6095071717543215, "grad_norm": 1.6629872397322154, "learning_rate": 3.493387988951277e-06, "loss": 0.5591, "step": 19887 }, { "epoch": 0.6095378202770626, "grad_norm": 1.8872296002934026, "learning_rate": 3.492914746239081e-06, "loss": 0.6434, "step": 19888 }, { "epoch": 0.6095684687998039, "grad_norm": 1.918461795697678, "learning_rate": 3.4924415183765826e-06, "loss": 0.6454, "step": 19889 }, { "epoch": 0.609599117322545, "grad_norm": 1.8464757526584508, "learning_rate": 3.491968305368443e-06, "loss": 0.744, "step": 19890 }, { "epoch": 0.6096297658452863, "grad_norm": 1.6100091043605036, "learning_rate": 3.4914951072193274e-06, "loss": 0.5092, "step": 19891 }, { "epoch": 0.6096604143680274, "grad_norm": 1.7687147008748894, "learning_rate": 3.4910219239338938e-06, "loss": 0.5995, "step": 19892 }, { "epoch": 0.6096910628907687, "grad_norm": 1.5155250534166345, "learning_rate": 3.4905487555168093e-06, "loss": 0.5917, "step": 19893 }, { "epoch": 0.6097217114135098, "grad_norm": 1.8730217255316841, "learning_rate": 3.490075601972734e-06, "loss": 0.6159, "step": 19894 }, { "epoch": 0.6097523599362511, "grad_norm": 1.8927900687616237, "learning_rate": 3.4896024633063288e-06, "loss": 0.6152, "step": 19895 }, { "epoch": 0.6097830084589922, "grad_norm": 1.5789895417909938, "learning_rate": 3.489129339522258e-06, "loss": 0.5764, "step": 19896 }, { "epoch": 0.6098136569817335, "grad_norm": 1.5458987367943215, "learning_rate": 3.4886562306251815e-06, "loss": 0.5397, "step": 19897 }, { "epoch": 0.6098443055044747, "grad_norm": 1.6694037899560668, "learning_rate": 3.4881831366197627e-06, "loss": 0.6273, "step": 19898 }, { "epoch": 0.6098749540272159, "grad_norm": 1.7981876854416465, "learning_rate": 3.4877100575106622e-06, "loss": 0.577, "step": 19899 }, { "epoch": 0.6099056025499571, "grad_norm": 1.7622098770432861, "learning_rate": 3.4872369933025404e-06, "loss": 0.5667, "step": 19900 }, { "epoch": 0.6099362510726983, "grad_norm": 1.5508928805578188, "learning_rate": 3.4867639440000617e-06, "loss": 0.5388, "step": 19901 }, { "epoch": 0.6099668995954395, "grad_norm": 1.9583716968132352, "learning_rate": 3.486290909607884e-06, "loss": 0.6497, "step": 19902 }, { "epoch": 0.6099975481181807, "grad_norm": 1.7337148880418711, "learning_rate": 3.4858178901306684e-06, "loss": 0.5717, "step": 19903 }, { "epoch": 0.6100281966409219, "grad_norm": 1.611154654461563, "learning_rate": 3.4853448855730775e-06, "loss": 0.5219, "step": 19904 }, { "epoch": 0.6100588451636632, "grad_norm": 1.7984799026917166, "learning_rate": 3.48487189593977e-06, "loss": 0.6286, "step": 19905 }, { "epoch": 0.6100894936864043, "grad_norm": 2.0346893959338175, "learning_rate": 3.484398921235408e-06, "loss": 0.5543, "step": 19906 }, { "epoch": 0.6101201422091456, "grad_norm": 2.0966612113385854, "learning_rate": 3.4839259614646516e-06, "loss": 0.7436, "step": 19907 }, { "epoch": 0.6101507907318867, "grad_norm": 1.8670729184800414, "learning_rate": 3.483453016632159e-06, "loss": 0.6204, "step": 19908 }, { "epoch": 0.610181439254628, "grad_norm": 1.7293615047528035, "learning_rate": 3.4829800867425933e-06, "loss": 0.5887, "step": 19909 }, { "epoch": 0.6102120877773691, "grad_norm": 1.7984363781133135, "learning_rate": 3.4825071718006142e-06, "loss": 0.6578, "step": 19910 }, { "epoch": 0.6102427363001104, "grad_norm": 1.7992087854119703, "learning_rate": 3.4820342718108767e-06, "loss": 0.5944, "step": 19911 }, { "epoch": 0.6102733848228515, "grad_norm": 1.7283814123056962, "learning_rate": 3.4815613867780474e-06, "loss": 0.6713, "step": 19912 }, { "epoch": 0.6103040333455927, "grad_norm": 1.923431473036008, "learning_rate": 3.481088516706781e-06, "loss": 0.6474, "step": 19913 }, { "epoch": 0.610334681868334, "grad_norm": 0.8359878807476588, "learning_rate": 3.4806156616017374e-06, "loss": 0.4338, "step": 19914 }, { "epoch": 0.6103653303910751, "grad_norm": 1.6818956494768607, "learning_rate": 3.480142821467577e-06, "loss": 0.6096, "step": 19915 }, { "epoch": 0.6103959789138164, "grad_norm": 1.8326107306015886, "learning_rate": 3.4796699963089577e-06, "loss": 0.627, "step": 19916 }, { "epoch": 0.6104266274365575, "grad_norm": 1.7302530409009738, "learning_rate": 3.4791971861305395e-06, "loss": 0.5333, "step": 19917 }, { "epoch": 0.6104572759592988, "grad_norm": 0.8355383066129372, "learning_rate": 3.4787243909369806e-06, "loss": 0.4323, "step": 19918 }, { "epoch": 0.6104879244820399, "grad_norm": 0.8307742647583123, "learning_rate": 3.478251610732939e-06, "loss": 0.4168, "step": 19919 }, { "epoch": 0.6105185730047812, "grad_norm": 2.4704300369888146, "learning_rate": 3.4777788455230744e-06, "loss": 0.6208, "step": 19920 }, { "epoch": 0.6105492215275223, "grad_norm": 1.5325835654375162, "learning_rate": 3.477306095312045e-06, "loss": 0.5168, "step": 19921 }, { "epoch": 0.6105798700502636, "grad_norm": 0.806266526003367, "learning_rate": 3.476833360104505e-06, "loss": 0.4253, "step": 19922 }, { "epoch": 0.6106105185730047, "grad_norm": 1.5570284589386811, "learning_rate": 3.476360639905119e-06, "loss": 0.6509, "step": 19923 }, { "epoch": 0.610641167095746, "grad_norm": 1.6737145086587373, "learning_rate": 3.4758879347185386e-06, "loss": 0.6152, "step": 19924 }, { "epoch": 0.6106718156184872, "grad_norm": 1.8060341235781885, "learning_rate": 3.475415244549427e-06, "loss": 0.5418, "step": 19925 }, { "epoch": 0.6107024641412284, "grad_norm": 1.8681786690255777, "learning_rate": 3.4749425694024386e-06, "loss": 0.642, "step": 19926 }, { "epoch": 0.6107331126639696, "grad_norm": 1.916182210316396, "learning_rate": 3.4744699092822296e-06, "loss": 0.5551, "step": 19927 }, { "epoch": 0.6107637611867108, "grad_norm": 1.6375086658605327, "learning_rate": 3.4739972641934606e-06, "loss": 0.5446, "step": 19928 }, { "epoch": 0.610794409709452, "grad_norm": 1.6022005910285757, "learning_rate": 3.4735246341407867e-06, "loss": 0.6808, "step": 19929 }, { "epoch": 0.6108250582321932, "grad_norm": 1.9138705683633364, "learning_rate": 3.473052019128864e-06, "loss": 0.7126, "step": 19930 }, { "epoch": 0.6108557067549344, "grad_norm": 1.619608381796362, "learning_rate": 3.472579419162352e-06, "loss": 0.5743, "step": 19931 }, { "epoch": 0.6108863552776757, "grad_norm": 1.865997241075373, "learning_rate": 3.472106834245904e-06, "loss": 0.6533, "step": 19932 }, { "epoch": 0.6109170038004168, "grad_norm": 1.8611485881773138, "learning_rate": 3.4716342643841796e-06, "loss": 0.7076, "step": 19933 }, { "epoch": 0.6109476523231581, "grad_norm": 0.7800293524032437, "learning_rate": 3.471161709581835e-06, "loss": 0.4148, "step": 19934 }, { "epoch": 0.6109783008458992, "grad_norm": 1.6934593387435393, "learning_rate": 3.470689169843522e-06, "loss": 0.472, "step": 19935 }, { "epoch": 0.6110089493686405, "grad_norm": 1.6768976846375907, "learning_rate": 3.4702166451739026e-06, "loss": 0.6152, "step": 19936 }, { "epoch": 0.6110395978913816, "grad_norm": 1.7192839380700915, "learning_rate": 3.4697441355776296e-06, "loss": 0.6182, "step": 19937 }, { "epoch": 0.6110702464141229, "grad_norm": 1.7433112750589683, "learning_rate": 3.4692716410593587e-06, "loss": 0.5994, "step": 19938 }, { "epoch": 0.611100894936864, "grad_norm": 2.0226069442733827, "learning_rate": 3.468799161623746e-06, "loss": 0.6901, "step": 19939 }, { "epoch": 0.6111315434596053, "grad_norm": 0.7689345265940392, "learning_rate": 3.468326697275447e-06, "loss": 0.4199, "step": 19940 }, { "epoch": 0.6111621919823464, "grad_norm": 1.9488739564342525, "learning_rate": 3.467854248019116e-06, "loss": 0.7379, "step": 19941 }, { "epoch": 0.6111928405050877, "grad_norm": 0.8082612425966784, "learning_rate": 3.4673818138594107e-06, "loss": 0.4128, "step": 19942 }, { "epoch": 0.6112234890278289, "grad_norm": 0.8985551354173343, "learning_rate": 3.466909394800983e-06, "loss": 0.4202, "step": 19943 }, { "epoch": 0.61125413755057, "grad_norm": 1.614728505051623, "learning_rate": 3.4664369908484912e-06, "loss": 0.4841, "step": 19944 }, { "epoch": 0.6112847860733113, "grad_norm": 1.8198223968984328, "learning_rate": 3.4659646020065874e-06, "loss": 0.6967, "step": 19945 }, { "epoch": 0.6113154345960524, "grad_norm": 1.9245423536681656, "learning_rate": 3.4654922282799256e-06, "loss": 0.6509, "step": 19946 }, { "epoch": 0.6113460831187937, "grad_norm": 1.634879772646968, "learning_rate": 3.4650198696731627e-06, "loss": 0.6194, "step": 19947 }, { "epoch": 0.6113767316415348, "grad_norm": 1.9784908846312632, "learning_rate": 3.4645475261909524e-06, "loss": 0.6865, "step": 19948 }, { "epoch": 0.6114073801642761, "grad_norm": 1.7688147713421396, "learning_rate": 3.464075197837946e-06, "loss": 0.681, "step": 19949 }, { "epoch": 0.6114380286870172, "grad_norm": 1.944159711976541, "learning_rate": 3.463602884618801e-06, "loss": 0.62, "step": 19950 }, { "epoch": 0.6114686772097585, "grad_norm": 1.8201761058787178, "learning_rate": 3.4631305865381693e-06, "loss": 0.65, "step": 19951 }, { "epoch": 0.6114993257324997, "grad_norm": 1.725570094939191, "learning_rate": 3.4626583036007055e-06, "loss": 0.6386, "step": 19952 }, { "epoch": 0.6115299742552409, "grad_norm": 1.75938033345263, "learning_rate": 3.462186035811065e-06, "loss": 0.5829, "step": 19953 }, { "epoch": 0.6115606227779821, "grad_norm": 1.7441176420148536, "learning_rate": 3.4617137831738945e-06, "loss": 0.6262, "step": 19954 }, { "epoch": 0.6115912713007233, "grad_norm": 1.7248525209283723, "learning_rate": 3.4612415456938553e-06, "loss": 0.5748, "step": 19955 }, { "epoch": 0.6116219198234645, "grad_norm": 1.6703006748645273, "learning_rate": 3.4607693233755958e-06, "loss": 0.4721, "step": 19956 }, { "epoch": 0.6116525683462057, "grad_norm": 1.6894366233872347, "learning_rate": 3.460297116223769e-06, "loss": 0.6345, "step": 19957 }, { "epoch": 0.6116832168689469, "grad_norm": 1.824493387359436, "learning_rate": 3.4598249242430304e-06, "loss": 0.5629, "step": 19958 }, { "epoch": 0.6117138653916881, "grad_norm": 1.850409249663015, "learning_rate": 3.4593527474380288e-06, "loss": 0.6873, "step": 19959 }, { "epoch": 0.6117445139144293, "grad_norm": 1.794035129319999, "learning_rate": 3.45888058581342e-06, "loss": 0.608, "step": 19960 }, { "epoch": 0.6117751624371706, "grad_norm": 1.9510783093868642, "learning_rate": 3.458408439373856e-06, "loss": 0.6274, "step": 19961 }, { "epoch": 0.6118058109599117, "grad_norm": 1.7245709593860234, "learning_rate": 3.4579363081239857e-06, "loss": 0.543, "step": 19962 }, { "epoch": 0.611836459482653, "grad_norm": 1.5854482135506311, "learning_rate": 3.4574641920684653e-06, "loss": 0.6055, "step": 19963 }, { "epoch": 0.6118671080053941, "grad_norm": 1.5911577796745842, "learning_rate": 3.4569920912119458e-06, "loss": 0.6409, "step": 19964 }, { "epoch": 0.6118977565281354, "grad_norm": 1.7780183518939563, "learning_rate": 3.456520005559075e-06, "loss": 0.5224, "step": 19965 }, { "epoch": 0.6119284050508765, "grad_norm": 1.8128372667428372, "learning_rate": 3.4560479351145103e-06, "loss": 0.5971, "step": 19966 }, { "epoch": 0.6119590535736178, "grad_norm": 0.8567292074812936, "learning_rate": 3.4555758798829e-06, "loss": 0.4205, "step": 19967 }, { "epoch": 0.611989702096359, "grad_norm": 1.7906051423839031, "learning_rate": 3.4551038398688943e-06, "loss": 0.6378, "step": 19968 }, { "epoch": 0.6120203506191002, "grad_norm": 1.7714216259599602, "learning_rate": 3.4546318150771463e-06, "loss": 0.6599, "step": 19969 }, { "epoch": 0.6120509991418414, "grad_norm": 1.8365157872297218, "learning_rate": 3.454159805512306e-06, "loss": 0.5142, "step": 19970 }, { "epoch": 0.6120816476645826, "grad_norm": 1.8969356746377972, "learning_rate": 3.453687811179025e-06, "loss": 0.5953, "step": 19971 }, { "epoch": 0.6121122961873238, "grad_norm": 1.7763733639534176, "learning_rate": 3.4532158320819543e-06, "loss": 0.6604, "step": 19972 }, { "epoch": 0.612142944710065, "grad_norm": 2.0217318433874296, "learning_rate": 3.452743868225743e-06, "loss": 0.6248, "step": 19973 }, { "epoch": 0.6121735932328062, "grad_norm": 0.7964805291022317, "learning_rate": 3.4522719196150423e-06, "loss": 0.4155, "step": 19974 }, { "epoch": 0.6122042417555473, "grad_norm": 1.6651579033522297, "learning_rate": 3.4517999862545045e-06, "loss": 0.5984, "step": 19975 }, { "epoch": 0.6122348902782886, "grad_norm": 2.1074343278902807, "learning_rate": 3.4513280681487738e-06, "loss": 0.7014, "step": 19976 }, { "epoch": 0.6122655388010297, "grad_norm": 1.838961746501008, "learning_rate": 3.4508561653025076e-06, "loss": 0.5935, "step": 19977 }, { "epoch": 0.612296187323771, "grad_norm": 1.803851408586033, "learning_rate": 3.450384277720348e-06, "loss": 0.5537, "step": 19978 }, { "epoch": 0.6123268358465122, "grad_norm": 1.6646983827644715, "learning_rate": 3.449912405406952e-06, "loss": 0.6304, "step": 19979 }, { "epoch": 0.6123574843692534, "grad_norm": 1.697830490527625, "learning_rate": 3.449440548366965e-06, "loss": 0.689, "step": 19980 }, { "epoch": 0.6123881328919946, "grad_norm": 1.736357745544882, "learning_rate": 3.4489687066050353e-06, "loss": 0.6053, "step": 19981 }, { "epoch": 0.6124187814147358, "grad_norm": 1.6854563891033905, "learning_rate": 3.448496880125815e-06, "loss": 0.6067, "step": 19982 }, { "epoch": 0.612449429937477, "grad_norm": 2.12815909762349, "learning_rate": 3.4480250689339522e-06, "loss": 0.6525, "step": 19983 }, { "epoch": 0.6124800784602182, "grad_norm": 1.7035006428275052, "learning_rate": 3.4475532730340944e-06, "loss": 0.5321, "step": 19984 }, { "epoch": 0.6125107269829594, "grad_norm": 1.9481053465771747, "learning_rate": 3.4470814924308926e-06, "loss": 0.7404, "step": 19985 }, { "epoch": 0.6125413755057006, "grad_norm": 1.8110605230459702, "learning_rate": 3.446609727128993e-06, "loss": 0.5532, "step": 19986 }, { "epoch": 0.6125720240284418, "grad_norm": 1.8468135554546095, "learning_rate": 3.446137977133046e-06, "loss": 0.6003, "step": 19987 }, { "epoch": 0.6126026725511831, "grad_norm": 1.867071884782059, "learning_rate": 3.4456662424477006e-06, "loss": 0.6042, "step": 19988 }, { "epoch": 0.6126333210739242, "grad_norm": 1.7744909770367028, "learning_rate": 3.4451945230776007e-06, "loss": 0.654, "step": 19989 }, { "epoch": 0.6126639695966655, "grad_norm": 1.846208672481918, "learning_rate": 3.4447228190273987e-06, "loss": 0.6927, "step": 19990 }, { "epoch": 0.6126946181194066, "grad_norm": 1.6803082565785885, "learning_rate": 3.44425113030174e-06, "loss": 0.5172, "step": 19991 }, { "epoch": 0.6127252666421479, "grad_norm": 1.5672458651413137, "learning_rate": 3.4437794569052724e-06, "loss": 0.6223, "step": 19992 }, { "epoch": 0.612755915164889, "grad_norm": 1.7549886826411691, "learning_rate": 3.443307798842645e-06, "loss": 0.6464, "step": 19993 }, { "epoch": 0.6127865636876303, "grad_norm": 1.5864909348206622, "learning_rate": 3.4428361561185043e-06, "loss": 0.5562, "step": 19994 }, { "epoch": 0.6128172122103714, "grad_norm": 1.7517261628391916, "learning_rate": 3.442364528737496e-06, "loss": 0.6504, "step": 19995 }, { "epoch": 0.6128478607331127, "grad_norm": 1.8167620063522247, "learning_rate": 3.4418929167042704e-06, "loss": 0.6556, "step": 19996 }, { "epoch": 0.6128785092558539, "grad_norm": 1.793509808038817, "learning_rate": 3.4414213200234696e-06, "loss": 0.5641, "step": 19997 }, { "epoch": 0.6129091577785951, "grad_norm": 1.7114671785459328, "learning_rate": 3.4409497386997472e-06, "loss": 0.6209, "step": 19998 }, { "epoch": 0.6129398063013363, "grad_norm": 1.791478707770155, "learning_rate": 3.440478172737744e-06, "loss": 0.6238, "step": 19999 }, { "epoch": 0.6129704548240775, "grad_norm": 0.7932899176650432, "learning_rate": 3.4400066221421073e-06, "loss": 0.4272, "step": 20000 }, { "epoch": 0.6130011033468187, "grad_norm": 1.5582187793502618, "learning_rate": 3.439535086917486e-06, "loss": 0.5355, "step": 20001 }, { "epoch": 0.6130317518695599, "grad_norm": 1.6432438115527819, "learning_rate": 3.4390635670685244e-06, "loss": 0.6106, "step": 20002 }, { "epoch": 0.6130624003923011, "grad_norm": 0.7702442281193594, "learning_rate": 3.438592062599868e-06, "loss": 0.4168, "step": 20003 }, { "epoch": 0.6130930489150423, "grad_norm": 1.7376324153501788, "learning_rate": 3.438120573516165e-06, "loss": 0.5901, "step": 20004 }, { "epoch": 0.6131236974377835, "grad_norm": 1.753736795617146, "learning_rate": 3.437649099822058e-06, "loss": 0.6226, "step": 20005 }, { "epoch": 0.6131543459605246, "grad_norm": 1.777063877877705, "learning_rate": 3.437177641522196e-06, "loss": 0.7053, "step": 20006 }, { "epoch": 0.6131849944832659, "grad_norm": 1.8088983242274714, "learning_rate": 3.4367061986212223e-06, "loss": 0.603, "step": 20007 }, { "epoch": 0.6132156430060071, "grad_norm": 1.9045621350712796, "learning_rate": 3.4362347711237797e-06, "loss": 0.7115, "step": 20008 }, { "epoch": 0.6132462915287483, "grad_norm": 1.8527516443235297, "learning_rate": 3.4357633590345195e-06, "loss": 0.6156, "step": 20009 }, { "epoch": 0.6132769400514895, "grad_norm": 1.6793077033945087, "learning_rate": 3.435291962358082e-06, "loss": 0.5611, "step": 20010 }, { "epoch": 0.6133075885742307, "grad_norm": 1.5734035033854812, "learning_rate": 3.434820581099112e-06, "loss": 0.5752, "step": 20011 }, { "epoch": 0.6133382370969719, "grad_norm": 1.6116163536965884, "learning_rate": 3.4343492152622564e-06, "loss": 0.6367, "step": 20012 }, { "epoch": 0.6133688856197131, "grad_norm": 1.6179526099450392, "learning_rate": 3.4338778648521575e-06, "loss": 0.6605, "step": 20013 }, { "epoch": 0.6133995341424543, "grad_norm": 1.7544778727898451, "learning_rate": 3.433406529873462e-06, "loss": 0.6575, "step": 20014 }, { "epoch": 0.6134301826651956, "grad_norm": 1.799343005699092, "learning_rate": 3.4329352103308123e-06, "loss": 0.6923, "step": 20015 }, { "epoch": 0.6134608311879367, "grad_norm": 1.4913997299974293, "learning_rate": 3.432463906228852e-06, "loss": 0.4809, "step": 20016 }, { "epoch": 0.613491479710678, "grad_norm": 1.9645507705343332, "learning_rate": 3.4319926175722272e-06, "loss": 0.6636, "step": 20017 }, { "epoch": 0.6135221282334191, "grad_norm": 1.59531779250335, "learning_rate": 3.4315213443655816e-06, "loss": 0.5708, "step": 20018 }, { "epoch": 0.6135527767561604, "grad_norm": 1.8126964664500194, "learning_rate": 3.4310500866135543e-06, "loss": 0.6294, "step": 20019 }, { "epoch": 0.6135834252789015, "grad_norm": 1.7932672370294707, "learning_rate": 3.4305788443207944e-06, "loss": 0.6159, "step": 20020 }, { "epoch": 0.6136140738016428, "grad_norm": 0.7886402986879186, "learning_rate": 3.4301076174919423e-06, "loss": 0.4083, "step": 20021 }, { "epoch": 0.6136447223243839, "grad_norm": 1.766195046218155, "learning_rate": 3.4296364061316402e-06, "loss": 0.6704, "step": 20022 }, { "epoch": 0.6136753708471252, "grad_norm": 1.6482672573676276, "learning_rate": 3.4291652102445337e-06, "loss": 0.6541, "step": 20023 }, { "epoch": 0.6137060193698664, "grad_norm": 0.8093002978717676, "learning_rate": 3.4286940298352627e-06, "loss": 0.4389, "step": 20024 }, { "epoch": 0.6137366678926076, "grad_norm": 1.871917110026218, "learning_rate": 3.4282228649084733e-06, "loss": 0.6208, "step": 20025 }, { "epoch": 0.6137673164153488, "grad_norm": 1.7021626125702656, "learning_rate": 3.4277517154688055e-06, "loss": 0.6087, "step": 20026 }, { "epoch": 0.61379796493809, "grad_norm": 1.6645656957059078, "learning_rate": 3.4272805815209015e-06, "loss": 0.6762, "step": 20027 }, { "epoch": 0.6138286134608312, "grad_norm": 1.5781858689575243, "learning_rate": 3.4268094630694047e-06, "loss": 0.6801, "step": 20028 }, { "epoch": 0.6138592619835724, "grad_norm": 1.8485982816102924, "learning_rate": 3.4263383601189594e-06, "loss": 0.639, "step": 20029 }, { "epoch": 0.6138899105063136, "grad_norm": 1.6246604818341814, "learning_rate": 3.4258672726742005e-06, "loss": 0.5879, "step": 20030 }, { "epoch": 0.6139205590290548, "grad_norm": 1.796829506752128, "learning_rate": 3.425396200739778e-06, "loss": 0.5891, "step": 20031 }, { "epoch": 0.613951207551796, "grad_norm": 0.7993164588078756, "learning_rate": 3.4249251443203256e-06, "loss": 0.4148, "step": 20032 }, { "epoch": 0.6139818560745373, "grad_norm": 1.5002944215257816, "learning_rate": 3.4244541034204926e-06, "loss": 0.5566, "step": 20033 }, { "epoch": 0.6140125045972784, "grad_norm": 1.8860573071433926, "learning_rate": 3.4239830780449147e-06, "loss": 0.599, "step": 20034 }, { "epoch": 0.6140431531200197, "grad_norm": 1.783600602605681, "learning_rate": 3.423512068198234e-06, "loss": 0.5987, "step": 20035 }, { "epoch": 0.6140738016427608, "grad_norm": 1.675587766348633, "learning_rate": 3.423041073885094e-06, "loss": 0.5072, "step": 20036 }, { "epoch": 0.614104450165502, "grad_norm": 1.6340383517754276, "learning_rate": 3.422570095110133e-06, "loss": 0.5281, "step": 20037 }, { "epoch": 0.6141350986882432, "grad_norm": 1.777752767049151, "learning_rate": 3.4220991318779917e-06, "loss": 0.6681, "step": 20038 }, { "epoch": 0.6141657472109844, "grad_norm": 1.9688653279111723, "learning_rate": 3.4216281841933126e-06, "loss": 0.6657, "step": 20039 }, { "epoch": 0.6141963957337256, "grad_norm": 1.5523403353885474, "learning_rate": 3.4211572520607334e-06, "loss": 0.482, "step": 20040 }, { "epoch": 0.6142270442564668, "grad_norm": 0.7814596916900892, "learning_rate": 3.4206863354848978e-06, "loss": 0.4033, "step": 20041 }, { "epoch": 0.614257692779208, "grad_norm": 1.8410437448592971, "learning_rate": 3.420215434470443e-06, "loss": 0.5989, "step": 20042 }, { "epoch": 0.6142883413019492, "grad_norm": 1.7418446633119873, "learning_rate": 3.4197445490220086e-06, "loss": 0.6542, "step": 20043 }, { "epoch": 0.6143189898246905, "grad_norm": 1.673367866451533, "learning_rate": 3.419273679144237e-06, "loss": 0.6668, "step": 20044 }, { "epoch": 0.6143496383474316, "grad_norm": 1.6289735093462363, "learning_rate": 3.418802824841766e-06, "loss": 0.5541, "step": 20045 }, { "epoch": 0.6143802868701729, "grad_norm": 1.5759952115592668, "learning_rate": 3.4183319861192344e-06, "loss": 0.6518, "step": 20046 }, { "epoch": 0.614410935392914, "grad_norm": 1.5376172109979471, "learning_rate": 3.417861162981283e-06, "loss": 0.4987, "step": 20047 }, { "epoch": 0.6144415839156553, "grad_norm": 1.782460504151838, "learning_rate": 3.4173903554325517e-06, "loss": 0.5727, "step": 20048 }, { "epoch": 0.6144722324383964, "grad_norm": 1.8940553883762383, "learning_rate": 3.4169195634776747e-06, "loss": 0.596, "step": 20049 }, { "epoch": 0.6145028809611377, "grad_norm": 0.7428395652807265, "learning_rate": 3.416448787121298e-06, "loss": 0.3997, "step": 20050 }, { "epoch": 0.6145335294838788, "grad_norm": 1.8135091749693926, "learning_rate": 3.4159780263680533e-06, "loss": 0.6217, "step": 20051 }, { "epoch": 0.6145641780066201, "grad_norm": 1.9277497824843124, "learning_rate": 3.4155072812225852e-06, "loss": 0.6291, "step": 20052 }, { "epoch": 0.6145948265293613, "grad_norm": 1.8790093718581526, "learning_rate": 3.4150365516895285e-06, "loss": 0.658, "step": 20053 }, { "epoch": 0.6146254750521025, "grad_norm": 0.7995765408707408, "learning_rate": 3.4145658377735206e-06, "loss": 0.4283, "step": 20054 }, { "epoch": 0.6146561235748437, "grad_norm": 1.9624226167223278, "learning_rate": 3.4140951394792033e-06, "loss": 0.5583, "step": 20055 }, { "epoch": 0.6146867720975849, "grad_norm": 1.8756018549397564, "learning_rate": 3.4136244568112115e-06, "loss": 0.5759, "step": 20056 }, { "epoch": 0.6147174206203261, "grad_norm": 1.7837261931292492, "learning_rate": 3.4131537897741828e-06, "loss": 0.678, "step": 20057 }, { "epoch": 0.6147480691430673, "grad_norm": 1.6980636749193287, "learning_rate": 3.412683138372757e-06, "loss": 0.539, "step": 20058 }, { "epoch": 0.6147787176658085, "grad_norm": 1.7787679813962698, "learning_rate": 3.412212502611569e-06, "loss": 0.5993, "step": 20059 }, { "epoch": 0.6148093661885498, "grad_norm": 1.8265360096688936, "learning_rate": 3.4117418824952597e-06, "loss": 0.5614, "step": 20060 }, { "epoch": 0.6148400147112909, "grad_norm": 1.9015586254671593, "learning_rate": 3.411271278028464e-06, "loss": 0.6584, "step": 20061 }, { "epoch": 0.6148706632340322, "grad_norm": 2.0393518713925034, "learning_rate": 3.4108006892158162e-06, "loss": 0.5675, "step": 20062 }, { "epoch": 0.6149013117567733, "grad_norm": 1.7959364010374477, "learning_rate": 3.4103301160619585e-06, "loss": 0.591, "step": 20063 }, { "epoch": 0.6149319602795146, "grad_norm": 1.7361004427660214, "learning_rate": 3.4098595585715243e-06, "loss": 0.6116, "step": 20064 }, { "epoch": 0.6149626088022557, "grad_norm": 1.5804790946154477, "learning_rate": 3.4093890167491493e-06, "loss": 0.4916, "step": 20065 }, { "epoch": 0.614993257324997, "grad_norm": 1.9567642457832108, "learning_rate": 3.408918490599472e-06, "loss": 0.6679, "step": 20066 }, { "epoch": 0.6150239058477381, "grad_norm": 1.75380046389947, "learning_rate": 3.4084479801271285e-06, "loss": 0.644, "step": 20067 }, { "epoch": 0.6150545543704793, "grad_norm": 2.027968584140985, "learning_rate": 3.407977485336754e-06, "loss": 0.6257, "step": 20068 }, { "epoch": 0.6150852028932206, "grad_norm": 1.7250251230232734, "learning_rate": 3.4075070062329847e-06, "loss": 0.5776, "step": 20069 }, { "epoch": 0.6151158514159617, "grad_norm": 1.6651339808987844, "learning_rate": 3.4070365428204555e-06, "loss": 0.5115, "step": 20070 }, { "epoch": 0.615146499938703, "grad_norm": 1.7421881210814025, "learning_rate": 3.406566095103804e-06, "loss": 0.5492, "step": 20071 }, { "epoch": 0.6151771484614441, "grad_norm": 1.8180253769082626, "learning_rate": 3.4060956630876653e-06, "loss": 0.6528, "step": 20072 }, { "epoch": 0.6152077969841854, "grad_norm": 1.7928373504466275, "learning_rate": 3.4056252467766703e-06, "loss": 0.6316, "step": 20073 }, { "epoch": 0.6152384455069265, "grad_norm": 1.9710521952462354, "learning_rate": 3.4051548461754615e-06, "loss": 0.6405, "step": 20074 }, { "epoch": 0.6152690940296678, "grad_norm": 1.7231749671357202, "learning_rate": 3.404684461288669e-06, "loss": 0.6184, "step": 20075 }, { "epoch": 0.6152997425524089, "grad_norm": 2.0552290939645004, "learning_rate": 3.4042140921209265e-06, "loss": 0.6759, "step": 20076 }, { "epoch": 0.6153303910751502, "grad_norm": 1.5313125578000633, "learning_rate": 3.4037437386768735e-06, "loss": 0.5439, "step": 20077 }, { "epoch": 0.6153610395978913, "grad_norm": 0.8309705053159724, "learning_rate": 3.4032734009611403e-06, "loss": 0.4118, "step": 20078 }, { "epoch": 0.6153916881206326, "grad_norm": 1.7543353754075754, "learning_rate": 3.4028030789783635e-06, "loss": 0.6688, "step": 20079 }, { "epoch": 0.6154223366433738, "grad_norm": 1.851021379307945, "learning_rate": 3.4023327727331768e-06, "loss": 0.6358, "step": 20080 }, { "epoch": 0.615452985166115, "grad_norm": 1.6418874880417313, "learning_rate": 3.4018624822302126e-06, "loss": 0.6295, "step": 20081 }, { "epoch": 0.6154836336888562, "grad_norm": 1.6983298574147334, "learning_rate": 3.401392207474108e-06, "loss": 0.6361, "step": 20082 }, { "epoch": 0.6155142822115974, "grad_norm": 1.8624512245382157, "learning_rate": 3.4009219484694954e-06, "loss": 0.6783, "step": 20083 }, { "epoch": 0.6155449307343386, "grad_norm": 0.7685748693144909, "learning_rate": 3.4004517052210056e-06, "loss": 0.4262, "step": 20084 }, { "epoch": 0.6155755792570798, "grad_norm": 1.8430822870875407, "learning_rate": 3.3999814777332774e-06, "loss": 0.5559, "step": 20085 }, { "epoch": 0.615606227779821, "grad_norm": 1.8603856270891457, "learning_rate": 3.399511266010939e-06, "loss": 0.5861, "step": 20086 }, { "epoch": 0.6156368763025623, "grad_norm": 1.6607599641790691, "learning_rate": 3.3990410700586262e-06, "loss": 0.5714, "step": 20087 }, { "epoch": 0.6156675248253034, "grad_norm": 1.9637178937525026, "learning_rate": 3.398570889880971e-06, "loss": 0.6514, "step": 20088 }, { "epoch": 0.6156981733480447, "grad_norm": 0.7811621607024806, "learning_rate": 3.3981007254826064e-06, "loss": 0.402, "step": 20089 }, { "epoch": 0.6157288218707858, "grad_norm": 1.8543559557747347, "learning_rate": 3.3976305768681662e-06, "loss": 0.6817, "step": 20090 }, { "epoch": 0.6157594703935271, "grad_norm": 1.77774385883581, "learning_rate": 3.3971604440422813e-06, "loss": 0.5777, "step": 20091 }, { "epoch": 0.6157901189162682, "grad_norm": 1.7158308121996813, "learning_rate": 3.396690327009584e-06, "loss": 0.5891, "step": 20092 }, { "epoch": 0.6158207674390095, "grad_norm": 1.6585224702677124, "learning_rate": 3.396220225774709e-06, "loss": 0.5872, "step": 20093 }, { "epoch": 0.6158514159617506, "grad_norm": 0.7957802946866845, "learning_rate": 3.395750140342286e-06, "loss": 0.4278, "step": 20094 }, { "epoch": 0.6158820644844919, "grad_norm": 1.7610462325601763, "learning_rate": 3.395280070716946e-06, "loss": 0.6424, "step": 20095 }, { "epoch": 0.615912713007233, "grad_norm": 0.7977072470730368, "learning_rate": 3.3948100169033225e-06, "loss": 0.4291, "step": 20096 }, { "epoch": 0.6159433615299743, "grad_norm": 1.8617544704837792, "learning_rate": 3.394339978906046e-06, "loss": 0.6128, "step": 20097 }, { "epoch": 0.6159740100527155, "grad_norm": 1.576848517484986, "learning_rate": 3.39386995672975e-06, "loss": 0.6353, "step": 20098 }, { "epoch": 0.6160046585754566, "grad_norm": 2.0080551778808733, "learning_rate": 3.3933999503790638e-06, "loss": 0.5373, "step": 20099 }, { "epoch": 0.6160353070981979, "grad_norm": 1.8456865700024587, "learning_rate": 3.3929299598586183e-06, "loss": 0.7118, "step": 20100 }, { "epoch": 0.616065955620939, "grad_norm": 2.066478330550117, "learning_rate": 3.3924599851730456e-06, "loss": 0.4885, "step": 20101 }, { "epoch": 0.6160966041436803, "grad_norm": 1.7648272848024962, "learning_rate": 3.391990026326977e-06, "loss": 0.673, "step": 20102 }, { "epoch": 0.6161272526664214, "grad_norm": 1.6410537809195198, "learning_rate": 3.3915200833250393e-06, "loss": 0.561, "step": 20103 }, { "epoch": 0.6161579011891627, "grad_norm": 1.5506389393267563, "learning_rate": 3.391050156171869e-06, "loss": 0.5648, "step": 20104 }, { "epoch": 0.6161885497119038, "grad_norm": 1.6457183343203918, "learning_rate": 3.39058024487209e-06, "loss": 0.6095, "step": 20105 }, { "epoch": 0.6162191982346451, "grad_norm": 2.170605742737231, "learning_rate": 3.390110349430339e-06, "loss": 0.6197, "step": 20106 }, { "epoch": 0.6162498467573863, "grad_norm": 1.7833314171072312, "learning_rate": 3.389640469851241e-06, "loss": 0.5679, "step": 20107 }, { "epoch": 0.6162804952801275, "grad_norm": 1.7404047261229634, "learning_rate": 3.3891706061394263e-06, "loss": 0.5651, "step": 20108 }, { "epoch": 0.6163111438028687, "grad_norm": 0.7908685663589469, "learning_rate": 3.388700758299527e-06, "loss": 0.4402, "step": 20109 }, { "epoch": 0.6163417923256099, "grad_norm": 1.9672364098608397, "learning_rate": 3.388230926336172e-06, "loss": 0.6924, "step": 20110 }, { "epoch": 0.6163724408483511, "grad_norm": 1.6147982671771983, "learning_rate": 3.3877611102539885e-06, "loss": 0.5992, "step": 20111 }, { "epoch": 0.6164030893710923, "grad_norm": 0.8396270476273787, "learning_rate": 3.387291310057608e-06, "loss": 0.4253, "step": 20112 }, { "epoch": 0.6164337378938335, "grad_norm": 1.6659754559237625, "learning_rate": 3.3868215257516583e-06, "loss": 0.6147, "step": 20113 }, { "epoch": 0.6164643864165747, "grad_norm": 1.6408083455195424, "learning_rate": 3.38635175734077e-06, "loss": 0.505, "step": 20114 }, { "epoch": 0.6164950349393159, "grad_norm": 1.8624678023761863, "learning_rate": 3.3858820048295714e-06, "loss": 0.6347, "step": 20115 }, { "epoch": 0.6165256834620572, "grad_norm": 1.7556317440369213, "learning_rate": 3.3854122682226873e-06, "loss": 0.6689, "step": 20116 }, { "epoch": 0.6165563319847983, "grad_norm": 1.7116642879968686, "learning_rate": 3.3849425475247533e-06, "loss": 0.6298, "step": 20117 }, { "epoch": 0.6165869805075396, "grad_norm": 1.7431970752394768, "learning_rate": 3.384472842740392e-06, "loss": 0.6799, "step": 20118 }, { "epoch": 0.6166176290302807, "grad_norm": 1.781639446479928, "learning_rate": 3.384003153874231e-06, "loss": 0.6285, "step": 20119 }, { "epoch": 0.616648277553022, "grad_norm": 1.5198096986571643, "learning_rate": 3.383533480930903e-06, "loss": 0.5719, "step": 20120 }, { "epoch": 0.6166789260757631, "grad_norm": 1.8228237902557751, "learning_rate": 3.383063823915032e-06, "loss": 0.6642, "step": 20121 }, { "epoch": 0.6167095745985044, "grad_norm": 1.6969054072013925, "learning_rate": 3.3825941828312463e-06, "loss": 0.5156, "step": 20122 }, { "epoch": 0.6167402231212455, "grad_norm": 1.7962852661937943, "learning_rate": 3.382124557684175e-06, "loss": 0.6389, "step": 20123 }, { "epoch": 0.6167708716439868, "grad_norm": 0.8166881409786969, "learning_rate": 3.3816549484784434e-06, "loss": 0.4102, "step": 20124 }, { "epoch": 0.616801520166728, "grad_norm": 1.8446923382208367, "learning_rate": 3.38118535521868e-06, "loss": 0.5628, "step": 20125 }, { "epoch": 0.6168321686894692, "grad_norm": 1.7080002601940938, "learning_rate": 3.3807157779095135e-06, "loss": 0.596, "step": 20126 }, { "epoch": 0.6168628172122104, "grad_norm": 2.0288005672649434, "learning_rate": 3.3802462165555653e-06, "loss": 0.6754, "step": 20127 }, { "epoch": 0.6168934657349516, "grad_norm": 1.8660540853167669, "learning_rate": 3.3797766711614686e-06, "loss": 0.6963, "step": 20128 }, { "epoch": 0.6169241142576928, "grad_norm": 1.8701639299549597, "learning_rate": 3.379307141731846e-06, "loss": 0.5953, "step": 20129 }, { "epoch": 0.6169547627804339, "grad_norm": 1.6215945012345767, "learning_rate": 3.3788376282713244e-06, "loss": 0.6072, "step": 20130 }, { "epoch": 0.6169854113031752, "grad_norm": 1.8189837603649583, "learning_rate": 3.3783681307845307e-06, "loss": 0.5808, "step": 20131 }, { "epoch": 0.6170160598259163, "grad_norm": 1.944489605937557, "learning_rate": 3.3778986492760895e-06, "loss": 0.604, "step": 20132 }, { "epoch": 0.6170467083486576, "grad_norm": 1.6125878079893068, "learning_rate": 3.377429183750629e-06, "loss": 0.5683, "step": 20133 }, { "epoch": 0.6170773568713988, "grad_norm": 0.7854910555486416, "learning_rate": 3.3769597342127745e-06, "loss": 0.4374, "step": 20134 }, { "epoch": 0.61710800539414, "grad_norm": 1.5384663473458964, "learning_rate": 3.3764903006671496e-06, "loss": 0.5734, "step": 20135 }, { "epoch": 0.6171386539168812, "grad_norm": 1.791249556582372, "learning_rate": 3.376020883118382e-06, "loss": 0.6468, "step": 20136 }, { "epoch": 0.6171693024396224, "grad_norm": 1.6413420723366758, "learning_rate": 3.3755514815710976e-06, "loss": 0.6181, "step": 20137 }, { "epoch": 0.6171999509623636, "grad_norm": 1.7700133246230128, "learning_rate": 3.375082096029918e-06, "loss": 0.5366, "step": 20138 }, { "epoch": 0.6172305994851048, "grad_norm": 1.5991085287537388, "learning_rate": 3.374612726499471e-06, "loss": 0.5338, "step": 20139 }, { "epoch": 0.617261248007846, "grad_norm": 0.8077481718302417, "learning_rate": 3.3741433729843796e-06, "loss": 0.4374, "step": 20140 }, { "epoch": 0.6172918965305872, "grad_norm": 1.8102057431877918, "learning_rate": 3.3736740354892707e-06, "loss": 0.5801, "step": 20141 }, { "epoch": 0.6173225450533284, "grad_norm": 2.0758958102266907, "learning_rate": 3.373204714018768e-06, "loss": 0.5934, "step": 20142 }, { "epoch": 0.6173531935760697, "grad_norm": 1.7244308644955924, "learning_rate": 3.3727354085774944e-06, "loss": 0.6884, "step": 20143 }, { "epoch": 0.6173838420988108, "grad_norm": 1.911040435683925, "learning_rate": 3.3722661191700757e-06, "loss": 0.5593, "step": 20144 }, { "epoch": 0.6174144906215521, "grad_norm": 1.851854049598556, "learning_rate": 3.3717968458011364e-06, "loss": 0.5899, "step": 20145 }, { "epoch": 0.6174451391442932, "grad_norm": 1.7386765007510487, "learning_rate": 3.371327588475297e-06, "loss": 0.5877, "step": 20146 }, { "epoch": 0.6174757876670345, "grad_norm": 1.8227363298679222, "learning_rate": 3.3708583471971854e-06, "loss": 0.6739, "step": 20147 }, { "epoch": 0.6175064361897756, "grad_norm": 2.034815740539113, "learning_rate": 3.3703891219714237e-06, "loss": 0.5702, "step": 20148 }, { "epoch": 0.6175370847125169, "grad_norm": 1.6314376710440703, "learning_rate": 3.369919912802633e-06, "loss": 0.5569, "step": 20149 }, { "epoch": 0.617567733235258, "grad_norm": 1.7904103013076238, "learning_rate": 3.36945071969544e-06, "loss": 0.5922, "step": 20150 }, { "epoch": 0.6175983817579993, "grad_norm": 1.7830589772156424, "learning_rate": 3.368981542654465e-06, "loss": 0.6451, "step": 20151 }, { "epoch": 0.6176290302807405, "grad_norm": 1.84901746353147, "learning_rate": 3.3685123816843335e-06, "loss": 0.6594, "step": 20152 }, { "epoch": 0.6176596788034817, "grad_norm": 1.6299561250564207, "learning_rate": 3.3680432367896667e-06, "loss": 0.5764, "step": 20153 }, { "epoch": 0.6176903273262229, "grad_norm": 1.8330000694387496, "learning_rate": 3.367574107975087e-06, "loss": 0.6325, "step": 20154 }, { "epoch": 0.6177209758489641, "grad_norm": 1.758613695028552, "learning_rate": 3.3671049952452172e-06, "loss": 0.5496, "step": 20155 }, { "epoch": 0.6177516243717053, "grad_norm": 2.247166078667834, "learning_rate": 3.366635898604681e-06, "loss": 0.6587, "step": 20156 }, { "epoch": 0.6177822728944465, "grad_norm": 1.7285824334085593, "learning_rate": 3.3661668180580965e-06, "loss": 0.6633, "step": 20157 }, { "epoch": 0.6178129214171877, "grad_norm": 1.491972002124259, "learning_rate": 3.3656977536100916e-06, "loss": 0.5014, "step": 20158 }, { "epoch": 0.617843569939929, "grad_norm": 1.9273970199439365, "learning_rate": 3.3652287052652816e-06, "loss": 0.552, "step": 20159 }, { "epoch": 0.6178742184626701, "grad_norm": 1.7904234962480803, "learning_rate": 3.3647596730282944e-06, "loss": 0.6948, "step": 20160 }, { "epoch": 0.6179048669854112, "grad_norm": 2.016214880902336, "learning_rate": 3.3642906569037474e-06, "loss": 0.6181, "step": 20161 }, { "epoch": 0.6179355155081525, "grad_norm": 1.7250611016711082, "learning_rate": 3.363821656896262e-06, "loss": 0.6482, "step": 20162 }, { "epoch": 0.6179661640308937, "grad_norm": 1.7145714035026383, "learning_rate": 3.363352673010462e-06, "loss": 0.5555, "step": 20163 }, { "epoch": 0.6179968125536349, "grad_norm": 1.7784094036465417, "learning_rate": 3.3628837052509666e-06, "loss": 0.6503, "step": 20164 }, { "epoch": 0.6180274610763761, "grad_norm": 1.65575748517617, "learning_rate": 3.3624147536223962e-06, "loss": 0.6329, "step": 20165 }, { "epoch": 0.6180581095991173, "grad_norm": 1.9497082044326208, "learning_rate": 3.3619458181293728e-06, "loss": 0.5668, "step": 20166 }, { "epoch": 0.6180887581218585, "grad_norm": 1.9706282840292184, "learning_rate": 3.3614768987765155e-06, "loss": 0.6362, "step": 20167 }, { "epoch": 0.6181194066445997, "grad_norm": 1.8628491512968708, "learning_rate": 3.361007995568446e-06, "loss": 0.6386, "step": 20168 }, { "epoch": 0.6181500551673409, "grad_norm": 0.8428001228888347, "learning_rate": 3.360539108509786e-06, "loss": 0.4167, "step": 20169 }, { "epoch": 0.6181807036900822, "grad_norm": 1.6295799801404889, "learning_rate": 3.3600702376051497e-06, "loss": 0.5449, "step": 20170 }, { "epoch": 0.6182113522128233, "grad_norm": 1.528273707203889, "learning_rate": 3.359601382859165e-06, "loss": 0.5452, "step": 20171 }, { "epoch": 0.6182420007355646, "grad_norm": 1.7692876565528417, "learning_rate": 3.359132544276446e-06, "loss": 0.6733, "step": 20172 }, { "epoch": 0.6182726492583057, "grad_norm": 1.9092084681602892, "learning_rate": 3.358663721861613e-06, "loss": 0.6309, "step": 20173 }, { "epoch": 0.618303297781047, "grad_norm": 1.561089639851084, "learning_rate": 3.358194915619287e-06, "loss": 0.5611, "step": 20174 }, { "epoch": 0.6183339463037881, "grad_norm": 1.5333501741888176, "learning_rate": 3.3577261255540873e-06, "loss": 0.6625, "step": 20175 }, { "epoch": 0.6183645948265294, "grad_norm": 1.7827188956186997, "learning_rate": 3.3572573516706307e-06, "loss": 0.5602, "step": 20176 }, { "epoch": 0.6183952433492705, "grad_norm": 1.5323487877201103, "learning_rate": 3.356788593973539e-06, "loss": 0.6037, "step": 20177 }, { "epoch": 0.6184258918720118, "grad_norm": 1.796592453421197, "learning_rate": 3.356319852467428e-06, "loss": 0.5454, "step": 20178 }, { "epoch": 0.618456540394753, "grad_norm": 1.8166689707391754, "learning_rate": 3.3558511271569194e-06, "loss": 0.663, "step": 20179 }, { "epoch": 0.6184871889174942, "grad_norm": 0.7813118637996507, "learning_rate": 3.355382418046632e-06, "loss": 0.4236, "step": 20180 }, { "epoch": 0.6185178374402354, "grad_norm": 1.7033996039259882, "learning_rate": 3.3549137251411788e-06, "loss": 0.646, "step": 20181 }, { "epoch": 0.6185484859629766, "grad_norm": 1.5789698097053504, "learning_rate": 3.354445048445185e-06, "loss": 0.5819, "step": 20182 }, { "epoch": 0.6185791344857178, "grad_norm": 0.8122165581050435, "learning_rate": 3.3539763879632636e-06, "loss": 0.4266, "step": 20183 }, { "epoch": 0.618609783008459, "grad_norm": 1.9753420438015512, "learning_rate": 3.353507743700033e-06, "loss": 0.5886, "step": 20184 }, { "epoch": 0.6186404315312002, "grad_norm": 1.8699108436061764, "learning_rate": 3.353039115660113e-06, "loss": 0.5232, "step": 20185 }, { "epoch": 0.6186710800539414, "grad_norm": 1.6004506469575097, "learning_rate": 3.3525705038481194e-06, "loss": 0.5839, "step": 20186 }, { "epoch": 0.6187017285766826, "grad_norm": 1.7994586398366184, "learning_rate": 3.352101908268671e-06, "loss": 0.5595, "step": 20187 }, { "epoch": 0.6187323770994239, "grad_norm": 1.9655813069651904, "learning_rate": 3.3516333289263843e-06, "loss": 0.5462, "step": 20188 }, { "epoch": 0.618763025622165, "grad_norm": 2.020027794324559, "learning_rate": 3.3511647658258747e-06, "loss": 0.5941, "step": 20189 }, { "epoch": 0.6187936741449063, "grad_norm": 1.8134028605840016, "learning_rate": 3.3506962189717628e-06, "loss": 0.6082, "step": 20190 }, { "epoch": 0.6188243226676474, "grad_norm": 1.6212506126494288, "learning_rate": 3.350227688368662e-06, "loss": 0.5974, "step": 20191 }, { "epoch": 0.6188549711903886, "grad_norm": 1.7866777048729323, "learning_rate": 3.349759174021189e-06, "loss": 0.6057, "step": 20192 }, { "epoch": 0.6188856197131298, "grad_norm": 1.8912208845052416, "learning_rate": 3.349290675933962e-06, "loss": 0.6481, "step": 20193 }, { "epoch": 0.618916268235871, "grad_norm": 0.7589454785487206, "learning_rate": 3.348822194111595e-06, "loss": 0.4081, "step": 20194 }, { "epoch": 0.6189469167586122, "grad_norm": 1.6148396335491833, "learning_rate": 3.3483537285587066e-06, "loss": 0.655, "step": 20195 }, { "epoch": 0.6189775652813534, "grad_norm": 1.8213883642862685, "learning_rate": 3.3478852792799116e-06, "loss": 0.6589, "step": 20196 }, { "epoch": 0.6190082138040947, "grad_norm": 1.506378666272306, "learning_rate": 3.3474168462798244e-06, "loss": 0.5597, "step": 20197 }, { "epoch": 0.6190388623268358, "grad_norm": 1.817051134606795, "learning_rate": 3.3469484295630634e-06, "loss": 0.6062, "step": 20198 }, { "epoch": 0.6190695108495771, "grad_norm": 0.7835943665020696, "learning_rate": 3.3464800291342432e-06, "loss": 0.435, "step": 20199 }, { "epoch": 0.6191001593723182, "grad_norm": 1.8491349150904448, "learning_rate": 3.346011644997975e-06, "loss": 0.6095, "step": 20200 }, { "epoch": 0.6191308078950595, "grad_norm": 1.6737293931181991, "learning_rate": 3.3455432771588803e-06, "loss": 0.5608, "step": 20201 }, { "epoch": 0.6191614564178006, "grad_norm": 1.7986370040188087, "learning_rate": 3.345074925621571e-06, "loss": 0.6124, "step": 20202 }, { "epoch": 0.6191921049405419, "grad_norm": 0.7802500571522473, "learning_rate": 3.3446065903906597e-06, "loss": 0.4157, "step": 20203 }, { "epoch": 0.619222753463283, "grad_norm": 0.796235291406591, "learning_rate": 3.3441382714707647e-06, "loss": 0.4253, "step": 20204 }, { "epoch": 0.6192534019860243, "grad_norm": 1.9084031540822268, "learning_rate": 3.3436699688664975e-06, "loss": 0.6842, "step": 20205 }, { "epoch": 0.6192840505087654, "grad_norm": 1.7462891332114057, "learning_rate": 3.3432016825824753e-06, "loss": 0.6696, "step": 20206 }, { "epoch": 0.6193146990315067, "grad_norm": 1.4579958260338983, "learning_rate": 3.3427334126233115e-06, "loss": 0.5293, "step": 20207 }, { "epoch": 0.6193453475542479, "grad_norm": 0.8879522752906471, "learning_rate": 3.3422651589936173e-06, "loss": 0.4081, "step": 20208 }, { "epoch": 0.6193759960769891, "grad_norm": 1.6689715964937233, "learning_rate": 3.3417969216980107e-06, "loss": 0.6582, "step": 20209 }, { "epoch": 0.6194066445997303, "grad_norm": 1.9936984428171518, "learning_rate": 3.3413287007411034e-06, "loss": 0.6188, "step": 20210 }, { "epoch": 0.6194372931224715, "grad_norm": 1.5938704878399022, "learning_rate": 3.340860496127506e-06, "loss": 0.5489, "step": 20211 }, { "epoch": 0.6194679416452127, "grad_norm": 1.805701834569897, "learning_rate": 3.3403923078618378e-06, "loss": 0.5001, "step": 20212 }, { "epoch": 0.6194985901679539, "grad_norm": 0.8332632262703782, "learning_rate": 3.3399241359487057e-06, "loss": 0.4063, "step": 20213 }, { "epoch": 0.6195292386906951, "grad_norm": 1.582178785840196, "learning_rate": 3.339455980392729e-06, "loss": 0.6177, "step": 20214 }, { "epoch": 0.6195598872134364, "grad_norm": 1.721968169454078, "learning_rate": 3.3389878411985165e-06, "loss": 0.7397, "step": 20215 }, { "epoch": 0.6195905357361775, "grad_norm": 1.818360576054386, "learning_rate": 3.3385197183706803e-06, "loss": 0.6073, "step": 20216 }, { "epoch": 0.6196211842589188, "grad_norm": 2.004723700295565, "learning_rate": 3.3380516119138357e-06, "loss": 0.6168, "step": 20217 }, { "epoch": 0.6196518327816599, "grad_norm": 1.832698154434655, "learning_rate": 3.3375835218325934e-06, "loss": 0.6414, "step": 20218 }, { "epoch": 0.6196824813044012, "grad_norm": 1.7507301415207044, "learning_rate": 3.337115448131566e-06, "loss": 0.5776, "step": 20219 }, { "epoch": 0.6197131298271423, "grad_norm": 1.8257291425200017, "learning_rate": 3.336647390815366e-06, "loss": 0.5422, "step": 20220 }, { "epoch": 0.6197437783498836, "grad_norm": 2.776917784312109, "learning_rate": 3.3361793498886035e-06, "loss": 0.6815, "step": 20221 }, { "epoch": 0.6197744268726247, "grad_norm": 1.859993231686729, "learning_rate": 3.3357113253558927e-06, "loss": 0.6811, "step": 20222 }, { "epoch": 0.6198050753953659, "grad_norm": 1.9054208432868285, "learning_rate": 3.3352433172218457e-06, "loss": 0.6063, "step": 20223 }, { "epoch": 0.6198357239181072, "grad_norm": 1.5185273551361282, "learning_rate": 3.3347753254910686e-06, "loss": 0.5093, "step": 20224 }, { "epoch": 0.6198663724408483, "grad_norm": 1.7963638572879348, "learning_rate": 3.3343073501681794e-06, "loss": 0.6711, "step": 20225 }, { "epoch": 0.6198970209635896, "grad_norm": 1.7263577331600735, "learning_rate": 3.3338393912577848e-06, "loss": 0.482, "step": 20226 }, { "epoch": 0.6199276694863307, "grad_norm": 1.8635266827030574, "learning_rate": 3.3333714487644963e-06, "loss": 0.5707, "step": 20227 }, { "epoch": 0.619958318009072, "grad_norm": 1.809728275822843, "learning_rate": 3.3329035226929265e-06, "loss": 0.5984, "step": 20228 }, { "epoch": 0.6199889665318131, "grad_norm": 1.9311065160989602, "learning_rate": 3.332435613047685e-06, "loss": 0.6796, "step": 20229 }, { "epoch": 0.6200196150545544, "grad_norm": 1.7066583139168552, "learning_rate": 3.3319677198333804e-06, "loss": 0.5842, "step": 20230 }, { "epoch": 0.6200502635772955, "grad_norm": 1.8038349725520926, "learning_rate": 3.331499843054626e-06, "loss": 0.6446, "step": 20231 }, { "epoch": 0.6200809121000368, "grad_norm": 1.8089073620933622, "learning_rate": 3.3310319827160297e-06, "loss": 0.6343, "step": 20232 }, { "epoch": 0.620111560622778, "grad_norm": 2.0197043080886514, "learning_rate": 3.330564138822203e-06, "loss": 0.7085, "step": 20233 }, { "epoch": 0.6201422091455192, "grad_norm": 1.98894423039122, "learning_rate": 3.3300963113777563e-06, "loss": 0.5954, "step": 20234 }, { "epoch": 0.6201728576682604, "grad_norm": 1.64049986175199, "learning_rate": 3.329628500387295e-06, "loss": 0.5798, "step": 20235 }, { "epoch": 0.6202035061910016, "grad_norm": 0.8058955364882344, "learning_rate": 3.329160705855434e-06, "loss": 0.4183, "step": 20236 }, { "epoch": 0.6202341547137428, "grad_norm": 1.8519923416698856, "learning_rate": 3.328692927786779e-06, "loss": 0.6377, "step": 20237 }, { "epoch": 0.620264803236484, "grad_norm": 1.8238466802383515, "learning_rate": 3.32822516618594e-06, "loss": 0.5604, "step": 20238 }, { "epoch": 0.6202954517592252, "grad_norm": 1.7898427742158602, "learning_rate": 3.327757421057526e-06, "loss": 0.5983, "step": 20239 }, { "epoch": 0.6203261002819664, "grad_norm": 1.5753255777413093, "learning_rate": 3.327289692406146e-06, "loss": 0.5343, "step": 20240 }, { "epoch": 0.6203567488047076, "grad_norm": 1.7367159750636936, "learning_rate": 3.3268219802364088e-06, "loss": 0.728, "step": 20241 }, { "epoch": 0.6203873973274489, "grad_norm": 1.7302307240977288, "learning_rate": 3.3263542845529247e-06, "loss": 0.6671, "step": 20242 }, { "epoch": 0.62041804585019, "grad_norm": 0.7830594135103125, "learning_rate": 3.3258866053602967e-06, "loss": 0.4172, "step": 20243 }, { "epoch": 0.6204486943729313, "grad_norm": 1.6552084617924954, "learning_rate": 3.325418942663139e-06, "loss": 0.583, "step": 20244 }, { "epoch": 0.6204793428956724, "grad_norm": 1.917409874095901, "learning_rate": 3.3249512964660556e-06, "loss": 0.5608, "step": 20245 }, { "epoch": 0.6205099914184137, "grad_norm": 0.7780374763971255, "learning_rate": 3.3244836667736557e-06, "loss": 0.4231, "step": 20246 }, { "epoch": 0.6205406399411548, "grad_norm": 1.5445078919433746, "learning_rate": 3.3240160535905475e-06, "loss": 0.6421, "step": 20247 }, { "epoch": 0.6205712884638961, "grad_norm": 1.464578852183084, "learning_rate": 3.3235484569213373e-06, "loss": 0.5651, "step": 20248 }, { "epoch": 0.6206019369866372, "grad_norm": 1.6571093029808903, "learning_rate": 3.3230808767706328e-06, "loss": 0.6054, "step": 20249 }, { "epoch": 0.6206325855093785, "grad_norm": 1.7118066763123327, "learning_rate": 3.3226133131430428e-06, "loss": 0.5402, "step": 20250 }, { "epoch": 0.6206632340321196, "grad_norm": 1.6529037344699902, "learning_rate": 3.3221457660431713e-06, "loss": 0.6253, "step": 20251 }, { "epoch": 0.6206938825548609, "grad_norm": 1.8441755516487852, "learning_rate": 3.321678235475628e-06, "loss": 0.6685, "step": 20252 }, { "epoch": 0.6207245310776021, "grad_norm": 1.786404739532649, "learning_rate": 3.3212107214450196e-06, "loss": 0.729, "step": 20253 }, { "epoch": 0.6207551796003432, "grad_norm": 0.7917822312430998, "learning_rate": 3.320743223955948e-06, "loss": 0.4137, "step": 20254 }, { "epoch": 0.6207858281230845, "grad_norm": 0.7460083704553951, "learning_rate": 3.3202757430130265e-06, "loss": 0.4255, "step": 20255 }, { "epoch": 0.6208164766458256, "grad_norm": 1.6232292317695647, "learning_rate": 3.3198082786208575e-06, "loss": 0.6378, "step": 20256 }, { "epoch": 0.6208471251685669, "grad_norm": 0.7643778194200013, "learning_rate": 3.3193408307840453e-06, "loss": 0.4037, "step": 20257 }, { "epoch": 0.620877773691308, "grad_norm": 1.7106503547500762, "learning_rate": 3.3188733995072e-06, "loss": 0.5706, "step": 20258 }, { "epoch": 0.6209084222140493, "grad_norm": 1.4769577436935895, "learning_rate": 3.3184059847949234e-06, "loss": 0.5229, "step": 20259 }, { "epoch": 0.6209390707367904, "grad_norm": 1.9320483240754838, "learning_rate": 3.3179385866518236e-06, "loss": 0.6895, "step": 20260 }, { "epoch": 0.6209697192595317, "grad_norm": 1.866468509890451, "learning_rate": 3.3174712050825066e-06, "loss": 0.6111, "step": 20261 }, { "epoch": 0.6210003677822729, "grad_norm": 1.7534415822628753, "learning_rate": 3.3170038400915737e-06, "loss": 0.5879, "step": 20262 }, { "epoch": 0.6210310163050141, "grad_norm": 1.7856005616533261, "learning_rate": 3.3165364916836346e-06, "loss": 0.5977, "step": 20263 }, { "epoch": 0.6210616648277553, "grad_norm": 0.8402744481044486, "learning_rate": 3.3160691598632934e-06, "loss": 0.4011, "step": 20264 }, { "epoch": 0.6210923133504965, "grad_norm": 1.6813138584510794, "learning_rate": 3.31560184463515e-06, "loss": 0.5462, "step": 20265 }, { "epoch": 0.6211229618732377, "grad_norm": 1.8232826358177254, "learning_rate": 3.3151345460038154e-06, "loss": 0.6067, "step": 20266 }, { "epoch": 0.6211536103959789, "grad_norm": 1.8751735566520178, "learning_rate": 3.3146672639738886e-06, "loss": 0.5862, "step": 20267 }, { "epoch": 0.6211842589187201, "grad_norm": 1.718952431019854, "learning_rate": 3.3141999985499795e-06, "loss": 0.602, "step": 20268 }, { "epoch": 0.6212149074414614, "grad_norm": 1.6916504470129161, "learning_rate": 3.3137327497366885e-06, "loss": 0.668, "step": 20269 }, { "epoch": 0.6212455559642025, "grad_norm": 1.8486772216531933, "learning_rate": 3.3132655175386188e-06, "loss": 0.6202, "step": 20270 }, { "epoch": 0.6212762044869438, "grad_norm": 1.8792604711451826, "learning_rate": 3.312798301960376e-06, "loss": 0.6994, "step": 20271 }, { "epoch": 0.6213068530096849, "grad_norm": 1.7383082856937706, "learning_rate": 3.312331103006564e-06, "loss": 0.582, "step": 20272 }, { "epoch": 0.6213375015324262, "grad_norm": 2.027527560204616, "learning_rate": 3.3118639206817836e-06, "loss": 0.6085, "step": 20273 }, { "epoch": 0.6213681500551673, "grad_norm": 1.7976780809558874, "learning_rate": 3.3113967549906424e-06, "loss": 0.5998, "step": 20274 }, { "epoch": 0.6213987985779086, "grad_norm": 1.83622945884188, "learning_rate": 3.3109296059377405e-06, "loss": 0.6099, "step": 20275 }, { "epoch": 0.6214294471006497, "grad_norm": 1.7715738000063985, "learning_rate": 3.310462473527679e-06, "loss": 0.6451, "step": 20276 }, { "epoch": 0.621460095623391, "grad_norm": 0.8126911903807653, "learning_rate": 3.309995357765066e-06, "loss": 0.4506, "step": 20277 }, { "epoch": 0.6214907441461321, "grad_norm": 1.7454630780990872, "learning_rate": 3.3095282586545e-06, "loss": 0.6104, "step": 20278 }, { "epoch": 0.6215213926688734, "grad_norm": 1.8246766550232363, "learning_rate": 3.309061176200584e-06, "loss": 0.6514, "step": 20279 }, { "epoch": 0.6215520411916146, "grad_norm": 1.5436349489393768, "learning_rate": 3.3085941104079217e-06, "loss": 0.5307, "step": 20280 }, { "epoch": 0.6215826897143558, "grad_norm": 1.8161213362215118, "learning_rate": 3.3081270612811132e-06, "loss": 0.5771, "step": 20281 }, { "epoch": 0.621613338237097, "grad_norm": 1.7401683448170122, "learning_rate": 3.3076600288247627e-06, "loss": 0.5709, "step": 20282 }, { "epoch": 0.6216439867598382, "grad_norm": 1.6237879937285198, "learning_rate": 3.3071930130434717e-06, "loss": 0.5173, "step": 20283 }, { "epoch": 0.6216746352825794, "grad_norm": 1.5740785258381982, "learning_rate": 3.306726013941839e-06, "loss": 0.5582, "step": 20284 }, { "epoch": 0.6217052838053205, "grad_norm": 1.7549576802363949, "learning_rate": 3.306259031524469e-06, "loss": 0.6157, "step": 20285 }, { "epoch": 0.6217359323280618, "grad_norm": 1.654972909522068, "learning_rate": 3.305792065795962e-06, "loss": 0.5781, "step": 20286 }, { "epoch": 0.6217665808508029, "grad_norm": 1.7546523505294391, "learning_rate": 3.3053251167609214e-06, "loss": 0.6361, "step": 20287 }, { "epoch": 0.6217972293735442, "grad_norm": 1.922707803871604, "learning_rate": 3.3048581844239436e-06, "loss": 0.7269, "step": 20288 }, { "epoch": 0.6218278778962854, "grad_norm": 1.608989309380751, "learning_rate": 3.304391268789632e-06, "loss": 0.5602, "step": 20289 }, { "epoch": 0.6218585264190266, "grad_norm": 1.8347130798533224, "learning_rate": 3.303924369862588e-06, "loss": 0.5915, "step": 20290 }, { "epoch": 0.6218891749417678, "grad_norm": 1.9657816977499252, "learning_rate": 3.3034574876474113e-06, "loss": 0.6583, "step": 20291 }, { "epoch": 0.621919823464509, "grad_norm": 1.8948444954543957, "learning_rate": 3.3029906221487e-06, "loss": 0.7181, "step": 20292 }, { "epoch": 0.6219504719872502, "grad_norm": 1.8685280136679954, "learning_rate": 3.302523773371058e-06, "loss": 0.5616, "step": 20293 }, { "epoch": 0.6219811205099914, "grad_norm": 1.643774882835605, "learning_rate": 3.302056941319083e-06, "loss": 0.5841, "step": 20294 }, { "epoch": 0.6220117690327326, "grad_norm": 1.7692959729720847, "learning_rate": 3.301590125997376e-06, "loss": 0.4703, "step": 20295 }, { "epoch": 0.6220424175554738, "grad_norm": 1.796664848839257, "learning_rate": 3.301123327410537e-06, "loss": 0.6873, "step": 20296 }, { "epoch": 0.622073066078215, "grad_norm": 1.8708423276441348, "learning_rate": 3.300656545563161e-06, "loss": 0.5791, "step": 20297 }, { "epoch": 0.6221037146009563, "grad_norm": 0.803032825832607, "learning_rate": 3.300189780459855e-06, "loss": 0.3939, "step": 20298 }, { "epoch": 0.6221343631236974, "grad_norm": 1.6575116225559088, "learning_rate": 3.299723032105212e-06, "loss": 0.5859, "step": 20299 }, { "epoch": 0.6221650116464387, "grad_norm": 1.65154945947927, "learning_rate": 3.2992563005038323e-06, "loss": 0.6077, "step": 20300 }, { "epoch": 0.6221956601691798, "grad_norm": 0.8148196781307027, "learning_rate": 3.298789585660317e-06, "loss": 0.4276, "step": 20301 }, { "epoch": 0.6222263086919211, "grad_norm": 1.9583855049003036, "learning_rate": 3.298322887579263e-06, "loss": 0.5757, "step": 20302 }, { "epoch": 0.6222569572146622, "grad_norm": 1.7772601282395664, "learning_rate": 3.2978562062652674e-06, "loss": 0.6573, "step": 20303 }, { "epoch": 0.6222876057374035, "grad_norm": 1.7623419196084364, "learning_rate": 3.2973895417229312e-06, "loss": 0.6273, "step": 20304 }, { "epoch": 0.6223182542601446, "grad_norm": 1.7326755011155137, "learning_rate": 3.296922893956851e-06, "loss": 0.6264, "step": 20305 }, { "epoch": 0.6223489027828859, "grad_norm": 1.6891226011979017, "learning_rate": 3.2964562629716256e-06, "loss": 0.5726, "step": 20306 }, { "epoch": 0.622379551305627, "grad_norm": 0.802196698562019, "learning_rate": 3.295989648771854e-06, "loss": 0.4148, "step": 20307 }, { "epoch": 0.6224101998283683, "grad_norm": 0.8168891851276758, "learning_rate": 3.2955230513621294e-06, "loss": 0.4203, "step": 20308 }, { "epoch": 0.6224408483511095, "grad_norm": 1.7899758591254133, "learning_rate": 3.295056470747055e-06, "loss": 0.5803, "step": 20309 }, { "epoch": 0.6224714968738507, "grad_norm": 1.5988560178038764, "learning_rate": 3.294589906931225e-06, "loss": 0.6357, "step": 20310 }, { "epoch": 0.6225021453965919, "grad_norm": 1.8241047506168757, "learning_rate": 3.294123359919235e-06, "loss": 0.6923, "step": 20311 }, { "epoch": 0.6225327939193331, "grad_norm": 1.666165394336207, "learning_rate": 3.2936568297156856e-06, "loss": 0.6176, "step": 20312 }, { "epoch": 0.6225634424420743, "grad_norm": 1.6028958570489444, "learning_rate": 3.293190316325171e-06, "loss": 0.5905, "step": 20313 }, { "epoch": 0.6225940909648155, "grad_norm": 0.7626684794128763, "learning_rate": 3.2927238197522897e-06, "loss": 0.416, "step": 20314 }, { "epoch": 0.6226247394875567, "grad_norm": 1.7209162462787801, "learning_rate": 3.292257340001638e-06, "loss": 0.5611, "step": 20315 }, { "epoch": 0.6226553880102978, "grad_norm": 1.8789140285084516, "learning_rate": 3.29179087707781e-06, "loss": 0.5867, "step": 20316 }, { "epoch": 0.6226860365330391, "grad_norm": 0.7696046103276575, "learning_rate": 3.291324430985405e-06, "loss": 0.3895, "step": 20317 }, { "epoch": 0.6227166850557803, "grad_norm": 1.6435855185850445, "learning_rate": 3.2908580017290185e-06, "loss": 0.6059, "step": 20318 }, { "epoch": 0.6227473335785215, "grad_norm": 1.816689819963533, "learning_rate": 3.2903915893132423e-06, "loss": 0.5818, "step": 20319 }, { "epoch": 0.6227779821012627, "grad_norm": 1.639843259619909, "learning_rate": 3.2899251937426783e-06, "loss": 0.5368, "step": 20320 }, { "epoch": 0.6228086306240039, "grad_norm": 1.607569428194566, "learning_rate": 3.289458815021916e-06, "loss": 0.5668, "step": 20321 }, { "epoch": 0.6228392791467451, "grad_norm": 1.815978739524941, "learning_rate": 3.288992453155556e-06, "loss": 0.591, "step": 20322 }, { "epoch": 0.6228699276694863, "grad_norm": 1.8753536457022737, "learning_rate": 3.288526108148191e-06, "loss": 0.6184, "step": 20323 }, { "epoch": 0.6229005761922275, "grad_norm": 1.6819420059112766, "learning_rate": 3.2880597800044144e-06, "loss": 0.5346, "step": 20324 }, { "epoch": 0.6229312247149688, "grad_norm": 1.9935844452801965, "learning_rate": 3.2875934687288245e-06, "loss": 0.7069, "step": 20325 }, { "epoch": 0.6229618732377099, "grad_norm": 0.8123960545416904, "learning_rate": 3.287127174326014e-06, "loss": 0.4327, "step": 20326 }, { "epoch": 0.6229925217604512, "grad_norm": 1.8942519072496673, "learning_rate": 3.286660896800577e-06, "loss": 0.6237, "step": 20327 }, { "epoch": 0.6230231702831923, "grad_norm": 1.7309968365861423, "learning_rate": 3.2861946361571094e-06, "loss": 0.5178, "step": 20328 }, { "epoch": 0.6230538188059336, "grad_norm": 1.7044302756570844, "learning_rate": 3.2857283924002055e-06, "loss": 0.6262, "step": 20329 }, { "epoch": 0.6230844673286747, "grad_norm": 1.8045162583839167, "learning_rate": 3.285262165534456e-06, "loss": 0.6029, "step": 20330 }, { "epoch": 0.623115115851416, "grad_norm": 0.8083474492370243, "learning_rate": 3.2847959555644582e-06, "loss": 0.4329, "step": 20331 }, { "epoch": 0.6231457643741571, "grad_norm": 1.8422977198961485, "learning_rate": 3.284329762494804e-06, "loss": 0.6299, "step": 20332 }, { "epoch": 0.6231764128968984, "grad_norm": 0.8584408713131739, "learning_rate": 3.283863586330088e-06, "loss": 0.43, "step": 20333 }, { "epoch": 0.6232070614196396, "grad_norm": 0.791280621692549, "learning_rate": 3.2833974270749047e-06, "loss": 0.4245, "step": 20334 }, { "epoch": 0.6232377099423808, "grad_norm": 1.5476357913936305, "learning_rate": 3.2829312847338434e-06, "loss": 0.5018, "step": 20335 }, { "epoch": 0.623268358465122, "grad_norm": 1.754124445269533, "learning_rate": 3.282465159311501e-06, "loss": 0.6202, "step": 20336 }, { "epoch": 0.6232990069878632, "grad_norm": 1.85968245389962, "learning_rate": 3.28199905081247e-06, "loss": 0.6297, "step": 20337 }, { "epoch": 0.6233296555106044, "grad_norm": 2.2036690317707026, "learning_rate": 3.281532959241338e-06, "loss": 0.5342, "step": 20338 }, { "epoch": 0.6233603040333456, "grad_norm": 1.5703158189431474, "learning_rate": 3.281066884602705e-06, "loss": 0.59, "step": 20339 }, { "epoch": 0.6233909525560868, "grad_norm": 1.6905688738418474, "learning_rate": 3.280600826901157e-06, "loss": 0.5884, "step": 20340 }, { "epoch": 0.623421601078828, "grad_norm": 1.5981890486819021, "learning_rate": 3.280134786141292e-06, "loss": 0.6263, "step": 20341 }, { "epoch": 0.6234522496015692, "grad_norm": 1.652927819809463, "learning_rate": 3.279668762327698e-06, "loss": 0.6346, "step": 20342 }, { "epoch": 0.6234828981243105, "grad_norm": 1.6077979940870588, "learning_rate": 3.2792027554649663e-06, "loss": 0.5915, "step": 20343 }, { "epoch": 0.6235135466470516, "grad_norm": 1.7878547800258524, "learning_rate": 3.278736765557692e-06, "loss": 0.5834, "step": 20344 }, { "epoch": 0.6235441951697929, "grad_norm": 1.9281936758420593, "learning_rate": 3.278270792610464e-06, "loss": 0.641, "step": 20345 }, { "epoch": 0.623574843692534, "grad_norm": 1.793462129773038, "learning_rate": 3.2778048366278737e-06, "loss": 0.5899, "step": 20346 }, { "epoch": 0.6236054922152752, "grad_norm": 1.64032234033022, "learning_rate": 3.277338897614514e-06, "loss": 0.5885, "step": 20347 }, { "epoch": 0.6236361407380164, "grad_norm": 1.6299078736882084, "learning_rate": 3.2768729755749734e-06, "loss": 0.5807, "step": 20348 }, { "epoch": 0.6236667892607576, "grad_norm": 0.8005536494164044, "learning_rate": 3.2764070705138463e-06, "loss": 0.4075, "step": 20349 }, { "epoch": 0.6236974377834988, "grad_norm": 1.6081490721336686, "learning_rate": 3.2759411824357213e-06, "loss": 0.4646, "step": 20350 }, { "epoch": 0.62372808630624, "grad_norm": 1.8984516010884738, "learning_rate": 3.2754753113451864e-06, "loss": 0.6364, "step": 20351 }, { "epoch": 0.6237587348289813, "grad_norm": 1.5935941187687028, "learning_rate": 3.275009457246837e-06, "loss": 0.5267, "step": 20352 }, { "epoch": 0.6237893833517224, "grad_norm": 0.7880893687312328, "learning_rate": 3.2745436201452606e-06, "loss": 0.4124, "step": 20353 }, { "epoch": 0.6238200318744637, "grad_norm": 1.800094339058846, "learning_rate": 3.274077800045046e-06, "loss": 0.6226, "step": 20354 }, { "epoch": 0.6238506803972048, "grad_norm": 1.8859072830697772, "learning_rate": 3.2736119969507858e-06, "loss": 0.6094, "step": 20355 }, { "epoch": 0.6238813289199461, "grad_norm": 1.6618415705967766, "learning_rate": 3.2731462108670676e-06, "loss": 0.582, "step": 20356 }, { "epoch": 0.6239119774426872, "grad_norm": 1.674550309431135, "learning_rate": 3.2726804417984816e-06, "loss": 0.6844, "step": 20357 }, { "epoch": 0.6239426259654285, "grad_norm": 0.7582691168273324, "learning_rate": 3.272214689749618e-06, "loss": 0.3979, "step": 20358 }, { "epoch": 0.6239732744881696, "grad_norm": 1.744573601631207, "learning_rate": 3.271748954725063e-06, "loss": 0.7002, "step": 20359 }, { "epoch": 0.6240039230109109, "grad_norm": 1.8245557596815511, "learning_rate": 3.2712832367294094e-06, "loss": 0.6901, "step": 20360 }, { "epoch": 0.624034571533652, "grad_norm": 1.896003557606396, "learning_rate": 3.2708175357672457e-06, "loss": 0.5929, "step": 20361 }, { "epoch": 0.6240652200563933, "grad_norm": 1.8240041549048385, "learning_rate": 3.2703518518431552e-06, "loss": 0.6042, "step": 20362 }, { "epoch": 0.6240958685791345, "grad_norm": 1.9600886708099714, "learning_rate": 3.269886184961735e-06, "loss": 0.7216, "step": 20363 }, { "epoch": 0.6241265171018757, "grad_norm": 1.8511518542612924, "learning_rate": 3.2694205351275666e-06, "loss": 0.6166, "step": 20364 }, { "epoch": 0.6241571656246169, "grad_norm": 1.6118154282500918, "learning_rate": 3.2689549023452405e-06, "loss": 0.714, "step": 20365 }, { "epoch": 0.6241878141473581, "grad_norm": 1.9426385151142616, "learning_rate": 3.268489286619345e-06, "loss": 0.6424, "step": 20366 }, { "epoch": 0.6242184626700993, "grad_norm": 1.6605808552361634, "learning_rate": 3.2680236879544667e-06, "loss": 0.5355, "step": 20367 }, { "epoch": 0.6242491111928405, "grad_norm": 1.6083695094367465, "learning_rate": 3.2675581063551954e-06, "loss": 0.6074, "step": 20368 }, { "epoch": 0.6242797597155817, "grad_norm": 1.7227796347585513, "learning_rate": 3.2670925418261167e-06, "loss": 0.5748, "step": 20369 }, { "epoch": 0.624310408238323, "grad_norm": 1.8902592493141552, "learning_rate": 3.2666269943718175e-06, "loss": 0.6068, "step": 20370 }, { "epoch": 0.6243410567610641, "grad_norm": 1.6467182939628897, "learning_rate": 3.266161463996888e-06, "loss": 0.6319, "step": 20371 }, { "epoch": 0.6243717052838054, "grad_norm": 1.8178273140435153, "learning_rate": 3.2656959507059137e-06, "loss": 0.6566, "step": 20372 }, { "epoch": 0.6244023538065465, "grad_norm": 1.722985262863499, "learning_rate": 3.265230454503478e-06, "loss": 0.6157, "step": 20373 }, { "epoch": 0.6244330023292878, "grad_norm": 1.7619916967352776, "learning_rate": 3.2647649753941733e-06, "loss": 0.4641, "step": 20374 }, { "epoch": 0.6244636508520289, "grad_norm": 1.8129922640181055, "learning_rate": 3.2642995133825815e-06, "loss": 0.7045, "step": 20375 }, { "epoch": 0.6244942993747702, "grad_norm": 1.6435794533744255, "learning_rate": 3.263834068473292e-06, "loss": 0.5969, "step": 20376 }, { "epoch": 0.6245249478975113, "grad_norm": 1.7739813364489714, "learning_rate": 3.2633686406708888e-06, "loss": 0.5727, "step": 20377 }, { "epoch": 0.6245555964202525, "grad_norm": 1.5870277322673492, "learning_rate": 3.2629032299799577e-06, "loss": 0.5729, "step": 20378 }, { "epoch": 0.6245862449429938, "grad_norm": 1.768998406895267, "learning_rate": 3.262437836405088e-06, "loss": 0.6144, "step": 20379 }, { "epoch": 0.6246168934657349, "grad_norm": 1.6581526581348325, "learning_rate": 3.261972459950862e-06, "loss": 0.6162, "step": 20380 }, { "epoch": 0.6246475419884762, "grad_norm": 1.8139782596069052, "learning_rate": 3.2615071006218644e-06, "loss": 0.5692, "step": 20381 }, { "epoch": 0.6246781905112173, "grad_norm": 1.668284183705762, "learning_rate": 3.261041758422685e-06, "loss": 0.5361, "step": 20382 }, { "epoch": 0.6247088390339586, "grad_norm": 1.8377136690709146, "learning_rate": 3.260576433357905e-06, "loss": 0.6033, "step": 20383 }, { "epoch": 0.6247394875566997, "grad_norm": 1.6874868871884394, "learning_rate": 3.2601111254321083e-06, "loss": 0.5689, "step": 20384 }, { "epoch": 0.624770136079441, "grad_norm": 1.8614180307712744, "learning_rate": 3.2596458346498836e-06, "loss": 0.6049, "step": 20385 }, { "epoch": 0.6248007846021821, "grad_norm": 2.088708877864155, "learning_rate": 3.2591805610158134e-06, "loss": 0.5319, "step": 20386 }, { "epoch": 0.6248314331249234, "grad_norm": 1.7560566205853574, "learning_rate": 3.258715304534483e-06, "loss": 0.5586, "step": 20387 }, { "epoch": 0.6248620816476645, "grad_norm": 0.8121636423564433, "learning_rate": 3.2582500652104765e-06, "loss": 0.4195, "step": 20388 }, { "epoch": 0.6248927301704058, "grad_norm": 1.911015421768257, "learning_rate": 3.2577848430483767e-06, "loss": 0.6333, "step": 20389 }, { "epoch": 0.624923378693147, "grad_norm": 1.6424680494893706, "learning_rate": 3.2573196380527693e-06, "loss": 0.526, "step": 20390 }, { "epoch": 0.6249540272158882, "grad_norm": 1.6998173430338215, "learning_rate": 3.2568544502282384e-06, "loss": 0.5998, "step": 20391 }, { "epoch": 0.6249846757386294, "grad_norm": 0.7878365790477732, "learning_rate": 3.256389279579364e-06, "loss": 0.4231, "step": 20392 }, { "epoch": 0.6250153242613706, "grad_norm": 1.725711407392629, "learning_rate": 3.255924126110735e-06, "loss": 0.5711, "step": 20393 }, { "epoch": 0.6250459727841118, "grad_norm": 0.8305176933610717, "learning_rate": 3.2554589898269284e-06, "loss": 0.4259, "step": 20394 }, { "epoch": 0.625076621306853, "grad_norm": 1.921070093976453, "learning_rate": 3.2549938707325346e-06, "loss": 0.6169, "step": 20395 }, { "epoch": 0.6251072698295942, "grad_norm": 0.8106677232238452, "learning_rate": 3.2545287688321308e-06, "loss": 0.4137, "step": 20396 }, { "epoch": 0.6251379183523355, "grad_norm": 1.6726559229054971, "learning_rate": 3.2540636841303006e-06, "loss": 0.6721, "step": 20397 }, { "epoch": 0.6251685668750766, "grad_norm": 0.804605592780331, "learning_rate": 3.2535986166316292e-06, "loss": 0.4206, "step": 20398 }, { "epoch": 0.6251992153978179, "grad_norm": 1.9475696233068989, "learning_rate": 3.2531335663406976e-06, "loss": 0.6364, "step": 20399 }, { "epoch": 0.625229863920559, "grad_norm": 0.7782212109358141, "learning_rate": 3.2526685332620867e-06, "loss": 0.4139, "step": 20400 }, { "epoch": 0.6252605124433003, "grad_norm": 1.4953785584247812, "learning_rate": 3.252203517400381e-06, "loss": 0.5589, "step": 20401 }, { "epoch": 0.6252911609660414, "grad_norm": 1.8340381746687302, "learning_rate": 3.251738518760161e-06, "loss": 0.6695, "step": 20402 }, { "epoch": 0.6253218094887827, "grad_norm": 1.7836050866222852, "learning_rate": 3.2512735373460068e-06, "loss": 0.6785, "step": 20403 }, { "epoch": 0.6253524580115238, "grad_norm": 0.7441211898227595, "learning_rate": 3.250808573162505e-06, "loss": 0.4194, "step": 20404 }, { "epoch": 0.6253831065342651, "grad_norm": 1.9205063166449636, "learning_rate": 3.250343626214231e-06, "loss": 0.6786, "step": 20405 }, { "epoch": 0.6254137550570062, "grad_norm": 1.7143745164095099, "learning_rate": 3.2498786965057716e-06, "loss": 0.6051, "step": 20406 }, { "epoch": 0.6254444035797475, "grad_norm": 1.5788217210460607, "learning_rate": 3.249413784041704e-06, "loss": 0.6828, "step": 20407 }, { "epoch": 0.6254750521024887, "grad_norm": 1.6364565687579948, "learning_rate": 3.248948888826609e-06, "loss": 0.6263, "step": 20408 }, { "epoch": 0.6255057006252298, "grad_norm": 1.7279428820741478, "learning_rate": 3.2484840108650706e-06, "loss": 0.6829, "step": 20409 }, { "epoch": 0.6255363491479711, "grad_norm": 1.5996601511924269, "learning_rate": 3.2480191501616663e-06, "loss": 0.6076, "step": 20410 }, { "epoch": 0.6255669976707122, "grad_norm": 1.8576862064175677, "learning_rate": 3.2475543067209768e-06, "loss": 0.6153, "step": 20411 }, { "epoch": 0.6255976461934535, "grad_norm": 1.6860829977843599, "learning_rate": 3.247089480547585e-06, "loss": 0.631, "step": 20412 }, { "epoch": 0.6256282947161946, "grad_norm": 1.7965759458967256, "learning_rate": 3.246624671646067e-06, "loss": 0.5522, "step": 20413 }, { "epoch": 0.6256589432389359, "grad_norm": 1.8125782503103876, "learning_rate": 3.2461598800210065e-06, "loss": 0.5906, "step": 20414 }, { "epoch": 0.625689591761677, "grad_norm": 0.8180241995716859, "learning_rate": 3.245695105676982e-06, "loss": 0.4388, "step": 20415 }, { "epoch": 0.6257202402844183, "grad_norm": 1.587144770849988, "learning_rate": 3.2452303486185698e-06, "loss": 0.5478, "step": 20416 }, { "epoch": 0.6257508888071595, "grad_norm": 1.9141237999106469, "learning_rate": 3.244765608850354e-06, "loss": 0.6707, "step": 20417 }, { "epoch": 0.6257815373299007, "grad_norm": 1.7289395855264227, "learning_rate": 3.244300886376912e-06, "loss": 0.6214, "step": 20418 }, { "epoch": 0.6258121858526419, "grad_norm": 2.114457695560023, "learning_rate": 3.2438361812028212e-06, "loss": 0.5347, "step": 20419 }, { "epoch": 0.6258428343753831, "grad_norm": 1.6362951064822926, "learning_rate": 3.243371493332663e-06, "loss": 0.5914, "step": 20420 }, { "epoch": 0.6258734828981243, "grad_norm": 1.8352994697187999, "learning_rate": 3.2429068227710137e-06, "loss": 0.5857, "step": 20421 }, { "epoch": 0.6259041314208655, "grad_norm": 1.668150125146379, "learning_rate": 3.2424421695224538e-06, "loss": 0.6115, "step": 20422 }, { "epoch": 0.6259347799436067, "grad_norm": 1.7070529019543141, "learning_rate": 3.241977533591561e-06, "loss": 0.5622, "step": 20423 }, { "epoch": 0.625965428466348, "grad_norm": 1.6477864802836408, "learning_rate": 3.241512914982913e-06, "loss": 0.5678, "step": 20424 }, { "epoch": 0.6259960769890891, "grad_norm": 1.6655588720691676, "learning_rate": 3.2410483137010885e-06, "loss": 0.5555, "step": 20425 }, { "epoch": 0.6260267255118304, "grad_norm": 1.7064498146858182, "learning_rate": 3.240583729750666e-06, "loss": 0.5831, "step": 20426 }, { "epoch": 0.6260573740345715, "grad_norm": 1.7088160534663974, "learning_rate": 3.24011916313622e-06, "loss": 0.5875, "step": 20427 }, { "epoch": 0.6260880225573128, "grad_norm": 1.9328780278290016, "learning_rate": 3.2396546138623313e-06, "loss": 0.6331, "step": 20428 }, { "epoch": 0.6261186710800539, "grad_norm": 1.9587404320508106, "learning_rate": 3.2391900819335766e-06, "loss": 0.6288, "step": 20429 }, { "epoch": 0.6261493196027952, "grad_norm": 1.7886592236209582, "learning_rate": 3.2387255673545317e-06, "loss": 0.6424, "step": 20430 }, { "epoch": 0.6261799681255363, "grad_norm": 1.7280053724756865, "learning_rate": 3.2382610701297743e-06, "loss": 0.6325, "step": 20431 }, { "epoch": 0.6262106166482776, "grad_norm": 1.8011089882694236, "learning_rate": 3.2377965902638807e-06, "loss": 0.6152, "step": 20432 }, { "epoch": 0.6262412651710187, "grad_norm": 1.882961606247245, "learning_rate": 3.23733212776143e-06, "loss": 0.6018, "step": 20433 }, { "epoch": 0.62627191369376, "grad_norm": 1.9815683876309718, "learning_rate": 3.2368676826269972e-06, "loss": 0.6004, "step": 20434 }, { "epoch": 0.6263025622165012, "grad_norm": 1.8445282798463316, "learning_rate": 3.2364032548651554e-06, "loss": 0.5566, "step": 20435 }, { "epoch": 0.6263332107392424, "grad_norm": 1.8066637419344729, "learning_rate": 3.2359388444804863e-06, "loss": 0.6572, "step": 20436 }, { "epoch": 0.6263638592619836, "grad_norm": 2.0538076822163123, "learning_rate": 3.2354744514775626e-06, "loss": 0.6052, "step": 20437 }, { "epoch": 0.6263945077847248, "grad_norm": 1.7889066421034765, "learning_rate": 3.235010075860959e-06, "loss": 0.5492, "step": 20438 }, { "epoch": 0.626425156307466, "grad_norm": 1.9447879462225675, "learning_rate": 3.2345457176352546e-06, "loss": 0.5667, "step": 20439 }, { "epoch": 0.6264558048302071, "grad_norm": 1.7472301028634778, "learning_rate": 3.2340813768050213e-06, "loss": 0.6228, "step": 20440 }, { "epoch": 0.6264864533529484, "grad_norm": 1.8716509960965149, "learning_rate": 3.233617053374837e-06, "loss": 0.6336, "step": 20441 }, { "epoch": 0.6265171018756895, "grad_norm": 0.783958501784461, "learning_rate": 3.233152747349276e-06, "loss": 0.412, "step": 20442 }, { "epoch": 0.6265477503984308, "grad_norm": 1.7617463650923086, "learning_rate": 3.232688458732912e-06, "loss": 0.5231, "step": 20443 }, { "epoch": 0.626578398921172, "grad_norm": 1.761286456259328, "learning_rate": 3.2322241875303217e-06, "loss": 0.6493, "step": 20444 }, { "epoch": 0.6266090474439132, "grad_norm": 1.8232358520418952, "learning_rate": 3.23175993374608e-06, "loss": 0.6284, "step": 20445 }, { "epoch": 0.6266396959666544, "grad_norm": 1.7009347975579192, "learning_rate": 3.231295697384757e-06, "loss": 0.5573, "step": 20446 }, { "epoch": 0.6266703444893956, "grad_norm": 1.8839956452164062, "learning_rate": 3.2308314784509333e-06, "loss": 0.6797, "step": 20447 }, { "epoch": 0.6267009930121368, "grad_norm": 1.9484133073937024, "learning_rate": 3.230367276949176e-06, "loss": 0.5288, "step": 20448 }, { "epoch": 0.626731641534878, "grad_norm": 1.7012399299564873, "learning_rate": 3.2299030928840665e-06, "loss": 0.6086, "step": 20449 }, { "epoch": 0.6267622900576192, "grad_norm": 2.0397944674013577, "learning_rate": 3.2294389262601733e-06, "loss": 0.6307, "step": 20450 }, { "epoch": 0.6267929385803604, "grad_norm": 2.127789739440519, "learning_rate": 3.22897477708207e-06, "loss": 0.6128, "step": 20451 }, { "epoch": 0.6268235871031016, "grad_norm": 1.8922337541698242, "learning_rate": 3.228510645354333e-06, "loss": 0.626, "step": 20452 }, { "epoch": 0.6268542356258429, "grad_norm": 1.993519886790638, "learning_rate": 3.2280465310815335e-06, "loss": 0.6786, "step": 20453 }, { "epoch": 0.626884884148584, "grad_norm": 1.7312893206112885, "learning_rate": 3.227582434268244e-06, "loss": 0.6495, "step": 20454 }, { "epoch": 0.6269155326713253, "grad_norm": 1.8053956613079936, "learning_rate": 3.22711835491904e-06, "loss": 0.539, "step": 20455 }, { "epoch": 0.6269461811940664, "grad_norm": 1.8654875251556506, "learning_rate": 3.2266542930384926e-06, "loss": 0.554, "step": 20456 }, { "epoch": 0.6269768297168077, "grad_norm": 1.6583036652400782, "learning_rate": 3.226190248631171e-06, "loss": 0.5642, "step": 20457 }, { "epoch": 0.6270074782395488, "grad_norm": 1.6920328105842262, "learning_rate": 3.2257262217016546e-06, "loss": 0.587, "step": 20458 }, { "epoch": 0.6270381267622901, "grad_norm": 0.7741976987031327, "learning_rate": 3.2252622122545076e-06, "loss": 0.414, "step": 20459 }, { "epoch": 0.6270687752850312, "grad_norm": 1.8388461893146304, "learning_rate": 3.2247982202943096e-06, "loss": 0.608, "step": 20460 }, { "epoch": 0.6270994238077725, "grad_norm": 3.7219588187060415, "learning_rate": 3.2243342458256287e-06, "loss": 0.6858, "step": 20461 }, { "epoch": 0.6271300723305137, "grad_norm": 1.821827794665857, "learning_rate": 3.223870288853035e-06, "loss": 0.5733, "step": 20462 }, { "epoch": 0.6271607208532549, "grad_norm": 1.8240108833655269, "learning_rate": 3.223406349381103e-06, "loss": 0.6649, "step": 20463 }, { "epoch": 0.6271913693759961, "grad_norm": 1.7588539859886048, "learning_rate": 3.2229424274144028e-06, "loss": 0.5148, "step": 20464 }, { "epoch": 0.6272220178987373, "grad_norm": 1.8946433088644223, "learning_rate": 3.222478522957504e-06, "loss": 0.5866, "step": 20465 }, { "epoch": 0.6272526664214785, "grad_norm": 1.8426665500586485, "learning_rate": 3.2220146360149806e-06, "loss": 0.6565, "step": 20466 }, { "epoch": 0.6272833149442197, "grad_norm": 0.7799195657227673, "learning_rate": 3.2215507665914015e-06, "loss": 0.4088, "step": 20467 }, { "epoch": 0.6273139634669609, "grad_norm": 1.80781682289389, "learning_rate": 3.2210869146913374e-06, "loss": 0.712, "step": 20468 }, { "epoch": 0.6273446119897022, "grad_norm": 1.7039281101464476, "learning_rate": 3.220623080319361e-06, "loss": 0.6075, "step": 20469 }, { "epoch": 0.6273752605124433, "grad_norm": 1.9981046673091818, "learning_rate": 3.2201592634800375e-06, "loss": 0.6606, "step": 20470 }, { "epoch": 0.6274059090351845, "grad_norm": 1.8207161280310507, "learning_rate": 3.2196954641779433e-06, "loss": 0.6384, "step": 20471 }, { "epoch": 0.6274365575579257, "grad_norm": 1.7900117174229335, "learning_rate": 3.219231682417644e-06, "loss": 0.6822, "step": 20472 }, { "epoch": 0.6274672060806669, "grad_norm": 1.7019631922860596, "learning_rate": 3.2187679182037096e-06, "loss": 0.6497, "step": 20473 }, { "epoch": 0.6274978546034081, "grad_norm": 1.7090889026841425, "learning_rate": 3.2183041715407117e-06, "loss": 0.5887, "step": 20474 }, { "epoch": 0.6275285031261493, "grad_norm": 1.8083928628977999, "learning_rate": 3.217840442433218e-06, "loss": 0.6276, "step": 20475 }, { "epoch": 0.6275591516488905, "grad_norm": 1.8476766770211812, "learning_rate": 3.2173767308857982e-06, "loss": 0.6272, "step": 20476 }, { "epoch": 0.6275898001716317, "grad_norm": 1.5746272060019286, "learning_rate": 3.2169130369030234e-06, "loss": 0.5692, "step": 20477 }, { "epoch": 0.627620448694373, "grad_norm": 1.9338928181694697, "learning_rate": 3.216449360489458e-06, "loss": 0.6447, "step": 20478 }, { "epoch": 0.6276510972171141, "grad_norm": 1.5949492130975411, "learning_rate": 3.2159857016496763e-06, "loss": 0.5901, "step": 20479 }, { "epoch": 0.6276817457398554, "grad_norm": 1.6052734125449388, "learning_rate": 3.215522060388243e-06, "loss": 0.5325, "step": 20480 }, { "epoch": 0.6277123942625965, "grad_norm": 0.7859366377630111, "learning_rate": 3.215058436709726e-06, "loss": 0.427, "step": 20481 }, { "epoch": 0.6277430427853378, "grad_norm": 1.700496816493946, "learning_rate": 3.214594830618696e-06, "loss": 0.6146, "step": 20482 }, { "epoch": 0.6277736913080789, "grad_norm": 1.8769930907121721, "learning_rate": 3.21413124211972e-06, "loss": 0.7597, "step": 20483 }, { "epoch": 0.6278043398308202, "grad_norm": 1.893411442780474, "learning_rate": 3.2136676712173647e-06, "loss": 0.6334, "step": 20484 }, { "epoch": 0.6278349883535613, "grad_norm": 1.8860265075972134, "learning_rate": 3.2132041179162e-06, "loss": 0.6567, "step": 20485 }, { "epoch": 0.6278656368763026, "grad_norm": 1.7284712234150863, "learning_rate": 3.212740582220791e-06, "loss": 0.5035, "step": 20486 }, { "epoch": 0.6278962853990437, "grad_norm": 1.6846336214219417, "learning_rate": 3.212277064135708e-06, "loss": 0.6497, "step": 20487 }, { "epoch": 0.627926933921785, "grad_norm": 1.8520473621275255, "learning_rate": 3.211813563665517e-06, "loss": 0.6373, "step": 20488 }, { "epoch": 0.6279575824445262, "grad_norm": 0.8420123145079044, "learning_rate": 3.2113500808147814e-06, "loss": 0.435, "step": 20489 }, { "epoch": 0.6279882309672674, "grad_norm": 1.8600798430082806, "learning_rate": 3.2108866155880745e-06, "loss": 0.6494, "step": 20490 }, { "epoch": 0.6280188794900086, "grad_norm": 1.8019334874980244, "learning_rate": 3.2104231679899584e-06, "loss": 0.6098, "step": 20491 }, { "epoch": 0.6280495280127498, "grad_norm": 1.8203123231175113, "learning_rate": 3.2099597380249998e-06, "loss": 0.6239, "step": 20492 }, { "epoch": 0.628080176535491, "grad_norm": 1.6813828116600422, "learning_rate": 3.2094963256977663e-06, "loss": 0.617, "step": 20493 }, { "epoch": 0.6281108250582322, "grad_norm": 0.8011237811660112, "learning_rate": 3.209032931012823e-06, "loss": 0.4387, "step": 20494 }, { "epoch": 0.6281414735809734, "grad_norm": 2.1130738426754254, "learning_rate": 3.208569553974738e-06, "loss": 0.5551, "step": 20495 }, { "epoch": 0.6281721221037146, "grad_norm": 1.5869359285553717, "learning_rate": 3.2081061945880756e-06, "loss": 0.5181, "step": 20496 }, { "epoch": 0.6282027706264558, "grad_norm": 1.6043372518582864, "learning_rate": 3.207642852857399e-06, "loss": 0.6475, "step": 20497 }, { "epoch": 0.6282334191491971, "grad_norm": 1.6510017739327898, "learning_rate": 3.207179528787278e-06, "loss": 0.5349, "step": 20498 }, { "epoch": 0.6282640676719382, "grad_norm": 1.7433039551315448, "learning_rate": 3.206716222382277e-06, "loss": 0.6375, "step": 20499 }, { "epoch": 0.6282947161946795, "grad_norm": 1.6904331284704677, "learning_rate": 3.206252933646956e-06, "loss": 0.5358, "step": 20500 }, { "epoch": 0.6283253647174206, "grad_norm": 1.8309344876804101, "learning_rate": 3.2057896625858875e-06, "loss": 0.5888, "step": 20501 }, { "epoch": 0.6283560132401618, "grad_norm": 1.6218136051997558, "learning_rate": 3.2053264092036297e-06, "loss": 0.5355, "step": 20502 }, { "epoch": 0.628386661762903, "grad_norm": 0.795315415937686, "learning_rate": 3.204863173504752e-06, "loss": 0.4131, "step": 20503 }, { "epoch": 0.6284173102856442, "grad_norm": 1.9966415876676118, "learning_rate": 3.2043999554938165e-06, "loss": 0.6269, "step": 20504 }, { "epoch": 0.6284479588083854, "grad_norm": 1.7124076749619856, "learning_rate": 3.203936755175386e-06, "loss": 0.5952, "step": 20505 }, { "epoch": 0.6284786073311266, "grad_norm": 1.680833286997206, "learning_rate": 3.2034735725540283e-06, "loss": 0.6289, "step": 20506 }, { "epoch": 0.6285092558538679, "grad_norm": 0.8005603629553749, "learning_rate": 3.203010407634305e-06, "loss": 0.4334, "step": 20507 }, { "epoch": 0.628539904376609, "grad_norm": 1.7747090889027841, "learning_rate": 3.202547260420778e-06, "loss": 0.5894, "step": 20508 }, { "epoch": 0.6285705528993503, "grad_norm": 1.7359692676921357, "learning_rate": 3.202084130918014e-06, "loss": 0.5404, "step": 20509 }, { "epoch": 0.6286012014220914, "grad_norm": 1.9273072856443914, "learning_rate": 3.201621019130576e-06, "loss": 0.6928, "step": 20510 }, { "epoch": 0.6286318499448327, "grad_norm": 1.8085345810200577, "learning_rate": 3.2011579250630244e-06, "loss": 0.6372, "step": 20511 }, { "epoch": 0.6286624984675738, "grad_norm": 1.5751519364508104, "learning_rate": 3.2006948487199264e-06, "loss": 0.5913, "step": 20512 }, { "epoch": 0.6286931469903151, "grad_norm": 0.7463297456308166, "learning_rate": 3.2002317901058387e-06, "loss": 0.4282, "step": 20513 }, { "epoch": 0.6287237955130562, "grad_norm": 1.7798602902930591, "learning_rate": 3.199768749225331e-06, "loss": 0.6026, "step": 20514 }, { "epoch": 0.6287544440357975, "grad_norm": 1.831598368895332, "learning_rate": 3.199305726082962e-06, "loss": 0.6533, "step": 20515 }, { "epoch": 0.6287850925585386, "grad_norm": 1.771422039323073, "learning_rate": 3.1988427206832927e-06, "loss": 0.6829, "step": 20516 }, { "epoch": 0.6288157410812799, "grad_norm": 1.7063523417246873, "learning_rate": 3.1983797330308886e-06, "loss": 0.6047, "step": 20517 }, { "epoch": 0.6288463896040211, "grad_norm": 1.679340403531877, "learning_rate": 3.1979167631303087e-06, "loss": 0.6184, "step": 20518 }, { "epoch": 0.6288770381267623, "grad_norm": 1.7877455528216395, "learning_rate": 3.1974538109861164e-06, "loss": 0.6479, "step": 20519 }, { "epoch": 0.6289076866495035, "grad_norm": 1.7314249929324064, "learning_rate": 3.1969908766028736e-06, "loss": 0.6238, "step": 20520 }, { "epoch": 0.6289383351722447, "grad_norm": 1.5963073937045704, "learning_rate": 3.1965279599851397e-06, "loss": 0.6055, "step": 20521 }, { "epoch": 0.6289689836949859, "grad_norm": 1.7368209585925733, "learning_rate": 3.1960650611374777e-06, "loss": 0.5616, "step": 20522 }, { "epoch": 0.6289996322177271, "grad_norm": 1.8918427245116092, "learning_rate": 3.1956021800644497e-06, "loss": 0.5901, "step": 20523 }, { "epoch": 0.6290302807404683, "grad_norm": 0.7764907233409674, "learning_rate": 3.1951393167706137e-06, "loss": 0.4163, "step": 20524 }, { "epoch": 0.6290609292632096, "grad_norm": 1.7027222447945773, "learning_rate": 3.194676471260533e-06, "loss": 0.5882, "step": 20525 }, { "epoch": 0.6290915777859507, "grad_norm": 2.088807083021128, "learning_rate": 3.194213643538766e-06, "loss": 0.671, "step": 20526 }, { "epoch": 0.629122226308692, "grad_norm": 1.5957325761922654, "learning_rate": 3.193750833609873e-06, "loss": 0.5463, "step": 20527 }, { "epoch": 0.6291528748314331, "grad_norm": 1.7772445845143667, "learning_rate": 3.193288041478416e-06, "loss": 0.5916, "step": 20528 }, { "epoch": 0.6291835233541744, "grad_norm": 1.7740015768492643, "learning_rate": 3.192825267148954e-06, "loss": 0.6072, "step": 20529 }, { "epoch": 0.6292141718769155, "grad_norm": 2.106563592016565, "learning_rate": 3.1923625106260483e-06, "loss": 0.6333, "step": 20530 }, { "epoch": 0.6292448203996568, "grad_norm": 1.7958682209491599, "learning_rate": 3.1918997719142573e-06, "loss": 0.5547, "step": 20531 }, { "epoch": 0.6292754689223979, "grad_norm": 0.7988015724369831, "learning_rate": 3.1914370510181382e-06, "loss": 0.4105, "step": 20532 }, { "epoch": 0.6293061174451391, "grad_norm": 1.794436708051027, "learning_rate": 3.190974347942255e-06, "loss": 0.6329, "step": 20533 }, { "epoch": 0.6293367659678804, "grad_norm": 1.7730830234880592, "learning_rate": 3.1905116626911636e-06, "loss": 0.5987, "step": 20534 }, { "epoch": 0.6293674144906215, "grad_norm": 1.7648875254092415, "learning_rate": 3.1900489952694225e-06, "loss": 0.6218, "step": 20535 }, { "epoch": 0.6293980630133628, "grad_norm": 1.9367203694194854, "learning_rate": 3.1895863456815933e-06, "loss": 0.6553, "step": 20536 }, { "epoch": 0.6294287115361039, "grad_norm": 1.7725007012459248, "learning_rate": 3.189123713932233e-06, "loss": 0.6672, "step": 20537 }, { "epoch": 0.6294593600588452, "grad_norm": 0.8029795212166516, "learning_rate": 3.1886611000258984e-06, "loss": 0.4001, "step": 20538 }, { "epoch": 0.6294900085815863, "grad_norm": 1.859428869761475, "learning_rate": 3.1881985039671515e-06, "loss": 0.7105, "step": 20539 }, { "epoch": 0.6295206571043276, "grad_norm": 1.6612694277913398, "learning_rate": 3.187735925760547e-06, "loss": 0.58, "step": 20540 }, { "epoch": 0.6295513056270687, "grad_norm": 0.8203464200345841, "learning_rate": 3.1872733654106443e-06, "loss": 0.4438, "step": 20541 }, { "epoch": 0.62958195414981, "grad_norm": 1.9265863411516104, "learning_rate": 3.1868108229220024e-06, "loss": 0.6644, "step": 20542 }, { "epoch": 0.6296126026725511, "grad_norm": 1.9143156778344304, "learning_rate": 3.1863482982991745e-06, "loss": 0.6529, "step": 20543 }, { "epoch": 0.6296432511952924, "grad_norm": 1.7126392400511523, "learning_rate": 3.1858857915467234e-06, "loss": 0.702, "step": 20544 }, { "epoch": 0.6296738997180336, "grad_norm": 1.8685763281745644, "learning_rate": 3.185423302669204e-06, "loss": 0.6082, "step": 20545 }, { "epoch": 0.6297045482407748, "grad_norm": 1.7058653054479038, "learning_rate": 3.184960831671171e-06, "loss": 0.4668, "step": 20546 }, { "epoch": 0.629735196763516, "grad_norm": 1.7875947542725796, "learning_rate": 3.184498378557184e-06, "loss": 0.593, "step": 20547 }, { "epoch": 0.6297658452862572, "grad_norm": 1.881378653866311, "learning_rate": 3.1840359433317993e-06, "loss": 0.7228, "step": 20548 }, { "epoch": 0.6297964938089984, "grad_norm": 1.801178029497944, "learning_rate": 3.1835735259995725e-06, "loss": 0.602, "step": 20549 }, { "epoch": 0.6298271423317396, "grad_norm": 1.868347343136499, "learning_rate": 3.1831111265650616e-06, "loss": 0.6731, "step": 20550 }, { "epoch": 0.6298577908544808, "grad_norm": 0.7827174154144594, "learning_rate": 3.1826487450328203e-06, "loss": 0.431, "step": 20551 }, { "epoch": 0.629888439377222, "grad_norm": 1.6960540523379326, "learning_rate": 3.182186381407407e-06, "loss": 0.6657, "step": 20552 }, { "epoch": 0.6299190878999632, "grad_norm": 1.8209737319765598, "learning_rate": 3.181724035693378e-06, "loss": 0.5876, "step": 20553 }, { "epoch": 0.6299497364227045, "grad_norm": 1.8294161839086198, "learning_rate": 3.1812617078952834e-06, "loss": 0.6667, "step": 20554 }, { "epoch": 0.6299803849454456, "grad_norm": 1.9009205347016842, "learning_rate": 3.1807993980176855e-06, "loss": 0.6454, "step": 20555 }, { "epoch": 0.6300110334681869, "grad_norm": 1.8061743720510557, "learning_rate": 3.1803371060651343e-06, "loss": 0.5747, "step": 20556 }, { "epoch": 0.630041681990928, "grad_norm": 1.582172718577165, "learning_rate": 3.1798748320421895e-06, "loss": 0.5808, "step": 20557 }, { "epoch": 0.6300723305136693, "grad_norm": 1.843620824646968, "learning_rate": 3.1794125759534033e-06, "loss": 0.5741, "step": 20558 }, { "epoch": 0.6301029790364104, "grad_norm": 1.7558867673622744, "learning_rate": 3.178950337803329e-06, "loss": 0.6156, "step": 20559 }, { "epoch": 0.6301336275591517, "grad_norm": 1.4763604531454075, "learning_rate": 3.1784881175965248e-06, "loss": 0.5795, "step": 20560 }, { "epoch": 0.6301642760818928, "grad_norm": 1.956979708653327, "learning_rate": 3.1780259153375426e-06, "loss": 0.6788, "step": 20561 }, { "epoch": 0.6301949246046341, "grad_norm": 1.6646531729223066, "learning_rate": 3.177563731030937e-06, "loss": 0.6202, "step": 20562 }, { "epoch": 0.6302255731273753, "grad_norm": 1.779383527753664, "learning_rate": 3.1771015646812625e-06, "loss": 0.5744, "step": 20563 }, { "epoch": 0.6302562216501164, "grad_norm": 1.580131632275643, "learning_rate": 3.1766394162930734e-06, "loss": 0.5977, "step": 20564 }, { "epoch": 0.6302868701728577, "grad_norm": 1.6584162494130699, "learning_rate": 3.1761772858709204e-06, "loss": 0.6188, "step": 20565 }, { "epoch": 0.6303175186955988, "grad_norm": 1.9418258011750256, "learning_rate": 3.1757151734193624e-06, "loss": 0.655, "step": 20566 }, { "epoch": 0.6303481672183401, "grad_norm": 1.8963419212253279, "learning_rate": 3.175253078942947e-06, "loss": 0.6015, "step": 20567 }, { "epoch": 0.6303788157410812, "grad_norm": 2.1602437910487513, "learning_rate": 3.174791002446231e-06, "loss": 0.6215, "step": 20568 }, { "epoch": 0.6304094642638225, "grad_norm": 1.6880441293261115, "learning_rate": 3.1743289439337665e-06, "loss": 0.5146, "step": 20569 }, { "epoch": 0.6304401127865636, "grad_norm": 1.6558278318128472, "learning_rate": 3.173866903410105e-06, "loss": 0.5971, "step": 20570 }, { "epoch": 0.6304707613093049, "grad_norm": 0.8087431404706698, "learning_rate": 3.173404880879801e-06, "loss": 0.4368, "step": 20571 }, { "epoch": 0.6305014098320461, "grad_norm": 1.7317480515544545, "learning_rate": 3.1729428763474057e-06, "loss": 0.6039, "step": 20572 }, { "epoch": 0.6305320583547873, "grad_norm": 1.8064338019759336, "learning_rate": 3.1724808898174712e-06, "loss": 0.5586, "step": 20573 }, { "epoch": 0.6305627068775285, "grad_norm": 1.867065751662398, "learning_rate": 3.1720189212945513e-06, "loss": 0.613, "step": 20574 }, { "epoch": 0.6305933554002697, "grad_norm": 0.7741085325664374, "learning_rate": 3.171556970783195e-06, "loss": 0.43, "step": 20575 }, { "epoch": 0.6306240039230109, "grad_norm": 1.9318572465099035, "learning_rate": 3.1710950382879586e-06, "loss": 0.6125, "step": 20576 }, { "epoch": 0.6306546524457521, "grad_norm": 2.0730497728469066, "learning_rate": 3.170633123813389e-06, "loss": 0.5474, "step": 20577 }, { "epoch": 0.6306853009684933, "grad_norm": 1.827468457307641, "learning_rate": 3.1701712273640383e-06, "loss": 0.6095, "step": 20578 }, { "epoch": 0.6307159494912346, "grad_norm": 0.7667473426642679, "learning_rate": 3.16970934894446e-06, "loss": 0.4024, "step": 20579 }, { "epoch": 0.6307465980139757, "grad_norm": 1.6213528710827545, "learning_rate": 3.169247488559204e-06, "loss": 0.5356, "step": 20580 }, { "epoch": 0.630777246536717, "grad_norm": 1.8555208996407504, "learning_rate": 3.168785646212819e-06, "loss": 0.6919, "step": 20581 }, { "epoch": 0.6308078950594581, "grad_norm": 0.8222068065796254, "learning_rate": 3.1683238219098596e-06, "loss": 0.4233, "step": 20582 }, { "epoch": 0.6308385435821994, "grad_norm": 1.5315070045170853, "learning_rate": 3.1678620156548744e-06, "loss": 0.5118, "step": 20583 }, { "epoch": 0.6308691921049405, "grad_norm": 1.8615123182478486, "learning_rate": 3.167400227452411e-06, "loss": 0.6402, "step": 20584 }, { "epoch": 0.6308998406276818, "grad_norm": 2.0494866756675765, "learning_rate": 3.166938457307025e-06, "loss": 0.6676, "step": 20585 }, { "epoch": 0.6309304891504229, "grad_norm": 2.1002029564933653, "learning_rate": 3.1664767052232603e-06, "loss": 0.591, "step": 20586 }, { "epoch": 0.6309611376731642, "grad_norm": 1.5041327994469118, "learning_rate": 3.1660149712056728e-06, "loss": 0.6404, "step": 20587 }, { "epoch": 0.6309917861959053, "grad_norm": 2.0587708563225022, "learning_rate": 3.1655532552588077e-06, "loss": 0.6243, "step": 20588 }, { "epoch": 0.6310224347186466, "grad_norm": 1.7662165017183171, "learning_rate": 3.1650915573872154e-06, "loss": 0.5585, "step": 20589 }, { "epoch": 0.6310530832413878, "grad_norm": 1.4973126711300524, "learning_rate": 3.164629877595446e-06, "loss": 0.5887, "step": 20590 }, { "epoch": 0.631083731764129, "grad_norm": 1.7082285640907753, "learning_rate": 3.164168215888048e-06, "loss": 0.6655, "step": 20591 }, { "epoch": 0.6311143802868702, "grad_norm": 1.6874912253715182, "learning_rate": 3.163706572269569e-06, "loss": 0.6148, "step": 20592 }, { "epoch": 0.6311450288096114, "grad_norm": 1.9638621004739583, "learning_rate": 3.163244946744561e-06, "loss": 0.5443, "step": 20593 }, { "epoch": 0.6311756773323526, "grad_norm": 0.7703000803427188, "learning_rate": 3.162783339317569e-06, "loss": 0.4259, "step": 20594 }, { "epoch": 0.6312063258550937, "grad_norm": 1.681140422628178, "learning_rate": 3.1623217499931434e-06, "loss": 0.5737, "step": 20595 }, { "epoch": 0.631236974377835, "grad_norm": 2.046869515834786, "learning_rate": 3.161860178775833e-06, "loss": 0.6525, "step": 20596 }, { "epoch": 0.6312676229005761, "grad_norm": 1.5897377279239533, "learning_rate": 3.161398625670182e-06, "loss": 0.6606, "step": 20597 }, { "epoch": 0.6312982714233174, "grad_norm": 1.9451053887809673, "learning_rate": 3.1609370906807436e-06, "loss": 0.6237, "step": 20598 }, { "epoch": 0.6313289199460586, "grad_norm": 1.530129722489017, "learning_rate": 3.1604755738120614e-06, "loss": 0.5867, "step": 20599 }, { "epoch": 0.6313595684687998, "grad_norm": 1.8331877045745735, "learning_rate": 3.1600140750686827e-06, "loss": 0.6106, "step": 20600 }, { "epoch": 0.631390216991541, "grad_norm": 2.216866519762971, "learning_rate": 3.1595525944551574e-06, "loss": 0.6911, "step": 20601 }, { "epoch": 0.6314208655142822, "grad_norm": 1.7790874022630763, "learning_rate": 3.159091131976031e-06, "loss": 0.5726, "step": 20602 }, { "epoch": 0.6314515140370234, "grad_norm": 1.876145080165682, "learning_rate": 3.1586296876358506e-06, "loss": 0.7303, "step": 20603 }, { "epoch": 0.6314821625597646, "grad_norm": 1.6534027226277344, "learning_rate": 3.1581682614391634e-06, "loss": 0.4839, "step": 20604 }, { "epoch": 0.6315128110825058, "grad_norm": 2.12536289350892, "learning_rate": 3.157706853390515e-06, "loss": 0.6279, "step": 20605 }, { "epoch": 0.631543459605247, "grad_norm": 1.630770071855235, "learning_rate": 3.157245463494453e-06, "loss": 0.5992, "step": 20606 }, { "epoch": 0.6315741081279882, "grad_norm": 1.5676693378052788, "learning_rate": 3.1567840917555237e-06, "loss": 0.5882, "step": 20607 }, { "epoch": 0.6316047566507295, "grad_norm": 1.7569519793180206, "learning_rate": 3.15632273817827e-06, "loss": 0.6535, "step": 20608 }, { "epoch": 0.6316354051734706, "grad_norm": 1.756850533703347, "learning_rate": 3.1558614027672417e-06, "loss": 0.5994, "step": 20609 }, { "epoch": 0.6316660536962119, "grad_norm": 1.9394946359765757, "learning_rate": 3.1554000855269833e-06, "loss": 0.6319, "step": 20610 }, { "epoch": 0.631696702218953, "grad_norm": 1.8386236975274692, "learning_rate": 3.1549387864620386e-06, "loss": 0.6354, "step": 20611 }, { "epoch": 0.6317273507416943, "grad_norm": 1.5295921269765844, "learning_rate": 3.154477505576955e-06, "loss": 0.5849, "step": 20612 }, { "epoch": 0.6317579992644354, "grad_norm": 1.6778470506476795, "learning_rate": 3.1540162428762755e-06, "loss": 0.6357, "step": 20613 }, { "epoch": 0.6317886477871767, "grad_norm": 1.7632759186345714, "learning_rate": 3.153554998364547e-06, "loss": 0.5655, "step": 20614 }, { "epoch": 0.6318192963099178, "grad_norm": 0.8188165454928438, "learning_rate": 3.153093772046314e-06, "loss": 0.4111, "step": 20615 }, { "epoch": 0.6318499448326591, "grad_norm": 1.6837122106806413, "learning_rate": 3.152632563926119e-06, "loss": 0.5584, "step": 20616 }, { "epoch": 0.6318805933554003, "grad_norm": 1.5986635299457, "learning_rate": 3.1521713740085103e-06, "loss": 0.5812, "step": 20617 }, { "epoch": 0.6319112418781415, "grad_norm": 1.6471723104657379, "learning_rate": 3.1517102022980296e-06, "loss": 0.5986, "step": 20618 }, { "epoch": 0.6319418904008827, "grad_norm": 1.9774148434510037, "learning_rate": 3.1512490487992197e-06, "loss": 0.6772, "step": 20619 }, { "epoch": 0.6319725389236239, "grad_norm": 1.5982896377206557, "learning_rate": 3.150787913516627e-06, "loss": 0.5555, "step": 20620 }, { "epoch": 0.6320031874463651, "grad_norm": 1.6664134737525644, "learning_rate": 3.1503267964547927e-06, "loss": 0.5339, "step": 20621 }, { "epoch": 0.6320338359691063, "grad_norm": 1.6941792531551343, "learning_rate": 3.149865697618263e-06, "loss": 0.5764, "step": 20622 }, { "epoch": 0.6320644844918475, "grad_norm": 1.790384044287892, "learning_rate": 3.1494046170115798e-06, "loss": 0.6632, "step": 20623 }, { "epoch": 0.6320951330145888, "grad_norm": 1.9339876163400387, "learning_rate": 3.148943554639286e-06, "loss": 0.5556, "step": 20624 }, { "epoch": 0.6321257815373299, "grad_norm": 1.9379931227921448, "learning_rate": 3.148482510505926e-06, "loss": 0.6388, "step": 20625 }, { "epoch": 0.632156430060071, "grad_norm": 1.8772127855070257, "learning_rate": 3.1480214846160405e-06, "loss": 0.6077, "step": 20626 }, { "epoch": 0.6321870785828123, "grad_norm": 0.7572178440886999, "learning_rate": 3.147560476974173e-06, "loss": 0.4057, "step": 20627 }, { "epoch": 0.6322177271055535, "grad_norm": 1.703632634075055, "learning_rate": 3.147099487584868e-06, "loss": 0.6379, "step": 20628 }, { "epoch": 0.6322483756282947, "grad_norm": 1.8470388947696161, "learning_rate": 3.1466385164526625e-06, "loss": 0.6445, "step": 20629 }, { "epoch": 0.6322790241510359, "grad_norm": 1.8465695549401877, "learning_rate": 3.1461775635821053e-06, "loss": 0.5803, "step": 20630 }, { "epoch": 0.6323096726737771, "grad_norm": 1.937339525887373, "learning_rate": 3.1457166289777335e-06, "loss": 0.6823, "step": 20631 }, { "epoch": 0.6323403211965183, "grad_norm": 1.7028379008634038, "learning_rate": 3.14525571264409e-06, "loss": 0.5574, "step": 20632 }, { "epoch": 0.6323709697192595, "grad_norm": 1.9807999561834733, "learning_rate": 3.1447948145857165e-06, "loss": 0.712, "step": 20633 }, { "epoch": 0.6324016182420007, "grad_norm": 0.81856154631167, "learning_rate": 3.1443339348071544e-06, "loss": 0.4258, "step": 20634 }, { "epoch": 0.632432266764742, "grad_norm": 1.7948460803441175, "learning_rate": 3.1438730733129445e-06, "loss": 0.606, "step": 20635 }, { "epoch": 0.6324629152874831, "grad_norm": 1.9084181791448542, "learning_rate": 3.1434122301076286e-06, "loss": 0.6423, "step": 20636 }, { "epoch": 0.6324935638102244, "grad_norm": 0.8170446817760217, "learning_rate": 3.142951405195749e-06, "loss": 0.4158, "step": 20637 }, { "epoch": 0.6325242123329655, "grad_norm": 1.542378784662037, "learning_rate": 3.14249059858184e-06, "loss": 0.6526, "step": 20638 }, { "epoch": 0.6325548608557068, "grad_norm": 1.4663188862908076, "learning_rate": 3.1420298102704505e-06, "loss": 0.6211, "step": 20639 }, { "epoch": 0.6325855093784479, "grad_norm": 1.8705255262160958, "learning_rate": 3.1415690402661124e-06, "loss": 0.6272, "step": 20640 }, { "epoch": 0.6326161579011892, "grad_norm": 1.7091958030761236, "learning_rate": 3.1411082885733747e-06, "loss": 0.5832, "step": 20641 }, { "epoch": 0.6326468064239303, "grad_norm": 1.540790482789843, "learning_rate": 3.1406475551967703e-06, "loss": 0.5966, "step": 20642 }, { "epoch": 0.6326774549466716, "grad_norm": 1.704117779905222, "learning_rate": 3.140186840140841e-06, "loss": 0.652, "step": 20643 }, { "epoch": 0.6327081034694128, "grad_norm": 1.6621512693707894, "learning_rate": 3.139726143410127e-06, "loss": 0.5557, "step": 20644 }, { "epoch": 0.632738751992154, "grad_norm": 1.7688976151503824, "learning_rate": 3.139265465009168e-06, "loss": 0.5231, "step": 20645 }, { "epoch": 0.6327694005148952, "grad_norm": 1.8595458453847666, "learning_rate": 3.1388048049425007e-06, "loss": 0.6687, "step": 20646 }, { "epoch": 0.6328000490376364, "grad_norm": 1.6697849703705367, "learning_rate": 3.1383441632146673e-06, "loss": 0.5878, "step": 20647 }, { "epoch": 0.6328306975603776, "grad_norm": 1.8111921188063522, "learning_rate": 3.1378835398302043e-06, "loss": 0.5992, "step": 20648 }, { "epoch": 0.6328613460831188, "grad_norm": 1.9836239718026927, "learning_rate": 3.137422934793652e-06, "loss": 0.5876, "step": 20649 }, { "epoch": 0.63289199460586, "grad_norm": 1.7511467841299693, "learning_rate": 3.136962348109549e-06, "loss": 0.5737, "step": 20650 }, { "epoch": 0.6329226431286012, "grad_norm": 1.7257239336226786, "learning_rate": 3.13650177978243e-06, "loss": 0.5994, "step": 20651 }, { "epoch": 0.6329532916513424, "grad_norm": 0.7676942231849078, "learning_rate": 3.1360412298168384e-06, "loss": 0.4153, "step": 20652 }, { "epoch": 0.6329839401740837, "grad_norm": 1.785741364067304, "learning_rate": 3.1355806982173086e-06, "loss": 0.5917, "step": 20653 }, { "epoch": 0.6330145886968248, "grad_norm": 1.5664620779334737, "learning_rate": 3.1351201849883784e-06, "loss": 0.5447, "step": 20654 }, { "epoch": 0.6330452372195661, "grad_norm": 1.9927579832337827, "learning_rate": 3.134659690134587e-06, "loss": 0.529, "step": 20655 }, { "epoch": 0.6330758857423072, "grad_norm": 1.4434713403960462, "learning_rate": 3.1341992136604706e-06, "loss": 0.6052, "step": 20656 }, { "epoch": 0.6331065342650484, "grad_norm": 1.792927197868223, "learning_rate": 3.1337387555705667e-06, "loss": 0.6195, "step": 20657 }, { "epoch": 0.6331371827877896, "grad_norm": 1.7096608060098095, "learning_rate": 3.1332783158694123e-06, "loss": 0.5858, "step": 20658 }, { "epoch": 0.6331678313105308, "grad_norm": 0.7789655629882515, "learning_rate": 3.132817894561544e-06, "loss": 0.3992, "step": 20659 }, { "epoch": 0.633198479833272, "grad_norm": 1.627742769662724, "learning_rate": 3.1323574916515e-06, "loss": 0.5448, "step": 20660 }, { "epoch": 0.6332291283560132, "grad_norm": 1.9604169610543662, "learning_rate": 3.1318971071438154e-06, "loss": 0.6273, "step": 20661 }, { "epoch": 0.6332597768787545, "grad_norm": 1.6850477708633635, "learning_rate": 3.131436741043024e-06, "loss": 0.6057, "step": 20662 }, { "epoch": 0.6332904254014956, "grad_norm": 1.7508374088878074, "learning_rate": 3.130976393353668e-06, "loss": 0.5696, "step": 20663 }, { "epoch": 0.6333210739242369, "grad_norm": 1.6820357576574265, "learning_rate": 3.1305160640802786e-06, "loss": 0.5659, "step": 20664 }, { "epoch": 0.633351722446978, "grad_norm": 0.8005494773952327, "learning_rate": 3.1300557532273913e-06, "loss": 0.4154, "step": 20665 }, { "epoch": 0.6333823709697193, "grad_norm": 2.121048267443057, "learning_rate": 3.129595460799544e-06, "loss": 0.6209, "step": 20666 }, { "epoch": 0.6334130194924604, "grad_norm": 1.6316664668968632, "learning_rate": 3.129135186801271e-06, "loss": 0.6373, "step": 20667 }, { "epoch": 0.6334436680152017, "grad_norm": 0.7860853090688118, "learning_rate": 3.128674931237108e-06, "loss": 0.4027, "step": 20668 }, { "epoch": 0.6334743165379428, "grad_norm": 0.783565625571643, "learning_rate": 3.12821469411159e-06, "loss": 0.4269, "step": 20669 }, { "epoch": 0.6335049650606841, "grad_norm": 1.6944847869223791, "learning_rate": 3.1277544754292505e-06, "loss": 0.6434, "step": 20670 }, { "epoch": 0.6335356135834253, "grad_norm": 1.9638019898648351, "learning_rate": 3.127294275194627e-06, "loss": 0.6575, "step": 20671 }, { "epoch": 0.6335662621061665, "grad_norm": 1.764127111351941, "learning_rate": 3.1268340934122515e-06, "loss": 0.6211, "step": 20672 }, { "epoch": 0.6335969106289077, "grad_norm": 1.9439015957825199, "learning_rate": 3.126373930086658e-06, "loss": 0.7041, "step": 20673 }, { "epoch": 0.6336275591516489, "grad_norm": 1.5400391520516552, "learning_rate": 3.125913785222382e-06, "loss": 0.5409, "step": 20674 }, { "epoch": 0.6336582076743901, "grad_norm": 1.869693419567552, "learning_rate": 3.1254536588239566e-06, "loss": 0.5724, "step": 20675 }, { "epoch": 0.6336888561971313, "grad_norm": 1.7124002567647634, "learning_rate": 3.1249935508959163e-06, "loss": 0.6345, "step": 20676 }, { "epoch": 0.6337195047198725, "grad_norm": 1.76919890963178, "learning_rate": 3.1245334614427946e-06, "loss": 0.5624, "step": 20677 }, { "epoch": 0.6337501532426137, "grad_norm": 1.6288898076252858, "learning_rate": 3.1240733904691234e-06, "loss": 0.5905, "step": 20678 }, { "epoch": 0.6337808017653549, "grad_norm": 1.6719277939478288, "learning_rate": 3.1236133379794386e-06, "loss": 0.6118, "step": 20679 }, { "epoch": 0.6338114502880962, "grad_norm": 1.784195875608321, "learning_rate": 3.1231533039782724e-06, "loss": 0.5937, "step": 20680 }, { "epoch": 0.6338420988108373, "grad_norm": 1.9387210325390238, "learning_rate": 3.1226932884701537e-06, "loss": 0.6575, "step": 20681 }, { "epoch": 0.6338727473335786, "grad_norm": 1.7842824923323402, "learning_rate": 3.1222332914596214e-06, "loss": 0.6379, "step": 20682 }, { "epoch": 0.6339033958563197, "grad_norm": 1.6486796625906053, "learning_rate": 3.121773312951202e-06, "loss": 0.6144, "step": 20683 }, { "epoch": 0.633934044379061, "grad_norm": 1.6961487813083702, "learning_rate": 3.121313352949434e-06, "loss": 0.6761, "step": 20684 }, { "epoch": 0.6339646929018021, "grad_norm": 1.6994503236953893, "learning_rate": 3.120853411458845e-06, "loss": 0.4963, "step": 20685 }, { "epoch": 0.6339953414245434, "grad_norm": 1.6317287324016387, "learning_rate": 3.120393488483967e-06, "loss": 0.564, "step": 20686 }, { "epoch": 0.6340259899472845, "grad_norm": 0.8342582578644038, "learning_rate": 3.119933584029334e-06, "loss": 0.4299, "step": 20687 }, { "epoch": 0.6340566384700257, "grad_norm": 1.5945745698619094, "learning_rate": 3.1194736980994764e-06, "loss": 0.5525, "step": 20688 }, { "epoch": 0.634087286992767, "grad_norm": 1.6316919949621114, "learning_rate": 3.1190138306989247e-06, "loss": 0.5966, "step": 20689 }, { "epoch": 0.6341179355155081, "grad_norm": 1.9843328154729234, "learning_rate": 3.118553981832212e-06, "loss": 0.6246, "step": 20690 }, { "epoch": 0.6341485840382494, "grad_norm": 1.7264863304432023, "learning_rate": 3.118094151503869e-06, "loss": 0.6204, "step": 20691 }, { "epoch": 0.6341792325609905, "grad_norm": 1.87262400540405, "learning_rate": 3.1176343397184226e-06, "loss": 0.652, "step": 20692 }, { "epoch": 0.6342098810837318, "grad_norm": 1.8246652633270235, "learning_rate": 3.1171745464804106e-06, "loss": 0.5507, "step": 20693 }, { "epoch": 0.6342405296064729, "grad_norm": 1.7981692481134053, "learning_rate": 3.1167147717943556e-06, "loss": 0.5903, "step": 20694 }, { "epoch": 0.6342711781292142, "grad_norm": 1.8515550743151101, "learning_rate": 3.116255015664795e-06, "loss": 0.6251, "step": 20695 }, { "epoch": 0.6343018266519553, "grad_norm": 1.7470580730824272, "learning_rate": 3.1157952780962555e-06, "loss": 0.6048, "step": 20696 }, { "epoch": 0.6343324751746966, "grad_norm": 1.8126182300481366, "learning_rate": 3.1153355590932655e-06, "loss": 0.6087, "step": 20697 }, { "epoch": 0.6343631236974377, "grad_norm": 1.7193292729844105, "learning_rate": 3.114875858660358e-06, "loss": 0.5922, "step": 20698 }, { "epoch": 0.634393772220179, "grad_norm": 1.8383755260953547, "learning_rate": 3.11441617680206e-06, "loss": 0.7079, "step": 20699 }, { "epoch": 0.6344244207429202, "grad_norm": 1.585724875955732, "learning_rate": 3.113956513522902e-06, "loss": 0.5635, "step": 20700 }, { "epoch": 0.6344550692656614, "grad_norm": 0.7649192265579107, "learning_rate": 3.1134968688274135e-06, "loss": 0.4186, "step": 20701 }, { "epoch": 0.6344857177884026, "grad_norm": 1.7539112272766955, "learning_rate": 3.1130372427201215e-06, "loss": 0.5538, "step": 20702 }, { "epoch": 0.6345163663111438, "grad_norm": 1.9766151059212718, "learning_rate": 3.1125776352055583e-06, "loss": 0.6451, "step": 20703 }, { "epoch": 0.634547014833885, "grad_norm": 0.8349811476476676, "learning_rate": 3.112118046288251e-06, "loss": 0.4445, "step": 20704 }, { "epoch": 0.6345776633566262, "grad_norm": 1.685995451779664, "learning_rate": 3.1116584759727255e-06, "loss": 0.6308, "step": 20705 }, { "epoch": 0.6346083118793674, "grad_norm": 0.7845147499677652, "learning_rate": 3.1111989242635144e-06, "loss": 0.4286, "step": 20706 }, { "epoch": 0.6346389604021087, "grad_norm": 1.8753007470413738, "learning_rate": 3.110739391165142e-06, "loss": 0.5859, "step": 20707 }, { "epoch": 0.6346696089248498, "grad_norm": 1.8892174419275034, "learning_rate": 3.110279876682137e-06, "loss": 0.6037, "step": 20708 }, { "epoch": 0.6347002574475911, "grad_norm": 1.9597293029049454, "learning_rate": 3.1098203808190297e-06, "loss": 0.6857, "step": 20709 }, { "epoch": 0.6347309059703322, "grad_norm": 1.748061334780207, "learning_rate": 3.1093609035803446e-06, "loss": 0.6302, "step": 20710 }, { "epoch": 0.6347615544930735, "grad_norm": 1.7251760077981622, "learning_rate": 3.1089014449706114e-06, "loss": 0.5281, "step": 20711 }, { "epoch": 0.6347922030158146, "grad_norm": 1.8622796236793049, "learning_rate": 3.1084420049943553e-06, "loss": 0.5936, "step": 20712 }, { "epoch": 0.6348228515385559, "grad_norm": 0.8254477866063524, "learning_rate": 3.107982583656104e-06, "loss": 0.456, "step": 20713 }, { "epoch": 0.634853500061297, "grad_norm": 1.7493987351132745, "learning_rate": 3.1075231809603847e-06, "loss": 0.646, "step": 20714 }, { "epoch": 0.6348841485840383, "grad_norm": 1.7347529176181884, "learning_rate": 3.107063796911725e-06, "loss": 0.5633, "step": 20715 }, { "epoch": 0.6349147971067794, "grad_norm": 1.5588999394506122, "learning_rate": 3.106604431514648e-06, "loss": 0.5298, "step": 20716 }, { "epoch": 0.6349454456295207, "grad_norm": 2.1070557937290464, "learning_rate": 3.106145084773683e-06, "loss": 0.7073, "step": 20717 }, { "epoch": 0.6349760941522619, "grad_norm": 1.4559751378403538, "learning_rate": 3.105685756693355e-06, "loss": 0.6169, "step": 20718 }, { "epoch": 0.635006742675003, "grad_norm": 1.7545055464141541, "learning_rate": 3.105226447278189e-06, "loss": 0.6001, "step": 20719 }, { "epoch": 0.6350373911977443, "grad_norm": 2.010896640535331, "learning_rate": 3.104767156532713e-06, "loss": 0.6092, "step": 20720 }, { "epoch": 0.6350680397204854, "grad_norm": 1.8211945212488774, "learning_rate": 3.1043078844614495e-06, "loss": 0.5776, "step": 20721 }, { "epoch": 0.6350986882432267, "grad_norm": 1.6801811921834031, "learning_rate": 3.1038486310689266e-06, "loss": 0.6434, "step": 20722 }, { "epoch": 0.6351293367659678, "grad_norm": 1.9084939570896358, "learning_rate": 3.1033893963596695e-06, "loss": 0.62, "step": 20723 }, { "epoch": 0.6351599852887091, "grad_norm": 1.678188712909797, "learning_rate": 3.1029301803381984e-06, "loss": 0.5414, "step": 20724 }, { "epoch": 0.6351906338114502, "grad_norm": 1.7202728135473977, "learning_rate": 3.1024709830090453e-06, "loss": 0.6325, "step": 20725 }, { "epoch": 0.6352212823341915, "grad_norm": 1.5128955332200558, "learning_rate": 3.10201180437673e-06, "loss": 0.5299, "step": 20726 }, { "epoch": 0.6352519308569327, "grad_norm": 1.7823915709131202, "learning_rate": 3.1015526444457767e-06, "loss": 0.594, "step": 20727 }, { "epoch": 0.6352825793796739, "grad_norm": 1.7995719105405628, "learning_rate": 3.1010935032207123e-06, "loss": 0.6309, "step": 20728 }, { "epoch": 0.6353132279024151, "grad_norm": 1.6143757755593895, "learning_rate": 3.100634380706058e-06, "loss": 0.6208, "step": 20729 }, { "epoch": 0.6353438764251563, "grad_norm": 1.8525868785719348, "learning_rate": 3.10017527690634e-06, "loss": 0.6816, "step": 20730 }, { "epoch": 0.6353745249478975, "grad_norm": 1.7924813190803395, "learning_rate": 3.099716191826082e-06, "loss": 0.6229, "step": 20731 }, { "epoch": 0.6354051734706387, "grad_norm": 1.5411852721801524, "learning_rate": 3.099257125469805e-06, "loss": 0.513, "step": 20732 }, { "epoch": 0.6354358219933799, "grad_norm": 1.810903732085817, "learning_rate": 3.0987980778420346e-06, "loss": 0.6326, "step": 20733 }, { "epoch": 0.6354664705161212, "grad_norm": 1.7219994353601173, "learning_rate": 3.0983390489472948e-06, "loss": 0.5279, "step": 20734 }, { "epoch": 0.6354971190388623, "grad_norm": 1.8075873216516853, "learning_rate": 3.0978800387901033e-06, "loss": 0.564, "step": 20735 }, { "epoch": 0.6355277675616036, "grad_norm": 1.625941775909582, "learning_rate": 3.09742104737499e-06, "loss": 0.6157, "step": 20736 }, { "epoch": 0.6355584160843447, "grad_norm": 1.918405518408745, "learning_rate": 3.0969620747064704e-06, "loss": 0.6774, "step": 20737 }, { "epoch": 0.635589064607086, "grad_norm": 1.8661379868164054, "learning_rate": 3.096503120789074e-06, "loss": 0.6543, "step": 20738 }, { "epoch": 0.6356197131298271, "grad_norm": 1.8516986269632212, "learning_rate": 3.0960441856273183e-06, "loss": 0.5529, "step": 20739 }, { "epoch": 0.6356503616525684, "grad_norm": 1.7677891394658867, "learning_rate": 3.095585269225725e-06, "loss": 0.6468, "step": 20740 }, { "epoch": 0.6356810101753095, "grad_norm": 1.7048311899874087, "learning_rate": 3.095126371588818e-06, "loss": 0.6026, "step": 20741 }, { "epoch": 0.6357116586980508, "grad_norm": 0.805821047706876, "learning_rate": 3.0946674927211186e-06, "loss": 0.4158, "step": 20742 }, { "epoch": 0.635742307220792, "grad_norm": 1.5526840647372389, "learning_rate": 3.094208632627147e-06, "loss": 0.584, "step": 20743 }, { "epoch": 0.6357729557435332, "grad_norm": 1.7209800510983295, "learning_rate": 3.093749791311427e-06, "loss": 0.6148, "step": 20744 }, { "epoch": 0.6358036042662744, "grad_norm": 1.6227862533295507, "learning_rate": 3.093290968778478e-06, "loss": 0.5791, "step": 20745 }, { "epoch": 0.6358342527890156, "grad_norm": 1.6800777326369807, "learning_rate": 3.0928321650328187e-06, "loss": 0.6031, "step": 20746 }, { "epoch": 0.6358649013117568, "grad_norm": 0.8041956756685935, "learning_rate": 3.092373380078974e-06, "loss": 0.4025, "step": 20747 }, { "epoch": 0.635895549834498, "grad_norm": 1.633522288838291, "learning_rate": 3.091914613921461e-06, "loss": 0.5498, "step": 20748 }, { "epoch": 0.6359261983572392, "grad_norm": 1.945194600379558, "learning_rate": 3.091455866564803e-06, "loss": 0.6248, "step": 20749 }, { "epoch": 0.6359568468799803, "grad_norm": 1.5826273406896285, "learning_rate": 3.0909971380135184e-06, "loss": 0.6764, "step": 20750 }, { "epoch": 0.6359874954027216, "grad_norm": 1.8995301778292903, "learning_rate": 3.0905384282721262e-06, "loss": 0.5867, "step": 20751 }, { "epoch": 0.6360181439254627, "grad_norm": 1.6237423635985324, "learning_rate": 3.0900797373451485e-06, "loss": 0.5721, "step": 20752 }, { "epoch": 0.636048792448204, "grad_norm": 1.9006585319336424, "learning_rate": 3.089621065237104e-06, "loss": 0.6314, "step": 20753 }, { "epoch": 0.6360794409709452, "grad_norm": 1.7085909100560808, "learning_rate": 3.089162411952511e-06, "loss": 0.6339, "step": 20754 }, { "epoch": 0.6361100894936864, "grad_norm": 1.9079465449074857, "learning_rate": 3.0887037774958906e-06, "loss": 0.6506, "step": 20755 }, { "epoch": 0.6361407380164276, "grad_norm": 1.608747392705523, "learning_rate": 3.088245161871759e-06, "loss": 0.5973, "step": 20756 }, { "epoch": 0.6361713865391688, "grad_norm": 1.7511192536432985, "learning_rate": 3.0877865650846395e-06, "loss": 0.6483, "step": 20757 }, { "epoch": 0.63620203506191, "grad_norm": 1.6679070793796198, "learning_rate": 3.087327987139048e-06, "loss": 0.5989, "step": 20758 }, { "epoch": 0.6362326835846512, "grad_norm": 0.9018726766502613, "learning_rate": 3.0868694280395006e-06, "loss": 0.4217, "step": 20759 }, { "epoch": 0.6362633321073924, "grad_norm": 1.6853655902241722, "learning_rate": 3.086410887790522e-06, "loss": 0.5818, "step": 20760 }, { "epoch": 0.6362939806301336, "grad_norm": 1.7330579107832071, "learning_rate": 3.0859523663966244e-06, "loss": 0.5794, "step": 20761 }, { "epoch": 0.6363246291528748, "grad_norm": 1.5137190501381517, "learning_rate": 3.0854938638623276e-06, "loss": 0.5135, "step": 20762 }, { "epoch": 0.6363552776756161, "grad_norm": 1.6309018880138884, "learning_rate": 3.08503538019215e-06, "loss": 0.5969, "step": 20763 }, { "epoch": 0.6363859261983572, "grad_norm": 0.7762416183018767, "learning_rate": 3.084576915390609e-06, "loss": 0.3814, "step": 20764 }, { "epoch": 0.6364165747210985, "grad_norm": 1.5975441405017001, "learning_rate": 3.084118469462221e-06, "loss": 0.5701, "step": 20765 }, { "epoch": 0.6364472232438396, "grad_norm": 1.9710726948535584, "learning_rate": 3.083660042411505e-06, "loss": 0.611, "step": 20766 }, { "epoch": 0.6364778717665809, "grad_norm": 1.6819686964749772, "learning_rate": 3.0832016342429754e-06, "loss": 0.5844, "step": 20767 }, { "epoch": 0.636508520289322, "grad_norm": 1.9542812015225854, "learning_rate": 3.0827432449611527e-06, "loss": 0.6829, "step": 20768 }, { "epoch": 0.6365391688120633, "grad_norm": 1.6637659494359303, "learning_rate": 3.082284874570551e-06, "loss": 0.5898, "step": 20769 }, { "epoch": 0.6365698173348044, "grad_norm": 2.024374164530236, "learning_rate": 3.081826523075685e-06, "loss": 0.5967, "step": 20770 }, { "epoch": 0.6366004658575457, "grad_norm": 1.7248286120876788, "learning_rate": 3.0813681904810748e-06, "loss": 0.5899, "step": 20771 }, { "epoch": 0.6366311143802869, "grad_norm": 1.6568043729092918, "learning_rate": 3.080909876791235e-06, "loss": 0.5613, "step": 20772 }, { "epoch": 0.6366617629030281, "grad_norm": 1.598670166236102, "learning_rate": 3.0804515820106797e-06, "loss": 0.5506, "step": 20773 }, { "epoch": 0.6366924114257693, "grad_norm": 1.6298587287012913, "learning_rate": 3.079993306143927e-06, "loss": 0.5517, "step": 20774 }, { "epoch": 0.6367230599485105, "grad_norm": 1.940021306158929, "learning_rate": 3.0795350491954904e-06, "loss": 0.6429, "step": 20775 }, { "epoch": 0.6367537084712517, "grad_norm": 1.7102904570554656, "learning_rate": 3.079076811169888e-06, "loss": 0.581, "step": 20776 }, { "epoch": 0.6367843569939929, "grad_norm": 0.8256550794199924, "learning_rate": 3.0786185920716337e-06, "loss": 0.4028, "step": 20777 }, { "epoch": 0.6368150055167341, "grad_norm": 1.9625729542955992, "learning_rate": 3.0781603919052394e-06, "loss": 0.6392, "step": 20778 }, { "epoch": 0.6368456540394754, "grad_norm": 2.18449653889642, "learning_rate": 3.0777022106752254e-06, "loss": 0.627, "step": 20779 }, { "epoch": 0.6368763025622165, "grad_norm": 1.954177229188417, "learning_rate": 3.0772440483861022e-06, "loss": 0.6413, "step": 20780 }, { "epoch": 0.6369069510849577, "grad_norm": 1.8953200469717564, "learning_rate": 3.076785905042384e-06, "loss": 0.603, "step": 20781 }, { "epoch": 0.6369375996076989, "grad_norm": 1.9052628866777463, "learning_rate": 3.076327780648588e-06, "loss": 0.6535, "step": 20782 }, { "epoch": 0.6369682481304401, "grad_norm": 1.8254420840343883, "learning_rate": 3.0758696752092255e-06, "loss": 0.5654, "step": 20783 }, { "epoch": 0.6369988966531813, "grad_norm": 1.8849125165137934, "learning_rate": 3.0754115887288116e-06, "loss": 0.6154, "step": 20784 }, { "epoch": 0.6370295451759225, "grad_norm": 1.8373616220283386, "learning_rate": 3.0749535212118608e-06, "loss": 0.4966, "step": 20785 }, { "epoch": 0.6370601936986637, "grad_norm": 0.7789431234287083, "learning_rate": 3.074495472662884e-06, "loss": 0.4258, "step": 20786 }, { "epoch": 0.6370908422214049, "grad_norm": 1.712499141347845, "learning_rate": 3.0740374430863966e-06, "loss": 0.624, "step": 20787 }, { "epoch": 0.6371214907441461, "grad_norm": 1.697145762909831, "learning_rate": 3.0735794324869127e-06, "loss": 0.6045, "step": 20788 }, { "epoch": 0.6371521392668873, "grad_norm": 1.7751652211420068, "learning_rate": 3.0731214408689403e-06, "loss": 0.6006, "step": 20789 }, { "epoch": 0.6371827877896286, "grad_norm": 1.7818007102097972, "learning_rate": 3.0726634682369984e-06, "loss": 0.4706, "step": 20790 }, { "epoch": 0.6372134363123697, "grad_norm": 2.092155918159525, "learning_rate": 3.0722055145955953e-06, "loss": 0.615, "step": 20791 }, { "epoch": 0.637244084835111, "grad_norm": 1.8446557955383678, "learning_rate": 3.0717475799492436e-06, "loss": 0.6708, "step": 20792 }, { "epoch": 0.6372747333578521, "grad_norm": 1.8346214689158706, "learning_rate": 3.0712896643024576e-06, "loss": 0.6449, "step": 20793 }, { "epoch": 0.6373053818805934, "grad_norm": 0.7786941214282904, "learning_rate": 3.0708317676597464e-06, "loss": 0.4118, "step": 20794 }, { "epoch": 0.6373360304033345, "grad_norm": 1.700238370634972, "learning_rate": 3.070373890025625e-06, "loss": 0.5595, "step": 20795 }, { "epoch": 0.6373666789260758, "grad_norm": 0.7759493659696406, "learning_rate": 3.0699160314046033e-06, "loss": 0.4277, "step": 20796 }, { "epoch": 0.6373973274488169, "grad_norm": 0.7932835121403086, "learning_rate": 3.0694581918011916e-06, "loss": 0.405, "step": 20797 }, { "epoch": 0.6374279759715582, "grad_norm": 1.5189108988748656, "learning_rate": 3.0690003712199037e-06, "loss": 0.5082, "step": 20798 }, { "epoch": 0.6374586244942994, "grad_norm": 1.6864294167771214, "learning_rate": 3.068542569665249e-06, "loss": 0.5178, "step": 20799 }, { "epoch": 0.6374892730170406, "grad_norm": 1.7815893354722427, "learning_rate": 3.0680847871417373e-06, "loss": 0.7169, "step": 20800 }, { "epoch": 0.6375199215397818, "grad_norm": 0.7896652928234837, "learning_rate": 3.0676270236538823e-06, "loss": 0.4017, "step": 20801 }, { "epoch": 0.637550570062523, "grad_norm": 1.9293322428764794, "learning_rate": 3.0671692792061904e-06, "loss": 0.6043, "step": 20802 }, { "epoch": 0.6375812185852642, "grad_norm": 1.765895216102873, "learning_rate": 3.0667115538031766e-06, "loss": 0.6598, "step": 20803 }, { "epoch": 0.6376118671080054, "grad_norm": 1.8536283606148627, "learning_rate": 3.0662538474493473e-06, "loss": 0.5379, "step": 20804 }, { "epoch": 0.6376425156307466, "grad_norm": 1.7521735712332338, "learning_rate": 3.0657961601492133e-06, "loss": 0.5772, "step": 20805 }, { "epoch": 0.6376731641534878, "grad_norm": 1.8172627705706177, "learning_rate": 3.065338491907285e-06, "loss": 0.5809, "step": 20806 }, { "epoch": 0.637703812676229, "grad_norm": 1.6873373955823663, "learning_rate": 3.0648808427280717e-06, "loss": 0.5142, "step": 20807 }, { "epoch": 0.6377344611989703, "grad_norm": 1.9342826787258904, "learning_rate": 3.0644232126160815e-06, "loss": 0.6187, "step": 20808 }, { "epoch": 0.6377651097217114, "grad_norm": 1.8912942595751077, "learning_rate": 3.0639656015758256e-06, "loss": 0.606, "step": 20809 }, { "epoch": 0.6377957582444527, "grad_norm": 1.679694424267562, "learning_rate": 3.063508009611812e-06, "loss": 0.5798, "step": 20810 }, { "epoch": 0.6378264067671938, "grad_norm": 1.7033374097883043, "learning_rate": 3.0630504367285493e-06, "loss": 0.6319, "step": 20811 }, { "epoch": 0.637857055289935, "grad_norm": 1.8485960313811587, "learning_rate": 3.0625928829305474e-06, "loss": 0.6283, "step": 20812 }, { "epoch": 0.6378877038126762, "grad_norm": 1.6787363351736748, "learning_rate": 3.0621353482223116e-06, "loss": 0.5762, "step": 20813 }, { "epoch": 0.6379183523354174, "grad_norm": 1.58004350929684, "learning_rate": 3.061677832608353e-06, "loss": 0.6073, "step": 20814 }, { "epoch": 0.6379490008581586, "grad_norm": 1.9471504013080134, "learning_rate": 3.0612203360931792e-06, "loss": 0.5577, "step": 20815 }, { "epoch": 0.6379796493808998, "grad_norm": 1.7969324806485552, "learning_rate": 3.060762858681296e-06, "loss": 0.5927, "step": 20816 }, { "epoch": 0.638010297903641, "grad_norm": 2.084676090664511, "learning_rate": 3.0603054003772137e-06, "loss": 0.758, "step": 20817 }, { "epoch": 0.6380409464263822, "grad_norm": 1.7335488762259328, "learning_rate": 3.059847961185438e-06, "loss": 0.6249, "step": 20818 }, { "epoch": 0.6380715949491235, "grad_norm": 1.600467229060709, "learning_rate": 3.059390541110476e-06, "loss": 0.5868, "step": 20819 }, { "epoch": 0.6381022434718646, "grad_norm": 1.7707459556726173, "learning_rate": 3.0589331401568383e-06, "loss": 0.5936, "step": 20820 }, { "epoch": 0.6381328919946059, "grad_norm": 1.742249705019826, "learning_rate": 3.0584757583290257e-06, "loss": 0.5792, "step": 20821 }, { "epoch": 0.638163540517347, "grad_norm": 0.8794829893263938, "learning_rate": 3.0580183956315513e-06, "loss": 0.4191, "step": 20822 }, { "epoch": 0.6381941890400883, "grad_norm": 1.8252997114398348, "learning_rate": 3.057561052068917e-06, "loss": 0.6546, "step": 20823 }, { "epoch": 0.6382248375628294, "grad_norm": 1.7494143023746072, "learning_rate": 3.0571037276456293e-06, "loss": 0.5939, "step": 20824 }, { "epoch": 0.6382554860855707, "grad_norm": 1.8106515336402587, "learning_rate": 3.0566464223661975e-06, "loss": 0.509, "step": 20825 }, { "epoch": 0.6382861346083119, "grad_norm": 1.701098665071213, "learning_rate": 3.056189136235126e-06, "loss": 0.612, "step": 20826 }, { "epoch": 0.6383167831310531, "grad_norm": 2.058963955123678, "learning_rate": 3.0557318692569183e-06, "loss": 0.6174, "step": 20827 }, { "epoch": 0.6383474316537943, "grad_norm": 0.7731531915306822, "learning_rate": 3.0552746214360834e-06, "loss": 0.4104, "step": 20828 }, { "epoch": 0.6383780801765355, "grad_norm": 1.414537643824622, "learning_rate": 3.054817392777124e-06, "loss": 0.4102, "step": 20829 }, { "epoch": 0.6384087286992767, "grad_norm": 1.9355990402704064, "learning_rate": 3.054360183284548e-06, "loss": 0.5959, "step": 20830 }, { "epoch": 0.6384393772220179, "grad_norm": 0.8116651220819597, "learning_rate": 3.0539029929628596e-06, "loss": 0.4325, "step": 20831 }, { "epoch": 0.6384700257447591, "grad_norm": 1.768266110084487, "learning_rate": 3.05344582181656e-06, "loss": 0.646, "step": 20832 }, { "epoch": 0.6385006742675003, "grad_norm": 1.8597697060972205, "learning_rate": 3.05298866985016e-06, "loss": 0.6415, "step": 20833 }, { "epoch": 0.6385313227902415, "grad_norm": 1.6032786202166616, "learning_rate": 3.05253153706816e-06, "loss": 0.5309, "step": 20834 }, { "epoch": 0.6385619713129828, "grad_norm": 0.7845641282909234, "learning_rate": 3.052074423475063e-06, "loss": 0.4234, "step": 20835 }, { "epoch": 0.6385926198357239, "grad_norm": 1.6594699964804187, "learning_rate": 3.051617329075377e-06, "loss": 0.6009, "step": 20836 }, { "epoch": 0.6386232683584652, "grad_norm": 2.017891248280549, "learning_rate": 3.051160253873603e-06, "loss": 0.5583, "step": 20837 }, { "epoch": 0.6386539168812063, "grad_norm": 1.701894301073669, "learning_rate": 3.0507031978742463e-06, "loss": 0.5388, "step": 20838 }, { "epoch": 0.6386845654039476, "grad_norm": 0.7792574149403756, "learning_rate": 3.0502461610818095e-06, "loss": 0.4183, "step": 20839 }, { "epoch": 0.6387152139266887, "grad_norm": 1.636407405522348, "learning_rate": 3.0497891435007952e-06, "loss": 0.6127, "step": 20840 }, { "epoch": 0.63874586244943, "grad_norm": 2.0106570176677643, "learning_rate": 3.0493321451357093e-06, "loss": 0.5891, "step": 20841 }, { "epoch": 0.6387765109721711, "grad_norm": 1.8370597218458082, "learning_rate": 3.0488751659910533e-06, "loss": 0.663, "step": 20842 }, { "epoch": 0.6388071594949123, "grad_norm": 1.951023602571019, "learning_rate": 3.048418206071326e-06, "loss": 0.6176, "step": 20843 }, { "epoch": 0.6388378080176536, "grad_norm": 1.822892381381252, "learning_rate": 3.0479612653810376e-06, "loss": 0.6322, "step": 20844 }, { "epoch": 0.6388684565403947, "grad_norm": 0.8027179814358247, "learning_rate": 3.0475043439246845e-06, "loss": 0.4141, "step": 20845 }, { "epoch": 0.638899105063136, "grad_norm": 1.801303136613631, "learning_rate": 3.0470474417067696e-06, "loss": 0.5529, "step": 20846 }, { "epoch": 0.6389297535858771, "grad_norm": 0.7499922204305984, "learning_rate": 3.0465905587317967e-06, "loss": 0.3873, "step": 20847 }, { "epoch": 0.6389604021086184, "grad_norm": 1.8322789356415252, "learning_rate": 3.0461336950042664e-06, "loss": 0.559, "step": 20848 }, { "epoch": 0.6389910506313595, "grad_norm": 1.7924364747825583, "learning_rate": 3.0456768505286815e-06, "loss": 0.5535, "step": 20849 }, { "epoch": 0.6390216991541008, "grad_norm": 1.7818878166060101, "learning_rate": 3.0452200253095433e-06, "loss": 0.615, "step": 20850 }, { "epoch": 0.6390523476768419, "grad_norm": 2.119262811562729, "learning_rate": 3.044763219351351e-06, "loss": 0.6816, "step": 20851 }, { "epoch": 0.6390829961995832, "grad_norm": 2.0612372819276414, "learning_rate": 3.044306432658607e-06, "loss": 0.6953, "step": 20852 }, { "epoch": 0.6391136447223243, "grad_norm": 1.6251638658547274, "learning_rate": 3.043849665235814e-06, "loss": 0.5426, "step": 20853 }, { "epoch": 0.6391442932450656, "grad_norm": 1.7927938315436076, "learning_rate": 3.0433929170874676e-06, "loss": 0.6133, "step": 20854 }, { "epoch": 0.6391749417678068, "grad_norm": 1.8336426898186706, "learning_rate": 3.0429361882180747e-06, "loss": 0.6554, "step": 20855 }, { "epoch": 0.639205590290548, "grad_norm": 1.747375164301548, "learning_rate": 3.042479478632129e-06, "loss": 0.6673, "step": 20856 }, { "epoch": 0.6392362388132892, "grad_norm": 1.699313335320503, "learning_rate": 3.042022788334137e-06, "loss": 0.5385, "step": 20857 }, { "epoch": 0.6392668873360304, "grad_norm": 1.75598010699695, "learning_rate": 3.0415661173285938e-06, "loss": 0.6328, "step": 20858 }, { "epoch": 0.6392975358587716, "grad_norm": 1.7526804813159205, "learning_rate": 3.04110946562e-06, "loss": 0.416, "step": 20859 }, { "epoch": 0.6393281843815128, "grad_norm": 1.8492593085994242, "learning_rate": 3.040652833212857e-06, "loss": 0.5941, "step": 20860 }, { "epoch": 0.639358832904254, "grad_norm": 1.763220212027369, "learning_rate": 3.040196220111663e-06, "loss": 0.6948, "step": 20861 }, { "epoch": 0.6393894814269953, "grad_norm": 0.7355954041464429, "learning_rate": 3.0397396263209165e-06, "loss": 0.4115, "step": 20862 }, { "epoch": 0.6394201299497364, "grad_norm": 1.64121574484959, "learning_rate": 3.0392830518451177e-06, "loss": 0.4618, "step": 20863 }, { "epoch": 0.6394507784724777, "grad_norm": 1.8170878039150338, "learning_rate": 3.0388264966887636e-06, "loss": 0.6721, "step": 20864 }, { "epoch": 0.6394814269952188, "grad_norm": 1.8284814883901526, "learning_rate": 3.0383699608563557e-06, "loss": 0.6853, "step": 20865 }, { "epoch": 0.6395120755179601, "grad_norm": 1.6658274612028967, "learning_rate": 3.0379134443523892e-06, "loss": 0.5865, "step": 20866 }, { "epoch": 0.6395427240407012, "grad_norm": 0.8177885961307907, "learning_rate": 3.0374569471813632e-06, "loss": 0.3993, "step": 20867 }, { "epoch": 0.6395733725634425, "grad_norm": 1.6979396480432165, "learning_rate": 3.0370004693477767e-06, "loss": 0.5135, "step": 20868 }, { "epoch": 0.6396040210861836, "grad_norm": 1.7935195958069206, "learning_rate": 3.0365440108561265e-06, "loss": 0.6176, "step": 20869 }, { "epoch": 0.6396346696089249, "grad_norm": 1.564562154350732, "learning_rate": 3.03608757171091e-06, "loss": 0.6186, "step": 20870 }, { "epoch": 0.639665318131666, "grad_norm": 1.7472851641884377, "learning_rate": 3.035631151916626e-06, "loss": 0.6128, "step": 20871 }, { "epoch": 0.6396959666544073, "grad_norm": 1.778885916564394, "learning_rate": 3.0351747514777716e-06, "loss": 0.5044, "step": 20872 }, { "epoch": 0.6397266151771485, "grad_norm": 1.6829218524104341, "learning_rate": 3.034718370398841e-06, "loss": 0.5584, "step": 20873 }, { "epoch": 0.6397572636998896, "grad_norm": 1.95330892639344, "learning_rate": 3.0342620086843354e-06, "loss": 0.5398, "step": 20874 }, { "epoch": 0.6397879122226309, "grad_norm": 1.9565225233411043, "learning_rate": 3.033805666338746e-06, "loss": 0.6151, "step": 20875 }, { "epoch": 0.639818560745372, "grad_norm": 1.8471412130744307, "learning_rate": 3.033349343366576e-06, "loss": 0.6382, "step": 20876 }, { "epoch": 0.6398492092681133, "grad_norm": 1.7505851492512259, "learning_rate": 3.0328930397723167e-06, "loss": 0.4478, "step": 20877 }, { "epoch": 0.6398798577908544, "grad_norm": 1.798629736638038, "learning_rate": 3.032436755560465e-06, "loss": 0.5896, "step": 20878 }, { "epoch": 0.6399105063135957, "grad_norm": 1.9190072365691064, "learning_rate": 3.031980490735518e-06, "loss": 0.5623, "step": 20879 }, { "epoch": 0.6399411548363368, "grad_norm": 0.8448323311607663, "learning_rate": 3.031524245301971e-06, "loss": 0.4262, "step": 20880 }, { "epoch": 0.6399718033590781, "grad_norm": 2.1114935785514057, "learning_rate": 3.031068019264318e-06, "loss": 0.6833, "step": 20881 }, { "epoch": 0.6400024518818193, "grad_norm": 1.6808977527850633, "learning_rate": 3.0306118126270567e-06, "loss": 0.5736, "step": 20882 }, { "epoch": 0.6400331004045605, "grad_norm": 2.0452774940827836, "learning_rate": 3.0301556253946806e-06, "loss": 0.6389, "step": 20883 }, { "epoch": 0.6400637489273017, "grad_norm": 1.8115667384828387, "learning_rate": 3.0296994575716854e-06, "loss": 0.6253, "step": 20884 }, { "epoch": 0.6400943974500429, "grad_norm": 1.5097706508809876, "learning_rate": 3.0292433091625672e-06, "loss": 0.6029, "step": 20885 }, { "epoch": 0.6401250459727841, "grad_norm": 1.8394961271288148, "learning_rate": 3.028787180171815e-06, "loss": 0.6258, "step": 20886 }, { "epoch": 0.6401556944955253, "grad_norm": 1.9763652354018189, "learning_rate": 3.028331070603931e-06, "loss": 0.6143, "step": 20887 }, { "epoch": 0.6401863430182665, "grad_norm": 1.7341911790173883, "learning_rate": 3.027874980463404e-06, "loss": 0.6305, "step": 20888 }, { "epoch": 0.6402169915410078, "grad_norm": 1.5334916188350156, "learning_rate": 3.0274189097547287e-06, "loss": 0.542, "step": 20889 }, { "epoch": 0.6402476400637489, "grad_norm": 1.6442644428473612, "learning_rate": 3.0269628584824002e-06, "loss": 0.5707, "step": 20890 }, { "epoch": 0.6402782885864902, "grad_norm": 1.8630802320131412, "learning_rate": 3.0265068266509107e-06, "loss": 0.6952, "step": 20891 }, { "epoch": 0.6403089371092313, "grad_norm": 1.8745275742972134, "learning_rate": 3.0260508142647548e-06, "loss": 0.6146, "step": 20892 }, { "epoch": 0.6403395856319726, "grad_norm": 0.78250018558939, "learning_rate": 3.025594821328426e-06, "loss": 0.4057, "step": 20893 }, { "epoch": 0.6403702341547137, "grad_norm": 1.7039953802759895, "learning_rate": 3.0251388478464155e-06, "loss": 0.632, "step": 20894 }, { "epoch": 0.640400882677455, "grad_norm": 0.8004473299199153, "learning_rate": 3.0246828938232183e-06, "loss": 0.4172, "step": 20895 }, { "epoch": 0.6404315312001961, "grad_norm": 0.7891897119682659, "learning_rate": 3.0242269592633267e-06, "loss": 0.3884, "step": 20896 }, { "epoch": 0.6404621797229374, "grad_norm": 1.666478273085203, "learning_rate": 3.02377104417123e-06, "loss": 0.5409, "step": 20897 }, { "epoch": 0.6404928282456785, "grad_norm": 1.7106219222252061, "learning_rate": 3.0233151485514246e-06, "loss": 0.576, "step": 20898 }, { "epoch": 0.6405234767684198, "grad_norm": 0.7708077179022034, "learning_rate": 3.0228592724084003e-06, "loss": 0.4029, "step": 20899 }, { "epoch": 0.640554125291161, "grad_norm": 1.9755926749275696, "learning_rate": 3.022403415746649e-06, "loss": 0.6685, "step": 20900 }, { "epoch": 0.6405847738139022, "grad_norm": 1.73948970481558, "learning_rate": 3.0219475785706633e-06, "loss": 0.6169, "step": 20901 }, { "epoch": 0.6406154223366434, "grad_norm": 0.8054436615880987, "learning_rate": 3.021491760884934e-06, "loss": 0.423, "step": 20902 }, { "epoch": 0.6406460708593846, "grad_norm": 1.9655793258477947, "learning_rate": 3.0210359626939523e-06, "loss": 0.7265, "step": 20903 }, { "epoch": 0.6406767193821258, "grad_norm": 0.7632759740321504, "learning_rate": 3.020580184002211e-06, "loss": 0.409, "step": 20904 }, { "epoch": 0.6407073679048669, "grad_norm": 1.6115644542989944, "learning_rate": 3.0201244248141975e-06, "loss": 0.5952, "step": 20905 }, { "epoch": 0.6407380164276082, "grad_norm": 1.686613961228727, "learning_rate": 3.019668685134406e-06, "loss": 0.6319, "step": 20906 }, { "epoch": 0.6407686649503493, "grad_norm": 1.8038764273722732, "learning_rate": 3.0192129649673267e-06, "loss": 0.6703, "step": 20907 }, { "epoch": 0.6407993134730906, "grad_norm": 2.0734413421554048, "learning_rate": 3.0187572643174465e-06, "loss": 0.687, "step": 20908 }, { "epoch": 0.6408299619958318, "grad_norm": 1.8700845069836407, "learning_rate": 3.0183015831892592e-06, "loss": 0.5547, "step": 20909 }, { "epoch": 0.640860610518573, "grad_norm": 1.9084709760293195, "learning_rate": 3.0178459215872528e-06, "loss": 0.6611, "step": 20910 }, { "epoch": 0.6408912590413142, "grad_norm": 2.172873013337938, "learning_rate": 3.017390279515918e-06, "loss": 0.6512, "step": 20911 }, { "epoch": 0.6409219075640554, "grad_norm": 1.476396899479965, "learning_rate": 3.0169346569797446e-06, "loss": 0.5368, "step": 20912 }, { "epoch": 0.6409525560867966, "grad_norm": 1.6230614798084653, "learning_rate": 3.01647905398322e-06, "loss": 0.5913, "step": 20913 }, { "epoch": 0.6409832046095378, "grad_norm": 1.9106748147854964, "learning_rate": 3.0160234705308366e-06, "loss": 0.6549, "step": 20914 }, { "epoch": 0.641013853132279, "grad_norm": 1.7581579637167442, "learning_rate": 3.0155679066270803e-06, "loss": 0.5527, "step": 20915 }, { "epoch": 0.6410445016550202, "grad_norm": 1.4393800355144657, "learning_rate": 3.015112362276441e-06, "loss": 0.5294, "step": 20916 }, { "epoch": 0.6410751501777614, "grad_norm": 1.8199089543394686, "learning_rate": 3.0146568374834095e-06, "loss": 0.6412, "step": 20917 }, { "epoch": 0.6411057987005027, "grad_norm": 1.8883265897994361, "learning_rate": 3.014201332252471e-06, "loss": 0.5546, "step": 20918 }, { "epoch": 0.6411364472232438, "grad_norm": 2.01805989357932, "learning_rate": 3.013745846588114e-06, "loss": 0.529, "step": 20919 }, { "epoch": 0.6411670957459851, "grad_norm": 1.969316788888211, "learning_rate": 3.0132903804948276e-06, "loss": 0.6961, "step": 20920 }, { "epoch": 0.6411977442687262, "grad_norm": 1.7751272703267766, "learning_rate": 3.012834933977099e-06, "loss": 0.6318, "step": 20921 }, { "epoch": 0.6412283927914675, "grad_norm": 1.9606603863613112, "learning_rate": 3.012379507039418e-06, "loss": 0.6492, "step": 20922 }, { "epoch": 0.6412590413142086, "grad_norm": 1.8090252507460425, "learning_rate": 3.0119240996862693e-06, "loss": 0.5614, "step": 20923 }, { "epoch": 0.6412896898369499, "grad_norm": 1.841400348386901, "learning_rate": 3.0114687119221398e-06, "loss": 0.6929, "step": 20924 }, { "epoch": 0.641320338359691, "grad_norm": 1.9458459295836614, "learning_rate": 3.0110133437515197e-06, "loss": 0.59, "step": 20925 }, { "epoch": 0.6413509868824323, "grad_norm": 1.9071865931814367, "learning_rate": 3.0105579951788944e-06, "loss": 0.6642, "step": 20926 }, { "epoch": 0.6413816354051735, "grad_norm": 1.7848647871081138, "learning_rate": 3.0101026662087474e-06, "loss": 0.5841, "step": 20927 }, { "epoch": 0.6414122839279147, "grad_norm": 0.8148957114239677, "learning_rate": 3.0096473568455716e-06, "loss": 0.4056, "step": 20928 }, { "epoch": 0.6414429324506559, "grad_norm": 1.754237354844692, "learning_rate": 3.009192067093846e-06, "loss": 0.6096, "step": 20929 }, { "epoch": 0.6414735809733971, "grad_norm": 0.8430566680548981, "learning_rate": 3.0087367969580635e-06, "loss": 0.4513, "step": 20930 }, { "epoch": 0.6415042294961383, "grad_norm": 1.8064709920819997, "learning_rate": 3.008281546442706e-06, "loss": 0.6125, "step": 20931 }, { "epoch": 0.6415348780188795, "grad_norm": 0.7888197179395905, "learning_rate": 3.007826315552258e-06, "loss": 0.4197, "step": 20932 }, { "epoch": 0.6415655265416207, "grad_norm": 1.9742593179864667, "learning_rate": 3.007371104291209e-06, "loss": 0.766, "step": 20933 }, { "epoch": 0.641596175064362, "grad_norm": 2.10539948353522, "learning_rate": 3.0069159126640426e-06, "loss": 0.6723, "step": 20934 }, { "epoch": 0.6416268235871031, "grad_norm": 0.7505400119571873, "learning_rate": 3.006460740675242e-06, "loss": 0.4151, "step": 20935 }, { "epoch": 0.6416574721098443, "grad_norm": 1.7590176167425478, "learning_rate": 3.0060055883292947e-06, "loss": 0.6503, "step": 20936 }, { "epoch": 0.6416881206325855, "grad_norm": 1.6271562602597944, "learning_rate": 3.0055504556306842e-06, "loss": 0.5237, "step": 20937 }, { "epoch": 0.6417187691553267, "grad_norm": 1.7688732617801208, "learning_rate": 3.005095342583896e-06, "loss": 0.5243, "step": 20938 }, { "epoch": 0.6417494176780679, "grad_norm": 1.897675094861759, "learning_rate": 3.0046402491934147e-06, "loss": 0.6053, "step": 20939 }, { "epoch": 0.6417800662008091, "grad_norm": 0.8266491036544462, "learning_rate": 3.004185175463721e-06, "loss": 0.409, "step": 20940 }, { "epoch": 0.6418107147235503, "grad_norm": 1.7961352519011655, "learning_rate": 3.003730121399304e-06, "loss": 0.6083, "step": 20941 }, { "epoch": 0.6418413632462915, "grad_norm": 1.805480231766555, "learning_rate": 3.0032750870046435e-06, "loss": 0.5858, "step": 20942 }, { "epoch": 0.6418720117690327, "grad_norm": 1.7989562489092579, "learning_rate": 3.0028200722842243e-06, "loss": 0.6884, "step": 20943 }, { "epoch": 0.6419026602917739, "grad_norm": 1.8393988196597817, "learning_rate": 3.0023650772425306e-06, "loss": 0.6324, "step": 20944 }, { "epoch": 0.6419333088145152, "grad_norm": 2.125360618382555, "learning_rate": 3.0019101018840446e-06, "loss": 0.6124, "step": 20945 }, { "epoch": 0.6419639573372563, "grad_norm": 1.6015596235789187, "learning_rate": 3.001455146213249e-06, "loss": 0.5178, "step": 20946 }, { "epoch": 0.6419946058599976, "grad_norm": 1.7847388052588211, "learning_rate": 3.001000210234628e-06, "loss": 0.661, "step": 20947 }, { "epoch": 0.6420252543827387, "grad_norm": 2.009190243027548, "learning_rate": 3.0005452939526624e-06, "loss": 0.5684, "step": 20948 }, { "epoch": 0.64205590290548, "grad_norm": 1.8308180472183238, "learning_rate": 3.0000903973718365e-06, "loss": 0.6304, "step": 20949 }, { "epoch": 0.6420865514282211, "grad_norm": 1.6920775024755144, "learning_rate": 2.9996355204966334e-06, "loss": 0.5448, "step": 20950 }, { "epoch": 0.6421171999509624, "grad_norm": 1.7829326122513764, "learning_rate": 2.999180663331529e-06, "loss": 0.6642, "step": 20951 }, { "epoch": 0.6421478484737035, "grad_norm": 1.8335537105731217, "learning_rate": 2.9987258258810135e-06, "loss": 0.7462, "step": 20952 }, { "epoch": 0.6421784969964448, "grad_norm": 1.8438001704086893, "learning_rate": 2.998271008149563e-06, "loss": 0.5734, "step": 20953 }, { "epoch": 0.642209145519186, "grad_norm": 1.718906187779067, "learning_rate": 2.9978162101416595e-06, "loss": 0.5439, "step": 20954 }, { "epoch": 0.6422397940419272, "grad_norm": 1.6315510544042133, "learning_rate": 2.997361431861786e-06, "loss": 0.5935, "step": 20955 }, { "epoch": 0.6422704425646684, "grad_norm": 1.8306888298606787, "learning_rate": 2.9969066733144225e-06, "loss": 0.6746, "step": 20956 }, { "epoch": 0.6423010910874096, "grad_norm": 1.6721192424830231, "learning_rate": 2.9964519345040506e-06, "loss": 0.6296, "step": 20957 }, { "epoch": 0.6423317396101508, "grad_norm": 2.0987632080363356, "learning_rate": 2.9959972154351504e-06, "loss": 0.6119, "step": 20958 }, { "epoch": 0.642362388132892, "grad_norm": 1.8025260614287055, "learning_rate": 2.995542516112201e-06, "loss": 0.6512, "step": 20959 }, { "epoch": 0.6423930366556332, "grad_norm": 1.6638527207634572, "learning_rate": 2.9950878365396865e-06, "loss": 0.5611, "step": 20960 }, { "epoch": 0.6424236851783744, "grad_norm": 1.6273878991773336, "learning_rate": 2.994633176722083e-06, "loss": 0.5465, "step": 20961 }, { "epoch": 0.6424543337011156, "grad_norm": 0.7721737911985957, "learning_rate": 2.9941785366638715e-06, "loss": 0.4179, "step": 20962 }, { "epoch": 0.6424849822238569, "grad_norm": 1.7939249794677954, "learning_rate": 2.993723916369533e-06, "loss": 0.5684, "step": 20963 }, { "epoch": 0.642515630746598, "grad_norm": 1.8325601887498828, "learning_rate": 2.9932693158435444e-06, "loss": 0.5602, "step": 20964 }, { "epoch": 0.6425462792693393, "grad_norm": 1.8910952027231787, "learning_rate": 2.9928147350903887e-06, "loss": 0.6077, "step": 20965 }, { "epoch": 0.6425769277920804, "grad_norm": 1.9632363546498912, "learning_rate": 2.9923601741145427e-06, "loss": 0.6928, "step": 20966 }, { "epoch": 0.6426075763148216, "grad_norm": 0.806500874191416, "learning_rate": 2.9919056329204845e-06, "loss": 0.4065, "step": 20967 }, { "epoch": 0.6426382248375628, "grad_norm": 2.0742282520629884, "learning_rate": 2.991451111512694e-06, "loss": 0.6628, "step": 20968 }, { "epoch": 0.642668873360304, "grad_norm": 1.7275879708591269, "learning_rate": 2.9909966098956515e-06, "loss": 0.5758, "step": 20969 }, { "epoch": 0.6426995218830452, "grad_norm": 1.8221478174289807, "learning_rate": 2.9905421280738307e-06, "loss": 0.6549, "step": 20970 }, { "epoch": 0.6427301704057864, "grad_norm": 1.69785700359273, "learning_rate": 2.9900876660517155e-06, "loss": 0.5936, "step": 20971 }, { "epoch": 0.6427608189285277, "grad_norm": 1.8476480763571146, "learning_rate": 2.989633223833779e-06, "loss": 0.6062, "step": 20972 }, { "epoch": 0.6427914674512688, "grad_norm": 1.8151240332668985, "learning_rate": 2.9891788014245e-06, "loss": 0.587, "step": 20973 }, { "epoch": 0.6428221159740101, "grad_norm": 1.643825633811113, "learning_rate": 2.9887243988283576e-06, "loss": 0.6267, "step": 20974 }, { "epoch": 0.6428527644967512, "grad_norm": 1.7436246873351386, "learning_rate": 2.988270016049828e-06, "loss": 0.5782, "step": 20975 }, { "epoch": 0.6428834130194925, "grad_norm": 1.8644926427438544, "learning_rate": 2.9878156530933887e-06, "loss": 0.6857, "step": 20976 }, { "epoch": 0.6429140615422336, "grad_norm": 1.8277126462260542, "learning_rate": 2.9873613099635166e-06, "loss": 0.5765, "step": 20977 }, { "epoch": 0.6429447100649749, "grad_norm": 1.7313090300411214, "learning_rate": 2.9869069866646882e-06, "loss": 0.6876, "step": 20978 }, { "epoch": 0.642975358587716, "grad_norm": 1.5905346058684497, "learning_rate": 2.9864526832013813e-06, "loss": 0.6042, "step": 20979 }, { "epoch": 0.6430060071104573, "grad_norm": 1.713550154501349, "learning_rate": 2.985998399578072e-06, "loss": 0.5902, "step": 20980 }, { "epoch": 0.6430366556331985, "grad_norm": 2.072029165843892, "learning_rate": 2.9855441357992333e-06, "loss": 0.6853, "step": 20981 }, { "epoch": 0.6430673041559397, "grad_norm": 1.9120746320955169, "learning_rate": 2.985089891869346e-06, "loss": 0.5946, "step": 20982 }, { "epoch": 0.6430979526786809, "grad_norm": 1.8650811525069086, "learning_rate": 2.9846356677928805e-06, "loss": 0.6627, "step": 20983 }, { "epoch": 0.6431286012014221, "grad_norm": 1.7701648970645192, "learning_rate": 2.9841814635743188e-06, "loss": 0.6401, "step": 20984 }, { "epoch": 0.6431592497241633, "grad_norm": 1.871479201552037, "learning_rate": 2.9837272792181316e-06, "loss": 0.5712, "step": 20985 }, { "epoch": 0.6431898982469045, "grad_norm": 2.0699458863876066, "learning_rate": 2.983273114728795e-06, "loss": 0.6691, "step": 20986 }, { "epoch": 0.6432205467696457, "grad_norm": 1.8057510151141931, "learning_rate": 2.9828189701107848e-06, "loss": 0.6485, "step": 20987 }, { "epoch": 0.643251195292387, "grad_norm": 2.0309236753783857, "learning_rate": 2.982364845368576e-06, "loss": 0.643, "step": 20988 }, { "epoch": 0.6432818438151281, "grad_norm": 1.9728257654397194, "learning_rate": 2.9819107405066407e-06, "loss": 0.5737, "step": 20989 }, { "epoch": 0.6433124923378694, "grad_norm": 2.062404089591156, "learning_rate": 2.9814566555294567e-06, "loss": 0.6754, "step": 20990 }, { "epoch": 0.6433431408606105, "grad_norm": 1.7222001229284045, "learning_rate": 2.9810025904414962e-06, "loss": 0.5925, "step": 20991 }, { "epoch": 0.6433737893833518, "grad_norm": 1.8642275509272657, "learning_rate": 2.980548545247234e-06, "loss": 0.5887, "step": 20992 }, { "epoch": 0.6434044379060929, "grad_norm": 1.8696884984116773, "learning_rate": 2.9800945199511453e-06, "loss": 0.6272, "step": 20993 }, { "epoch": 0.6434350864288342, "grad_norm": 1.887812694099518, "learning_rate": 2.979640514557699e-06, "loss": 0.6563, "step": 20994 }, { "epoch": 0.6434657349515753, "grad_norm": 0.8029272613027989, "learning_rate": 2.979186529071374e-06, "loss": 0.4192, "step": 20995 }, { "epoch": 0.6434963834743166, "grad_norm": 1.8495311343219827, "learning_rate": 2.978732563496641e-06, "loss": 0.6538, "step": 20996 }, { "epoch": 0.6435270319970577, "grad_norm": 1.796215339550137, "learning_rate": 2.978278617837972e-06, "loss": 0.5905, "step": 20997 }, { "epoch": 0.6435576805197989, "grad_norm": 2.103815345328435, "learning_rate": 2.9778246920998418e-06, "loss": 0.4832, "step": 20998 }, { "epoch": 0.6435883290425402, "grad_norm": 1.6652219151860823, "learning_rate": 2.9773707862867217e-06, "loss": 0.5407, "step": 20999 }, { "epoch": 0.6436189775652813, "grad_norm": 1.6586337788511814, "learning_rate": 2.976916900403085e-06, "loss": 0.6027, "step": 21000 }, { "epoch": 0.6436496260880226, "grad_norm": 1.668081363949488, "learning_rate": 2.976463034453404e-06, "loss": 0.5888, "step": 21001 }, { "epoch": 0.6436802746107637, "grad_norm": 1.837330276898257, "learning_rate": 2.9760091884421493e-06, "loss": 0.5971, "step": 21002 }, { "epoch": 0.643710923133505, "grad_norm": 1.481435085621304, "learning_rate": 2.9755553623737952e-06, "loss": 0.532, "step": 21003 }, { "epoch": 0.6437415716562461, "grad_norm": 1.8851120271193418, "learning_rate": 2.975101556252814e-06, "loss": 0.5851, "step": 21004 }, { "epoch": 0.6437722201789874, "grad_norm": 0.7575138331686915, "learning_rate": 2.9746477700836717e-06, "loss": 0.4058, "step": 21005 }, { "epoch": 0.6438028687017285, "grad_norm": 1.659849862762036, "learning_rate": 2.974194003870845e-06, "loss": 0.6655, "step": 21006 }, { "epoch": 0.6438335172244698, "grad_norm": 1.7362162764906448, "learning_rate": 2.9737402576188036e-06, "loss": 0.642, "step": 21007 }, { "epoch": 0.643864165747211, "grad_norm": 1.6069120209005598, "learning_rate": 2.9732865313320157e-06, "loss": 0.5601, "step": 21008 }, { "epoch": 0.6438948142699522, "grad_norm": 2.1638853066304398, "learning_rate": 2.9728328250149557e-06, "loss": 0.5607, "step": 21009 }, { "epoch": 0.6439254627926934, "grad_norm": 1.817278597070588, "learning_rate": 2.9723791386720915e-06, "loss": 0.6707, "step": 21010 }, { "epoch": 0.6439561113154346, "grad_norm": 1.7070496186017514, "learning_rate": 2.9719254723078956e-06, "loss": 0.5593, "step": 21011 }, { "epoch": 0.6439867598381758, "grad_norm": 2.0322899174954667, "learning_rate": 2.9714718259268383e-06, "loss": 0.6211, "step": 21012 }, { "epoch": 0.644017408360917, "grad_norm": 1.7190635619277534, "learning_rate": 2.9710181995333843e-06, "loss": 0.5995, "step": 21013 }, { "epoch": 0.6440480568836582, "grad_norm": 1.681527425184132, "learning_rate": 2.9705645931320103e-06, "loss": 0.5375, "step": 21014 }, { "epoch": 0.6440787054063994, "grad_norm": 0.7992014039451616, "learning_rate": 2.970111006727182e-06, "loss": 0.4187, "step": 21015 }, { "epoch": 0.6441093539291406, "grad_norm": 2.2078089463085044, "learning_rate": 2.969657440323368e-06, "loss": 0.6325, "step": 21016 }, { "epoch": 0.6441400024518819, "grad_norm": 1.837136099941989, "learning_rate": 2.96920389392504e-06, "loss": 0.6531, "step": 21017 }, { "epoch": 0.644170650974623, "grad_norm": 0.8417423358237371, "learning_rate": 2.9687503675366643e-06, "loss": 0.3978, "step": 21018 }, { "epoch": 0.6442012994973643, "grad_norm": 1.7010808253545409, "learning_rate": 2.9682968611627116e-06, "loss": 0.5782, "step": 21019 }, { "epoch": 0.6442319480201054, "grad_norm": 2.0145115516314505, "learning_rate": 2.9678433748076508e-06, "loss": 0.6721, "step": 21020 }, { "epoch": 0.6442625965428467, "grad_norm": 1.7609762204043515, "learning_rate": 2.9673899084759474e-06, "loss": 0.7038, "step": 21021 }, { "epoch": 0.6442932450655878, "grad_norm": 1.8833348522531936, "learning_rate": 2.9669364621720725e-06, "loss": 0.6267, "step": 21022 }, { "epoch": 0.6443238935883291, "grad_norm": 1.7571005522085357, "learning_rate": 2.9664830359004937e-06, "loss": 0.6155, "step": 21023 }, { "epoch": 0.6443545421110702, "grad_norm": 1.679236796513888, "learning_rate": 2.9660296296656754e-06, "loss": 0.6345, "step": 21024 }, { "epoch": 0.6443851906338115, "grad_norm": 0.8220455114531299, "learning_rate": 2.9655762434720904e-06, "loss": 0.4158, "step": 21025 }, { "epoch": 0.6444158391565527, "grad_norm": 1.6714165671185939, "learning_rate": 2.9651228773242015e-06, "loss": 0.4912, "step": 21026 }, { "epoch": 0.6444464876792939, "grad_norm": 2.2245758705687066, "learning_rate": 2.9646695312264774e-06, "loss": 0.62, "step": 21027 }, { "epoch": 0.6444771362020351, "grad_norm": 1.8707654506106925, "learning_rate": 2.9642162051833858e-06, "loss": 0.5947, "step": 21028 }, { "epoch": 0.6445077847247762, "grad_norm": 1.6659198150010934, "learning_rate": 2.9637628991993917e-06, "loss": 0.6482, "step": 21029 }, { "epoch": 0.6445384332475175, "grad_norm": 1.7606111501557313, "learning_rate": 2.9633096132789636e-06, "loss": 0.5771, "step": 21030 }, { "epoch": 0.6445690817702586, "grad_norm": 0.7673408325159138, "learning_rate": 2.962856347426567e-06, "loss": 0.4249, "step": 21031 }, { "epoch": 0.6445997302929999, "grad_norm": 1.6904308169989124, "learning_rate": 2.962403101646667e-06, "loss": 0.5321, "step": 21032 }, { "epoch": 0.644630378815741, "grad_norm": 0.7966371976847993, "learning_rate": 2.9619498759437325e-06, "loss": 0.4277, "step": 21033 }, { "epoch": 0.6446610273384823, "grad_norm": 1.8285068551912063, "learning_rate": 2.9614966703222274e-06, "loss": 0.6849, "step": 21034 }, { "epoch": 0.6446916758612234, "grad_norm": 1.5962495399277816, "learning_rate": 2.9610434847866144e-06, "loss": 0.6014, "step": 21035 }, { "epoch": 0.6447223243839647, "grad_norm": 1.7048864687640142, "learning_rate": 2.9605903193413647e-06, "loss": 0.6133, "step": 21036 }, { "epoch": 0.6447529729067059, "grad_norm": 1.6343121209170526, "learning_rate": 2.9601371739909374e-06, "loss": 0.5328, "step": 21037 }, { "epoch": 0.6447836214294471, "grad_norm": 0.7632541864386995, "learning_rate": 2.9596840487398036e-06, "loss": 0.3986, "step": 21038 }, { "epoch": 0.6448142699521883, "grad_norm": 1.9160848562733785, "learning_rate": 2.959230943592424e-06, "loss": 0.6045, "step": 21039 }, { "epoch": 0.6448449184749295, "grad_norm": 1.8602872499721659, "learning_rate": 2.9587778585532623e-06, "loss": 0.5988, "step": 21040 }, { "epoch": 0.6448755669976707, "grad_norm": 1.6533398596883757, "learning_rate": 2.9583247936267863e-06, "loss": 0.5197, "step": 21041 }, { "epoch": 0.6449062155204119, "grad_norm": 1.5787614593967725, "learning_rate": 2.957871748817458e-06, "loss": 0.5951, "step": 21042 }, { "epoch": 0.6449368640431531, "grad_norm": 1.8778732925876302, "learning_rate": 2.9574187241297413e-06, "loss": 0.6716, "step": 21043 }, { "epoch": 0.6449675125658944, "grad_norm": 2.0846205184740856, "learning_rate": 2.9569657195681013e-06, "loss": 0.746, "step": 21044 }, { "epoch": 0.6449981610886355, "grad_norm": 1.8550061271525367, "learning_rate": 2.9565127351370005e-06, "loss": 0.6663, "step": 21045 }, { "epoch": 0.6450288096113768, "grad_norm": 1.7876983778838194, "learning_rate": 2.956059770840902e-06, "loss": 0.559, "step": 21046 }, { "epoch": 0.6450594581341179, "grad_norm": 1.6636222606051054, "learning_rate": 2.955606826684272e-06, "loss": 0.6464, "step": 21047 }, { "epoch": 0.6450901066568592, "grad_norm": 0.8503492276306204, "learning_rate": 2.9551539026715675e-06, "loss": 0.4274, "step": 21048 }, { "epoch": 0.6451207551796003, "grad_norm": 1.9464427995456908, "learning_rate": 2.954700998807258e-06, "loss": 0.7069, "step": 21049 }, { "epoch": 0.6451514037023416, "grad_norm": 1.5290711532276853, "learning_rate": 2.9542481150958013e-06, "loss": 0.5653, "step": 21050 }, { "epoch": 0.6451820522250827, "grad_norm": 1.8557137370110337, "learning_rate": 2.9537952515416613e-06, "loss": 0.5861, "step": 21051 }, { "epoch": 0.645212700747824, "grad_norm": 1.7367645016555997, "learning_rate": 2.9533424081493e-06, "loss": 0.5616, "step": 21052 }, { "epoch": 0.6452433492705651, "grad_norm": 1.5950392024213773, "learning_rate": 2.952889584923181e-06, "loss": 0.6075, "step": 21053 }, { "epoch": 0.6452739977933064, "grad_norm": 0.7808104690278662, "learning_rate": 2.952436781867762e-06, "loss": 0.4227, "step": 21054 }, { "epoch": 0.6453046463160476, "grad_norm": 1.7093300988571691, "learning_rate": 2.95198399898751e-06, "loss": 0.684, "step": 21055 }, { "epoch": 0.6453352948387888, "grad_norm": 1.8579296474286733, "learning_rate": 2.951531236286882e-06, "loss": 0.5417, "step": 21056 }, { "epoch": 0.64536594336153, "grad_norm": 0.7704892286651932, "learning_rate": 2.951078493770343e-06, "loss": 0.4151, "step": 21057 }, { "epoch": 0.6453965918842712, "grad_norm": 0.7724461603744557, "learning_rate": 2.9506257714423504e-06, "loss": 0.4153, "step": 21058 }, { "epoch": 0.6454272404070124, "grad_norm": 1.7444206583517092, "learning_rate": 2.9501730693073656e-06, "loss": 0.6188, "step": 21059 }, { "epoch": 0.6454578889297535, "grad_norm": 1.7655774389024705, "learning_rate": 2.949720387369851e-06, "loss": 0.6805, "step": 21060 }, { "epoch": 0.6454885374524948, "grad_norm": 0.7674097070876732, "learning_rate": 2.9492677256342663e-06, "loss": 0.421, "step": 21061 }, { "epoch": 0.6455191859752359, "grad_norm": 1.7022899712800552, "learning_rate": 2.94881508410507e-06, "loss": 0.5289, "step": 21062 }, { "epoch": 0.6455498344979772, "grad_norm": 1.9332852544526309, "learning_rate": 2.9483624627867246e-06, "loss": 0.6355, "step": 21063 }, { "epoch": 0.6455804830207184, "grad_norm": 1.7329857223468927, "learning_rate": 2.9479098616836876e-06, "loss": 0.583, "step": 21064 }, { "epoch": 0.6456111315434596, "grad_norm": 2.039179570074876, "learning_rate": 2.947457280800421e-06, "loss": 0.7315, "step": 21065 }, { "epoch": 0.6456417800662008, "grad_norm": 0.741342308490082, "learning_rate": 2.9470047201413843e-06, "loss": 0.3865, "step": 21066 }, { "epoch": 0.645672428588942, "grad_norm": 2.064885180566934, "learning_rate": 2.946552179711032e-06, "loss": 0.5963, "step": 21067 }, { "epoch": 0.6457030771116832, "grad_norm": 1.676821279954323, "learning_rate": 2.94609965951383e-06, "loss": 0.5353, "step": 21068 }, { "epoch": 0.6457337256344244, "grad_norm": 1.8785115232010858, "learning_rate": 2.9456471595542317e-06, "loss": 0.6068, "step": 21069 }, { "epoch": 0.6457643741571656, "grad_norm": 1.5440533591328476, "learning_rate": 2.945194679836697e-06, "loss": 0.5293, "step": 21070 }, { "epoch": 0.6457950226799068, "grad_norm": 1.807335537109018, "learning_rate": 2.944742220365686e-06, "loss": 0.6429, "step": 21071 }, { "epoch": 0.645825671202648, "grad_norm": 1.6436060433569277, "learning_rate": 2.9442897811456545e-06, "loss": 0.5353, "step": 21072 }, { "epoch": 0.6458563197253893, "grad_norm": 1.5641597428820417, "learning_rate": 2.943837362181063e-06, "loss": 0.5808, "step": 21073 }, { "epoch": 0.6458869682481304, "grad_norm": 1.9725553625037746, "learning_rate": 2.9433849634763678e-06, "loss": 0.6936, "step": 21074 }, { "epoch": 0.6459176167708717, "grad_norm": 1.8841043067230283, "learning_rate": 2.9429325850360257e-06, "loss": 0.572, "step": 21075 }, { "epoch": 0.6459482652936128, "grad_norm": 1.7218583637982035, "learning_rate": 2.942480226864497e-06, "loss": 0.6084, "step": 21076 }, { "epoch": 0.6459789138163541, "grad_norm": 1.6445157216200248, "learning_rate": 2.9420278889662366e-06, "loss": 0.6416, "step": 21077 }, { "epoch": 0.6460095623390952, "grad_norm": 1.7477090042604655, "learning_rate": 2.9415755713457008e-06, "loss": 0.5699, "step": 21078 }, { "epoch": 0.6460402108618365, "grad_norm": 1.8170711622752906, "learning_rate": 2.9411232740073493e-06, "loss": 0.6456, "step": 21079 }, { "epoch": 0.6460708593845776, "grad_norm": 1.8927656138916304, "learning_rate": 2.9406709969556356e-06, "loss": 0.5613, "step": 21080 }, { "epoch": 0.6461015079073189, "grad_norm": 1.9687504339570812, "learning_rate": 2.940218740195018e-06, "loss": 0.607, "step": 21081 }, { "epoch": 0.6461321564300601, "grad_norm": 1.9200141980377499, "learning_rate": 2.9397665037299518e-06, "loss": 0.5819, "step": 21082 }, { "epoch": 0.6461628049528013, "grad_norm": 0.8087039149161467, "learning_rate": 2.939314287564893e-06, "loss": 0.4262, "step": 21083 }, { "epoch": 0.6461934534755425, "grad_norm": 1.8286800537738268, "learning_rate": 2.9388620917042987e-06, "loss": 0.5639, "step": 21084 }, { "epoch": 0.6462241019982837, "grad_norm": 1.7632521463527175, "learning_rate": 2.9384099161526236e-06, "loss": 0.6155, "step": 21085 }, { "epoch": 0.6462547505210249, "grad_norm": 1.9858184747728163, "learning_rate": 2.937957760914322e-06, "loss": 0.6567, "step": 21086 }, { "epoch": 0.6462853990437661, "grad_norm": 1.9522122950975636, "learning_rate": 2.9375056259938507e-06, "loss": 0.6355, "step": 21087 }, { "epoch": 0.6463160475665073, "grad_norm": 1.6124045825217008, "learning_rate": 2.9370535113956654e-06, "loss": 0.6058, "step": 21088 }, { "epoch": 0.6463466960892486, "grad_norm": 1.8893855618264075, "learning_rate": 2.9366014171242173e-06, "loss": 0.5698, "step": 21089 }, { "epoch": 0.6463773446119897, "grad_norm": 1.9129041004145912, "learning_rate": 2.936149343183966e-06, "loss": 0.6013, "step": 21090 }, { "epoch": 0.6464079931347309, "grad_norm": 1.573051642312755, "learning_rate": 2.93569728957936e-06, "loss": 0.6247, "step": 21091 }, { "epoch": 0.6464386416574721, "grad_norm": 2.172468182780583, "learning_rate": 2.9352452563148605e-06, "loss": 0.534, "step": 21092 }, { "epoch": 0.6464692901802133, "grad_norm": 1.7850522317934556, "learning_rate": 2.934793243394916e-06, "loss": 0.5695, "step": 21093 }, { "epoch": 0.6464999387029545, "grad_norm": 1.8810438865020465, "learning_rate": 2.9343412508239817e-06, "loss": 0.5617, "step": 21094 }, { "epoch": 0.6465305872256957, "grad_norm": 1.755742512039154, "learning_rate": 2.9338892786065123e-06, "loss": 0.5459, "step": 21095 }, { "epoch": 0.6465612357484369, "grad_norm": 1.7562126765887414, "learning_rate": 2.93343732674696e-06, "loss": 0.6225, "step": 21096 }, { "epoch": 0.6465918842711781, "grad_norm": 1.7439500190591355, "learning_rate": 2.932985395249778e-06, "loss": 0.6106, "step": 21097 }, { "epoch": 0.6466225327939193, "grad_norm": 1.9021496397015736, "learning_rate": 2.932533484119421e-06, "loss": 0.6022, "step": 21098 }, { "epoch": 0.6466531813166605, "grad_norm": 1.6918649308941631, "learning_rate": 2.932081593360341e-06, "loss": 0.5448, "step": 21099 }, { "epoch": 0.6466838298394018, "grad_norm": 1.7964940530503002, "learning_rate": 2.9316297229769873e-06, "loss": 0.6483, "step": 21100 }, { "epoch": 0.6467144783621429, "grad_norm": 1.8754506456607745, "learning_rate": 2.9311778729738183e-06, "loss": 0.5801, "step": 21101 }, { "epoch": 0.6467451268848842, "grad_norm": 0.8133008321127955, "learning_rate": 2.930726043355281e-06, "loss": 0.4237, "step": 21102 }, { "epoch": 0.6467757754076253, "grad_norm": 1.8481924912387875, "learning_rate": 2.9302742341258306e-06, "loss": 0.622, "step": 21103 }, { "epoch": 0.6468064239303666, "grad_norm": 1.6480315725387804, "learning_rate": 2.9298224452899177e-06, "loss": 0.6296, "step": 21104 }, { "epoch": 0.6468370724531077, "grad_norm": 1.6148978585258464, "learning_rate": 2.9293706768519925e-06, "loss": 0.6, "step": 21105 }, { "epoch": 0.646867720975849, "grad_norm": 1.8354928195784268, "learning_rate": 2.9289189288165098e-06, "loss": 0.5793, "step": 21106 }, { "epoch": 0.6468983694985901, "grad_norm": 2.012238370347361, "learning_rate": 2.928467201187918e-06, "loss": 0.5938, "step": 21107 }, { "epoch": 0.6469290180213314, "grad_norm": 0.7675828883084757, "learning_rate": 2.928015493970668e-06, "loss": 0.4207, "step": 21108 }, { "epoch": 0.6469596665440726, "grad_norm": 0.8399874306463831, "learning_rate": 2.9275638071692137e-06, "loss": 0.4124, "step": 21109 }, { "epoch": 0.6469903150668138, "grad_norm": 1.5121660365564231, "learning_rate": 2.9271121407879997e-06, "loss": 0.5803, "step": 21110 }, { "epoch": 0.647020963589555, "grad_norm": 1.7434384686928905, "learning_rate": 2.9266604948314843e-06, "loss": 0.683, "step": 21111 }, { "epoch": 0.6470516121122962, "grad_norm": 1.808521531234815, "learning_rate": 2.9262088693041114e-06, "loss": 0.5951, "step": 21112 }, { "epoch": 0.6470822606350374, "grad_norm": 1.8915911092996693, "learning_rate": 2.9257572642103327e-06, "loss": 0.6614, "step": 21113 }, { "epoch": 0.6471129091577786, "grad_norm": 1.855243921515353, "learning_rate": 2.9253056795545996e-06, "loss": 0.6377, "step": 21114 }, { "epoch": 0.6471435576805198, "grad_norm": 1.7102678305042989, "learning_rate": 2.92485411534136e-06, "loss": 0.5442, "step": 21115 }, { "epoch": 0.647174206203261, "grad_norm": 1.780200960569097, "learning_rate": 2.9244025715750634e-06, "loss": 0.6373, "step": 21116 }, { "epoch": 0.6472048547260022, "grad_norm": 1.735541583037074, "learning_rate": 2.9239510482601596e-06, "loss": 0.6502, "step": 21117 }, { "epoch": 0.6472355032487435, "grad_norm": 0.8467610629161428, "learning_rate": 2.9234995454010968e-06, "loss": 0.4469, "step": 21118 }, { "epoch": 0.6472661517714846, "grad_norm": 0.7852418083843932, "learning_rate": 2.923048063002325e-06, "loss": 0.415, "step": 21119 }, { "epoch": 0.6472968002942259, "grad_norm": 2.077640419924069, "learning_rate": 2.9225966010682933e-06, "loss": 0.5951, "step": 21120 }, { "epoch": 0.647327448816967, "grad_norm": 1.8722041886870604, "learning_rate": 2.9221451596034457e-06, "loss": 0.6656, "step": 21121 }, { "epoch": 0.6473580973397082, "grad_norm": 2.0673057279004503, "learning_rate": 2.921693738612236e-06, "loss": 0.62, "step": 21122 }, { "epoch": 0.6473887458624494, "grad_norm": 1.7577717962595458, "learning_rate": 2.9212423380991093e-06, "loss": 0.5959, "step": 21123 }, { "epoch": 0.6474193943851906, "grad_norm": 1.639820658626309, "learning_rate": 2.9207909580685123e-06, "loss": 0.5838, "step": 21124 }, { "epoch": 0.6474500429079318, "grad_norm": 1.860991513179502, "learning_rate": 2.9203395985248954e-06, "loss": 0.552, "step": 21125 }, { "epoch": 0.647480691430673, "grad_norm": 1.8683505662772788, "learning_rate": 2.919888259472704e-06, "loss": 0.6343, "step": 21126 }, { "epoch": 0.6475113399534143, "grad_norm": 1.5699244469001552, "learning_rate": 2.919436940916386e-06, "loss": 0.5847, "step": 21127 }, { "epoch": 0.6475419884761554, "grad_norm": 1.6017646491597, "learning_rate": 2.918985642860388e-06, "loss": 0.6283, "step": 21128 }, { "epoch": 0.6475726369988967, "grad_norm": 1.7878410336250978, "learning_rate": 2.9185343653091557e-06, "loss": 0.5069, "step": 21129 }, { "epoch": 0.6476032855216378, "grad_norm": 1.8608042688921929, "learning_rate": 2.9180831082671396e-06, "loss": 0.6307, "step": 21130 }, { "epoch": 0.6476339340443791, "grad_norm": 0.8599520112500522, "learning_rate": 2.9176318717387808e-06, "loss": 0.4044, "step": 21131 }, { "epoch": 0.6476645825671202, "grad_norm": 1.611384541410393, "learning_rate": 2.917180655728529e-06, "loss": 0.6131, "step": 21132 }, { "epoch": 0.6476952310898615, "grad_norm": 1.7721456486040053, "learning_rate": 2.91672946024083e-06, "loss": 0.5455, "step": 21133 }, { "epoch": 0.6477258796126026, "grad_norm": 1.9095913117728673, "learning_rate": 2.9162782852801268e-06, "loss": 0.5474, "step": 21134 }, { "epoch": 0.6477565281353439, "grad_norm": 1.7384303483988046, "learning_rate": 2.9158271308508667e-06, "loss": 0.6695, "step": 21135 }, { "epoch": 0.647787176658085, "grad_norm": 0.8121881124974981, "learning_rate": 2.915375996957498e-06, "loss": 0.4109, "step": 21136 }, { "epoch": 0.6478178251808263, "grad_norm": 1.604354354405965, "learning_rate": 2.91492488360446e-06, "loss": 0.5956, "step": 21137 }, { "epoch": 0.6478484737035675, "grad_norm": 1.942405227634912, "learning_rate": 2.9144737907962036e-06, "loss": 0.6558, "step": 21138 }, { "epoch": 0.6478791222263087, "grad_norm": 1.9170202817936783, "learning_rate": 2.914022718537168e-06, "loss": 0.5813, "step": 21139 }, { "epoch": 0.6479097707490499, "grad_norm": 1.9090023908856981, "learning_rate": 2.9135716668318e-06, "loss": 0.7029, "step": 21140 }, { "epoch": 0.6479404192717911, "grad_norm": 0.7880923781902116, "learning_rate": 2.9131206356845463e-06, "loss": 0.4069, "step": 21141 }, { "epoch": 0.6479710677945323, "grad_norm": 0.7758152766280073, "learning_rate": 2.912669625099847e-06, "loss": 0.4276, "step": 21142 }, { "epoch": 0.6480017163172735, "grad_norm": 1.8069106612031296, "learning_rate": 2.912218635082148e-06, "loss": 0.5428, "step": 21143 }, { "epoch": 0.6480323648400147, "grad_norm": 1.6486191954485507, "learning_rate": 2.911767665635895e-06, "loss": 0.6203, "step": 21144 }, { "epoch": 0.648063013362756, "grad_norm": 1.8802256165336388, "learning_rate": 2.9113167167655277e-06, "loss": 0.6457, "step": 21145 }, { "epoch": 0.6480936618854971, "grad_norm": 1.7027322461471488, "learning_rate": 2.9108657884754908e-06, "loss": 0.6454, "step": 21146 }, { "epoch": 0.6481243104082384, "grad_norm": 1.982031393459396, "learning_rate": 2.9104148807702302e-06, "loss": 0.726, "step": 21147 }, { "epoch": 0.6481549589309795, "grad_norm": 1.704862750619392, "learning_rate": 2.9099639936541845e-06, "loss": 0.6609, "step": 21148 }, { "epoch": 0.6481856074537208, "grad_norm": 1.6929620249277366, "learning_rate": 2.9095131271318e-06, "loss": 0.4622, "step": 21149 }, { "epoch": 0.6482162559764619, "grad_norm": 1.700069254323458, "learning_rate": 2.909062281207515e-06, "loss": 0.6096, "step": 21150 }, { "epoch": 0.6482469044992032, "grad_norm": 1.895587541348312, "learning_rate": 2.908611455885775e-06, "loss": 0.646, "step": 21151 }, { "epoch": 0.6482775530219443, "grad_norm": 1.6285748442183108, "learning_rate": 2.9081606511710236e-06, "loss": 0.5956, "step": 21152 }, { "epoch": 0.6483082015446855, "grad_norm": 1.7471545530695725, "learning_rate": 2.9077098670676983e-06, "loss": 0.5917, "step": 21153 }, { "epoch": 0.6483388500674268, "grad_norm": 1.7607605239051305, "learning_rate": 2.9072591035802435e-06, "loss": 0.5505, "step": 21154 }, { "epoch": 0.6483694985901679, "grad_norm": 1.9027771933896056, "learning_rate": 2.9068083607131015e-06, "loss": 0.5425, "step": 21155 }, { "epoch": 0.6484001471129092, "grad_norm": 1.7423160500742827, "learning_rate": 2.9063576384707103e-06, "loss": 0.5722, "step": 21156 }, { "epoch": 0.6484307956356503, "grad_norm": 1.809088495803579, "learning_rate": 2.905906936857513e-06, "loss": 0.6681, "step": 21157 }, { "epoch": 0.6484614441583916, "grad_norm": 1.7987354788663625, "learning_rate": 2.9054562558779524e-06, "loss": 0.7025, "step": 21158 }, { "epoch": 0.6484920926811327, "grad_norm": 2.1503475656295308, "learning_rate": 2.905005595536465e-06, "loss": 0.6516, "step": 21159 }, { "epoch": 0.648522741203874, "grad_norm": 1.9583277809473716, "learning_rate": 2.9045549558374965e-06, "loss": 0.5306, "step": 21160 }, { "epoch": 0.6485533897266151, "grad_norm": 1.7166517404883155, "learning_rate": 2.9041043367854815e-06, "loss": 0.6016, "step": 21161 }, { "epoch": 0.6485840382493564, "grad_norm": 2.032727493643695, "learning_rate": 2.903653738384863e-06, "loss": 0.6567, "step": 21162 }, { "epoch": 0.6486146867720975, "grad_norm": 1.7719016219203076, "learning_rate": 2.9032031606400825e-06, "loss": 0.6314, "step": 21163 }, { "epoch": 0.6486453352948388, "grad_norm": 1.7362033803184573, "learning_rate": 2.9027526035555754e-06, "loss": 0.5182, "step": 21164 }, { "epoch": 0.64867598381758, "grad_norm": 1.5613111881475064, "learning_rate": 2.9023020671357837e-06, "loss": 0.5889, "step": 21165 }, { "epoch": 0.6487066323403212, "grad_norm": 1.9237689323728724, "learning_rate": 2.9018515513851487e-06, "loss": 0.6048, "step": 21166 }, { "epoch": 0.6487372808630624, "grad_norm": 1.8604632758553856, "learning_rate": 2.901401056308105e-06, "loss": 0.7054, "step": 21167 }, { "epoch": 0.6487679293858036, "grad_norm": 1.9474732301319921, "learning_rate": 2.9009505819090934e-06, "loss": 0.6712, "step": 21168 }, { "epoch": 0.6487985779085448, "grad_norm": 1.7387950692233634, "learning_rate": 2.9005001281925548e-06, "loss": 0.5382, "step": 21169 }, { "epoch": 0.648829226431286, "grad_norm": 1.7994046859184873, "learning_rate": 2.9000496951629244e-06, "loss": 0.685, "step": 21170 }, { "epoch": 0.6488598749540272, "grad_norm": 1.7989103298272864, "learning_rate": 2.8995992828246428e-06, "loss": 0.5844, "step": 21171 }, { "epoch": 0.6488905234767685, "grad_norm": 1.7989579418927184, "learning_rate": 2.8991488911821455e-06, "loss": 0.6086, "step": 21172 }, { "epoch": 0.6489211719995096, "grad_norm": 1.7615356759731056, "learning_rate": 2.898698520239871e-06, "loss": 0.6283, "step": 21173 }, { "epoch": 0.6489518205222509, "grad_norm": 1.649936277201448, "learning_rate": 2.8982481700022604e-06, "loss": 0.6339, "step": 21174 }, { "epoch": 0.648982469044992, "grad_norm": 1.716035554710949, "learning_rate": 2.8977978404737458e-06, "loss": 0.6125, "step": 21175 }, { "epoch": 0.6490131175677333, "grad_norm": 1.8757485054746303, "learning_rate": 2.8973475316587667e-06, "loss": 0.6171, "step": 21176 }, { "epoch": 0.6490437660904744, "grad_norm": 1.7267486242738959, "learning_rate": 2.8968972435617624e-06, "loss": 0.634, "step": 21177 }, { "epoch": 0.6490744146132157, "grad_norm": 1.8470426653077208, "learning_rate": 2.896446976187166e-06, "loss": 0.5931, "step": 21178 }, { "epoch": 0.6491050631359568, "grad_norm": 0.8611766667368776, "learning_rate": 2.8959967295394183e-06, "loss": 0.4365, "step": 21179 }, { "epoch": 0.6491357116586981, "grad_norm": 1.7686044124546272, "learning_rate": 2.8955465036229503e-06, "loss": 0.6226, "step": 21180 }, { "epoch": 0.6491663601814393, "grad_norm": 1.575978917546248, "learning_rate": 2.8950962984422015e-06, "loss": 0.5646, "step": 21181 }, { "epoch": 0.6491970087041805, "grad_norm": 1.4925726885758075, "learning_rate": 2.894646114001609e-06, "loss": 0.4188, "step": 21182 }, { "epoch": 0.6492276572269217, "grad_norm": 1.598731890765702, "learning_rate": 2.8941959503056053e-06, "loss": 0.5492, "step": 21183 }, { "epoch": 0.6492583057496628, "grad_norm": 1.9705932064123075, "learning_rate": 2.8937458073586276e-06, "loss": 0.5887, "step": 21184 }, { "epoch": 0.6492889542724041, "grad_norm": 1.7816568043978585, "learning_rate": 2.893295685165114e-06, "loss": 0.604, "step": 21185 }, { "epoch": 0.6493196027951452, "grad_norm": 1.6858637421507754, "learning_rate": 2.8928455837294944e-06, "loss": 0.5467, "step": 21186 }, { "epoch": 0.6493502513178865, "grad_norm": 1.8989091993893334, "learning_rate": 2.892395503056207e-06, "loss": 0.5998, "step": 21187 }, { "epoch": 0.6493808998406276, "grad_norm": 0.8110550240504627, "learning_rate": 2.891945443149687e-06, "loss": 0.4099, "step": 21188 }, { "epoch": 0.6494115483633689, "grad_norm": 1.8799060395671585, "learning_rate": 2.891495404014366e-06, "loss": 0.473, "step": 21189 }, { "epoch": 0.64944219688611, "grad_norm": 1.7422757295252915, "learning_rate": 2.891045385654683e-06, "loss": 0.5963, "step": 21190 }, { "epoch": 0.6494728454088513, "grad_norm": 1.5385168079886702, "learning_rate": 2.8905953880750646e-06, "loss": 0.6162, "step": 21191 }, { "epoch": 0.6495034939315925, "grad_norm": 0.799519465808415, "learning_rate": 2.8901454112799543e-06, "loss": 0.4259, "step": 21192 }, { "epoch": 0.6495341424543337, "grad_norm": 1.7277339769676863, "learning_rate": 2.889695455273781e-06, "loss": 0.5723, "step": 21193 }, { "epoch": 0.6495647909770749, "grad_norm": 1.8321414596059333, "learning_rate": 2.889245520060976e-06, "loss": 0.6246, "step": 21194 }, { "epoch": 0.6495954394998161, "grad_norm": 1.6956784863492766, "learning_rate": 2.8887956056459755e-06, "loss": 0.5601, "step": 21195 }, { "epoch": 0.6496260880225573, "grad_norm": 0.7587203388059279, "learning_rate": 2.8883457120332136e-06, "loss": 0.4251, "step": 21196 }, { "epoch": 0.6496567365452985, "grad_norm": 1.7883616665203155, "learning_rate": 2.8878958392271194e-06, "loss": 0.5282, "step": 21197 }, { "epoch": 0.6496873850680397, "grad_norm": 0.800220178420437, "learning_rate": 2.8874459872321283e-06, "loss": 0.4208, "step": 21198 }, { "epoch": 0.649718033590781, "grad_norm": 1.8058867057800312, "learning_rate": 2.886996156052673e-06, "loss": 0.598, "step": 21199 }, { "epoch": 0.6497486821135221, "grad_norm": 1.7320011961183668, "learning_rate": 2.8865463456931865e-06, "loss": 0.6444, "step": 21200 }, { "epoch": 0.6497793306362634, "grad_norm": 1.8427165299402053, "learning_rate": 2.886096556158099e-06, "loss": 0.5632, "step": 21201 }, { "epoch": 0.6498099791590045, "grad_norm": 1.8284642128628787, "learning_rate": 2.88564678745184e-06, "loss": 0.5987, "step": 21202 }, { "epoch": 0.6498406276817458, "grad_norm": 1.6419436000731673, "learning_rate": 2.8851970395788474e-06, "loss": 0.5843, "step": 21203 }, { "epoch": 0.6498712762044869, "grad_norm": 1.5981116149353911, "learning_rate": 2.8847473125435497e-06, "loss": 0.6088, "step": 21204 }, { "epoch": 0.6499019247272282, "grad_norm": 1.7118018300186095, "learning_rate": 2.884297606350377e-06, "loss": 0.5668, "step": 21205 }, { "epoch": 0.6499325732499693, "grad_norm": 1.6861687739202071, "learning_rate": 2.8838479210037603e-06, "loss": 0.6042, "step": 21206 }, { "epoch": 0.6499632217727106, "grad_norm": 1.685440166693117, "learning_rate": 2.883398256508133e-06, "loss": 0.542, "step": 21207 }, { "epoch": 0.6499938702954517, "grad_norm": 1.784920782231317, "learning_rate": 2.8829486128679234e-06, "loss": 0.6449, "step": 21208 }, { "epoch": 0.650024518818193, "grad_norm": 1.740621647039357, "learning_rate": 2.8824989900875623e-06, "loss": 0.5856, "step": 21209 }, { "epoch": 0.6500551673409342, "grad_norm": 1.602555210302882, "learning_rate": 2.8820493881714807e-06, "loss": 0.5886, "step": 21210 }, { "epoch": 0.6500858158636754, "grad_norm": 1.49393186501103, "learning_rate": 2.8815998071241097e-06, "loss": 0.4778, "step": 21211 }, { "epoch": 0.6501164643864166, "grad_norm": 2.0772169712941735, "learning_rate": 2.881150246949878e-06, "loss": 0.6532, "step": 21212 }, { "epoch": 0.6501471129091578, "grad_norm": 1.6153053139431912, "learning_rate": 2.880700707653211e-06, "loss": 0.5672, "step": 21213 }, { "epoch": 0.650177761431899, "grad_norm": 1.7516002040321517, "learning_rate": 2.8802511892385466e-06, "loss": 0.65, "step": 21214 }, { "epoch": 0.6502084099546401, "grad_norm": 1.6399470995009913, "learning_rate": 2.87980169171031e-06, "loss": 0.5751, "step": 21215 }, { "epoch": 0.6502390584773814, "grad_norm": 1.8344177127918808, "learning_rate": 2.879352215072927e-06, "loss": 0.5936, "step": 21216 }, { "epoch": 0.6502697070001225, "grad_norm": 0.8814447387557726, "learning_rate": 2.8789027593308295e-06, "loss": 0.4303, "step": 21217 }, { "epoch": 0.6503003555228638, "grad_norm": 1.643847818859935, "learning_rate": 2.878453324488446e-06, "loss": 0.6061, "step": 21218 }, { "epoch": 0.650331004045605, "grad_norm": 0.8341278583720135, "learning_rate": 2.878003910550206e-06, "loss": 0.4248, "step": 21219 }, { "epoch": 0.6503616525683462, "grad_norm": 1.7257604707586836, "learning_rate": 2.877554517520535e-06, "loss": 0.5552, "step": 21220 }, { "epoch": 0.6503923010910874, "grad_norm": 1.6141672823343647, "learning_rate": 2.8771051454038622e-06, "loss": 0.5126, "step": 21221 }, { "epoch": 0.6504229496138286, "grad_norm": 1.9471511245778435, "learning_rate": 2.876655794204618e-06, "loss": 0.6218, "step": 21222 }, { "epoch": 0.6504535981365698, "grad_norm": 1.8022541075280618, "learning_rate": 2.876206463927227e-06, "loss": 0.5669, "step": 21223 }, { "epoch": 0.650484246659311, "grad_norm": 1.7477643556336988, "learning_rate": 2.8757571545761152e-06, "loss": 0.5793, "step": 21224 }, { "epoch": 0.6505148951820522, "grad_norm": 1.6830370210107148, "learning_rate": 2.875307866155712e-06, "loss": 0.684, "step": 21225 }, { "epoch": 0.6505455437047934, "grad_norm": 1.7597900418226666, "learning_rate": 2.8748585986704437e-06, "loss": 0.5938, "step": 21226 }, { "epoch": 0.6505761922275346, "grad_norm": 2.1334000782459386, "learning_rate": 2.8744093521247396e-06, "loss": 0.7154, "step": 21227 }, { "epoch": 0.6506068407502759, "grad_norm": 1.8665082455807185, "learning_rate": 2.8739601265230216e-06, "loss": 0.6835, "step": 21228 }, { "epoch": 0.650637489273017, "grad_norm": 1.6899821178448424, "learning_rate": 2.873510921869719e-06, "loss": 0.5967, "step": 21229 }, { "epoch": 0.6506681377957583, "grad_norm": 1.9551793373273603, "learning_rate": 2.8730617381692583e-06, "loss": 0.5295, "step": 21230 }, { "epoch": 0.6506987863184994, "grad_norm": 1.8725811655307594, "learning_rate": 2.872612575426066e-06, "loss": 0.6987, "step": 21231 }, { "epoch": 0.6507294348412407, "grad_norm": 1.6799131357534627, "learning_rate": 2.8721634336445616e-06, "loss": 0.5779, "step": 21232 }, { "epoch": 0.6507600833639818, "grad_norm": 1.9195982480304454, "learning_rate": 2.871714312829179e-06, "loss": 0.6317, "step": 21233 }, { "epoch": 0.6507907318867231, "grad_norm": 1.7695712341826315, "learning_rate": 2.87126521298434e-06, "loss": 0.5855, "step": 21234 }, { "epoch": 0.6508213804094642, "grad_norm": 1.857968635188257, "learning_rate": 2.870816134114468e-06, "loss": 0.6945, "step": 21235 }, { "epoch": 0.6508520289322055, "grad_norm": 1.7349246287400557, "learning_rate": 2.8703670762239886e-06, "loss": 0.5465, "step": 21236 }, { "epoch": 0.6508826774549467, "grad_norm": 1.928458098893157, "learning_rate": 2.8699180393173275e-06, "loss": 0.65, "step": 21237 }, { "epoch": 0.6509133259776879, "grad_norm": 1.8367076171108876, "learning_rate": 2.8694690233989116e-06, "loss": 0.6777, "step": 21238 }, { "epoch": 0.6509439745004291, "grad_norm": 0.7894611929357419, "learning_rate": 2.86902002847316e-06, "loss": 0.4101, "step": 21239 }, { "epoch": 0.6509746230231703, "grad_norm": 2.1297880243960097, "learning_rate": 2.8685710545444996e-06, "loss": 0.5948, "step": 21240 }, { "epoch": 0.6510052715459115, "grad_norm": 1.7856966460464796, "learning_rate": 2.8681221016173554e-06, "loss": 0.5588, "step": 21241 }, { "epoch": 0.6510359200686527, "grad_norm": 1.6827954908615188, "learning_rate": 2.86767316969615e-06, "loss": 0.6482, "step": 21242 }, { "epoch": 0.6510665685913939, "grad_norm": 1.5325770105071128, "learning_rate": 2.867224258785303e-06, "loss": 0.7642, "step": 21243 }, { "epoch": 0.6510972171141352, "grad_norm": 1.6935520650756108, "learning_rate": 2.8667753688892442e-06, "loss": 0.5718, "step": 21244 }, { "epoch": 0.6511278656368763, "grad_norm": 1.7514405810737828, "learning_rate": 2.866326500012392e-06, "loss": 0.6971, "step": 21245 }, { "epoch": 0.6511585141596175, "grad_norm": 0.7555831316386609, "learning_rate": 2.865877652159172e-06, "loss": 0.3941, "step": 21246 }, { "epoch": 0.6511891626823587, "grad_norm": 1.8872497561424857, "learning_rate": 2.865428825334004e-06, "loss": 0.6136, "step": 21247 }, { "epoch": 0.6512198112050999, "grad_norm": 1.7010337812715528, "learning_rate": 2.864980019541312e-06, "loss": 0.5895, "step": 21248 }, { "epoch": 0.6512504597278411, "grad_norm": 1.8035856592868469, "learning_rate": 2.8645312347855204e-06, "loss": 0.6678, "step": 21249 }, { "epoch": 0.6512811082505823, "grad_norm": 1.969030452576208, "learning_rate": 2.8640824710710464e-06, "loss": 0.6398, "step": 21250 }, { "epoch": 0.6513117567733235, "grad_norm": 1.8479062458822282, "learning_rate": 2.8636337284023143e-06, "loss": 0.5462, "step": 21251 }, { "epoch": 0.6513424052960647, "grad_norm": 1.968232606651758, "learning_rate": 2.863185006783748e-06, "loss": 0.5814, "step": 21252 }, { "epoch": 0.651373053818806, "grad_norm": 1.7303614399284084, "learning_rate": 2.8627363062197664e-06, "loss": 0.5715, "step": 21253 }, { "epoch": 0.6514037023415471, "grad_norm": 1.8185477764252511, "learning_rate": 2.862287626714787e-06, "loss": 0.6112, "step": 21254 }, { "epoch": 0.6514343508642884, "grad_norm": 1.8878519290671683, "learning_rate": 2.861838968273238e-06, "loss": 0.6289, "step": 21255 }, { "epoch": 0.6514649993870295, "grad_norm": 1.7001433963712367, "learning_rate": 2.8613903308995356e-06, "loss": 0.6092, "step": 21256 }, { "epoch": 0.6514956479097708, "grad_norm": 1.7344412787167536, "learning_rate": 2.8609417145981034e-06, "loss": 0.5913, "step": 21257 }, { "epoch": 0.6515262964325119, "grad_norm": 1.8469518716786788, "learning_rate": 2.860493119373357e-06, "loss": 0.5899, "step": 21258 }, { "epoch": 0.6515569449552532, "grad_norm": 1.7826957864843893, "learning_rate": 2.86004454522972e-06, "loss": 0.5407, "step": 21259 }, { "epoch": 0.6515875934779943, "grad_norm": 1.684368817851811, "learning_rate": 2.859595992171613e-06, "loss": 0.5819, "step": 21260 }, { "epoch": 0.6516182420007356, "grad_norm": 1.790402687595252, "learning_rate": 2.859147460203453e-06, "loss": 0.5792, "step": 21261 }, { "epoch": 0.6516488905234767, "grad_norm": 1.7982282405105912, "learning_rate": 2.8586989493296603e-06, "loss": 0.632, "step": 21262 }, { "epoch": 0.651679539046218, "grad_norm": 1.6061255393857625, "learning_rate": 2.858250459554657e-06, "loss": 0.5485, "step": 21263 }, { "epoch": 0.6517101875689592, "grad_norm": 0.7793641632987017, "learning_rate": 2.857801990882858e-06, "loss": 0.4127, "step": 21264 }, { "epoch": 0.6517408360917004, "grad_norm": 1.7612306682307781, "learning_rate": 2.857353543318684e-06, "loss": 0.6836, "step": 21265 }, { "epoch": 0.6517714846144416, "grad_norm": 2.1102349750964806, "learning_rate": 2.856905116866556e-06, "loss": 0.6783, "step": 21266 }, { "epoch": 0.6518021331371828, "grad_norm": 1.7029737107648075, "learning_rate": 2.856456711530887e-06, "loss": 0.5762, "step": 21267 }, { "epoch": 0.651832781659924, "grad_norm": 2.036318230140799, "learning_rate": 2.856008327316102e-06, "loss": 0.631, "step": 21268 }, { "epoch": 0.6518634301826652, "grad_norm": 2.269805594552434, "learning_rate": 2.855559964226613e-06, "loss": 0.5681, "step": 21269 }, { "epoch": 0.6518940787054064, "grad_norm": 1.7823261270592703, "learning_rate": 2.85511162226684e-06, "loss": 0.6134, "step": 21270 }, { "epoch": 0.6519247272281476, "grad_norm": 1.5204890332309307, "learning_rate": 2.8546633014412035e-06, "loss": 0.5343, "step": 21271 }, { "epoch": 0.6519553757508888, "grad_norm": 1.465870070499181, "learning_rate": 2.8542150017541158e-06, "loss": 0.5311, "step": 21272 }, { "epoch": 0.6519860242736301, "grad_norm": 1.999488372551191, "learning_rate": 2.8537667232099975e-06, "loss": 0.5963, "step": 21273 }, { "epoch": 0.6520166727963712, "grad_norm": 1.773266655286881, "learning_rate": 2.8533184658132662e-06, "loss": 0.624, "step": 21274 }, { "epoch": 0.6520473213191125, "grad_norm": 1.6956460727019083, "learning_rate": 2.852870229568335e-06, "loss": 0.5685, "step": 21275 }, { "epoch": 0.6520779698418536, "grad_norm": 1.5781281792389195, "learning_rate": 2.8524220144796257e-06, "loss": 0.5017, "step": 21276 }, { "epoch": 0.6521086183645948, "grad_norm": 1.718496685750533, "learning_rate": 2.851973820551549e-06, "loss": 0.5422, "step": 21277 }, { "epoch": 0.652139266887336, "grad_norm": 1.6688679117427467, "learning_rate": 2.8515256477885247e-06, "loss": 0.5941, "step": 21278 }, { "epoch": 0.6521699154100772, "grad_norm": 1.8301493005288247, "learning_rate": 2.8510774961949694e-06, "loss": 0.6195, "step": 21279 }, { "epoch": 0.6522005639328184, "grad_norm": 1.6109496712362787, "learning_rate": 2.8506293657752947e-06, "loss": 0.5913, "step": 21280 }, { "epoch": 0.6522312124555596, "grad_norm": 1.6002666353760469, "learning_rate": 2.85018125653392e-06, "loss": 0.5721, "step": 21281 }, { "epoch": 0.6522618609783009, "grad_norm": 1.8789734160034501, "learning_rate": 2.8497331684752605e-06, "loss": 0.6653, "step": 21282 }, { "epoch": 0.652292509501042, "grad_norm": 1.4848791988639805, "learning_rate": 2.849285101603729e-06, "loss": 0.5491, "step": 21283 }, { "epoch": 0.6523231580237833, "grad_norm": 1.7250018791356405, "learning_rate": 2.848837055923741e-06, "loss": 0.5566, "step": 21284 }, { "epoch": 0.6523538065465244, "grad_norm": 1.9791771555770643, "learning_rate": 2.8483890314397145e-06, "loss": 0.6162, "step": 21285 }, { "epoch": 0.6523844550692657, "grad_norm": 0.8216696050369154, "learning_rate": 2.8479410281560595e-06, "loss": 0.4313, "step": 21286 }, { "epoch": 0.6524151035920068, "grad_norm": 1.795562735929404, "learning_rate": 2.8474930460771933e-06, "loss": 0.6012, "step": 21287 }, { "epoch": 0.6524457521147481, "grad_norm": 1.7404170462642976, "learning_rate": 2.8470450852075273e-06, "loss": 0.5782, "step": 21288 }, { "epoch": 0.6524764006374892, "grad_norm": 1.8185102155859447, "learning_rate": 2.8465971455514774e-06, "loss": 0.6216, "step": 21289 }, { "epoch": 0.6525070491602305, "grad_norm": 0.7909121015602187, "learning_rate": 2.8461492271134585e-06, "loss": 0.4259, "step": 21290 }, { "epoch": 0.6525376976829717, "grad_norm": 1.9363808443877004, "learning_rate": 2.8457013298978797e-06, "loss": 0.5356, "step": 21291 }, { "epoch": 0.6525683462057129, "grad_norm": 1.6250558620808369, "learning_rate": 2.8452534539091574e-06, "loss": 0.508, "step": 21292 }, { "epoch": 0.6525989947284541, "grad_norm": 1.8639833037339757, "learning_rate": 2.8448055991517065e-06, "loss": 0.617, "step": 21293 }, { "epoch": 0.6526296432511953, "grad_norm": 1.8812704096061443, "learning_rate": 2.844357765629935e-06, "loss": 0.5315, "step": 21294 }, { "epoch": 0.6526602917739365, "grad_norm": 1.784642359976055, "learning_rate": 2.843909953348258e-06, "loss": 0.5469, "step": 21295 }, { "epoch": 0.6526909402966777, "grad_norm": 1.9470396566618584, "learning_rate": 2.8434621623110904e-06, "loss": 0.6087, "step": 21296 }, { "epoch": 0.6527215888194189, "grad_norm": 1.6800681093831844, "learning_rate": 2.8430143925228394e-06, "loss": 0.5542, "step": 21297 }, { "epoch": 0.6527522373421601, "grad_norm": 0.7936767794946292, "learning_rate": 2.8425666439879207e-06, "loss": 0.4285, "step": 21298 }, { "epoch": 0.6527828858649013, "grad_norm": 1.683818675194742, "learning_rate": 2.8421189167107422e-06, "loss": 0.5717, "step": 21299 }, { "epoch": 0.6528135343876426, "grad_norm": 1.8078149400919798, "learning_rate": 2.8416712106957213e-06, "loss": 0.5887, "step": 21300 }, { "epoch": 0.6528441829103837, "grad_norm": 1.7395698808094664, "learning_rate": 2.8412235259472663e-06, "loss": 0.6359, "step": 21301 }, { "epoch": 0.652874831433125, "grad_norm": 1.8483640527452332, "learning_rate": 2.840775862469787e-06, "loss": 0.7238, "step": 21302 }, { "epoch": 0.6529054799558661, "grad_norm": 1.6397164688823607, "learning_rate": 2.840328220267695e-06, "loss": 0.5554, "step": 21303 }, { "epoch": 0.6529361284786074, "grad_norm": 1.604516101728577, "learning_rate": 2.8398805993454037e-06, "loss": 0.6128, "step": 21304 }, { "epoch": 0.6529667770013485, "grad_norm": 1.7447030010683995, "learning_rate": 2.8394329997073193e-06, "loss": 0.6095, "step": 21305 }, { "epoch": 0.6529974255240898, "grad_norm": 1.7445245682703994, "learning_rate": 2.838985421357855e-06, "loss": 0.5583, "step": 21306 }, { "epoch": 0.6530280740468309, "grad_norm": 1.9321295189929912, "learning_rate": 2.8385378643014215e-06, "loss": 0.598, "step": 21307 }, { "epoch": 0.6530587225695721, "grad_norm": 1.731247451481944, "learning_rate": 2.838090328542426e-06, "loss": 0.6032, "step": 21308 }, { "epoch": 0.6530893710923134, "grad_norm": 1.8826909866198405, "learning_rate": 2.8376428140852812e-06, "loss": 0.6947, "step": 21309 }, { "epoch": 0.6531200196150545, "grad_norm": 1.8252688647063942, "learning_rate": 2.8371953209343918e-06, "loss": 0.6317, "step": 21310 }, { "epoch": 0.6531506681377958, "grad_norm": 1.5934642280761977, "learning_rate": 2.8367478490941737e-06, "loss": 0.605, "step": 21311 }, { "epoch": 0.6531813166605369, "grad_norm": 1.6214022684839153, "learning_rate": 2.8363003985690323e-06, "loss": 0.5221, "step": 21312 }, { "epoch": 0.6532119651832782, "grad_norm": 0.7944624214209801, "learning_rate": 2.8358529693633752e-06, "loss": 0.4188, "step": 21313 }, { "epoch": 0.6532426137060193, "grad_norm": 1.8671225218315655, "learning_rate": 2.835405561481612e-06, "loss": 0.5864, "step": 21314 }, { "epoch": 0.6532732622287606, "grad_norm": 2.055091508494061, "learning_rate": 2.834958174928154e-06, "loss": 0.5939, "step": 21315 }, { "epoch": 0.6533039107515017, "grad_norm": 1.5950895197191584, "learning_rate": 2.834510809707405e-06, "loss": 0.5518, "step": 21316 }, { "epoch": 0.653334559274243, "grad_norm": 2.0254718337616233, "learning_rate": 2.8340634658237747e-06, "loss": 0.7646, "step": 21317 }, { "epoch": 0.6533652077969841, "grad_norm": 0.773364667662555, "learning_rate": 2.8336161432816716e-06, "loss": 0.4081, "step": 21318 }, { "epoch": 0.6533958563197254, "grad_norm": 1.7952748845514566, "learning_rate": 2.833168842085505e-06, "loss": 0.6092, "step": 21319 }, { "epoch": 0.6534265048424666, "grad_norm": 2.120822073540666, "learning_rate": 2.8327215622396803e-06, "loss": 0.6302, "step": 21320 }, { "epoch": 0.6534571533652078, "grad_norm": 2.0177830149006026, "learning_rate": 2.8322743037486022e-06, "loss": 0.5659, "step": 21321 }, { "epoch": 0.653487801887949, "grad_norm": 1.7319602964921188, "learning_rate": 2.83182706661668e-06, "loss": 0.6359, "step": 21322 }, { "epoch": 0.6535184504106902, "grad_norm": 1.92217999987481, "learning_rate": 2.8313798508483226e-06, "loss": 0.6367, "step": 21323 }, { "epoch": 0.6535490989334314, "grad_norm": 1.7670934720178357, "learning_rate": 2.8309326564479328e-06, "loss": 0.6811, "step": 21324 }, { "epoch": 0.6535797474561726, "grad_norm": 1.7417479093757247, "learning_rate": 2.830485483419918e-06, "loss": 0.5745, "step": 21325 }, { "epoch": 0.6536103959789138, "grad_norm": 1.6967015977644937, "learning_rate": 2.830038331768685e-06, "loss": 0.5466, "step": 21326 }, { "epoch": 0.653641044501655, "grad_norm": 1.802012046078078, "learning_rate": 2.8295912014986417e-06, "loss": 0.5816, "step": 21327 }, { "epoch": 0.6536716930243962, "grad_norm": 2.118250131593261, "learning_rate": 2.8291440926141912e-06, "loss": 0.6706, "step": 21328 }, { "epoch": 0.6537023415471375, "grad_norm": 1.6269912458498377, "learning_rate": 2.828697005119736e-06, "loss": 0.5524, "step": 21329 }, { "epoch": 0.6537329900698786, "grad_norm": 1.8699192862395253, "learning_rate": 2.8282499390196883e-06, "loss": 0.6831, "step": 21330 }, { "epoch": 0.6537636385926199, "grad_norm": 1.858497525275795, "learning_rate": 2.82780289431845e-06, "loss": 0.6496, "step": 21331 }, { "epoch": 0.653794287115361, "grad_norm": 1.853466753555718, "learning_rate": 2.827355871020423e-06, "loss": 0.6839, "step": 21332 }, { "epoch": 0.6538249356381023, "grad_norm": 1.6103226386996734, "learning_rate": 2.826908869130015e-06, "loss": 0.5667, "step": 21333 }, { "epoch": 0.6538555841608434, "grad_norm": 1.5213772031661985, "learning_rate": 2.8264618886516315e-06, "loss": 0.4859, "step": 21334 }, { "epoch": 0.6538862326835847, "grad_norm": 1.7795844428591496, "learning_rate": 2.8260149295896734e-06, "loss": 0.6298, "step": 21335 }, { "epoch": 0.6539168812063259, "grad_norm": 1.9608365407717556, "learning_rate": 2.825567991948546e-06, "loss": 0.6217, "step": 21336 }, { "epoch": 0.6539475297290671, "grad_norm": 0.793819808991368, "learning_rate": 2.825121075732654e-06, "loss": 0.4265, "step": 21337 }, { "epoch": 0.6539781782518083, "grad_norm": 1.7915097344224713, "learning_rate": 2.8246741809464024e-06, "loss": 0.5412, "step": 21338 }, { "epoch": 0.6540088267745494, "grad_norm": 1.6954834402440484, "learning_rate": 2.824227307594193e-06, "loss": 0.6256, "step": 21339 }, { "epoch": 0.6540394752972907, "grad_norm": 1.7572247510514007, "learning_rate": 2.823780455680424e-06, "loss": 0.6379, "step": 21340 }, { "epoch": 0.6540701238200318, "grad_norm": 1.8605836941332285, "learning_rate": 2.8233336252095073e-06, "loss": 0.5859, "step": 21341 }, { "epoch": 0.6541007723427731, "grad_norm": 1.8494269988494862, "learning_rate": 2.8228868161858413e-06, "loss": 0.6101, "step": 21342 }, { "epoch": 0.6541314208655142, "grad_norm": 1.5956202420017223, "learning_rate": 2.8224400286138264e-06, "loss": 0.5879, "step": 21343 }, { "epoch": 0.6541620693882555, "grad_norm": 1.6258477160739158, "learning_rate": 2.8219932624978675e-06, "loss": 0.6344, "step": 21344 }, { "epoch": 0.6541927179109966, "grad_norm": 1.5823091397818323, "learning_rate": 2.8215465178423663e-06, "loss": 0.5935, "step": 21345 }, { "epoch": 0.6542233664337379, "grad_norm": 1.8037322012381838, "learning_rate": 2.821099794651726e-06, "loss": 0.5646, "step": 21346 }, { "epoch": 0.6542540149564791, "grad_norm": 0.7943416766926213, "learning_rate": 2.820653092930345e-06, "loss": 0.4077, "step": 21347 }, { "epoch": 0.6542846634792203, "grad_norm": 1.8806490402076028, "learning_rate": 2.820206412682627e-06, "loss": 0.5666, "step": 21348 }, { "epoch": 0.6543153120019615, "grad_norm": 2.1407043417170604, "learning_rate": 2.819759753912975e-06, "loss": 0.615, "step": 21349 }, { "epoch": 0.6543459605247027, "grad_norm": 1.7005081437394414, "learning_rate": 2.8193131166257875e-06, "loss": 0.6077, "step": 21350 }, { "epoch": 0.6543766090474439, "grad_norm": 1.9937684609137134, "learning_rate": 2.8188665008254622e-06, "loss": 0.7495, "step": 21351 }, { "epoch": 0.6544072575701851, "grad_norm": 1.784961525153754, "learning_rate": 2.8184199065164076e-06, "loss": 0.6008, "step": 21352 }, { "epoch": 0.6544379060929263, "grad_norm": 0.7607827053930196, "learning_rate": 2.8179733337030167e-06, "loss": 0.4229, "step": 21353 }, { "epoch": 0.6544685546156676, "grad_norm": 2.1606061379555017, "learning_rate": 2.817526782389696e-06, "loss": 0.7106, "step": 21354 }, { "epoch": 0.6544992031384087, "grad_norm": 1.726005207485666, "learning_rate": 2.8170802525808398e-06, "loss": 0.5766, "step": 21355 }, { "epoch": 0.65452985166115, "grad_norm": 1.9241542227306423, "learning_rate": 2.816633744280851e-06, "loss": 0.6002, "step": 21356 }, { "epoch": 0.6545605001838911, "grad_norm": 1.7305166963724843, "learning_rate": 2.8161872574941295e-06, "loss": 0.5939, "step": 21357 }, { "epoch": 0.6545911487066324, "grad_norm": 1.9890081510662239, "learning_rate": 2.8157407922250725e-06, "loss": 0.7018, "step": 21358 }, { "epoch": 0.6546217972293735, "grad_norm": 1.6465511675158606, "learning_rate": 2.8152943484780804e-06, "loss": 0.6212, "step": 21359 }, { "epoch": 0.6546524457521148, "grad_norm": 0.7962310765239404, "learning_rate": 2.8148479262575536e-06, "loss": 0.4079, "step": 21360 }, { "epoch": 0.6546830942748559, "grad_norm": 1.8056985674638402, "learning_rate": 2.81440152556789e-06, "loss": 0.606, "step": 21361 }, { "epoch": 0.6547137427975972, "grad_norm": 1.7814889018897935, "learning_rate": 2.8139551464134827e-06, "loss": 0.581, "step": 21362 }, { "epoch": 0.6547443913203383, "grad_norm": 1.866648695423336, "learning_rate": 2.813508788798739e-06, "loss": 0.6171, "step": 21363 }, { "epoch": 0.6547750398430796, "grad_norm": 0.7874163180229834, "learning_rate": 2.81306245272805e-06, "loss": 0.4129, "step": 21364 }, { "epoch": 0.6548056883658208, "grad_norm": 1.8992730968718252, "learning_rate": 2.812616138205819e-06, "loss": 0.6101, "step": 21365 }, { "epoch": 0.654836336888562, "grad_norm": 2.0343960113230346, "learning_rate": 2.812169845236439e-06, "loss": 0.4744, "step": 21366 }, { "epoch": 0.6548669854113032, "grad_norm": 1.7765874448461836, "learning_rate": 2.8117235738243087e-06, "loss": 0.6421, "step": 21367 }, { "epoch": 0.6548976339340444, "grad_norm": 2.0368885179776677, "learning_rate": 2.811277323973828e-06, "loss": 0.5797, "step": 21368 }, { "epoch": 0.6549282824567856, "grad_norm": 1.8019804626918114, "learning_rate": 2.8108310956893896e-06, "loss": 0.5726, "step": 21369 }, { "epoch": 0.6549589309795267, "grad_norm": 1.8652578130153599, "learning_rate": 2.810384888975393e-06, "loss": 0.5432, "step": 21370 }, { "epoch": 0.654989579502268, "grad_norm": 1.7492508064616112, "learning_rate": 2.8099387038362357e-06, "loss": 0.6255, "step": 21371 }, { "epoch": 0.6550202280250091, "grad_norm": 1.958122173182121, "learning_rate": 2.809492540276312e-06, "loss": 0.5661, "step": 21372 }, { "epoch": 0.6550508765477504, "grad_norm": 1.888378787912314, "learning_rate": 2.809046398300019e-06, "loss": 0.6351, "step": 21373 }, { "epoch": 0.6550815250704916, "grad_norm": 1.8524963938061052, "learning_rate": 2.8086002779117515e-06, "loss": 0.6459, "step": 21374 }, { "epoch": 0.6551121735932328, "grad_norm": 1.822399755024566, "learning_rate": 2.8081541791159063e-06, "loss": 0.6425, "step": 21375 }, { "epoch": 0.655142822115974, "grad_norm": 1.8367212465075515, "learning_rate": 2.8077081019168804e-06, "loss": 0.5941, "step": 21376 }, { "epoch": 0.6551734706387152, "grad_norm": 1.671080667648415, "learning_rate": 2.807262046319066e-06, "loss": 0.5817, "step": 21377 }, { "epoch": 0.6552041191614564, "grad_norm": 1.7395092774025627, "learning_rate": 2.806816012326859e-06, "loss": 0.5477, "step": 21378 }, { "epoch": 0.6552347676841976, "grad_norm": 1.8312941197103, "learning_rate": 2.806369999944657e-06, "loss": 0.6954, "step": 21379 }, { "epoch": 0.6552654162069388, "grad_norm": 1.5791424416012165, "learning_rate": 2.8059240091768514e-06, "loss": 0.5195, "step": 21380 }, { "epoch": 0.65529606472968, "grad_norm": 0.8753246533064106, "learning_rate": 2.805478040027837e-06, "loss": 0.4082, "step": 21381 }, { "epoch": 0.6553267132524212, "grad_norm": 2.030137607772288, "learning_rate": 2.8050320925020112e-06, "loss": 0.5327, "step": 21382 }, { "epoch": 0.6553573617751625, "grad_norm": 1.6772432959405064, "learning_rate": 2.8045861666037645e-06, "loss": 0.5427, "step": 21383 }, { "epoch": 0.6553880102979036, "grad_norm": 1.8305614797393084, "learning_rate": 2.8041402623374936e-06, "loss": 0.5631, "step": 21384 }, { "epoch": 0.6554186588206449, "grad_norm": 1.836189654496562, "learning_rate": 2.8036943797075884e-06, "loss": 0.606, "step": 21385 }, { "epoch": 0.655449307343386, "grad_norm": 1.9407774768354578, "learning_rate": 2.8032485187184446e-06, "loss": 0.5522, "step": 21386 }, { "epoch": 0.6554799558661273, "grad_norm": 1.7512094129034395, "learning_rate": 2.802802679374457e-06, "loss": 0.6022, "step": 21387 }, { "epoch": 0.6555106043888684, "grad_norm": 0.7955712021507424, "learning_rate": 2.8023568616800147e-06, "loss": 0.4286, "step": 21388 }, { "epoch": 0.6555412529116097, "grad_norm": 0.7683781135435055, "learning_rate": 2.8019110656395124e-06, "loss": 0.3855, "step": 21389 }, { "epoch": 0.6555719014343508, "grad_norm": 1.675714912351998, "learning_rate": 2.8014652912573453e-06, "loss": 0.588, "step": 21390 }, { "epoch": 0.6556025499570921, "grad_norm": 0.8165591852590856, "learning_rate": 2.8010195385379014e-06, "loss": 0.4282, "step": 21391 }, { "epoch": 0.6556331984798333, "grad_norm": 1.8730074767920943, "learning_rate": 2.800573807485574e-06, "loss": 0.6121, "step": 21392 }, { "epoch": 0.6556638470025745, "grad_norm": 1.7476470119869174, "learning_rate": 2.8001280981047574e-06, "loss": 0.5662, "step": 21393 }, { "epoch": 0.6556944955253157, "grad_norm": 0.7720529416656503, "learning_rate": 2.7996824103998398e-06, "loss": 0.4027, "step": 21394 }, { "epoch": 0.6557251440480569, "grad_norm": 1.835786044141044, "learning_rate": 2.7992367443752167e-06, "loss": 0.5341, "step": 21395 }, { "epoch": 0.6557557925707981, "grad_norm": 1.7132231868070382, "learning_rate": 2.7987911000352752e-06, "loss": 0.5091, "step": 21396 }, { "epoch": 0.6557864410935393, "grad_norm": 1.7975188789055885, "learning_rate": 2.7983454773844078e-06, "loss": 0.5903, "step": 21397 }, { "epoch": 0.6558170896162805, "grad_norm": 1.9274067754698736, "learning_rate": 2.797899876427008e-06, "loss": 0.6309, "step": 21398 }, { "epoch": 0.6558477381390218, "grad_norm": 0.7706900440948502, "learning_rate": 2.7974542971674614e-06, "loss": 0.4024, "step": 21399 }, { "epoch": 0.6558783866617629, "grad_norm": 1.8359159892811296, "learning_rate": 2.797008739610162e-06, "loss": 0.6392, "step": 21400 }, { "epoch": 0.655909035184504, "grad_norm": 2.043488798396027, "learning_rate": 2.7965632037595002e-06, "loss": 0.6412, "step": 21401 }, { "epoch": 0.6559396837072453, "grad_norm": 1.5969109078370662, "learning_rate": 2.7961176896198637e-06, "loss": 0.5749, "step": 21402 }, { "epoch": 0.6559703322299865, "grad_norm": 1.7616998132091244, "learning_rate": 2.7956721971956435e-06, "loss": 0.6139, "step": 21403 }, { "epoch": 0.6560009807527277, "grad_norm": 1.6731910416905222, "learning_rate": 2.7952267264912314e-06, "loss": 0.5579, "step": 21404 }, { "epoch": 0.6560316292754689, "grad_norm": 1.8561570697987677, "learning_rate": 2.7947812775110117e-06, "loss": 0.7399, "step": 21405 }, { "epoch": 0.6560622777982101, "grad_norm": 1.6410941805015573, "learning_rate": 2.7943358502593787e-06, "loss": 0.5599, "step": 21406 }, { "epoch": 0.6560929263209513, "grad_norm": 1.788246589235603, "learning_rate": 2.793890444740715e-06, "loss": 0.5775, "step": 21407 }, { "epoch": 0.6561235748436925, "grad_norm": 1.7844929961597795, "learning_rate": 2.793445060959417e-06, "loss": 0.6102, "step": 21408 }, { "epoch": 0.6561542233664337, "grad_norm": 1.933517557375261, "learning_rate": 2.7929996989198695e-06, "loss": 0.7438, "step": 21409 }, { "epoch": 0.656184871889175, "grad_norm": 1.7894284626828527, "learning_rate": 2.7925543586264588e-06, "loss": 0.6153, "step": 21410 }, { "epoch": 0.6562155204119161, "grad_norm": 1.9463686830526585, "learning_rate": 2.7921090400835747e-06, "loss": 0.5999, "step": 21411 }, { "epoch": 0.6562461689346574, "grad_norm": 1.9690931130307114, "learning_rate": 2.7916637432956066e-06, "loss": 0.6692, "step": 21412 }, { "epoch": 0.6562768174573985, "grad_norm": 1.7138930721778811, "learning_rate": 2.7912184682669396e-06, "loss": 0.5676, "step": 21413 }, { "epoch": 0.6563074659801398, "grad_norm": 1.9772924041243076, "learning_rate": 2.7907732150019617e-06, "loss": 0.5125, "step": 21414 }, { "epoch": 0.6563381145028809, "grad_norm": 1.8918132071837734, "learning_rate": 2.790327983505062e-06, "loss": 0.6882, "step": 21415 }, { "epoch": 0.6563687630256222, "grad_norm": 1.7300934410674211, "learning_rate": 2.789882773780625e-06, "loss": 0.5882, "step": 21416 }, { "epoch": 0.6563994115483633, "grad_norm": 1.836069452102581, "learning_rate": 2.78943758583304e-06, "loss": 0.5685, "step": 21417 }, { "epoch": 0.6564300600711046, "grad_norm": 1.7625981569615596, "learning_rate": 2.7889924196666908e-06, "loss": 0.6105, "step": 21418 }, { "epoch": 0.6564607085938458, "grad_norm": 0.8228398055770814, "learning_rate": 2.788547275285964e-06, "loss": 0.423, "step": 21419 }, { "epoch": 0.656491357116587, "grad_norm": 0.820798669964529, "learning_rate": 2.788102152695249e-06, "loss": 0.4202, "step": 21420 }, { "epoch": 0.6565220056393282, "grad_norm": 1.87706840021976, "learning_rate": 2.787657051898928e-06, "loss": 0.6449, "step": 21421 }, { "epoch": 0.6565526541620694, "grad_norm": 1.9619239857161566, "learning_rate": 2.787211972901387e-06, "loss": 0.678, "step": 21422 }, { "epoch": 0.6565833026848106, "grad_norm": 1.76018640023994, "learning_rate": 2.7867669157070155e-06, "loss": 0.5837, "step": 21423 }, { "epoch": 0.6566139512075518, "grad_norm": 1.7099207056463877, "learning_rate": 2.7863218803201938e-06, "loss": 0.6195, "step": 21424 }, { "epoch": 0.656644599730293, "grad_norm": 2.0067135984873556, "learning_rate": 2.7858768667453107e-06, "loss": 0.6877, "step": 21425 }, { "epoch": 0.6566752482530342, "grad_norm": 1.771443326904726, "learning_rate": 2.7854318749867454e-06, "loss": 0.5652, "step": 21426 }, { "epoch": 0.6567058967757754, "grad_norm": 1.8287287731246717, "learning_rate": 2.784986905048891e-06, "loss": 0.6309, "step": 21427 }, { "epoch": 0.6567365452985167, "grad_norm": 1.732869690521806, "learning_rate": 2.7845419569361263e-06, "loss": 0.6053, "step": 21428 }, { "epoch": 0.6567671938212578, "grad_norm": 2.0272022919921477, "learning_rate": 2.784097030652835e-06, "loss": 0.6055, "step": 21429 }, { "epoch": 0.6567978423439991, "grad_norm": 1.9430908254828452, "learning_rate": 2.7836521262034034e-06, "loss": 0.5548, "step": 21430 }, { "epoch": 0.6568284908667402, "grad_norm": 0.7731673754756109, "learning_rate": 2.7832072435922154e-06, "loss": 0.3976, "step": 21431 }, { "epoch": 0.6568591393894814, "grad_norm": 1.7014729388832104, "learning_rate": 2.7827623828236523e-06, "loss": 0.5851, "step": 21432 }, { "epoch": 0.6568897879122226, "grad_norm": 1.8149339303456398, "learning_rate": 2.7823175439020984e-06, "loss": 0.6258, "step": 21433 }, { "epoch": 0.6569204364349638, "grad_norm": 1.7933067542538084, "learning_rate": 2.781872726831939e-06, "loss": 0.5702, "step": 21434 }, { "epoch": 0.656951084957705, "grad_norm": 1.7256444794088968, "learning_rate": 2.781427931617554e-06, "loss": 0.6548, "step": 21435 }, { "epoch": 0.6569817334804462, "grad_norm": 1.9809864560984254, "learning_rate": 2.7809831582633284e-06, "loss": 0.6632, "step": 21436 }, { "epoch": 0.6570123820031875, "grad_norm": 1.7009967307899136, "learning_rate": 2.7805384067736397e-06, "loss": 0.5729, "step": 21437 }, { "epoch": 0.6570430305259286, "grad_norm": 0.7930365048687101, "learning_rate": 2.780093677152878e-06, "loss": 0.411, "step": 21438 }, { "epoch": 0.6570736790486699, "grad_norm": 1.6757414595312399, "learning_rate": 2.7796489694054214e-06, "loss": 0.5178, "step": 21439 }, { "epoch": 0.657104327571411, "grad_norm": 2.021057404563855, "learning_rate": 2.7792042835356492e-06, "loss": 0.6243, "step": 21440 }, { "epoch": 0.6571349760941523, "grad_norm": 1.6826506211352288, "learning_rate": 2.778759619547946e-06, "loss": 0.5399, "step": 21441 }, { "epoch": 0.6571656246168934, "grad_norm": 1.572373738305891, "learning_rate": 2.7783149774466944e-06, "loss": 0.5329, "step": 21442 }, { "epoch": 0.6571962731396347, "grad_norm": 0.7847245247327581, "learning_rate": 2.7778703572362714e-06, "loss": 0.4321, "step": 21443 }, { "epoch": 0.6572269216623758, "grad_norm": 1.7575531844163093, "learning_rate": 2.7774257589210606e-06, "loss": 0.5917, "step": 21444 }, { "epoch": 0.6572575701851171, "grad_norm": 1.7529745341291707, "learning_rate": 2.7769811825054427e-06, "loss": 0.5609, "step": 21445 }, { "epoch": 0.6572882187078583, "grad_norm": 1.8983470438467638, "learning_rate": 2.7765366279938e-06, "loss": 0.6398, "step": 21446 }, { "epoch": 0.6573188672305995, "grad_norm": 2.1469990357227355, "learning_rate": 2.7760920953905104e-06, "loss": 0.6888, "step": 21447 }, { "epoch": 0.6573495157533407, "grad_norm": 0.7791425998460099, "learning_rate": 2.7756475846999503e-06, "loss": 0.4114, "step": 21448 }, { "epoch": 0.6573801642760819, "grad_norm": 1.7374972484889717, "learning_rate": 2.775203095926508e-06, "loss": 0.5828, "step": 21449 }, { "epoch": 0.6574108127988231, "grad_norm": 1.7875469925565528, "learning_rate": 2.7747586290745586e-06, "loss": 0.5868, "step": 21450 }, { "epoch": 0.6574414613215643, "grad_norm": 1.7230954233584932, "learning_rate": 2.77431418414848e-06, "loss": 0.6817, "step": 21451 }, { "epoch": 0.6574721098443055, "grad_norm": 1.7219905387557268, "learning_rate": 2.7738697611526533e-06, "loss": 0.5675, "step": 21452 }, { "epoch": 0.6575027583670467, "grad_norm": 1.8774119118948334, "learning_rate": 2.773425360091457e-06, "loss": 0.6906, "step": 21453 }, { "epoch": 0.6575334068897879, "grad_norm": 1.6040308426815049, "learning_rate": 2.7729809809692734e-06, "loss": 0.5911, "step": 21454 }, { "epoch": 0.6575640554125292, "grad_norm": 0.7889255899845492, "learning_rate": 2.772536623790475e-06, "loss": 0.4332, "step": 21455 }, { "epoch": 0.6575947039352703, "grad_norm": 1.633094350418341, "learning_rate": 2.7720922885594433e-06, "loss": 0.6735, "step": 21456 }, { "epoch": 0.6576253524580116, "grad_norm": 1.8829556949435886, "learning_rate": 2.771647975280558e-06, "loss": 0.5492, "step": 21457 }, { "epoch": 0.6576560009807527, "grad_norm": 2.0008337972066834, "learning_rate": 2.7712036839581956e-06, "loss": 0.6076, "step": 21458 }, { "epoch": 0.657686649503494, "grad_norm": 1.9801198143774943, "learning_rate": 2.77075941459673e-06, "loss": 0.6555, "step": 21459 }, { "epoch": 0.6577172980262351, "grad_norm": 0.834755436007959, "learning_rate": 2.7703151672005457e-06, "loss": 0.4148, "step": 21460 }, { "epoch": 0.6577479465489764, "grad_norm": 1.7638191808940262, "learning_rate": 2.7698709417740165e-06, "loss": 0.5781, "step": 21461 }, { "epoch": 0.6577785950717175, "grad_norm": 1.649848447513304, "learning_rate": 2.769426738321518e-06, "loss": 0.631, "step": 21462 }, { "epoch": 0.6578092435944587, "grad_norm": 1.6204362602507218, "learning_rate": 2.768982556847429e-06, "loss": 0.6183, "step": 21463 }, { "epoch": 0.6578398921172, "grad_norm": 1.908098309170488, "learning_rate": 2.768538397356125e-06, "loss": 0.6501, "step": 21464 }, { "epoch": 0.6578705406399411, "grad_norm": 1.8337082271385612, "learning_rate": 2.768094259851985e-06, "loss": 0.6493, "step": 21465 }, { "epoch": 0.6579011891626824, "grad_norm": 2.0553952473764108, "learning_rate": 2.767650144339381e-06, "loss": 0.6004, "step": 21466 }, { "epoch": 0.6579318376854235, "grad_norm": 1.8095415513808435, "learning_rate": 2.7672060508226923e-06, "loss": 0.5802, "step": 21467 }, { "epoch": 0.6579624862081648, "grad_norm": 1.7116132439298601, "learning_rate": 2.766761979306295e-06, "loss": 0.5562, "step": 21468 }, { "epoch": 0.6579931347309059, "grad_norm": 1.5598689487150472, "learning_rate": 2.7663179297945637e-06, "loss": 0.5838, "step": 21469 }, { "epoch": 0.6580237832536472, "grad_norm": 1.5680076178926563, "learning_rate": 2.765873902291871e-06, "loss": 0.5194, "step": 21470 }, { "epoch": 0.6580544317763883, "grad_norm": 1.8408705274193828, "learning_rate": 2.765429896802595e-06, "loss": 0.5613, "step": 21471 }, { "epoch": 0.6580850802991296, "grad_norm": 1.9248889907337876, "learning_rate": 2.7649859133311092e-06, "loss": 0.6403, "step": 21472 }, { "epoch": 0.6581157288218707, "grad_norm": 0.801285040487242, "learning_rate": 2.764541951881791e-06, "loss": 0.4217, "step": 21473 }, { "epoch": 0.658146377344612, "grad_norm": 1.9648930038157644, "learning_rate": 2.7640980124590113e-06, "loss": 0.6504, "step": 21474 }, { "epoch": 0.6581770258673532, "grad_norm": 0.836619507053905, "learning_rate": 2.7636540950671463e-06, "loss": 0.4283, "step": 21475 }, { "epoch": 0.6582076743900944, "grad_norm": 1.7431472848891634, "learning_rate": 2.7632101997105708e-06, "loss": 0.572, "step": 21476 }, { "epoch": 0.6582383229128356, "grad_norm": 1.7886661261760541, "learning_rate": 2.7627663263936582e-06, "loss": 0.5813, "step": 21477 }, { "epoch": 0.6582689714355768, "grad_norm": 1.7460627676862313, "learning_rate": 2.7623224751207773e-06, "loss": 0.6555, "step": 21478 }, { "epoch": 0.658299619958318, "grad_norm": 1.862391460025425, "learning_rate": 2.7618786458963096e-06, "loss": 0.5805, "step": 21479 }, { "epoch": 0.6583302684810592, "grad_norm": 1.6738460248197384, "learning_rate": 2.761434838724622e-06, "loss": 0.5989, "step": 21480 }, { "epoch": 0.6583609170038004, "grad_norm": 1.720947459032968, "learning_rate": 2.760991053610092e-06, "loss": 0.6247, "step": 21481 }, { "epoch": 0.6583915655265417, "grad_norm": 2.042967023918439, "learning_rate": 2.7605472905570875e-06, "loss": 0.6358, "step": 21482 }, { "epoch": 0.6584222140492828, "grad_norm": 1.9177689567420126, "learning_rate": 2.7601035495699843e-06, "loss": 0.6417, "step": 21483 }, { "epoch": 0.6584528625720241, "grad_norm": 1.8227800717051492, "learning_rate": 2.7596598306531554e-06, "loss": 0.5814, "step": 21484 }, { "epoch": 0.6584835110947652, "grad_norm": 1.7769285728126032, "learning_rate": 2.75921613381097e-06, "loss": 0.5867, "step": 21485 }, { "epoch": 0.6585141596175065, "grad_norm": 2.020969631932591, "learning_rate": 2.7587724590478005e-06, "loss": 0.5988, "step": 21486 }, { "epoch": 0.6585448081402476, "grad_norm": 1.723253082137025, "learning_rate": 2.7583288063680214e-06, "loss": 0.558, "step": 21487 }, { "epoch": 0.6585754566629889, "grad_norm": 1.6533756714876287, "learning_rate": 2.757885175776003e-06, "loss": 0.5641, "step": 21488 }, { "epoch": 0.65860610518573, "grad_norm": 1.683996333490617, "learning_rate": 2.7574415672761113e-06, "loss": 0.5901, "step": 21489 }, { "epoch": 0.6586367537084713, "grad_norm": 1.753071012756386, "learning_rate": 2.7569979808727255e-06, "loss": 0.5454, "step": 21490 }, { "epoch": 0.6586674022312125, "grad_norm": 1.7711615060288997, "learning_rate": 2.75655441657021e-06, "loss": 0.518, "step": 21491 }, { "epoch": 0.6586980507539537, "grad_norm": 0.8612131672942541, "learning_rate": 2.75611087437294e-06, "loss": 0.4125, "step": 21492 }, { "epoch": 0.6587286992766949, "grad_norm": 2.0404758361711552, "learning_rate": 2.7556673542852825e-06, "loss": 0.6272, "step": 21493 }, { "epoch": 0.658759347799436, "grad_norm": 1.8332728054432315, "learning_rate": 2.7552238563116086e-06, "loss": 0.6759, "step": 21494 }, { "epoch": 0.6587899963221773, "grad_norm": 1.9638841247828014, "learning_rate": 2.75478038045629e-06, "loss": 0.6842, "step": 21495 }, { "epoch": 0.6588206448449184, "grad_norm": 1.7801380693844397, "learning_rate": 2.754336926723693e-06, "loss": 0.6025, "step": 21496 }, { "epoch": 0.6588512933676597, "grad_norm": 1.7232117836939604, "learning_rate": 2.7538934951181884e-06, "loss": 0.6765, "step": 21497 }, { "epoch": 0.6588819418904008, "grad_norm": 1.6719622582908924, "learning_rate": 2.7534500856441483e-06, "loss": 0.5977, "step": 21498 }, { "epoch": 0.6589125904131421, "grad_norm": 1.6980389681814771, "learning_rate": 2.7530066983059365e-06, "loss": 0.675, "step": 21499 }, { "epoch": 0.6589432389358832, "grad_norm": 1.5229736662851396, "learning_rate": 2.752563333107926e-06, "loss": 0.6246, "step": 21500 }, { "epoch": 0.6589738874586245, "grad_norm": 1.6620355819166588, "learning_rate": 2.7521199900544847e-06, "loss": 0.611, "step": 21501 }, { "epoch": 0.6590045359813657, "grad_norm": 1.7958642887485639, "learning_rate": 2.7516766691499797e-06, "loss": 0.5974, "step": 21502 }, { "epoch": 0.6590351845041069, "grad_norm": 0.7659042486333623, "learning_rate": 2.7512333703987803e-06, "loss": 0.4383, "step": 21503 }, { "epoch": 0.6590658330268481, "grad_norm": 0.8044829084614901, "learning_rate": 2.750790093805253e-06, "loss": 0.4116, "step": 21504 }, { "epoch": 0.6590964815495893, "grad_norm": 1.696736221505712, "learning_rate": 2.750346839373766e-06, "loss": 0.5643, "step": 21505 }, { "epoch": 0.6591271300723305, "grad_norm": 1.8267301597143992, "learning_rate": 2.7499036071086893e-06, "loss": 0.6107, "step": 21506 }, { "epoch": 0.6591577785950717, "grad_norm": 1.6637777422088096, "learning_rate": 2.749460397014385e-06, "loss": 0.499, "step": 21507 }, { "epoch": 0.6591884271178129, "grad_norm": 1.6534723771215374, "learning_rate": 2.749017209095225e-06, "loss": 0.5507, "step": 21508 }, { "epoch": 0.6592190756405542, "grad_norm": 1.9149328596595268, "learning_rate": 2.7485740433555753e-06, "loss": 0.5118, "step": 21509 }, { "epoch": 0.6592497241632953, "grad_norm": 1.9992374615773663, "learning_rate": 2.7481308997998e-06, "loss": 0.5744, "step": 21510 }, { "epoch": 0.6592803726860366, "grad_norm": 0.7632218121518454, "learning_rate": 2.7476877784322662e-06, "loss": 0.4075, "step": 21511 }, { "epoch": 0.6593110212087777, "grad_norm": 1.993489130158554, "learning_rate": 2.7472446792573435e-06, "loss": 0.665, "step": 21512 }, { "epoch": 0.659341669731519, "grad_norm": 2.273815751371751, "learning_rate": 2.746801602279394e-06, "loss": 0.521, "step": 21513 }, { "epoch": 0.6593723182542601, "grad_norm": 1.639558440252497, "learning_rate": 2.7463585475027866e-06, "loss": 0.5419, "step": 21514 }, { "epoch": 0.6594029667770014, "grad_norm": 2.0137219968727518, "learning_rate": 2.7459155149318828e-06, "loss": 0.585, "step": 21515 }, { "epoch": 0.6594336152997425, "grad_norm": 1.9614122976336996, "learning_rate": 2.74547250457105e-06, "loss": 0.6188, "step": 21516 }, { "epoch": 0.6594642638224838, "grad_norm": 1.6206149718606815, "learning_rate": 2.7450295164246556e-06, "loss": 0.5549, "step": 21517 }, { "epoch": 0.659494912345225, "grad_norm": 1.8376572320685785, "learning_rate": 2.74458655049706e-06, "loss": 0.5499, "step": 21518 }, { "epoch": 0.6595255608679662, "grad_norm": 1.8103713992504833, "learning_rate": 2.7441436067926307e-06, "loss": 0.6372, "step": 21519 }, { "epoch": 0.6595562093907074, "grad_norm": 1.5792782202166433, "learning_rate": 2.743700685315734e-06, "loss": 0.5758, "step": 21520 }, { "epoch": 0.6595868579134486, "grad_norm": 1.5733577469159525, "learning_rate": 2.743257786070729e-06, "loss": 0.538, "step": 21521 }, { "epoch": 0.6596175064361898, "grad_norm": 0.817641026258193, "learning_rate": 2.742814909061985e-06, "loss": 0.4401, "step": 21522 }, { "epoch": 0.659648154958931, "grad_norm": 1.6958151536407273, "learning_rate": 2.74237205429386e-06, "loss": 0.63, "step": 21523 }, { "epoch": 0.6596788034816722, "grad_norm": 1.530046735493897, "learning_rate": 2.741929221770723e-06, "loss": 0.528, "step": 21524 }, { "epoch": 0.6597094520044133, "grad_norm": 1.8600064502647262, "learning_rate": 2.7414864114969355e-06, "loss": 0.6607, "step": 21525 }, { "epoch": 0.6597401005271546, "grad_norm": 1.9045637684318661, "learning_rate": 2.7410436234768584e-06, "loss": 0.6131, "step": 21526 }, { "epoch": 0.6597707490498957, "grad_norm": 1.6772566923906214, "learning_rate": 2.740600857714857e-06, "loss": 0.5622, "step": 21527 }, { "epoch": 0.659801397572637, "grad_norm": 1.701592667383242, "learning_rate": 2.7401581142152945e-06, "loss": 0.6135, "step": 21528 }, { "epoch": 0.6598320460953782, "grad_norm": 1.8768467008774672, "learning_rate": 2.7397153929825317e-06, "loss": 0.6213, "step": 21529 }, { "epoch": 0.6598626946181194, "grad_norm": 1.9539176478754672, "learning_rate": 2.73927269402093e-06, "loss": 0.5667, "step": 21530 }, { "epoch": 0.6598933431408606, "grad_norm": 1.6713626916907818, "learning_rate": 2.7388300173348557e-06, "loss": 0.5412, "step": 21531 }, { "epoch": 0.6599239916636018, "grad_norm": 1.5035663659673983, "learning_rate": 2.7383873629286658e-06, "loss": 0.5724, "step": 21532 }, { "epoch": 0.659954640186343, "grad_norm": 1.9126941408902194, "learning_rate": 2.737944730806725e-06, "loss": 0.5336, "step": 21533 }, { "epoch": 0.6599852887090842, "grad_norm": 1.689923643008036, "learning_rate": 2.737502120973391e-06, "loss": 0.6043, "step": 21534 }, { "epoch": 0.6600159372318254, "grad_norm": 1.828934207479329, "learning_rate": 2.737059533433031e-06, "loss": 0.6762, "step": 21535 }, { "epoch": 0.6600465857545667, "grad_norm": 1.8531350767067252, "learning_rate": 2.7366169681900013e-06, "loss": 0.6725, "step": 21536 }, { "epoch": 0.6600772342773078, "grad_norm": 1.6062059220299936, "learning_rate": 2.7361744252486626e-06, "loss": 0.5926, "step": 21537 }, { "epoch": 0.6601078828000491, "grad_norm": 1.971469613210802, "learning_rate": 2.735731904613377e-06, "loss": 0.6022, "step": 21538 }, { "epoch": 0.6601385313227902, "grad_norm": 0.8330317321473742, "learning_rate": 2.735289406288505e-06, "loss": 0.4123, "step": 21539 }, { "epoch": 0.6601691798455315, "grad_norm": 0.796145475427795, "learning_rate": 2.734846930278405e-06, "loss": 0.4189, "step": 21540 }, { "epoch": 0.6601998283682726, "grad_norm": 2.192381388925538, "learning_rate": 2.734404476587438e-06, "loss": 0.5789, "step": 21541 }, { "epoch": 0.6602304768910139, "grad_norm": 0.8248332022730025, "learning_rate": 2.7339620452199646e-06, "loss": 0.399, "step": 21542 }, { "epoch": 0.660261125413755, "grad_norm": 1.5603856769723785, "learning_rate": 2.7335196361803408e-06, "loss": 0.5551, "step": 21543 }, { "epoch": 0.6602917739364963, "grad_norm": 1.8895327806982183, "learning_rate": 2.7330772494729304e-06, "loss": 0.6728, "step": 21544 }, { "epoch": 0.6603224224592374, "grad_norm": 1.6787650886546819, "learning_rate": 2.732634885102086e-06, "loss": 0.6147, "step": 21545 }, { "epoch": 0.6603530709819787, "grad_norm": 1.7989540061256508, "learning_rate": 2.732192543072174e-06, "loss": 0.5477, "step": 21546 }, { "epoch": 0.6603837195047199, "grad_norm": 1.7007861870807959, "learning_rate": 2.7317502233875487e-06, "loss": 0.5878, "step": 21547 }, { "epoch": 0.6604143680274611, "grad_norm": 1.7632442269052195, "learning_rate": 2.731307926052568e-06, "loss": 0.6268, "step": 21548 }, { "epoch": 0.6604450165502023, "grad_norm": 1.7604807792956512, "learning_rate": 2.730865651071589e-06, "loss": 0.5974, "step": 21549 }, { "epoch": 0.6604756650729435, "grad_norm": 1.7561672465149578, "learning_rate": 2.7304233984489746e-06, "loss": 0.5738, "step": 21550 }, { "epoch": 0.6605063135956847, "grad_norm": 1.6682654405176869, "learning_rate": 2.7299811681890764e-06, "loss": 0.5863, "step": 21551 }, { "epoch": 0.6605369621184259, "grad_norm": 2.027838838418941, "learning_rate": 2.729538960296255e-06, "loss": 0.5484, "step": 21552 }, { "epoch": 0.6605676106411671, "grad_norm": 1.6518838157418538, "learning_rate": 2.7290967747748676e-06, "loss": 0.6032, "step": 21553 }, { "epoch": 0.6605982591639084, "grad_norm": 0.875093992631532, "learning_rate": 2.7286546116292722e-06, "loss": 0.4407, "step": 21554 }, { "epoch": 0.6606289076866495, "grad_norm": 1.9339168953581356, "learning_rate": 2.7282124708638242e-06, "loss": 0.6577, "step": 21555 }, { "epoch": 0.6606595562093907, "grad_norm": 2.127470116126005, "learning_rate": 2.7277703524828757e-06, "loss": 0.5717, "step": 21556 }, { "epoch": 0.6606902047321319, "grad_norm": 1.7942876737475884, "learning_rate": 2.7273282564907918e-06, "loss": 0.6679, "step": 21557 }, { "epoch": 0.6607208532548731, "grad_norm": 0.9574425388695201, "learning_rate": 2.7268861828919237e-06, "loss": 0.4097, "step": 21558 }, { "epoch": 0.6607515017776143, "grad_norm": 1.6320787254976235, "learning_rate": 2.7264441316906253e-06, "loss": 0.5772, "step": 21559 }, { "epoch": 0.6607821503003555, "grad_norm": 1.7786931316992867, "learning_rate": 2.7260021028912553e-06, "loss": 0.6436, "step": 21560 }, { "epoch": 0.6608127988230967, "grad_norm": 1.658557711262421, "learning_rate": 2.7255600964981683e-06, "loss": 0.5484, "step": 21561 }, { "epoch": 0.6608434473458379, "grad_norm": 1.8948624157069234, "learning_rate": 2.725118112515721e-06, "loss": 0.6332, "step": 21562 }, { "epoch": 0.6608740958685791, "grad_norm": 1.8971097549965976, "learning_rate": 2.7246761509482657e-06, "loss": 0.6163, "step": 21563 }, { "epoch": 0.6609047443913203, "grad_norm": 1.7157232450663977, "learning_rate": 2.7242342118001584e-06, "loss": 0.5168, "step": 21564 }, { "epoch": 0.6609353929140616, "grad_norm": 1.8341015839617858, "learning_rate": 2.7237922950757554e-06, "loss": 0.6107, "step": 21565 }, { "epoch": 0.6609660414368027, "grad_norm": 1.7759934973913203, "learning_rate": 2.7233504007794093e-06, "loss": 0.6154, "step": 21566 }, { "epoch": 0.660996689959544, "grad_norm": 1.852157856039164, "learning_rate": 2.722908528915472e-06, "loss": 0.6101, "step": 21567 }, { "epoch": 0.6610273384822851, "grad_norm": 0.8050933777101579, "learning_rate": 2.7224666794883002e-06, "loss": 0.3972, "step": 21568 }, { "epoch": 0.6610579870050264, "grad_norm": 1.7076365226815056, "learning_rate": 2.7220248525022485e-06, "loss": 0.5562, "step": 21569 }, { "epoch": 0.6610886355277675, "grad_norm": 1.7780439233911303, "learning_rate": 2.721583047961667e-06, "loss": 0.5781, "step": 21570 }, { "epoch": 0.6611192840505088, "grad_norm": 1.8939405953988937, "learning_rate": 2.72114126587091e-06, "loss": 0.5864, "step": 21571 }, { "epoch": 0.6611499325732499, "grad_norm": 1.433383535742693, "learning_rate": 2.7206995062343323e-06, "loss": 0.5104, "step": 21572 }, { "epoch": 0.6611805810959912, "grad_norm": 0.7718158450347843, "learning_rate": 2.720257769056287e-06, "loss": 0.4289, "step": 21573 }, { "epoch": 0.6612112296187324, "grad_norm": 1.752903155322817, "learning_rate": 2.719816054341125e-06, "loss": 0.6026, "step": 21574 }, { "epoch": 0.6612418781414736, "grad_norm": 1.827992795456818, "learning_rate": 2.719374362093195e-06, "loss": 0.6149, "step": 21575 }, { "epoch": 0.6612725266642148, "grad_norm": 2.1059792567354747, "learning_rate": 2.7189326923168567e-06, "loss": 0.6103, "step": 21576 }, { "epoch": 0.661303175186956, "grad_norm": 1.8301729853000004, "learning_rate": 2.7184910450164586e-06, "loss": 0.6011, "step": 21577 }, { "epoch": 0.6613338237096972, "grad_norm": 1.8084727295392298, "learning_rate": 2.7180494201963505e-06, "loss": 0.6056, "step": 21578 }, { "epoch": 0.6613644722324384, "grad_norm": 1.9405263271188797, "learning_rate": 2.7176078178608844e-06, "loss": 0.5901, "step": 21579 }, { "epoch": 0.6613951207551796, "grad_norm": 1.6768347640468002, "learning_rate": 2.7171662380144124e-06, "loss": 0.5753, "step": 21580 }, { "epoch": 0.6614257692779208, "grad_norm": 1.577359222494013, "learning_rate": 2.716724680661288e-06, "loss": 0.5063, "step": 21581 }, { "epoch": 0.661456417800662, "grad_norm": 1.7660016096519895, "learning_rate": 2.7162831458058573e-06, "loss": 0.5905, "step": 21582 }, { "epoch": 0.6614870663234033, "grad_norm": 1.8069809767744862, "learning_rate": 2.7158416334524728e-06, "loss": 0.5882, "step": 21583 }, { "epoch": 0.6615177148461444, "grad_norm": 1.5376269007638905, "learning_rate": 2.7154001436054876e-06, "loss": 0.5646, "step": 21584 }, { "epoch": 0.6615483633688857, "grad_norm": 1.744267026042035, "learning_rate": 2.714958676269249e-06, "loss": 0.6341, "step": 21585 }, { "epoch": 0.6615790118916268, "grad_norm": 1.8934413213415127, "learning_rate": 2.7145172314481037e-06, "loss": 0.5058, "step": 21586 }, { "epoch": 0.661609660414368, "grad_norm": 1.7910909033319193, "learning_rate": 2.714075809146409e-06, "loss": 0.5368, "step": 21587 }, { "epoch": 0.6616403089371092, "grad_norm": 1.9721723985922783, "learning_rate": 2.7136344093685075e-06, "loss": 0.6653, "step": 21588 }, { "epoch": 0.6616709574598504, "grad_norm": 0.8166070576298633, "learning_rate": 2.713193032118754e-06, "loss": 0.4193, "step": 21589 }, { "epoch": 0.6617016059825916, "grad_norm": 1.8908691572356664, "learning_rate": 2.7127516774014915e-06, "loss": 0.6503, "step": 21590 }, { "epoch": 0.6617322545053328, "grad_norm": 1.7341968030479875, "learning_rate": 2.712310345221073e-06, "loss": 0.5548, "step": 21591 }, { "epoch": 0.6617629030280741, "grad_norm": 1.9088976633640542, "learning_rate": 2.711869035581848e-06, "loss": 0.5832, "step": 21592 }, { "epoch": 0.6617935515508152, "grad_norm": 1.807869251415954, "learning_rate": 2.71142774848816e-06, "loss": 0.6015, "step": 21593 }, { "epoch": 0.6618242000735565, "grad_norm": 1.7640407376717484, "learning_rate": 2.71098648394436e-06, "loss": 0.5707, "step": 21594 }, { "epoch": 0.6618548485962976, "grad_norm": 1.679918443092076, "learning_rate": 2.7105452419547982e-06, "loss": 0.594, "step": 21595 }, { "epoch": 0.6618854971190389, "grad_norm": 1.7674715114096493, "learning_rate": 2.7101040225238205e-06, "loss": 0.6144, "step": 21596 }, { "epoch": 0.66191614564178, "grad_norm": 1.711294392817366, "learning_rate": 2.709662825655769e-06, "loss": 0.6497, "step": 21597 }, { "epoch": 0.6619467941645213, "grad_norm": 1.851577763702357, "learning_rate": 2.7092216513549997e-06, "loss": 0.6532, "step": 21598 }, { "epoch": 0.6619774426872624, "grad_norm": 1.7772208448827134, "learning_rate": 2.708780499625854e-06, "loss": 0.5828, "step": 21599 }, { "epoch": 0.6620080912100037, "grad_norm": 1.8914462140150827, "learning_rate": 2.7083393704726824e-06, "loss": 0.6179, "step": 21600 }, { "epoch": 0.6620387397327449, "grad_norm": 1.7427351153103696, "learning_rate": 2.7078982638998265e-06, "loss": 0.5561, "step": 21601 }, { "epoch": 0.6620693882554861, "grad_norm": 2.151466722543075, "learning_rate": 2.7074571799116354e-06, "loss": 0.6452, "step": 21602 }, { "epoch": 0.6621000367782273, "grad_norm": 1.534078966649181, "learning_rate": 2.7070161185124582e-06, "loss": 0.5247, "step": 21603 }, { "epoch": 0.6621306853009685, "grad_norm": 1.9637878099656076, "learning_rate": 2.706575079706636e-06, "loss": 0.5128, "step": 21604 }, { "epoch": 0.6621613338237097, "grad_norm": 1.8838484895535739, "learning_rate": 2.7061340634985155e-06, "loss": 0.6187, "step": 21605 }, { "epoch": 0.6621919823464509, "grad_norm": 1.8510398748768766, "learning_rate": 2.7056930698924457e-06, "loss": 0.6041, "step": 21606 }, { "epoch": 0.6622226308691921, "grad_norm": 1.6898897072136867, "learning_rate": 2.7052520988927666e-06, "loss": 0.6292, "step": 21607 }, { "epoch": 0.6622532793919333, "grad_norm": 1.8187702495234515, "learning_rate": 2.7048111505038253e-06, "loss": 0.6168, "step": 21608 }, { "epoch": 0.6622839279146745, "grad_norm": 1.872935920678388, "learning_rate": 2.7043702247299695e-06, "loss": 0.579, "step": 21609 }, { "epoch": 0.6623145764374158, "grad_norm": 1.7486427836893244, "learning_rate": 2.703929321575539e-06, "loss": 0.6125, "step": 21610 }, { "epoch": 0.6623452249601569, "grad_norm": 1.8473758764409285, "learning_rate": 2.703488441044883e-06, "loss": 0.6151, "step": 21611 }, { "epoch": 0.6623758734828982, "grad_norm": 1.6731164742313955, "learning_rate": 2.7030475831423406e-06, "loss": 0.5821, "step": 21612 }, { "epoch": 0.6624065220056393, "grad_norm": 2.102696926475303, "learning_rate": 2.702606747872258e-06, "loss": 0.6943, "step": 21613 }, { "epoch": 0.6624371705283806, "grad_norm": 1.6602672172630477, "learning_rate": 2.7021659352389814e-06, "loss": 0.541, "step": 21614 }, { "epoch": 0.6624678190511217, "grad_norm": 1.91051320378168, "learning_rate": 2.701725145246849e-06, "loss": 0.633, "step": 21615 }, { "epoch": 0.662498467573863, "grad_norm": 1.8458115516676274, "learning_rate": 2.7012843779002074e-06, "loss": 0.5583, "step": 21616 }, { "epoch": 0.6625291160966041, "grad_norm": 1.823363658466097, "learning_rate": 2.7008436332034004e-06, "loss": 0.6502, "step": 21617 }, { "epoch": 0.6625597646193453, "grad_norm": 1.9841055729389558, "learning_rate": 2.700402911160768e-06, "loss": 0.5889, "step": 21618 }, { "epoch": 0.6625904131420866, "grad_norm": 1.869471807326267, "learning_rate": 2.6999622117766553e-06, "loss": 0.5362, "step": 21619 }, { "epoch": 0.6626210616648277, "grad_norm": 1.5860968793409536, "learning_rate": 2.6995215350554015e-06, "loss": 0.611, "step": 21620 }, { "epoch": 0.662651710187569, "grad_norm": 0.8178459550932387, "learning_rate": 2.699080881001351e-06, "loss": 0.4331, "step": 21621 }, { "epoch": 0.6626823587103101, "grad_norm": 1.815615368066207, "learning_rate": 2.698640249618848e-06, "loss": 0.5572, "step": 21622 }, { "epoch": 0.6627130072330514, "grad_norm": 1.7932215285430104, "learning_rate": 2.6981996409122285e-06, "loss": 0.5937, "step": 21623 }, { "epoch": 0.6627436557557925, "grad_norm": 1.9212330998877178, "learning_rate": 2.697759054885837e-06, "loss": 0.6744, "step": 21624 }, { "epoch": 0.6627743042785338, "grad_norm": 1.7683590531029654, "learning_rate": 2.6973184915440165e-06, "loss": 0.5433, "step": 21625 }, { "epoch": 0.6628049528012749, "grad_norm": 0.8092198841019088, "learning_rate": 2.6968779508911047e-06, "loss": 0.4211, "step": 21626 }, { "epoch": 0.6628356013240162, "grad_norm": 1.7044474508871004, "learning_rate": 2.696437432931443e-06, "loss": 0.5461, "step": 21627 }, { "epoch": 0.6628662498467573, "grad_norm": 1.904394268427711, "learning_rate": 2.695996937669375e-06, "loss": 0.5658, "step": 21628 }, { "epoch": 0.6628968983694986, "grad_norm": 0.8065110920360804, "learning_rate": 2.6955564651092368e-06, "loss": 0.4022, "step": 21629 }, { "epoch": 0.6629275468922398, "grad_norm": 0.7757165727137288, "learning_rate": 2.6951160152553724e-06, "loss": 0.4018, "step": 21630 }, { "epoch": 0.662958195414981, "grad_norm": 2.084307477116131, "learning_rate": 2.694675588112117e-06, "loss": 0.6457, "step": 21631 }, { "epoch": 0.6629888439377222, "grad_norm": 1.8756422597812221, "learning_rate": 2.6942351836838133e-06, "loss": 0.614, "step": 21632 }, { "epoch": 0.6630194924604634, "grad_norm": 1.8387963827598037, "learning_rate": 2.6937948019748024e-06, "loss": 0.6273, "step": 21633 }, { "epoch": 0.6630501409832046, "grad_norm": 0.7968775243392415, "learning_rate": 2.6933544429894192e-06, "loss": 0.4129, "step": 21634 }, { "epoch": 0.6630807895059458, "grad_norm": 1.6352382431184955, "learning_rate": 2.6929141067320052e-06, "loss": 0.5682, "step": 21635 }, { "epoch": 0.663111438028687, "grad_norm": 1.7583134416968789, "learning_rate": 2.6924737932069003e-06, "loss": 0.6026, "step": 21636 }, { "epoch": 0.6631420865514283, "grad_norm": 2.0956389936976887, "learning_rate": 2.6920335024184398e-06, "loss": 0.6252, "step": 21637 }, { "epoch": 0.6631727350741694, "grad_norm": 1.8660405679483485, "learning_rate": 2.691593234370964e-06, "loss": 0.5877, "step": 21638 }, { "epoch": 0.6632033835969107, "grad_norm": 1.694152498488241, "learning_rate": 2.691152989068812e-06, "loss": 0.6515, "step": 21639 }, { "epoch": 0.6632340321196518, "grad_norm": 2.02462098468681, "learning_rate": 2.690712766516319e-06, "loss": 0.5467, "step": 21640 }, { "epoch": 0.6632646806423931, "grad_norm": 0.7972345051753768, "learning_rate": 2.6902725667178254e-06, "loss": 0.4251, "step": 21641 }, { "epoch": 0.6632953291651342, "grad_norm": 1.9979856538721357, "learning_rate": 2.689832389677666e-06, "loss": 0.6669, "step": 21642 }, { "epoch": 0.6633259776878755, "grad_norm": 1.994568325520549, "learning_rate": 2.6893922354001777e-06, "loss": 0.5982, "step": 21643 }, { "epoch": 0.6633566262106166, "grad_norm": 2.064187788516215, "learning_rate": 2.6889521038897022e-06, "loss": 0.618, "step": 21644 }, { "epoch": 0.6633872747333579, "grad_norm": 1.9341538439770365, "learning_rate": 2.68851199515057e-06, "loss": 0.5841, "step": 21645 }, { "epoch": 0.663417923256099, "grad_norm": 1.9577777591089658, "learning_rate": 2.6880719091871212e-06, "loss": 0.7012, "step": 21646 }, { "epoch": 0.6634485717788403, "grad_norm": 0.8153195676144326, "learning_rate": 2.687631846003693e-06, "loss": 0.4488, "step": 21647 }, { "epoch": 0.6634792203015815, "grad_norm": 1.8805587690596408, "learning_rate": 2.6871918056046186e-06, "loss": 0.5761, "step": 21648 }, { "epoch": 0.6635098688243226, "grad_norm": 1.933143106321402, "learning_rate": 2.6867517879942345e-06, "loss": 0.6032, "step": 21649 }, { "epoch": 0.6635405173470639, "grad_norm": 1.820904828154029, "learning_rate": 2.686311793176879e-06, "loss": 0.5834, "step": 21650 }, { "epoch": 0.663571165869805, "grad_norm": 1.6440723339818986, "learning_rate": 2.6858718211568834e-06, "loss": 0.5969, "step": 21651 }, { "epoch": 0.6636018143925463, "grad_norm": 1.7704486572601628, "learning_rate": 2.685431871938587e-06, "loss": 0.5596, "step": 21652 }, { "epoch": 0.6636324629152874, "grad_norm": 1.7386740945273509, "learning_rate": 2.6849919455263183e-06, "loss": 0.5656, "step": 21653 }, { "epoch": 0.6636631114380287, "grad_norm": 1.8068598890474552, "learning_rate": 2.684552041924421e-06, "loss": 0.5627, "step": 21654 }, { "epoch": 0.6636937599607698, "grad_norm": 1.7204466989075942, "learning_rate": 2.6841121611372234e-06, "loss": 0.6029, "step": 21655 }, { "epoch": 0.6637244084835111, "grad_norm": 0.8633375542956515, "learning_rate": 2.6836723031690604e-06, "loss": 0.4055, "step": 21656 }, { "epoch": 0.6637550570062523, "grad_norm": 1.8805030048594964, "learning_rate": 2.6832324680242667e-06, "loss": 0.6158, "step": 21657 }, { "epoch": 0.6637857055289935, "grad_norm": 1.8230181708129678, "learning_rate": 2.682792655707178e-06, "loss": 0.5564, "step": 21658 }, { "epoch": 0.6638163540517347, "grad_norm": 1.738472238125573, "learning_rate": 2.6823528662221245e-06, "loss": 0.5379, "step": 21659 }, { "epoch": 0.6638470025744759, "grad_norm": 1.9338681877105555, "learning_rate": 2.681913099573441e-06, "loss": 0.6523, "step": 21660 }, { "epoch": 0.6638776510972171, "grad_norm": 1.8682449594808752, "learning_rate": 2.6814733557654604e-06, "loss": 0.5535, "step": 21661 }, { "epoch": 0.6639082996199583, "grad_norm": 0.7365945246973161, "learning_rate": 2.6810336348025185e-06, "loss": 0.392, "step": 21662 }, { "epoch": 0.6639389481426995, "grad_norm": 1.7516078418460117, "learning_rate": 2.6805939366889455e-06, "loss": 0.6714, "step": 21663 }, { "epoch": 0.6639695966654408, "grad_norm": 1.9360002296328385, "learning_rate": 2.680154261429072e-06, "loss": 0.5626, "step": 21664 }, { "epoch": 0.6640002451881819, "grad_norm": 1.7326823860343648, "learning_rate": 2.679714609027232e-06, "loss": 0.5761, "step": 21665 }, { "epoch": 0.6640308937109232, "grad_norm": 0.809939101300181, "learning_rate": 2.679274979487759e-06, "loss": 0.4215, "step": 21666 }, { "epoch": 0.6640615422336643, "grad_norm": 0.9068055763840914, "learning_rate": 2.6788353728149826e-06, "loss": 0.4172, "step": 21667 }, { "epoch": 0.6640921907564056, "grad_norm": 1.6858867829001305, "learning_rate": 2.6783957890132344e-06, "loss": 0.6043, "step": 21668 }, { "epoch": 0.6641228392791467, "grad_norm": 1.8805772670336987, "learning_rate": 2.677956228086849e-06, "loss": 0.626, "step": 21669 }, { "epoch": 0.664153487801888, "grad_norm": 0.8020853288577546, "learning_rate": 2.6775166900401527e-06, "loss": 0.4195, "step": 21670 }, { "epoch": 0.6641841363246291, "grad_norm": 1.8758240902496686, "learning_rate": 2.6770771748774806e-06, "loss": 0.5887, "step": 21671 }, { "epoch": 0.6642147848473704, "grad_norm": 0.7726238870653127, "learning_rate": 2.676637682603157e-06, "loss": 0.4327, "step": 21672 }, { "epoch": 0.6642454333701115, "grad_norm": 1.794578958743807, "learning_rate": 2.6761982132215212e-06, "loss": 0.6848, "step": 21673 }, { "epoch": 0.6642760818928528, "grad_norm": 1.5768987340725549, "learning_rate": 2.6757587667368996e-06, "loss": 0.5269, "step": 21674 }, { "epoch": 0.664306730415594, "grad_norm": 0.7660461303784329, "learning_rate": 2.675319343153619e-06, "loss": 0.4149, "step": 21675 }, { "epoch": 0.6643373789383352, "grad_norm": 1.6838327546057696, "learning_rate": 2.674879942476012e-06, "loss": 0.6026, "step": 21676 }, { "epoch": 0.6643680274610764, "grad_norm": 2.0019171874110837, "learning_rate": 2.674440564708409e-06, "loss": 0.7185, "step": 21677 }, { "epoch": 0.6643986759838176, "grad_norm": 1.9212876392479885, "learning_rate": 2.674001209855137e-06, "loss": 0.6536, "step": 21678 }, { "epoch": 0.6644293245065588, "grad_norm": 1.6645068912983156, "learning_rate": 2.673561877920526e-06, "loss": 0.5675, "step": 21679 }, { "epoch": 0.6644599730292999, "grad_norm": 0.7743313832344311, "learning_rate": 2.6731225689089045e-06, "loss": 0.4031, "step": 21680 }, { "epoch": 0.6644906215520412, "grad_norm": 1.7126437807462676, "learning_rate": 2.672683282824604e-06, "loss": 0.5468, "step": 21681 }, { "epoch": 0.6645212700747823, "grad_norm": 2.052475634069661, "learning_rate": 2.6722440196719514e-06, "loss": 0.6648, "step": 21682 }, { "epoch": 0.6645519185975236, "grad_norm": 1.947107079787878, "learning_rate": 2.6718047794552693e-06, "loss": 0.585, "step": 21683 }, { "epoch": 0.6645825671202648, "grad_norm": 1.8241003534839666, "learning_rate": 2.6713655621788944e-06, "loss": 0.64, "step": 21684 }, { "epoch": 0.664613215643006, "grad_norm": 1.909008721842043, "learning_rate": 2.6709263678471504e-06, "loss": 0.6625, "step": 21685 }, { "epoch": 0.6646438641657472, "grad_norm": 2.024580151879502, "learning_rate": 2.670487196464363e-06, "loss": 0.5964, "step": 21686 }, { "epoch": 0.6646745126884884, "grad_norm": 0.7961181381110762, "learning_rate": 2.670048048034861e-06, "loss": 0.3879, "step": 21687 }, { "epoch": 0.6647051612112296, "grad_norm": 1.941036593300389, "learning_rate": 2.6696089225629718e-06, "loss": 0.6132, "step": 21688 }, { "epoch": 0.6647358097339708, "grad_norm": 1.6716496772714313, "learning_rate": 2.6691698200530247e-06, "loss": 0.5282, "step": 21689 }, { "epoch": 0.664766458256712, "grad_norm": 1.7716606481053125, "learning_rate": 2.668730740509341e-06, "loss": 0.5841, "step": 21690 }, { "epoch": 0.6647971067794533, "grad_norm": 1.9130977620842151, "learning_rate": 2.6682916839362504e-06, "loss": 0.5418, "step": 21691 }, { "epoch": 0.6648277553021944, "grad_norm": 1.6975565054679114, "learning_rate": 2.6678526503380795e-06, "loss": 0.5567, "step": 21692 }, { "epoch": 0.6648584038249357, "grad_norm": 1.7522887445081332, "learning_rate": 2.667413639719154e-06, "loss": 0.6256, "step": 21693 }, { "epoch": 0.6648890523476768, "grad_norm": 1.8036362513973183, "learning_rate": 2.666974652083795e-06, "loss": 0.6177, "step": 21694 }, { "epoch": 0.6649197008704181, "grad_norm": 0.7574057368662901, "learning_rate": 2.666535687436335e-06, "loss": 0.4235, "step": 21695 }, { "epoch": 0.6649503493931592, "grad_norm": 0.7555978450114201, "learning_rate": 2.666096745781096e-06, "loss": 0.4021, "step": 21696 }, { "epoch": 0.6649809979159005, "grad_norm": 1.7347818939787671, "learning_rate": 2.665657827122401e-06, "loss": 0.5806, "step": 21697 }, { "epoch": 0.6650116464386416, "grad_norm": 1.8631915815837012, "learning_rate": 2.665218931464577e-06, "loss": 0.6456, "step": 21698 }, { "epoch": 0.6650422949613829, "grad_norm": 2.188907671497736, "learning_rate": 2.6647800588119477e-06, "loss": 0.5929, "step": 21699 }, { "epoch": 0.665072943484124, "grad_norm": 1.7085057026855652, "learning_rate": 2.6643412091688403e-06, "loss": 0.5374, "step": 21700 }, { "epoch": 0.6651035920068653, "grad_norm": 1.688935727055663, "learning_rate": 2.663902382539575e-06, "loss": 0.5882, "step": 21701 }, { "epoch": 0.6651342405296065, "grad_norm": 1.9213591416142175, "learning_rate": 2.6634635789284762e-06, "loss": 0.6884, "step": 21702 }, { "epoch": 0.6651648890523477, "grad_norm": 1.5488800214950245, "learning_rate": 2.6630247983398717e-06, "loss": 0.574, "step": 21703 }, { "epoch": 0.6651955375750889, "grad_norm": 1.8804713634988603, "learning_rate": 2.6625860407780806e-06, "loss": 0.6615, "step": 21704 }, { "epoch": 0.6652261860978301, "grad_norm": 1.7252351954832523, "learning_rate": 2.6621473062474244e-06, "loss": 0.6922, "step": 21705 }, { "epoch": 0.6652568346205713, "grad_norm": 1.9323515566006433, "learning_rate": 2.6617085947522325e-06, "loss": 0.7265, "step": 21706 }, { "epoch": 0.6652874831433125, "grad_norm": 1.7765543914720443, "learning_rate": 2.6612699062968217e-06, "loss": 0.6398, "step": 21707 }, { "epoch": 0.6653181316660537, "grad_norm": 1.843078451481265, "learning_rate": 2.66083124088552e-06, "loss": 0.588, "step": 21708 }, { "epoch": 0.665348780188795, "grad_norm": 1.8296316585814452, "learning_rate": 2.660392598522643e-06, "loss": 0.6319, "step": 21709 }, { "epoch": 0.6653794287115361, "grad_norm": 1.554092191227008, "learning_rate": 2.659953979212517e-06, "loss": 0.5316, "step": 21710 }, { "epoch": 0.6654100772342773, "grad_norm": 1.8009921647759242, "learning_rate": 2.6595153829594654e-06, "loss": 0.5957, "step": 21711 }, { "epoch": 0.6654407257570185, "grad_norm": 1.888468843370118, "learning_rate": 2.659076809767806e-06, "loss": 0.632, "step": 21712 }, { "epoch": 0.6654713742797597, "grad_norm": 1.6361278490794318, "learning_rate": 2.6586382596418615e-06, "loss": 0.5645, "step": 21713 }, { "epoch": 0.6655020228025009, "grad_norm": 1.6634945467425553, "learning_rate": 2.658199732585955e-06, "loss": 0.5205, "step": 21714 }, { "epoch": 0.6655326713252421, "grad_norm": 2.054229920654402, "learning_rate": 2.657761228604404e-06, "loss": 0.5977, "step": 21715 }, { "epoch": 0.6655633198479833, "grad_norm": 1.7806419164648128, "learning_rate": 2.657322747701532e-06, "loss": 0.5371, "step": 21716 }, { "epoch": 0.6655939683707245, "grad_norm": 1.7310081537416167, "learning_rate": 2.656884289881657e-06, "loss": 0.4919, "step": 21717 }, { "epoch": 0.6656246168934657, "grad_norm": 1.8154833368220473, "learning_rate": 2.656445855149101e-06, "loss": 0.5856, "step": 21718 }, { "epoch": 0.6656552654162069, "grad_norm": 1.6990315118360917, "learning_rate": 2.656007443508185e-06, "loss": 0.5531, "step": 21719 }, { "epoch": 0.6656859139389482, "grad_norm": 1.905583549994219, "learning_rate": 2.655569054963226e-06, "loss": 0.554, "step": 21720 }, { "epoch": 0.6657165624616893, "grad_norm": 2.2280663680870045, "learning_rate": 2.6551306895185447e-06, "loss": 0.6471, "step": 21721 }, { "epoch": 0.6657472109844306, "grad_norm": 1.7563903987306289, "learning_rate": 2.6546923471784623e-06, "loss": 0.601, "step": 21722 }, { "epoch": 0.6657778595071717, "grad_norm": 1.713204324020995, "learning_rate": 2.6542540279472974e-06, "loss": 0.5909, "step": 21723 }, { "epoch": 0.665808508029913, "grad_norm": 1.9183831880112527, "learning_rate": 2.653815731829362e-06, "loss": 0.5132, "step": 21724 }, { "epoch": 0.6658391565526541, "grad_norm": 1.685796614810446, "learning_rate": 2.653377458828986e-06, "loss": 0.6043, "step": 21725 }, { "epoch": 0.6658698050753954, "grad_norm": 1.7548034069743326, "learning_rate": 2.6529392089504798e-06, "loss": 0.5351, "step": 21726 }, { "epoch": 0.6659004535981365, "grad_norm": 1.6180287153776525, "learning_rate": 2.6525009821981663e-06, "loss": 0.4864, "step": 21727 }, { "epoch": 0.6659311021208778, "grad_norm": 1.8019773370818044, "learning_rate": 2.6520627785763588e-06, "loss": 0.6416, "step": 21728 }, { "epoch": 0.665961750643619, "grad_norm": 1.8128201080239, "learning_rate": 2.6516245980893775e-06, "loss": 0.596, "step": 21729 }, { "epoch": 0.6659923991663602, "grad_norm": 1.8554715169320604, "learning_rate": 2.6511864407415423e-06, "loss": 0.6265, "step": 21730 }, { "epoch": 0.6660230476891014, "grad_norm": 0.821342903526592, "learning_rate": 2.6507483065371654e-06, "loss": 0.4217, "step": 21731 }, { "epoch": 0.6660536962118426, "grad_norm": 1.8665773494775497, "learning_rate": 2.650310195480567e-06, "loss": 0.6655, "step": 21732 }, { "epoch": 0.6660843447345838, "grad_norm": 1.7327703661364549, "learning_rate": 2.649872107576066e-06, "loss": 0.623, "step": 21733 }, { "epoch": 0.666114993257325, "grad_norm": 1.7633100359157101, "learning_rate": 2.649434042827973e-06, "loss": 0.5559, "step": 21734 }, { "epoch": 0.6661456417800662, "grad_norm": 1.593736573419118, "learning_rate": 2.648996001240609e-06, "loss": 0.5391, "step": 21735 }, { "epoch": 0.6661762903028075, "grad_norm": 1.6146371575368026, "learning_rate": 2.6485579828182908e-06, "loss": 0.5552, "step": 21736 }, { "epoch": 0.6662069388255486, "grad_norm": 1.7173572988773618, "learning_rate": 2.6481199875653296e-06, "loss": 0.5605, "step": 21737 }, { "epoch": 0.6662375873482899, "grad_norm": 0.8108957180134369, "learning_rate": 2.6476820154860467e-06, "loss": 0.426, "step": 21738 }, { "epoch": 0.666268235871031, "grad_norm": 0.7640335582337809, "learning_rate": 2.647244066584753e-06, "loss": 0.3901, "step": 21739 }, { "epoch": 0.6662988843937723, "grad_norm": 1.957197875910531, "learning_rate": 2.6468061408657647e-06, "loss": 0.5633, "step": 21740 }, { "epoch": 0.6663295329165134, "grad_norm": 1.6906889582989744, "learning_rate": 2.6463682383333998e-06, "loss": 0.5972, "step": 21741 }, { "epoch": 0.6663601814392546, "grad_norm": 1.7863786031279867, "learning_rate": 2.6459303589919693e-06, "loss": 0.6072, "step": 21742 }, { "epoch": 0.6663908299619958, "grad_norm": 1.838144092667392, "learning_rate": 2.645492502845789e-06, "loss": 0.5812, "step": 21743 }, { "epoch": 0.666421478484737, "grad_norm": 1.7037443664329464, "learning_rate": 2.645054669899175e-06, "loss": 0.5423, "step": 21744 }, { "epoch": 0.6664521270074782, "grad_norm": 1.979050891703926, "learning_rate": 2.6446168601564387e-06, "loss": 0.6373, "step": 21745 }, { "epoch": 0.6664827755302194, "grad_norm": 1.7134301956952473, "learning_rate": 2.644179073621895e-06, "loss": 0.5204, "step": 21746 }, { "epoch": 0.6665134240529607, "grad_norm": 1.745413324314963, "learning_rate": 2.643741310299859e-06, "loss": 0.5691, "step": 21747 }, { "epoch": 0.6665440725757018, "grad_norm": 1.6724740814933532, "learning_rate": 2.643303570194641e-06, "loss": 0.5327, "step": 21748 }, { "epoch": 0.6665747210984431, "grad_norm": 1.6928458676556177, "learning_rate": 2.6428658533105585e-06, "loss": 0.6383, "step": 21749 }, { "epoch": 0.6666053696211842, "grad_norm": 1.8732388965927906, "learning_rate": 2.64242815965192e-06, "loss": 0.6053, "step": 21750 }, { "epoch": 0.6666360181439255, "grad_norm": 2.040478276772601, "learning_rate": 2.6419904892230386e-06, "loss": 0.6502, "step": 21751 }, { "epoch": 0.6666666666666666, "grad_norm": 1.5549842240925327, "learning_rate": 2.641552842028231e-06, "loss": 0.5904, "step": 21752 }, { "epoch": 0.6666973151894079, "grad_norm": 1.871368125098814, "learning_rate": 2.6411152180718046e-06, "loss": 0.6787, "step": 21753 }, { "epoch": 0.666727963712149, "grad_norm": 2.0451598403161957, "learning_rate": 2.640677617358074e-06, "loss": 0.6187, "step": 21754 }, { "epoch": 0.6667586122348903, "grad_norm": 1.7607358796911818, "learning_rate": 2.6402400398913525e-06, "loss": 0.6366, "step": 21755 }, { "epoch": 0.6667892607576315, "grad_norm": 1.7504035663679092, "learning_rate": 2.6398024856759472e-06, "loss": 0.5115, "step": 21756 }, { "epoch": 0.6668199092803727, "grad_norm": 1.6302040474447985, "learning_rate": 2.639364954716172e-06, "loss": 0.5237, "step": 21757 }, { "epoch": 0.6668505578031139, "grad_norm": 0.7794592297365178, "learning_rate": 2.63892744701634e-06, "loss": 0.4141, "step": 21758 }, { "epoch": 0.6668812063258551, "grad_norm": 1.7474367501346983, "learning_rate": 2.638489962580758e-06, "loss": 0.492, "step": 21759 }, { "epoch": 0.6669118548485963, "grad_norm": 1.9264060695286975, "learning_rate": 2.63805250141374e-06, "loss": 0.6582, "step": 21760 }, { "epoch": 0.6669425033713375, "grad_norm": 1.861286271639303, "learning_rate": 2.6376150635195942e-06, "loss": 0.6946, "step": 21761 }, { "epoch": 0.6669731518940787, "grad_norm": 1.8272180487281189, "learning_rate": 2.6371776489026307e-06, "loss": 0.5504, "step": 21762 }, { "epoch": 0.66700380041682, "grad_norm": 1.877839428177604, "learning_rate": 2.636740257567163e-06, "loss": 0.6151, "step": 21763 }, { "epoch": 0.6670344489395611, "grad_norm": 1.8476097635748956, "learning_rate": 2.636302889517496e-06, "loss": 0.5721, "step": 21764 }, { "epoch": 0.6670650974623024, "grad_norm": 1.9632718238463824, "learning_rate": 2.6358655447579407e-06, "loss": 0.5718, "step": 21765 }, { "epoch": 0.6670957459850435, "grad_norm": 1.584617836492945, "learning_rate": 2.6354282232928098e-06, "loss": 0.6377, "step": 21766 }, { "epoch": 0.6671263945077848, "grad_norm": 1.8927743341584782, "learning_rate": 2.634990925126407e-06, "loss": 0.6333, "step": 21767 }, { "epoch": 0.6671570430305259, "grad_norm": 1.7251728612189572, "learning_rate": 2.6345536502630464e-06, "loss": 0.6124, "step": 21768 }, { "epoch": 0.6671876915532672, "grad_norm": 1.832822899232748, "learning_rate": 2.634116398707032e-06, "loss": 0.6117, "step": 21769 }, { "epoch": 0.6672183400760083, "grad_norm": 1.8599116849979198, "learning_rate": 2.633679170462674e-06, "loss": 0.6882, "step": 21770 }, { "epoch": 0.6672489885987496, "grad_norm": 1.834869324708159, "learning_rate": 2.6332419655342823e-06, "loss": 0.689, "step": 21771 }, { "epoch": 0.6672796371214907, "grad_norm": 1.6259780988094112, "learning_rate": 2.6328047839261608e-06, "loss": 0.6464, "step": 21772 }, { "epoch": 0.6673102856442319, "grad_norm": 2.0771919227968203, "learning_rate": 2.63236762564262e-06, "loss": 0.6885, "step": 21773 }, { "epoch": 0.6673409341669732, "grad_norm": 1.7883089867603832, "learning_rate": 2.6319304906879682e-06, "loss": 0.6111, "step": 21774 }, { "epoch": 0.6673715826897143, "grad_norm": 1.8260899806295239, "learning_rate": 2.63149337906651e-06, "loss": 0.5338, "step": 21775 }, { "epoch": 0.6674022312124556, "grad_norm": 1.7038441569638998, "learning_rate": 2.631056290782553e-06, "loss": 0.6304, "step": 21776 }, { "epoch": 0.6674328797351967, "grad_norm": 0.7998893810166467, "learning_rate": 2.6306192258404062e-06, "loss": 0.4294, "step": 21777 }, { "epoch": 0.667463528257938, "grad_norm": 0.8177583661295383, "learning_rate": 2.6301821842443732e-06, "loss": 0.4084, "step": 21778 }, { "epoch": 0.6674941767806791, "grad_norm": 1.8950189572963978, "learning_rate": 2.6297451659987626e-06, "loss": 0.6155, "step": 21779 }, { "epoch": 0.6675248253034204, "grad_norm": 1.7806663694520393, "learning_rate": 2.629308171107876e-06, "loss": 0.6011, "step": 21780 }, { "epoch": 0.6675554738261615, "grad_norm": 1.6480931667166046, "learning_rate": 2.628871199576026e-06, "loss": 0.6341, "step": 21781 }, { "epoch": 0.6675861223489028, "grad_norm": 0.805074232695288, "learning_rate": 2.6284342514075155e-06, "loss": 0.413, "step": 21782 }, { "epoch": 0.667616770871644, "grad_norm": 1.8806413969001332, "learning_rate": 2.627997326606646e-06, "loss": 0.6129, "step": 21783 }, { "epoch": 0.6676474193943852, "grad_norm": 1.6246263527755371, "learning_rate": 2.6275604251777265e-06, "loss": 0.5344, "step": 21784 }, { "epoch": 0.6676780679171264, "grad_norm": 1.8006047704561625, "learning_rate": 2.6271235471250633e-06, "loss": 0.5977, "step": 21785 }, { "epoch": 0.6677087164398676, "grad_norm": 1.871386147077009, "learning_rate": 2.6266866924529566e-06, "loss": 0.6588, "step": 21786 }, { "epoch": 0.6677393649626088, "grad_norm": 1.80921635434187, "learning_rate": 2.6262498611657134e-06, "loss": 0.5934, "step": 21787 }, { "epoch": 0.66777001348535, "grad_norm": 1.7041452954176963, "learning_rate": 2.625813053267637e-06, "loss": 0.6141, "step": 21788 }, { "epoch": 0.6678006620080912, "grad_norm": 0.7353612311863992, "learning_rate": 2.625376268763035e-06, "loss": 0.3866, "step": 21789 }, { "epoch": 0.6678313105308324, "grad_norm": 2.08789285857896, "learning_rate": 2.6249395076562078e-06, "loss": 0.597, "step": 21790 }, { "epoch": 0.6678619590535736, "grad_norm": 1.6399787949182094, "learning_rate": 2.6245027699514554e-06, "loss": 0.6091, "step": 21791 }, { "epoch": 0.6678926075763149, "grad_norm": 1.972475764159773, "learning_rate": 2.624066055653089e-06, "loss": 0.5898, "step": 21792 }, { "epoch": 0.667923256099056, "grad_norm": 1.6776580357028634, "learning_rate": 2.6236293647654077e-06, "loss": 0.5694, "step": 21793 }, { "epoch": 0.6679539046217973, "grad_norm": 1.6905984833971892, "learning_rate": 2.623192697292712e-06, "loss": 0.6396, "step": 21794 }, { "epoch": 0.6679845531445384, "grad_norm": 1.7303202669769802, "learning_rate": 2.622756053239307e-06, "loss": 0.6049, "step": 21795 }, { "epoch": 0.6680152016672797, "grad_norm": 1.7848460151553387, "learning_rate": 2.6223194326094966e-06, "loss": 0.6592, "step": 21796 }, { "epoch": 0.6680458501900208, "grad_norm": 1.6677995439412883, "learning_rate": 2.621882835407579e-06, "loss": 0.5392, "step": 21797 }, { "epoch": 0.6680764987127621, "grad_norm": 1.7744597141837555, "learning_rate": 2.621446261637859e-06, "loss": 0.5476, "step": 21798 }, { "epoch": 0.6681071472355032, "grad_norm": 1.6811277360610095, "learning_rate": 2.6210097113046373e-06, "loss": 0.6574, "step": 21799 }, { "epoch": 0.6681377957582445, "grad_norm": 0.7990621046180609, "learning_rate": 2.620573184412217e-06, "loss": 0.4151, "step": 21800 }, { "epoch": 0.6681684442809857, "grad_norm": 2.018444046301471, "learning_rate": 2.6201366809648986e-06, "loss": 0.5996, "step": 21801 }, { "epoch": 0.6681990928037269, "grad_norm": 1.8067702412260174, "learning_rate": 2.6197002009669804e-06, "loss": 0.5403, "step": 21802 }, { "epoch": 0.6682297413264681, "grad_norm": 1.6453818571563097, "learning_rate": 2.6192637444227646e-06, "loss": 0.6612, "step": 21803 }, { "epoch": 0.6682603898492092, "grad_norm": 1.7281967892013785, "learning_rate": 2.618827311336555e-06, "loss": 0.5589, "step": 21804 }, { "epoch": 0.6682910383719505, "grad_norm": 1.8425991632283667, "learning_rate": 2.6183909017126462e-06, "loss": 0.6101, "step": 21805 }, { "epoch": 0.6683216868946916, "grad_norm": 1.7951409132608125, "learning_rate": 2.617954515555342e-06, "loss": 0.6326, "step": 21806 }, { "epoch": 0.6683523354174329, "grad_norm": 0.8399534334575518, "learning_rate": 2.6175181528689416e-06, "loss": 0.4165, "step": 21807 }, { "epoch": 0.668382983940174, "grad_norm": 1.8903120801785196, "learning_rate": 2.617081813657746e-06, "loss": 0.5984, "step": 21808 }, { "epoch": 0.6684136324629153, "grad_norm": 1.7194771397245727, "learning_rate": 2.6166454979260525e-06, "loss": 0.5122, "step": 21809 }, { "epoch": 0.6684442809856564, "grad_norm": 1.7805805318115193, "learning_rate": 2.6162092056781573e-06, "loss": 0.6878, "step": 21810 }, { "epoch": 0.6684749295083977, "grad_norm": 1.7507898708541745, "learning_rate": 2.615772936918367e-06, "loss": 0.5477, "step": 21811 }, { "epoch": 0.6685055780311389, "grad_norm": 2.012444596031975, "learning_rate": 2.6153366916509757e-06, "loss": 0.6295, "step": 21812 }, { "epoch": 0.6685362265538801, "grad_norm": 1.527399299042975, "learning_rate": 2.61490046988028e-06, "loss": 0.4838, "step": 21813 }, { "epoch": 0.6685668750766213, "grad_norm": 1.6129780715803586, "learning_rate": 2.6144642716105805e-06, "loss": 0.5908, "step": 21814 }, { "epoch": 0.6685975235993625, "grad_norm": 1.6481019225285227, "learning_rate": 2.6140280968461746e-06, "loss": 0.4674, "step": 21815 }, { "epoch": 0.6686281721221037, "grad_norm": 1.7160441047842128, "learning_rate": 2.613591945591362e-06, "loss": 0.5331, "step": 21816 }, { "epoch": 0.6686588206448449, "grad_norm": 1.637035693289162, "learning_rate": 2.6131558178504375e-06, "loss": 0.5892, "step": 21817 }, { "epoch": 0.6686894691675861, "grad_norm": 1.8870117312135972, "learning_rate": 2.6127197136276987e-06, "loss": 0.5939, "step": 21818 }, { "epoch": 0.6687201176903274, "grad_norm": 1.893732794259613, "learning_rate": 2.612283632927446e-06, "loss": 0.6566, "step": 21819 }, { "epoch": 0.6687507662130685, "grad_norm": 0.8500229698583389, "learning_rate": 2.611847575753973e-06, "loss": 0.4414, "step": 21820 }, { "epoch": 0.6687814147358098, "grad_norm": 1.7188549151383585, "learning_rate": 2.6114115421115727e-06, "loss": 0.6113, "step": 21821 }, { "epoch": 0.6688120632585509, "grad_norm": 1.7717505035948662, "learning_rate": 2.6109755320045505e-06, "loss": 0.5926, "step": 21822 }, { "epoch": 0.6688427117812922, "grad_norm": 1.6584272827822855, "learning_rate": 2.610539545437196e-06, "loss": 0.5433, "step": 21823 }, { "epoch": 0.6688733603040333, "grad_norm": 1.6764426320032495, "learning_rate": 2.6101035824138064e-06, "loss": 0.6461, "step": 21824 }, { "epoch": 0.6689040088267746, "grad_norm": 1.935175299311811, "learning_rate": 2.6096676429386767e-06, "loss": 0.5884, "step": 21825 }, { "epoch": 0.6689346573495157, "grad_norm": 1.7380349723181434, "learning_rate": 2.6092317270161037e-06, "loss": 0.5842, "step": 21826 }, { "epoch": 0.668965305872257, "grad_norm": 1.7748275825446966, "learning_rate": 2.608795834650385e-06, "loss": 0.6192, "step": 21827 }, { "epoch": 0.6689959543949981, "grad_norm": 1.7817083857804843, "learning_rate": 2.6083599658458096e-06, "loss": 0.5559, "step": 21828 }, { "epoch": 0.6690266029177394, "grad_norm": 0.827901503270176, "learning_rate": 2.607924120606676e-06, "loss": 0.4074, "step": 21829 }, { "epoch": 0.6690572514404806, "grad_norm": 1.925401904747045, "learning_rate": 2.6074882989372798e-06, "loss": 0.5586, "step": 21830 }, { "epoch": 0.6690878999632218, "grad_norm": 1.7318397705161772, "learning_rate": 2.6070525008419135e-06, "loss": 0.6408, "step": 21831 }, { "epoch": 0.669118548485963, "grad_norm": 1.6521919525647302, "learning_rate": 2.6066167263248677e-06, "loss": 0.6318, "step": 21832 }, { "epoch": 0.6691491970087042, "grad_norm": 1.7387789478197684, "learning_rate": 2.6061809753904426e-06, "loss": 0.5737, "step": 21833 }, { "epoch": 0.6691798455314454, "grad_norm": 1.8896502934150676, "learning_rate": 2.6057452480429278e-06, "loss": 0.6386, "step": 21834 }, { "epoch": 0.6692104940541865, "grad_norm": 1.635676605922519, "learning_rate": 2.6053095442866196e-06, "loss": 0.549, "step": 21835 }, { "epoch": 0.6692411425769278, "grad_norm": 0.780402807278071, "learning_rate": 2.6048738641258063e-06, "loss": 0.4127, "step": 21836 }, { "epoch": 0.669271791099669, "grad_norm": 1.909412603638179, "learning_rate": 2.6044382075647844e-06, "loss": 0.7037, "step": 21837 }, { "epoch": 0.6693024396224102, "grad_norm": 1.9294529611632103, "learning_rate": 2.604002574607847e-06, "loss": 0.5656, "step": 21838 }, { "epoch": 0.6693330881451514, "grad_norm": 1.9236070174800846, "learning_rate": 2.6035669652592843e-06, "loss": 0.6855, "step": 21839 }, { "epoch": 0.6693637366678926, "grad_norm": 0.7775235343556633, "learning_rate": 2.6031313795233894e-06, "loss": 0.4173, "step": 21840 }, { "epoch": 0.6693943851906338, "grad_norm": 1.697687543109491, "learning_rate": 2.6026958174044557e-06, "loss": 0.6751, "step": 21841 }, { "epoch": 0.669425033713375, "grad_norm": 1.7718614995535147, "learning_rate": 2.602260278906772e-06, "loss": 0.5478, "step": 21842 }, { "epoch": 0.6694556822361162, "grad_norm": 1.683796511346792, "learning_rate": 2.6018247640346304e-06, "loss": 0.5541, "step": 21843 }, { "epoch": 0.6694863307588574, "grad_norm": 1.7458918316275982, "learning_rate": 2.601389272792326e-06, "loss": 0.568, "step": 21844 }, { "epoch": 0.6695169792815986, "grad_norm": 1.8848401908204522, "learning_rate": 2.6009538051841443e-06, "loss": 0.6253, "step": 21845 }, { "epoch": 0.6695476278043399, "grad_norm": 1.99323362386098, "learning_rate": 2.60051836121438e-06, "loss": 0.5272, "step": 21846 }, { "epoch": 0.669578276327081, "grad_norm": 1.6746610818631618, "learning_rate": 2.600082940887321e-06, "loss": 0.5897, "step": 21847 }, { "epoch": 0.6696089248498223, "grad_norm": 1.7545188385779595, "learning_rate": 2.599647544207259e-06, "loss": 0.6836, "step": 21848 }, { "epoch": 0.6696395733725634, "grad_norm": 1.7382290682547827, "learning_rate": 2.5992121711784858e-06, "loss": 0.6021, "step": 21849 }, { "epoch": 0.6696702218953047, "grad_norm": 1.6388480240304144, "learning_rate": 2.5987768218052866e-06, "loss": 0.5864, "step": 21850 }, { "epoch": 0.6697008704180458, "grad_norm": 1.6734256398329512, "learning_rate": 2.5983414960919547e-06, "loss": 0.7049, "step": 21851 }, { "epoch": 0.6697315189407871, "grad_norm": 1.8380596193014391, "learning_rate": 2.5979061940427798e-06, "loss": 0.6003, "step": 21852 }, { "epoch": 0.6697621674635282, "grad_norm": 1.8536572759849543, "learning_rate": 2.5974709156620483e-06, "loss": 0.6103, "step": 21853 }, { "epoch": 0.6697928159862695, "grad_norm": 1.737591864590427, "learning_rate": 2.5970356609540522e-06, "loss": 0.5831, "step": 21854 }, { "epoch": 0.6698234645090106, "grad_norm": 1.8558413165782046, "learning_rate": 2.596600429923076e-06, "loss": 0.6445, "step": 21855 }, { "epoch": 0.6698541130317519, "grad_norm": 2.329877392365582, "learning_rate": 2.5961652225734126e-06, "loss": 0.6347, "step": 21856 }, { "epoch": 0.6698847615544931, "grad_norm": 0.763769490040948, "learning_rate": 2.5957300389093486e-06, "loss": 0.3804, "step": 21857 }, { "epoch": 0.6699154100772343, "grad_norm": 0.8363257762531874, "learning_rate": 2.5952948789351708e-06, "loss": 0.3996, "step": 21858 }, { "epoch": 0.6699460585999755, "grad_norm": 1.5591184673541134, "learning_rate": 2.594859742655167e-06, "loss": 0.608, "step": 21859 }, { "epoch": 0.6699767071227167, "grad_norm": 1.8001354598110497, "learning_rate": 2.5944246300736274e-06, "loss": 0.5626, "step": 21860 }, { "epoch": 0.6700073556454579, "grad_norm": 1.82355604551239, "learning_rate": 2.5939895411948355e-06, "loss": 0.598, "step": 21861 }, { "epoch": 0.6700380041681991, "grad_norm": 1.8772228697212776, "learning_rate": 2.5935544760230813e-06, "loss": 0.5242, "step": 21862 }, { "epoch": 0.6700686526909403, "grad_norm": 1.6714449870754007, "learning_rate": 2.5931194345626516e-06, "loss": 0.6165, "step": 21863 }, { "epoch": 0.6700993012136816, "grad_norm": 1.8712671482616585, "learning_rate": 2.59268441681783e-06, "loss": 0.6431, "step": 21864 }, { "epoch": 0.6701299497364227, "grad_norm": 1.8077887011046494, "learning_rate": 2.592249422792907e-06, "loss": 0.4935, "step": 21865 }, { "epoch": 0.6701605982591639, "grad_norm": 1.8427850973652644, "learning_rate": 2.591814452492164e-06, "loss": 0.6259, "step": 21866 }, { "epoch": 0.6701912467819051, "grad_norm": 0.7784150780545899, "learning_rate": 2.59137950591989e-06, "loss": 0.4192, "step": 21867 }, { "epoch": 0.6702218953046463, "grad_norm": 1.7822494361266183, "learning_rate": 2.590944583080372e-06, "loss": 0.6342, "step": 21868 }, { "epoch": 0.6702525438273875, "grad_norm": 1.7051453919181327, "learning_rate": 2.5905096839778907e-06, "loss": 0.5928, "step": 21869 }, { "epoch": 0.6702831923501287, "grad_norm": 1.8105975139628954, "learning_rate": 2.590074808616735e-06, "loss": 0.6218, "step": 21870 }, { "epoch": 0.6703138408728699, "grad_norm": 1.7948005109708136, "learning_rate": 2.58963995700119e-06, "loss": 0.6274, "step": 21871 }, { "epoch": 0.6703444893956111, "grad_norm": 0.7944895090556647, "learning_rate": 2.589205129135538e-06, "loss": 0.4159, "step": 21872 }, { "epoch": 0.6703751379183523, "grad_norm": 1.9204830522649756, "learning_rate": 2.5887703250240637e-06, "loss": 0.5743, "step": 21873 }, { "epoch": 0.6704057864410935, "grad_norm": 2.233207877301456, "learning_rate": 2.5883355446710547e-06, "loss": 0.6179, "step": 21874 }, { "epoch": 0.6704364349638348, "grad_norm": 1.757075605423191, "learning_rate": 2.587900788080791e-06, "loss": 0.6366, "step": 21875 }, { "epoch": 0.6704670834865759, "grad_norm": 1.453824452833262, "learning_rate": 2.58746605525756e-06, "loss": 0.5107, "step": 21876 }, { "epoch": 0.6704977320093172, "grad_norm": 0.7613078623405779, "learning_rate": 2.5870313462056405e-06, "loss": 0.4088, "step": 21877 }, { "epoch": 0.6705283805320583, "grad_norm": 1.651804812997721, "learning_rate": 2.5865966609293193e-06, "loss": 0.4854, "step": 21878 }, { "epoch": 0.6705590290547996, "grad_norm": 1.910364528452746, "learning_rate": 2.5861619994328802e-06, "loss": 0.6608, "step": 21879 }, { "epoch": 0.6705896775775407, "grad_norm": 1.7281681620808704, "learning_rate": 2.5857273617206024e-06, "loss": 0.5249, "step": 21880 }, { "epoch": 0.670620326100282, "grad_norm": 1.581519082963641, "learning_rate": 2.5852927477967714e-06, "loss": 0.6222, "step": 21881 }, { "epoch": 0.6706509746230231, "grad_norm": 1.9336889877163839, "learning_rate": 2.5848581576656707e-06, "loss": 0.6034, "step": 21882 }, { "epoch": 0.6706816231457644, "grad_norm": 1.8672289971073974, "learning_rate": 2.5844235913315773e-06, "loss": 0.5579, "step": 21883 }, { "epoch": 0.6707122716685056, "grad_norm": 1.5387985580541126, "learning_rate": 2.5839890487987773e-06, "loss": 0.5583, "step": 21884 }, { "epoch": 0.6707429201912468, "grad_norm": 1.9670665497685231, "learning_rate": 2.5835545300715537e-06, "loss": 0.6838, "step": 21885 }, { "epoch": 0.670773568713988, "grad_norm": 1.8308636123118651, "learning_rate": 2.583120035154183e-06, "loss": 0.6435, "step": 21886 }, { "epoch": 0.6708042172367292, "grad_norm": 1.750697408993787, "learning_rate": 2.5826855640509507e-06, "loss": 0.605, "step": 21887 }, { "epoch": 0.6708348657594704, "grad_norm": 1.9030078499879495, "learning_rate": 2.5822511167661328e-06, "loss": 0.6535, "step": 21888 }, { "epoch": 0.6708655142822116, "grad_norm": 1.8803665114584154, "learning_rate": 2.581816693304017e-06, "loss": 0.6222, "step": 21889 }, { "epoch": 0.6708961628049528, "grad_norm": 1.9743882446630543, "learning_rate": 2.58138229366888e-06, "loss": 0.6108, "step": 21890 }, { "epoch": 0.670926811327694, "grad_norm": 1.9457874564996942, "learning_rate": 2.580947917865e-06, "loss": 0.561, "step": 21891 }, { "epoch": 0.6709574598504352, "grad_norm": 0.8115251864002397, "learning_rate": 2.580513565896659e-06, "loss": 0.4273, "step": 21892 }, { "epoch": 0.6709881083731765, "grad_norm": 0.7654166954656801, "learning_rate": 2.5800792377681386e-06, "loss": 0.4032, "step": 21893 }, { "epoch": 0.6710187568959176, "grad_norm": 2.000911911452428, "learning_rate": 2.579644933483715e-06, "loss": 0.6619, "step": 21894 }, { "epoch": 0.6710494054186589, "grad_norm": 1.6832380698945404, "learning_rate": 2.5792106530476695e-06, "loss": 0.558, "step": 21895 }, { "epoch": 0.6710800539414, "grad_norm": 1.6686782540391996, "learning_rate": 2.578776396464281e-06, "loss": 0.6301, "step": 21896 }, { "epoch": 0.6711107024641412, "grad_norm": 0.8366396883628605, "learning_rate": 2.5783421637378293e-06, "loss": 0.434, "step": 21897 }, { "epoch": 0.6711413509868824, "grad_norm": 1.8714111287381499, "learning_rate": 2.5779079548725923e-06, "loss": 0.5622, "step": 21898 }, { "epoch": 0.6711719995096236, "grad_norm": 1.6037916617880479, "learning_rate": 2.5774737698728458e-06, "loss": 0.5571, "step": 21899 }, { "epoch": 0.6712026480323648, "grad_norm": 3.645745604572711, "learning_rate": 2.57703960874287e-06, "loss": 0.6288, "step": 21900 }, { "epoch": 0.671233296555106, "grad_norm": 1.9545345209624816, "learning_rate": 2.576605471486945e-06, "loss": 0.6484, "step": 21901 }, { "epoch": 0.6712639450778473, "grad_norm": 1.6276567273471303, "learning_rate": 2.5761713581093444e-06, "loss": 0.5062, "step": 21902 }, { "epoch": 0.6712945936005884, "grad_norm": 1.6065327537768939, "learning_rate": 2.5757372686143478e-06, "loss": 0.4489, "step": 21903 }, { "epoch": 0.6713252421233297, "grad_norm": 0.7984993989342111, "learning_rate": 2.5753032030062337e-06, "loss": 0.4155, "step": 21904 }, { "epoch": 0.6713558906460708, "grad_norm": 1.8221992718575355, "learning_rate": 2.5748691612892757e-06, "loss": 0.5992, "step": 21905 }, { "epoch": 0.6713865391688121, "grad_norm": 1.8647864344174885, "learning_rate": 2.5744351434677544e-06, "loss": 0.5969, "step": 21906 }, { "epoch": 0.6714171876915532, "grad_norm": 1.9217019290394433, "learning_rate": 2.5740011495459403e-06, "loss": 0.6361, "step": 21907 }, { "epoch": 0.6714478362142945, "grad_norm": 2.012290958673959, "learning_rate": 2.5735671795281177e-06, "loss": 0.6455, "step": 21908 }, { "epoch": 0.6714784847370356, "grad_norm": 1.7381381027309362, "learning_rate": 2.5731332334185577e-06, "loss": 0.5262, "step": 21909 }, { "epoch": 0.6715091332597769, "grad_norm": 1.8029549867903272, "learning_rate": 2.572699311221536e-06, "loss": 0.5802, "step": 21910 }, { "epoch": 0.671539781782518, "grad_norm": 1.8351874621120834, "learning_rate": 2.5722654129413283e-06, "loss": 0.5704, "step": 21911 }, { "epoch": 0.6715704303052593, "grad_norm": 1.8284005645106853, "learning_rate": 2.571831538582213e-06, "loss": 0.5624, "step": 21912 }, { "epoch": 0.6716010788280005, "grad_norm": 1.54251728377942, "learning_rate": 2.5713976881484605e-06, "loss": 0.6091, "step": 21913 }, { "epoch": 0.6716317273507417, "grad_norm": 1.9981018748090298, "learning_rate": 2.5709638616443483e-06, "loss": 0.6748, "step": 21914 }, { "epoch": 0.6716623758734829, "grad_norm": 1.8924053741133615, "learning_rate": 2.570530059074151e-06, "loss": 0.5713, "step": 21915 }, { "epoch": 0.6716930243962241, "grad_norm": 1.7168698797398514, "learning_rate": 2.570096280442144e-06, "loss": 0.495, "step": 21916 }, { "epoch": 0.6717236729189653, "grad_norm": 1.860782264003547, "learning_rate": 2.5696625257526e-06, "loss": 0.6243, "step": 21917 }, { "epoch": 0.6717543214417065, "grad_norm": 1.8420742739547686, "learning_rate": 2.5692287950097894e-06, "loss": 0.5588, "step": 21918 }, { "epoch": 0.6717849699644477, "grad_norm": 1.6570962263445632, "learning_rate": 2.5687950882179935e-06, "loss": 0.6347, "step": 21919 }, { "epoch": 0.671815618487189, "grad_norm": 1.614873060272562, "learning_rate": 2.568361405381481e-06, "loss": 0.5794, "step": 21920 }, { "epoch": 0.6718462670099301, "grad_norm": 2.212505760566257, "learning_rate": 2.567927746504524e-06, "loss": 0.6895, "step": 21921 }, { "epoch": 0.6718769155326714, "grad_norm": 1.7268893555945721, "learning_rate": 2.5674941115913975e-06, "loss": 0.5638, "step": 21922 }, { "epoch": 0.6719075640554125, "grad_norm": 1.7601334237996766, "learning_rate": 2.567060500646373e-06, "loss": 0.5723, "step": 21923 }, { "epoch": 0.6719382125781538, "grad_norm": 1.7266141926152652, "learning_rate": 2.5666269136737277e-06, "loss": 0.4938, "step": 21924 }, { "epoch": 0.6719688611008949, "grad_norm": 1.9226254990942597, "learning_rate": 2.5661933506777266e-06, "loss": 0.6764, "step": 21925 }, { "epoch": 0.6719995096236362, "grad_norm": 1.8141679737465424, "learning_rate": 2.5657598116626454e-06, "loss": 0.6433, "step": 21926 }, { "epoch": 0.6720301581463773, "grad_norm": 0.8086088084452044, "learning_rate": 2.5653262966327572e-06, "loss": 0.4317, "step": 21927 }, { "epoch": 0.6720608066691185, "grad_norm": 1.7959608925773034, "learning_rate": 2.564892805592333e-06, "loss": 0.601, "step": 21928 }, { "epoch": 0.6720914551918598, "grad_norm": 1.5622466208524484, "learning_rate": 2.5644593385456386e-06, "loss": 0.5499, "step": 21929 }, { "epoch": 0.6721221037146009, "grad_norm": 1.731772284566023, "learning_rate": 2.5640258954969533e-06, "loss": 0.6798, "step": 21930 }, { "epoch": 0.6721527522373422, "grad_norm": 2.296413594512218, "learning_rate": 2.5635924764505437e-06, "loss": 0.5822, "step": 21931 }, { "epoch": 0.6721834007600833, "grad_norm": 0.8034605355480093, "learning_rate": 2.5631590814106793e-06, "loss": 0.4248, "step": 21932 }, { "epoch": 0.6722140492828246, "grad_norm": 1.6969182952929858, "learning_rate": 2.5627257103816315e-06, "loss": 0.6081, "step": 21933 }, { "epoch": 0.6722446978055657, "grad_norm": 1.8445635089485406, "learning_rate": 2.5622923633676715e-06, "loss": 0.5847, "step": 21934 }, { "epoch": 0.672275346328307, "grad_norm": 1.7179222575493207, "learning_rate": 2.5618590403730702e-06, "loss": 0.6071, "step": 21935 }, { "epoch": 0.6723059948510481, "grad_norm": 2.0032999469554795, "learning_rate": 2.5614257414020936e-06, "loss": 0.5755, "step": 21936 }, { "epoch": 0.6723366433737894, "grad_norm": 2.0283694070231837, "learning_rate": 2.5609924664590136e-06, "loss": 0.6743, "step": 21937 }, { "epoch": 0.6723672918965306, "grad_norm": 2.0668126375988045, "learning_rate": 2.5605592155481007e-06, "loss": 0.6338, "step": 21938 }, { "epoch": 0.6723979404192718, "grad_norm": 1.762075339836358, "learning_rate": 2.5601259886736217e-06, "loss": 0.6147, "step": 21939 }, { "epoch": 0.672428588942013, "grad_norm": 1.568636138283126, "learning_rate": 2.559692785839842e-06, "loss": 0.6027, "step": 21940 }, { "epoch": 0.6724592374647542, "grad_norm": 1.8362376182781641, "learning_rate": 2.5592596070510375e-06, "loss": 0.6439, "step": 21941 }, { "epoch": 0.6724898859874954, "grad_norm": 1.8905281181163054, "learning_rate": 2.5588264523114703e-06, "loss": 0.622, "step": 21942 }, { "epoch": 0.6725205345102366, "grad_norm": 1.6738712440667407, "learning_rate": 2.5583933216254133e-06, "loss": 0.6024, "step": 21943 }, { "epoch": 0.6725511830329778, "grad_norm": 1.6723485446675543, "learning_rate": 2.5579602149971282e-06, "loss": 0.6113, "step": 21944 }, { "epoch": 0.672581831555719, "grad_norm": 2.0493299496644752, "learning_rate": 2.5575271324308876e-06, "loss": 0.6983, "step": 21945 }, { "epoch": 0.6726124800784602, "grad_norm": 0.8417890240295134, "learning_rate": 2.557094073930958e-06, "loss": 0.4407, "step": 21946 }, { "epoch": 0.6726431286012015, "grad_norm": 0.8206764915655789, "learning_rate": 2.5566610395016047e-06, "loss": 0.4167, "step": 21947 }, { "epoch": 0.6726737771239426, "grad_norm": 2.2465667462565646, "learning_rate": 2.556228029147094e-06, "loss": 0.7125, "step": 21948 }, { "epoch": 0.6727044256466839, "grad_norm": 1.9427167534310872, "learning_rate": 2.555795042871696e-06, "loss": 0.6277, "step": 21949 }, { "epoch": 0.672735074169425, "grad_norm": 1.7112264239765542, "learning_rate": 2.555362080679675e-06, "loss": 0.6309, "step": 21950 }, { "epoch": 0.6727657226921663, "grad_norm": 0.8581160025634121, "learning_rate": 2.5549291425752954e-06, "loss": 0.4173, "step": 21951 }, { "epoch": 0.6727963712149074, "grad_norm": 1.9581992368919272, "learning_rate": 2.5544962285628243e-06, "loss": 0.6703, "step": 21952 }, { "epoch": 0.6728270197376487, "grad_norm": 1.8823653278945431, "learning_rate": 2.5540633386465276e-06, "loss": 0.5881, "step": 21953 }, { "epoch": 0.6728576682603898, "grad_norm": 1.6890734662174618, "learning_rate": 2.5536304728306725e-06, "loss": 0.5574, "step": 21954 }, { "epoch": 0.6728883167831311, "grad_norm": 1.8283079434683398, "learning_rate": 2.5531976311195205e-06, "loss": 0.5617, "step": 21955 }, { "epoch": 0.6729189653058723, "grad_norm": 1.7165327548866605, "learning_rate": 2.5527648135173377e-06, "loss": 0.5926, "step": 21956 }, { "epoch": 0.6729496138286135, "grad_norm": 1.5935656336942148, "learning_rate": 2.552332020028392e-06, "loss": 0.5066, "step": 21957 }, { "epoch": 0.6729802623513547, "grad_norm": 1.923294886522952, "learning_rate": 2.5518992506569453e-06, "loss": 0.6412, "step": 21958 }, { "epoch": 0.6730109108740958, "grad_norm": 1.8470455412374873, "learning_rate": 2.5514665054072572e-06, "loss": 0.6086, "step": 21959 }, { "epoch": 0.6730415593968371, "grad_norm": 0.8396268736377983, "learning_rate": 2.5510337842835997e-06, "loss": 0.4249, "step": 21960 }, { "epoch": 0.6730722079195782, "grad_norm": 1.917951873290679, "learning_rate": 2.550601087290232e-06, "loss": 0.6001, "step": 21961 }, { "epoch": 0.6731028564423195, "grad_norm": 1.9054428031318809, "learning_rate": 2.55016841443142e-06, "loss": 0.666, "step": 21962 }, { "epoch": 0.6731335049650606, "grad_norm": 0.7940558485948119, "learning_rate": 2.549735765711423e-06, "loss": 0.4101, "step": 21963 }, { "epoch": 0.6731641534878019, "grad_norm": 1.8395948706401002, "learning_rate": 2.549303141134507e-06, "loss": 0.5742, "step": 21964 }, { "epoch": 0.673194802010543, "grad_norm": 1.7751141692246515, "learning_rate": 2.5488705407049353e-06, "loss": 0.558, "step": 21965 }, { "epoch": 0.6732254505332843, "grad_norm": 1.7869892564363152, "learning_rate": 2.5484379644269687e-06, "loss": 0.535, "step": 21966 }, { "epoch": 0.6732560990560255, "grad_norm": 1.9732129368334572, "learning_rate": 2.5480054123048693e-06, "loss": 0.6254, "step": 21967 }, { "epoch": 0.6732867475787667, "grad_norm": 1.8208703598368792, "learning_rate": 2.5475728843429017e-06, "loss": 0.6145, "step": 21968 }, { "epoch": 0.6733173961015079, "grad_norm": 0.8907257950865834, "learning_rate": 2.547140380545324e-06, "loss": 0.4225, "step": 21969 }, { "epoch": 0.6733480446242491, "grad_norm": 0.7630576650292407, "learning_rate": 2.5467079009164e-06, "loss": 0.415, "step": 21970 }, { "epoch": 0.6733786931469903, "grad_norm": 1.8956995868263384, "learning_rate": 2.5462754454603927e-06, "loss": 0.5791, "step": 21971 }, { "epoch": 0.6734093416697315, "grad_norm": 1.9890094127808922, "learning_rate": 2.545843014181559e-06, "loss": 0.6573, "step": 21972 }, { "epoch": 0.6734399901924727, "grad_norm": 1.746363059300152, "learning_rate": 2.5454106070841644e-06, "loss": 0.661, "step": 21973 }, { "epoch": 0.673470638715214, "grad_norm": 2.0227253345738725, "learning_rate": 2.544978224172465e-06, "loss": 0.6704, "step": 21974 }, { "epoch": 0.6735012872379551, "grad_norm": 1.934998485217486, "learning_rate": 2.544545865450724e-06, "loss": 0.6374, "step": 21975 }, { "epoch": 0.6735319357606964, "grad_norm": 1.880762513322935, "learning_rate": 2.544113530923201e-06, "loss": 0.583, "step": 21976 }, { "epoch": 0.6735625842834375, "grad_norm": 1.7564375148150078, "learning_rate": 2.543681220594155e-06, "loss": 0.626, "step": 21977 }, { "epoch": 0.6735932328061788, "grad_norm": 1.6981162803711463, "learning_rate": 2.5432489344678467e-06, "loss": 0.6203, "step": 21978 }, { "epoch": 0.6736238813289199, "grad_norm": 1.7867794161544253, "learning_rate": 2.5428166725485372e-06, "loss": 0.4911, "step": 21979 }, { "epoch": 0.6736545298516612, "grad_norm": 1.743619820273211, "learning_rate": 2.5423844348404812e-06, "loss": 0.6741, "step": 21980 }, { "epoch": 0.6736851783744023, "grad_norm": 1.7023725847891213, "learning_rate": 2.54195222134794e-06, "loss": 0.5049, "step": 21981 }, { "epoch": 0.6737158268971436, "grad_norm": 1.7807619432402915, "learning_rate": 2.5415200320751754e-06, "loss": 0.4861, "step": 21982 }, { "epoch": 0.6737464754198847, "grad_norm": 1.7909344957933433, "learning_rate": 2.54108786702644e-06, "loss": 0.6492, "step": 21983 }, { "epoch": 0.673777123942626, "grad_norm": 2.008849518874982, "learning_rate": 2.5406557262059973e-06, "loss": 0.5535, "step": 21984 }, { "epoch": 0.6738077724653672, "grad_norm": 2.067839332877294, "learning_rate": 2.540223609618101e-06, "loss": 0.7079, "step": 21985 }, { "epoch": 0.6738384209881084, "grad_norm": 1.8518073417622969, "learning_rate": 2.5397915172670105e-06, "loss": 0.6253, "step": 21986 }, { "epoch": 0.6738690695108496, "grad_norm": 1.7019415602676187, "learning_rate": 2.539359449156986e-06, "loss": 0.6252, "step": 21987 }, { "epoch": 0.6738997180335908, "grad_norm": 0.8067789477080879, "learning_rate": 2.5389274052922807e-06, "loss": 0.4148, "step": 21988 }, { "epoch": 0.673930366556332, "grad_norm": 0.7716236386776563, "learning_rate": 2.5384953856771533e-06, "loss": 0.3981, "step": 21989 }, { "epoch": 0.6739610150790731, "grad_norm": 1.9458136032018638, "learning_rate": 2.5380633903158623e-06, "loss": 0.6361, "step": 21990 }, { "epoch": 0.6739916636018144, "grad_norm": 1.798462749379286, "learning_rate": 2.537631419212661e-06, "loss": 0.5658, "step": 21991 }, { "epoch": 0.6740223121245555, "grad_norm": 1.668874527365498, "learning_rate": 2.5371994723718075e-06, "loss": 0.4943, "step": 21992 }, { "epoch": 0.6740529606472968, "grad_norm": 1.6243519823658343, "learning_rate": 2.536767549797559e-06, "loss": 0.5593, "step": 21993 }, { "epoch": 0.674083609170038, "grad_norm": 1.6357927505825443, "learning_rate": 2.5363356514941684e-06, "loss": 0.5116, "step": 21994 }, { "epoch": 0.6741142576927792, "grad_norm": 1.667509655954575, "learning_rate": 2.535903777465895e-06, "loss": 0.5375, "step": 21995 }, { "epoch": 0.6741449062155204, "grad_norm": 1.928533001696845, "learning_rate": 2.5354719277169906e-06, "loss": 0.6367, "step": 21996 }, { "epoch": 0.6741755547382616, "grad_norm": 1.6458476407630978, "learning_rate": 2.5350401022517114e-06, "loss": 0.5612, "step": 21997 }, { "epoch": 0.6742062032610028, "grad_norm": 1.7248888283210804, "learning_rate": 2.534608301074315e-06, "loss": 0.5647, "step": 21998 }, { "epoch": 0.674236851783744, "grad_norm": 1.833608815682671, "learning_rate": 2.5341765241890516e-06, "loss": 0.6231, "step": 21999 }, { "epoch": 0.6742675003064852, "grad_norm": 2.056935215892492, "learning_rate": 2.5337447716001773e-06, "loss": 0.6201, "step": 22000 }, { "epoch": 0.6742981488292265, "grad_norm": 0.7726058123581973, "learning_rate": 2.5333130433119495e-06, "loss": 0.4003, "step": 22001 }, { "epoch": 0.6743287973519676, "grad_norm": 1.744829798403336, "learning_rate": 2.532881339328617e-06, "loss": 0.6156, "step": 22002 }, { "epoch": 0.6743594458747089, "grad_norm": 1.768267581573039, "learning_rate": 2.5324496596544383e-06, "loss": 0.5585, "step": 22003 }, { "epoch": 0.67439009439745, "grad_norm": 1.7981711123019792, "learning_rate": 2.5320180042936627e-06, "loss": 0.5524, "step": 22004 }, { "epoch": 0.6744207429201913, "grad_norm": 1.9201505753591572, "learning_rate": 2.531586373250544e-06, "loss": 0.6634, "step": 22005 }, { "epoch": 0.6744513914429324, "grad_norm": 1.7953128252414206, "learning_rate": 2.5311547665293397e-06, "loss": 0.626, "step": 22006 }, { "epoch": 0.6744820399656737, "grad_norm": 1.69380549520526, "learning_rate": 2.5307231841342962e-06, "loss": 0.5719, "step": 22007 }, { "epoch": 0.6745126884884148, "grad_norm": 2.047989368468826, "learning_rate": 2.5302916260696698e-06, "loss": 0.5965, "step": 22008 }, { "epoch": 0.6745433370111561, "grad_norm": 0.7756901564407175, "learning_rate": 2.5298600923397133e-06, "loss": 0.4064, "step": 22009 }, { "epoch": 0.6745739855338972, "grad_norm": 1.6977713746422718, "learning_rate": 2.529428582948675e-06, "loss": 0.5605, "step": 22010 }, { "epoch": 0.6746046340566385, "grad_norm": 1.9214024580164872, "learning_rate": 2.52899709790081e-06, "loss": 0.6069, "step": 22011 }, { "epoch": 0.6746352825793797, "grad_norm": 1.771880994702041, "learning_rate": 2.52856563720037e-06, "loss": 0.5991, "step": 22012 }, { "epoch": 0.6746659311021209, "grad_norm": 1.6042754789718547, "learning_rate": 2.5281342008516035e-06, "loss": 0.5938, "step": 22013 }, { "epoch": 0.6746965796248621, "grad_norm": 1.783096978249992, "learning_rate": 2.527702788858765e-06, "loss": 0.6362, "step": 22014 }, { "epoch": 0.6747272281476033, "grad_norm": 1.5443983258809009, "learning_rate": 2.5272714012260996e-06, "loss": 0.6128, "step": 22015 }, { "epoch": 0.6747578766703445, "grad_norm": 1.9207471583709572, "learning_rate": 2.5268400379578663e-06, "loss": 0.5684, "step": 22016 }, { "epoch": 0.6747885251930857, "grad_norm": 1.640156251600711, "learning_rate": 2.5264086990583097e-06, "loss": 0.5838, "step": 22017 }, { "epoch": 0.6748191737158269, "grad_norm": 1.7537072965391685, "learning_rate": 2.5259773845316798e-06, "loss": 0.6502, "step": 22018 }, { "epoch": 0.6748498222385682, "grad_norm": 1.9163771678006485, "learning_rate": 2.5255460943822273e-06, "loss": 0.5698, "step": 22019 }, { "epoch": 0.6748804707613093, "grad_norm": 1.7984643514693546, "learning_rate": 2.5251148286142045e-06, "loss": 0.6137, "step": 22020 }, { "epoch": 0.6749111192840505, "grad_norm": 2.118959282846533, "learning_rate": 2.524683587231857e-06, "loss": 0.6293, "step": 22021 }, { "epoch": 0.6749417678067917, "grad_norm": 1.8396650050394208, "learning_rate": 2.524252370239435e-06, "loss": 0.6162, "step": 22022 }, { "epoch": 0.6749724163295329, "grad_norm": 1.818390254814153, "learning_rate": 2.523821177641188e-06, "loss": 0.56, "step": 22023 }, { "epoch": 0.6750030648522741, "grad_norm": 1.71975946452933, "learning_rate": 2.5233900094413668e-06, "loss": 0.5728, "step": 22024 }, { "epoch": 0.6750337133750153, "grad_norm": 1.816062358246703, "learning_rate": 2.522958865644217e-06, "loss": 0.685, "step": 22025 }, { "epoch": 0.6750643618977565, "grad_norm": 1.86497256057978, "learning_rate": 2.5225277462539833e-06, "loss": 0.5896, "step": 22026 }, { "epoch": 0.6750950104204977, "grad_norm": 2.1859079521898273, "learning_rate": 2.5220966512749213e-06, "loss": 0.6117, "step": 22027 }, { "epoch": 0.675125658943239, "grad_norm": 0.7942845018760125, "learning_rate": 2.5216655807112756e-06, "loss": 0.3988, "step": 22028 }, { "epoch": 0.6751563074659801, "grad_norm": 1.8026373983130357, "learning_rate": 2.521234534567291e-06, "loss": 0.678, "step": 22029 }, { "epoch": 0.6751869559887214, "grad_norm": 1.9113389016904545, "learning_rate": 2.5208035128472164e-06, "loss": 0.6249, "step": 22030 }, { "epoch": 0.6752176045114625, "grad_norm": 1.8482889574189352, "learning_rate": 2.5203725155553012e-06, "loss": 0.5571, "step": 22031 }, { "epoch": 0.6752482530342038, "grad_norm": 1.744212614608669, "learning_rate": 2.519941542695788e-06, "loss": 0.6495, "step": 22032 }, { "epoch": 0.6752789015569449, "grad_norm": 1.6013121594049038, "learning_rate": 2.5195105942729257e-06, "loss": 0.5642, "step": 22033 }, { "epoch": 0.6753095500796862, "grad_norm": 1.8442364199279158, "learning_rate": 2.51907967029096e-06, "loss": 0.6535, "step": 22034 }, { "epoch": 0.6753401986024273, "grad_norm": 1.8236808295678821, "learning_rate": 2.5186487707541384e-06, "loss": 0.521, "step": 22035 }, { "epoch": 0.6753708471251686, "grad_norm": 0.7855488195542449, "learning_rate": 2.5182178956667057e-06, "loss": 0.4026, "step": 22036 }, { "epoch": 0.6754014956479097, "grad_norm": 2.152508678634075, "learning_rate": 2.517787045032904e-06, "loss": 0.6398, "step": 22037 }, { "epoch": 0.675432144170651, "grad_norm": 2.238977441445674, "learning_rate": 2.5173562188569843e-06, "loss": 0.6481, "step": 22038 }, { "epoch": 0.6754627926933922, "grad_norm": 1.5775328215614073, "learning_rate": 2.5169254171431903e-06, "loss": 0.5647, "step": 22039 }, { "epoch": 0.6754934412161334, "grad_norm": 1.8381236889399983, "learning_rate": 2.5164946398957624e-06, "loss": 0.5756, "step": 22040 }, { "epoch": 0.6755240897388746, "grad_norm": 1.8259173256394408, "learning_rate": 2.516063887118949e-06, "loss": 0.5604, "step": 22041 }, { "epoch": 0.6755547382616158, "grad_norm": 1.9378104216900172, "learning_rate": 2.5156331588169937e-06, "loss": 0.7405, "step": 22042 }, { "epoch": 0.675585386784357, "grad_norm": 1.8142434507990528, "learning_rate": 2.5152024549941417e-06, "loss": 0.609, "step": 22043 }, { "epoch": 0.6756160353070982, "grad_norm": 1.7103094278164852, "learning_rate": 2.5147717756546343e-06, "loss": 0.5777, "step": 22044 }, { "epoch": 0.6756466838298394, "grad_norm": 0.7488709324031041, "learning_rate": 2.5143411208027165e-06, "loss": 0.3906, "step": 22045 }, { "epoch": 0.6756773323525807, "grad_norm": 1.727000553374951, "learning_rate": 2.513910490442633e-06, "loss": 0.5723, "step": 22046 }, { "epoch": 0.6757079808753218, "grad_norm": 1.7380795915869947, "learning_rate": 2.513479884578626e-06, "loss": 0.5012, "step": 22047 }, { "epoch": 0.6757386293980631, "grad_norm": 1.781741889434329, "learning_rate": 2.513049303214936e-06, "loss": 0.6193, "step": 22048 }, { "epoch": 0.6757692779208042, "grad_norm": 2.0001254274557287, "learning_rate": 2.512618746355807e-06, "loss": 0.6248, "step": 22049 }, { "epoch": 0.6757999264435455, "grad_norm": 1.6088558313734234, "learning_rate": 2.5121882140054834e-06, "loss": 0.5171, "step": 22050 }, { "epoch": 0.6758305749662866, "grad_norm": 1.6921988619909871, "learning_rate": 2.5117577061682063e-06, "loss": 0.5618, "step": 22051 }, { "epoch": 0.6758612234890278, "grad_norm": 0.8090760005167188, "learning_rate": 2.5113272228482157e-06, "loss": 0.398, "step": 22052 }, { "epoch": 0.675891872011769, "grad_norm": 1.8660226683915262, "learning_rate": 2.5108967640497544e-06, "loss": 0.6747, "step": 22053 }, { "epoch": 0.6759225205345102, "grad_norm": 1.9559821458287452, "learning_rate": 2.5104663297770664e-06, "loss": 0.7632, "step": 22054 }, { "epoch": 0.6759531690572514, "grad_norm": 1.8428902615537972, "learning_rate": 2.5100359200343903e-06, "loss": 0.6684, "step": 22055 }, { "epoch": 0.6759838175799926, "grad_norm": 1.7959299149241832, "learning_rate": 2.509605534825964e-06, "loss": 0.5788, "step": 22056 }, { "epoch": 0.6760144661027339, "grad_norm": 1.9109250883684143, "learning_rate": 2.5091751741560353e-06, "loss": 0.6477, "step": 22057 }, { "epoch": 0.676045114625475, "grad_norm": 1.7365681467417304, "learning_rate": 2.508744838028841e-06, "loss": 0.5427, "step": 22058 }, { "epoch": 0.6760757631482163, "grad_norm": 2.6837703758205858, "learning_rate": 2.5083145264486193e-06, "loss": 0.6179, "step": 22059 }, { "epoch": 0.6761064116709574, "grad_norm": 1.6799421456503187, "learning_rate": 2.507884239419612e-06, "loss": 0.5791, "step": 22060 }, { "epoch": 0.6761370601936987, "grad_norm": 1.689407016518336, "learning_rate": 2.5074539769460588e-06, "loss": 0.5889, "step": 22061 }, { "epoch": 0.6761677087164398, "grad_norm": 1.9320180968067533, "learning_rate": 2.507023739032201e-06, "loss": 0.5798, "step": 22062 }, { "epoch": 0.6761983572391811, "grad_norm": 1.9984276282087916, "learning_rate": 2.506593525682275e-06, "loss": 0.7045, "step": 22063 }, { "epoch": 0.6762290057619222, "grad_norm": 1.8898638717275371, "learning_rate": 2.50616333690052e-06, "loss": 0.5904, "step": 22064 }, { "epoch": 0.6762596542846635, "grad_norm": 1.8660846505929718, "learning_rate": 2.505733172691178e-06, "loss": 0.598, "step": 22065 }, { "epoch": 0.6762903028074047, "grad_norm": 1.868771955267229, "learning_rate": 2.5053030330584858e-06, "loss": 0.6468, "step": 22066 }, { "epoch": 0.6763209513301459, "grad_norm": 1.9824600795829737, "learning_rate": 2.5048729180066765e-06, "loss": 0.5818, "step": 22067 }, { "epoch": 0.6763515998528871, "grad_norm": 1.7375657057958829, "learning_rate": 2.5044428275399968e-06, "loss": 0.5795, "step": 22068 }, { "epoch": 0.6763822483756283, "grad_norm": 0.8060976843638097, "learning_rate": 2.5040127616626784e-06, "loss": 0.3988, "step": 22069 }, { "epoch": 0.6764128968983695, "grad_norm": 1.7626286079486162, "learning_rate": 2.503582720378964e-06, "loss": 0.6005, "step": 22070 }, { "epoch": 0.6764435454211107, "grad_norm": 1.9860987371420806, "learning_rate": 2.503152703693085e-06, "loss": 0.6833, "step": 22071 }, { "epoch": 0.6764741939438519, "grad_norm": 1.7534994614931465, "learning_rate": 2.5027227116092806e-06, "loss": 0.6217, "step": 22072 }, { "epoch": 0.6765048424665931, "grad_norm": 0.7816517765231794, "learning_rate": 2.5022927441317912e-06, "loss": 0.4233, "step": 22073 }, { "epoch": 0.6765354909893343, "grad_norm": 1.6894639128356337, "learning_rate": 2.501862801264848e-06, "loss": 0.5551, "step": 22074 }, { "epoch": 0.6765661395120756, "grad_norm": 1.7753436380034229, "learning_rate": 2.50143288301269e-06, "loss": 0.6699, "step": 22075 }, { "epoch": 0.6765967880348167, "grad_norm": 2.0099948673788375, "learning_rate": 2.5010029893795546e-06, "loss": 0.6368, "step": 22076 }, { "epoch": 0.676627436557558, "grad_norm": 0.8019653405595906, "learning_rate": 2.500573120369675e-06, "loss": 0.4247, "step": 22077 }, { "epoch": 0.6766580850802991, "grad_norm": 1.747480086691618, "learning_rate": 2.5001432759872867e-06, "loss": 0.5674, "step": 22078 }, { "epoch": 0.6766887336030404, "grad_norm": 1.8679074043556319, "learning_rate": 2.4997134562366293e-06, "loss": 0.6528, "step": 22079 }, { "epoch": 0.6767193821257815, "grad_norm": 1.5584687130582588, "learning_rate": 2.499283661121933e-06, "loss": 0.5563, "step": 22080 }, { "epoch": 0.6767500306485228, "grad_norm": 1.5223239879881023, "learning_rate": 2.4988538906474357e-06, "loss": 0.5508, "step": 22081 }, { "epoch": 0.6767806791712639, "grad_norm": 0.7779617398240641, "learning_rate": 2.4984241448173703e-06, "loss": 0.4025, "step": 22082 }, { "epoch": 0.6768113276940051, "grad_norm": 0.8173260351741337, "learning_rate": 2.497994423635971e-06, "loss": 0.4093, "step": 22083 }, { "epoch": 0.6768419762167464, "grad_norm": 1.7163915218027708, "learning_rate": 2.497564727107475e-06, "loss": 0.5568, "step": 22084 }, { "epoch": 0.6768726247394875, "grad_norm": 1.887957864404645, "learning_rate": 2.4971350552361117e-06, "loss": 0.6688, "step": 22085 }, { "epoch": 0.6769032732622288, "grad_norm": 0.8432043998399672, "learning_rate": 2.4967054080261173e-06, "loss": 0.3969, "step": 22086 }, { "epoch": 0.6769339217849699, "grad_norm": 0.7648287428436569, "learning_rate": 2.496275785481727e-06, "loss": 0.4099, "step": 22087 }, { "epoch": 0.6769645703077112, "grad_norm": 1.752907445709292, "learning_rate": 2.4958461876071704e-06, "loss": 0.5929, "step": 22088 }, { "epoch": 0.6769952188304523, "grad_norm": 1.7999728621026339, "learning_rate": 2.4954166144066815e-06, "loss": 0.6159, "step": 22089 }, { "epoch": 0.6770258673531936, "grad_norm": 1.7493690404375586, "learning_rate": 2.4949870658844953e-06, "loss": 0.6099, "step": 22090 }, { "epoch": 0.6770565158759347, "grad_norm": 0.7669097274596365, "learning_rate": 2.4945575420448417e-06, "loss": 0.3879, "step": 22091 }, { "epoch": 0.677087164398676, "grad_norm": 2.014728729997503, "learning_rate": 2.4941280428919543e-06, "loss": 0.5879, "step": 22092 }, { "epoch": 0.6771178129214172, "grad_norm": 1.7060726266757602, "learning_rate": 2.493698568430063e-06, "loss": 0.6367, "step": 22093 }, { "epoch": 0.6771484614441584, "grad_norm": 1.6189614110320956, "learning_rate": 2.493269118663401e-06, "loss": 0.6123, "step": 22094 }, { "epoch": 0.6771791099668996, "grad_norm": 1.6321311016761795, "learning_rate": 2.492839693596202e-06, "loss": 0.5136, "step": 22095 }, { "epoch": 0.6772097584896408, "grad_norm": 1.632080681059797, "learning_rate": 2.4924102932326927e-06, "loss": 0.6574, "step": 22096 }, { "epoch": 0.677240407012382, "grad_norm": 1.9015499063103012, "learning_rate": 2.4919809175771066e-06, "loss": 0.6719, "step": 22097 }, { "epoch": 0.6772710555351232, "grad_norm": 1.908982424571469, "learning_rate": 2.4915515666336758e-06, "loss": 0.6364, "step": 22098 }, { "epoch": 0.6773017040578644, "grad_norm": 1.783508269838168, "learning_rate": 2.4911222404066274e-06, "loss": 0.5161, "step": 22099 }, { "epoch": 0.6773323525806056, "grad_norm": 1.8623668903991484, "learning_rate": 2.4906929389001954e-06, "loss": 0.6528, "step": 22100 }, { "epoch": 0.6773630011033468, "grad_norm": 1.7167873136875864, "learning_rate": 2.490263662118606e-06, "loss": 0.557, "step": 22101 }, { "epoch": 0.6773936496260881, "grad_norm": 1.8382905570412085, "learning_rate": 2.489834410066091e-06, "loss": 0.636, "step": 22102 }, { "epoch": 0.6774242981488292, "grad_norm": 1.8862979669580024, "learning_rate": 2.4894051827468817e-06, "loss": 0.6665, "step": 22103 }, { "epoch": 0.6774549466715705, "grad_norm": 1.9609740972579988, "learning_rate": 2.4889759801652035e-06, "loss": 0.6797, "step": 22104 }, { "epoch": 0.6774855951943116, "grad_norm": 1.8368681250083, "learning_rate": 2.488546802325288e-06, "loss": 0.6336, "step": 22105 }, { "epoch": 0.6775162437170529, "grad_norm": 1.7470984772480713, "learning_rate": 2.488117649231365e-06, "loss": 0.5296, "step": 22106 }, { "epoch": 0.677546892239794, "grad_norm": 1.7362799265591538, "learning_rate": 2.487688520887659e-06, "loss": 0.571, "step": 22107 }, { "epoch": 0.6775775407625353, "grad_norm": 1.7540845193846548, "learning_rate": 2.487259417298401e-06, "loss": 0.6543, "step": 22108 }, { "epoch": 0.6776081892852764, "grad_norm": 0.8371459219132994, "learning_rate": 2.4868303384678216e-06, "loss": 0.4256, "step": 22109 }, { "epoch": 0.6776388378080177, "grad_norm": 0.8115067220318264, "learning_rate": 2.4864012844001435e-06, "loss": 0.3952, "step": 22110 }, { "epoch": 0.6776694863307589, "grad_norm": 1.5158694276749647, "learning_rate": 2.4859722550995978e-06, "loss": 0.5191, "step": 22111 }, { "epoch": 0.6777001348535001, "grad_norm": 1.9880012088998145, "learning_rate": 2.4855432505704095e-06, "loss": 0.5315, "step": 22112 }, { "epoch": 0.6777307833762413, "grad_norm": 1.868485396358371, "learning_rate": 2.4851142708168075e-06, "loss": 0.7636, "step": 22113 }, { "epoch": 0.6777614318989824, "grad_norm": 1.8907334968607514, "learning_rate": 2.484685315843019e-06, "loss": 0.5232, "step": 22114 }, { "epoch": 0.6777920804217237, "grad_norm": 1.7485869927768716, "learning_rate": 2.484256385653268e-06, "loss": 0.6323, "step": 22115 }, { "epoch": 0.6778227289444648, "grad_norm": 1.7298194336127577, "learning_rate": 2.483827480251783e-06, "loss": 0.5326, "step": 22116 }, { "epoch": 0.6778533774672061, "grad_norm": 1.6576609121666575, "learning_rate": 2.483398599642791e-06, "loss": 0.5979, "step": 22117 }, { "epoch": 0.6778840259899472, "grad_norm": 2.066107630132374, "learning_rate": 2.482969743830515e-06, "loss": 0.6512, "step": 22118 }, { "epoch": 0.6779146745126885, "grad_norm": 1.8733261701378106, "learning_rate": 2.4825409128191818e-06, "loss": 0.5617, "step": 22119 }, { "epoch": 0.6779453230354296, "grad_norm": 0.8162664724014713, "learning_rate": 2.482112106613019e-06, "loss": 0.4314, "step": 22120 }, { "epoch": 0.6779759715581709, "grad_norm": 1.6904906918916192, "learning_rate": 2.4816833252162482e-06, "loss": 0.5716, "step": 22121 }, { "epoch": 0.6780066200809121, "grad_norm": 1.7598058837512092, "learning_rate": 2.4812545686330976e-06, "loss": 0.5503, "step": 22122 }, { "epoch": 0.6780372686036533, "grad_norm": 1.74460951512267, "learning_rate": 2.4808258368677863e-06, "loss": 0.5931, "step": 22123 }, { "epoch": 0.6780679171263945, "grad_norm": 0.7987961618432551, "learning_rate": 2.4803971299245467e-06, "loss": 0.4463, "step": 22124 }, { "epoch": 0.6780985656491357, "grad_norm": 2.0578070309257295, "learning_rate": 2.4799684478075985e-06, "loss": 0.6937, "step": 22125 }, { "epoch": 0.6781292141718769, "grad_norm": 1.724140416593605, "learning_rate": 2.4795397905211644e-06, "loss": 0.5566, "step": 22126 }, { "epoch": 0.6781598626946181, "grad_norm": 1.6569814584316498, "learning_rate": 2.479111158069469e-06, "loss": 0.6147, "step": 22127 }, { "epoch": 0.6781905112173593, "grad_norm": 1.8652322923203428, "learning_rate": 2.478682550456739e-06, "loss": 0.5568, "step": 22128 }, { "epoch": 0.6782211597401006, "grad_norm": 1.7562727199283639, "learning_rate": 2.4782539676871932e-06, "loss": 0.6106, "step": 22129 }, { "epoch": 0.6782518082628417, "grad_norm": 1.9674329733800457, "learning_rate": 2.477825409765056e-06, "loss": 0.6464, "step": 22130 }, { "epoch": 0.678282456785583, "grad_norm": 1.7609015193463387, "learning_rate": 2.4773968766945516e-06, "loss": 0.4894, "step": 22131 }, { "epoch": 0.6783131053083241, "grad_norm": 1.8381741950187307, "learning_rate": 2.4769683684799003e-06, "loss": 0.6387, "step": 22132 }, { "epoch": 0.6783437538310654, "grad_norm": 1.8447949878205432, "learning_rate": 2.4765398851253262e-06, "loss": 0.6503, "step": 22133 }, { "epoch": 0.6783744023538065, "grad_norm": 2.0512527477399907, "learning_rate": 2.4761114266350473e-06, "loss": 0.5745, "step": 22134 }, { "epoch": 0.6784050508765478, "grad_norm": 0.7728596933506172, "learning_rate": 2.4756829930132915e-06, "loss": 0.4137, "step": 22135 }, { "epoch": 0.6784356993992889, "grad_norm": 1.6678879999512009, "learning_rate": 2.475254584264277e-06, "loss": 0.5677, "step": 22136 }, { "epoch": 0.6784663479220302, "grad_norm": 1.9777058977560575, "learning_rate": 2.4748262003922234e-06, "loss": 0.5545, "step": 22137 }, { "epoch": 0.6784969964447713, "grad_norm": 1.7026074488158234, "learning_rate": 2.4743978414013535e-06, "loss": 0.603, "step": 22138 }, { "epoch": 0.6785276449675126, "grad_norm": 1.6801830897177548, "learning_rate": 2.4739695072958898e-06, "loss": 0.6452, "step": 22139 }, { "epoch": 0.6785582934902538, "grad_norm": 1.8703800312876238, "learning_rate": 2.4735411980800483e-06, "loss": 0.5528, "step": 22140 }, { "epoch": 0.678588942012995, "grad_norm": 1.7392470008773173, "learning_rate": 2.4731129137580524e-06, "loss": 0.6357, "step": 22141 }, { "epoch": 0.6786195905357362, "grad_norm": 1.8981226545623282, "learning_rate": 2.4726846543341215e-06, "loss": 0.5771, "step": 22142 }, { "epoch": 0.6786502390584774, "grad_norm": 1.8702221508595274, "learning_rate": 2.472256419812477e-06, "loss": 0.5195, "step": 22143 }, { "epoch": 0.6786808875812186, "grad_norm": 1.8064998316176055, "learning_rate": 2.471828210197337e-06, "loss": 0.5495, "step": 22144 }, { "epoch": 0.6787115361039597, "grad_norm": 1.8772201800343025, "learning_rate": 2.4714000254929183e-06, "loss": 0.5648, "step": 22145 }, { "epoch": 0.678742184626701, "grad_norm": 1.9992299959960729, "learning_rate": 2.470971865703442e-06, "loss": 0.5965, "step": 22146 }, { "epoch": 0.6787728331494421, "grad_norm": 1.8047285711506706, "learning_rate": 2.4705437308331292e-06, "loss": 0.5655, "step": 22147 }, { "epoch": 0.6788034816721834, "grad_norm": 1.5659417653963477, "learning_rate": 2.4701156208861944e-06, "loss": 0.5697, "step": 22148 }, { "epoch": 0.6788341301949246, "grad_norm": 1.8171345159650765, "learning_rate": 2.469687535866858e-06, "loss": 0.6165, "step": 22149 }, { "epoch": 0.6788647787176658, "grad_norm": 1.8767404531931196, "learning_rate": 2.469259475779337e-06, "loss": 0.5543, "step": 22150 }, { "epoch": 0.678895427240407, "grad_norm": 0.7693886146977718, "learning_rate": 2.468831440627852e-06, "loss": 0.4028, "step": 22151 }, { "epoch": 0.6789260757631482, "grad_norm": 1.9425997138046938, "learning_rate": 2.468403430416618e-06, "loss": 0.7137, "step": 22152 }, { "epoch": 0.6789567242858894, "grad_norm": 1.889810633354062, "learning_rate": 2.46797544514985e-06, "loss": 0.6084, "step": 22153 }, { "epoch": 0.6789873728086306, "grad_norm": 1.8735649544367616, "learning_rate": 2.467547484831771e-06, "loss": 0.5964, "step": 22154 }, { "epoch": 0.6790180213313718, "grad_norm": 1.7670100799629056, "learning_rate": 2.4671195494665946e-06, "loss": 0.6537, "step": 22155 }, { "epoch": 0.679048669854113, "grad_norm": 1.8914525558118886, "learning_rate": 2.4666916390585354e-06, "loss": 0.5405, "step": 22156 }, { "epoch": 0.6790793183768542, "grad_norm": 0.7858442734599289, "learning_rate": 2.4662637536118116e-06, "loss": 0.4219, "step": 22157 }, { "epoch": 0.6791099668995955, "grad_norm": 1.7408161181308224, "learning_rate": 2.4658358931306415e-06, "loss": 0.5804, "step": 22158 }, { "epoch": 0.6791406154223366, "grad_norm": 1.6513006548860312, "learning_rate": 2.465408057619237e-06, "loss": 0.5612, "step": 22159 }, { "epoch": 0.6791712639450779, "grad_norm": 1.8863853910613464, "learning_rate": 2.4649802470818146e-06, "loss": 0.4851, "step": 22160 }, { "epoch": 0.679201912467819, "grad_norm": 1.8346147307218903, "learning_rate": 2.464552461522591e-06, "loss": 0.5619, "step": 22161 }, { "epoch": 0.6792325609905603, "grad_norm": 1.9031294327252037, "learning_rate": 2.4641247009457827e-06, "loss": 0.6813, "step": 22162 }, { "epoch": 0.6792632095133014, "grad_norm": 2.217629267967749, "learning_rate": 2.463696965355602e-06, "loss": 0.608, "step": 22163 }, { "epoch": 0.6792938580360427, "grad_norm": 1.923058910538259, "learning_rate": 2.463269254756261e-06, "loss": 0.6651, "step": 22164 }, { "epoch": 0.6793245065587838, "grad_norm": 1.69221645485473, "learning_rate": 2.4628415691519804e-06, "loss": 0.6327, "step": 22165 }, { "epoch": 0.6793551550815251, "grad_norm": 1.874401608547081, "learning_rate": 2.462413908546971e-06, "loss": 0.6427, "step": 22166 }, { "epoch": 0.6793858036042663, "grad_norm": 1.905702482927558, "learning_rate": 2.4619862729454447e-06, "loss": 0.7194, "step": 22167 }, { "epoch": 0.6794164521270075, "grad_norm": 1.8816313688511732, "learning_rate": 2.4615586623516174e-06, "loss": 0.6297, "step": 22168 }, { "epoch": 0.6794471006497487, "grad_norm": 0.8022574955674248, "learning_rate": 2.4611310767697015e-06, "loss": 0.4402, "step": 22169 }, { "epoch": 0.6794777491724899, "grad_norm": 1.7754856880850667, "learning_rate": 2.4607035162039135e-06, "loss": 0.6289, "step": 22170 }, { "epoch": 0.6795083976952311, "grad_norm": 1.6635511302103165, "learning_rate": 2.460275980658461e-06, "loss": 0.4871, "step": 22171 }, { "epoch": 0.6795390462179723, "grad_norm": 1.9718407802621662, "learning_rate": 2.459848470137559e-06, "loss": 0.6062, "step": 22172 }, { "epoch": 0.6795696947407135, "grad_norm": 0.7934360335861526, "learning_rate": 2.459420984645422e-06, "loss": 0.42, "step": 22173 }, { "epoch": 0.6796003432634548, "grad_norm": 1.9232770217701098, "learning_rate": 2.45899352418626e-06, "loss": 0.6292, "step": 22174 }, { "epoch": 0.6796309917861959, "grad_norm": 1.7733673900068936, "learning_rate": 2.458566088764281e-06, "loss": 0.5646, "step": 22175 }, { "epoch": 0.679661640308937, "grad_norm": 1.738369382794623, "learning_rate": 2.458138678383705e-06, "loss": 0.5543, "step": 22176 }, { "epoch": 0.6796922888316783, "grad_norm": 2.0731817479149104, "learning_rate": 2.457711293048736e-06, "loss": 0.6398, "step": 22177 }, { "epoch": 0.6797229373544195, "grad_norm": 1.6565864114388784, "learning_rate": 2.4572839327635904e-06, "loss": 0.5056, "step": 22178 }, { "epoch": 0.6797535858771607, "grad_norm": 1.734399557264204, "learning_rate": 2.4568565975324755e-06, "loss": 0.5991, "step": 22179 }, { "epoch": 0.6797842343999019, "grad_norm": 1.790018878186368, "learning_rate": 2.456429287359603e-06, "loss": 0.5684, "step": 22180 }, { "epoch": 0.6798148829226431, "grad_norm": 1.844725449838903, "learning_rate": 2.456002002249185e-06, "loss": 0.6354, "step": 22181 }, { "epoch": 0.6798455314453843, "grad_norm": 0.7957103545131919, "learning_rate": 2.4555747422054287e-06, "loss": 0.4133, "step": 22182 }, { "epoch": 0.6798761799681255, "grad_norm": 1.8553679596765407, "learning_rate": 2.4551475072325453e-06, "loss": 0.6316, "step": 22183 }, { "epoch": 0.6799068284908667, "grad_norm": 1.9003486393871305, "learning_rate": 2.454720297334747e-06, "loss": 0.5024, "step": 22184 }, { "epoch": 0.679937477013608, "grad_norm": 1.8460047279337624, "learning_rate": 2.45429311251624e-06, "loss": 0.5515, "step": 22185 }, { "epoch": 0.6799681255363491, "grad_norm": 1.7374583750125874, "learning_rate": 2.453865952781231e-06, "loss": 0.5526, "step": 22186 }, { "epoch": 0.6799987740590904, "grad_norm": 2.015970552868417, "learning_rate": 2.453438818133936e-06, "loss": 0.6712, "step": 22187 }, { "epoch": 0.6800294225818315, "grad_norm": 1.638159502543959, "learning_rate": 2.4530117085785576e-06, "loss": 0.5866, "step": 22188 }, { "epoch": 0.6800600711045728, "grad_norm": 1.7931041542761101, "learning_rate": 2.452584624119309e-06, "loss": 0.5403, "step": 22189 }, { "epoch": 0.6800907196273139, "grad_norm": 1.6268489743831958, "learning_rate": 2.4521575647603936e-06, "loss": 0.5284, "step": 22190 }, { "epoch": 0.6801213681500552, "grad_norm": 1.8987654282219573, "learning_rate": 2.451730530506022e-06, "loss": 0.671, "step": 22191 }, { "epoch": 0.6801520166727963, "grad_norm": 2.128792108919214, "learning_rate": 2.451303521360403e-06, "loss": 0.5668, "step": 22192 }, { "epoch": 0.6801826651955376, "grad_norm": 1.8819524192426453, "learning_rate": 2.4508765373277412e-06, "loss": 0.689, "step": 22193 }, { "epoch": 0.6802133137182788, "grad_norm": 1.8171934257178568, "learning_rate": 2.450449578412244e-06, "loss": 0.5777, "step": 22194 }, { "epoch": 0.68024396224102, "grad_norm": 1.898806861425785, "learning_rate": 2.4500226446181217e-06, "loss": 0.6558, "step": 22195 }, { "epoch": 0.6802746107637612, "grad_norm": 1.7621078338129077, "learning_rate": 2.4495957359495774e-06, "loss": 0.5027, "step": 22196 }, { "epoch": 0.6803052592865024, "grad_norm": 1.6972569643945594, "learning_rate": 2.449168852410821e-06, "loss": 0.5983, "step": 22197 }, { "epoch": 0.6803359078092436, "grad_norm": 1.8395556146026724, "learning_rate": 2.4487419940060538e-06, "loss": 0.6523, "step": 22198 }, { "epoch": 0.6803665563319848, "grad_norm": 0.8011180085235149, "learning_rate": 2.448315160739485e-06, "loss": 0.4189, "step": 22199 }, { "epoch": 0.680397204854726, "grad_norm": 1.7942916027602, "learning_rate": 2.447888352615321e-06, "loss": 0.5953, "step": 22200 }, { "epoch": 0.6804278533774673, "grad_norm": 1.467756757501893, "learning_rate": 2.447461569637765e-06, "loss": 0.6225, "step": 22201 }, { "epoch": 0.6804585019002084, "grad_norm": 1.89217818181874, "learning_rate": 2.447034811811023e-06, "loss": 0.6725, "step": 22202 }, { "epoch": 0.6804891504229497, "grad_norm": 1.6851482656104524, "learning_rate": 2.446608079139302e-06, "loss": 0.5535, "step": 22203 }, { "epoch": 0.6805197989456908, "grad_norm": 1.7687199485048901, "learning_rate": 2.446181371626803e-06, "loss": 0.5385, "step": 22204 }, { "epoch": 0.6805504474684321, "grad_norm": 1.701336599163847, "learning_rate": 2.445754689277732e-06, "loss": 0.6224, "step": 22205 }, { "epoch": 0.6805810959911732, "grad_norm": 1.676134041568258, "learning_rate": 2.4453280320962964e-06, "loss": 0.5972, "step": 22206 }, { "epoch": 0.6806117445139144, "grad_norm": 0.7614474829017344, "learning_rate": 2.4449014000866948e-06, "loss": 0.4265, "step": 22207 }, { "epoch": 0.6806423930366556, "grad_norm": 1.884995930807032, "learning_rate": 2.4444747932531354e-06, "loss": 0.6409, "step": 22208 }, { "epoch": 0.6806730415593968, "grad_norm": 1.7821299870731342, "learning_rate": 2.4440482115998182e-06, "loss": 0.6095, "step": 22209 }, { "epoch": 0.680703690082138, "grad_norm": 1.9894505833824903, "learning_rate": 2.443621655130947e-06, "loss": 0.7023, "step": 22210 }, { "epoch": 0.6807343386048792, "grad_norm": 1.8128382533163276, "learning_rate": 2.4431951238507285e-06, "loss": 0.6203, "step": 22211 }, { "epoch": 0.6807649871276205, "grad_norm": 1.99812256441558, "learning_rate": 2.442768617763361e-06, "loss": 0.5075, "step": 22212 }, { "epoch": 0.6807956356503616, "grad_norm": 1.844600551018593, "learning_rate": 2.4423421368730477e-06, "loss": 0.6227, "step": 22213 }, { "epoch": 0.6808262841731029, "grad_norm": 0.7897634462775404, "learning_rate": 2.441915681183994e-06, "loss": 0.4189, "step": 22214 }, { "epoch": 0.680856932695844, "grad_norm": 1.771017399367015, "learning_rate": 2.441489250700398e-06, "loss": 0.5153, "step": 22215 }, { "epoch": 0.6808875812185853, "grad_norm": 0.7492555038192007, "learning_rate": 2.4410628454264625e-06, "loss": 0.3984, "step": 22216 }, { "epoch": 0.6809182297413264, "grad_norm": 1.9418524259895613, "learning_rate": 2.4406364653663917e-06, "loss": 0.6545, "step": 22217 }, { "epoch": 0.6809488782640677, "grad_norm": 0.7981082887418481, "learning_rate": 2.4402101105243824e-06, "loss": 0.4406, "step": 22218 }, { "epoch": 0.6809795267868088, "grad_norm": 1.8217417649970782, "learning_rate": 2.4397837809046405e-06, "loss": 0.6029, "step": 22219 }, { "epoch": 0.6810101753095501, "grad_norm": 1.6062527762473762, "learning_rate": 2.4393574765113616e-06, "loss": 0.5339, "step": 22220 }, { "epoch": 0.6810408238322913, "grad_norm": 2.0538918722021893, "learning_rate": 2.438931197348749e-06, "loss": 0.624, "step": 22221 }, { "epoch": 0.6810714723550325, "grad_norm": 0.7941970888992014, "learning_rate": 2.438504943421004e-06, "loss": 0.4095, "step": 22222 }, { "epoch": 0.6811021208777737, "grad_norm": 1.7219589255839378, "learning_rate": 2.4380787147323236e-06, "loss": 0.5963, "step": 22223 }, { "epoch": 0.6811327694005149, "grad_norm": 0.7928901371943714, "learning_rate": 2.437652511286909e-06, "loss": 0.4114, "step": 22224 }, { "epoch": 0.6811634179232561, "grad_norm": 1.9320117576876665, "learning_rate": 2.4372263330889616e-06, "loss": 0.7193, "step": 22225 }, { "epoch": 0.6811940664459973, "grad_norm": 1.7655380087186971, "learning_rate": 2.436800180142677e-06, "loss": 0.5802, "step": 22226 }, { "epoch": 0.6812247149687385, "grad_norm": 1.9372267179740679, "learning_rate": 2.4363740524522567e-06, "loss": 0.6096, "step": 22227 }, { "epoch": 0.6812553634914797, "grad_norm": 1.6594207010686344, "learning_rate": 2.4359479500218995e-06, "loss": 0.5729, "step": 22228 }, { "epoch": 0.6812860120142209, "grad_norm": 1.8227799056880223, "learning_rate": 2.4355218728558022e-06, "loss": 0.5489, "step": 22229 }, { "epoch": 0.6813166605369622, "grad_norm": 1.812710335630218, "learning_rate": 2.435095820958166e-06, "loss": 0.5649, "step": 22230 }, { "epoch": 0.6813473090597033, "grad_norm": 1.6352853880570686, "learning_rate": 2.4346697943331826e-06, "loss": 0.5808, "step": 22231 }, { "epoch": 0.6813779575824446, "grad_norm": 1.7402821105081752, "learning_rate": 2.434243792985058e-06, "loss": 0.6709, "step": 22232 }, { "epoch": 0.6814086061051857, "grad_norm": 1.7239010494643723, "learning_rate": 2.433817816917986e-06, "loss": 0.6207, "step": 22233 }, { "epoch": 0.681439254627927, "grad_norm": 0.8160889919563656, "learning_rate": 2.4333918661361616e-06, "loss": 0.4172, "step": 22234 }, { "epoch": 0.6814699031506681, "grad_norm": 0.7866031256817667, "learning_rate": 2.432965940643784e-06, "loss": 0.3952, "step": 22235 }, { "epoch": 0.6815005516734094, "grad_norm": 1.8063846625234488, "learning_rate": 2.432540040445052e-06, "loss": 0.6178, "step": 22236 }, { "epoch": 0.6815312001961505, "grad_norm": 1.835095630615003, "learning_rate": 2.4321141655441573e-06, "loss": 0.5641, "step": 22237 }, { "epoch": 0.6815618487188917, "grad_norm": 1.5702001879565612, "learning_rate": 2.4316883159452985e-06, "loss": 0.5037, "step": 22238 }, { "epoch": 0.681592497241633, "grad_norm": 2.00234479749754, "learning_rate": 2.4312624916526744e-06, "loss": 0.6448, "step": 22239 }, { "epoch": 0.6816231457643741, "grad_norm": 1.7969986596863012, "learning_rate": 2.4308366926704763e-06, "loss": 0.5536, "step": 22240 }, { "epoch": 0.6816537942871154, "grad_norm": 1.7085951992369948, "learning_rate": 2.4304109190029036e-06, "loss": 0.5857, "step": 22241 }, { "epoch": 0.6816844428098565, "grad_norm": 1.670086737786867, "learning_rate": 2.4299851706541473e-06, "loss": 0.6525, "step": 22242 }, { "epoch": 0.6817150913325978, "grad_norm": 0.7894654861289839, "learning_rate": 2.4295594476284044e-06, "loss": 0.4089, "step": 22243 }, { "epoch": 0.6817457398553389, "grad_norm": 0.8538657006831514, "learning_rate": 2.429133749929873e-06, "loss": 0.4145, "step": 22244 }, { "epoch": 0.6817763883780802, "grad_norm": 1.8143979409286075, "learning_rate": 2.4287080775627413e-06, "loss": 0.544, "step": 22245 }, { "epoch": 0.6818070369008213, "grad_norm": 1.6148656193888766, "learning_rate": 2.4282824305312075e-06, "loss": 0.5494, "step": 22246 }, { "epoch": 0.6818376854235626, "grad_norm": 1.8759464961244685, "learning_rate": 2.4278568088394674e-06, "loss": 0.5683, "step": 22247 }, { "epoch": 0.6818683339463038, "grad_norm": 2.08920755973338, "learning_rate": 2.4274312124917094e-06, "loss": 0.5781, "step": 22248 }, { "epoch": 0.681898982469045, "grad_norm": 1.740416682365453, "learning_rate": 2.427005641492132e-06, "loss": 0.5516, "step": 22249 }, { "epoch": 0.6819296309917862, "grad_norm": 1.9573003446009967, "learning_rate": 2.4265800958449227e-06, "loss": 0.5769, "step": 22250 }, { "epoch": 0.6819602795145274, "grad_norm": 2.1292911765830618, "learning_rate": 2.426154575554282e-06, "loss": 0.5921, "step": 22251 }, { "epoch": 0.6819909280372686, "grad_norm": 1.7427049435073831, "learning_rate": 2.4257290806243983e-06, "loss": 0.5538, "step": 22252 }, { "epoch": 0.6820215765600098, "grad_norm": 1.6239078806641416, "learning_rate": 2.4253036110594634e-06, "loss": 0.4877, "step": 22253 }, { "epoch": 0.682052225082751, "grad_norm": 1.8485831709297138, "learning_rate": 2.4248781668636704e-06, "loss": 0.6511, "step": 22254 }, { "epoch": 0.6820828736054922, "grad_norm": 1.8250894401735789, "learning_rate": 2.424452748041214e-06, "loss": 0.5939, "step": 22255 }, { "epoch": 0.6821135221282334, "grad_norm": 1.7092632932335532, "learning_rate": 2.424027354596281e-06, "loss": 0.756, "step": 22256 }, { "epoch": 0.6821441706509747, "grad_norm": 0.7807651935704306, "learning_rate": 2.4236019865330664e-06, "loss": 0.4075, "step": 22257 }, { "epoch": 0.6821748191737158, "grad_norm": 1.872412313551224, "learning_rate": 2.4231766438557604e-06, "loss": 0.5371, "step": 22258 }, { "epoch": 0.6822054676964571, "grad_norm": 1.620149861148656, "learning_rate": 2.4227513265685558e-06, "loss": 0.506, "step": 22259 }, { "epoch": 0.6822361162191982, "grad_norm": 1.82639062986331, "learning_rate": 2.4223260346756416e-06, "loss": 0.6671, "step": 22260 }, { "epoch": 0.6822667647419395, "grad_norm": 1.7956408390668304, "learning_rate": 2.421900768181205e-06, "loss": 0.5934, "step": 22261 }, { "epoch": 0.6822974132646806, "grad_norm": 1.6631240003709593, "learning_rate": 2.421475527089444e-06, "loss": 0.5538, "step": 22262 }, { "epoch": 0.6823280617874219, "grad_norm": 1.6648961586979663, "learning_rate": 2.421050311404544e-06, "loss": 0.6072, "step": 22263 }, { "epoch": 0.682358710310163, "grad_norm": 1.7343461245227494, "learning_rate": 2.4206251211306935e-06, "loss": 0.593, "step": 22264 }, { "epoch": 0.6823893588329043, "grad_norm": 0.8253904709752574, "learning_rate": 2.4201999562720835e-06, "loss": 0.4307, "step": 22265 }, { "epoch": 0.6824200073556455, "grad_norm": 1.5758305889722146, "learning_rate": 2.419774816832905e-06, "loss": 0.5019, "step": 22266 }, { "epoch": 0.6824506558783867, "grad_norm": 1.8432791793923686, "learning_rate": 2.4193497028173435e-06, "loss": 0.6453, "step": 22267 }, { "epoch": 0.6824813044011279, "grad_norm": 1.7911598149937076, "learning_rate": 2.4189246142295904e-06, "loss": 0.5541, "step": 22268 }, { "epoch": 0.682511952923869, "grad_norm": 1.6477006882084353, "learning_rate": 2.418499551073833e-06, "loss": 0.6113, "step": 22269 }, { "epoch": 0.6825426014466103, "grad_norm": 1.680157107062543, "learning_rate": 2.4180745133542617e-06, "loss": 0.5501, "step": 22270 }, { "epoch": 0.6825732499693514, "grad_norm": 1.8355406994929595, "learning_rate": 2.4176495010750626e-06, "loss": 0.6198, "step": 22271 }, { "epoch": 0.6826038984920927, "grad_norm": 3.231379786524138, "learning_rate": 2.4172245142404207e-06, "loss": 0.5283, "step": 22272 }, { "epoch": 0.6826345470148338, "grad_norm": 1.664699709322351, "learning_rate": 2.4167995528545296e-06, "loss": 0.5577, "step": 22273 }, { "epoch": 0.6826651955375751, "grad_norm": 1.760246530601096, "learning_rate": 2.416374616921574e-06, "loss": 0.5816, "step": 22274 }, { "epoch": 0.6826958440603162, "grad_norm": 1.8094408684618897, "learning_rate": 2.415949706445738e-06, "loss": 0.6038, "step": 22275 }, { "epoch": 0.6827264925830575, "grad_norm": 2.0335548421167613, "learning_rate": 2.415524821431211e-06, "loss": 0.5978, "step": 22276 }, { "epoch": 0.6827571411057987, "grad_norm": 1.9697060201132164, "learning_rate": 2.415099961882179e-06, "loss": 0.5678, "step": 22277 }, { "epoch": 0.6827877896285399, "grad_norm": 1.7553524839060062, "learning_rate": 2.4146751278028306e-06, "loss": 0.571, "step": 22278 }, { "epoch": 0.6828184381512811, "grad_norm": 1.7548102298664046, "learning_rate": 2.4142503191973475e-06, "loss": 0.4923, "step": 22279 }, { "epoch": 0.6828490866740223, "grad_norm": 1.9597265270741464, "learning_rate": 2.4138255360699183e-06, "loss": 0.5729, "step": 22280 }, { "epoch": 0.6828797351967635, "grad_norm": 1.5578953463435201, "learning_rate": 2.4134007784247287e-06, "loss": 0.5547, "step": 22281 }, { "epoch": 0.6829103837195047, "grad_norm": 1.9079732307645603, "learning_rate": 2.4129760462659634e-06, "loss": 0.6653, "step": 22282 }, { "epoch": 0.6829410322422459, "grad_norm": 1.6988644535471387, "learning_rate": 2.4125513395978034e-06, "loss": 0.609, "step": 22283 }, { "epoch": 0.6829716807649872, "grad_norm": 1.847402008572745, "learning_rate": 2.4121266584244407e-06, "loss": 0.5745, "step": 22284 }, { "epoch": 0.6830023292877283, "grad_norm": 2.171856052079096, "learning_rate": 2.411702002750056e-06, "loss": 0.6097, "step": 22285 }, { "epoch": 0.6830329778104696, "grad_norm": 0.8457899208061878, "learning_rate": 2.4112773725788324e-06, "loss": 0.4064, "step": 22286 }, { "epoch": 0.6830636263332107, "grad_norm": 1.7775220615029685, "learning_rate": 2.4108527679149548e-06, "loss": 0.5981, "step": 22287 }, { "epoch": 0.683094274855952, "grad_norm": 0.8240279509742976, "learning_rate": 2.4104281887626075e-06, "loss": 0.4237, "step": 22288 }, { "epoch": 0.6831249233786931, "grad_norm": 2.2080907072916487, "learning_rate": 2.4100036351259754e-06, "loss": 0.7361, "step": 22289 }, { "epoch": 0.6831555719014344, "grad_norm": 2.013668914186918, "learning_rate": 2.4095791070092385e-06, "loss": 0.5794, "step": 22290 }, { "epoch": 0.6831862204241755, "grad_norm": 0.8190901673119516, "learning_rate": 2.4091546044165816e-06, "loss": 0.421, "step": 22291 }, { "epoch": 0.6832168689469168, "grad_norm": 1.8633861768510511, "learning_rate": 2.4087301273521883e-06, "loss": 0.6616, "step": 22292 }, { "epoch": 0.683247517469658, "grad_norm": 1.7959739682240812, "learning_rate": 2.408305675820241e-06, "loss": 0.5957, "step": 22293 }, { "epoch": 0.6832781659923992, "grad_norm": 1.9509577237175915, "learning_rate": 2.407881249824919e-06, "loss": 0.6521, "step": 22294 }, { "epoch": 0.6833088145151404, "grad_norm": 1.7575808568006295, "learning_rate": 2.407456849370406e-06, "loss": 0.647, "step": 22295 }, { "epoch": 0.6833394630378816, "grad_norm": 0.8068847751724794, "learning_rate": 2.407032474460884e-06, "loss": 0.4098, "step": 22296 }, { "epoch": 0.6833701115606228, "grad_norm": 1.9522072282157636, "learning_rate": 2.406608125100536e-06, "loss": 0.6454, "step": 22297 }, { "epoch": 0.683400760083364, "grad_norm": 1.8871347536179863, "learning_rate": 2.4061838012935405e-06, "loss": 0.6322, "step": 22298 }, { "epoch": 0.6834314086061052, "grad_norm": 2.04932085795346, "learning_rate": 2.40575950304408e-06, "loss": 0.7057, "step": 22299 }, { "epoch": 0.6834620571288463, "grad_norm": 2.011622822042383, "learning_rate": 2.405335230356336e-06, "loss": 0.5762, "step": 22300 }, { "epoch": 0.6834927056515876, "grad_norm": 1.8664330855839608, "learning_rate": 2.404910983234488e-06, "loss": 0.6121, "step": 22301 }, { "epoch": 0.6835233541743287, "grad_norm": 1.921329649782291, "learning_rate": 2.404486761682712e-06, "loss": 0.5825, "step": 22302 }, { "epoch": 0.68355400269707, "grad_norm": 1.7916304569403554, "learning_rate": 2.4040625657051965e-06, "loss": 0.5501, "step": 22303 }, { "epoch": 0.6835846512198112, "grad_norm": 1.9717251965397486, "learning_rate": 2.403638395306114e-06, "loss": 0.5527, "step": 22304 }, { "epoch": 0.6836152997425524, "grad_norm": 1.7142858448901184, "learning_rate": 2.4032142504896494e-06, "loss": 0.6304, "step": 22305 }, { "epoch": 0.6836459482652936, "grad_norm": 1.6777824877763337, "learning_rate": 2.4027901312599773e-06, "loss": 0.5436, "step": 22306 }, { "epoch": 0.6836765967880348, "grad_norm": 1.7477590811972374, "learning_rate": 2.4023660376212783e-06, "loss": 0.5608, "step": 22307 }, { "epoch": 0.683707245310776, "grad_norm": 1.7178681614839721, "learning_rate": 2.401941969577733e-06, "loss": 0.5517, "step": 22308 }, { "epoch": 0.6837378938335172, "grad_norm": 1.5584984496207375, "learning_rate": 2.4015179271335167e-06, "loss": 0.5157, "step": 22309 }, { "epoch": 0.6837685423562584, "grad_norm": 0.8096446901814351, "learning_rate": 2.4010939102928086e-06, "loss": 0.4073, "step": 22310 }, { "epoch": 0.6837991908789997, "grad_norm": 2.2077505418897982, "learning_rate": 2.4006699190597895e-06, "loss": 0.6668, "step": 22311 }, { "epoch": 0.6838298394017408, "grad_norm": 1.6429154301797786, "learning_rate": 2.400245953438635e-06, "loss": 0.6816, "step": 22312 }, { "epoch": 0.6838604879244821, "grad_norm": 0.8204543265073959, "learning_rate": 2.399822013433518e-06, "loss": 0.4087, "step": 22313 }, { "epoch": 0.6838911364472232, "grad_norm": 1.5623885950547876, "learning_rate": 2.3993980990486238e-06, "loss": 0.5447, "step": 22314 }, { "epoch": 0.6839217849699645, "grad_norm": 1.8765534408005808, "learning_rate": 2.3989742102881234e-06, "loss": 0.6768, "step": 22315 }, { "epoch": 0.6839524334927056, "grad_norm": 1.959605939595533, "learning_rate": 2.398550347156198e-06, "loss": 0.4711, "step": 22316 }, { "epoch": 0.6839830820154469, "grad_norm": 1.8073413812207373, "learning_rate": 2.39812650965702e-06, "loss": 0.5391, "step": 22317 }, { "epoch": 0.684013730538188, "grad_norm": 1.8444616650546446, "learning_rate": 2.3977026977947666e-06, "loss": 0.5964, "step": 22318 }, { "epoch": 0.6840443790609293, "grad_norm": 0.7871803155026944, "learning_rate": 2.397278911573617e-06, "loss": 0.4304, "step": 22319 }, { "epoch": 0.6840750275836704, "grad_norm": 1.5467307070249445, "learning_rate": 2.3968551509977413e-06, "loss": 0.4944, "step": 22320 }, { "epoch": 0.6841056761064117, "grad_norm": 1.827863717100983, "learning_rate": 2.396431416071318e-06, "loss": 0.6288, "step": 22321 }, { "epoch": 0.6841363246291529, "grad_norm": 2.0226536096582466, "learning_rate": 2.396007706798525e-06, "loss": 0.5542, "step": 22322 }, { "epoch": 0.6841669731518941, "grad_norm": 2.0733353257254783, "learning_rate": 2.3955840231835314e-06, "loss": 0.6582, "step": 22323 }, { "epoch": 0.6841976216746353, "grad_norm": 1.627903007154448, "learning_rate": 2.395160365230515e-06, "loss": 0.4836, "step": 22324 }, { "epoch": 0.6842282701973765, "grad_norm": 1.8912043627270017, "learning_rate": 2.3947367329436523e-06, "loss": 0.5373, "step": 22325 }, { "epoch": 0.6842589187201177, "grad_norm": 0.8185199553590968, "learning_rate": 2.394313126327113e-06, "loss": 0.3966, "step": 22326 }, { "epoch": 0.6842895672428589, "grad_norm": 1.5190609825915984, "learning_rate": 2.3938895453850753e-06, "loss": 0.5288, "step": 22327 }, { "epoch": 0.6843202157656001, "grad_norm": 0.7770081951277756, "learning_rate": 2.393465990121708e-06, "loss": 0.395, "step": 22328 }, { "epoch": 0.6843508642883414, "grad_norm": 0.8351762381097857, "learning_rate": 2.3930424605411885e-06, "loss": 0.3987, "step": 22329 }, { "epoch": 0.6843815128110825, "grad_norm": 1.6722980728531636, "learning_rate": 2.392618956647689e-06, "loss": 0.574, "step": 22330 }, { "epoch": 0.6844121613338237, "grad_norm": 1.6315401548888309, "learning_rate": 2.3921954784453814e-06, "loss": 0.5275, "step": 22331 }, { "epoch": 0.6844428098565649, "grad_norm": 1.8028967325507421, "learning_rate": 2.3917720259384386e-06, "loss": 0.5887, "step": 22332 }, { "epoch": 0.6844734583793061, "grad_norm": 1.8405465081022363, "learning_rate": 2.3913485991310352e-06, "loss": 0.6591, "step": 22333 }, { "epoch": 0.6845041069020473, "grad_norm": 1.7991910972522738, "learning_rate": 2.3909251980273397e-06, "loss": 0.5747, "step": 22334 }, { "epoch": 0.6845347554247885, "grad_norm": 1.6838994946786106, "learning_rate": 2.3905018226315256e-06, "loss": 0.5195, "step": 22335 }, { "epoch": 0.6845654039475297, "grad_norm": 1.7724477868339057, "learning_rate": 2.3900784729477672e-06, "loss": 0.5355, "step": 22336 }, { "epoch": 0.6845960524702709, "grad_norm": 1.9335789312593858, "learning_rate": 2.3896551489802307e-06, "loss": 0.6376, "step": 22337 }, { "epoch": 0.6846267009930121, "grad_norm": 1.7966128891863211, "learning_rate": 2.389231850733092e-06, "loss": 0.6615, "step": 22338 }, { "epoch": 0.6846573495157533, "grad_norm": 2.0402309326212187, "learning_rate": 2.388808578210518e-06, "loss": 0.6216, "step": 22339 }, { "epoch": 0.6846879980384946, "grad_norm": 0.8092609999597371, "learning_rate": 2.3883853314166815e-06, "loss": 0.422, "step": 22340 }, { "epoch": 0.6847186465612357, "grad_norm": 1.759853295726998, "learning_rate": 2.3879621103557545e-06, "loss": 0.5798, "step": 22341 }, { "epoch": 0.684749295083977, "grad_norm": 0.7934517008062152, "learning_rate": 2.387538915031903e-06, "loss": 0.4172, "step": 22342 }, { "epoch": 0.6847799436067181, "grad_norm": 2.013485097012287, "learning_rate": 2.3871157454492987e-06, "loss": 0.6065, "step": 22343 }, { "epoch": 0.6848105921294594, "grad_norm": 0.8279849538835156, "learning_rate": 2.386692601612114e-06, "loss": 0.4029, "step": 22344 }, { "epoch": 0.6848412406522005, "grad_norm": 1.5022711658478052, "learning_rate": 2.386269483524513e-06, "loss": 0.5106, "step": 22345 }, { "epoch": 0.6848718891749418, "grad_norm": 0.7714693215930606, "learning_rate": 2.3858463911906704e-06, "loss": 0.4291, "step": 22346 }, { "epoch": 0.684902537697683, "grad_norm": 1.5825099185603957, "learning_rate": 2.3854233246147494e-06, "loss": 0.5416, "step": 22347 }, { "epoch": 0.6849331862204242, "grad_norm": 1.7057848162713212, "learning_rate": 2.3850002838009216e-06, "loss": 0.6212, "step": 22348 }, { "epoch": 0.6849638347431654, "grad_norm": 1.5769677770304684, "learning_rate": 2.3845772687533576e-06, "loss": 0.4725, "step": 22349 }, { "epoch": 0.6849944832659066, "grad_norm": 1.657184699416461, "learning_rate": 2.384154279476221e-06, "loss": 0.5547, "step": 22350 }, { "epoch": 0.6850251317886478, "grad_norm": 1.8640011794604823, "learning_rate": 2.383731315973681e-06, "loss": 0.5701, "step": 22351 }, { "epoch": 0.685055780311389, "grad_norm": 1.6647859603653576, "learning_rate": 2.383308378249907e-06, "loss": 0.5822, "step": 22352 }, { "epoch": 0.6850864288341302, "grad_norm": 0.7806836680637119, "learning_rate": 2.3828854663090646e-06, "loss": 0.41, "step": 22353 }, { "epoch": 0.6851170773568714, "grad_norm": 3.4838867053064733, "learning_rate": 2.3824625801553203e-06, "loss": 0.5286, "step": 22354 }, { "epoch": 0.6851477258796126, "grad_norm": 1.8183788978581765, "learning_rate": 2.382039719792844e-06, "loss": 0.6056, "step": 22355 }, { "epoch": 0.6851783744023539, "grad_norm": 1.5806201271231428, "learning_rate": 2.3816168852257986e-06, "loss": 0.6393, "step": 22356 }, { "epoch": 0.685209022925095, "grad_norm": 2.1979025952612834, "learning_rate": 2.381194076458354e-06, "loss": 0.6812, "step": 22357 }, { "epoch": 0.6852396714478363, "grad_norm": 1.586139695047709, "learning_rate": 2.3807712934946703e-06, "loss": 0.5543, "step": 22358 }, { "epoch": 0.6852703199705774, "grad_norm": 1.831877145973323, "learning_rate": 2.3803485363389205e-06, "loss": 0.6391, "step": 22359 }, { "epoch": 0.6853009684933187, "grad_norm": 1.8777156297821678, "learning_rate": 2.3799258049952674e-06, "loss": 0.5242, "step": 22360 }, { "epoch": 0.6853316170160598, "grad_norm": 1.7389181815547101, "learning_rate": 2.3795030994678736e-06, "loss": 0.6451, "step": 22361 }, { "epoch": 0.685362265538801, "grad_norm": 1.906009917289436, "learning_rate": 2.3790804197609062e-06, "loss": 0.599, "step": 22362 }, { "epoch": 0.6853929140615422, "grad_norm": 1.8601540865417627, "learning_rate": 2.378657765878532e-06, "loss": 0.4652, "step": 22363 }, { "epoch": 0.6854235625842834, "grad_norm": 0.7769799627448892, "learning_rate": 2.378235137824912e-06, "loss": 0.4201, "step": 22364 }, { "epoch": 0.6854542111070246, "grad_norm": 1.8764915632754926, "learning_rate": 2.3778125356042112e-06, "loss": 0.5877, "step": 22365 }, { "epoch": 0.6854848596297658, "grad_norm": 1.7429084191561472, "learning_rate": 2.3773899592205966e-06, "loss": 0.5945, "step": 22366 }, { "epoch": 0.6855155081525071, "grad_norm": 1.9718923149256424, "learning_rate": 2.3769674086782284e-06, "loss": 0.6565, "step": 22367 }, { "epoch": 0.6855461566752482, "grad_norm": 1.7730518131737805, "learning_rate": 2.3765448839812727e-06, "loss": 0.6085, "step": 22368 }, { "epoch": 0.6855768051979895, "grad_norm": 1.7873185926620843, "learning_rate": 2.376122385133888e-06, "loss": 0.5805, "step": 22369 }, { "epoch": 0.6856074537207306, "grad_norm": 0.7701636489860314, "learning_rate": 2.3756999121402446e-06, "loss": 0.3906, "step": 22370 }, { "epoch": 0.6856381022434719, "grad_norm": 0.775479906829987, "learning_rate": 2.3752774650045014e-06, "loss": 0.4121, "step": 22371 }, { "epoch": 0.685668750766213, "grad_norm": 1.8425390941669084, "learning_rate": 2.3748550437308187e-06, "loss": 0.5428, "step": 22372 }, { "epoch": 0.6856993992889543, "grad_norm": 1.5840656791641639, "learning_rate": 2.3744326483233615e-06, "loss": 0.5468, "step": 22373 }, { "epoch": 0.6857300478116954, "grad_norm": 0.8041577763839958, "learning_rate": 2.3740102787862925e-06, "loss": 0.426, "step": 22374 }, { "epoch": 0.6857606963344367, "grad_norm": 1.7620813303617762, "learning_rate": 2.3735879351237706e-06, "loss": 0.6238, "step": 22375 }, { "epoch": 0.6857913448571779, "grad_norm": 0.8037887000604473, "learning_rate": 2.3731656173399585e-06, "loss": 0.3987, "step": 22376 }, { "epoch": 0.6858219933799191, "grad_norm": 1.7991406440334399, "learning_rate": 2.372743325439018e-06, "loss": 0.5078, "step": 22377 }, { "epoch": 0.6858526419026603, "grad_norm": 1.8042395367305044, "learning_rate": 2.372321059425111e-06, "loss": 0.6502, "step": 22378 }, { "epoch": 0.6858832904254015, "grad_norm": 0.808089270085057, "learning_rate": 2.3718988193023977e-06, "loss": 0.4202, "step": 22379 }, { "epoch": 0.6859139389481427, "grad_norm": 1.627298049458244, "learning_rate": 2.3714766050750355e-06, "loss": 0.6328, "step": 22380 }, { "epoch": 0.6859445874708839, "grad_norm": 1.9237504169518693, "learning_rate": 2.3710544167471867e-06, "loss": 0.6056, "step": 22381 }, { "epoch": 0.6859752359936251, "grad_norm": 1.8361849141954292, "learning_rate": 2.3706322543230136e-06, "loss": 0.533, "step": 22382 }, { "epoch": 0.6860058845163663, "grad_norm": 1.7270607030563327, "learning_rate": 2.3702101178066718e-06, "loss": 0.5443, "step": 22383 }, { "epoch": 0.6860365330391075, "grad_norm": 1.6408255529055178, "learning_rate": 2.3697880072023223e-06, "loss": 0.544, "step": 22384 }, { "epoch": 0.6860671815618488, "grad_norm": 1.7321293934829274, "learning_rate": 2.369365922514125e-06, "loss": 0.5875, "step": 22385 }, { "epoch": 0.6860978300845899, "grad_norm": 1.7429254570295059, "learning_rate": 2.3689438637462393e-06, "loss": 0.6495, "step": 22386 }, { "epoch": 0.6861284786073312, "grad_norm": 2.334482067066235, "learning_rate": 2.368521830902822e-06, "loss": 0.6696, "step": 22387 }, { "epoch": 0.6861591271300723, "grad_norm": 1.9906116497754738, "learning_rate": 2.3680998239880315e-06, "loss": 0.5439, "step": 22388 }, { "epoch": 0.6861897756528136, "grad_norm": 0.8092853058935727, "learning_rate": 2.367677843006029e-06, "loss": 0.4021, "step": 22389 }, { "epoch": 0.6862204241755547, "grad_norm": 1.7861538563022767, "learning_rate": 2.3672558879609707e-06, "loss": 0.6016, "step": 22390 }, { "epoch": 0.686251072698296, "grad_norm": 1.9343429630359705, "learning_rate": 2.3668339588570115e-06, "loss": 0.6298, "step": 22391 }, { "epoch": 0.6862817212210371, "grad_norm": 1.8662078861535334, "learning_rate": 2.366412055698311e-06, "loss": 0.6323, "step": 22392 }, { "epoch": 0.6863123697437783, "grad_norm": 0.7637519861638581, "learning_rate": 2.365990178489028e-06, "loss": 0.4056, "step": 22393 }, { "epoch": 0.6863430182665196, "grad_norm": 1.9705062714241839, "learning_rate": 2.3655683272333163e-06, "loss": 0.6624, "step": 22394 }, { "epoch": 0.6863736667892607, "grad_norm": 2.096557520692855, "learning_rate": 2.365146501935334e-06, "loss": 0.5811, "step": 22395 }, { "epoch": 0.686404315312002, "grad_norm": 1.7225215514279402, "learning_rate": 2.364724702599237e-06, "loss": 0.5467, "step": 22396 }, { "epoch": 0.6864349638347431, "grad_norm": 2.0225214172614123, "learning_rate": 2.364302929229184e-06, "loss": 0.6509, "step": 22397 }, { "epoch": 0.6864656123574844, "grad_norm": 1.8872231502639811, "learning_rate": 2.3638811818293284e-06, "loss": 0.5792, "step": 22398 }, { "epoch": 0.6864962608802255, "grad_norm": 0.7565121453633376, "learning_rate": 2.363459460403822e-06, "loss": 0.3957, "step": 22399 }, { "epoch": 0.6865269094029668, "grad_norm": 1.7540268304651048, "learning_rate": 2.363037764956828e-06, "loss": 0.5962, "step": 22400 }, { "epoch": 0.6865575579257079, "grad_norm": 1.7308131297356557, "learning_rate": 2.362616095492498e-06, "loss": 0.5881, "step": 22401 }, { "epoch": 0.6865882064484492, "grad_norm": 1.7730220915460913, "learning_rate": 2.3621944520149842e-06, "loss": 0.5077, "step": 22402 }, { "epoch": 0.6866188549711904, "grad_norm": 1.8077390035345442, "learning_rate": 2.3617728345284434e-06, "loss": 0.611, "step": 22403 }, { "epoch": 0.6866495034939316, "grad_norm": 1.9252105761845941, "learning_rate": 2.36135124303703e-06, "loss": 0.6702, "step": 22404 }, { "epoch": 0.6866801520166728, "grad_norm": 1.8152504372683478, "learning_rate": 2.3609296775448998e-06, "loss": 0.5784, "step": 22405 }, { "epoch": 0.686710800539414, "grad_norm": 0.7875107133459092, "learning_rate": 2.360508138056203e-06, "loss": 0.415, "step": 22406 }, { "epoch": 0.6867414490621552, "grad_norm": 1.9695100239231789, "learning_rate": 2.360086624575094e-06, "loss": 0.677, "step": 22407 }, { "epoch": 0.6867720975848964, "grad_norm": 1.7023042732721145, "learning_rate": 2.3596651371057293e-06, "loss": 0.619, "step": 22408 }, { "epoch": 0.6868027461076376, "grad_norm": 1.9099007498177307, "learning_rate": 2.35924367565226e-06, "loss": 0.6311, "step": 22409 }, { "epoch": 0.6868333946303788, "grad_norm": 1.759253457607473, "learning_rate": 2.3588222402188343e-06, "loss": 0.5953, "step": 22410 }, { "epoch": 0.68686404315312, "grad_norm": 2.0708033116537417, "learning_rate": 2.3584008308096127e-06, "loss": 0.5623, "step": 22411 }, { "epoch": 0.6868946916758613, "grad_norm": 1.803633566949237, "learning_rate": 2.3579794474287416e-06, "loss": 0.6378, "step": 22412 }, { "epoch": 0.6869253401986024, "grad_norm": 1.7825112753163745, "learning_rate": 2.357558090080377e-06, "loss": 0.5986, "step": 22413 }, { "epoch": 0.6869559887213437, "grad_norm": 1.7037992395545514, "learning_rate": 2.3571367587686667e-06, "loss": 0.6189, "step": 22414 }, { "epoch": 0.6869866372440848, "grad_norm": 1.6982726183706771, "learning_rate": 2.3567154534977643e-06, "loss": 0.4602, "step": 22415 }, { "epoch": 0.6870172857668261, "grad_norm": 2.1321794981116455, "learning_rate": 2.3562941742718227e-06, "loss": 0.59, "step": 22416 }, { "epoch": 0.6870479342895672, "grad_norm": 1.8592989457508384, "learning_rate": 2.355872921094989e-06, "loss": 0.5591, "step": 22417 }, { "epoch": 0.6870785828123085, "grad_norm": 1.8967240295678458, "learning_rate": 2.3554516939714156e-06, "loss": 0.6404, "step": 22418 }, { "epoch": 0.6871092313350496, "grad_norm": 2.111119678280495, "learning_rate": 2.355030492905256e-06, "loss": 0.6247, "step": 22419 }, { "epoch": 0.6871398798577909, "grad_norm": 1.857120546582853, "learning_rate": 2.354609317900657e-06, "loss": 0.5576, "step": 22420 }, { "epoch": 0.687170528380532, "grad_norm": 1.8938049007788422, "learning_rate": 2.354188168961766e-06, "loss": 0.5967, "step": 22421 }, { "epoch": 0.6872011769032733, "grad_norm": 1.8139650468496622, "learning_rate": 2.353767046092739e-06, "loss": 0.6057, "step": 22422 }, { "epoch": 0.6872318254260145, "grad_norm": 1.7209369667373127, "learning_rate": 2.3533459492977208e-06, "loss": 0.6037, "step": 22423 }, { "epoch": 0.6872624739487556, "grad_norm": 1.6729079330848433, "learning_rate": 2.352924878580864e-06, "loss": 0.5916, "step": 22424 }, { "epoch": 0.6872931224714969, "grad_norm": 1.759001620283047, "learning_rate": 2.3525038339463143e-06, "loss": 0.606, "step": 22425 }, { "epoch": 0.687323770994238, "grad_norm": 0.7814331069906137, "learning_rate": 2.352082815398221e-06, "loss": 0.4233, "step": 22426 }, { "epoch": 0.6873544195169793, "grad_norm": 1.7841353878334094, "learning_rate": 2.3516618229407356e-06, "loss": 0.5944, "step": 22427 }, { "epoch": 0.6873850680397204, "grad_norm": 1.8154298629650765, "learning_rate": 2.3512408565780013e-06, "loss": 0.6394, "step": 22428 }, { "epoch": 0.6874157165624617, "grad_norm": 1.9147283494053524, "learning_rate": 2.3508199163141694e-06, "loss": 0.6092, "step": 22429 }, { "epoch": 0.6874463650852028, "grad_norm": 2.0327313006677077, "learning_rate": 2.350399002153388e-06, "loss": 0.6174, "step": 22430 }, { "epoch": 0.6874770136079441, "grad_norm": 0.8047836200839621, "learning_rate": 2.3499781140998016e-06, "loss": 0.4175, "step": 22431 }, { "epoch": 0.6875076621306853, "grad_norm": 1.849572584148264, "learning_rate": 2.3495572521575603e-06, "loss": 0.4879, "step": 22432 }, { "epoch": 0.6875383106534265, "grad_norm": 1.6870239811541294, "learning_rate": 2.3491364163308083e-06, "loss": 0.5699, "step": 22433 }, { "epoch": 0.6875689591761677, "grad_norm": 1.7708839135015435, "learning_rate": 2.3487156066236934e-06, "loss": 0.6292, "step": 22434 }, { "epoch": 0.6875996076989089, "grad_norm": 2.051204854538234, "learning_rate": 2.3482948230403637e-06, "loss": 0.6079, "step": 22435 }, { "epoch": 0.6876302562216501, "grad_norm": 2.1939917709860306, "learning_rate": 2.3478740655849612e-06, "loss": 0.7016, "step": 22436 }, { "epoch": 0.6876609047443913, "grad_norm": 1.736869946021907, "learning_rate": 2.3474533342616344e-06, "loss": 0.5352, "step": 22437 }, { "epoch": 0.6876915532671325, "grad_norm": 0.7806009840830443, "learning_rate": 2.3470326290745302e-06, "loss": 0.3938, "step": 22438 }, { "epoch": 0.6877222017898738, "grad_norm": 1.5776082136567424, "learning_rate": 2.346611950027791e-06, "loss": 0.678, "step": 22439 }, { "epoch": 0.6877528503126149, "grad_norm": 1.8755889056061403, "learning_rate": 2.3461912971255635e-06, "loss": 0.5864, "step": 22440 }, { "epoch": 0.6877834988353562, "grad_norm": 1.8213233285378483, "learning_rate": 2.345770670371993e-06, "loss": 0.6021, "step": 22441 }, { "epoch": 0.6878141473580973, "grad_norm": 1.9977931871851817, "learning_rate": 2.345350069771222e-06, "loss": 0.7208, "step": 22442 }, { "epoch": 0.6878447958808386, "grad_norm": 0.7982134106219315, "learning_rate": 2.344929495327398e-06, "loss": 0.388, "step": 22443 }, { "epoch": 0.6878754444035797, "grad_norm": 0.8922470092321991, "learning_rate": 2.3445089470446604e-06, "loss": 0.3907, "step": 22444 }, { "epoch": 0.687906092926321, "grad_norm": 2.0490193774453105, "learning_rate": 2.344088424927156e-06, "loss": 0.6164, "step": 22445 }, { "epoch": 0.6879367414490621, "grad_norm": 1.7466348462742003, "learning_rate": 2.3436679289790297e-06, "loss": 0.5297, "step": 22446 }, { "epoch": 0.6879673899718034, "grad_norm": 1.7248317886700573, "learning_rate": 2.3432474592044214e-06, "loss": 0.5623, "step": 22447 }, { "epoch": 0.6879980384945446, "grad_norm": 1.5950563320409903, "learning_rate": 2.342827015607475e-06, "loss": 0.5707, "step": 22448 }, { "epoch": 0.6880286870172858, "grad_norm": 2.1403586687858054, "learning_rate": 2.342406598192336e-06, "loss": 0.5824, "step": 22449 }, { "epoch": 0.688059335540027, "grad_norm": 1.8533169726919065, "learning_rate": 2.3419862069631433e-06, "loss": 0.5705, "step": 22450 }, { "epoch": 0.6880899840627682, "grad_norm": 0.7536098814122162, "learning_rate": 2.34156584192404e-06, "loss": 0.3966, "step": 22451 }, { "epoch": 0.6881206325855094, "grad_norm": 1.912909756592003, "learning_rate": 2.341145503079171e-06, "loss": 0.6032, "step": 22452 }, { "epoch": 0.6881512811082506, "grad_norm": 1.786453809587702, "learning_rate": 2.3407251904326733e-06, "loss": 0.5205, "step": 22453 }, { "epoch": 0.6881819296309918, "grad_norm": 1.80997719286639, "learning_rate": 2.3403049039886932e-06, "loss": 0.5992, "step": 22454 }, { "epoch": 0.688212578153733, "grad_norm": 1.817801833827858, "learning_rate": 2.339884643751367e-06, "loss": 0.4829, "step": 22455 }, { "epoch": 0.6882432266764742, "grad_norm": 1.631450054821589, "learning_rate": 2.339464409724838e-06, "loss": 0.5598, "step": 22456 }, { "epoch": 0.6882738751992153, "grad_norm": 5.310960165414161, "learning_rate": 2.339044201913249e-06, "loss": 0.6192, "step": 22457 }, { "epoch": 0.6883045237219566, "grad_norm": 1.6902626725541583, "learning_rate": 2.3386240203207365e-06, "loss": 0.5599, "step": 22458 }, { "epoch": 0.6883351722446978, "grad_norm": 1.8232602440572843, "learning_rate": 2.338203864951443e-06, "loss": 0.5807, "step": 22459 }, { "epoch": 0.688365820767439, "grad_norm": 1.8680348728095213, "learning_rate": 2.337783735809509e-06, "loss": 0.6024, "step": 22460 }, { "epoch": 0.6883964692901802, "grad_norm": 0.8165929038802157, "learning_rate": 2.3373636328990713e-06, "loss": 0.4089, "step": 22461 }, { "epoch": 0.6884271178129214, "grad_norm": 1.935523008944774, "learning_rate": 2.336943556224271e-06, "loss": 0.5363, "step": 22462 }, { "epoch": 0.6884577663356626, "grad_norm": 2.009717576912026, "learning_rate": 2.336523505789249e-06, "loss": 0.5163, "step": 22463 }, { "epoch": 0.6884884148584038, "grad_norm": 1.9726905322163881, "learning_rate": 2.3361034815981406e-06, "loss": 0.71, "step": 22464 }, { "epoch": 0.688519063381145, "grad_norm": 1.850519276971451, "learning_rate": 2.335683483655088e-06, "loss": 0.6395, "step": 22465 }, { "epoch": 0.6885497119038863, "grad_norm": 1.8059405130993276, "learning_rate": 2.3352635119642252e-06, "loss": 0.521, "step": 22466 }, { "epoch": 0.6885803604266274, "grad_norm": 1.855986710634367, "learning_rate": 2.3348435665296937e-06, "loss": 0.7158, "step": 22467 }, { "epoch": 0.6886110089493687, "grad_norm": 1.6966962665066758, "learning_rate": 2.334423647355632e-06, "loss": 0.6351, "step": 22468 }, { "epoch": 0.6886416574721098, "grad_norm": 1.6757045864433058, "learning_rate": 2.3340037544461745e-06, "loss": 0.5478, "step": 22469 }, { "epoch": 0.6886723059948511, "grad_norm": 1.826166617770269, "learning_rate": 2.3335838878054602e-06, "loss": 0.5509, "step": 22470 }, { "epoch": 0.6887029545175922, "grad_norm": 0.8187218380552955, "learning_rate": 2.3331640474376277e-06, "loss": 0.4235, "step": 22471 }, { "epoch": 0.6887336030403335, "grad_norm": 1.8565521433664272, "learning_rate": 2.3327442333468104e-06, "loss": 0.5594, "step": 22472 }, { "epoch": 0.6887642515630746, "grad_norm": 0.8728225253586057, "learning_rate": 2.3323244455371465e-06, "loss": 0.4199, "step": 22473 }, { "epoch": 0.6887949000858159, "grad_norm": 1.793709752146349, "learning_rate": 2.3319046840127742e-06, "loss": 0.5836, "step": 22474 }, { "epoch": 0.688825548608557, "grad_norm": 0.7709096192054176, "learning_rate": 2.3314849487778258e-06, "loss": 0.3783, "step": 22475 }, { "epoch": 0.6888561971312983, "grad_norm": 1.9360567359822718, "learning_rate": 2.3310652398364415e-06, "loss": 0.625, "step": 22476 }, { "epoch": 0.6888868456540395, "grad_norm": 1.6596774773992888, "learning_rate": 2.330645557192752e-06, "loss": 0.6373, "step": 22477 }, { "epoch": 0.6889174941767807, "grad_norm": 0.8384980444270728, "learning_rate": 2.3302259008508942e-06, "loss": 0.4084, "step": 22478 }, { "epoch": 0.6889481426995219, "grad_norm": 1.8147657398324029, "learning_rate": 2.329806270815006e-06, "loss": 0.6204, "step": 22479 }, { "epoch": 0.6889787912222631, "grad_norm": 1.6385035591502237, "learning_rate": 2.3293866670892185e-06, "loss": 0.4804, "step": 22480 }, { "epoch": 0.6890094397450043, "grad_norm": 1.7175008052548888, "learning_rate": 2.3289670896776666e-06, "loss": 0.6561, "step": 22481 }, { "epoch": 0.6890400882677455, "grad_norm": 1.6940363592817873, "learning_rate": 2.3285475385844876e-06, "loss": 0.5853, "step": 22482 }, { "epoch": 0.6890707367904867, "grad_norm": 1.868271182583457, "learning_rate": 2.328128013813811e-06, "loss": 0.6017, "step": 22483 }, { "epoch": 0.689101385313228, "grad_norm": 0.8183053355853165, "learning_rate": 2.3277085153697755e-06, "loss": 0.3976, "step": 22484 }, { "epoch": 0.6891320338359691, "grad_norm": 1.7488988139161536, "learning_rate": 2.3272890432565077e-06, "loss": 0.567, "step": 22485 }, { "epoch": 0.6891626823587104, "grad_norm": 1.9826775527413838, "learning_rate": 2.326869597478148e-06, "loss": 0.7275, "step": 22486 }, { "epoch": 0.6891933308814515, "grad_norm": 0.8062299968532429, "learning_rate": 2.3264501780388267e-06, "loss": 0.3991, "step": 22487 }, { "epoch": 0.6892239794041927, "grad_norm": 0.8270905101960299, "learning_rate": 2.3260307849426733e-06, "loss": 0.4054, "step": 22488 }, { "epoch": 0.6892546279269339, "grad_norm": 1.7996461014041158, "learning_rate": 2.325611418193823e-06, "loss": 0.6655, "step": 22489 }, { "epoch": 0.6892852764496751, "grad_norm": 1.8873366164224021, "learning_rate": 2.3251920777964098e-06, "loss": 0.593, "step": 22490 }, { "epoch": 0.6893159249724163, "grad_norm": 1.9387852434443702, "learning_rate": 2.3247727637545612e-06, "loss": 0.6096, "step": 22491 }, { "epoch": 0.6893465734951575, "grad_norm": 0.873816186685427, "learning_rate": 2.324353476072412e-06, "loss": 0.4067, "step": 22492 }, { "epoch": 0.6893772220178987, "grad_norm": 1.8688325664186578, "learning_rate": 2.3239342147540932e-06, "loss": 0.6524, "step": 22493 }, { "epoch": 0.6894078705406399, "grad_norm": 0.7768644016524807, "learning_rate": 2.3235149798037344e-06, "loss": 0.4069, "step": 22494 }, { "epoch": 0.6894385190633812, "grad_norm": 0.793235314985987, "learning_rate": 2.3230957712254686e-06, "loss": 0.4092, "step": 22495 }, { "epoch": 0.6894691675861223, "grad_norm": 1.7840434605442654, "learning_rate": 2.3226765890234216e-06, "loss": 0.5846, "step": 22496 }, { "epoch": 0.6894998161088636, "grad_norm": 1.6827022898259334, "learning_rate": 2.3222574332017305e-06, "loss": 0.6559, "step": 22497 }, { "epoch": 0.6895304646316047, "grad_norm": 1.9796285144907024, "learning_rate": 2.3218383037645227e-06, "loss": 0.6967, "step": 22498 }, { "epoch": 0.689561113154346, "grad_norm": 1.813413697873661, "learning_rate": 2.3214192007159246e-06, "loss": 0.6358, "step": 22499 }, { "epoch": 0.6895917616770871, "grad_norm": 1.4792831180778974, "learning_rate": 2.3210001240600694e-06, "loss": 0.5441, "step": 22500 }, { "epoch": 0.6896224101998284, "grad_norm": 1.742678549422185, "learning_rate": 2.3205810738010866e-06, "loss": 0.5656, "step": 22501 }, { "epoch": 0.6896530587225695, "grad_norm": 1.9239093402227325, "learning_rate": 2.3201620499431027e-06, "loss": 0.6149, "step": 22502 }, { "epoch": 0.6896837072453108, "grad_norm": 1.7555644928450793, "learning_rate": 2.3197430524902477e-06, "loss": 0.5548, "step": 22503 }, { "epoch": 0.689714355768052, "grad_norm": 0.7814050069134002, "learning_rate": 2.3193240814466493e-06, "loss": 0.3991, "step": 22504 }, { "epoch": 0.6897450042907932, "grad_norm": 1.873544373218666, "learning_rate": 2.3189051368164393e-06, "loss": 0.5563, "step": 22505 }, { "epoch": 0.6897756528135344, "grad_norm": 2.0045601021756214, "learning_rate": 2.318486218603743e-06, "loss": 0.649, "step": 22506 }, { "epoch": 0.6898063013362756, "grad_norm": 1.6784134118680345, "learning_rate": 2.3180673268126842e-06, "loss": 0.6594, "step": 22507 }, { "epoch": 0.6898369498590168, "grad_norm": 1.9037569365493985, "learning_rate": 2.317648461447398e-06, "loss": 0.5585, "step": 22508 }, { "epoch": 0.689867598381758, "grad_norm": 1.4929087375436947, "learning_rate": 2.317229622512008e-06, "loss": 0.5315, "step": 22509 }, { "epoch": 0.6898982469044992, "grad_norm": 1.6310787204028123, "learning_rate": 2.3168108100106383e-06, "loss": 0.5581, "step": 22510 }, { "epoch": 0.6899288954272405, "grad_norm": 1.7477228763589627, "learning_rate": 2.316392023947419e-06, "loss": 0.6035, "step": 22511 }, { "epoch": 0.6899595439499816, "grad_norm": 1.647114780260168, "learning_rate": 2.3159732643264752e-06, "loss": 0.5345, "step": 22512 }, { "epoch": 0.6899901924727229, "grad_norm": 1.9874241643000723, "learning_rate": 2.3155545311519364e-06, "loss": 0.6812, "step": 22513 }, { "epoch": 0.690020840995464, "grad_norm": 2.1436212614317203, "learning_rate": 2.3151358244279227e-06, "loss": 0.6464, "step": 22514 }, { "epoch": 0.6900514895182053, "grad_norm": 2.025237767823266, "learning_rate": 2.3147171441585633e-06, "loss": 0.6254, "step": 22515 }, { "epoch": 0.6900821380409464, "grad_norm": 2.364702235149817, "learning_rate": 2.3142984903479847e-06, "loss": 0.5352, "step": 22516 }, { "epoch": 0.6901127865636877, "grad_norm": 1.5831365010011376, "learning_rate": 2.31387986300031e-06, "loss": 0.5312, "step": 22517 }, { "epoch": 0.6901434350864288, "grad_norm": 1.8211016065004721, "learning_rate": 2.3134612621196606e-06, "loss": 0.5746, "step": 22518 }, { "epoch": 0.69017408360917, "grad_norm": 1.7959317033955258, "learning_rate": 2.3130426877101686e-06, "loss": 0.6026, "step": 22519 }, { "epoch": 0.6902047321319112, "grad_norm": 1.699286327626318, "learning_rate": 2.3126241397759547e-06, "loss": 0.5297, "step": 22520 }, { "epoch": 0.6902353806546524, "grad_norm": 1.9700634871519105, "learning_rate": 2.3122056183211406e-06, "loss": 0.6386, "step": 22521 }, { "epoch": 0.6902660291773937, "grad_norm": 1.710705628527412, "learning_rate": 2.311787123349852e-06, "loss": 0.5394, "step": 22522 }, { "epoch": 0.6902966777001348, "grad_norm": 1.8942950513418269, "learning_rate": 2.3113686548662128e-06, "loss": 0.5512, "step": 22523 }, { "epoch": 0.6903273262228761, "grad_norm": 1.7307059473919983, "learning_rate": 2.3109502128743483e-06, "loss": 0.6345, "step": 22524 }, { "epoch": 0.6903579747456172, "grad_norm": 2.0832263598334033, "learning_rate": 2.3105317973783774e-06, "loss": 0.7406, "step": 22525 }, { "epoch": 0.6903886232683585, "grad_norm": 1.6562445077609445, "learning_rate": 2.310113408382425e-06, "loss": 0.5748, "step": 22526 }, { "epoch": 0.6904192717910996, "grad_norm": 1.9002404366272452, "learning_rate": 2.309695045890615e-06, "loss": 0.6275, "step": 22527 }, { "epoch": 0.6904499203138409, "grad_norm": 1.9212888225579032, "learning_rate": 2.3092767099070683e-06, "loss": 0.6157, "step": 22528 }, { "epoch": 0.690480568836582, "grad_norm": 1.7926532308297372, "learning_rate": 2.308858400435905e-06, "loss": 0.5729, "step": 22529 }, { "epoch": 0.6905112173593233, "grad_norm": 1.7065383792957711, "learning_rate": 2.3084401174812476e-06, "loss": 0.5743, "step": 22530 }, { "epoch": 0.6905418658820645, "grad_norm": 1.5945857932207081, "learning_rate": 2.308021861047219e-06, "loss": 0.4798, "step": 22531 }, { "epoch": 0.6905725144048057, "grad_norm": 2.080234933378633, "learning_rate": 2.3076036311379413e-06, "loss": 0.5843, "step": 22532 }, { "epoch": 0.6906031629275469, "grad_norm": 1.7941855314952186, "learning_rate": 2.3071854277575324e-06, "loss": 0.5794, "step": 22533 }, { "epoch": 0.6906338114502881, "grad_norm": 0.8070991633093357, "learning_rate": 2.306767250910114e-06, "loss": 0.4153, "step": 22534 }, { "epoch": 0.6906644599730293, "grad_norm": 1.1784848998164064, "learning_rate": 2.3063491005998095e-06, "loss": 0.4075, "step": 22535 }, { "epoch": 0.6906951084957705, "grad_norm": 1.502737661388871, "learning_rate": 2.3059309768307364e-06, "loss": 0.5053, "step": 22536 }, { "epoch": 0.6907257570185117, "grad_norm": 1.64045966682889, "learning_rate": 2.3055128796070105e-06, "loss": 0.5199, "step": 22537 }, { "epoch": 0.690756405541253, "grad_norm": 1.7957961661039172, "learning_rate": 2.3050948089327594e-06, "loss": 0.5883, "step": 22538 }, { "epoch": 0.6907870540639941, "grad_norm": 1.7079198453503956, "learning_rate": 2.304676764812097e-06, "loss": 0.539, "step": 22539 }, { "epoch": 0.6908177025867354, "grad_norm": 1.7325549570836798, "learning_rate": 2.3042587472491463e-06, "loss": 0.5384, "step": 22540 }, { "epoch": 0.6908483511094765, "grad_norm": 1.6204794955597233, "learning_rate": 2.3038407562480213e-06, "loss": 0.6346, "step": 22541 }, { "epoch": 0.6908789996322178, "grad_norm": 1.84512897574316, "learning_rate": 2.3034227918128438e-06, "loss": 0.516, "step": 22542 }, { "epoch": 0.6909096481549589, "grad_norm": 1.7954658295934738, "learning_rate": 2.303004853947733e-06, "loss": 0.534, "step": 22543 }, { "epoch": 0.6909402966777002, "grad_norm": 1.7698919279030467, "learning_rate": 2.302586942656803e-06, "loss": 0.5646, "step": 22544 }, { "epoch": 0.6909709452004413, "grad_norm": 1.794791322069092, "learning_rate": 2.3021690579441754e-06, "loss": 0.5741, "step": 22545 }, { "epoch": 0.6910015937231826, "grad_norm": 1.6437300581038956, "learning_rate": 2.3017511998139667e-06, "loss": 0.5001, "step": 22546 }, { "epoch": 0.6910322422459237, "grad_norm": 0.8367157045292523, "learning_rate": 2.301333368270295e-06, "loss": 0.4057, "step": 22547 }, { "epoch": 0.691062890768665, "grad_norm": 1.758185070219384, "learning_rate": 2.300915563317272e-06, "loss": 0.6214, "step": 22548 }, { "epoch": 0.6910935392914062, "grad_norm": 0.832153281579671, "learning_rate": 2.300497784959022e-06, "loss": 0.4176, "step": 22549 }, { "epoch": 0.6911241878141473, "grad_norm": 1.6773463661958068, "learning_rate": 2.3000800331996564e-06, "loss": 0.6381, "step": 22550 }, { "epoch": 0.6911548363368886, "grad_norm": 1.6023648094925773, "learning_rate": 2.299662308043295e-06, "loss": 0.52, "step": 22551 }, { "epoch": 0.6911854848596297, "grad_norm": 1.8985424414643088, "learning_rate": 2.2992446094940496e-06, "loss": 0.5656, "step": 22552 }, { "epoch": 0.691216133382371, "grad_norm": 1.8894825288091168, "learning_rate": 2.2988269375560383e-06, "loss": 0.5728, "step": 22553 }, { "epoch": 0.6912467819051121, "grad_norm": 0.7895204561239173, "learning_rate": 2.298409292233378e-06, "loss": 0.4226, "step": 22554 }, { "epoch": 0.6912774304278534, "grad_norm": 1.7564663685799677, "learning_rate": 2.2979916735301804e-06, "loss": 0.6189, "step": 22555 }, { "epoch": 0.6913080789505945, "grad_norm": 1.7119583296987761, "learning_rate": 2.297574081450563e-06, "loss": 0.5118, "step": 22556 }, { "epoch": 0.6913387274733358, "grad_norm": 1.795595431972577, "learning_rate": 2.29715651599864e-06, "loss": 0.5789, "step": 22557 }, { "epoch": 0.691369375996077, "grad_norm": 2.008725350243916, "learning_rate": 2.2967389771785243e-06, "loss": 0.5663, "step": 22558 }, { "epoch": 0.6914000245188182, "grad_norm": 1.955651979236864, "learning_rate": 2.296321464994331e-06, "loss": 0.7268, "step": 22559 }, { "epoch": 0.6914306730415594, "grad_norm": 1.9513312472849313, "learning_rate": 2.295903979450176e-06, "loss": 0.6361, "step": 22560 }, { "epoch": 0.6914613215643006, "grad_norm": 1.8781535758856847, "learning_rate": 2.295486520550169e-06, "loss": 0.6576, "step": 22561 }, { "epoch": 0.6914919700870418, "grad_norm": 0.809148340117448, "learning_rate": 2.2950690882984274e-06, "loss": 0.403, "step": 22562 }, { "epoch": 0.691522618609783, "grad_norm": 1.7551593770065141, "learning_rate": 2.29465168269906e-06, "loss": 0.4907, "step": 22563 }, { "epoch": 0.6915532671325242, "grad_norm": 1.8369185333067541, "learning_rate": 2.294234303756182e-06, "loss": 0.6288, "step": 22564 }, { "epoch": 0.6915839156552654, "grad_norm": 1.772410090301964, "learning_rate": 2.293816951473908e-06, "loss": 0.6254, "step": 22565 }, { "epoch": 0.6916145641780066, "grad_norm": 1.714754775551, "learning_rate": 2.293399625856345e-06, "loss": 0.5771, "step": 22566 }, { "epoch": 0.6916452127007479, "grad_norm": 1.6867937395198942, "learning_rate": 2.2929823269076085e-06, "loss": 0.5665, "step": 22567 }, { "epoch": 0.691675861223489, "grad_norm": 0.8481244387317944, "learning_rate": 2.292565054631812e-06, "loss": 0.4251, "step": 22568 }, { "epoch": 0.6917065097462303, "grad_norm": 0.7572814929336024, "learning_rate": 2.2921478090330624e-06, "loss": 0.4202, "step": 22569 }, { "epoch": 0.6917371582689714, "grad_norm": 1.6910858527940171, "learning_rate": 2.2917305901154737e-06, "loss": 0.5878, "step": 22570 }, { "epoch": 0.6917678067917127, "grad_norm": 1.7683428099945318, "learning_rate": 2.2913133978831582e-06, "loss": 0.6253, "step": 22571 }, { "epoch": 0.6917984553144538, "grad_norm": 1.8382730625600052, "learning_rate": 2.290896232340223e-06, "loss": 0.6473, "step": 22572 }, { "epoch": 0.6918291038371951, "grad_norm": 0.8121226695063752, "learning_rate": 2.2904790934907817e-06, "loss": 0.414, "step": 22573 }, { "epoch": 0.6918597523599362, "grad_norm": 1.7246467993873222, "learning_rate": 2.290061981338942e-06, "loss": 0.6019, "step": 22574 }, { "epoch": 0.6918904008826775, "grad_norm": 2.0979793014583565, "learning_rate": 2.2896448958888145e-06, "loss": 0.6285, "step": 22575 }, { "epoch": 0.6919210494054187, "grad_norm": 1.8527312401501703, "learning_rate": 2.2892278371445107e-06, "loss": 0.6132, "step": 22576 }, { "epoch": 0.6919516979281599, "grad_norm": 0.788401210911544, "learning_rate": 2.2888108051101377e-06, "loss": 0.4163, "step": 22577 }, { "epoch": 0.6919823464509011, "grad_norm": 1.7423808160615157, "learning_rate": 2.2883937997898053e-06, "loss": 0.6057, "step": 22578 }, { "epoch": 0.6920129949736423, "grad_norm": 1.7497680413894303, "learning_rate": 2.287976821187624e-06, "loss": 0.6041, "step": 22579 }, { "epoch": 0.6920436434963835, "grad_norm": 1.7763840116917056, "learning_rate": 2.2875598693076995e-06, "loss": 0.6459, "step": 22580 }, { "epoch": 0.6920742920191246, "grad_norm": 0.7771884284183314, "learning_rate": 2.287142944154144e-06, "loss": 0.4022, "step": 22581 }, { "epoch": 0.6921049405418659, "grad_norm": 1.7181799100592479, "learning_rate": 2.28672604573106e-06, "loss": 0.5808, "step": 22582 }, { "epoch": 0.692135589064607, "grad_norm": 1.7175747078199795, "learning_rate": 2.2863091740425597e-06, "loss": 0.5342, "step": 22583 }, { "epoch": 0.6921662375873483, "grad_norm": 1.7484939815471645, "learning_rate": 2.285892329092751e-06, "loss": 0.5607, "step": 22584 }, { "epoch": 0.6921968861100894, "grad_norm": 1.9202842115417398, "learning_rate": 2.2854755108857376e-06, "loss": 0.6378, "step": 22585 }, { "epoch": 0.6922275346328307, "grad_norm": 1.7405264035283554, "learning_rate": 2.2850587194256284e-06, "loss": 0.5404, "step": 22586 }, { "epoch": 0.6922581831555719, "grad_norm": 1.6843856909926629, "learning_rate": 2.2846419547165323e-06, "loss": 0.6507, "step": 22587 }, { "epoch": 0.6922888316783131, "grad_norm": 2.0782925110037604, "learning_rate": 2.2842252167625517e-06, "loss": 0.5748, "step": 22588 }, { "epoch": 0.6923194802010543, "grad_norm": 1.9236849927518394, "learning_rate": 2.283808505567795e-06, "loss": 0.6127, "step": 22589 }, { "epoch": 0.6923501287237955, "grad_norm": 1.6641395665328065, "learning_rate": 2.2833918211363705e-06, "loss": 0.5664, "step": 22590 }, { "epoch": 0.6923807772465367, "grad_norm": 1.8556739595339222, "learning_rate": 2.2829751634723786e-06, "loss": 0.5993, "step": 22591 }, { "epoch": 0.6924114257692779, "grad_norm": 0.8108482915555532, "learning_rate": 2.28255853257993e-06, "loss": 0.4183, "step": 22592 }, { "epoch": 0.6924420742920191, "grad_norm": 1.6746334373144345, "learning_rate": 2.2821419284631235e-06, "loss": 0.6007, "step": 22593 }, { "epoch": 0.6924727228147604, "grad_norm": 1.7021663390649122, "learning_rate": 2.2817253511260722e-06, "loss": 0.5325, "step": 22594 }, { "epoch": 0.6925033713375015, "grad_norm": 2.1887623562741783, "learning_rate": 2.281308800572876e-06, "loss": 0.6452, "step": 22595 }, { "epoch": 0.6925340198602428, "grad_norm": 1.5875202730378228, "learning_rate": 2.2808922768076387e-06, "loss": 0.5503, "step": 22596 }, { "epoch": 0.6925646683829839, "grad_norm": 2.095826894610963, "learning_rate": 2.2804757798344646e-06, "loss": 0.6192, "step": 22597 }, { "epoch": 0.6925953169057252, "grad_norm": 0.8261079260378317, "learning_rate": 2.2800593096574607e-06, "loss": 0.4209, "step": 22598 }, { "epoch": 0.6926259654284663, "grad_norm": 1.8965844606766822, "learning_rate": 2.2796428662807262e-06, "loss": 0.6149, "step": 22599 }, { "epoch": 0.6926566139512076, "grad_norm": 1.6993036423893244, "learning_rate": 2.279226449708367e-06, "loss": 0.6211, "step": 22600 }, { "epoch": 0.6926872624739487, "grad_norm": 0.8279904138585094, "learning_rate": 2.2788100599444873e-06, "loss": 0.4137, "step": 22601 }, { "epoch": 0.69271791099669, "grad_norm": 0.8677685526562499, "learning_rate": 2.278393696993187e-06, "loss": 0.4202, "step": 22602 }, { "epoch": 0.6927485595194312, "grad_norm": 1.876123332827385, "learning_rate": 2.2779773608585713e-06, "loss": 0.6263, "step": 22603 }, { "epoch": 0.6927792080421724, "grad_norm": 1.906571028162815, "learning_rate": 2.2775610515447373e-06, "loss": 0.5832, "step": 22604 }, { "epoch": 0.6928098565649136, "grad_norm": 1.8972194316872917, "learning_rate": 2.2771447690557948e-06, "loss": 0.62, "step": 22605 }, { "epoch": 0.6928405050876548, "grad_norm": 1.8016416867698604, "learning_rate": 2.2767285133958415e-06, "loss": 0.5671, "step": 22606 }, { "epoch": 0.692871153610396, "grad_norm": 1.8340066585072257, "learning_rate": 2.2763122845689772e-06, "loss": 0.5831, "step": 22607 }, { "epoch": 0.6929018021331372, "grad_norm": 1.945553530799004, "learning_rate": 2.2758960825793045e-06, "loss": 0.559, "step": 22608 }, { "epoch": 0.6929324506558784, "grad_norm": 1.8015636443530856, "learning_rate": 2.275479907430927e-06, "loss": 0.4807, "step": 22609 }, { "epoch": 0.6929630991786196, "grad_norm": 1.7293335792477815, "learning_rate": 2.2750637591279413e-06, "loss": 0.647, "step": 22610 }, { "epoch": 0.6929937477013608, "grad_norm": 1.8597661905894598, "learning_rate": 2.2746476376744493e-06, "loss": 0.6343, "step": 22611 }, { "epoch": 0.693024396224102, "grad_norm": 0.7778626188195052, "learning_rate": 2.274231543074551e-06, "loss": 0.4059, "step": 22612 }, { "epoch": 0.6930550447468432, "grad_norm": 1.7927153614733753, "learning_rate": 2.2738154753323495e-06, "loss": 0.6894, "step": 22613 }, { "epoch": 0.6930856932695844, "grad_norm": 1.6228023846247923, "learning_rate": 2.273399434451941e-06, "loss": 0.5718, "step": 22614 }, { "epoch": 0.6931163417923256, "grad_norm": 0.7623345737414766, "learning_rate": 2.272983420437422e-06, "loss": 0.4068, "step": 22615 }, { "epoch": 0.6931469903150668, "grad_norm": 1.7989029758557527, "learning_rate": 2.272567433292899e-06, "loss": 0.6458, "step": 22616 }, { "epoch": 0.693177638837808, "grad_norm": 1.6449937246524173, "learning_rate": 2.2721514730224664e-06, "loss": 0.5792, "step": 22617 }, { "epoch": 0.6932082873605492, "grad_norm": 1.7237873221878086, "learning_rate": 2.2717355396302214e-06, "loss": 0.6701, "step": 22618 }, { "epoch": 0.6932389358832904, "grad_norm": 1.7454598023730603, "learning_rate": 2.271319633120265e-06, "loss": 0.5628, "step": 22619 }, { "epoch": 0.6932695844060316, "grad_norm": 1.5354961632052873, "learning_rate": 2.270903753496694e-06, "loss": 0.508, "step": 22620 }, { "epoch": 0.6933002329287729, "grad_norm": 1.7841098802150326, "learning_rate": 2.2704879007636077e-06, "loss": 0.589, "step": 22621 }, { "epoch": 0.693330881451514, "grad_norm": 1.6619428709952326, "learning_rate": 2.2700720749251016e-06, "loss": 0.6064, "step": 22622 }, { "epoch": 0.6933615299742553, "grad_norm": 2.168277972484202, "learning_rate": 2.2696562759852738e-06, "loss": 0.6684, "step": 22623 }, { "epoch": 0.6933921784969964, "grad_norm": 1.928854136179048, "learning_rate": 2.2692405039482223e-06, "loss": 0.6333, "step": 22624 }, { "epoch": 0.6934228270197377, "grad_norm": 1.6421399516049275, "learning_rate": 2.2688247588180433e-06, "loss": 0.5939, "step": 22625 }, { "epoch": 0.6934534755424788, "grad_norm": 0.8016450394654123, "learning_rate": 2.2684090405988315e-06, "loss": 0.3972, "step": 22626 }, { "epoch": 0.6934841240652201, "grad_norm": 1.802669088032458, "learning_rate": 2.2679933492946837e-06, "loss": 0.5838, "step": 22627 }, { "epoch": 0.6935147725879612, "grad_norm": 1.7533636705502424, "learning_rate": 2.267577684909698e-06, "loss": 0.6453, "step": 22628 }, { "epoch": 0.6935454211107025, "grad_norm": 0.752706468523437, "learning_rate": 2.267162047447967e-06, "loss": 0.4138, "step": 22629 }, { "epoch": 0.6935760696334436, "grad_norm": 1.5503067624298514, "learning_rate": 2.266746436913588e-06, "loss": 0.5683, "step": 22630 }, { "epoch": 0.6936067181561849, "grad_norm": 1.8234729146985962, "learning_rate": 2.2663308533106555e-06, "loss": 0.5511, "step": 22631 }, { "epoch": 0.6936373666789261, "grad_norm": 1.8244538190560413, "learning_rate": 2.265915296643266e-06, "loss": 0.5259, "step": 22632 }, { "epoch": 0.6936680152016673, "grad_norm": 1.593017136761153, "learning_rate": 2.2654997669155125e-06, "loss": 0.5041, "step": 22633 }, { "epoch": 0.6936986637244085, "grad_norm": 0.8357134558135895, "learning_rate": 2.2650842641314864e-06, "loss": 0.3915, "step": 22634 }, { "epoch": 0.6937293122471497, "grad_norm": 1.978088398349308, "learning_rate": 2.2646687882952884e-06, "loss": 0.6603, "step": 22635 }, { "epoch": 0.6937599607698909, "grad_norm": 1.7791274050131123, "learning_rate": 2.2642533394110082e-06, "loss": 0.5579, "step": 22636 }, { "epoch": 0.6937906092926321, "grad_norm": 1.6671891499524227, "learning_rate": 2.2638379174827385e-06, "loss": 0.5565, "step": 22637 }, { "epoch": 0.6938212578153733, "grad_norm": 1.659110174146202, "learning_rate": 2.2634225225145733e-06, "loss": 0.5448, "step": 22638 }, { "epoch": 0.6938519063381146, "grad_norm": 1.7662429658312313, "learning_rate": 2.2630071545106064e-06, "loss": 0.6212, "step": 22639 }, { "epoch": 0.6938825548608557, "grad_norm": 1.8068662893492715, "learning_rate": 2.2625918134749326e-06, "loss": 0.6044, "step": 22640 }, { "epoch": 0.693913203383597, "grad_norm": 0.8353423815460664, "learning_rate": 2.2621764994116395e-06, "loss": 0.4164, "step": 22641 }, { "epoch": 0.6939438519063381, "grad_norm": 1.7336133779358742, "learning_rate": 2.2617612123248223e-06, "loss": 0.6193, "step": 22642 }, { "epoch": 0.6939745004290793, "grad_norm": 1.6743386422138877, "learning_rate": 2.2613459522185744e-06, "loss": 0.6251, "step": 22643 }, { "epoch": 0.6940051489518205, "grad_norm": 0.8269170670888853, "learning_rate": 2.2609307190969852e-06, "loss": 0.4188, "step": 22644 }, { "epoch": 0.6940357974745617, "grad_norm": 1.8657178910566885, "learning_rate": 2.260515512964143e-06, "loss": 0.6419, "step": 22645 }, { "epoch": 0.6940664459973029, "grad_norm": 1.8695355742110906, "learning_rate": 2.260100333824146e-06, "loss": 0.5991, "step": 22646 }, { "epoch": 0.6940970945200441, "grad_norm": 1.7641895510759291, "learning_rate": 2.2596851816810815e-06, "loss": 0.582, "step": 22647 }, { "epoch": 0.6941277430427854, "grad_norm": 1.6561623689121385, "learning_rate": 2.259270056539038e-06, "loss": 0.6336, "step": 22648 }, { "epoch": 0.6941583915655265, "grad_norm": 1.821987022017556, "learning_rate": 2.258854958402108e-06, "loss": 0.5776, "step": 22649 }, { "epoch": 0.6941890400882678, "grad_norm": 1.9645841500672292, "learning_rate": 2.2584398872743817e-06, "loss": 0.6411, "step": 22650 }, { "epoch": 0.6942196886110089, "grad_norm": 1.8211714374928254, "learning_rate": 2.25802484315995e-06, "loss": 0.6504, "step": 22651 }, { "epoch": 0.6942503371337502, "grad_norm": 0.7738056874246824, "learning_rate": 2.2576098260629e-06, "loss": 0.3697, "step": 22652 }, { "epoch": 0.6942809856564913, "grad_norm": 1.7910251280310048, "learning_rate": 2.2571948359873213e-06, "loss": 0.6154, "step": 22653 }, { "epoch": 0.6943116341792326, "grad_norm": 1.8041761347534284, "learning_rate": 2.256779872937306e-06, "loss": 0.5645, "step": 22654 }, { "epoch": 0.6943422827019737, "grad_norm": 1.7892888620401886, "learning_rate": 2.25636493691694e-06, "loss": 0.5656, "step": 22655 }, { "epoch": 0.694372931224715, "grad_norm": 2.121312162604777, "learning_rate": 2.2559500279303087e-06, "loss": 0.6214, "step": 22656 }, { "epoch": 0.6944035797474561, "grad_norm": 0.7937624917633198, "learning_rate": 2.2555351459815076e-06, "loss": 0.4195, "step": 22657 }, { "epoch": 0.6944342282701974, "grad_norm": 1.716439271149059, "learning_rate": 2.2551202910746196e-06, "loss": 0.6947, "step": 22658 }, { "epoch": 0.6944648767929386, "grad_norm": 1.7648421824874578, "learning_rate": 2.254705463213735e-06, "loss": 0.5575, "step": 22659 }, { "epoch": 0.6944955253156798, "grad_norm": 1.9491891711354292, "learning_rate": 2.254290662402938e-06, "loss": 0.6359, "step": 22660 }, { "epoch": 0.694526173838421, "grad_norm": 0.7874866576181201, "learning_rate": 2.2538758886463174e-06, "loss": 0.4193, "step": 22661 }, { "epoch": 0.6945568223611622, "grad_norm": 1.5674967467570387, "learning_rate": 2.253461141947963e-06, "loss": 0.6543, "step": 22662 }, { "epoch": 0.6945874708839034, "grad_norm": 1.8810233885752468, "learning_rate": 2.253046422311956e-06, "loss": 0.5452, "step": 22663 }, { "epoch": 0.6946181194066446, "grad_norm": 1.8349405784235446, "learning_rate": 2.252631729742386e-06, "loss": 0.5781, "step": 22664 }, { "epoch": 0.6946487679293858, "grad_norm": 1.5934756317614658, "learning_rate": 2.25221706424334e-06, "loss": 0.6246, "step": 22665 }, { "epoch": 0.694679416452127, "grad_norm": 2.0752784331415497, "learning_rate": 2.2518024258189004e-06, "loss": 0.6834, "step": 22666 }, { "epoch": 0.6947100649748682, "grad_norm": 1.9264257453483267, "learning_rate": 2.251387814473155e-06, "loss": 0.6458, "step": 22667 }, { "epoch": 0.6947407134976095, "grad_norm": 1.7618399484231493, "learning_rate": 2.2509732302101906e-06, "loss": 0.5459, "step": 22668 }, { "epoch": 0.6947713620203506, "grad_norm": 1.771438676225829, "learning_rate": 2.2505586730340884e-06, "loss": 0.5259, "step": 22669 }, { "epoch": 0.6948020105430919, "grad_norm": 1.6453847489640145, "learning_rate": 2.2501441429489366e-06, "loss": 0.5208, "step": 22670 }, { "epoch": 0.694832659065833, "grad_norm": 1.8343607487126112, "learning_rate": 2.2497296399588166e-06, "loss": 0.5956, "step": 22671 }, { "epoch": 0.6948633075885743, "grad_norm": 1.8216515208727393, "learning_rate": 2.249315164067814e-06, "loss": 0.6269, "step": 22672 }, { "epoch": 0.6948939561113154, "grad_norm": 1.8489297733244867, "learning_rate": 2.2489007152800146e-06, "loss": 0.5755, "step": 22673 }, { "epoch": 0.6949246046340566, "grad_norm": 1.7566955510724316, "learning_rate": 2.248486293599499e-06, "loss": 0.5238, "step": 22674 }, { "epoch": 0.6949552531567978, "grad_norm": 1.8114618345149243, "learning_rate": 2.2480718990303517e-06, "loss": 0.5384, "step": 22675 }, { "epoch": 0.694985901679539, "grad_norm": 1.7331874161075855, "learning_rate": 2.247657531576658e-06, "loss": 0.5937, "step": 22676 }, { "epoch": 0.6950165502022803, "grad_norm": 1.9323862099562008, "learning_rate": 2.247243191242497e-06, "loss": 0.6342, "step": 22677 }, { "epoch": 0.6950471987250214, "grad_norm": 1.7213345587455207, "learning_rate": 2.246828878031955e-06, "loss": 0.5682, "step": 22678 }, { "epoch": 0.6950778472477627, "grad_norm": 0.8383703173268262, "learning_rate": 2.2464145919491105e-06, "loss": 0.406, "step": 22679 }, { "epoch": 0.6951084957705038, "grad_norm": 1.7517496963718013, "learning_rate": 2.246000332998047e-06, "loss": 0.5371, "step": 22680 }, { "epoch": 0.6951391442932451, "grad_norm": 0.7997288727195855, "learning_rate": 2.2455861011828494e-06, "loss": 0.4231, "step": 22681 }, { "epoch": 0.6951697928159862, "grad_norm": 1.7690568244471108, "learning_rate": 2.245171896507595e-06, "loss": 0.5908, "step": 22682 }, { "epoch": 0.6952004413387275, "grad_norm": 1.8629745175594992, "learning_rate": 2.2447577189763662e-06, "loss": 0.6035, "step": 22683 }, { "epoch": 0.6952310898614686, "grad_norm": 2.06222697497315, "learning_rate": 2.244343568593247e-06, "loss": 0.6371, "step": 22684 }, { "epoch": 0.6952617383842099, "grad_norm": 1.9334741083035445, "learning_rate": 2.2439294453623135e-06, "loss": 0.6775, "step": 22685 }, { "epoch": 0.695292386906951, "grad_norm": 0.776645138295172, "learning_rate": 2.2435153492876484e-06, "loss": 0.4236, "step": 22686 }, { "epoch": 0.6953230354296923, "grad_norm": 1.8604267742319989, "learning_rate": 2.2431012803733337e-06, "loss": 0.6027, "step": 22687 }, { "epoch": 0.6953536839524335, "grad_norm": 1.7036184232418234, "learning_rate": 2.2426872386234457e-06, "loss": 0.5826, "step": 22688 }, { "epoch": 0.6953843324751747, "grad_norm": 1.6598706011540547, "learning_rate": 2.2422732240420674e-06, "loss": 0.5836, "step": 22689 }, { "epoch": 0.6954149809979159, "grad_norm": 1.9631350034967645, "learning_rate": 2.2418592366332753e-06, "loss": 0.5556, "step": 22690 }, { "epoch": 0.6954456295206571, "grad_norm": 1.7232815021459154, "learning_rate": 2.2414452764011495e-06, "loss": 0.6181, "step": 22691 }, { "epoch": 0.6954762780433983, "grad_norm": 1.54472745791131, "learning_rate": 2.241031343349771e-06, "loss": 0.5643, "step": 22692 }, { "epoch": 0.6955069265661395, "grad_norm": 1.8436264282110053, "learning_rate": 2.2406174374832147e-06, "loss": 0.6398, "step": 22693 }, { "epoch": 0.6955375750888807, "grad_norm": 2.121062696649265, "learning_rate": 2.240203558805561e-06, "loss": 0.5968, "step": 22694 }, { "epoch": 0.695568223611622, "grad_norm": 1.8117802831669787, "learning_rate": 2.2397897073208897e-06, "loss": 0.6508, "step": 22695 }, { "epoch": 0.6955988721343631, "grad_norm": 1.654248887502437, "learning_rate": 2.2393758830332744e-06, "loss": 0.5191, "step": 22696 }, { "epoch": 0.6956295206571044, "grad_norm": 1.6107779046937623, "learning_rate": 2.238962085946795e-06, "loss": 0.593, "step": 22697 }, { "epoch": 0.6956601691798455, "grad_norm": 1.905383713486433, "learning_rate": 2.238548316065531e-06, "loss": 0.6113, "step": 22698 }, { "epoch": 0.6956908177025868, "grad_norm": 1.7197223957693888, "learning_rate": 2.2381345733935545e-06, "loss": 0.5169, "step": 22699 }, { "epoch": 0.6957214662253279, "grad_norm": 1.7151689073220668, "learning_rate": 2.2377208579349464e-06, "loss": 0.6721, "step": 22700 }, { "epoch": 0.6957521147480692, "grad_norm": 2.097835753650382, "learning_rate": 2.23730716969378e-06, "loss": 0.5936, "step": 22701 }, { "epoch": 0.6957827632708103, "grad_norm": 1.732974981736827, "learning_rate": 2.2368935086741326e-06, "loss": 0.6306, "step": 22702 }, { "epoch": 0.6958134117935516, "grad_norm": 1.9231303612826194, "learning_rate": 2.2364798748800826e-06, "loss": 0.6081, "step": 22703 }, { "epoch": 0.6958440603162928, "grad_norm": 1.8258437386031807, "learning_rate": 2.2360662683157016e-06, "loss": 0.5605, "step": 22704 }, { "epoch": 0.6958747088390339, "grad_norm": 1.6560632121595371, "learning_rate": 2.2356526889850666e-06, "loss": 0.5819, "step": 22705 }, { "epoch": 0.6959053573617752, "grad_norm": 0.8341753684466227, "learning_rate": 2.235239136892255e-06, "loss": 0.4012, "step": 22706 }, { "epoch": 0.6959360058845163, "grad_norm": 1.68154254539331, "learning_rate": 2.234825612041338e-06, "loss": 0.5645, "step": 22707 }, { "epoch": 0.6959666544072576, "grad_norm": 2.0806554370173123, "learning_rate": 2.2344121144363912e-06, "loss": 0.666, "step": 22708 }, { "epoch": 0.6959973029299987, "grad_norm": 2.0713953785459056, "learning_rate": 2.2339986440814916e-06, "loss": 0.566, "step": 22709 }, { "epoch": 0.69602795145274, "grad_norm": 2.1515477270071752, "learning_rate": 2.233585200980709e-06, "loss": 0.606, "step": 22710 }, { "epoch": 0.6960585999754811, "grad_norm": 0.7613720648788914, "learning_rate": 2.233171785138121e-06, "loss": 0.4167, "step": 22711 }, { "epoch": 0.6960892484982224, "grad_norm": 1.7095880714447762, "learning_rate": 2.2327583965577965e-06, "loss": 0.5829, "step": 22712 }, { "epoch": 0.6961198970209636, "grad_norm": 1.5722560474482359, "learning_rate": 2.232345035243814e-06, "loss": 0.5675, "step": 22713 }, { "epoch": 0.6961505455437048, "grad_norm": 1.7790501003809076, "learning_rate": 2.2319317012002452e-06, "loss": 0.5937, "step": 22714 }, { "epoch": 0.696181194066446, "grad_norm": 1.7534837423858534, "learning_rate": 2.231518394431159e-06, "loss": 0.533, "step": 22715 }, { "epoch": 0.6962118425891872, "grad_norm": 1.7243798507160466, "learning_rate": 2.2311051149406303e-06, "loss": 0.4857, "step": 22716 }, { "epoch": 0.6962424911119284, "grad_norm": 0.7659002287199648, "learning_rate": 2.2306918627327335e-06, "loss": 0.4099, "step": 22717 }, { "epoch": 0.6962731396346696, "grad_norm": 1.8882552679141367, "learning_rate": 2.2302786378115367e-06, "loss": 0.6842, "step": 22718 }, { "epoch": 0.6963037881574108, "grad_norm": 1.870043302013146, "learning_rate": 2.2298654401811126e-06, "loss": 0.6108, "step": 22719 }, { "epoch": 0.696334436680152, "grad_norm": 1.4933574843436779, "learning_rate": 2.2294522698455332e-06, "loss": 0.5043, "step": 22720 }, { "epoch": 0.6963650852028932, "grad_norm": 1.771406932755389, "learning_rate": 2.229039126808872e-06, "loss": 0.6223, "step": 22721 }, { "epoch": 0.6963957337256345, "grad_norm": 1.7527099865794793, "learning_rate": 2.2286260110751968e-06, "loss": 0.5839, "step": 22722 }, { "epoch": 0.6964263822483756, "grad_norm": 2.253457472582216, "learning_rate": 2.2282129226485767e-06, "loss": 0.6067, "step": 22723 }, { "epoch": 0.6964570307711169, "grad_norm": 1.640746400165096, "learning_rate": 2.227799861533084e-06, "loss": 0.5411, "step": 22724 }, { "epoch": 0.696487679293858, "grad_norm": 1.8934266441338747, "learning_rate": 2.2273868277327896e-06, "loss": 0.4968, "step": 22725 }, { "epoch": 0.6965183278165993, "grad_norm": 0.8517234113488077, "learning_rate": 2.2269738212517617e-06, "loss": 0.421, "step": 22726 }, { "epoch": 0.6965489763393404, "grad_norm": 1.813645140866868, "learning_rate": 2.2265608420940694e-06, "loss": 0.5121, "step": 22727 }, { "epoch": 0.6965796248620817, "grad_norm": 1.8072546296521188, "learning_rate": 2.2261478902637847e-06, "loss": 0.5825, "step": 22728 }, { "epoch": 0.6966102733848228, "grad_norm": 1.9057027683401109, "learning_rate": 2.225734965764973e-06, "loss": 0.6537, "step": 22729 }, { "epoch": 0.6966409219075641, "grad_norm": 1.856903807998441, "learning_rate": 2.2253220686017056e-06, "loss": 0.5652, "step": 22730 }, { "epoch": 0.6966715704303053, "grad_norm": 1.8510589556309136, "learning_rate": 2.224909198778047e-06, "loss": 0.5693, "step": 22731 }, { "epoch": 0.6967022189530465, "grad_norm": 1.908035749774656, "learning_rate": 2.2244963562980713e-06, "loss": 0.6508, "step": 22732 }, { "epoch": 0.6967328674757877, "grad_norm": 1.8153259755874758, "learning_rate": 2.2240835411658435e-06, "loss": 0.5841, "step": 22733 }, { "epoch": 0.6967635159985289, "grad_norm": 1.5648843569217916, "learning_rate": 2.2236707533854285e-06, "loss": 0.5325, "step": 22734 }, { "epoch": 0.6967941645212701, "grad_norm": 0.790027964159758, "learning_rate": 2.2232579929608962e-06, "loss": 0.4009, "step": 22735 }, { "epoch": 0.6968248130440112, "grad_norm": 1.6495338999817106, "learning_rate": 2.222845259896315e-06, "loss": 0.5924, "step": 22736 }, { "epoch": 0.6968554615667525, "grad_norm": 1.6466108004052802, "learning_rate": 2.2224325541957483e-06, "loss": 0.5868, "step": 22737 }, { "epoch": 0.6968861100894936, "grad_norm": 1.6484808313440018, "learning_rate": 2.2220198758632645e-06, "loss": 0.6193, "step": 22738 }, { "epoch": 0.6969167586122349, "grad_norm": 1.954945602041418, "learning_rate": 2.221607224902929e-06, "loss": 0.6372, "step": 22739 }, { "epoch": 0.696947407134976, "grad_norm": 1.9510339321760652, "learning_rate": 2.221194601318811e-06, "loss": 0.601, "step": 22740 }, { "epoch": 0.6969780556577173, "grad_norm": 2.0019834368964435, "learning_rate": 2.2207820051149735e-06, "loss": 0.5957, "step": 22741 }, { "epoch": 0.6970087041804585, "grad_norm": 1.8811930810086088, "learning_rate": 2.220369436295478e-06, "loss": 0.618, "step": 22742 }, { "epoch": 0.6970393527031997, "grad_norm": 1.8418987512819864, "learning_rate": 2.219956894864397e-06, "loss": 0.5764, "step": 22743 }, { "epoch": 0.6970700012259409, "grad_norm": 1.9985676025438108, "learning_rate": 2.219544380825793e-06, "loss": 0.5414, "step": 22744 }, { "epoch": 0.6971006497486821, "grad_norm": 1.804564089171388, "learning_rate": 2.219131894183727e-06, "loss": 0.544, "step": 22745 }, { "epoch": 0.6971312982714233, "grad_norm": 0.7912240729562265, "learning_rate": 2.2187194349422666e-06, "loss": 0.4091, "step": 22746 }, { "epoch": 0.6971619467941645, "grad_norm": 1.6753592497794894, "learning_rate": 2.2183070031054748e-06, "loss": 0.5762, "step": 22747 }, { "epoch": 0.6971925953169057, "grad_norm": 0.7738619675542164, "learning_rate": 2.2178945986774176e-06, "loss": 0.4124, "step": 22748 }, { "epoch": 0.697223243839647, "grad_norm": 1.8993907887277857, "learning_rate": 2.217482221662155e-06, "loss": 0.5993, "step": 22749 }, { "epoch": 0.6972538923623881, "grad_norm": 0.7918723031128372, "learning_rate": 2.217069872063752e-06, "loss": 0.4187, "step": 22750 }, { "epoch": 0.6972845408851294, "grad_norm": 1.7807998890923225, "learning_rate": 2.2166575498862734e-06, "loss": 0.5926, "step": 22751 }, { "epoch": 0.6973151894078705, "grad_norm": 1.6735376156640178, "learning_rate": 2.2162452551337804e-06, "loss": 0.634, "step": 22752 }, { "epoch": 0.6973458379306118, "grad_norm": 1.7778379796144674, "learning_rate": 2.215832987810331e-06, "loss": 0.5243, "step": 22753 }, { "epoch": 0.6973764864533529, "grad_norm": 1.9309465262079186, "learning_rate": 2.215420747919996e-06, "loss": 0.6395, "step": 22754 }, { "epoch": 0.6974071349760942, "grad_norm": 1.912839853521682, "learning_rate": 2.2150085354668317e-06, "loss": 0.6173, "step": 22755 }, { "epoch": 0.6974377834988353, "grad_norm": 2.040587561838959, "learning_rate": 2.2145963504548995e-06, "loss": 0.6048, "step": 22756 }, { "epoch": 0.6974684320215766, "grad_norm": 1.9787630159630296, "learning_rate": 2.2141841928882624e-06, "loss": 0.5499, "step": 22757 }, { "epoch": 0.6974990805443178, "grad_norm": 1.698183969315406, "learning_rate": 2.2137720627709812e-06, "loss": 0.5717, "step": 22758 }, { "epoch": 0.697529729067059, "grad_norm": 1.713249326644944, "learning_rate": 2.213359960107118e-06, "loss": 0.5807, "step": 22759 }, { "epoch": 0.6975603775898002, "grad_norm": 1.825007990761892, "learning_rate": 2.212947884900731e-06, "loss": 0.5077, "step": 22760 }, { "epoch": 0.6975910261125414, "grad_norm": 2.0452219675201517, "learning_rate": 2.2125358371558815e-06, "loss": 0.6348, "step": 22761 }, { "epoch": 0.6976216746352826, "grad_norm": 1.7695152649745485, "learning_rate": 2.212123816876631e-06, "loss": 0.5634, "step": 22762 }, { "epoch": 0.6976523231580238, "grad_norm": 1.921414460955554, "learning_rate": 2.211711824067038e-06, "loss": 0.6035, "step": 22763 }, { "epoch": 0.697682971680765, "grad_norm": 0.8407865940180815, "learning_rate": 2.2112998587311584e-06, "loss": 0.4127, "step": 22764 }, { "epoch": 0.6977136202035062, "grad_norm": 0.7628555456237724, "learning_rate": 2.210887920873058e-06, "loss": 0.3862, "step": 22765 }, { "epoch": 0.6977442687262474, "grad_norm": 0.8046118608114142, "learning_rate": 2.2104760104967915e-06, "loss": 0.413, "step": 22766 }, { "epoch": 0.6977749172489885, "grad_norm": 1.8975308713000456, "learning_rate": 2.21006412760642e-06, "loss": 0.5717, "step": 22767 }, { "epoch": 0.6978055657717298, "grad_norm": 1.7045921918410976, "learning_rate": 2.2096522722059987e-06, "loss": 0.532, "step": 22768 }, { "epoch": 0.697836214294471, "grad_norm": 0.774868717773979, "learning_rate": 2.2092404442995872e-06, "loss": 0.4076, "step": 22769 }, { "epoch": 0.6978668628172122, "grad_norm": 1.816148028984821, "learning_rate": 2.208828643891246e-06, "loss": 0.5481, "step": 22770 }, { "epoch": 0.6978975113399534, "grad_norm": 1.8890256504726286, "learning_rate": 2.208416870985028e-06, "loss": 0.5646, "step": 22771 }, { "epoch": 0.6979281598626946, "grad_norm": 1.8271360431834327, "learning_rate": 2.2080051255849933e-06, "loss": 0.5985, "step": 22772 }, { "epoch": 0.6979588083854358, "grad_norm": 2.027523184884522, "learning_rate": 2.2075934076952e-06, "loss": 0.5249, "step": 22773 }, { "epoch": 0.697989456908177, "grad_norm": 1.8175086889661691, "learning_rate": 2.2071817173197014e-06, "loss": 0.6252, "step": 22774 }, { "epoch": 0.6980201054309182, "grad_norm": 1.942028261370505, "learning_rate": 2.2067700544625577e-06, "loss": 0.6057, "step": 22775 }, { "epoch": 0.6980507539536595, "grad_norm": 1.798231624447754, "learning_rate": 2.2063584191278213e-06, "loss": 0.6566, "step": 22776 }, { "epoch": 0.6980814024764006, "grad_norm": 1.9198527492034814, "learning_rate": 2.205946811319551e-06, "loss": 0.603, "step": 22777 }, { "epoch": 0.6981120509991419, "grad_norm": 1.9965721758888757, "learning_rate": 2.205535231041803e-06, "loss": 0.4617, "step": 22778 }, { "epoch": 0.698142699521883, "grad_norm": 1.806696046319959, "learning_rate": 2.2051236782986295e-06, "loss": 0.5842, "step": 22779 }, { "epoch": 0.6981733480446243, "grad_norm": 2.1467026465985333, "learning_rate": 2.2047121530940873e-06, "loss": 0.6567, "step": 22780 }, { "epoch": 0.6982039965673654, "grad_norm": 0.8458617864157207, "learning_rate": 2.204300655432234e-06, "loss": 0.414, "step": 22781 }, { "epoch": 0.6982346450901067, "grad_norm": 1.7415506446299638, "learning_rate": 2.2038891853171213e-06, "loss": 0.5118, "step": 22782 }, { "epoch": 0.6982652936128478, "grad_norm": 0.8464637945193016, "learning_rate": 2.2034777427527998e-06, "loss": 0.4188, "step": 22783 }, { "epoch": 0.6982959421355891, "grad_norm": 1.948735299107204, "learning_rate": 2.2030663277433316e-06, "loss": 0.6413, "step": 22784 }, { "epoch": 0.6983265906583302, "grad_norm": 1.734808233986996, "learning_rate": 2.2026549402927644e-06, "loss": 0.5326, "step": 22785 }, { "epoch": 0.6983572391810715, "grad_norm": 2.1337665775461745, "learning_rate": 2.202243580405156e-06, "loss": 0.6219, "step": 22786 }, { "epoch": 0.6983878877038127, "grad_norm": 1.6640277792677984, "learning_rate": 2.2018322480845554e-06, "loss": 0.5029, "step": 22787 }, { "epoch": 0.6984185362265539, "grad_norm": 1.6717636955247908, "learning_rate": 2.201420943335018e-06, "loss": 0.5846, "step": 22788 }, { "epoch": 0.6984491847492951, "grad_norm": 1.9617789097042704, "learning_rate": 2.2010096661605973e-06, "loss": 0.6087, "step": 22789 }, { "epoch": 0.6984798332720363, "grad_norm": 0.7609535884097675, "learning_rate": 2.200598416565343e-06, "loss": 0.3948, "step": 22790 }, { "epoch": 0.6985104817947775, "grad_norm": 1.534927025310699, "learning_rate": 2.2001871945533087e-06, "loss": 0.5362, "step": 22791 }, { "epoch": 0.6985411303175187, "grad_norm": 0.7666575372418257, "learning_rate": 2.1997760001285485e-06, "loss": 0.4086, "step": 22792 }, { "epoch": 0.6985717788402599, "grad_norm": 1.9846606634989685, "learning_rate": 2.19936483329511e-06, "loss": 0.5925, "step": 22793 }, { "epoch": 0.6986024273630012, "grad_norm": 1.7599367989698274, "learning_rate": 2.198953694057046e-06, "loss": 0.6261, "step": 22794 }, { "epoch": 0.6986330758857423, "grad_norm": 1.8581076928347187, "learning_rate": 2.1985425824184096e-06, "loss": 0.522, "step": 22795 }, { "epoch": 0.6986637244084836, "grad_norm": 1.7631258913580707, "learning_rate": 2.1981314983832484e-06, "loss": 0.6898, "step": 22796 }, { "epoch": 0.6986943729312247, "grad_norm": 0.7845632317507744, "learning_rate": 2.1977204419556163e-06, "loss": 0.3825, "step": 22797 }, { "epoch": 0.6987250214539659, "grad_norm": 1.9051408135488765, "learning_rate": 2.19730941313956e-06, "loss": 0.6434, "step": 22798 }, { "epoch": 0.6987556699767071, "grad_norm": 1.6824930968218572, "learning_rate": 2.1968984119391308e-06, "loss": 0.6344, "step": 22799 }, { "epoch": 0.6987863184994483, "grad_norm": 1.99882363910122, "learning_rate": 2.1964874383583805e-06, "loss": 0.5499, "step": 22800 }, { "epoch": 0.6988169670221895, "grad_norm": 1.6865221778283968, "learning_rate": 2.1960764924013554e-06, "loss": 0.6166, "step": 22801 }, { "epoch": 0.6988476155449307, "grad_norm": 1.8120804023590837, "learning_rate": 2.1956655740721056e-06, "loss": 0.6318, "step": 22802 }, { "epoch": 0.698878264067672, "grad_norm": 1.9035693241579745, "learning_rate": 2.1952546833746825e-06, "loss": 0.6658, "step": 22803 }, { "epoch": 0.6989089125904131, "grad_norm": 1.7341017643967376, "learning_rate": 2.1948438203131306e-06, "loss": 0.5472, "step": 22804 }, { "epoch": 0.6989395611131544, "grad_norm": 1.8826041870392827, "learning_rate": 2.194432984891501e-06, "loss": 0.6313, "step": 22805 }, { "epoch": 0.6989702096358955, "grad_norm": 1.8707814030444583, "learning_rate": 2.194022177113842e-06, "loss": 0.6512, "step": 22806 }, { "epoch": 0.6990008581586368, "grad_norm": 1.8058903111811595, "learning_rate": 2.193611396984199e-06, "loss": 0.5684, "step": 22807 }, { "epoch": 0.6990315066813779, "grad_norm": 0.7754403119270175, "learning_rate": 2.193200644506622e-06, "loss": 0.4143, "step": 22808 }, { "epoch": 0.6990621552041192, "grad_norm": 2.041276512887243, "learning_rate": 2.1927899196851564e-06, "loss": 0.5135, "step": 22809 }, { "epoch": 0.6990928037268603, "grad_norm": 1.817845996600563, "learning_rate": 2.19237922252385e-06, "loss": 0.5416, "step": 22810 }, { "epoch": 0.6991234522496016, "grad_norm": 1.825876497125441, "learning_rate": 2.1919685530267503e-06, "loss": 0.5917, "step": 22811 }, { "epoch": 0.6991541007723427, "grad_norm": 1.7872732480227917, "learning_rate": 2.191557911197902e-06, "loss": 0.6586, "step": 22812 }, { "epoch": 0.699184749295084, "grad_norm": 1.466533039813173, "learning_rate": 2.1911472970413517e-06, "loss": 0.5443, "step": 22813 }, { "epoch": 0.6992153978178252, "grad_norm": 1.6660468353410591, "learning_rate": 2.1907367105611475e-06, "loss": 0.5688, "step": 22814 }, { "epoch": 0.6992460463405664, "grad_norm": 2.131347852356924, "learning_rate": 2.1903261517613324e-06, "loss": 0.5956, "step": 22815 }, { "epoch": 0.6992766948633076, "grad_norm": 1.930047738184302, "learning_rate": 2.1899156206459515e-06, "loss": 0.6469, "step": 22816 }, { "epoch": 0.6993073433860488, "grad_norm": 1.7815727493065772, "learning_rate": 2.1895051172190535e-06, "loss": 0.5402, "step": 22817 }, { "epoch": 0.69933799190879, "grad_norm": 1.8081894569291301, "learning_rate": 2.1890946414846785e-06, "loss": 0.6676, "step": 22818 }, { "epoch": 0.6993686404315312, "grad_norm": 1.916404578729651, "learning_rate": 2.188684193446875e-06, "loss": 0.5721, "step": 22819 }, { "epoch": 0.6993992889542724, "grad_norm": 1.8205411879015394, "learning_rate": 2.188273773109684e-06, "loss": 0.6207, "step": 22820 }, { "epoch": 0.6994299374770137, "grad_norm": 1.7567555139999587, "learning_rate": 2.1878633804771506e-06, "loss": 0.5843, "step": 22821 }, { "epoch": 0.6994605859997548, "grad_norm": 1.6996554407557818, "learning_rate": 2.18745301555332e-06, "loss": 0.5956, "step": 22822 }, { "epoch": 0.6994912345224961, "grad_norm": 1.630153539136616, "learning_rate": 2.187042678342234e-06, "loss": 0.5888, "step": 22823 }, { "epoch": 0.6995218830452372, "grad_norm": 1.8721714796553732, "learning_rate": 2.186632368847935e-06, "loss": 0.6644, "step": 22824 }, { "epoch": 0.6995525315679785, "grad_norm": 1.6866660847725916, "learning_rate": 2.18622208707447e-06, "loss": 0.5754, "step": 22825 }, { "epoch": 0.6995831800907196, "grad_norm": 1.7190413016277915, "learning_rate": 2.185811833025876e-06, "loss": 0.635, "step": 22826 }, { "epoch": 0.6996138286134609, "grad_norm": 1.8593800784998835, "learning_rate": 2.1854016067062006e-06, "loss": 0.5452, "step": 22827 }, { "epoch": 0.699644477136202, "grad_norm": 1.9735014624734837, "learning_rate": 2.184991408119481e-06, "loss": 0.6409, "step": 22828 }, { "epoch": 0.6996751256589432, "grad_norm": 1.6858127484942012, "learning_rate": 2.184581237269761e-06, "loss": 0.5719, "step": 22829 }, { "epoch": 0.6997057741816844, "grad_norm": 1.8479834439629965, "learning_rate": 2.184171094161085e-06, "loss": 0.5962, "step": 22830 }, { "epoch": 0.6997364227044256, "grad_norm": 1.5837593275078599, "learning_rate": 2.1837609787974894e-06, "loss": 0.4814, "step": 22831 }, { "epoch": 0.6997670712271669, "grad_norm": 0.7877135209156009, "learning_rate": 2.1833508911830175e-06, "loss": 0.4021, "step": 22832 }, { "epoch": 0.699797719749908, "grad_norm": 1.7889283692332174, "learning_rate": 2.182940831321712e-06, "loss": 0.6213, "step": 22833 }, { "epoch": 0.6998283682726493, "grad_norm": 1.7901466118777165, "learning_rate": 2.1825307992176094e-06, "loss": 0.5926, "step": 22834 }, { "epoch": 0.6998590167953904, "grad_norm": 0.8086512738728966, "learning_rate": 2.1821207948747513e-06, "loss": 0.4028, "step": 22835 }, { "epoch": 0.6998896653181317, "grad_norm": 2.237638422239824, "learning_rate": 2.18171081829718e-06, "loss": 0.6438, "step": 22836 }, { "epoch": 0.6999203138408728, "grad_norm": 1.9411643396430616, "learning_rate": 2.1813008694889314e-06, "loss": 0.6401, "step": 22837 }, { "epoch": 0.6999509623636141, "grad_norm": 1.8326164784280388, "learning_rate": 2.1808909484540486e-06, "loss": 0.6374, "step": 22838 }, { "epoch": 0.6999816108863552, "grad_norm": 1.8894427279900063, "learning_rate": 2.180481055196565e-06, "loss": 0.5453, "step": 22839 }, { "epoch": 0.7000122594090965, "grad_norm": 1.7441155550521523, "learning_rate": 2.180071189720526e-06, "loss": 0.6182, "step": 22840 }, { "epoch": 0.7000429079318377, "grad_norm": 1.7962695078713702, "learning_rate": 2.1796613520299677e-06, "loss": 0.6549, "step": 22841 }, { "epoch": 0.7000735564545789, "grad_norm": 1.9243105020348978, "learning_rate": 2.179251542128925e-06, "loss": 0.6341, "step": 22842 }, { "epoch": 0.7001042049773201, "grad_norm": 1.7425602216453338, "learning_rate": 2.1788417600214398e-06, "loss": 0.6027, "step": 22843 }, { "epoch": 0.7001348535000613, "grad_norm": 0.7651811404263347, "learning_rate": 2.1784320057115493e-06, "loss": 0.4005, "step": 22844 }, { "epoch": 0.7001655020228025, "grad_norm": 1.658776934239972, "learning_rate": 2.1780222792032885e-06, "loss": 0.5156, "step": 22845 }, { "epoch": 0.7001961505455437, "grad_norm": 1.868534022962396, "learning_rate": 2.1776125805006964e-06, "loss": 0.6287, "step": 22846 }, { "epoch": 0.7002267990682849, "grad_norm": 1.7766689901380281, "learning_rate": 2.17720290960781e-06, "loss": 0.6203, "step": 22847 }, { "epoch": 0.7002574475910261, "grad_norm": 1.5896669772298289, "learning_rate": 2.176793266528667e-06, "loss": 0.5279, "step": 22848 }, { "epoch": 0.7002880961137673, "grad_norm": 1.9243705657999028, "learning_rate": 2.1763836512673025e-06, "loss": 0.6688, "step": 22849 }, { "epoch": 0.7003187446365086, "grad_norm": 1.7575782943816662, "learning_rate": 2.1759740638277486e-06, "loss": 0.5872, "step": 22850 }, { "epoch": 0.7003493931592497, "grad_norm": 0.8421903470649282, "learning_rate": 2.175564504214049e-06, "loss": 0.4158, "step": 22851 }, { "epoch": 0.700380041681991, "grad_norm": 1.7481724202051054, "learning_rate": 2.175154972430234e-06, "loss": 0.5706, "step": 22852 }, { "epoch": 0.7004106902047321, "grad_norm": 1.8352275649620942, "learning_rate": 2.1747454684803387e-06, "loss": 0.6131, "step": 22853 }, { "epoch": 0.7004413387274734, "grad_norm": 1.7912724505150674, "learning_rate": 2.174335992368399e-06, "loss": 0.5914, "step": 22854 }, { "epoch": 0.7004719872502145, "grad_norm": 1.528940232842247, "learning_rate": 2.1739265440984513e-06, "loss": 0.4765, "step": 22855 }, { "epoch": 0.7005026357729558, "grad_norm": 1.8947855832374714, "learning_rate": 2.1735171236745275e-06, "loss": 0.5326, "step": 22856 }, { "epoch": 0.700533284295697, "grad_norm": 2.1338439018242488, "learning_rate": 2.1731077311006616e-06, "loss": 0.612, "step": 22857 }, { "epoch": 0.7005639328184382, "grad_norm": 1.690758972339081, "learning_rate": 2.172698366380889e-06, "loss": 0.6014, "step": 22858 }, { "epoch": 0.7005945813411794, "grad_norm": 1.7883530876840397, "learning_rate": 2.1722890295192446e-06, "loss": 0.5154, "step": 22859 }, { "epoch": 0.7006252298639205, "grad_norm": 1.700017963954818, "learning_rate": 2.17187972051976e-06, "loss": 0.6822, "step": 22860 }, { "epoch": 0.7006558783866618, "grad_norm": 0.7996697664759217, "learning_rate": 2.1714704393864638e-06, "loss": 0.3974, "step": 22861 }, { "epoch": 0.7006865269094029, "grad_norm": 1.8366087322110007, "learning_rate": 2.1710611861233977e-06, "loss": 0.6067, "step": 22862 }, { "epoch": 0.7007171754321442, "grad_norm": 1.7931876560837638, "learning_rate": 2.170651960734589e-06, "loss": 0.5436, "step": 22863 }, { "epoch": 0.7007478239548853, "grad_norm": 1.6379114701091328, "learning_rate": 2.1702427632240684e-06, "loss": 0.5937, "step": 22864 }, { "epoch": 0.7007784724776266, "grad_norm": 1.7831950318526826, "learning_rate": 2.1698335935958705e-06, "loss": 0.6468, "step": 22865 }, { "epoch": 0.7008091210003677, "grad_norm": 1.79030272152828, "learning_rate": 2.169424451854026e-06, "loss": 0.5411, "step": 22866 }, { "epoch": 0.700839769523109, "grad_norm": 2.113494158350961, "learning_rate": 2.1690153380025685e-06, "loss": 0.7013, "step": 22867 }, { "epoch": 0.7008704180458502, "grad_norm": 1.8340857840946971, "learning_rate": 2.168606252045525e-06, "loss": 0.5973, "step": 22868 }, { "epoch": 0.7009010665685914, "grad_norm": 1.6826603734500611, "learning_rate": 2.1681971939869295e-06, "loss": 0.5058, "step": 22869 }, { "epoch": 0.7009317150913326, "grad_norm": 1.9567708236914139, "learning_rate": 2.1677881638308124e-06, "loss": 0.5329, "step": 22870 }, { "epoch": 0.7009623636140738, "grad_norm": 1.7942772449448061, "learning_rate": 2.167379161581204e-06, "loss": 0.5672, "step": 22871 }, { "epoch": 0.700993012136815, "grad_norm": 1.7437657502290347, "learning_rate": 2.1669701872421313e-06, "loss": 0.5345, "step": 22872 }, { "epoch": 0.7010236606595562, "grad_norm": 1.6893520483426028, "learning_rate": 2.166561240817626e-06, "loss": 0.5599, "step": 22873 }, { "epoch": 0.7010543091822974, "grad_norm": 0.816870341227516, "learning_rate": 2.1661523223117176e-06, "loss": 0.4177, "step": 22874 }, { "epoch": 0.7010849577050386, "grad_norm": 1.8958724044205084, "learning_rate": 2.1657434317284377e-06, "loss": 0.6346, "step": 22875 }, { "epoch": 0.7011156062277798, "grad_norm": 1.6315668614809629, "learning_rate": 2.165334569071811e-06, "loss": 0.6127, "step": 22876 }, { "epoch": 0.7011462547505211, "grad_norm": 1.6672914520897721, "learning_rate": 2.164925734345868e-06, "loss": 0.5376, "step": 22877 }, { "epoch": 0.7011769032732622, "grad_norm": 1.7546671704668273, "learning_rate": 2.164516927554639e-06, "loss": 0.6237, "step": 22878 }, { "epoch": 0.7012075517960035, "grad_norm": 0.790261564945738, "learning_rate": 2.16410814870215e-06, "loss": 0.4233, "step": 22879 }, { "epoch": 0.7012382003187446, "grad_norm": 1.6109330344568775, "learning_rate": 2.163699397792426e-06, "loss": 0.482, "step": 22880 }, { "epoch": 0.7012688488414859, "grad_norm": 1.6583647696367196, "learning_rate": 2.1632906748295006e-06, "loss": 0.587, "step": 22881 }, { "epoch": 0.701299497364227, "grad_norm": 1.8287979716482585, "learning_rate": 2.1628819798173983e-06, "loss": 0.6658, "step": 22882 }, { "epoch": 0.7013301458869683, "grad_norm": 1.5255846849870978, "learning_rate": 2.1624733127601437e-06, "loss": 0.5815, "step": 22883 }, { "epoch": 0.7013607944097094, "grad_norm": 1.9656176514827004, "learning_rate": 2.1620646736617658e-06, "loss": 0.6345, "step": 22884 }, { "epoch": 0.7013914429324507, "grad_norm": 1.778494733646985, "learning_rate": 2.1616560625262904e-06, "loss": 0.6138, "step": 22885 }, { "epoch": 0.7014220914551919, "grad_norm": 1.9031310208519212, "learning_rate": 2.1612474793577458e-06, "loss": 0.7343, "step": 22886 }, { "epoch": 0.7014527399779331, "grad_norm": 0.773258860959156, "learning_rate": 2.160838924160155e-06, "loss": 0.4001, "step": 22887 }, { "epoch": 0.7014833885006743, "grad_norm": 0.8448531467611754, "learning_rate": 2.160430396937544e-06, "loss": 0.3998, "step": 22888 }, { "epoch": 0.7015140370234155, "grad_norm": 1.8033121385579465, "learning_rate": 2.1600218976939413e-06, "loss": 0.4359, "step": 22889 }, { "epoch": 0.7015446855461567, "grad_norm": 2.2273253803067568, "learning_rate": 2.159613426433369e-06, "loss": 0.5795, "step": 22890 }, { "epoch": 0.7015753340688978, "grad_norm": 1.8330283538189287, "learning_rate": 2.1592049831598487e-06, "loss": 0.5802, "step": 22891 }, { "epoch": 0.7016059825916391, "grad_norm": 1.758519038416269, "learning_rate": 2.1587965678774125e-06, "loss": 0.5424, "step": 22892 }, { "epoch": 0.7016366311143802, "grad_norm": 0.7665960750603136, "learning_rate": 2.1583881805900786e-06, "loss": 0.416, "step": 22893 }, { "epoch": 0.7016672796371215, "grad_norm": 0.7790105960489352, "learning_rate": 2.157979821301875e-06, "loss": 0.4157, "step": 22894 }, { "epoch": 0.7016979281598626, "grad_norm": 1.669737988840252, "learning_rate": 2.1575714900168217e-06, "loss": 0.4718, "step": 22895 }, { "epoch": 0.7017285766826039, "grad_norm": 0.7940229093296726, "learning_rate": 2.157163186738943e-06, "loss": 0.4152, "step": 22896 }, { "epoch": 0.7017592252053451, "grad_norm": 1.7536108265169046, "learning_rate": 2.156754911472265e-06, "loss": 0.4579, "step": 22897 }, { "epoch": 0.7017898737280863, "grad_norm": 1.9590651622140762, "learning_rate": 2.156346664220807e-06, "loss": 0.7166, "step": 22898 }, { "epoch": 0.7018205222508275, "grad_norm": 1.8733200877264609, "learning_rate": 2.155938444988593e-06, "loss": 0.5969, "step": 22899 }, { "epoch": 0.7018511707735687, "grad_norm": 2.104462614324675, "learning_rate": 2.1555302537796463e-06, "loss": 0.582, "step": 22900 }, { "epoch": 0.7018818192963099, "grad_norm": 2.0967730286428568, "learning_rate": 2.1551220905979864e-06, "loss": 0.5648, "step": 22901 }, { "epoch": 0.7019124678190511, "grad_norm": 2.2330272120235444, "learning_rate": 2.154713955447636e-06, "loss": 0.6272, "step": 22902 }, { "epoch": 0.7019431163417923, "grad_norm": 1.788533712009177, "learning_rate": 2.154305848332619e-06, "loss": 0.5882, "step": 22903 }, { "epoch": 0.7019737648645336, "grad_norm": 1.761771632968035, "learning_rate": 2.153897769256953e-06, "loss": 0.5097, "step": 22904 }, { "epoch": 0.7020044133872747, "grad_norm": 1.8510177401685808, "learning_rate": 2.1534897182246623e-06, "loss": 0.6007, "step": 22905 }, { "epoch": 0.702035061910016, "grad_norm": 1.8260070383184794, "learning_rate": 2.1530816952397636e-06, "loss": 0.5765, "step": 22906 }, { "epoch": 0.7020657104327571, "grad_norm": 1.9877132828748383, "learning_rate": 2.15267370030628e-06, "loss": 0.6102, "step": 22907 }, { "epoch": 0.7020963589554984, "grad_norm": 0.7977309077679469, "learning_rate": 2.152265733428232e-06, "loss": 0.4125, "step": 22908 }, { "epoch": 0.7021270074782395, "grad_norm": 1.9407940595329995, "learning_rate": 2.151857794609637e-06, "loss": 0.6104, "step": 22909 }, { "epoch": 0.7021576560009808, "grad_norm": 2.2003345429847405, "learning_rate": 2.1514498838545157e-06, "loss": 0.6446, "step": 22910 }, { "epoch": 0.7021883045237219, "grad_norm": 1.8522361567687546, "learning_rate": 2.1510420011668892e-06, "loss": 0.6196, "step": 22911 }, { "epoch": 0.7022189530464632, "grad_norm": 1.8829303493151297, "learning_rate": 2.1506341465507728e-06, "loss": 0.5937, "step": 22912 }, { "epoch": 0.7022496015692044, "grad_norm": 1.9096254871394969, "learning_rate": 2.150226320010188e-06, "loss": 0.5452, "step": 22913 }, { "epoch": 0.7022802500919456, "grad_norm": 1.8406368103034785, "learning_rate": 2.1498185215491534e-06, "loss": 0.4593, "step": 22914 }, { "epoch": 0.7023108986146868, "grad_norm": 1.960815962208564, "learning_rate": 2.149410751171685e-06, "loss": 0.5762, "step": 22915 }, { "epoch": 0.702341547137428, "grad_norm": 2.2369324001366384, "learning_rate": 2.1490030088818032e-06, "loss": 0.6002, "step": 22916 }, { "epoch": 0.7023721956601692, "grad_norm": 1.9587593702823163, "learning_rate": 2.1485952946835227e-06, "loss": 0.5857, "step": 22917 }, { "epoch": 0.7024028441829104, "grad_norm": 1.7965881619414312, "learning_rate": 2.148187608580862e-06, "loss": 0.5896, "step": 22918 }, { "epoch": 0.7024334927056516, "grad_norm": 1.8881646049746232, "learning_rate": 2.1477799505778407e-06, "loss": 0.5313, "step": 22919 }, { "epoch": 0.7024641412283928, "grad_norm": 0.8067728944009381, "learning_rate": 2.147372320678471e-06, "loss": 0.4044, "step": 22920 }, { "epoch": 0.702494789751134, "grad_norm": 1.7524009799997067, "learning_rate": 2.146964718886772e-06, "loss": 0.5574, "step": 22921 }, { "epoch": 0.7025254382738751, "grad_norm": 1.913124479297782, "learning_rate": 2.1465571452067614e-06, "loss": 0.5694, "step": 22922 }, { "epoch": 0.7025560867966164, "grad_norm": 1.9814365838206247, "learning_rate": 2.1461495996424513e-06, "loss": 0.5693, "step": 22923 }, { "epoch": 0.7025867353193576, "grad_norm": 1.8710827562452912, "learning_rate": 2.145742082197862e-06, "loss": 0.5704, "step": 22924 }, { "epoch": 0.7026173838420988, "grad_norm": 1.758573387729174, "learning_rate": 2.1453345928770037e-06, "loss": 0.5319, "step": 22925 }, { "epoch": 0.70264803236484, "grad_norm": 1.9516368331735248, "learning_rate": 2.144927131683894e-06, "loss": 0.6699, "step": 22926 }, { "epoch": 0.7026786808875812, "grad_norm": 1.652742969715886, "learning_rate": 2.14451969862255e-06, "loss": 0.5031, "step": 22927 }, { "epoch": 0.7027093294103224, "grad_norm": 1.6394081152289748, "learning_rate": 2.1441122936969814e-06, "loss": 0.5477, "step": 22928 }, { "epoch": 0.7027399779330636, "grad_norm": 0.8815962671151183, "learning_rate": 2.1437049169112062e-06, "loss": 0.4057, "step": 22929 }, { "epoch": 0.7027706264558048, "grad_norm": 1.844575239375595, "learning_rate": 2.1432975682692387e-06, "loss": 0.667, "step": 22930 }, { "epoch": 0.702801274978546, "grad_norm": 0.8105891039199201, "learning_rate": 2.142890247775089e-06, "loss": 0.392, "step": 22931 }, { "epoch": 0.7028319235012872, "grad_norm": 1.8117837435710626, "learning_rate": 2.142482955432773e-06, "loss": 0.5374, "step": 22932 }, { "epoch": 0.7028625720240285, "grad_norm": 1.9867641355217522, "learning_rate": 2.142075691246305e-06, "loss": 0.6654, "step": 22933 }, { "epoch": 0.7028932205467696, "grad_norm": 1.7240760602650602, "learning_rate": 2.1416684552196947e-06, "loss": 0.5474, "step": 22934 }, { "epoch": 0.7029238690695109, "grad_norm": 1.8685906619621486, "learning_rate": 2.141261247356959e-06, "loss": 0.6028, "step": 22935 }, { "epoch": 0.702954517592252, "grad_norm": 1.7561506927704738, "learning_rate": 2.1408540676621054e-06, "loss": 0.6104, "step": 22936 }, { "epoch": 0.7029851661149933, "grad_norm": 1.9246743455471522, "learning_rate": 2.140446916139148e-06, "loss": 0.5835, "step": 22937 }, { "epoch": 0.7030158146377344, "grad_norm": 0.8025274581537983, "learning_rate": 2.140039792792101e-06, "loss": 0.3926, "step": 22938 }, { "epoch": 0.7030464631604757, "grad_norm": 1.8710950424238089, "learning_rate": 2.1396326976249716e-06, "loss": 0.5267, "step": 22939 }, { "epoch": 0.7030771116832168, "grad_norm": 1.8835115746599718, "learning_rate": 2.139225630641773e-06, "loss": 0.5523, "step": 22940 }, { "epoch": 0.7031077602059581, "grad_norm": 1.558887077615837, "learning_rate": 2.1388185918465183e-06, "loss": 0.5008, "step": 22941 }, { "epoch": 0.7031384087286993, "grad_norm": 2.06479060588471, "learning_rate": 2.1384115812432138e-06, "loss": 0.66, "step": 22942 }, { "epoch": 0.7031690572514405, "grad_norm": 1.914756334135295, "learning_rate": 2.138004598835872e-06, "loss": 0.5795, "step": 22943 }, { "epoch": 0.7031997057741817, "grad_norm": 0.881552445998556, "learning_rate": 2.1375976446285057e-06, "loss": 0.4157, "step": 22944 }, { "epoch": 0.7032303542969229, "grad_norm": 1.911998914268049, "learning_rate": 2.13719071862512e-06, "loss": 0.6829, "step": 22945 }, { "epoch": 0.7032610028196641, "grad_norm": 1.711631991950411, "learning_rate": 2.1367838208297287e-06, "loss": 0.5529, "step": 22946 }, { "epoch": 0.7032916513424053, "grad_norm": 1.7275847312005883, "learning_rate": 2.1363769512463357e-06, "loss": 0.5938, "step": 22947 }, { "epoch": 0.7033222998651465, "grad_norm": 0.829339768330126, "learning_rate": 2.1359701098789558e-06, "loss": 0.4057, "step": 22948 }, { "epoch": 0.7033529483878878, "grad_norm": 1.6883800414961039, "learning_rate": 2.1355632967315965e-06, "loss": 0.5829, "step": 22949 }, { "epoch": 0.7033835969106289, "grad_norm": 1.9134709624792579, "learning_rate": 2.1351565118082624e-06, "loss": 0.5884, "step": 22950 }, { "epoch": 0.7034142454333702, "grad_norm": 0.8175530589386361, "learning_rate": 2.1347497551129644e-06, "loss": 0.4326, "step": 22951 }, { "epoch": 0.7034448939561113, "grad_norm": 1.806773540509796, "learning_rate": 2.1343430266497116e-06, "loss": 0.5825, "step": 22952 }, { "epoch": 0.7034755424788525, "grad_norm": 1.87534184550844, "learning_rate": 2.1339363264225084e-06, "loss": 0.6405, "step": 22953 }, { "epoch": 0.7035061910015937, "grad_norm": 1.9055823780859293, "learning_rate": 2.133529654435364e-06, "loss": 0.6532, "step": 22954 }, { "epoch": 0.7035368395243349, "grad_norm": 1.687605232706679, "learning_rate": 2.1331230106922857e-06, "loss": 0.5945, "step": 22955 }, { "epoch": 0.7035674880470761, "grad_norm": 2.006210710544652, "learning_rate": 2.1327163951972814e-06, "loss": 0.6575, "step": 22956 }, { "epoch": 0.7035981365698173, "grad_norm": 1.8211196514189854, "learning_rate": 2.132309807954356e-06, "loss": 0.6355, "step": 22957 }, { "epoch": 0.7036287850925586, "grad_norm": 1.972770015127312, "learning_rate": 2.131903248967512e-06, "loss": 0.6067, "step": 22958 }, { "epoch": 0.7036594336152997, "grad_norm": 1.8001282863257348, "learning_rate": 2.131496718240763e-06, "loss": 0.5903, "step": 22959 }, { "epoch": 0.703690082138041, "grad_norm": 1.9337598019337037, "learning_rate": 2.131090215778111e-06, "loss": 0.6655, "step": 22960 }, { "epoch": 0.7037207306607821, "grad_norm": 1.9334070585682799, "learning_rate": 2.130683741583559e-06, "loss": 0.6162, "step": 22961 }, { "epoch": 0.7037513791835234, "grad_norm": 1.8729443894310724, "learning_rate": 2.1302772956611144e-06, "loss": 0.571, "step": 22962 }, { "epoch": 0.7037820277062645, "grad_norm": 1.7341702411304112, "learning_rate": 2.129870878014784e-06, "loss": 0.5315, "step": 22963 }, { "epoch": 0.7038126762290058, "grad_norm": 1.848741226477364, "learning_rate": 2.1294644886485677e-06, "loss": 0.5628, "step": 22964 }, { "epoch": 0.7038433247517469, "grad_norm": 1.678894298873829, "learning_rate": 2.129058127566473e-06, "loss": 0.5886, "step": 22965 }, { "epoch": 0.7038739732744882, "grad_norm": 1.9060063099945697, "learning_rate": 2.128651794772503e-06, "loss": 0.6609, "step": 22966 }, { "epoch": 0.7039046217972293, "grad_norm": 1.6458540291827846, "learning_rate": 2.1282454902706625e-06, "loss": 0.5426, "step": 22967 }, { "epoch": 0.7039352703199706, "grad_norm": 1.5716953835208802, "learning_rate": 2.1278392140649547e-06, "loss": 0.5543, "step": 22968 }, { "epoch": 0.7039659188427118, "grad_norm": 1.7861671386989344, "learning_rate": 2.1274329661593795e-06, "loss": 0.5748, "step": 22969 }, { "epoch": 0.703996567365453, "grad_norm": 1.595204140642166, "learning_rate": 2.127026746557943e-06, "loss": 0.5996, "step": 22970 }, { "epoch": 0.7040272158881942, "grad_norm": 1.8643102885749794, "learning_rate": 2.1266205552646485e-06, "loss": 0.5649, "step": 22971 }, { "epoch": 0.7040578644109354, "grad_norm": 1.7469489872150041, "learning_rate": 2.1262143922834953e-06, "loss": 0.5767, "step": 22972 }, { "epoch": 0.7040885129336766, "grad_norm": 1.8836797934674026, "learning_rate": 2.1258082576184868e-06, "loss": 0.674, "step": 22973 }, { "epoch": 0.7041191614564178, "grad_norm": 1.556026580495227, "learning_rate": 2.125402151273625e-06, "loss": 0.5498, "step": 22974 }, { "epoch": 0.704149809979159, "grad_norm": 1.9211724335543336, "learning_rate": 2.124996073252913e-06, "loss": 0.7022, "step": 22975 }, { "epoch": 0.7041804585019003, "grad_norm": 1.7577954728751737, "learning_rate": 2.1245900235603507e-06, "loss": 0.5313, "step": 22976 }, { "epoch": 0.7042111070246414, "grad_norm": 1.8271679697675585, "learning_rate": 2.124184002199934e-06, "loss": 0.6957, "step": 22977 }, { "epoch": 0.7042417555473827, "grad_norm": 1.625532793472091, "learning_rate": 2.1237780091756726e-06, "loss": 0.5593, "step": 22978 }, { "epoch": 0.7042724040701238, "grad_norm": 1.978091423909617, "learning_rate": 2.123372044491562e-06, "loss": 0.6423, "step": 22979 }, { "epoch": 0.7043030525928651, "grad_norm": 1.8050569544628843, "learning_rate": 2.1229661081516017e-06, "loss": 0.6086, "step": 22980 }, { "epoch": 0.7043337011156062, "grad_norm": 1.784351238605012, "learning_rate": 2.1225602001597918e-06, "loss": 0.5697, "step": 22981 }, { "epoch": 0.7043643496383475, "grad_norm": 1.941977993627292, "learning_rate": 2.122154320520134e-06, "loss": 0.6627, "step": 22982 }, { "epoch": 0.7043949981610886, "grad_norm": 1.8324185899023746, "learning_rate": 2.1217484692366245e-06, "loss": 0.5855, "step": 22983 }, { "epoch": 0.7044256466838298, "grad_norm": 1.8202477413814213, "learning_rate": 2.121342646313264e-06, "loss": 0.5951, "step": 22984 }, { "epoch": 0.704456295206571, "grad_norm": 1.7911638803622156, "learning_rate": 2.1209368517540506e-06, "loss": 0.6304, "step": 22985 }, { "epoch": 0.7044869437293122, "grad_norm": 0.8615692926877834, "learning_rate": 2.120531085562985e-06, "loss": 0.4104, "step": 22986 }, { "epoch": 0.7045175922520535, "grad_norm": 1.846191260433675, "learning_rate": 2.120125347744063e-06, "loss": 0.5224, "step": 22987 }, { "epoch": 0.7045482407747946, "grad_norm": 0.8114149181022915, "learning_rate": 2.1197196383012795e-06, "loss": 0.4251, "step": 22988 }, { "epoch": 0.7045788892975359, "grad_norm": 1.7063857863620326, "learning_rate": 2.119313957238639e-06, "loss": 0.5435, "step": 22989 }, { "epoch": 0.704609537820277, "grad_norm": 2.0700587021548387, "learning_rate": 2.1189083045601355e-06, "loss": 0.5799, "step": 22990 }, { "epoch": 0.7046401863430183, "grad_norm": 1.8027389562243508, "learning_rate": 2.118502680269763e-06, "loss": 0.5359, "step": 22991 }, { "epoch": 0.7046708348657594, "grad_norm": 1.6984267469528294, "learning_rate": 2.1180970843715215e-06, "loss": 0.4667, "step": 22992 }, { "epoch": 0.7047014833885007, "grad_norm": 2.173482520559876, "learning_rate": 2.1176915168694067e-06, "loss": 0.5699, "step": 22993 }, { "epoch": 0.7047321319112418, "grad_norm": 1.7756580966586528, "learning_rate": 2.1172859777674164e-06, "loss": 0.5713, "step": 22994 }, { "epoch": 0.7047627804339831, "grad_norm": 1.7598363358336255, "learning_rate": 2.116880467069543e-06, "loss": 0.5445, "step": 22995 }, { "epoch": 0.7047934289567243, "grad_norm": 2.0507682973915156, "learning_rate": 2.1164749847797843e-06, "loss": 0.6033, "step": 22996 }, { "epoch": 0.7048240774794655, "grad_norm": 0.817161014236857, "learning_rate": 2.1160695309021373e-06, "loss": 0.4168, "step": 22997 }, { "epoch": 0.7048547260022067, "grad_norm": 1.8181569644818394, "learning_rate": 2.1156641054405952e-06, "loss": 0.5885, "step": 22998 }, { "epoch": 0.7048853745249479, "grad_norm": 1.7871289911054078, "learning_rate": 2.1152587083991486e-06, "loss": 0.5713, "step": 22999 }, { "epoch": 0.7049160230476891, "grad_norm": 1.729620803355952, "learning_rate": 2.1148533397818e-06, "loss": 0.5208, "step": 23000 }, { "epoch": 0.7049466715704303, "grad_norm": 1.775719617736804, "learning_rate": 2.114447999592538e-06, "loss": 0.5159, "step": 23001 }, { "epoch": 0.7049773200931715, "grad_norm": 1.6914915705690847, "learning_rate": 2.114042687835359e-06, "loss": 0.5785, "step": 23002 }, { "epoch": 0.7050079686159128, "grad_norm": 1.8374455214141834, "learning_rate": 2.113637404514255e-06, "loss": 0.5518, "step": 23003 }, { "epoch": 0.7050386171386539, "grad_norm": 1.6849209551704016, "learning_rate": 2.11323214963322e-06, "loss": 0.5896, "step": 23004 }, { "epoch": 0.7050692656613952, "grad_norm": 2.0016544931355154, "learning_rate": 2.1128269231962485e-06, "loss": 0.5276, "step": 23005 }, { "epoch": 0.7050999141841363, "grad_norm": 1.5727680956667736, "learning_rate": 2.11242172520733e-06, "loss": 0.5606, "step": 23006 }, { "epoch": 0.7051305627068776, "grad_norm": 1.9601973795645307, "learning_rate": 2.1120165556704603e-06, "loss": 0.5607, "step": 23007 }, { "epoch": 0.7051612112296187, "grad_norm": 1.8087568383769073, "learning_rate": 2.1116114145896314e-06, "loss": 0.6205, "step": 23008 }, { "epoch": 0.70519185975236, "grad_norm": 1.7463750564372802, "learning_rate": 2.1112063019688343e-06, "loss": 0.522, "step": 23009 }, { "epoch": 0.7052225082751011, "grad_norm": 1.8133049876468341, "learning_rate": 2.1108012178120575e-06, "loss": 0.6414, "step": 23010 }, { "epoch": 0.7052531567978424, "grad_norm": 1.9451404110099002, "learning_rate": 2.1103961621232988e-06, "loss": 0.5286, "step": 23011 }, { "epoch": 0.7052838053205835, "grad_norm": 0.8152326005092515, "learning_rate": 2.1099911349065437e-06, "loss": 0.4427, "step": 23012 }, { "epoch": 0.7053144538433248, "grad_norm": 1.8377202335262772, "learning_rate": 2.1095861361657883e-06, "loss": 0.6013, "step": 23013 }, { "epoch": 0.705345102366066, "grad_norm": 0.8083145225453342, "learning_rate": 2.1091811659050177e-06, "loss": 0.407, "step": 23014 }, { "epoch": 0.7053757508888071, "grad_norm": 1.6090987114739654, "learning_rate": 2.1087762241282245e-06, "loss": 0.5815, "step": 23015 }, { "epoch": 0.7054063994115484, "grad_norm": 0.7888832506453851, "learning_rate": 2.1083713108394015e-06, "loss": 0.4173, "step": 23016 }, { "epoch": 0.7054370479342895, "grad_norm": 2.079028296933661, "learning_rate": 2.1079664260425337e-06, "loss": 0.6214, "step": 23017 }, { "epoch": 0.7054676964570308, "grad_norm": 1.846843336188525, "learning_rate": 2.1075615697416123e-06, "loss": 0.478, "step": 23018 }, { "epoch": 0.7054983449797719, "grad_norm": 1.5658641168835674, "learning_rate": 2.1071567419406293e-06, "loss": 0.5731, "step": 23019 }, { "epoch": 0.7055289935025132, "grad_norm": 1.8353568855652829, "learning_rate": 2.1067519426435683e-06, "loss": 0.6859, "step": 23020 }, { "epoch": 0.7055596420252543, "grad_norm": 0.80848501658511, "learning_rate": 2.106347171854423e-06, "loss": 0.4177, "step": 23021 }, { "epoch": 0.7055902905479956, "grad_norm": 0.7704352950711029, "learning_rate": 2.105942429577178e-06, "loss": 0.3964, "step": 23022 }, { "epoch": 0.7056209390707368, "grad_norm": 1.964796615475596, "learning_rate": 2.1055377158158224e-06, "loss": 0.527, "step": 23023 }, { "epoch": 0.705651587593478, "grad_norm": 0.792859513113005, "learning_rate": 2.105133030574346e-06, "loss": 0.4023, "step": 23024 }, { "epoch": 0.7056822361162192, "grad_norm": 2.1946699349829517, "learning_rate": 2.1047283738567326e-06, "loss": 0.6898, "step": 23025 }, { "epoch": 0.7057128846389604, "grad_norm": 1.880997588140535, "learning_rate": 2.104323745666972e-06, "loss": 0.5579, "step": 23026 }, { "epoch": 0.7057435331617016, "grad_norm": 1.942802588793614, "learning_rate": 2.1039191460090515e-06, "loss": 0.6172, "step": 23027 }, { "epoch": 0.7057741816844428, "grad_norm": 1.7598867324781466, "learning_rate": 2.1035145748869553e-06, "loss": 0.5833, "step": 23028 }, { "epoch": 0.705804830207184, "grad_norm": 1.8742257073753117, "learning_rate": 2.1031100323046703e-06, "loss": 0.5628, "step": 23029 }, { "epoch": 0.7058354787299252, "grad_norm": 1.6835346765321342, "learning_rate": 2.102705518266186e-06, "loss": 0.6024, "step": 23030 }, { "epoch": 0.7058661272526664, "grad_norm": 1.6425801571231124, "learning_rate": 2.1023010327754833e-06, "loss": 0.5752, "step": 23031 }, { "epoch": 0.7058967757754077, "grad_norm": 1.7581500465043862, "learning_rate": 2.101896575836552e-06, "loss": 0.6412, "step": 23032 }, { "epoch": 0.7059274242981488, "grad_norm": 1.914765434586774, "learning_rate": 2.1014921474533732e-06, "loss": 0.609, "step": 23033 }, { "epoch": 0.7059580728208901, "grad_norm": 1.5775598138004083, "learning_rate": 2.101087747629934e-06, "loss": 0.5104, "step": 23034 }, { "epoch": 0.7059887213436312, "grad_norm": 1.7848234172906712, "learning_rate": 2.1006833763702206e-06, "loss": 0.5696, "step": 23035 }, { "epoch": 0.7060193698663725, "grad_norm": 0.8358694846881417, "learning_rate": 2.1002790336782143e-06, "loss": 0.408, "step": 23036 }, { "epoch": 0.7060500183891136, "grad_norm": 0.8070630018689796, "learning_rate": 2.0998747195579007e-06, "loss": 0.4109, "step": 23037 }, { "epoch": 0.7060806669118549, "grad_norm": 0.79351854530496, "learning_rate": 2.099470434013265e-06, "loss": 0.4127, "step": 23038 }, { "epoch": 0.706111315434596, "grad_norm": 2.350727468154294, "learning_rate": 2.099066177048287e-06, "loss": 0.7355, "step": 23039 }, { "epoch": 0.7061419639573373, "grad_norm": 1.9598480238633567, "learning_rate": 2.098661948666953e-06, "loss": 0.5842, "step": 23040 }, { "epoch": 0.7061726124800785, "grad_norm": 1.9441739228916988, "learning_rate": 2.0982577488732464e-06, "loss": 0.5363, "step": 23041 }, { "epoch": 0.7062032610028197, "grad_norm": 1.7703585583794408, "learning_rate": 2.097853577671147e-06, "loss": 0.6332, "step": 23042 }, { "epoch": 0.7062339095255609, "grad_norm": 1.9157751853066574, "learning_rate": 2.0974494350646408e-06, "loss": 0.5943, "step": 23043 }, { "epoch": 0.7062645580483021, "grad_norm": 1.7046093373003355, "learning_rate": 2.0970453210577058e-06, "loss": 0.5652, "step": 23044 }, { "epoch": 0.7062952065710433, "grad_norm": 0.7382758893278049, "learning_rate": 2.0966412356543263e-06, "loss": 0.3968, "step": 23045 }, { "epoch": 0.7063258550937844, "grad_norm": 1.8924903987739563, "learning_rate": 2.096237178858485e-06, "loss": 0.6088, "step": 23046 }, { "epoch": 0.7063565036165257, "grad_norm": 1.768148211787123, "learning_rate": 2.095833150674161e-06, "loss": 0.6123, "step": 23047 }, { "epoch": 0.7063871521392668, "grad_norm": 1.6840560745460567, "learning_rate": 2.0954291511053347e-06, "loss": 0.5883, "step": 23048 }, { "epoch": 0.7064178006620081, "grad_norm": 1.7373568209221801, "learning_rate": 2.0950251801559906e-06, "loss": 0.5328, "step": 23049 }, { "epoch": 0.7064484491847492, "grad_norm": 1.7629643684415286, "learning_rate": 2.094621237830105e-06, "loss": 0.6562, "step": 23050 }, { "epoch": 0.7064790977074905, "grad_norm": 1.7102956313912845, "learning_rate": 2.0942173241316594e-06, "loss": 0.5882, "step": 23051 }, { "epoch": 0.7065097462302317, "grad_norm": 1.8719737842070487, "learning_rate": 2.0938134390646357e-06, "loss": 0.5604, "step": 23052 }, { "epoch": 0.7065403947529729, "grad_norm": 1.9777397088917423, "learning_rate": 2.09340958263301e-06, "loss": 0.6284, "step": 23053 }, { "epoch": 0.7065710432757141, "grad_norm": 0.8351522014244928, "learning_rate": 2.0930057548407658e-06, "loss": 0.4184, "step": 23054 }, { "epoch": 0.7066016917984553, "grad_norm": 1.934247717397186, "learning_rate": 2.0926019556918774e-06, "loss": 0.6468, "step": 23055 }, { "epoch": 0.7066323403211965, "grad_norm": 1.9530772089947357, "learning_rate": 2.0921981851903255e-06, "loss": 0.5708, "step": 23056 }, { "epoch": 0.7066629888439377, "grad_norm": 0.7918230034740881, "learning_rate": 2.0917944433400912e-06, "loss": 0.4154, "step": 23057 }, { "epoch": 0.7066936373666789, "grad_norm": 1.7990458231239965, "learning_rate": 2.0913907301451485e-06, "loss": 0.5628, "step": 23058 }, { "epoch": 0.7067242858894202, "grad_norm": 2.262269476729392, "learning_rate": 2.0909870456094765e-06, "loss": 0.6364, "step": 23059 }, { "epoch": 0.7067549344121613, "grad_norm": 1.8874386888073706, "learning_rate": 2.090583389737056e-06, "loss": 0.6444, "step": 23060 }, { "epoch": 0.7067855829349026, "grad_norm": 0.7858245774390359, "learning_rate": 2.09017976253186e-06, "loss": 0.4025, "step": 23061 }, { "epoch": 0.7068162314576437, "grad_norm": 1.8818648681997006, "learning_rate": 2.089776163997867e-06, "loss": 0.5667, "step": 23062 }, { "epoch": 0.706846879980385, "grad_norm": 1.9360859425219188, "learning_rate": 2.089372594139056e-06, "loss": 0.6234, "step": 23063 }, { "epoch": 0.7068775285031261, "grad_norm": 1.9693666232981577, "learning_rate": 2.0889690529593993e-06, "loss": 0.6262, "step": 23064 }, { "epoch": 0.7069081770258674, "grad_norm": 1.6617098481380927, "learning_rate": 2.0885655404628774e-06, "loss": 0.5369, "step": 23065 }, { "epoch": 0.7069388255486085, "grad_norm": 2.137089813451657, "learning_rate": 2.088162056653462e-06, "loss": 0.7773, "step": 23066 }, { "epoch": 0.7069694740713498, "grad_norm": 0.7978527097727065, "learning_rate": 2.0877586015351315e-06, "loss": 0.4145, "step": 23067 }, { "epoch": 0.707000122594091, "grad_norm": 1.798026961149208, "learning_rate": 2.0873551751118624e-06, "loss": 0.5369, "step": 23068 }, { "epoch": 0.7070307711168322, "grad_norm": 0.7888570796459055, "learning_rate": 2.086951777387626e-06, "loss": 0.4134, "step": 23069 }, { "epoch": 0.7070614196395734, "grad_norm": 2.084629303462946, "learning_rate": 2.086548408366399e-06, "loss": 0.5646, "step": 23070 }, { "epoch": 0.7070920681623146, "grad_norm": 1.760221172853829, "learning_rate": 2.0861450680521576e-06, "loss": 0.4771, "step": 23071 }, { "epoch": 0.7071227166850558, "grad_norm": 1.8665809632162313, "learning_rate": 2.085741756448873e-06, "loss": 0.6105, "step": 23072 }, { "epoch": 0.707153365207797, "grad_norm": 1.955731122128193, "learning_rate": 2.0853384735605227e-06, "loss": 0.6563, "step": 23073 }, { "epoch": 0.7071840137305382, "grad_norm": 1.690737157053022, "learning_rate": 2.084935219391074e-06, "loss": 0.5196, "step": 23074 }, { "epoch": 0.7072146622532794, "grad_norm": 0.792576969024837, "learning_rate": 2.0845319939445074e-06, "loss": 0.4055, "step": 23075 }, { "epoch": 0.7072453107760206, "grad_norm": 1.8097756697680842, "learning_rate": 2.0841287972247935e-06, "loss": 0.5286, "step": 23076 }, { "epoch": 0.7072759592987617, "grad_norm": 1.6572095449480602, "learning_rate": 2.083725629235903e-06, "loss": 0.5832, "step": 23077 }, { "epoch": 0.707306607821503, "grad_norm": 1.8374975229303327, "learning_rate": 2.0833224899818105e-06, "loss": 0.5616, "step": 23078 }, { "epoch": 0.7073372563442442, "grad_norm": 1.8357112318048383, "learning_rate": 2.082919379466489e-06, "loss": 0.6563, "step": 23079 }, { "epoch": 0.7073679048669854, "grad_norm": 1.608549918790453, "learning_rate": 2.0825162976939077e-06, "loss": 0.5594, "step": 23080 }, { "epoch": 0.7073985533897266, "grad_norm": 1.7388456035980044, "learning_rate": 2.0821132446680393e-06, "loss": 0.5242, "step": 23081 }, { "epoch": 0.7074292019124678, "grad_norm": 1.7893924271007955, "learning_rate": 2.081710220392856e-06, "loss": 0.6639, "step": 23082 }, { "epoch": 0.707459850435209, "grad_norm": 1.9324463499409998, "learning_rate": 2.0813072248723303e-06, "loss": 0.6037, "step": 23083 }, { "epoch": 0.7074904989579502, "grad_norm": 1.9954900973828027, "learning_rate": 2.0809042581104318e-06, "loss": 0.6007, "step": 23084 }, { "epoch": 0.7075211474806914, "grad_norm": 0.7623381891495501, "learning_rate": 2.0805013201111264e-06, "loss": 0.3988, "step": 23085 }, { "epoch": 0.7075517960034327, "grad_norm": 1.8916877090949993, "learning_rate": 2.0800984108783924e-06, "loss": 0.569, "step": 23086 }, { "epoch": 0.7075824445261738, "grad_norm": 1.7562576896688324, "learning_rate": 2.0796955304161954e-06, "loss": 0.597, "step": 23087 }, { "epoch": 0.7076130930489151, "grad_norm": 1.8925622901556964, "learning_rate": 2.079292678728504e-06, "loss": 0.4842, "step": 23088 }, { "epoch": 0.7076437415716562, "grad_norm": 0.8313165270919874, "learning_rate": 2.0788898558192887e-06, "loss": 0.4151, "step": 23089 }, { "epoch": 0.7076743900943975, "grad_norm": 1.7596176158118264, "learning_rate": 2.078487061692521e-06, "loss": 0.6531, "step": 23090 }, { "epoch": 0.7077050386171386, "grad_norm": 1.9392680482489897, "learning_rate": 2.0780842963521665e-06, "loss": 0.6211, "step": 23091 }, { "epoch": 0.7077356871398799, "grad_norm": 1.8914658591610038, "learning_rate": 2.077681559802195e-06, "loss": 0.6311, "step": 23092 }, { "epoch": 0.707766335662621, "grad_norm": 1.8516848582690022, "learning_rate": 2.077278852046574e-06, "loss": 0.6253, "step": 23093 }, { "epoch": 0.7077969841853623, "grad_norm": 1.7600275554054214, "learning_rate": 2.076876173089275e-06, "loss": 0.5647, "step": 23094 }, { "epoch": 0.7078276327081034, "grad_norm": 1.9937418673152105, "learning_rate": 2.0764735229342623e-06, "loss": 0.6252, "step": 23095 }, { "epoch": 0.7078582812308447, "grad_norm": 1.807217674443791, "learning_rate": 2.0760709015855006e-06, "loss": 0.6004, "step": 23096 }, { "epoch": 0.7078889297535859, "grad_norm": 1.7861807086484405, "learning_rate": 2.075668309046964e-06, "loss": 0.5532, "step": 23097 }, { "epoch": 0.7079195782763271, "grad_norm": 1.6060850557527233, "learning_rate": 2.0752657453226162e-06, "loss": 0.6638, "step": 23098 }, { "epoch": 0.7079502267990683, "grad_norm": 0.830443171388324, "learning_rate": 2.0748632104164213e-06, "loss": 0.4253, "step": 23099 }, { "epoch": 0.7079808753218095, "grad_norm": 1.932039649850433, "learning_rate": 2.0744607043323477e-06, "loss": 0.5722, "step": 23100 }, { "epoch": 0.7080115238445507, "grad_norm": 1.7926742288288722, "learning_rate": 2.074058227074361e-06, "loss": 0.6441, "step": 23101 }, { "epoch": 0.7080421723672919, "grad_norm": 1.6418445278501028, "learning_rate": 2.073655778646429e-06, "loss": 0.4495, "step": 23102 }, { "epoch": 0.7080728208900331, "grad_norm": 1.8028616579710783, "learning_rate": 2.073253359052514e-06, "loss": 0.4941, "step": 23103 }, { "epoch": 0.7081034694127744, "grad_norm": 1.5789393770142426, "learning_rate": 2.072850968296582e-06, "loss": 0.5614, "step": 23104 }, { "epoch": 0.7081341179355155, "grad_norm": 0.8539592892366691, "learning_rate": 2.0724486063826003e-06, "loss": 0.419, "step": 23105 }, { "epoch": 0.7081647664582568, "grad_norm": 1.8998968718015374, "learning_rate": 2.072046273314532e-06, "loss": 0.6684, "step": 23106 }, { "epoch": 0.7081954149809979, "grad_norm": 1.7904088530759625, "learning_rate": 2.0716439690963385e-06, "loss": 0.5817, "step": 23107 }, { "epoch": 0.7082260635037391, "grad_norm": 1.832732381044668, "learning_rate": 2.071241693731986e-06, "loss": 0.5647, "step": 23108 }, { "epoch": 0.7082567120264803, "grad_norm": 2.044890605063615, "learning_rate": 2.0708394472254397e-06, "loss": 0.4875, "step": 23109 }, { "epoch": 0.7082873605492215, "grad_norm": 1.8907612002512462, "learning_rate": 2.0704372295806622e-06, "loss": 0.5945, "step": 23110 }, { "epoch": 0.7083180090719627, "grad_norm": 0.8003749993598818, "learning_rate": 2.070035040801615e-06, "loss": 0.4266, "step": 23111 }, { "epoch": 0.7083486575947039, "grad_norm": 1.663325577100741, "learning_rate": 2.0696328808922623e-06, "loss": 0.5584, "step": 23112 }, { "epoch": 0.7083793061174452, "grad_norm": 1.808401612286784, "learning_rate": 2.0692307498565685e-06, "loss": 0.6097, "step": 23113 }, { "epoch": 0.7084099546401863, "grad_norm": 0.7568447243152859, "learning_rate": 2.068828647698492e-06, "loss": 0.3964, "step": 23114 }, { "epoch": 0.7084406031629276, "grad_norm": 1.682363647069225, "learning_rate": 2.0684265744219965e-06, "loss": 0.5852, "step": 23115 }, { "epoch": 0.7084712516856687, "grad_norm": 0.7917218749738671, "learning_rate": 2.0680245300310465e-06, "loss": 0.4056, "step": 23116 }, { "epoch": 0.70850190020841, "grad_norm": 0.7779953241558096, "learning_rate": 2.0676225145296e-06, "loss": 0.4191, "step": 23117 }, { "epoch": 0.7085325487311511, "grad_norm": 1.956519133344867, "learning_rate": 2.0672205279216183e-06, "loss": 0.5749, "step": 23118 }, { "epoch": 0.7085631972538924, "grad_norm": 0.778598351327874, "learning_rate": 2.0668185702110633e-06, "loss": 0.4073, "step": 23119 }, { "epoch": 0.7085938457766335, "grad_norm": 1.9402123853272066, "learning_rate": 2.066416641401894e-06, "loss": 0.6848, "step": 23120 }, { "epoch": 0.7086244942993748, "grad_norm": 1.8450211358344855, "learning_rate": 2.066014741498075e-06, "loss": 0.6512, "step": 23121 }, { "epoch": 0.708655142822116, "grad_norm": 1.9035200499492937, "learning_rate": 2.065612870503562e-06, "loss": 0.5635, "step": 23122 }, { "epoch": 0.7086857913448572, "grad_norm": 2.204860607623544, "learning_rate": 2.0652110284223153e-06, "loss": 0.571, "step": 23123 }, { "epoch": 0.7087164398675984, "grad_norm": 0.7764919734873843, "learning_rate": 2.064809215258298e-06, "loss": 0.4063, "step": 23124 }, { "epoch": 0.7087470883903396, "grad_norm": 1.6064385471735487, "learning_rate": 2.0644074310154656e-06, "loss": 0.5308, "step": 23125 }, { "epoch": 0.7087777369130808, "grad_norm": 0.8009285453143085, "learning_rate": 2.0640056756977743e-06, "loss": 0.4031, "step": 23126 }, { "epoch": 0.708808385435822, "grad_norm": 1.930506653809648, "learning_rate": 2.06360394930919e-06, "loss": 0.6068, "step": 23127 }, { "epoch": 0.7088390339585632, "grad_norm": 0.7363956995145273, "learning_rate": 2.063202251853666e-06, "loss": 0.3785, "step": 23128 }, { "epoch": 0.7088696824813044, "grad_norm": 1.9641916275597997, "learning_rate": 2.0628005833351634e-06, "loss": 0.5426, "step": 23129 }, { "epoch": 0.7089003310040456, "grad_norm": 1.818038275439917, "learning_rate": 2.062398943757636e-06, "loss": 0.6484, "step": 23130 }, { "epoch": 0.7089309795267869, "grad_norm": 1.9787497724996699, "learning_rate": 2.061997333125043e-06, "loss": 0.6583, "step": 23131 }, { "epoch": 0.708961628049528, "grad_norm": 2.0929485926704388, "learning_rate": 2.0615957514413446e-06, "loss": 0.6747, "step": 23132 }, { "epoch": 0.7089922765722693, "grad_norm": 0.8181744057870921, "learning_rate": 2.0611941987104927e-06, "loss": 0.4104, "step": 23133 }, { "epoch": 0.7090229250950104, "grad_norm": 1.92289711300701, "learning_rate": 2.0607926749364467e-06, "loss": 0.6589, "step": 23134 }, { "epoch": 0.7090535736177517, "grad_norm": 1.6281231703089132, "learning_rate": 2.060391180123164e-06, "loss": 0.5987, "step": 23135 }, { "epoch": 0.7090842221404928, "grad_norm": 1.9451992125334663, "learning_rate": 2.0599897142745995e-06, "loss": 0.5271, "step": 23136 }, { "epoch": 0.7091148706632341, "grad_norm": 1.8926774295401727, "learning_rate": 2.0595882773947045e-06, "loss": 0.5534, "step": 23137 }, { "epoch": 0.7091455191859752, "grad_norm": 0.8000744295952524, "learning_rate": 2.0591868694874427e-06, "loss": 0.4012, "step": 23138 }, { "epoch": 0.7091761677087164, "grad_norm": 1.6685587646326994, "learning_rate": 2.058785490556763e-06, "loss": 0.5781, "step": 23139 }, { "epoch": 0.7092068162314576, "grad_norm": 0.7407009840762244, "learning_rate": 2.058384140606624e-06, "loss": 0.3979, "step": 23140 }, { "epoch": 0.7092374647541988, "grad_norm": 1.749888191104032, "learning_rate": 2.0579828196409774e-06, "loss": 0.6322, "step": 23141 }, { "epoch": 0.7092681132769401, "grad_norm": 1.822615723744601, "learning_rate": 2.0575815276637782e-06, "loss": 0.5424, "step": 23142 }, { "epoch": 0.7092987617996812, "grad_norm": 1.9245687029509526, "learning_rate": 2.0571802646789833e-06, "loss": 0.6533, "step": 23143 }, { "epoch": 0.7093294103224225, "grad_norm": 1.761859972032971, "learning_rate": 2.0567790306905427e-06, "loss": 0.5544, "step": 23144 }, { "epoch": 0.7093600588451636, "grad_norm": 1.645960542599541, "learning_rate": 2.056377825702411e-06, "loss": 0.4936, "step": 23145 }, { "epoch": 0.7093907073679049, "grad_norm": 1.9292049443552535, "learning_rate": 2.0559766497185433e-06, "loss": 0.4867, "step": 23146 }, { "epoch": 0.709421355890646, "grad_norm": 1.7078184560483631, "learning_rate": 2.055575502742889e-06, "loss": 0.6286, "step": 23147 }, { "epoch": 0.7094520044133873, "grad_norm": 1.7450457069972634, "learning_rate": 2.055174384779403e-06, "loss": 0.5955, "step": 23148 }, { "epoch": 0.7094826529361284, "grad_norm": 1.8736763007926984, "learning_rate": 2.054773295832039e-06, "loss": 0.6449, "step": 23149 }, { "epoch": 0.7095133014588697, "grad_norm": 2.0731850075460194, "learning_rate": 2.054372235904746e-06, "loss": 0.6191, "step": 23150 }, { "epoch": 0.7095439499816109, "grad_norm": 1.81814277571714, "learning_rate": 2.0539712050014783e-06, "loss": 0.6693, "step": 23151 }, { "epoch": 0.7095745985043521, "grad_norm": 1.8720957743422366, "learning_rate": 2.0535702031261843e-06, "loss": 0.5702, "step": 23152 }, { "epoch": 0.7096052470270933, "grad_norm": 1.9442933055064453, "learning_rate": 2.053169230282817e-06, "loss": 0.597, "step": 23153 }, { "epoch": 0.7096358955498345, "grad_norm": 0.789393210900366, "learning_rate": 2.052768286475329e-06, "loss": 0.3986, "step": 23154 }, { "epoch": 0.7096665440725757, "grad_norm": 1.7334355550693246, "learning_rate": 2.0523673717076676e-06, "loss": 0.6053, "step": 23155 }, { "epoch": 0.7096971925953169, "grad_norm": 1.6560314527027604, "learning_rate": 2.0519664859837846e-06, "loss": 0.5247, "step": 23156 }, { "epoch": 0.7097278411180581, "grad_norm": 0.7822851056950269, "learning_rate": 2.0515656293076315e-06, "loss": 0.3993, "step": 23157 }, { "epoch": 0.7097584896407994, "grad_norm": 1.6647308888926329, "learning_rate": 2.0511648016831554e-06, "loss": 0.5528, "step": 23158 }, { "epoch": 0.7097891381635405, "grad_norm": 1.5432128614422795, "learning_rate": 2.0507640031143083e-06, "loss": 0.4284, "step": 23159 }, { "epoch": 0.7098197866862818, "grad_norm": 1.6896835181758598, "learning_rate": 2.0503632336050367e-06, "loss": 0.5359, "step": 23160 }, { "epoch": 0.7098504352090229, "grad_norm": 1.9849078371685427, "learning_rate": 2.0499624931592905e-06, "loss": 0.6444, "step": 23161 }, { "epoch": 0.7098810837317642, "grad_norm": 1.9069001911390897, "learning_rate": 2.049561781781021e-06, "loss": 0.5694, "step": 23162 }, { "epoch": 0.7099117322545053, "grad_norm": 1.837739939898148, "learning_rate": 2.049161099474172e-06, "loss": 0.5805, "step": 23163 }, { "epoch": 0.7099423807772466, "grad_norm": 1.5772748361001747, "learning_rate": 2.0487604462426936e-06, "loss": 0.4973, "step": 23164 }, { "epoch": 0.7099730292999877, "grad_norm": 1.866540379169468, "learning_rate": 2.0483598220905354e-06, "loss": 0.5798, "step": 23165 }, { "epoch": 0.710003677822729, "grad_norm": 0.7934504647909391, "learning_rate": 2.0479592270216414e-06, "loss": 0.4203, "step": 23166 }, { "epoch": 0.7100343263454701, "grad_norm": 1.9602061101279298, "learning_rate": 2.04755866103996e-06, "loss": 0.6939, "step": 23167 }, { "epoch": 0.7100649748682114, "grad_norm": 1.7206733170299586, "learning_rate": 2.047158124149441e-06, "loss": 0.6132, "step": 23168 }, { "epoch": 0.7100956233909526, "grad_norm": 2.2335774047905317, "learning_rate": 2.0467576163540263e-06, "loss": 0.7075, "step": 23169 }, { "epoch": 0.7101262719136937, "grad_norm": 1.8131433896918696, "learning_rate": 2.0463571376576667e-06, "loss": 0.6083, "step": 23170 }, { "epoch": 0.710156920436435, "grad_norm": 1.914638527417006, "learning_rate": 2.0459566880643038e-06, "loss": 0.631, "step": 23171 }, { "epoch": 0.7101875689591761, "grad_norm": 1.893826169677931, "learning_rate": 2.0455562675778855e-06, "loss": 0.6189, "step": 23172 }, { "epoch": 0.7102182174819174, "grad_norm": 1.6723086651360786, "learning_rate": 2.0451558762023595e-06, "loss": 0.5312, "step": 23173 }, { "epoch": 0.7102488660046585, "grad_norm": 1.796790006127883, "learning_rate": 2.0447555139416658e-06, "loss": 0.5808, "step": 23174 }, { "epoch": 0.7102795145273998, "grad_norm": 1.8192548794642127, "learning_rate": 2.044355180799753e-06, "loss": 0.4888, "step": 23175 }, { "epoch": 0.7103101630501409, "grad_norm": 1.6735035163356977, "learning_rate": 2.0439548767805667e-06, "loss": 0.5945, "step": 23176 }, { "epoch": 0.7103408115728822, "grad_norm": 1.8930143561970412, "learning_rate": 2.043554601888047e-06, "loss": 0.5755, "step": 23177 }, { "epoch": 0.7103714600956234, "grad_norm": 2.030660695651665, "learning_rate": 2.0431543561261408e-06, "loss": 0.6805, "step": 23178 }, { "epoch": 0.7104021086183646, "grad_norm": 1.993492418331314, "learning_rate": 2.0427541394987926e-06, "loss": 0.6145, "step": 23179 }, { "epoch": 0.7104327571411058, "grad_norm": 1.7600315729837783, "learning_rate": 2.042353952009943e-06, "loss": 0.5128, "step": 23180 }, { "epoch": 0.710463405663847, "grad_norm": 1.7035849806483185, "learning_rate": 2.041953793663538e-06, "loss": 0.5614, "step": 23181 }, { "epoch": 0.7104940541865882, "grad_norm": 0.8189867868671072, "learning_rate": 2.041553664463516e-06, "loss": 0.4114, "step": 23182 }, { "epoch": 0.7105247027093294, "grad_norm": 1.9204910279974103, "learning_rate": 2.0411535644138266e-06, "loss": 0.6105, "step": 23183 }, { "epoch": 0.7105553512320706, "grad_norm": 1.7527416661685, "learning_rate": 2.0407534935184076e-06, "loss": 0.6723, "step": 23184 }, { "epoch": 0.7105859997548118, "grad_norm": 1.6263599174012555, "learning_rate": 2.0403534517811996e-06, "loss": 0.5312, "step": 23185 }, { "epoch": 0.710616648277553, "grad_norm": 1.80847171948466, "learning_rate": 2.0399534392061464e-06, "loss": 0.5627, "step": 23186 }, { "epoch": 0.7106472968002943, "grad_norm": 1.8430310773708591, "learning_rate": 2.039553455797192e-06, "loss": 0.6474, "step": 23187 }, { "epoch": 0.7106779453230354, "grad_norm": 1.679985900642099, "learning_rate": 2.039153501558272e-06, "loss": 0.5268, "step": 23188 }, { "epoch": 0.7107085938457767, "grad_norm": 1.5885242675986984, "learning_rate": 2.0387535764933306e-06, "loss": 0.5505, "step": 23189 }, { "epoch": 0.7107392423685178, "grad_norm": 1.9557106959595256, "learning_rate": 2.03835368060631e-06, "loss": 0.6419, "step": 23190 }, { "epoch": 0.7107698908912591, "grad_norm": 1.8963269334692765, "learning_rate": 2.0379538139011455e-06, "loss": 0.6224, "step": 23191 }, { "epoch": 0.7108005394140002, "grad_norm": 1.8285070850039957, "learning_rate": 2.0375539763817824e-06, "loss": 0.6846, "step": 23192 }, { "epoch": 0.7108311879367415, "grad_norm": 1.80799589333928, "learning_rate": 2.0371541680521543e-06, "loss": 0.569, "step": 23193 }, { "epoch": 0.7108618364594826, "grad_norm": 1.8382165440192817, "learning_rate": 2.0367543889162083e-06, "loss": 0.5946, "step": 23194 }, { "epoch": 0.7108924849822239, "grad_norm": 1.7503421344968126, "learning_rate": 2.036354638977879e-06, "loss": 0.5525, "step": 23195 }, { "epoch": 0.710923133504965, "grad_norm": 0.741462528928418, "learning_rate": 2.0359549182411043e-06, "loss": 0.3885, "step": 23196 }, { "epoch": 0.7109537820277063, "grad_norm": 1.813378512615143, "learning_rate": 2.035555226709824e-06, "loss": 0.6694, "step": 23197 }, { "epoch": 0.7109844305504475, "grad_norm": 1.81326211752348, "learning_rate": 2.0351555643879777e-06, "loss": 0.6232, "step": 23198 }, { "epoch": 0.7110150790731887, "grad_norm": 1.9063627528310345, "learning_rate": 2.0347559312795013e-06, "loss": 0.5124, "step": 23199 }, { "epoch": 0.7110457275959299, "grad_norm": 1.9142095310344636, "learning_rate": 2.034356327388333e-06, "loss": 0.5934, "step": 23200 }, { "epoch": 0.711076376118671, "grad_norm": 0.7819214686144125, "learning_rate": 2.0339567527184107e-06, "loss": 0.3911, "step": 23201 }, { "epoch": 0.7111070246414123, "grad_norm": 0.7906792676282863, "learning_rate": 2.033557207273673e-06, "loss": 0.385, "step": 23202 }, { "epoch": 0.7111376731641534, "grad_norm": 2.053966264292855, "learning_rate": 2.0331576910580554e-06, "loss": 0.5016, "step": 23203 }, { "epoch": 0.7111683216868947, "grad_norm": 1.9865966310690195, "learning_rate": 2.0327582040754916e-06, "loss": 0.6175, "step": 23204 }, { "epoch": 0.7111989702096359, "grad_norm": 0.7861561695085215, "learning_rate": 2.0323587463299217e-06, "loss": 0.4177, "step": 23205 }, { "epoch": 0.7112296187323771, "grad_norm": 0.7530975010034575, "learning_rate": 2.031959317825281e-06, "loss": 0.4252, "step": 23206 }, { "epoch": 0.7112602672551183, "grad_norm": 1.8617482462544541, "learning_rate": 2.031559918565504e-06, "loss": 0.6183, "step": 23207 }, { "epoch": 0.7112909157778595, "grad_norm": 1.669764139160423, "learning_rate": 2.0311605485545255e-06, "loss": 0.646, "step": 23208 }, { "epoch": 0.7113215643006007, "grad_norm": 1.926613452757321, "learning_rate": 2.0307612077962822e-06, "loss": 0.6316, "step": 23209 }, { "epoch": 0.7113522128233419, "grad_norm": 1.8370129563931323, "learning_rate": 2.03036189629471e-06, "loss": 0.5157, "step": 23210 }, { "epoch": 0.7113828613460831, "grad_norm": 1.843855052315008, "learning_rate": 2.029962614053742e-06, "loss": 0.5878, "step": 23211 }, { "epoch": 0.7114135098688243, "grad_norm": 0.7721483127143135, "learning_rate": 2.029563361077309e-06, "loss": 0.4036, "step": 23212 }, { "epoch": 0.7114441583915655, "grad_norm": 1.9330775013761665, "learning_rate": 2.0291641373693515e-06, "loss": 0.5395, "step": 23213 }, { "epoch": 0.7114748069143068, "grad_norm": 1.7547894777779736, "learning_rate": 2.0287649429337997e-06, "loss": 0.5794, "step": 23214 }, { "epoch": 0.7115054554370479, "grad_norm": 1.7995478985326505, "learning_rate": 2.0283657777745856e-06, "loss": 0.5509, "step": 23215 }, { "epoch": 0.7115361039597892, "grad_norm": 1.134026565195404, "learning_rate": 2.027966641895644e-06, "loss": 0.4196, "step": 23216 }, { "epoch": 0.7115667524825303, "grad_norm": 1.5900753669685623, "learning_rate": 2.027567535300909e-06, "loss": 0.5207, "step": 23217 }, { "epoch": 0.7115974010052716, "grad_norm": 2.1522510583980416, "learning_rate": 2.0271684579943096e-06, "loss": 0.5635, "step": 23218 }, { "epoch": 0.7116280495280127, "grad_norm": 1.8424589205458002, "learning_rate": 2.02676940997978e-06, "loss": 0.6783, "step": 23219 }, { "epoch": 0.711658698050754, "grad_norm": 0.7795880901777574, "learning_rate": 2.026370391261253e-06, "loss": 0.3968, "step": 23220 }, { "epoch": 0.7116893465734951, "grad_norm": 1.8132896275171264, "learning_rate": 2.0259714018426606e-06, "loss": 0.5079, "step": 23221 }, { "epoch": 0.7117199950962364, "grad_norm": 1.8085784903781739, "learning_rate": 2.0255724417279325e-06, "loss": 0.5423, "step": 23222 }, { "epoch": 0.7117506436189776, "grad_norm": 1.8484245115875961, "learning_rate": 2.0251735109209975e-06, "loss": 0.6005, "step": 23223 }, { "epoch": 0.7117812921417188, "grad_norm": 1.731796425280219, "learning_rate": 2.024774609425792e-06, "loss": 0.5993, "step": 23224 }, { "epoch": 0.71181194066446, "grad_norm": 1.759676922775562, "learning_rate": 2.0243757372462435e-06, "loss": 0.5655, "step": 23225 }, { "epoch": 0.7118425891872012, "grad_norm": 1.7457918105718693, "learning_rate": 2.0239768943862808e-06, "loss": 0.5425, "step": 23226 }, { "epoch": 0.7118732377099424, "grad_norm": 1.7989246224827218, "learning_rate": 2.0235780808498346e-06, "loss": 0.6469, "step": 23227 }, { "epoch": 0.7119038862326836, "grad_norm": 1.7984859512654205, "learning_rate": 2.0231792966408357e-06, "loss": 0.5771, "step": 23228 }, { "epoch": 0.7119345347554248, "grad_norm": 2.03598468565875, "learning_rate": 2.0227805417632148e-06, "loss": 0.5547, "step": 23229 }, { "epoch": 0.711965183278166, "grad_norm": 1.97317167103064, "learning_rate": 2.0223818162208965e-06, "loss": 0.7136, "step": 23230 }, { "epoch": 0.7119958318009072, "grad_norm": 1.8363949449234256, "learning_rate": 2.021983120017812e-06, "loss": 0.4943, "step": 23231 }, { "epoch": 0.7120264803236483, "grad_norm": 1.704586102759324, "learning_rate": 2.021584453157892e-06, "loss": 0.5443, "step": 23232 }, { "epoch": 0.7120571288463896, "grad_norm": 2.000977980579568, "learning_rate": 2.0211858156450627e-06, "loss": 0.6015, "step": 23233 }, { "epoch": 0.7120877773691308, "grad_norm": 2.0531933904660047, "learning_rate": 2.0207872074832476e-06, "loss": 0.6989, "step": 23234 }, { "epoch": 0.712118425891872, "grad_norm": 1.7672094565366918, "learning_rate": 2.020388628676382e-06, "loss": 0.6638, "step": 23235 }, { "epoch": 0.7121490744146132, "grad_norm": 1.9205215295061129, "learning_rate": 2.019990079228388e-06, "loss": 0.6313, "step": 23236 }, { "epoch": 0.7121797229373544, "grad_norm": 1.673349418749099, "learning_rate": 2.0195915591431957e-06, "loss": 0.5676, "step": 23237 }, { "epoch": 0.7122103714600956, "grad_norm": 1.9775920982808437, "learning_rate": 2.019193068424729e-06, "loss": 0.6444, "step": 23238 }, { "epoch": 0.7122410199828368, "grad_norm": 1.9035595076049148, "learning_rate": 2.0187946070769153e-06, "loss": 0.603, "step": 23239 }, { "epoch": 0.712271668505578, "grad_norm": 1.7485033093009892, "learning_rate": 2.0183961751036834e-06, "loss": 0.681, "step": 23240 }, { "epoch": 0.7123023170283193, "grad_norm": 1.9211488310790117, "learning_rate": 2.017997772508955e-06, "loss": 0.624, "step": 23241 }, { "epoch": 0.7123329655510604, "grad_norm": 1.77618846942375, "learning_rate": 2.0175993992966568e-06, "loss": 0.6015, "step": 23242 }, { "epoch": 0.7123636140738017, "grad_norm": 1.8344989892782266, "learning_rate": 2.017201055470717e-06, "loss": 0.4658, "step": 23243 }, { "epoch": 0.7123942625965428, "grad_norm": 0.7709583230353391, "learning_rate": 2.0168027410350587e-06, "loss": 0.4141, "step": 23244 }, { "epoch": 0.7124249111192841, "grad_norm": 1.9836064102788107, "learning_rate": 2.0164044559936023e-06, "loss": 0.6754, "step": 23245 }, { "epoch": 0.7124555596420252, "grad_norm": 1.822708645863138, "learning_rate": 2.01600620035028e-06, "loss": 0.6081, "step": 23246 }, { "epoch": 0.7124862081647665, "grad_norm": 1.9150249661500391, "learning_rate": 2.0156079741090107e-06, "loss": 0.6781, "step": 23247 }, { "epoch": 0.7125168566875076, "grad_norm": 2.1050798818460748, "learning_rate": 2.0152097772737204e-06, "loss": 0.6887, "step": 23248 }, { "epoch": 0.7125475052102489, "grad_norm": 1.749519945656159, "learning_rate": 2.0148116098483313e-06, "loss": 0.585, "step": 23249 }, { "epoch": 0.71257815373299, "grad_norm": 2.0277936522150655, "learning_rate": 2.0144134718367665e-06, "loss": 0.7063, "step": 23250 }, { "epoch": 0.7126088022557313, "grad_norm": 1.7220518357324859, "learning_rate": 2.014015363242951e-06, "loss": 0.5419, "step": 23251 }, { "epoch": 0.7126394507784725, "grad_norm": 2.053326516818585, "learning_rate": 2.0136172840708053e-06, "loss": 0.6323, "step": 23252 }, { "epoch": 0.7126700993012137, "grad_norm": 1.831034467211026, "learning_rate": 2.013219234324252e-06, "loss": 0.585, "step": 23253 }, { "epoch": 0.7127007478239549, "grad_norm": 1.8477501268133436, "learning_rate": 2.0128212140072156e-06, "loss": 0.6194, "step": 23254 }, { "epoch": 0.7127313963466961, "grad_norm": 1.745016911522144, "learning_rate": 2.012423223123614e-06, "loss": 0.4997, "step": 23255 }, { "epoch": 0.7127620448694373, "grad_norm": 0.8866469623908368, "learning_rate": 2.0120252616773735e-06, "loss": 0.4057, "step": 23256 }, { "epoch": 0.7127926933921785, "grad_norm": 1.9812305212617811, "learning_rate": 2.0116273296724098e-06, "loss": 0.5915, "step": 23257 }, { "epoch": 0.7128233419149197, "grad_norm": 1.9834392682562378, "learning_rate": 2.011229427112647e-06, "loss": 0.6918, "step": 23258 }, { "epoch": 0.712853990437661, "grad_norm": 1.6573062213386651, "learning_rate": 2.0108315540020072e-06, "loss": 0.5457, "step": 23259 }, { "epoch": 0.7128846389604021, "grad_norm": 0.7976743034158658, "learning_rate": 2.0104337103444074e-06, "loss": 0.4184, "step": 23260 }, { "epoch": 0.7129152874831434, "grad_norm": 1.949858662204934, "learning_rate": 2.010035896143769e-06, "loss": 0.6274, "step": 23261 }, { "epoch": 0.7129459360058845, "grad_norm": 1.8816516011569815, "learning_rate": 2.0096381114040136e-06, "loss": 0.6153, "step": 23262 }, { "epoch": 0.7129765845286257, "grad_norm": 1.7003423227615702, "learning_rate": 2.009240356129057e-06, "loss": 0.5242, "step": 23263 }, { "epoch": 0.7130072330513669, "grad_norm": 1.6958874316421089, "learning_rate": 2.0088426303228208e-06, "loss": 0.5709, "step": 23264 }, { "epoch": 0.7130378815741081, "grad_norm": 1.8113155263152403, "learning_rate": 2.0084449339892247e-06, "loss": 0.6166, "step": 23265 }, { "epoch": 0.7130685300968493, "grad_norm": 1.9920679656169442, "learning_rate": 2.0080472671321847e-06, "loss": 0.6041, "step": 23266 }, { "epoch": 0.7130991786195905, "grad_norm": 1.8763011573232755, "learning_rate": 2.0076496297556224e-06, "loss": 0.5891, "step": 23267 }, { "epoch": 0.7131298271423318, "grad_norm": 1.833979792971873, "learning_rate": 2.0072520218634524e-06, "loss": 0.5116, "step": 23268 }, { "epoch": 0.7131604756650729, "grad_norm": 1.7371687384222518, "learning_rate": 2.006854443459594e-06, "loss": 0.586, "step": 23269 }, { "epoch": 0.7131911241878142, "grad_norm": 1.748712267805885, "learning_rate": 2.006456894547966e-06, "loss": 0.601, "step": 23270 }, { "epoch": 0.7132217727105553, "grad_norm": 1.9644600263217997, "learning_rate": 2.0060593751324817e-06, "loss": 0.5573, "step": 23271 }, { "epoch": 0.7132524212332966, "grad_norm": 0.7674639736385244, "learning_rate": 2.0056618852170613e-06, "loss": 0.3994, "step": 23272 }, { "epoch": 0.7132830697560377, "grad_norm": 0.7797343873010857, "learning_rate": 2.0052644248056217e-06, "loss": 0.3992, "step": 23273 }, { "epoch": 0.713313718278779, "grad_norm": 0.7656722512361187, "learning_rate": 2.0048669939020766e-06, "loss": 0.4146, "step": 23274 }, { "epoch": 0.7133443668015201, "grad_norm": 1.7725987311760976, "learning_rate": 2.0044695925103435e-06, "loss": 0.5184, "step": 23275 }, { "epoch": 0.7133750153242614, "grad_norm": 1.6583014086306354, "learning_rate": 2.004072220634339e-06, "loss": 0.5616, "step": 23276 }, { "epoch": 0.7134056638470025, "grad_norm": 1.9790204092019381, "learning_rate": 2.0036748782779764e-06, "loss": 0.5467, "step": 23277 }, { "epoch": 0.7134363123697438, "grad_norm": 1.880158328329981, "learning_rate": 2.0032775654451736e-06, "loss": 0.5566, "step": 23278 }, { "epoch": 0.713466960892485, "grad_norm": 1.8335615193778667, "learning_rate": 2.0028802821398415e-06, "loss": 0.5596, "step": 23279 }, { "epoch": 0.7134976094152262, "grad_norm": 2.0484062924388375, "learning_rate": 2.0024830283658968e-06, "loss": 0.6299, "step": 23280 }, { "epoch": 0.7135282579379674, "grad_norm": 1.811417136248714, "learning_rate": 2.002085804127256e-06, "loss": 0.6985, "step": 23281 }, { "epoch": 0.7135589064607086, "grad_norm": 1.6933601369455704, "learning_rate": 2.0016886094278286e-06, "loss": 0.5099, "step": 23282 }, { "epoch": 0.7135895549834498, "grad_norm": 1.8787047168633457, "learning_rate": 2.001291444271531e-06, "loss": 0.6303, "step": 23283 }, { "epoch": 0.713620203506191, "grad_norm": 1.767553047273297, "learning_rate": 2.000894308662277e-06, "loss": 0.5036, "step": 23284 }, { "epoch": 0.7136508520289322, "grad_norm": 1.6917730715679942, "learning_rate": 2.000497202603978e-06, "loss": 0.6275, "step": 23285 }, { "epoch": 0.7136815005516735, "grad_norm": 1.846686415469187, "learning_rate": 2.000100126100547e-06, "loss": 0.5923, "step": 23286 }, { "epoch": 0.7137121490744146, "grad_norm": 1.9752737141464252, "learning_rate": 1.9997030791558985e-06, "loss": 0.5705, "step": 23287 }, { "epoch": 0.7137427975971559, "grad_norm": 1.9393220808153748, "learning_rate": 1.999306061773942e-06, "loss": 0.5521, "step": 23288 }, { "epoch": 0.713773446119897, "grad_norm": 1.8488336626011974, "learning_rate": 1.998909073958592e-06, "loss": 0.4911, "step": 23289 }, { "epoch": 0.7138040946426383, "grad_norm": 0.8278111880524146, "learning_rate": 1.9985121157137553e-06, "loss": 0.4104, "step": 23290 }, { "epoch": 0.7138347431653794, "grad_norm": 1.7415034311591493, "learning_rate": 1.99811518704335e-06, "loss": 0.6282, "step": 23291 }, { "epoch": 0.7138653916881207, "grad_norm": 1.824106577766883, "learning_rate": 1.997718287951285e-06, "loss": 0.6019, "step": 23292 }, { "epoch": 0.7138960402108618, "grad_norm": 1.826273417130726, "learning_rate": 1.9973214184414667e-06, "loss": 0.5643, "step": 23293 }, { "epoch": 0.713926688733603, "grad_norm": 1.8011917530411377, "learning_rate": 1.9969245785178093e-06, "loss": 0.502, "step": 23294 }, { "epoch": 0.7139573372563442, "grad_norm": 1.664763143619634, "learning_rate": 1.9965277681842244e-06, "loss": 0.5626, "step": 23295 }, { "epoch": 0.7139879857790854, "grad_norm": 1.7369037733717112, "learning_rate": 1.996130987444618e-06, "loss": 0.6152, "step": 23296 }, { "epoch": 0.7140186343018267, "grad_norm": 0.7856049009062752, "learning_rate": 1.995734236302901e-06, "loss": 0.4222, "step": 23297 }, { "epoch": 0.7140492828245678, "grad_norm": 1.8926803638192857, "learning_rate": 1.9953375147629854e-06, "loss": 0.5695, "step": 23298 }, { "epoch": 0.7140799313473091, "grad_norm": 1.8938694325282754, "learning_rate": 1.994940822828776e-06, "loss": 0.5688, "step": 23299 }, { "epoch": 0.7141105798700502, "grad_norm": 1.8877162730560928, "learning_rate": 1.994544160504186e-06, "loss": 0.6354, "step": 23300 }, { "epoch": 0.7141412283927915, "grad_norm": 0.8111548911866543, "learning_rate": 1.9941475277931187e-06, "loss": 0.4128, "step": 23301 }, { "epoch": 0.7141718769155326, "grad_norm": 0.7956280002695207, "learning_rate": 1.993750924699486e-06, "loss": 0.4119, "step": 23302 }, { "epoch": 0.7142025254382739, "grad_norm": 1.6518949463107642, "learning_rate": 1.9933543512271954e-06, "loss": 0.5282, "step": 23303 }, { "epoch": 0.714233173961015, "grad_norm": 1.7392295723977285, "learning_rate": 1.992957807380152e-06, "loss": 0.4968, "step": 23304 }, { "epoch": 0.7142638224837563, "grad_norm": 1.7803419685033894, "learning_rate": 1.992561293162265e-06, "loss": 0.5861, "step": 23305 }, { "epoch": 0.7142944710064975, "grad_norm": 0.7413307159600009, "learning_rate": 1.992164808577443e-06, "loss": 0.3756, "step": 23306 }, { "epoch": 0.7143251195292387, "grad_norm": 2.083529657632229, "learning_rate": 1.9917683536295886e-06, "loss": 0.6345, "step": 23307 }, { "epoch": 0.7143557680519799, "grad_norm": 1.8929858790119476, "learning_rate": 1.9913719283226123e-06, "loss": 0.6431, "step": 23308 }, { "epoch": 0.7143864165747211, "grad_norm": 2.231567208228349, "learning_rate": 1.9909755326604145e-06, "loss": 0.7115, "step": 23309 }, { "epoch": 0.7144170650974623, "grad_norm": 1.8992278564803313, "learning_rate": 1.9905791666469084e-06, "loss": 0.534, "step": 23310 }, { "epoch": 0.7144477136202035, "grad_norm": 1.9460394984446057, "learning_rate": 1.9901828302859954e-06, "loss": 0.6162, "step": 23311 }, { "epoch": 0.7144783621429447, "grad_norm": 0.753063967675235, "learning_rate": 1.9897865235815795e-06, "loss": 0.3996, "step": 23312 }, { "epoch": 0.714509010665686, "grad_norm": 1.9659262119286498, "learning_rate": 1.9893902465375677e-06, "loss": 0.5742, "step": 23313 }, { "epoch": 0.7145396591884271, "grad_norm": 0.7811014559043296, "learning_rate": 1.9889939991578648e-06, "loss": 0.3981, "step": 23314 }, { "epoch": 0.7145703077111684, "grad_norm": 1.9098290218640677, "learning_rate": 1.9885977814463734e-06, "loss": 0.7683, "step": 23315 }, { "epoch": 0.7146009562339095, "grad_norm": 1.9530918013540146, "learning_rate": 1.9882015934069985e-06, "loss": 0.6211, "step": 23316 }, { "epoch": 0.7146316047566508, "grad_norm": 0.7636438934362739, "learning_rate": 1.9878054350436452e-06, "loss": 0.4027, "step": 23317 }, { "epoch": 0.7146622532793919, "grad_norm": 1.8688191675435883, "learning_rate": 1.9874093063602146e-06, "loss": 0.5034, "step": 23318 }, { "epoch": 0.7146929018021332, "grad_norm": 1.6894839723152153, "learning_rate": 1.9870132073606124e-06, "loss": 0.598, "step": 23319 }, { "epoch": 0.7147235503248743, "grad_norm": 1.9181662301183557, "learning_rate": 1.9866171380487365e-06, "loss": 0.6312, "step": 23320 }, { "epoch": 0.7147541988476156, "grad_norm": 1.9480577579496423, "learning_rate": 1.9862210984284964e-06, "loss": 0.6254, "step": 23321 }, { "epoch": 0.7147848473703567, "grad_norm": 1.8742222188504112, "learning_rate": 1.9858250885037907e-06, "loss": 0.6424, "step": 23322 }, { "epoch": 0.714815495893098, "grad_norm": 1.8561428736324697, "learning_rate": 1.98542910827852e-06, "loss": 0.6004, "step": 23323 }, { "epoch": 0.7148461444158392, "grad_norm": 1.94343565529399, "learning_rate": 1.985033157756587e-06, "loss": 0.5391, "step": 23324 }, { "epoch": 0.7148767929385803, "grad_norm": 0.7661520033823499, "learning_rate": 1.9846372369418964e-06, "loss": 0.4044, "step": 23325 }, { "epoch": 0.7149074414613216, "grad_norm": 0.772490230512227, "learning_rate": 1.984241345838345e-06, "loss": 0.4004, "step": 23326 }, { "epoch": 0.7149380899840627, "grad_norm": 1.8587464355268453, "learning_rate": 1.9838454844498344e-06, "loss": 0.5574, "step": 23327 }, { "epoch": 0.714968738506804, "grad_norm": 0.7714659733416755, "learning_rate": 1.9834496527802665e-06, "loss": 0.3912, "step": 23328 }, { "epoch": 0.7149993870295451, "grad_norm": 2.1082762743626384, "learning_rate": 1.9830538508335425e-06, "loss": 0.5728, "step": 23329 }, { "epoch": 0.7150300355522864, "grad_norm": 1.6078727164391955, "learning_rate": 1.982658078613561e-06, "loss": 0.56, "step": 23330 }, { "epoch": 0.7150606840750275, "grad_norm": 0.7793734754064203, "learning_rate": 1.9822623361242176e-06, "loss": 0.4116, "step": 23331 }, { "epoch": 0.7150913325977688, "grad_norm": 1.7201150136038346, "learning_rate": 1.9818666233694196e-06, "loss": 0.5566, "step": 23332 }, { "epoch": 0.71512198112051, "grad_norm": 1.9297898079941411, "learning_rate": 1.981470940353062e-06, "loss": 0.6049, "step": 23333 }, { "epoch": 0.7151526296432512, "grad_norm": 1.7959315948516288, "learning_rate": 1.981075287079041e-06, "loss": 0.5935, "step": 23334 }, { "epoch": 0.7151832781659924, "grad_norm": 1.8447233721484027, "learning_rate": 1.9806796635512583e-06, "loss": 0.6628, "step": 23335 }, { "epoch": 0.7152139266887336, "grad_norm": 1.820395241071523, "learning_rate": 1.980284069773611e-06, "loss": 0.6365, "step": 23336 }, { "epoch": 0.7152445752114748, "grad_norm": 1.6883864954100911, "learning_rate": 1.9798885057499994e-06, "loss": 0.5788, "step": 23337 }, { "epoch": 0.715275223734216, "grad_norm": 0.830413644464335, "learning_rate": 1.9794929714843176e-06, "loss": 0.3929, "step": 23338 }, { "epoch": 0.7153058722569572, "grad_norm": 1.772269173798396, "learning_rate": 1.9790974669804637e-06, "loss": 0.6292, "step": 23339 }, { "epoch": 0.7153365207796984, "grad_norm": 1.7868437940309123, "learning_rate": 1.9787019922423376e-06, "loss": 0.6314, "step": 23340 }, { "epoch": 0.7153671693024396, "grad_norm": 1.8360323783783068, "learning_rate": 1.978306547273834e-06, "loss": 0.5432, "step": 23341 }, { "epoch": 0.7153978178251809, "grad_norm": 1.6737900894293776, "learning_rate": 1.977911132078845e-06, "loss": 0.5623, "step": 23342 }, { "epoch": 0.715428466347922, "grad_norm": 0.7998283269502207, "learning_rate": 1.977515746661275e-06, "loss": 0.3806, "step": 23343 }, { "epoch": 0.7154591148706633, "grad_norm": 1.7791515091281547, "learning_rate": 1.9771203910250155e-06, "loss": 0.6713, "step": 23344 }, { "epoch": 0.7154897633934044, "grad_norm": 1.8991100151673335, "learning_rate": 1.97672506517396e-06, "loss": 0.5525, "step": 23345 }, { "epoch": 0.7155204119161457, "grad_norm": 0.787574995057761, "learning_rate": 1.9763297691120065e-06, "loss": 0.4083, "step": 23346 }, { "epoch": 0.7155510604388868, "grad_norm": 1.719637710080032, "learning_rate": 1.975934502843049e-06, "loss": 0.5777, "step": 23347 }, { "epoch": 0.7155817089616281, "grad_norm": 2.0016167462835224, "learning_rate": 1.9755392663709842e-06, "loss": 0.6875, "step": 23348 }, { "epoch": 0.7156123574843692, "grad_norm": 1.9551911119989187, "learning_rate": 1.975144059699704e-06, "loss": 0.6659, "step": 23349 }, { "epoch": 0.7156430060071105, "grad_norm": 0.7852424592760139, "learning_rate": 1.9747488828331022e-06, "loss": 0.409, "step": 23350 }, { "epoch": 0.7156736545298517, "grad_norm": 1.7843629970746768, "learning_rate": 1.9743537357750763e-06, "loss": 0.5489, "step": 23351 }, { "epoch": 0.7157043030525929, "grad_norm": 1.9282581811448924, "learning_rate": 1.9739586185295172e-06, "loss": 0.6578, "step": 23352 }, { "epoch": 0.7157349515753341, "grad_norm": 1.9940129022156443, "learning_rate": 1.973563531100316e-06, "loss": 0.6153, "step": 23353 }, { "epoch": 0.7157656000980753, "grad_norm": 1.9284827280931887, "learning_rate": 1.9731684734913675e-06, "loss": 0.6727, "step": 23354 }, { "epoch": 0.7157962486208165, "grad_norm": 1.6365468767597429, "learning_rate": 1.972773445706565e-06, "loss": 0.646, "step": 23355 }, { "epoch": 0.7158268971435576, "grad_norm": 0.763062875557492, "learning_rate": 1.9723784477498014e-06, "loss": 0.3872, "step": 23356 }, { "epoch": 0.7158575456662989, "grad_norm": 1.9895488531382757, "learning_rate": 1.9719834796249666e-06, "loss": 0.5992, "step": 23357 }, { "epoch": 0.71588819418904, "grad_norm": 0.8245664497970027, "learning_rate": 1.9715885413359525e-06, "loss": 0.4137, "step": 23358 }, { "epoch": 0.7159188427117813, "grad_norm": 2.016771639976629, "learning_rate": 1.971193632886654e-06, "loss": 0.6849, "step": 23359 }, { "epoch": 0.7159494912345225, "grad_norm": 1.825129721190977, "learning_rate": 1.9707987542809585e-06, "loss": 0.5842, "step": 23360 }, { "epoch": 0.7159801397572637, "grad_norm": 2.008653689263666, "learning_rate": 1.970403905522755e-06, "loss": 0.5303, "step": 23361 }, { "epoch": 0.7160107882800049, "grad_norm": 0.8045440983810871, "learning_rate": 1.970009086615941e-06, "loss": 0.4291, "step": 23362 }, { "epoch": 0.7160414368027461, "grad_norm": 2.0962076099943734, "learning_rate": 1.9696142975644008e-06, "loss": 0.6254, "step": 23363 }, { "epoch": 0.7160720853254873, "grad_norm": 2.04235033121333, "learning_rate": 1.9692195383720275e-06, "loss": 0.5573, "step": 23364 }, { "epoch": 0.7161027338482285, "grad_norm": 1.8352753027121609, "learning_rate": 1.968824809042708e-06, "loss": 0.5183, "step": 23365 }, { "epoch": 0.7161333823709697, "grad_norm": 1.7593154332600092, "learning_rate": 1.968430109580333e-06, "loss": 0.6361, "step": 23366 }, { "epoch": 0.716164030893711, "grad_norm": 1.7450092664514025, "learning_rate": 1.968035439988794e-06, "loss": 0.6034, "step": 23367 }, { "epoch": 0.7161946794164521, "grad_norm": 1.8229805116632172, "learning_rate": 1.9676408002719753e-06, "loss": 0.6209, "step": 23368 }, { "epoch": 0.7162253279391934, "grad_norm": 1.8282879220841206, "learning_rate": 1.967246190433768e-06, "loss": 0.6198, "step": 23369 }, { "epoch": 0.7162559764619345, "grad_norm": 1.9555872135500743, "learning_rate": 1.966851610478062e-06, "loss": 0.6419, "step": 23370 }, { "epoch": 0.7162866249846758, "grad_norm": 1.9313966558767925, "learning_rate": 1.9664570604087428e-06, "loss": 0.5517, "step": 23371 }, { "epoch": 0.7163172735074169, "grad_norm": 1.7467264356729766, "learning_rate": 1.9660625402296948e-06, "loss": 0.5443, "step": 23372 }, { "epoch": 0.7163479220301582, "grad_norm": 1.9543238010255561, "learning_rate": 1.965668049944812e-06, "loss": 0.5791, "step": 23373 }, { "epoch": 0.7163785705528993, "grad_norm": 0.7894441604445079, "learning_rate": 1.9652735895579773e-06, "loss": 0.395, "step": 23374 }, { "epoch": 0.7164092190756406, "grad_norm": 1.906435824470127, "learning_rate": 1.96487915907308e-06, "loss": 0.6087, "step": 23375 }, { "epoch": 0.7164398675983817, "grad_norm": 1.9045584960697757, "learning_rate": 1.964484758494003e-06, "loss": 0.5785, "step": 23376 }, { "epoch": 0.716470516121123, "grad_norm": 1.9392067432652067, "learning_rate": 1.9640903878246344e-06, "loss": 0.6454, "step": 23377 }, { "epoch": 0.7165011646438642, "grad_norm": 1.9190598875286806, "learning_rate": 1.9636960470688613e-06, "loss": 0.5492, "step": 23378 }, { "epoch": 0.7165318131666054, "grad_norm": 1.7677878305413819, "learning_rate": 1.963301736230567e-06, "loss": 0.6287, "step": 23379 }, { "epoch": 0.7165624616893466, "grad_norm": 1.6357825870522058, "learning_rate": 1.9629074553136367e-06, "loss": 0.5597, "step": 23380 }, { "epoch": 0.7165931102120878, "grad_norm": 1.9631657649248864, "learning_rate": 1.9625132043219584e-06, "loss": 0.5592, "step": 23381 }, { "epoch": 0.716623758734829, "grad_norm": 1.8472228918897065, "learning_rate": 1.962118983259413e-06, "loss": 0.5945, "step": 23382 }, { "epoch": 0.7166544072575702, "grad_norm": 1.6940336183866413, "learning_rate": 1.9617247921298865e-06, "loss": 0.5105, "step": 23383 }, { "epoch": 0.7166850557803114, "grad_norm": 0.8095851034491839, "learning_rate": 1.961330630937265e-06, "loss": 0.4251, "step": 23384 }, { "epoch": 0.7167157043030526, "grad_norm": 1.90751798198411, "learning_rate": 1.9609364996854285e-06, "loss": 0.5857, "step": 23385 }, { "epoch": 0.7167463528257938, "grad_norm": 1.7304857852433921, "learning_rate": 1.9605423983782633e-06, "loss": 0.567, "step": 23386 }, { "epoch": 0.716777001348535, "grad_norm": 0.8076233905022849, "learning_rate": 1.960148327019651e-06, "loss": 0.3961, "step": 23387 }, { "epoch": 0.7168076498712762, "grad_norm": 1.8644908674794871, "learning_rate": 1.9597542856134737e-06, "loss": 0.5745, "step": 23388 }, { "epoch": 0.7168382983940174, "grad_norm": 1.7978567838370554, "learning_rate": 1.959360274163618e-06, "loss": 0.4767, "step": 23389 }, { "epoch": 0.7168689469167586, "grad_norm": 1.8336374626899308, "learning_rate": 1.9589662926739616e-06, "loss": 0.5864, "step": 23390 }, { "epoch": 0.7168995954394998, "grad_norm": 1.7170001984326235, "learning_rate": 1.958572341148388e-06, "loss": 0.509, "step": 23391 }, { "epoch": 0.716930243962241, "grad_norm": 1.9791816046046133, "learning_rate": 1.9581784195907817e-06, "loss": 0.5979, "step": 23392 }, { "epoch": 0.7169608924849822, "grad_norm": 2.014566128662328, "learning_rate": 1.9577845280050194e-06, "loss": 0.5954, "step": 23393 }, { "epoch": 0.7169915410077234, "grad_norm": 0.8083426915414897, "learning_rate": 1.9573906663949845e-06, "loss": 0.4049, "step": 23394 }, { "epoch": 0.7170221895304646, "grad_norm": 1.6639828213640702, "learning_rate": 1.9569968347645597e-06, "loss": 0.5986, "step": 23395 }, { "epoch": 0.7170528380532059, "grad_norm": 1.7108187972290727, "learning_rate": 1.9566030331176223e-06, "loss": 0.5239, "step": 23396 }, { "epoch": 0.717083486575947, "grad_norm": 0.8317850466332807, "learning_rate": 1.956209261458055e-06, "loss": 0.4204, "step": 23397 }, { "epoch": 0.7171141350986883, "grad_norm": 1.7488841731868194, "learning_rate": 1.9558155197897355e-06, "loss": 0.5785, "step": 23398 }, { "epoch": 0.7171447836214294, "grad_norm": 1.9615033133945208, "learning_rate": 1.9554218081165444e-06, "loss": 0.6447, "step": 23399 }, { "epoch": 0.7171754321441707, "grad_norm": 1.9353188284336642, "learning_rate": 1.9550281264423626e-06, "loss": 0.6169, "step": 23400 }, { "epoch": 0.7172060806669118, "grad_norm": 1.8874662315497817, "learning_rate": 1.9546344747710666e-06, "loss": 0.5899, "step": 23401 }, { "epoch": 0.7172367291896531, "grad_norm": 1.8342424727559135, "learning_rate": 1.954240853106536e-06, "loss": 0.5268, "step": 23402 }, { "epoch": 0.7172673777123942, "grad_norm": 2.005364833134572, "learning_rate": 1.953847261452651e-06, "loss": 0.5924, "step": 23403 }, { "epoch": 0.7172980262351355, "grad_norm": 2.0437668123591495, "learning_rate": 1.953453699813287e-06, "loss": 0.6823, "step": 23404 }, { "epoch": 0.7173286747578766, "grad_norm": 1.8720161110526314, "learning_rate": 1.953060168192325e-06, "loss": 0.5212, "step": 23405 }, { "epoch": 0.7173593232806179, "grad_norm": 1.8207635995186555, "learning_rate": 1.9526666665936388e-06, "loss": 0.639, "step": 23406 }, { "epoch": 0.7173899718033591, "grad_norm": 1.9928252348289341, "learning_rate": 1.952273195021108e-06, "loss": 0.6076, "step": 23407 }, { "epoch": 0.7174206203261003, "grad_norm": 0.7548936921613243, "learning_rate": 1.95187975347861e-06, "loss": 0.394, "step": 23408 }, { "epoch": 0.7174512688488415, "grad_norm": 2.1028389121759274, "learning_rate": 1.9514863419700198e-06, "loss": 0.5552, "step": 23409 }, { "epoch": 0.7174819173715827, "grad_norm": 1.668699232149452, "learning_rate": 1.9510929604992147e-06, "loss": 0.5287, "step": 23410 }, { "epoch": 0.7175125658943239, "grad_norm": 1.6912321613554033, "learning_rate": 1.950699609070072e-06, "loss": 0.6225, "step": 23411 }, { "epoch": 0.7175432144170651, "grad_norm": 1.6962714277633042, "learning_rate": 1.950306287686465e-06, "loss": 0.5792, "step": 23412 }, { "epoch": 0.7175738629398063, "grad_norm": 1.7260417333895797, "learning_rate": 1.94991299635227e-06, "loss": 0.5775, "step": 23413 }, { "epoch": 0.7176045114625476, "grad_norm": 1.820887249100399, "learning_rate": 1.9495197350713645e-06, "loss": 0.5751, "step": 23414 }, { "epoch": 0.7176351599852887, "grad_norm": 1.9269338114418173, "learning_rate": 1.9491265038476197e-06, "loss": 0.6372, "step": 23415 }, { "epoch": 0.71766580850803, "grad_norm": 1.758989996496664, "learning_rate": 1.948733302684914e-06, "loss": 0.5547, "step": 23416 }, { "epoch": 0.7176964570307711, "grad_norm": 0.7967137978112949, "learning_rate": 1.9483401315871163e-06, "loss": 0.4077, "step": 23417 }, { "epoch": 0.7177271055535123, "grad_norm": 1.6645114270155472, "learning_rate": 1.947946990558107e-06, "loss": 0.4536, "step": 23418 }, { "epoch": 0.7177577540762535, "grad_norm": 1.5535236490442292, "learning_rate": 1.947553879601758e-06, "loss": 0.5419, "step": 23419 }, { "epoch": 0.7177884025989947, "grad_norm": 1.6611156290963447, "learning_rate": 1.9471607987219394e-06, "loss": 0.5271, "step": 23420 }, { "epoch": 0.7178190511217359, "grad_norm": 0.8193492395100239, "learning_rate": 1.946767747922526e-06, "loss": 0.4181, "step": 23421 }, { "epoch": 0.7178496996444771, "grad_norm": 1.8099552838043929, "learning_rate": 1.9463747272073935e-06, "loss": 0.6744, "step": 23422 }, { "epoch": 0.7178803481672184, "grad_norm": 1.8954728138990922, "learning_rate": 1.94598173658041e-06, "loss": 0.6166, "step": 23423 }, { "epoch": 0.7179109966899595, "grad_norm": 1.837731965097584, "learning_rate": 1.9455887760454505e-06, "loss": 0.5553, "step": 23424 }, { "epoch": 0.7179416452127008, "grad_norm": 1.774026417738412, "learning_rate": 1.9451958456063876e-06, "loss": 0.5538, "step": 23425 }, { "epoch": 0.7179722937354419, "grad_norm": 1.8422071352660991, "learning_rate": 1.9448029452670902e-06, "loss": 0.5474, "step": 23426 }, { "epoch": 0.7180029422581832, "grad_norm": 1.7279428165957047, "learning_rate": 1.9444100750314327e-06, "loss": 0.5193, "step": 23427 }, { "epoch": 0.7180335907809243, "grad_norm": 0.7959201000736668, "learning_rate": 1.9440172349032806e-06, "loss": 0.4019, "step": 23428 }, { "epoch": 0.7180642393036656, "grad_norm": 2.049102765148475, "learning_rate": 1.9436244248865123e-06, "loss": 0.6385, "step": 23429 }, { "epoch": 0.7180948878264067, "grad_norm": 1.9210743927941902, "learning_rate": 1.9432316449849946e-06, "loss": 0.5656, "step": 23430 }, { "epoch": 0.718125536349148, "grad_norm": 1.8177082887058162, "learning_rate": 1.9428388952025963e-06, "loss": 0.6187, "step": 23431 }, { "epoch": 0.7181561848718891, "grad_norm": 0.7746649717451858, "learning_rate": 1.942446175543188e-06, "loss": 0.4112, "step": 23432 }, { "epoch": 0.7181868333946304, "grad_norm": 1.7713698669256102, "learning_rate": 1.9420534860106417e-06, "loss": 0.6275, "step": 23433 }, { "epoch": 0.7182174819173716, "grad_norm": 1.768407047130166, "learning_rate": 1.941660826608823e-06, "loss": 0.5365, "step": 23434 }, { "epoch": 0.7182481304401128, "grad_norm": 1.5464280915616382, "learning_rate": 1.941268197341603e-06, "loss": 0.4882, "step": 23435 }, { "epoch": 0.718278778962854, "grad_norm": 1.6429785898513278, "learning_rate": 1.9408755982128498e-06, "loss": 0.4846, "step": 23436 }, { "epoch": 0.7183094274855952, "grad_norm": 1.9568568161114293, "learning_rate": 1.9404830292264336e-06, "loss": 0.5862, "step": 23437 }, { "epoch": 0.7183400760083364, "grad_norm": 1.7664867451561543, "learning_rate": 1.9400904903862215e-06, "loss": 0.4869, "step": 23438 }, { "epoch": 0.7183707245310776, "grad_norm": 1.6537130597722702, "learning_rate": 1.939697981696077e-06, "loss": 0.5511, "step": 23439 }, { "epoch": 0.7184013730538188, "grad_norm": 1.811177193476457, "learning_rate": 1.9393055031598745e-06, "loss": 0.558, "step": 23440 }, { "epoch": 0.71843202157656, "grad_norm": 0.8142390190188472, "learning_rate": 1.9389130547814784e-06, "loss": 0.4172, "step": 23441 }, { "epoch": 0.7184626700993012, "grad_norm": 1.6647890288334868, "learning_rate": 1.938520636564753e-06, "loss": 0.4862, "step": 23442 }, { "epoch": 0.7184933186220425, "grad_norm": 1.7625217367662378, "learning_rate": 1.9381282485135676e-06, "loss": 0.5489, "step": 23443 }, { "epoch": 0.7185239671447836, "grad_norm": 1.9148894491867035, "learning_rate": 1.937735890631788e-06, "loss": 0.5776, "step": 23444 }, { "epoch": 0.7185546156675249, "grad_norm": 2.0241076512286704, "learning_rate": 1.937343562923281e-06, "loss": 0.5574, "step": 23445 }, { "epoch": 0.718585264190266, "grad_norm": 1.7505064714986507, "learning_rate": 1.936951265391911e-06, "loss": 0.5993, "step": 23446 }, { "epoch": 0.7186159127130073, "grad_norm": 1.9455724993644727, "learning_rate": 1.9365589980415427e-06, "loss": 0.6257, "step": 23447 }, { "epoch": 0.7186465612357484, "grad_norm": 2.1642114879911776, "learning_rate": 1.936166760876045e-06, "loss": 0.5766, "step": 23448 }, { "epoch": 0.7186772097584896, "grad_norm": 0.7801737990342374, "learning_rate": 1.93577455389928e-06, "loss": 0.4258, "step": 23449 }, { "epoch": 0.7187078582812308, "grad_norm": 1.846574553466598, "learning_rate": 1.93538237711511e-06, "loss": 0.6119, "step": 23450 }, { "epoch": 0.718738506803972, "grad_norm": 1.6980958176768337, "learning_rate": 1.934990230527402e-06, "loss": 0.5204, "step": 23451 }, { "epoch": 0.7187691553267133, "grad_norm": 0.7785230989923579, "learning_rate": 1.9345981141400215e-06, "loss": 0.3893, "step": 23452 }, { "epoch": 0.7187998038494544, "grad_norm": 0.7637886848960367, "learning_rate": 1.934206027956828e-06, "loss": 0.4204, "step": 23453 }, { "epoch": 0.7188304523721957, "grad_norm": 1.6999784523712138, "learning_rate": 1.9338139719816866e-06, "loss": 0.6216, "step": 23454 }, { "epoch": 0.7188611008949368, "grad_norm": 1.9018324810049438, "learning_rate": 1.933421946218461e-06, "loss": 0.6677, "step": 23455 }, { "epoch": 0.7188917494176781, "grad_norm": 1.782994969505866, "learning_rate": 1.933029950671016e-06, "loss": 0.6993, "step": 23456 }, { "epoch": 0.7189223979404192, "grad_norm": 1.8981609379861355, "learning_rate": 1.932637985343211e-06, "loss": 0.6001, "step": 23457 }, { "epoch": 0.7189530464631605, "grad_norm": 1.6593622434805566, "learning_rate": 1.932246050238905e-06, "loss": 0.5538, "step": 23458 }, { "epoch": 0.7189836949859016, "grad_norm": 1.719214028204955, "learning_rate": 1.9318541453619673e-06, "loss": 0.6346, "step": 23459 }, { "epoch": 0.7190143435086429, "grad_norm": 1.7789786096871132, "learning_rate": 1.9314622707162555e-06, "loss": 0.5294, "step": 23460 }, { "epoch": 0.7190449920313841, "grad_norm": 2.0912299474059095, "learning_rate": 1.9310704263056295e-06, "loss": 0.4993, "step": 23461 }, { "epoch": 0.7190756405541253, "grad_norm": 1.8080345972890657, "learning_rate": 1.9306786121339522e-06, "loss": 0.5978, "step": 23462 }, { "epoch": 0.7191062890768665, "grad_norm": 1.7550819139705285, "learning_rate": 1.9302868282050837e-06, "loss": 0.6164, "step": 23463 }, { "epoch": 0.7191369375996077, "grad_norm": 1.7033208542920772, "learning_rate": 1.9298950745228858e-06, "loss": 0.5806, "step": 23464 }, { "epoch": 0.7191675861223489, "grad_norm": 1.840714504296883, "learning_rate": 1.929503351091216e-06, "loss": 0.4298, "step": 23465 }, { "epoch": 0.7191982346450901, "grad_norm": 1.7145681235281884, "learning_rate": 1.9291116579139347e-06, "loss": 0.543, "step": 23466 }, { "epoch": 0.7192288831678313, "grad_norm": 1.9768924727185242, "learning_rate": 1.9287199949949043e-06, "loss": 0.6371, "step": 23467 }, { "epoch": 0.7192595316905726, "grad_norm": 1.6989029757243534, "learning_rate": 1.928328362337981e-06, "loss": 0.552, "step": 23468 }, { "epoch": 0.7192901802133137, "grad_norm": 1.7516344426949215, "learning_rate": 1.9279367599470216e-06, "loss": 0.6035, "step": 23469 }, { "epoch": 0.719320828736055, "grad_norm": 1.9067771626066488, "learning_rate": 1.9275451878258905e-06, "loss": 0.6156, "step": 23470 }, { "epoch": 0.7193514772587961, "grad_norm": 0.782437487920946, "learning_rate": 1.9271536459784406e-06, "loss": 0.414, "step": 23471 }, { "epoch": 0.7193821257815374, "grad_norm": 2.0691452063985296, "learning_rate": 1.926762134408534e-06, "loss": 0.6138, "step": 23472 }, { "epoch": 0.7194127743042785, "grad_norm": 1.8237918684483228, "learning_rate": 1.926370653120025e-06, "loss": 0.5902, "step": 23473 }, { "epoch": 0.7194434228270198, "grad_norm": 1.8043930745997294, "learning_rate": 1.9259792021167724e-06, "loss": 0.6237, "step": 23474 }, { "epoch": 0.7194740713497609, "grad_norm": 1.9472377686976934, "learning_rate": 1.925587781402635e-06, "loss": 0.7169, "step": 23475 }, { "epoch": 0.7195047198725022, "grad_norm": 0.7783201773360993, "learning_rate": 1.925196390981465e-06, "loss": 0.4187, "step": 23476 }, { "epoch": 0.7195353683952433, "grad_norm": 1.7164182999278974, "learning_rate": 1.9248050308571224e-06, "loss": 0.5592, "step": 23477 }, { "epoch": 0.7195660169179846, "grad_norm": 1.9424133523822475, "learning_rate": 1.924413701033465e-06, "loss": 0.5633, "step": 23478 }, { "epoch": 0.7195966654407258, "grad_norm": 1.574434911405477, "learning_rate": 1.9240224015143455e-06, "loss": 0.5208, "step": 23479 }, { "epoch": 0.7196273139634669, "grad_norm": 1.7577811640984833, "learning_rate": 1.9236311323036172e-06, "loss": 0.5465, "step": 23480 }, { "epoch": 0.7196579624862082, "grad_norm": 1.7128059423086235, "learning_rate": 1.923239893405142e-06, "loss": 0.6305, "step": 23481 }, { "epoch": 0.7196886110089493, "grad_norm": 1.8771043575176163, "learning_rate": 1.9228486848227697e-06, "loss": 0.5232, "step": 23482 }, { "epoch": 0.7197192595316906, "grad_norm": 0.7716247858539694, "learning_rate": 1.922457506560358e-06, "loss": 0.3948, "step": 23483 }, { "epoch": 0.7197499080544317, "grad_norm": 1.8693746939283689, "learning_rate": 1.9220663586217585e-06, "loss": 0.618, "step": 23484 }, { "epoch": 0.719780556577173, "grad_norm": 1.957344499495347, "learning_rate": 1.9216752410108264e-06, "loss": 0.6388, "step": 23485 }, { "epoch": 0.7198112050999141, "grad_norm": 1.9149641814046827, "learning_rate": 1.9212841537314173e-06, "loss": 0.5367, "step": 23486 }, { "epoch": 0.7198418536226554, "grad_norm": 1.7907778956453448, "learning_rate": 1.9208930967873824e-06, "loss": 0.5806, "step": 23487 }, { "epoch": 0.7198725021453966, "grad_norm": 1.7189727377000492, "learning_rate": 1.920502070182575e-06, "loss": 0.545, "step": 23488 }, { "epoch": 0.7199031506681378, "grad_norm": 0.7976917841914367, "learning_rate": 1.92011107392085e-06, "loss": 0.4075, "step": 23489 }, { "epoch": 0.719933799190879, "grad_norm": 1.874277581026874, "learning_rate": 1.9197201080060572e-06, "loss": 0.5649, "step": 23490 }, { "epoch": 0.7199644477136202, "grad_norm": 2.12850531272517, "learning_rate": 1.9193291724420503e-06, "loss": 0.6094, "step": 23491 }, { "epoch": 0.7199950962363614, "grad_norm": 1.8692890991139925, "learning_rate": 1.9189382672326833e-06, "loss": 0.6668, "step": 23492 }, { "epoch": 0.7200257447591026, "grad_norm": 1.7001155133630694, "learning_rate": 1.918547392381804e-06, "loss": 0.6426, "step": 23493 }, { "epoch": 0.7200563932818438, "grad_norm": 1.7608107067175254, "learning_rate": 1.9181565478932675e-06, "loss": 0.6486, "step": 23494 }, { "epoch": 0.720087041804585, "grad_norm": 1.8641728529625101, "learning_rate": 1.917765733770921e-06, "loss": 0.6547, "step": 23495 }, { "epoch": 0.7201176903273262, "grad_norm": 0.7995695284016725, "learning_rate": 1.9173749500186174e-06, "loss": 0.3929, "step": 23496 }, { "epoch": 0.7201483388500675, "grad_norm": 1.9326510755507207, "learning_rate": 1.9169841966402085e-06, "loss": 0.5711, "step": 23497 }, { "epoch": 0.7201789873728086, "grad_norm": 1.8898877300412193, "learning_rate": 1.916593473639542e-06, "loss": 0.6821, "step": 23498 }, { "epoch": 0.7202096358955499, "grad_norm": 0.783461447689035, "learning_rate": 1.916202781020469e-06, "loss": 0.4185, "step": 23499 }, { "epoch": 0.720240284418291, "grad_norm": 0.878058261926522, "learning_rate": 1.9158121187868407e-06, "loss": 0.4293, "step": 23500 }, { "epoch": 0.7202709329410323, "grad_norm": 0.824728111205083, "learning_rate": 1.9154214869425026e-06, "loss": 0.4185, "step": 23501 }, { "epoch": 0.7203015814637734, "grad_norm": 1.5941903909977735, "learning_rate": 1.915030885491308e-06, "loss": 0.6361, "step": 23502 }, { "epoch": 0.7203322299865147, "grad_norm": 1.8136737131748462, "learning_rate": 1.9146403144371014e-06, "loss": 0.6089, "step": 23503 }, { "epoch": 0.7203628785092558, "grad_norm": 1.7612845052126627, "learning_rate": 1.9142497737837327e-06, "loss": 0.6374, "step": 23504 }, { "epoch": 0.7203935270319971, "grad_norm": 1.828463557515693, "learning_rate": 1.9138592635350535e-06, "loss": 0.5936, "step": 23505 }, { "epoch": 0.7204241755547383, "grad_norm": 0.7961946202186473, "learning_rate": 1.913468783694906e-06, "loss": 0.4416, "step": 23506 }, { "epoch": 0.7204548240774795, "grad_norm": 1.9882601234151942, "learning_rate": 1.9130783342671406e-06, "loss": 0.5337, "step": 23507 }, { "epoch": 0.7204854726002207, "grad_norm": 1.821949522812847, "learning_rate": 1.9126879152556055e-06, "loss": 0.5428, "step": 23508 }, { "epoch": 0.7205161211229619, "grad_norm": 1.859919208613375, "learning_rate": 1.9122975266641446e-06, "loss": 0.6767, "step": 23509 }, { "epoch": 0.7205467696457031, "grad_norm": 1.717515989964583, "learning_rate": 1.9119071684966065e-06, "loss": 0.4766, "step": 23510 }, { "epoch": 0.7205774181684442, "grad_norm": 0.7232193944748272, "learning_rate": 1.911516840756839e-06, "loss": 0.3997, "step": 23511 }, { "epoch": 0.7206080666911855, "grad_norm": 1.8090896714082423, "learning_rate": 1.911126543448685e-06, "loss": 0.4875, "step": 23512 }, { "epoch": 0.7206387152139266, "grad_norm": 1.8168481983046005, "learning_rate": 1.910736276575992e-06, "loss": 0.5453, "step": 23513 }, { "epoch": 0.7206693637366679, "grad_norm": 2.1813514235341125, "learning_rate": 1.910346040142604e-06, "loss": 0.5471, "step": 23514 }, { "epoch": 0.720700012259409, "grad_norm": 1.8051290937400293, "learning_rate": 1.9099558341523664e-06, "loss": 0.6486, "step": 23515 }, { "epoch": 0.7207306607821503, "grad_norm": 0.7554758429662177, "learning_rate": 1.9095656586091273e-06, "loss": 0.3943, "step": 23516 }, { "epoch": 0.7207613093048915, "grad_norm": 0.8438305040648602, "learning_rate": 1.909175513516726e-06, "loss": 0.4053, "step": 23517 }, { "epoch": 0.7207919578276327, "grad_norm": 1.8308917009458268, "learning_rate": 1.908785398879009e-06, "loss": 0.6259, "step": 23518 }, { "epoch": 0.7208226063503739, "grad_norm": 0.7595632357197978, "learning_rate": 1.9083953146998224e-06, "loss": 0.3935, "step": 23519 }, { "epoch": 0.7208532548731151, "grad_norm": 1.9041394537337015, "learning_rate": 1.9080052609830065e-06, "loss": 0.6283, "step": 23520 }, { "epoch": 0.7208839033958563, "grad_norm": 1.8282586766184457, "learning_rate": 1.907615237732405e-06, "loss": 0.618, "step": 23521 }, { "epoch": 0.7209145519185975, "grad_norm": 1.7842010774663848, "learning_rate": 1.9072252449518647e-06, "loss": 0.5747, "step": 23522 }, { "epoch": 0.7209452004413387, "grad_norm": 1.7948999850020328, "learning_rate": 1.9068352826452225e-06, "loss": 0.5479, "step": 23523 }, { "epoch": 0.72097584896408, "grad_norm": 1.9273131745777479, "learning_rate": 1.9064453508163261e-06, "loss": 0.6224, "step": 23524 }, { "epoch": 0.7210064974868211, "grad_norm": 1.7159620116192786, "learning_rate": 1.906055449469013e-06, "loss": 0.5204, "step": 23525 }, { "epoch": 0.7210371460095624, "grad_norm": 2.0016004411747135, "learning_rate": 1.9056655786071277e-06, "loss": 0.6161, "step": 23526 }, { "epoch": 0.7210677945323035, "grad_norm": 0.7894134747739006, "learning_rate": 1.9052757382345128e-06, "loss": 0.4199, "step": 23527 }, { "epoch": 0.7210984430550448, "grad_norm": 1.973791816268171, "learning_rate": 1.9048859283550059e-06, "loss": 0.5535, "step": 23528 }, { "epoch": 0.7211290915777859, "grad_norm": 1.791492596047958, "learning_rate": 1.9044961489724495e-06, "loss": 0.5079, "step": 23529 }, { "epoch": 0.7211597401005272, "grad_norm": 1.8065109458338544, "learning_rate": 1.9041064000906868e-06, "loss": 0.6046, "step": 23530 }, { "epoch": 0.7211903886232683, "grad_norm": 1.5836912548364235, "learning_rate": 1.9037166817135538e-06, "loss": 0.5553, "step": 23531 }, { "epoch": 0.7212210371460096, "grad_norm": 0.758902490620417, "learning_rate": 1.9033269938448927e-06, "loss": 0.4022, "step": 23532 }, { "epoch": 0.7212516856687508, "grad_norm": 1.7773126008533982, "learning_rate": 1.902937336488545e-06, "loss": 0.6469, "step": 23533 }, { "epoch": 0.721282334191492, "grad_norm": 0.8079581752886921, "learning_rate": 1.9025477096483463e-06, "loss": 0.3927, "step": 23534 }, { "epoch": 0.7213129827142332, "grad_norm": 1.786550005679052, "learning_rate": 1.9021581133281397e-06, "loss": 0.7192, "step": 23535 }, { "epoch": 0.7213436312369744, "grad_norm": 2.043212642889224, "learning_rate": 1.9017685475317576e-06, "loss": 0.6915, "step": 23536 }, { "epoch": 0.7213742797597156, "grad_norm": 1.7853513097821987, "learning_rate": 1.9013790122630465e-06, "loss": 0.687, "step": 23537 }, { "epoch": 0.7214049282824568, "grad_norm": 0.7821446821635313, "learning_rate": 1.900989507525841e-06, "loss": 0.3881, "step": 23538 }, { "epoch": 0.721435576805198, "grad_norm": 1.7095165033819655, "learning_rate": 1.9006000333239766e-06, "loss": 0.5447, "step": 23539 }, { "epoch": 0.7214662253279392, "grad_norm": 1.904967821155121, "learning_rate": 1.9002105896612932e-06, "loss": 0.5457, "step": 23540 }, { "epoch": 0.7214968738506804, "grad_norm": 1.7166390238705587, "learning_rate": 1.8998211765416303e-06, "loss": 0.5309, "step": 23541 }, { "epoch": 0.7215275223734215, "grad_norm": 1.8342261076670938, "learning_rate": 1.89943179396882e-06, "loss": 0.6063, "step": 23542 }, { "epoch": 0.7215581708961628, "grad_norm": 1.9906424902021855, "learning_rate": 1.8990424419467019e-06, "loss": 0.5272, "step": 23543 }, { "epoch": 0.721588819418904, "grad_norm": 1.5847986073201632, "learning_rate": 1.8986531204791124e-06, "loss": 0.4668, "step": 23544 }, { "epoch": 0.7216194679416452, "grad_norm": 0.7738400490442767, "learning_rate": 1.8982638295698885e-06, "loss": 0.423, "step": 23545 }, { "epoch": 0.7216501164643864, "grad_norm": 2.154045061550098, "learning_rate": 1.897874569222865e-06, "loss": 0.5751, "step": 23546 }, { "epoch": 0.7216807649871276, "grad_norm": 1.9039891104651863, "learning_rate": 1.8974853394418752e-06, "loss": 0.6123, "step": 23547 }, { "epoch": 0.7217114135098688, "grad_norm": 1.8364136890412563, "learning_rate": 1.897096140230756e-06, "loss": 0.5486, "step": 23548 }, { "epoch": 0.72174206203261, "grad_norm": 1.7671502080330501, "learning_rate": 1.8967069715933444e-06, "loss": 0.5462, "step": 23549 }, { "epoch": 0.7217727105553512, "grad_norm": 1.8258923522492807, "learning_rate": 1.8963178335334708e-06, "loss": 0.6147, "step": 23550 }, { "epoch": 0.7218033590780925, "grad_norm": 0.769452391940307, "learning_rate": 1.8959287260549724e-06, "loss": 0.3845, "step": 23551 }, { "epoch": 0.7218340076008336, "grad_norm": 1.8142998215194899, "learning_rate": 1.895539649161684e-06, "loss": 0.5951, "step": 23552 }, { "epoch": 0.7218646561235749, "grad_norm": 1.5497786153930262, "learning_rate": 1.8951506028574356e-06, "loss": 0.4747, "step": 23553 }, { "epoch": 0.721895304646316, "grad_norm": 1.6301862071111497, "learning_rate": 1.894761587146065e-06, "loss": 0.6157, "step": 23554 }, { "epoch": 0.7219259531690573, "grad_norm": 1.6731374071486254, "learning_rate": 1.894372602031399e-06, "loss": 0.559, "step": 23555 }, { "epoch": 0.7219566016917984, "grad_norm": 1.9394501034187261, "learning_rate": 1.8939836475172784e-06, "loss": 0.6297, "step": 23556 }, { "epoch": 0.7219872502145397, "grad_norm": 1.9870121882256164, "learning_rate": 1.8935947236075314e-06, "loss": 0.694, "step": 23557 }, { "epoch": 0.7220178987372808, "grad_norm": 1.7433311633298354, "learning_rate": 1.8932058303059887e-06, "loss": 0.5789, "step": 23558 }, { "epoch": 0.7220485472600221, "grad_norm": 0.769341413695377, "learning_rate": 1.8928169676164836e-06, "loss": 0.4004, "step": 23559 }, { "epoch": 0.7220791957827633, "grad_norm": 0.789461747523172, "learning_rate": 1.8924281355428498e-06, "loss": 0.3966, "step": 23560 }, { "epoch": 0.7221098443055045, "grad_norm": 2.0845206704717234, "learning_rate": 1.8920393340889148e-06, "loss": 0.528, "step": 23561 }, { "epoch": 0.7221404928282457, "grad_norm": 1.7376859393044635, "learning_rate": 1.8916505632585119e-06, "loss": 0.6202, "step": 23562 }, { "epoch": 0.7221711413509869, "grad_norm": 0.8070436356383585, "learning_rate": 1.8912618230554708e-06, "loss": 0.3833, "step": 23563 }, { "epoch": 0.7222017898737281, "grad_norm": 1.8193366107294033, "learning_rate": 1.8908731134836244e-06, "loss": 0.6359, "step": 23564 }, { "epoch": 0.7222324383964693, "grad_norm": 1.7205131383001757, "learning_rate": 1.8904844345468004e-06, "loss": 0.5052, "step": 23565 }, { "epoch": 0.7222630869192105, "grad_norm": 1.908856116006507, "learning_rate": 1.8900957862488262e-06, "loss": 0.5911, "step": 23566 }, { "epoch": 0.7222937354419517, "grad_norm": 1.816678693021545, "learning_rate": 1.8897071685935364e-06, "loss": 0.6027, "step": 23567 }, { "epoch": 0.7223243839646929, "grad_norm": 1.99305475059761, "learning_rate": 1.8893185815847581e-06, "loss": 0.6252, "step": 23568 }, { "epoch": 0.7223550324874342, "grad_norm": 1.8662611468509147, "learning_rate": 1.888930025226318e-06, "loss": 0.6439, "step": 23569 }, { "epoch": 0.7223856810101753, "grad_norm": 2.20779106720283, "learning_rate": 1.8885414995220464e-06, "loss": 0.6479, "step": 23570 }, { "epoch": 0.7224163295329166, "grad_norm": 1.6965996852079672, "learning_rate": 1.8881530044757718e-06, "loss": 0.5826, "step": 23571 }, { "epoch": 0.7224469780556577, "grad_norm": 1.7782616025204068, "learning_rate": 1.8877645400913231e-06, "loss": 0.5534, "step": 23572 }, { "epoch": 0.7224776265783989, "grad_norm": 1.726411936104694, "learning_rate": 1.887376106372525e-06, "loss": 0.5634, "step": 23573 }, { "epoch": 0.7225082751011401, "grad_norm": 0.8175669420513361, "learning_rate": 1.8869877033232065e-06, "loss": 0.4072, "step": 23574 }, { "epoch": 0.7225389236238813, "grad_norm": 1.6004215273465234, "learning_rate": 1.8865993309471965e-06, "loss": 0.5822, "step": 23575 }, { "epoch": 0.7225695721466225, "grad_norm": 2.174462247108078, "learning_rate": 1.88621098924832e-06, "loss": 0.5576, "step": 23576 }, { "epoch": 0.7226002206693637, "grad_norm": 0.7816493579468372, "learning_rate": 1.8858226782303996e-06, "loss": 0.3926, "step": 23577 }, { "epoch": 0.722630869192105, "grad_norm": 1.7893022455516534, "learning_rate": 1.885434397897269e-06, "loss": 0.6623, "step": 23578 }, { "epoch": 0.7226615177148461, "grad_norm": 1.858636746964808, "learning_rate": 1.8850461482527498e-06, "loss": 0.6137, "step": 23579 }, { "epoch": 0.7226921662375874, "grad_norm": 2.014959102851492, "learning_rate": 1.884657929300666e-06, "loss": 0.6366, "step": 23580 }, { "epoch": 0.7227228147603285, "grad_norm": 1.7872554743155444, "learning_rate": 1.8842697410448457e-06, "loss": 0.5609, "step": 23581 }, { "epoch": 0.7227534632830698, "grad_norm": 1.7929278320379127, "learning_rate": 1.8838815834891116e-06, "loss": 0.5682, "step": 23582 }, { "epoch": 0.7227841118058109, "grad_norm": 2.1330583121348776, "learning_rate": 1.883493456637292e-06, "loss": 0.5443, "step": 23583 }, { "epoch": 0.7228147603285522, "grad_norm": 1.9067476673430586, "learning_rate": 1.883105360493207e-06, "loss": 0.5462, "step": 23584 }, { "epoch": 0.7228454088512933, "grad_norm": 1.7610874203953972, "learning_rate": 1.882717295060682e-06, "loss": 0.6061, "step": 23585 }, { "epoch": 0.7228760573740346, "grad_norm": 1.6435088783695408, "learning_rate": 1.8823292603435434e-06, "loss": 0.5941, "step": 23586 }, { "epoch": 0.7229067058967757, "grad_norm": 1.8692435048472746, "learning_rate": 1.8819412563456118e-06, "loss": 0.5967, "step": 23587 }, { "epoch": 0.722937354419517, "grad_norm": 1.6099213201950993, "learning_rate": 1.881553283070708e-06, "loss": 0.4693, "step": 23588 }, { "epoch": 0.7229680029422582, "grad_norm": 1.7143783418053415, "learning_rate": 1.88116534052266e-06, "loss": 0.6067, "step": 23589 }, { "epoch": 0.7229986514649994, "grad_norm": 2.1895654471685497, "learning_rate": 1.8807774287052866e-06, "loss": 0.6029, "step": 23590 }, { "epoch": 0.7230292999877406, "grad_norm": 1.9175254126925068, "learning_rate": 1.8803895476224133e-06, "loss": 0.6015, "step": 23591 }, { "epoch": 0.7230599485104818, "grad_norm": 1.8710855378860727, "learning_rate": 1.8800016972778578e-06, "loss": 0.5149, "step": 23592 }, { "epoch": 0.723090597033223, "grad_norm": 2.025386839100477, "learning_rate": 1.8796138776754442e-06, "loss": 0.5428, "step": 23593 }, { "epoch": 0.7231212455559642, "grad_norm": 2.2171967890484194, "learning_rate": 1.879226088818995e-06, "loss": 0.549, "step": 23594 }, { "epoch": 0.7231518940787054, "grad_norm": 1.6651590521868171, "learning_rate": 1.878838330712328e-06, "loss": 0.4874, "step": 23595 }, { "epoch": 0.7231825426014467, "grad_norm": 2.0159466194327935, "learning_rate": 1.8784506033592658e-06, "loss": 0.5331, "step": 23596 }, { "epoch": 0.7232131911241878, "grad_norm": 1.7141253532012406, "learning_rate": 1.8780629067636296e-06, "loss": 0.4797, "step": 23597 }, { "epoch": 0.7232438396469291, "grad_norm": 1.5561007941555325, "learning_rate": 1.877675240929237e-06, "loss": 0.578, "step": 23598 }, { "epoch": 0.7232744881696702, "grad_norm": 1.791866809724895, "learning_rate": 1.877287605859911e-06, "loss": 0.567, "step": 23599 }, { "epoch": 0.7233051366924115, "grad_norm": 1.8255657397526583, "learning_rate": 1.8769000015594675e-06, "loss": 0.4815, "step": 23600 }, { "epoch": 0.7233357852151526, "grad_norm": 1.9380429070512997, "learning_rate": 1.876512428031727e-06, "loss": 0.5863, "step": 23601 }, { "epoch": 0.7233664337378939, "grad_norm": 1.8531753076863224, "learning_rate": 1.8761248852805103e-06, "loss": 0.6219, "step": 23602 }, { "epoch": 0.723397082260635, "grad_norm": 1.6391216460595268, "learning_rate": 1.8757373733096334e-06, "loss": 0.5537, "step": 23603 }, { "epoch": 0.7234277307833762, "grad_norm": 1.895189498376453, "learning_rate": 1.8753498921229146e-06, "loss": 0.6285, "step": 23604 }, { "epoch": 0.7234583793061174, "grad_norm": 1.6519396448652868, "learning_rate": 1.8749624417241751e-06, "loss": 0.6174, "step": 23605 }, { "epoch": 0.7234890278288586, "grad_norm": 1.743440696646368, "learning_rate": 1.8745750221172305e-06, "loss": 0.5124, "step": 23606 }, { "epoch": 0.7235196763515999, "grad_norm": 2.0655312921981572, "learning_rate": 1.874187633305894e-06, "loss": 0.6476, "step": 23607 }, { "epoch": 0.723550324874341, "grad_norm": 1.7823332172603725, "learning_rate": 1.8738002752939899e-06, "loss": 0.586, "step": 23608 }, { "epoch": 0.7235809733970823, "grad_norm": 1.9169545895763345, "learning_rate": 1.8734129480853302e-06, "loss": 0.5519, "step": 23609 }, { "epoch": 0.7236116219198234, "grad_norm": 0.7916970001660412, "learning_rate": 1.8730256516837342e-06, "loss": 0.4087, "step": 23610 }, { "epoch": 0.7236422704425647, "grad_norm": 0.8076674187788606, "learning_rate": 1.8726383860930148e-06, "loss": 0.4173, "step": 23611 }, { "epoch": 0.7236729189653058, "grad_norm": 1.9410132593787743, "learning_rate": 1.8722511513169894e-06, "loss": 0.5877, "step": 23612 }, { "epoch": 0.7237035674880471, "grad_norm": 1.698650069915404, "learning_rate": 1.8718639473594757e-06, "loss": 0.5389, "step": 23613 }, { "epoch": 0.7237342160107882, "grad_norm": 1.9348031989071006, "learning_rate": 1.871476774224285e-06, "loss": 0.6353, "step": 23614 }, { "epoch": 0.7237648645335295, "grad_norm": 2.0085274441030347, "learning_rate": 1.8710896319152339e-06, "loss": 0.6625, "step": 23615 }, { "epoch": 0.7237955130562707, "grad_norm": 1.8596882353255513, "learning_rate": 1.8707025204361396e-06, "loss": 0.5919, "step": 23616 }, { "epoch": 0.7238261615790119, "grad_norm": 1.823003094406272, "learning_rate": 1.8703154397908119e-06, "loss": 0.4584, "step": 23617 }, { "epoch": 0.7238568101017531, "grad_norm": 1.6372637314401668, "learning_rate": 1.8699283899830667e-06, "loss": 0.5705, "step": 23618 }, { "epoch": 0.7238874586244943, "grad_norm": 1.9909673524770821, "learning_rate": 1.86954137101672e-06, "loss": 0.5902, "step": 23619 }, { "epoch": 0.7239181071472355, "grad_norm": 1.9892246608537425, "learning_rate": 1.869154382895581e-06, "loss": 0.6545, "step": 23620 }, { "epoch": 0.7239487556699767, "grad_norm": 1.883259720233067, "learning_rate": 1.8687674256234672e-06, "loss": 0.583, "step": 23621 }, { "epoch": 0.7239794041927179, "grad_norm": 1.9619113581729777, "learning_rate": 1.868380499204187e-06, "loss": 0.6328, "step": 23622 }, { "epoch": 0.7240100527154592, "grad_norm": 1.730612312478149, "learning_rate": 1.8679936036415552e-06, "loss": 0.5766, "step": 23623 }, { "epoch": 0.7240407012382003, "grad_norm": 1.9219009120968114, "learning_rate": 1.8676067389393854e-06, "loss": 0.5837, "step": 23624 }, { "epoch": 0.7240713497609416, "grad_norm": 0.7948307730618924, "learning_rate": 1.8672199051014862e-06, "loss": 0.4121, "step": 23625 }, { "epoch": 0.7241019982836827, "grad_norm": 1.821016852331546, "learning_rate": 1.866833102131671e-06, "loss": 0.5882, "step": 23626 }, { "epoch": 0.724132646806424, "grad_norm": 2.037493223906067, "learning_rate": 1.8664463300337521e-06, "loss": 0.6532, "step": 23627 }, { "epoch": 0.7241632953291651, "grad_norm": 1.8309628924205872, "learning_rate": 1.866059588811538e-06, "loss": 0.5732, "step": 23628 }, { "epoch": 0.7241939438519064, "grad_norm": 1.7931590935811126, "learning_rate": 1.8656728784688404e-06, "loss": 0.5506, "step": 23629 }, { "epoch": 0.7242245923746475, "grad_norm": 1.6972926383384812, "learning_rate": 1.8652861990094717e-06, "loss": 0.6424, "step": 23630 }, { "epoch": 0.7242552408973888, "grad_norm": 1.807269060650492, "learning_rate": 1.8648995504372386e-06, "loss": 0.6024, "step": 23631 }, { "epoch": 0.72428588942013, "grad_norm": 2.112082512860469, "learning_rate": 1.8645129327559536e-06, "loss": 0.6491, "step": 23632 }, { "epoch": 0.7243165379428712, "grad_norm": 1.708731834026289, "learning_rate": 1.8641263459694237e-06, "loss": 0.5725, "step": 23633 }, { "epoch": 0.7243471864656124, "grad_norm": 1.8717157134843863, "learning_rate": 1.8637397900814591e-06, "loss": 0.5537, "step": 23634 }, { "epoch": 0.7243778349883535, "grad_norm": 2.052151666190116, "learning_rate": 1.8633532650958702e-06, "loss": 0.5991, "step": 23635 }, { "epoch": 0.7244084835110948, "grad_norm": 1.7325647815142775, "learning_rate": 1.8629667710164628e-06, "loss": 0.6141, "step": 23636 }, { "epoch": 0.7244391320338359, "grad_norm": 2.3123830237432728, "learning_rate": 1.8625803078470467e-06, "loss": 0.5923, "step": 23637 }, { "epoch": 0.7244697805565772, "grad_norm": 1.6336184759119345, "learning_rate": 1.8621938755914309e-06, "loss": 0.5338, "step": 23638 }, { "epoch": 0.7245004290793183, "grad_norm": 1.812341813967547, "learning_rate": 1.8618074742534199e-06, "loss": 0.582, "step": 23639 }, { "epoch": 0.7245310776020596, "grad_norm": 1.8814005793216233, "learning_rate": 1.861421103836823e-06, "loss": 0.5712, "step": 23640 }, { "epoch": 0.7245617261248007, "grad_norm": 0.7868925864291164, "learning_rate": 1.8610347643454486e-06, "loss": 0.3995, "step": 23641 }, { "epoch": 0.724592374647542, "grad_norm": 1.9284383600008839, "learning_rate": 1.8606484557831e-06, "loss": 0.6487, "step": 23642 }, { "epoch": 0.7246230231702832, "grad_norm": 1.6051914708197514, "learning_rate": 1.8602621781535874e-06, "loss": 0.494, "step": 23643 }, { "epoch": 0.7246536716930244, "grad_norm": 1.741967422009554, "learning_rate": 1.8598759314607129e-06, "loss": 0.537, "step": 23644 }, { "epoch": 0.7246843202157656, "grad_norm": 1.8079545019589545, "learning_rate": 1.859489715708284e-06, "loss": 0.6507, "step": 23645 }, { "epoch": 0.7247149687385068, "grad_norm": 1.8282718971735163, "learning_rate": 1.8591035309001087e-06, "loss": 0.5189, "step": 23646 }, { "epoch": 0.724745617261248, "grad_norm": 1.7932263397522004, "learning_rate": 1.8587173770399886e-06, "loss": 0.5697, "step": 23647 }, { "epoch": 0.7247762657839892, "grad_norm": 2.0707977396423027, "learning_rate": 1.8583312541317288e-06, "loss": 0.6336, "step": 23648 }, { "epoch": 0.7248069143067304, "grad_norm": 0.7931147362794108, "learning_rate": 1.8579451621791377e-06, "loss": 0.4119, "step": 23649 }, { "epoch": 0.7248375628294716, "grad_norm": 0.7993938618776201, "learning_rate": 1.8575591011860146e-06, "loss": 0.4073, "step": 23650 }, { "epoch": 0.7248682113522128, "grad_norm": 1.762246807745324, "learning_rate": 1.8571730711561675e-06, "loss": 0.613, "step": 23651 }, { "epoch": 0.7248988598749541, "grad_norm": 0.7832727319857001, "learning_rate": 1.8567870720933967e-06, "loss": 0.3943, "step": 23652 }, { "epoch": 0.7249295083976952, "grad_norm": 1.7535005532221168, "learning_rate": 1.8564011040015074e-06, "loss": 0.5814, "step": 23653 }, { "epoch": 0.7249601569204365, "grad_norm": 1.6686425625681625, "learning_rate": 1.8560151668843035e-06, "loss": 0.5206, "step": 23654 }, { "epoch": 0.7249908054431776, "grad_norm": 0.8164883573193509, "learning_rate": 1.855629260745585e-06, "loss": 0.4126, "step": 23655 }, { "epoch": 0.7250214539659189, "grad_norm": 2.088653850077777, "learning_rate": 1.855243385589156e-06, "loss": 0.6939, "step": 23656 }, { "epoch": 0.72505210248866, "grad_norm": 1.9911288221847707, "learning_rate": 1.8548575414188202e-06, "loss": 0.6209, "step": 23657 }, { "epoch": 0.7250827510114013, "grad_norm": 1.7852247083604722, "learning_rate": 1.8544717282383755e-06, "loss": 0.5933, "step": 23658 }, { "epoch": 0.7251133995341424, "grad_norm": 0.8173694953636712, "learning_rate": 1.8540859460516265e-06, "loss": 0.4164, "step": 23659 }, { "epoch": 0.7251440480568837, "grad_norm": 1.9719303541805167, "learning_rate": 1.853700194862375e-06, "loss": 0.5628, "step": 23660 }, { "epoch": 0.7251746965796249, "grad_norm": 1.6003462254873972, "learning_rate": 1.8533144746744192e-06, "loss": 0.5413, "step": 23661 }, { "epoch": 0.7252053451023661, "grad_norm": 0.7825712943379513, "learning_rate": 1.8529287854915622e-06, "loss": 0.4033, "step": 23662 }, { "epoch": 0.7252359936251073, "grad_norm": 2.1960210005027143, "learning_rate": 1.8525431273175998e-06, "loss": 0.5784, "step": 23663 }, { "epoch": 0.7252666421478485, "grad_norm": 1.7444551011308385, "learning_rate": 1.8521575001563386e-06, "loss": 0.6398, "step": 23664 }, { "epoch": 0.7252972906705897, "grad_norm": 1.840040514813414, "learning_rate": 1.851771904011575e-06, "loss": 0.5668, "step": 23665 }, { "epoch": 0.7253279391933308, "grad_norm": 1.8201166492484842, "learning_rate": 1.8513863388871067e-06, "loss": 0.6428, "step": 23666 }, { "epoch": 0.7253585877160721, "grad_norm": 1.934954085868941, "learning_rate": 1.8510008047867345e-06, "loss": 0.6069, "step": 23667 }, { "epoch": 0.7253892362388132, "grad_norm": 1.6940865444321855, "learning_rate": 1.8506153017142587e-06, "loss": 0.5541, "step": 23668 }, { "epoch": 0.7254198847615545, "grad_norm": 0.8009417493664558, "learning_rate": 1.8502298296734744e-06, "loss": 0.4029, "step": 23669 }, { "epoch": 0.7254505332842957, "grad_norm": 1.624727992190881, "learning_rate": 1.8498443886681817e-06, "loss": 0.5127, "step": 23670 }, { "epoch": 0.7254811818070369, "grad_norm": 1.8705230498936096, "learning_rate": 1.8494589787021777e-06, "loss": 0.6332, "step": 23671 }, { "epoch": 0.7255118303297781, "grad_norm": 1.7526639434017057, "learning_rate": 1.8490735997792625e-06, "loss": 0.6043, "step": 23672 }, { "epoch": 0.7255424788525193, "grad_norm": 1.5582284174367746, "learning_rate": 1.8486882519032317e-06, "loss": 0.6058, "step": 23673 }, { "epoch": 0.7255731273752605, "grad_norm": 1.797496843706629, "learning_rate": 1.848302935077878e-06, "loss": 0.5982, "step": 23674 }, { "epoch": 0.7256037758980017, "grad_norm": 1.927694271302029, "learning_rate": 1.8479176493070055e-06, "loss": 0.5258, "step": 23675 }, { "epoch": 0.7256344244207429, "grad_norm": 0.8055443586712197, "learning_rate": 1.8475323945944067e-06, "loss": 0.3955, "step": 23676 }, { "epoch": 0.7256650729434841, "grad_norm": 2.053414580442222, "learning_rate": 1.847147170943876e-06, "loss": 0.5537, "step": 23677 }, { "epoch": 0.7256957214662253, "grad_norm": 1.6238446293304238, "learning_rate": 1.8467619783592112e-06, "loss": 0.5503, "step": 23678 }, { "epoch": 0.7257263699889666, "grad_norm": 1.8127300756791953, "learning_rate": 1.8463768168442091e-06, "loss": 0.5801, "step": 23679 }, { "epoch": 0.7257570185117077, "grad_norm": 1.8334390392233737, "learning_rate": 1.8459916864026611e-06, "loss": 0.5598, "step": 23680 }, { "epoch": 0.725787667034449, "grad_norm": 1.8628688516913747, "learning_rate": 1.8456065870383643e-06, "loss": 0.5955, "step": 23681 }, { "epoch": 0.7258183155571901, "grad_norm": 1.777147371939837, "learning_rate": 1.8452215187551132e-06, "loss": 0.5528, "step": 23682 }, { "epoch": 0.7258489640799314, "grad_norm": 1.777365639447432, "learning_rate": 1.8448364815567027e-06, "loss": 0.6461, "step": 23683 }, { "epoch": 0.7258796126026725, "grad_norm": 1.8215694165183798, "learning_rate": 1.844451475446926e-06, "loss": 0.5612, "step": 23684 }, { "epoch": 0.7259102611254138, "grad_norm": 1.832726734789226, "learning_rate": 1.8440665004295743e-06, "loss": 0.5975, "step": 23685 }, { "epoch": 0.7259409096481549, "grad_norm": 1.7374041779046592, "learning_rate": 1.8436815565084432e-06, "loss": 0.5522, "step": 23686 }, { "epoch": 0.7259715581708962, "grad_norm": 1.9243182998132673, "learning_rate": 1.8432966436873268e-06, "loss": 0.5793, "step": 23687 }, { "epoch": 0.7260022066936374, "grad_norm": 1.81278373972721, "learning_rate": 1.8429117619700149e-06, "loss": 0.5869, "step": 23688 }, { "epoch": 0.7260328552163786, "grad_norm": 1.7930993459990894, "learning_rate": 1.8425269113603005e-06, "loss": 0.5424, "step": 23689 }, { "epoch": 0.7260635037391198, "grad_norm": 2.120387730676026, "learning_rate": 1.842142091861977e-06, "loss": 0.5182, "step": 23690 }, { "epoch": 0.726094152261861, "grad_norm": 1.7031301097664127, "learning_rate": 1.8417573034788367e-06, "loss": 0.6101, "step": 23691 }, { "epoch": 0.7261248007846022, "grad_norm": 1.9813338655680002, "learning_rate": 1.841372546214668e-06, "loss": 0.5597, "step": 23692 }, { "epoch": 0.7261554493073434, "grad_norm": 1.6990132610041984, "learning_rate": 1.8409878200732644e-06, "loss": 0.6139, "step": 23693 }, { "epoch": 0.7261860978300846, "grad_norm": 0.8878027399291436, "learning_rate": 1.8406031250584171e-06, "loss": 0.3961, "step": 23694 }, { "epoch": 0.7262167463528258, "grad_norm": 2.0464813733657867, "learning_rate": 1.840218461173916e-06, "loss": 0.5515, "step": 23695 }, { "epoch": 0.726247394875567, "grad_norm": 1.991413298996768, "learning_rate": 1.839833828423549e-06, "loss": 0.6184, "step": 23696 }, { "epoch": 0.7262780433983081, "grad_norm": 1.9629058213318809, "learning_rate": 1.8394492268111081e-06, "loss": 0.5414, "step": 23697 }, { "epoch": 0.7263086919210494, "grad_norm": 1.590648154938428, "learning_rate": 1.8390646563403819e-06, "loss": 0.4786, "step": 23698 }, { "epoch": 0.7263393404437906, "grad_norm": 1.9829250480587828, "learning_rate": 1.838680117015163e-06, "loss": 0.5789, "step": 23699 }, { "epoch": 0.7263699889665318, "grad_norm": 1.9462454061747982, "learning_rate": 1.8382956088392356e-06, "loss": 0.6104, "step": 23700 }, { "epoch": 0.726400637489273, "grad_norm": 1.8702662538550066, "learning_rate": 1.8379111318163905e-06, "loss": 0.5022, "step": 23701 }, { "epoch": 0.7264312860120142, "grad_norm": 0.8083772187652074, "learning_rate": 1.8375266859504177e-06, "loss": 0.4177, "step": 23702 }, { "epoch": 0.7264619345347554, "grad_norm": 2.46004491569008, "learning_rate": 1.8371422712451037e-06, "loss": 0.5906, "step": 23703 }, { "epoch": 0.7264925830574966, "grad_norm": 1.8663293790080848, "learning_rate": 1.836757887704233e-06, "loss": 0.6114, "step": 23704 }, { "epoch": 0.7265232315802378, "grad_norm": 1.9367950530403173, "learning_rate": 1.8363735353315991e-06, "loss": 0.5907, "step": 23705 }, { "epoch": 0.726553880102979, "grad_norm": 1.7977216258346558, "learning_rate": 1.835989214130987e-06, "loss": 0.5111, "step": 23706 }, { "epoch": 0.7265845286257202, "grad_norm": 1.6629549971973105, "learning_rate": 1.8356049241061802e-06, "loss": 0.4909, "step": 23707 }, { "epoch": 0.7266151771484615, "grad_norm": 1.6981717800905178, "learning_rate": 1.8352206652609682e-06, "loss": 0.5507, "step": 23708 }, { "epoch": 0.7266458256712026, "grad_norm": 1.8661301489898685, "learning_rate": 1.8348364375991368e-06, "loss": 0.5522, "step": 23709 }, { "epoch": 0.7266764741939439, "grad_norm": 1.8511404126520008, "learning_rate": 1.834452241124473e-06, "loss": 0.5741, "step": 23710 }, { "epoch": 0.726707122716685, "grad_norm": 0.7833608833381842, "learning_rate": 1.8340680758407597e-06, "loss": 0.3929, "step": 23711 }, { "epoch": 0.7267377712394263, "grad_norm": 1.659913316793096, "learning_rate": 1.8336839417517837e-06, "loss": 0.5595, "step": 23712 }, { "epoch": 0.7267684197621674, "grad_norm": 1.8177841946900821, "learning_rate": 1.833299838861332e-06, "loss": 0.6221, "step": 23713 }, { "epoch": 0.7267990682849087, "grad_norm": 2.047608936872808, "learning_rate": 1.8329157671731873e-06, "loss": 0.6541, "step": 23714 }, { "epoch": 0.7268297168076499, "grad_norm": 1.884726393516129, "learning_rate": 1.8325317266911297e-06, "loss": 0.591, "step": 23715 }, { "epoch": 0.7268603653303911, "grad_norm": 0.7761702668237423, "learning_rate": 1.8321477174189518e-06, "loss": 0.399, "step": 23716 }, { "epoch": 0.7268910138531323, "grad_norm": 2.1239688074440464, "learning_rate": 1.8317637393604304e-06, "loss": 0.5486, "step": 23717 }, { "epoch": 0.7269216623758735, "grad_norm": 1.691856234125103, "learning_rate": 1.831379792519354e-06, "loss": 0.6114, "step": 23718 }, { "epoch": 0.7269523108986147, "grad_norm": 1.6669324116631852, "learning_rate": 1.8309958768995007e-06, "loss": 0.5773, "step": 23719 }, { "epoch": 0.7269829594213559, "grad_norm": 1.9575730749305265, "learning_rate": 1.830611992504656e-06, "loss": 0.6861, "step": 23720 }, { "epoch": 0.7270136079440971, "grad_norm": 1.6421773119356988, "learning_rate": 1.8302281393386046e-06, "loss": 0.6943, "step": 23721 }, { "epoch": 0.7270442564668383, "grad_norm": 1.9961307851699104, "learning_rate": 1.829844317405124e-06, "loss": 0.6499, "step": 23722 }, { "epoch": 0.7270749049895795, "grad_norm": 1.719638155734453, "learning_rate": 1.829460526707999e-06, "loss": 0.5283, "step": 23723 }, { "epoch": 0.7271055535123208, "grad_norm": 2.0401879294500564, "learning_rate": 1.8290767672510117e-06, "loss": 0.5909, "step": 23724 }, { "epoch": 0.7271362020350619, "grad_norm": 0.8251873691250361, "learning_rate": 1.8286930390379416e-06, "loss": 0.3957, "step": 23725 }, { "epoch": 0.7271668505578032, "grad_norm": 1.7284635854409847, "learning_rate": 1.8283093420725695e-06, "loss": 0.5699, "step": 23726 }, { "epoch": 0.7271974990805443, "grad_norm": 1.7124235154371021, "learning_rate": 1.827925676358679e-06, "loss": 0.7001, "step": 23727 }, { "epoch": 0.7272281476032855, "grad_norm": 1.7550776444823593, "learning_rate": 1.8275420419000466e-06, "loss": 0.5788, "step": 23728 }, { "epoch": 0.7272587961260267, "grad_norm": 1.8490519877437848, "learning_rate": 1.8271584387004559e-06, "loss": 0.6139, "step": 23729 }, { "epoch": 0.7272894446487679, "grad_norm": 1.8213708745627641, "learning_rate": 1.8267748667636831e-06, "loss": 0.4918, "step": 23730 }, { "epoch": 0.7273200931715091, "grad_norm": 2.0587085742756464, "learning_rate": 1.8263913260935102e-06, "loss": 0.6257, "step": 23731 }, { "epoch": 0.7273507416942503, "grad_norm": 1.995798736711881, "learning_rate": 1.8260078166937161e-06, "loss": 0.6335, "step": 23732 }, { "epoch": 0.7273813902169916, "grad_norm": 1.9232264896536388, "learning_rate": 1.8256243385680782e-06, "loss": 0.5547, "step": 23733 }, { "epoch": 0.7274120387397327, "grad_norm": 1.7639603109713948, "learning_rate": 1.8252408917203756e-06, "loss": 0.6711, "step": 23734 }, { "epoch": 0.727442687262474, "grad_norm": 1.6568102390797945, "learning_rate": 1.8248574761543885e-06, "loss": 0.4974, "step": 23735 }, { "epoch": 0.7274733357852151, "grad_norm": 1.8783213697255392, "learning_rate": 1.8244740918738917e-06, "loss": 0.5718, "step": 23736 }, { "epoch": 0.7275039843079564, "grad_norm": 1.8530967908217524, "learning_rate": 1.8240907388826656e-06, "loss": 0.5693, "step": 23737 }, { "epoch": 0.7275346328306975, "grad_norm": 1.885380377954822, "learning_rate": 1.8237074171844843e-06, "loss": 0.5919, "step": 23738 }, { "epoch": 0.7275652813534388, "grad_norm": 0.8567712902903787, "learning_rate": 1.8233241267831265e-06, "loss": 0.4084, "step": 23739 }, { "epoch": 0.7275959298761799, "grad_norm": 1.707558166745491, "learning_rate": 1.8229408676823707e-06, "loss": 0.5546, "step": 23740 }, { "epoch": 0.7276265783989212, "grad_norm": 0.7837737023220215, "learning_rate": 1.8225576398859896e-06, "loss": 0.4018, "step": 23741 }, { "epoch": 0.7276572269216623, "grad_norm": 1.8373402638860483, "learning_rate": 1.8221744433977612e-06, "loss": 0.5569, "step": 23742 }, { "epoch": 0.7276878754444036, "grad_norm": 1.9856191969846115, "learning_rate": 1.8217912782214625e-06, "loss": 0.6231, "step": 23743 }, { "epoch": 0.7277185239671448, "grad_norm": 1.7628960700099687, "learning_rate": 1.821408144360866e-06, "loss": 0.5434, "step": 23744 }, { "epoch": 0.727749172489886, "grad_norm": 1.61509431373873, "learning_rate": 1.8210250418197484e-06, "loss": 0.4947, "step": 23745 }, { "epoch": 0.7277798210126272, "grad_norm": 1.7427345797794458, "learning_rate": 1.8206419706018857e-06, "loss": 0.6228, "step": 23746 }, { "epoch": 0.7278104695353684, "grad_norm": 1.6304327922344528, "learning_rate": 1.8202589307110501e-06, "loss": 0.5466, "step": 23747 }, { "epoch": 0.7278411180581096, "grad_norm": 1.4112944783000525, "learning_rate": 1.8198759221510182e-06, "loss": 0.508, "step": 23748 }, { "epoch": 0.7278717665808508, "grad_norm": 1.777656660081527, "learning_rate": 1.8194929449255605e-06, "loss": 0.5766, "step": 23749 }, { "epoch": 0.727902415103592, "grad_norm": 1.7118158064753104, "learning_rate": 1.819109999038453e-06, "loss": 0.6463, "step": 23750 }, { "epoch": 0.7279330636263333, "grad_norm": 1.8562587551802279, "learning_rate": 1.8187270844934702e-06, "loss": 0.593, "step": 23751 }, { "epoch": 0.7279637121490744, "grad_norm": 0.7905912232099206, "learning_rate": 1.818344201294382e-06, "loss": 0.4021, "step": 23752 }, { "epoch": 0.7279943606718157, "grad_norm": 1.6248693599144586, "learning_rate": 1.8179613494449615e-06, "loss": 0.546, "step": 23753 }, { "epoch": 0.7280250091945568, "grad_norm": 1.6405780011831939, "learning_rate": 1.8175785289489844e-06, "loss": 0.5584, "step": 23754 }, { "epoch": 0.7280556577172981, "grad_norm": 0.7894914061740405, "learning_rate": 1.8171957398102186e-06, "loss": 0.4011, "step": 23755 }, { "epoch": 0.7280863062400392, "grad_norm": 1.8688671583914354, "learning_rate": 1.8168129820324375e-06, "loss": 0.6049, "step": 23756 }, { "epoch": 0.7281169547627805, "grad_norm": 1.4092334818959438, "learning_rate": 1.816430255619414e-06, "loss": 0.5266, "step": 23757 }, { "epoch": 0.7281476032855216, "grad_norm": 0.7839300456444426, "learning_rate": 1.8160475605749166e-06, "loss": 0.4105, "step": 23758 }, { "epoch": 0.7281782518082628, "grad_norm": 1.7062309054737061, "learning_rate": 1.8156648969027186e-06, "loss": 0.5302, "step": 23759 }, { "epoch": 0.728208900331004, "grad_norm": 1.8879637990344047, "learning_rate": 1.815282264606587e-06, "loss": 0.6241, "step": 23760 }, { "epoch": 0.7282395488537452, "grad_norm": 2.1046765325199313, "learning_rate": 1.814899663690295e-06, "loss": 0.698, "step": 23761 }, { "epoch": 0.7282701973764865, "grad_norm": 0.797813094607992, "learning_rate": 1.8145170941576124e-06, "loss": 0.408, "step": 23762 }, { "epoch": 0.7283008458992276, "grad_norm": 1.953827827739131, "learning_rate": 1.8141345560123065e-06, "loss": 0.6052, "step": 23763 }, { "epoch": 0.7283314944219689, "grad_norm": 1.8312720654898318, "learning_rate": 1.8137520492581478e-06, "loss": 0.7026, "step": 23764 }, { "epoch": 0.72836214294471, "grad_norm": 1.738468900790825, "learning_rate": 1.8133695738989077e-06, "loss": 0.6965, "step": 23765 }, { "epoch": 0.7283927914674513, "grad_norm": 1.8893276291245196, "learning_rate": 1.81298712993835e-06, "loss": 0.6329, "step": 23766 }, { "epoch": 0.7284234399901924, "grad_norm": 1.7416568363436828, "learning_rate": 1.8126047173802463e-06, "loss": 0.5697, "step": 23767 }, { "epoch": 0.7284540885129337, "grad_norm": 1.8824481566401323, "learning_rate": 1.8122223362283653e-06, "loss": 0.6004, "step": 23768 }, { "epoch": 0.7284847370356748, "grad_norm": 1.803499461618619, "learning_rate": 1.811839986486471e-06, "loss": 0.4917, "step": 23769 }, { "epoch": 0.7285153855584161, "grad_norm": 1.680302075562044, "learning_rate": 1.8114576681583351e-06, "loss": 0.5745, "step": 23770 }, { "epoch": 0.7285460340811573, "grad_norm": 1.8219266741265359, "learning_rate": 1.8110753812477195e-06, "loss": 0.575, "step": 23771 }, { "epoch": 0.7285766826038985, "grad_norm": 1.6863654545952722, "learning_rate": 1.8106931257583975e-06, "loss": 0.5889, "step": 23772 }, { "epoch": 0.7286073311266397, "grad_norm": 0.8283719788781881, "learning_rate": 1.8103109016941317e-06, "loss": 0.4023, "step": 23773 }, { "epoch": 0.7286379796493809, "grad_norm": 1.6349446265014338, "learning_rate": 1.8099287090586876e-06, "loss": 0.5225, "step": 23774 }, { "epoch": 0.7286686281721221, "grad_norm": 1.615926896130914, "learning_rate": 1.8095465478558317e-06, "loss": 0.5632, "step": 23775 }, { "epoch": 0.7286992766948633, "grad_norm": 0.7925078946746684, "learning_rate": 1.8091644180893313e-06, "loss": 0.4165, "step": 23776 }, { "epoch": 0.7287299252176045, "grad_norm": 1.7651059880382887, "learning_rate": 1.8087823197629495e-06, "loss": 0.5314, "step": 23777 }, { "epoch": 0.7287605737403458, "grad_norm": 1.7541025645307073, "learning_rate": 1.8084002528804518e-06, "loss": 0.508, "step": 23778 }, { "epoch": 0.7287912222630869, "grad_norm": 1.62310584319389, "learning_rate": 1.8080182174456024e-06, "loss": 0.5494, "step": 23779 }, { "epoch": 0.7288218707858282, "grad_norm": 1.8021184367559537, "learning_rate": 1.8076362134621683e-06, "loss": 0.5042, "step": 23780 }, { "epoch": 0.7288525193085693, "grad_norm": 1.7998237185338497, "learning_rate": 1.8072542409339117e-06, "loss": 0.591, "step": 23781 }, { "epoch": 0.7288831678313106, "grad_norm": 1.8080418495893762, "learning_rate": 1.8068722998645939e-06, "loss": 0.5872, "step": 23782 }, { "epoch": 0.7289138163540517, "grad_norm": 1.9278855401149362, "learning_rate": 1.80649039025798e-06, "loss": 0.6841, "step": 23783 }, { "epoch": 0.728944464876793, "grad_norm": 1.7388436620195689, "learning_rate": 1.8061085121178357e-06, "loss": 0.5463, "step": 23784 }, { "epoch": 0.7289751133995341, "grad_norm": 1.6696312419797377, "learning_rate": 1.8057266654479195e-06, "loss": 0.5672, "step": 23785 }, { "epoch": 0.7290057619222754, "grad_norm": 1.7996785218783526, "learning_rate": 1.8053448502519954e-06, "loss": 0.6141, "step": 23786 }, { "epoch": 0.7290364104450165, "grad_norm": 0.8468008916562215, "learning_rate": 1.804963066533828e-06, "loss": 0.4249, "step": 23787 }, { "epoch": 0.7290670589677578, "grad_norm": 1.764157388161216, "learning_rate": 1.8045813142971752e-06, "loss": 0.6003, "step": 23788 }, { "epoch": 0.729097707490499, "grad_norm": 1.8480699905925029, "learning_rate": 1.8041995935458023e-06, "loss": 0.5736, "step": 23789 }, { "epoch": 0.7291283560132401, "grad_norm": 2.1672603956672187, "learning_rate": 1.8038179042834648e-06, "loss": 0.6116, "step": 23790 }, { "epoch": 0.7291590045359814, "grad_norm": 2.0849853039387916, "learning_rate": 1.8034362465139304e-06, "loss": 0.6255, "step": 23791 }, { "epoch": 0.7291896530587225, "grad_norm": 1.9314191052056495, "learning_rate": 1.803054620240957e-06, "loss": 0.5229, "step": 23792 }, { "epoch": 0.7292203015814638, "grad_norm": 1.7842759365630572, "learning_rate": 1.8026730254683023e-06, "loss": 0.5642, "step": 23793 }, { "epoch": 0.7292509501042049, "grad_norm": 1.9073529200099781, "learning_rate": 1.802291462199729e-06, "loss": 0.622, "step": 23794 }, { "epoch": 0.7292815986269462, "grad_norm": 1.8071910677856036, "learning_rate": 1.801909930438997e-06, "loss": 0.554, "step": 23795 }, { "epoch": 0.7293122471496873, "grad_norm": 1.9364046495710492, "learning_rate": 1.8015284301898633e-06, "loss": 0.6473, "step": 23796 }, { "epoch": 0.7293428956724286, "grad_norm": 2.0052171303856414, "learning_rate": 1.8011469614560883e-06, "loss": 0.582, "step": 23797 }, { "epoch": 0.7293735441951698, "grad_norm": 1.8695102752151385, "learning_rate": 1.8007655242414313e-06, "loss": 0.5416, "step": 23798 }, { "epoch": 0.729404192717911, "grad_norm": 1.981807023645212, "learning_rate": 1.8003841185496513e-06, "loss": 0.564, "step": 23799 }, { "epoch": 0.7294348412406522, "grad_norm": 1.7899694967073805, "learning_rate": 1.8000027443845052e-06, "loss": 0.6276, "step": 23800 }, { "epoch": 0.7294654897633934, "grad_norm": 1.9744174204402782, "learning_rate": 1.7996214017497477e-06, "loss": 0.6386, "step": 23801 }, { "epoch": 0.7294961382861346, "grad_norm": 2.0244735661260775, "learning_rate": 1.7992400906491426e-06, "loss": 0.5874, "step": 23802 }, { "epoch": 0.7295267868088758, "grad_norm": 1.9244274319224235, "learning_rate": 1.7988588110864436e-06, "loss": 0.5931, "step": 23803 }, { "epoch": 0.729557435331617, "grad_norm": 1.7147003321526666, "learning_rate": 1.7984775630654067e-06, "loss": 0.5641, "step": 23804 }, { "epoch": 0.7295880838543582, "grad_norm": 1.8070246448667648, "learning_rate": 1.798096346589789e-06, "loss": 0.6718, "step": 23805 }, { "epoch": 0.7296187323770994, "grad_norm": 1.7083325176276196, "learning_rate": 1.7977151616633475e-06, "loss": 0.6485, "step": 23806 }, { "epoch": 0.7296493808998407, "grad_norm": 1.9564739593157445, "learning_rate": 1.7973340082898395e-06, "loss": 0.6055, "step": 23807 }, { "epoch": 0.7296800294225818, "grad_norm": 1.8272141537864455, "learning_rate": 1.7969528864730168e-06, "loss": 0.5752, "step": 23808 }, { "epoch": 0.7297106779453231, "grad_norm": 1.9213144903871913, "learning_rate": 1.7965717962166374e-06, "loss": 0.5975, "step": 23809 }, { "epoch": 0.7297413264680642, "grad_norm": 1.8017352118516068, "learning_rate": 1.7961907375244574e-06, "loss": 0.6015, "step": 23810 }, { "epoch": 0.7297719749908055, "grad_norm": 1.8971792875390205, "learning_rate": 1.7958097104002297e-06, "loss": 0.5927, "step": 23811 }, { "epoch": 0.7298026235135466, "grad_norm": 1.7553504814619265, "learning_rate": 1.795428714847705e-06, "loss": 0.6209, "step": 23812 }, { "epoch": 0.7298332720362879, "grad_norm": 1.6962626867223687, "learning_rate": 1.7950477508706448e-06, "loss": 0.5287, "step": 23813 }, { "epoch": 0.729863920559029, "grad_norm": 1.7618262727792353, "learning_rate": 1.7946668184727995e-06, "loss": 0.5579, "step": 23814 }, { "epoch": 0.7298945690817703, "grad_norm": 1.7129960209747996, "learning_rate": 1.7942859176579203e-06, "loss": 0.5687, "step": 23815 }, { "epoch": 0.7299252176045115, "grad_norm": 1.8100797456996047, "learning_rate": 1.7939050484297616e-06, "loss": 0.5918, "step": 23816 }, { "epoch": 0.7299558661272527, "grad_norm": 1.8133110150812166, "learning_rate": 1.7935242107920775e-06, "loss": 0.504, "step": 23817 }, { "epoch": 0.7299865146499939, "grad_norm": 1.8138094282366066, "learning_rate": 1.7931434047486208e-06, "loss": 0.6626, "step": 23818 }, { "epoch": 0.7300171631727351, "grad_norm": 1.7317006790915594, "learning_rate": 1.7927626303031414e-06, "loss": 0.5884, "step": 23819 }, { "epoch": 0.7300478116954763, "grad_norm": 1.8456848065689915, "learning_rate": 1.7923818874593924e-06, "loss": 0.5167, "step": 23820 }, { "epoch": 0.7300784602182174, "grad_norm": 3.6076548042700582, "learning_rate": 1.792001176221127e-06, "loss": 0.5392, "step": 23821 }, { "epoch": 0.7301091087409587, "grad_norm": 1.9595749536914717, "learning_rate": 1.7916204965920946e-06, "loss": 0.5506, "step": 23822 }, { "epoch": 0.7301397572636998, "grad_norm": 1.9619151919099156, "learning_rate": 1.791239848576043e-06, "loss": 0.6443, "step": 23823 }, { "epoch": 0.7301704057864411, "grad_norm": 1.963281292539214, "learning_rate": 1.7908592321767298e-06, "loss": 0.5632, "step": 23824 }, { "epoch": 0.7302010543091823, "grad_norm": 1.423731824158883, "learning_rate": 1.7904786473978996e-06, "loss": 0.4993, "step": 23825 }, { "epoch": 0.7302317028319235, "grad_norm": 1.6672462689508838, "learning_rate": 1.7900980942433067e-06, "loss": 0.5479, "step": 23826 }, { "epoch": 0.7302623513546647, "grad_norm": 1.9643513306760192, "learning_rate": 1.7897175727166966e-06, "loss": 0.5702, "step": 23827 }, { "epoch": 0.7302929998774059, "grad_norm": 1.753856356271015, "learning_rate": 1.7893370828218204e-06, "loss": 0.6209, "step": 23828 }, { "epoch": 0.7303236484001471, "grad_norm": 0.7800120868802147, "learning_rate": 1.7889566245624296e-06, "loss": 0.3917, "step": 23829 }, { "epoch": 0.7303542969228883, "grad_norm": 1.9483753418090246, "learning_rate": 1.788576197942269e-06, "loss": 0.5413, "step": 23830 }, { "epoch": 0.7303849454456295, "grad_norm": 0.8267175276459704, "learning_rate": 1.788195802965088e-06, "loss": 0.4071, "step": 23831 }, { "epoch": 0.7304155939683707, "grad_norm": 0.8229609254751508, "learning_rate": 1.787815439634638e-06, "loss": 0.4189, "step": 23832 }, { "epoch": 0.7304462424911119, "grad_norm": 1.8548058384890016, "learning_rate": 1.7874351079546642e-06, "loss": 0.6221, "step": 23833 }, { "epoch": 0.7304768910138532, "grad_norm": 1.84596737131439, "learning_rate": 1.7870548079289123e-06, "loss": 0.63, "step": 23834 }, { "epoch": 0.7305075395365943, "grad_norm": 2.66000451799753, "learning_rate": 1.7866745395611318e-06, "loss": 0.6136, "step": 23835 }, { "epoch": 0.7305381880593356, "grad_norm": 1.9481267129973876, "learning_rate": 1.7862943028550694e-06, "loss": 0.6728, "step": 23836 }, { "epoch": 0.7305688365820767, "grad_norm": 1.9454152706193344, "learning_rate": 1.785914097814473e-06, "loss": 0.528, "step": 23837 }, { "epoch": 0.730599485104818, "grad_norm": 1.838821335263542, "learning_rate": 1.7855339244430852e-06, "loss": 0.6247, "step": 23838 }, { "epoch": 0.7306301336275591, "grad_norm": 1.7923431192890655, "learning_rate": 1.7851537827446548e-06, "loss": 0.6247, "step": 23839 }, { "epoch": 0.7306607821503004, "grad_norm": 1.736034712566646, "learning_rate": 1.7847736727229276e-06, "loss": 0.6184, "step": 23840 }, { "epoch": 0.7306914306730415, "grad_norm": 1.5765000619039575, "learning_rate": 1.7843935943816488e-06, "loss": 0.5591, "step": 23841 }, { "epoch": 0.7307220791957828, "grad_norm": 1.7833385368097805, "learning_rate": 1.784013547724559e-06, "loss": 0.5208, "step": 23842 }, { "epoch": 0.730752727718524, "grad_norm": 0.822676123605763, "learning_rate": 1.7836335327554099e-06, "loss": 0.4064, "step": 23843 }, { "epoch": 0.7307833762412652, "grad_norm": 1.9194274780041607, "learning_rate": 1.7832535494779408e-06, "loss": 0.5395, "step": 23844 }, { "epoch": 0.7308140247640064, "grad_norm": 2.0588491676834737, "learning_rate": 1.7828735978958995e-06, "loss": 0.5473, "step": 23845 }, { "epoch": 0.7308446732867476, "grad_norm": 1.5857973892914101, "learning_rate": 1.782493678013026e-06, "loss": 0.5335, "step": 23846 }, { "epoch": 0.7308753218094888, "grad_norm": 1.7433562073695892, "learning_rate": 1.7821137898330654e-06, "loss": 0.5605, "step": 23847 }, { "epoch": 0.73090597033223, "grad_norm": 0.7962291151472806, "learning_rate": 1.7817339333597622e-06, "loss": 0.3978, "step": 23848 }, { "epoch": 0.7309366188549712, "grad_norm": 0.8527349644951404, "learning_rate": 1.7813541085968573e-06, "loss": 0.417, "step": 23849 }, { "epoch": 0.7309672673777124, "grad_norm": 1.8340263875829854, "learning_rate": 1.7809743155480929e-06, "loss": 0.6177, "step": 23850 }, { "epoch": 0.7309979159004536, "grad_norm": 1.8143092754886585, "learning_rate": 1.7805945542172143e-06, "loss": 0.6481, "step": 23851 }, { "epoch": 0.7310285644231947, "grad_norm": 1.632758870857113, "learning_rate": 1.7802148246079597e-06, "loss": 0.5699, "step": 23852 }, { "epoch": 0.731059212945936, "grad_norm": 1.7740028007405304, "learning_rate": 1.7798351267240722e-06, "loss": 0.5181, "step": 23853 }, { "epoch": 0.7310898614686772, "grad_norm": 1.8913410604172456, "learning_rate": 1.779455460569295e-06, "loss": 0.5989, "step": 23854 }, { "epoch": 0.7311205099914184, "grad_norm": 1.7741022317844741, "learning_rate": 1.7790758261473651e-06, "loss": 0.6335, "step": 23855 }, { "epoch": 0.7311511585141596, "grad_norm": 1.9791574961319802, "learning_rate": 1.778696223462027e-06, "loss": 0.5985, "step": 23856 }, { "epoch": 0.7311818070369008, "grad_norm": 1.7153411113734238, "learning_rate": 1.7783166525170175e-06, "loss": 0.5446, "step": 23857 }, { "epoch": 0.731212455559642, "grad_norm": 1.7660807041122104, "learning_rate": 1.7779371133160784e-06, "loss": 0.4973, "step": 23858 }, { "epoch": 0.7312431040823832, "grad_norm": 0.8059062766205362, "learning_rate": 1.7775576058629512e-06, "loss": 0.4128, "step": 23859 }, { "epoch": 0.7312737526051244, "grad_norm": 1.7871196141458865, "learning_rate": 1.7771781301613716e-06, "loss": 0.61, "step": 23860 }, { "epoch": 0.7313044011278657, "grad_norm": 2.0457232338334492, "learning_rate": 1.7767986862150805e-06, "loss": 0.5724, "step": 23861 }, { "epoch": 0.7313350496506068, "grad_norm": 1.869874476643524, "learning_rate": 1.776419274027818e-06, "loss": 0.5776, "step": 23862 }, { "epoch": 0.7313656981733481, "grad_norm": 0.8627962806129347, "learning_rate": 1.7760398936033195e-06, "loss": 0.4153, "step": 23863 }, { "epoch": 0.7313963466960892, "grad_norm": 1.6932720576459883, "learning_rate": 1.7756605449453252e-06, "loss": 0.6483, "step": 23864 }, { "epoch": 0.7314269952188305, "grad_norm": 1.7842339261919726, "learning_rate": 1.7752812280575737e-06, "loss": 0.5434, "step": 23865 }, { "epoch": 0.7314576437415716, "grad_norm": 1.9842373808765952, "learning_rate": 1.7749019429438003e-06, "loss": 0.5274, "step": 23866 }, { "epoch": 0.7314882922643129, "grad_norm": 2.0837312656855382, "learning_rate": 1.7745226896077444e-06, "loss": 0.5937, "step": 23867 }, { "epoch": 0.731518940787054, "grad_norm": 0.7822548459331304, "learning_rate": 1.7741434680531405e-06, "loss": 0.4159, "step": 23868 }, { "epoch": 0.7315495893097953, "grad_norm": 2.0835895808692473, "learning_rate": 1.773764278283726e-06, "loss": 0.5648, "step": 23869 }, { "epoch": 0.7315802378325365, "grad_norm": 1.921924211524516, "learning_rate": 1.7733851203032393e-06, "loss": 0.5908, "step": 23870 }, { "epoch": 0.7316108863552777, "grad_norm": 1.9154499408003975, "learning_rate": 1.7730059941154133e-06, "loss": 0.6333, "step": 23871 }, { "epoch": 0.7316415348780189, "grad_norm": 1.850786509766599, "learning_rate": 1.7726268997239843e-06, "loss": 0.6277, "step": 23872 }, { "epoch": 0.7316721834007601, "grad_norm": 1.9873510899272928, "learning_rate": 1.7722478371326902e-06, "loss": 0.561, "step": 23873 }, { "epoch": 0.7317028319235013, "grad_norm": 1.8368303894997853, "learning_rate": 1.7718688063452621e-06, "loss": 0.6309, "step": 23874 }, { "epoch": 0.7317334804462425, "grad_norm": 1.834014511548775, "learning_rate": 1.7714898073654368e-06, "loss": 0.5504, "step": 23875 }, { "epoch": 0.7317641289689837, "grad_norm": 1.636445011852327, "learning_rate": 1.7711108401969502e-06, "loss": 0.652, "step": 23876 }, { "epoch": 0.731794777491725, "grad_norm": 0.8125582678830822, "learning_rate": 1.770731904843533e-06, "loss": 0.4122, "step": 23877 }, { "epoch": 0.7318254260144661, "grad_norm": 2.0787282687078883, "learning_rate": 1.7703530013089221e-06, "loss": 0.6228, "step": 23878 }, { "epoch": 0.7318560745372074, "grad_norm": 1.661915450698347, "learning_rate": 1.7699741295968476e-06, "loss": 0.4833, "step": 23879 }, { "epoch": 0.7318867230599485, "grad_norm": 1.809959577680124, "learning_rate": 1.7695952897110447e-06, "loss": 0.5638, "step": 23880 }, { "epoch": 0.7319173715826898, "grad_norm": 2.0297936692388556, "learning_rate": 1.7692164816552476e-06, "loss": 0.6217, "step": 23881 }, { "epoch": 0.7319480201054309, "grad_norm": 1.9970056797040947, "learning_rate": 1.7688377054331858e-06, "loss": 0.5074, "step": 23882 }, { "epoch": 0.7319786686281721, "grad_norm": 1.8969536911514309, "learning_rate": 1.768458961048592e-06, "loss": 0.5357, "step": 23883 }, { "epoch": 0.7320093171509133, "grad_norm": 1.90534349798699, "learning_rate": 1.7680802485052011e-06, "loss": 0.6967, "step": 23884 }, { "epoch": 0.7320399656736545, "grad_norm": 0.7891779710067349, "learning_rate": 1.7677015678067405e-06, "loss": 0.4211, "step": 23885 }, { "epoch": 0.7320706141963957, "grad_norm": 1.7615975125769983, "learning_rate": 1.7673229189569451e-06, "loss": 0.5926, "step": 23886 }, { "epoch": 0.7321012627191369, "grad_norm": 1.9628145543826014, "learning_rate": 1.766944301959543e-06, "loss": 0.5821, "step": 23887 }, { "epoch": 0.7321319112418782, "grad_norm": 1.8256775497105198, "learning_rate": 1.7665657168182655e-06, "loss": 0.5555, "step": 23888 }, { "epoch": 0.7321625597646193, "grad_norm": 1.7423154073781888, "learning_rate": 1.7661871635368444e-06, "loss": 0.5808, "step": 23889 }, { "epoch": 0.7321932082873606, "grad_norm": 1.9206973500993323, "learning_rate": 1.7658086421190074e-06, "loss": 0.5713, "step": 23890 }, { "epoch": 0.7322238568101017, "grad_norm": 1.8619465312032317, "learning_rate": 1.7654301525684853e-06, "loss": 0.607, "step": 23891 }, { "epoch": 0.732254505332843, "grad_norm": 1.8351592719540613, "learning_rate": 1.7650516948890095e-06, "loss": 0.5142, "step": 23892 }, { "epoch": 0.7322851538555841, "grad_norm": 1.9852553928432808, "learning_rate": 1.764673269084305e-06, "loss": 0.6256, "step": 23893 }, { "epoch": 0.7323158023783254, "grad_norm": 0.8023265636437574, "learning_rate": 1.7642948751581029e-06, "loss": 0.4158, "step": 23894 }, { "epoch": 0.7323464509010665, "grad_norm": 1.7098548534433013, "learning_rate": 1.7639165131141329e-06, "loss": 0.6484, "step": 23895 }, { "epoch": 0.7323770994238078, "grad_norm": 1.8222362367564786, "learning_rate": 1.7635381829561193e-06, "loss": 0.6397, "step": 23896 }, { "epoch": 0.732407747946549, "grad_norm": 2.1539234205490505, "learning_rate": 1.763159884687794e-06, "loss": 0.5628, "step": 23897 }, { "epoch": 0.7324383964692902, "grad_norm": 0.7766743282990782, "learning_rate": 1.7627816183128793e-06, "loss": 0.3945, "step": 23898 }, { "epoch": 0.7324690449920314, "grad_norm": 1.8412375615732324, "learning_rate": 1.762403383835109e-06, "loss": 0.5203, "step": 23899 }, { "epoch": 0.7324996935147726, "grad_norm": 0.7395619024612202, "learning_rate": 1.7620251812582068e-06, "loss": 0.4135, "step": 23900 }, { "epoch": 0.7325303420375138, "grad_norm": 1.7704711702608684, "learning_rate": 1.7616470105858968e-06, "loss": 0.673, "step": 23901 }, { "epoch": 0.732560990560255, "grad_norm": 1.7703494693285735, "learning_rate": 1.7612688718219072e-06, "loss": 0.5922, "step": 23902 }, { "epoch": 0.7325916390829962, "grad_norm": 1.8043693815990902, "learning_rate": 1.7608907649699663e-06, "loss": 0.6666, "step": 23903 }, { "epoch": 0.7326222876057374, "grad_norm": 0.7772018258470565, "learning_rate": 1.7605126900337953e-06, "loss": 0.3963, "step": 23904 }, { "epoch": 0.7326529361284786, "grad_norm": 1.799666670690649, "learning_rate": 1.760134647017122e-06, "loss": 0.6156, "step": 23905 }, { "epoch": 0.7326835846512199, "grad_norm": 1.6140949310489305, "learning_rate": 1.7597566359236712e-06, "loss": 0.6272, "step": 23906 }, { "epoch": 0.732714233173961, "grad_norm": 2.031949488763994, "learning_rate": 1.7593786567571686e-06, "loss": 0.5495, "step": 23907 }, { "epoch": 0.7327448816967023, "grad_norm": 0.7814652379718872, "learning_rate": 1.7590007095213369e-06, "loss": 0.3952, "step": 23908 }, { "epoch": 0.7327755302194434, "grad_norm": 0.7455357408279671, "learning_rate": 1.7586227942198975e-06, "loss": 0.4063, "step": 23909 }, { "epoch": 0.7328061787421847, "grad_norm": 0.8200386539101083, "learning_rate": 1.7582449108565807e-06, "loss": 0.4391, "step": 23910 }, { "epoch": 0.7328368272649258, "grad_norm": 1.8111157405092233, "learning_rate": 1.757867059435106e-06, "loss": 0.5881, "step": 23911 }, { "epoch": 0.7328674757876671, "grad_norm": 1.6613911815178843, "learning_rate": 1.7574892399591947e-06, "loss": 0.5828, "step": 23912 }, { "epoch": 0.7328981243104082, "grad_norm": 1.865624225553855, "learning_rate": 1.7571114524325716e-06, "loss": 0.5484, "step": 23913 }, { "epoch": 0.7329287728331494, "grad_norm": 2.034839694305301, "learning_rate": 1.756733696858961e-06, "loss": 0.6257, "step": 23914 }, { "epoch": 0.7329594213558907, "grad_norm": 1.778162931911195, "learning_rate": 1.7563559732420815e-06, "loss": 0.5896, "step": 23915 }, { "epoch": 0.7329900698786318, "grad_norm": 1.609269725753223, "learning_rate": 1.7559782815856563e-06, "loss": 0.5184, "step": 23916 }, { "epoch": 0.7330207184013731, "grad_norm": 0.8052006401208394, "learning_rate": 1.7556006218934074e-06, "loss": 0.3935, "step": 23917 }, { "epoch": 0.7330513669241142, "grad_norm": 1.9692937024975452, "learning_rate": 1.7552229941690573e-06, "loss": 0.6815, "step": 23918 }, { "epoch": 0.7330820154468555, "grad_norm": 0.7529404241827129, "learning_rate": 1.754845398416325e-06, "loss": 0.3977, "step": 23919 }, { "epoch": 0.7331126639695966, "grad_norm": 1.8025274117917076, "learning_rate": 1.7544678346389283e-06, "loss": 0.5409, "step": 23920 }, { "epoch": 0.7331433124923379, "grad_norm": 0.8070072425350245, "learning_rate": 1.7540903028405936e-06, "loss": 0.4223, "step": 23921 }, { "epoch": 0.733173961015079, "grad_norm": 1.743227037485688, "learning_rate": 1.7537128030250372e-06, "loss": 0.5317, "step": 23922 }, { "epoch": 0.7332046095378203, "grad_norm": 1.7211801396517799, "learning_rate": 1.7533353351959782e-06, "loss": 0.5993, "step": 23923 }, { "epoch": 0.7332352580605614, "grad_norm": 1.7650799229366296, "learning_rate": 1.7529578993571367e-06, "loss": 0.621, "step": 23924 }, { "epoch": 0.7332659065833027, "grad_norm": 1.6647248836319541, "learning_rate": 1.7525804955122316e-06, "loss": 0.5598, "step": 23925 }, { "epoch": 0.7332965551060439, "grad_norm": 1.8863092412387978, "learning_rate": 1.7522031236649833e-06, "loss": 0.6417, "step": 23926 }, { "epoch": 0.7333272036287851, "grad_norm": 1.8164990906060665, "learning_rate": 1.7518257838191073e-06, "loss": 0.6085, "step": 23927 }, { "epoch": 0.7333578521515263, "grad_norm": 1.7403313993274645, "learning_rate": 1.7514484759783223e-06, "loss": 0.5547, "step": 23928 }, { "epoch": 0.7333885006742675, "grad_norm": 0.809469615093356, "learning_rate": 1.7510712001463493e-06, "loss": 0.4155, "step": 23929 }, { "epoch": 0.7334191491970087, "grad_norm": 0.791151092341208, "learning_rate": 1.7506939563269021e-06, "loss": 0.4038, "step": 23930 }, { "epoch": 0.7334497977197499, "grad_norm": 1.7083744765314646, "learning_rate": 1.7503167445236974e-06, "loss": 0.5543, "step": 23931 }, { "epoch": 0.7334804462424911, "grad_norm": 0.7917076892362213, "learning_rate": 1.7499395647404532e-06, "loss": 0.4138, "step": 23932 }, { "epoch": 0.7335110947652324, "grad_norm": 1.7033490760777705, "learning_rate": 1.7495624169808862e-06, "loss": 0.6094, "step": 23933 }, { "epoch": 0.7335417432879735, "grad_norm": 1.8838599001911147, "learning_rate": 1.7491853012487141e-06, "loss": 0.5649, "step": 23934 }, { "epoch": 0.7335723918107148, "grad_norm": 1.7391887167025943, "learning_rate": 1.7488082175476495e-06, "loss": 0.5846, "step": 23935 }, { "epoch": 0.7336030403334559, "grad_norm": 0.8094770966827673, "learning_rate": 1.748431165881409e-06, "loss": 0.4285, "step": 23936 }, { "epoch": 0.7336336888561972, "grad_norm": 0.7658928942830058, "learning_rate": 1.7480541462537098e-06, "loss": 0.4098, "step": 23937 }, { "epoch": 0.7336643373789383, "grad_norm": 1.6576275956952442, "learning_rate": 1.7476771586682655e-06, "loss": 0.5071, "step": 23938 }, { "epoch": 0.7336949859016796, "grad_norm": 0.8460876374011885, "learning_rate": 1.7473002031287867e-06, "loss": 0.4214, "step": 23939 }, { "epoch": 0.7337256344244207, "grad_norm": 1.9387084650443869, "learning_rate": 1.7469232796389945e-06, "loss": 0.5144, "step": 23940 }, { "epoch": 0.733756282947162, "grad_norm": 1.9511956528594474, "learning_rate": 1.7465463882025995e-06, "loss": 0.607, "step": 23941 }, { "epoch": 0.7337869314699031, "grad_norm": 0.7687569348190431, "learning_rate": 1.7461695288233138e-06, "loss": 0.393, "step": 23942 }, { "epoch": 0.7338175799926444, "grad_norm": 0.7477933897355844, "learning_rate": 1.7457927015048526e-06, "loss": 0.4007, "step": 23943 }, { "epoch": 0.7338482285153856, "grad_norm": 1.8028855895386808, "learning_rate": 1.7454159062509286e-06, "loss": 0.5216, "step": 23944 }, { "epoch": 0.7338788770381267, "grad_norm": 1.9150293005629442, "learning_rate": 1.7450391430652552e-06, "loss": 0.6422, "step": 23945 }, { "epoch": 0.733909525560868, "grad_norm": 1.940019492963828, "learning_rate": 1.7446624119515432e-06, "loss": 0.6769, "step": 23946 }, { "epoch": 0.7339401740836091, "grad_norm": 1.6531273006012215, "learning_rate": 1.744285712913505e-06, "loss": 0.4697, "step": 23947 }, { "epoch": 0.7339708226063504, "grad_norm": 1.7415879545603603, "learning_rate": 1.7439090459548541e-06, "loss": 0.508, "step": 23948 }, { "epoch": 0.7340014711290915, "grad_norm": 1.9199120257820312, "learning_rate": 1.7435324110793006e-06, "loss": 0.6186, "step": 23949 }, { "epoch": 0.7340321196518328, "grad_norm": 1.8769172573862363, "learning_rate": 1.7431558082905525e-06, "loss": 0.6181, "step": 23950 }, { "epoch": 0.7340627681745739, "grad_norm": 1.8805924942091008, "learning_rate": 1.7427792375923264e-06, "loss": 0.648, "step": 23951 }, { "epoch": 0.7340934166973152, "grad_norm": 2.083754076764416, "learning_rate": 1.7424026989883285e-06, "loss": 0.6045, "step": 23952 }, { "epoch": 0.7341240652200564, "grad_norm": 0.8301593613165619, "learning_rate": 1.7420261924822717e-06, "loss": 0.4103, "step": 23953 }, { "epoch": 0.7341547137427976, "grad_norm": 2.0047568906887974, "learning_rate": 1.741649718077863e-06, "loss": 0.6219, "step": 23954 }, { "epoch": 0.7341853622655388, "grad_norm": 1.9497046981738713, "learning_rate": 1.741273275778813e-06, "loss": 0.5544, "step": 23955 }, { "epoch": 0.73421601078828, "grad_norm": 1.7026753836319268, "learning_rate": 1.740896865588833e-06, "loss": 0.5145, "step": 23956 }, { "epoch": 0.7342466593110212, "grad_norm": 0.8500631274825664, "learning_rate": 1.7405204875116289e-06, "loss": 0.4333, "step": 23957 }, { "epoch": 0.7342773078337624, "grad_norm": 0.7669424744748414, "learning_rate": 1.7401441415509096e-06, "loss": 0.398, "step": 23958 }, { "epoch": 0.7343079563565036, "grad_norm": 1.7292074725778506, "learning_rate": 1.7397678277103863e-06, "loss": 0.6402, "step": 23959 }, { "epoch": 0.7343386048792448, "grad_norm": 1.8067726013465015, "learning_rate": 1.7393915459937631e-06, "loss": 0.5214, "step": 23960 }, { "epoch": 0.734369253401986, "grad_norm": 1.5171390853685869, "learning_rate": 1.7390152964047492e-06, "loss": 0.471, "step": 23961 }, { "epoch": 0.7343999019247273, "grad_norm": 1.8667934507353925, "learning_rate": 1.7386390789470536e-06, "loss": 0.5966, "step": 23962 }, { "epoch": 0.7344305504474684, "grad_norm": 1.8298625702497266, "learning_rate": 1.73826289362438e-06, "loss": 0.566, "step": 23963 }, { "epoch": 0.7344611989702097, "grad_norm": 2.1395365253693726, "learning_rate": 1.7378867404404382e-06, "loss": 0.5898, "step": 23964 }, { "epoch": 0.7344918474929508, "grad_norm": 1.9277274770116335, "learning_rate": 1.737510619398931e-06, "loss": 0.632, "step": 23965 }, { "epoch": 0.7345224960156921, "grad_norm": 1.5825640053847125, "learning_rate": 1.737134530503567e-06, "loss": 0.5649, "step": 23966 }, { "epoch": 0.7345531445384332, "grad_norm": 1.9068683324554845, "learning_rate": 1.7367584737580528e-06, "loss": 0.6237, "step": 23967 }, { "epoch": 0.7345837930611745, "grad_norm": 1.8658359522900565, "learning_rate": 1.7363824491660902e-06, "loss": 0.5999, "step": 23968 }, { "epoch": 0.7346144415839156, "grad_norm": 1.8036675306648564, "learning_rate": 1.7360064567313866e-06, "loss": 0.5782, "step": 23969 }, { "epoch": 0.7346450901066569, "grad_norm": 1.7846515401877265, "learning_rate": 1.7356304964576488e-06, "loss": 0.5099, "step": 23970 }, { "epoch": 0.7346757386293981, "grad_norm": 1.837843453904586, "learning_rate": 1.7352545683485766e-06, "loss": 0.5636, "step": 23971 }, { "epoch": 0.7347063871521393, "grad_norm": 1.8776161440116013, "learning_rate": 1.7348786724078765e-06, "loss": 0.6361, "step": 23972 }, { "epoch": 0.7347370356748805, "grad_norm": 1.9329097037864582, "learning_rate": 1.734502808639254e-06, "loss": 0.573, "step": 23973 }, { "epoch": 0.7347676841976217, "grad_norm": 1.8280324627972608, "learning_rate": 1.7341269770464091e-06, "loss": 0.6634, "step": 23974 }, { "epoch": 0.7347983327203629, "grad_norm": 0.7829820378463246, "learning_rate": 1.733751177633049e-06, "loss": 0.3972, "step": 23975 }, { "epoch": 0.734828981243104, "grad_norm": 0.8054572767135662, "learning_rate": 1.7333754104028721e-06, "loss": 0.3984, "step": 23976 }, { "epoch": 0.7348596297658453, "grad_norm": 1.7106397078313453, "learning_rate": 1.732999675359583e-06, "loss": 0.5567, "step": 23977 }, { "epoch": 0.7348902782885864, "grad_norm": 1.9147815326629523, "learning_rate": 1.7326239725068856e-06, "loss": 0.6107, "step": 23978 }, { "epoch": 0.7349209268113277, "grad_norm": 0.8143565772755553, "learning_rate": 1.7322483018484787e-06, "loss": 0.4, "step": 23979 }, { "epoch": 0.7349515753340689, "grad_norm": 1.7513444933778777, "learning_rate": 1.7318726633880655e-06, "loss": 0.6067, "step": 23980 }, { "epoch": 0.7349822238568101, "grad_norm": 1.962419015742906, "learning_rate": 1.7314970571293488e-06, "loss": 0.6268, "step": 23981 }, { "epoch": 0.7350128723795513, "grad_norm": 0.8132790575572494, "learning_rate": 1.7311214830760258e-06, "loss": 0.4038, "step": 23982 }, { "epoch": 0.7350435209022925, "grad_norm": 1.64074826658688, "learning_rate": 1.7307459412318013e-06, "loss": 0.5363, "step": 23983 }, { "epoch": 0.7350741694250337, "grad_norm": 1.65045123607994, "learning_rate": 1.7303704316003716e-06, "loss": 0.6531, "step": 23984 }, { "epoch": 0.7351048179477749, "grad_norm": 2.1095754893557883, "learning_rate": 1.7299949541854382e-06, "loss": 0.7221, "step": 23985 }, { "epoch": 0.7351354664705161, "grad_norm": 1.6177523716987217, "learning_rate": 1.7296195089907037e-06, "loss": 0.5066, "step": 23986 }, { "epoch": 0.7351661149932573, "grad_norm": 0.7758927132053962, "learning_rate": 1.7292440960198631e-06, "loss": 0.3919, "step": 23987 }, { "epoch": 0.7351967635159985, "grad_norm": 1.766678509223574, "learning_rate": 1.728868715276617e-06, "loss": 0.6735, "step": 23988 }, { "epoch": 0.7352274120387398, "grad_norm": 1.7819023919369854, "learning_rate": 1.728493366764666e-06, "loss": 0.5443, "step": 23989 }, { "epoch": 0.7352580605614809, "grad_norm": 2.084078246127482, "learning_rate": 1.7281180504877053e-06, "loss": 0.6131, "step": 23990 }, { "epoch": 0.7352887090842222, "grad_norm": 1.7820566516198633, "learning_rate": 1.7277427664494352e-06, "loss": 0.5879, "step": 23991 }, { "epoch": 0.7353193576069633, "grad_norm": 0.8434255611031117, "learning_rate": 1.7273675146535535e-06, "loss": 0.4169, "step": 23992 }, { "epoch": 0.7353500061297046, "grad_norm": 1.7598520488311216, "learning_rate": 1.726992295103756e-06, "loss": 0.4623, "step": 23993 }, { "epoch": 0.7353806546524457, "grad_norm": 0.7908934863581122, "learning_rate": 1.7266171078037424e-06, "loss": 0.4154, "step": 23994 }, { "epoch": 0.735411303175187, "grad_norm": 1.7378730387453423, "learning_rate": 1.7262419527572062e-06, "loss": 0.5243, "step": 23995 }, { "epoch": 0.7354419516979281, "grad_norm": 0.7856200985132971, "learning_rate": 1.725866829967846e-06, "loss": 0.4038, "step": 23996 }, { "epoch": 0.7354726002206694, "grad_norm": 0.7895627780334575, "learning_rate": 1.7254917394393588e-06, "loss": 0.409, "step": 23997 }, { "epoch": 0.7355032487434106, "grad_norm": 1.9526883377327158, "learning_rate": 1.7251166811754384e-06, "loss": 0.6767, "step": 23998 }, { "epoch": 0.7355338972661518, "grad_norm": 1.8115187235056223, "learning_rate": 1.7247416551797802e-06, "loss": 0.5661, "step": 23999 }, { "epoch": 0.735564545788893, "grad_norm": 1.9909806889018398, "learning_rate": 1.7243666614560828e-06, "loss": 0.6739, "step": 24000 }, { "epoch": 0.7355951943116342, "grad_norm": 1.747050840700644, "learning_rate": 1.723991700008037e-06, "loss": 0.5206, "step": 24001 }, { "epoch": 0.7356258428343754, "grad_norm": 2.0587624501288735, "learning_rate": 1.7236167708393393e-06, "loss": 0.6115, "step": 24002 }, { "epoch": 0.7356564913571166, "grad_norm": 0.7779830030830005, "learning_rate": 1.7232418739536854e-06, "loss": 0.3839, "step": 24003 }, { "epoch": 0.7356871398798578, "grad_norm": 0.8449442575238152, "learning_rate": 1.7228670093547661e-06, "loss": 0.3914, "step": 24004 }, { "epoch": 0.735717788402599, "grad_norm": 2.253668157519409, "learning_rate": 1.7224921770462782e-06, "loss": 0.5908, "step": 24005 }, { "epoch": 0.7357484369253402, "grad_norm": 1.5532669170727593, "learning_rate": 1.7221173770319105e-06, "loss": 0.4836, "step": 24006 }, { "epoch": 0.7357790854480813, "grad_norm": 0.8115460929199667, "learning_rate": 1.7217426093153623e-06, "loss": 0.3887, "step": 24007 }, { "epoch": 0.7358097339708226, "grad_norm": 1.6741575531656194, "learning_rate": 1.7213678739003225e-06, "loss": 0.5156, "step": 24008 }, { "epoch": 0.7358403824935638, "grad_norm": 1.6974744491344596, "learning_rate": 1.7209931707904826e-06, "loss": 0.5404, "step": 24009 }, { "epoch": 0.735871031016305, "grad_norm": 2.071684173737199, "learning_rate": 1.7206184999895354e-06, "loss": 0.5753, "step": 24010 }, { "epoch": 0.7359016795390462, "grad_norm": 1.7729865196724313, "learning_rate": 1.7202438615011757e-06, "loss": 0.5662, "step": 24011 }, { "epoch": 0.7359323280617874, "grad_norm": 1.9331741222717833, "learning_rate": 1.7198692553290903e-06, "loss": 0.5528, "step": 24012 }, { "epoch": 0.7359629765845286, "grad_norm": 1.9945744683101614, "learning_rate": 1.719494681476972e-06, "loss": 0.6243, "step": 24013 }, { "epoch": 0.7359936251072698, "grad_norm": 2.041746480630505, "learning_rate": 1.7191201399485141e-06, "loss": 0.5131, "step": 24014 }, { "epoch": 0.736024273630011, "grad_norm": 1.8335342762696252, "learning_rate": 1.7187456307474031e-06, "loss": 0.6535, "step": 24015 }, { "epoch": 0.7360549221527523, "grad_norm": 2.0973456077743404, "learning_rate": 1.7183711538773328e-06, "loss": 0.6076, "step": 24016 }, { "epoch": 0.7360855706754934, "grad_norm": 1.81550240222822, "learning_rate": 1.7179967093419876e-06, "loss": 0.555, "step": 24017 }, { "epoch": 0.7361162191982347, "grad_norm": 2.0527760828948063, "learning_rate": 1.717622297145064e-06, "loss": 0.5881, "step": 24018 }, { "epoch": 0.7361468677209758, "grad_norm": 1.7484130137949416, "learning_rate": 1.7172479172902474e-06, "loss": 0.5415, "step": 24019 }, { "epoch": 0.7361775162437171, "grad_norm": 1.9609722665930376, "learning_rate": 1.7168735697812254e-06, "loss": 0.6109, "step": 24020 }, { "epoch": 0.7362081647664582, "grad_norm": 1.6999722507034822, "learning_rate": 1.7164992546216886e-06, "loss": 0.6002, "step": 24021 }, { "epoch": 0.7362388132891995, "grad_norm": 2.1844235287647416, "learning_rate": 1.7161249718153266e-06, "loss": 0.5839, "step": 24022 }, { "epoch": 0.7362694618119406, "grad_norm": 2.0302098589960567, "learning_rate": 1.7157507213658232e-06, "loss": 0.5174, "step": 24023 }, { "epoch": 0.7363001103346819, "grad_norm": 1.7459611140059905, "learning_rate": 1.7153765032768683e-06, "loss": 0.5682, "step": 24024 }, { "epoch": 0.736330758857423, "grad_norm": 1.9457457788387653, "learning_rate": 1.7150023175521496e-06, "loss": 0.5913, "step": 24025 }, { "epoch": 0.7363614073801643, "grad_norm": 0.7749967061392755, "learning_rate": 1.714628164195355e-06, "loss": 0.4088, "step": 24026 }, { "epoch": 0.7363920559029055, "grad_norm": 1.7799738219695296, "learning_rate": 1.7142540432101695e-06, "loss": 0.5659, "step": 24027 }, { "epoch": 0.7364227044256467, "grad_norm": 2.0174702086677736, "learning_rate": 1.7138799546002776e-06, "loss": 0.5648, "step": 24028 }, { "epoch": 0.7364533529483879, "grad_norm": 1.8171373196240475, "learning_rate": 1.7135058983693682e-06, "loss": 0.4563, "step": 24029 }, { "epoch": 0.7364840014711291, "grad_norm": 1.6584289140236674, "learning_rate": 1.7131318745211272e-06, "loss": 0.5389, "step": 24030 }, { "epoch": 0.7365146499938703, "grad_norm": 1.7677529814753294, "learning_rate": 1.7127578830592374e-06, "loss": 0.5905, "step": 24031 }, { "epoch": 0.7365452985166115, "grad_norm": 2.0339682035653945, "learning_rate": 1.7123839239873845e-06, "loss": 0.6401, "step": 24032 }, { "epoch": 0.7365759470393527, "grad_norm": 1.7124312120238039, "learning_rate": 1.7120099973092551e-06, "loss": 0.5527, "step": 24033 }, { "epoch": 0.736606595562094, "grad_norm": 1.8418378475027826, "learning_rate": 1.7116361030285334e-06, "loss": 0.5915, "step": 24034 }, { "epoch": 0.7366372440848351, "grad_norm": 0.7913027730157475, "learning_rate": 1.7112622411489026e-06, "loss": 0.3941, "step": 24035 }, { "epoch": 0.7366678926075764, "grad_norm": 1.9471344433111468, "learning_rate": 1.7108884116740432e-06, "loss": 0.6063, "step": 24036 }, { "epoch": 0.7366985411303175, "grad_norm": 1.8172732173294197, "learning_rate": 1.7105146146076452e-06, "loss": 0.5016, "step": 24037 }, { "epoch": 0.7367291896530587, "grad_norm": 1.790618649611728, "learning_rate": 1.7101408499533883e-06, "loss": 0.487, "step": 24038 }, { "epoch": 0.7367598381757999, "grad_norm": 1.974958107456145, "learning_rate": 1.7097671177149538e-06, "loss": 0.5738, "step": 24039 }, { "epoch": 0.7367904866985411, "grad_norm": 1.892182381745854, "learning_rate": 1.7093934178960258e-06, "loss": 0.5948, "step": 24040 }, { "epoch": 0.7368211352212823, "grad_norm": 1.7973839637424187, "learning_rate": 1.7090197505002877e-06, "loss": 0.5705, "step": 24041 }, { "epoch": 0.7368517837440235, "grad_norm": 1.8135534400605315, "learning_rate": 1.7086461155314189e-06, "loss": 0.5614, "step": 24042 }, { "epoch": 0.7368824322667648, "grad_norm": 1.9153140471172598, "learning_rate": 1.7082725129931015e-06, "loss": 0.6053, "step": 24043 }, { "epoch": 0.7369130807895059, "grad_norm": 2.205531654078606, "learning_rate": 1.7078989428890176e-06, "loss": 0.5381, "step": 24044 }, { "epoch": 0.7369437293122472, "grad_norm": 1.7884332525940074, "learning_rate": 1.707525405222849e-06, "loss": 0.5215, "step": 24045 }, { "epoch": 0.7369743778349883, "grad_norm": 2.030383379158576, "learning_rate": 1.7071518999982756e-06, "loss": 0.5961, "step": 24046 }, { "epoch": 0.7370050263577296, "grad_norm": 1.952451595505761, "learning_rate": 1.706778427218973e-06, "loss": 0.5734, "step": 24047 }, { "epoch": 0.7370356748804707, "grad_norm": 1.7764004705716503, "learning_rate": 1.706404986888629e-06, "loss": 0.6025, "step": 24048 }, { "epoch": 0.737066323403212, "grad_norm": 1.8698515131449114, "learning_rate": 1.7060315790109195e-06, "loss": 0.4871, "step": 24049 }, { "epoch": 0.7370969719259531, "grad_norm": 1.7564034519536496, "learning_rate": 1.7056582035895213e-06, "loss": 0.5414, "step": 24050 }, { "epoch": 0.7371276204486944, "grad_norm": 1.7069360143454901, "learning_rate": 1.7052848606281164e-06, "loss": 0.5699, "step": 24051 }, { "epoch": 0.7371582689714355, "grad_norm": 2.064321752671944, "learning_rate": 1.7049115501303827e-06, "loss": 0.6963, "step": 24052 }, { "epoch": 0.7371889174941768, "grad_norm": 1.7924722103242556, "learning_rate": 1.7045382720999997e-06, "loss": 0.5234, "step": 24053 }, { "epoch": 0.737219566016918, "grad_norm": 1.853975944058435, "learning_rate": 1.7041650265406428e-06, "loss": 0.5774, "step": 24054 }, { "epoch": 0.7372502145396592, "grad_norm": 1.9331853236703607, "learning_rate": 1.7037918134559917e-06, "loss": 0.5034, "step": 24055 }, { "epoch": 0.7372808630624004, "grad_norm": 0.775436815961532, "learning_rate": 1.7034186328497243e-06, "loss": 0.3967, "step": 24056 }, { "epoch": 0.7373115115851416, "grad_norm": 1.7969621963997657, "learning_rate": 1.7030454847255168e-06, "loss": 0.4372, "step": 24057 }, { "epoch": 0.7373421601078828, "grad_norm": 0.8283680075863681, "learning_rate": 1.7026723690870422e-06, "loss": 0.3903, "step": 24058 }, { "epoch": 0.737372808630624, "grad_norm": 1.5691687239531171, "learning_rate": 1.7022992859379844e-06, "loss": 0.4403, "step": 24059 }, { "epoch": 0.7374034571533652, "grad_norm": 0.7721453342117082, "learning_rate": 1.7019262352820132e-06, "loss": 0.3965, "step": 24060 }, { "epoch": 0.7374341056761065, "grad_norm": 2.0069182830787557, "learning_rate": 1.7015532171228083e-06, "loss": 0.7018, "step": 24061 }, { "epoch": 0.7374647541988476, "grad_norm": 1.6931630841790974, "learning_rate": 1.7011802314640418e-06, "loss": 0.5478, "step": 24062 }, { "epoch": 0.7374954027215889, "grad_norm": 0.8030223630219183, "learning_rate": 1.7008072783093909e-06, "loss": 0.3726, "step": 24063 }, { "epoch": 0.73752605124433, "grad_norm": 1.941652870570404, "learning_rate": 1.7004343576625315e-06, "loss": 0.586, "step": 24064 }, { "epoch": 0.7375566997670713, "grad_norm": 1.8595896340846658, "learning_rate": 1.700061469527135e-06, "loss": 0.524, "step": 24065 }, { "epoch": 0.7375873482898124, "grad_norm": 1.8699534449343103, "learning_rate": 1.699688613906877e-06, "loss": 0.6213, "step": 24066 }, { "epoch": 0.7376179968125537, "grad_norm": 1.9340189881680847, "learning_rate": 1.6993157908054335e-06, "loss": 0.5126, "step": 24067 }, { "epoch": 0.7376486453352948, "grad_norm": 0.8072833266069257, "learning_rate": 1.6989430002264757e-06, "loss": 0.4232, "step": 24068 }, { "epoch": 0.737679293858036, "grad_norm": 1.6774475058257272, "learning_rate": 1.698570242173674e-06, "loss": 0.5493, "step": 24069 }, { "epoch": 0.7377099423807773, "grad_norm": 1.9161680577075912, "learning_rate": 1.6981975166507076e-06, "loss": 0.5127, "step": 24070 }, { "epoch": 0.7377405909035184, "grad_norm": 1.826611033862438, "learning_rate": 1.6978248236612443e-06, "loss": 0.6271, "step": 24071 }, { "epoch": 0.7377712394262597, "grad_norm": 0.7730443581295793, "learning_rate": 1.6974521632089597e-06, "loss": 0.3867, "step": 24072 }, { "epoch": 0.7378018879490008, "grad_norm": 1.7671696136984223, "learning_rate": 1.6970795352975216e-06, "loss": 0.6295, "step": 24073 }, { "epoch": 0.7378325364717421, "grad_norm": 1.978551704947063, "learning_rate": 1.6967069399306047e-06, "loss": 0.6516, "step": 24074 }, { "epoch": 0.7378631849944832, "grad_norm": 0.7662092219454187, "learning_rate": 1.6963343771118806e-06, "loss": 0.419, "step": 24075 }, { "epoch": 0.7378938335172245, "grad_norm": 1.7382233875415467, "learning_rate": 1.6959618468450179e-06, "loss": 0.5499, "step": 24076 }, { "epoch": 0.7379244820399656, "grad_norm": 1.8988648795304006, "learning_rate": 1.6955893491336884e-06, "loss": 0.5785, "step": 24077 }, { "epoch": 0.7379551305627069, "grad_norm": 1.7747995430997143, "learning_rate": 1.695216883981564e-06, "loss": 0.6275, "step": 24078 }, { "epoch": 0.737985779085448, "grad_norm": 1.7100988118112566, "learning_rate": 1.6948444513923118e-06, "loss": 0.5739, "step": 24079 }, { "epoch": 0.7380164276081893, "grad_norm": 1.8620807298716655, "learning_rate": 1.6944720513696045e-06, "loss": 0.5067, "step": 24080 }, { "epoch": 0.7380470761309305, "grad_norm": 1.1763667017258288, "learning_rate": 1.694099683917108e-06, "loss": 0.3846, "step": 24081 }, { "epoch": 0.7380777246536717, "grad_norm": 1.8457987459412943, "learning_rate": 1.6937273490384936e-06, "loss": 0.5957, "step": 24082 }, { "epoch": 0.7381083731764129, "grad_norm": 1.908813999059978, "learning_rate": 1.693355046737431e-06, "loss": 0.6566, "step": 24083 }, { "epoch": 0.7381390216991541, "grad_norm": 1.7195108906972316, "learning_rate": 1.6929827770175849e-06, "loss": 0.5041, "step": 24084 }, { "epoch": 0.7381696702218953, "grad_norm": 1.9377871759166183, "learning_rate": 1.6926105398826264e-06, "loss": 0.5524, "step": 24085 }, { "epoch": 0.7382003187446365, "grad_norm": 1.8402373558146645, "learning_rate": 1.6922383353362237e-06, "loss": 0.5621, "step": 24086 }, { "epoch": 0.7382309672673777, "grad_norm": 1.831718349498934, "learning_rate": 1.6918661633820415e-06, "loss": 0.5807, "step": 24087 }, { "epoch": 0.738261615790119, "grad_norm": 1.8658995005175902, "learning_rate": 1.6914940240237486e-06, "loss": 0.5153, "step": 24088 }, { "epoch": 0.7382922643128601, "grad_norm": 1.8626079953471335, "learning_rate": 1.6911219172650133e-06, "loss": 0.5704, "step": 24089 }, { "epoch": 0.7383229128356014, "grad_norm": 1.8084816188275739, "learning_rate": 1.690749843109498e-06, "loss": 0.586, "step": 24090 }, { "epoch": 0.7383535613583425, "grad_norm": 1.8296876382296925, "learning_rate": 1.690377801560874e-06, "loss": 0.6381, "step": 24091 }, { "epoch": 0.7383842098810838, "grad_norm": 1.5531470187579903, "learning_rate": 1.690005792622802e-06, "loss": 0.5967, "step": 24092 }, { "epoch": 0.7384148584038249, "grad_norm": 1.748830631636864, "learning_rate": 1.6896338162989494e-06, "loss": 0.5439, "step": 24093 }, { "epoch": 0.7384455069265662, "grad_norm": 1.6339649765912923, "learning_rate": 1.6892618725929843e-06, "loss": 0.6516, "step": 24094 }, { "epoch": 0.7384761554493073, "grad_norm": 1.7267064456680499, "learning_rate": 1.6888899615085668e-06, "loss": 0.5277, "step": 24095 }, { "epoch": 0.7385068039720486, "grad_norm": 0.7705694568587557, "learning_rate": 1.688518083049364e-06, "loss": 0.4397, "step": 24096 }, { "epoch": 0.7385374524947897, "grad_norm": 1.7131068191721142, "learning_rate": 1.6881462372190415e-06, "loss": 0.5601, "step": 24097 }, { "epoch": 0.738568101017531, "grad_norm": 1.8971957456471915, "learning_rate": 1.6877744240212596e-06, "loss": 0.5777, "step": 24098 }, { "epoch": 0.7385987495402722, "grad_norm": 2.4292370775707477, "learning_rate": 1.687402643459684e-06, "loss": 0.637, "step": 24099 }, { "epoch": 0.7386293980630133, "grad_norm": 1.6376083731974438, "learning_rate": 1.6870308955379795e-06, "loss": 0.5984, "step": 24100 }, { "epoch": 0.7386600465857546, "grad_norm": 1.7752869237108555, "learning_rate": 1.6866591802598054e-06, "loss": 0.6569, "step": 24101 }, { "epoch": 0.7386906951084957, "grad_norm": 1.9024997861619335, "learning_rate": 1.6862874976288274e-06, "loss": 0.5845, "step": 24102 }, { "epoch": 0.738721343631237, "grad_norm": 1.832857099028389, "learning_rate": 1.6859158476487053e-06, "loss": 0.6093, "step": 24103 }, { "epoch": 0.7387519921539781, "grad_norm": 1.6841789015952051, "learning_rate": 1.6855442303231023e-06, "loss": 0.6333, "step": 24104 }, { "epoch": 0.7387826406767194, "grad_norm": 1.6135827727813945, "learning_rate": 1.6851726456556816e-06, "loss": 0.5596, "step": 24105 }, { "epoch": 0.7388132891994605, "grad_norm": 0.7476955279542156, "learning_rate": 1.6848010936501014e-06, "loss": 0.3994, "step": 24106 }, { "epoch": 0.7388439377222018, "grad_norm": 1.76420088723063, "learning_rate": 1.6844295743100243e-06, "loss": 0.6109, "step": 24107 }, { "epoch": 0.738874586244943, "grad_norm": 1.7976132171673715, "learning_rate": 1.6840580876391126e-06, "loss": 0.5927, "step": 24108 }, { "epoch": 0.7389052347676842, "grad_norm": 1.9294232094042851, "learning_rate": 1.6836866336410229e-06, "loss": 0.5287, "step": 24109 }, { "epoch": 0.7389358832904254, "grad_norm": 1.98300269015024, "learning_rate": 1.683315212319418e-06, "loss": 0.6067, "step": 24110 }, { "epoch": 0.7389665318131666, "grad_norm": 1.8494067495101383, "learning_rate": 1.6829438236779582e-06, "loss": 0.5443, "step": 24111 }, { "epoch": 0.7389971803359078, "grad_norm": 0.832623443149836, "learning_rate": 1.6825724677202998e-06, "loss": 0.4114, "step": 24112 }, { "epoch": 0.739027828858649, "grad_norm": 1.9059858062827255, "learning_rate": 1.6822011444501058e-06, "loss": 0.6145, "step": 24113 }, { "epoch": 0.7390584773813902, "grad_norm": 1.7646214292898814, "learning_rate": 1.6818298538710287e-06, "loss": 0.5992, "step": 24114 }, { "epoch": 0.7390891259041314, "grad_norm": 1.6259842981493777, "learning_rate": 1.6814585959867353e-06, "loss": 0.5247, "step": 24115 }, { "epoch": 0.7391197744268726, "grad_norm": 1.7688743110206133, "learning_rate": 1.681087370800879e-06, "loss": 0.6135, "step": 24116 }, { "epoch": 0.7391504229496139, "grad_norm": 1.7314199601759754, "learning_rate": 1.680716178317116e-06, "loss": 0.6063, "step": 24117 }, { "epoch": 0.739181071472355, "grad_norm": 1.7613701068947547, "learning_rate": 1.6803450185391063e-06, "loss": 0.5136, "step": 24118 }, { "epoch": 0.7392117199950963, "grad_norm": 1.647230965055815, "learning_rate": 1.6799738914705078e-06, "loss": 0.5021, "step": 24119 }, { "epoch": 0.7392423685178374, "grad_norm": 1.8652583712323285, "learning_rate": 1.6796027971149748e-06, "loss": 0.6538, "step": 24120 }, { "epoch": 0.7392730170405787, "grad_norm": 1.667351138176356, "learning_rate": 1.6792317354761644e-06, "loss": 0.548, "step": 24121 }, { "epoch": 0.7393036655633198, "grad_norm": 1.7442880792053652, "learning_rate": 1.6788607065577355e-06, "loss": 0.5691, "step": 24122 }, { "epoch": 0.7393343140860611, "grad_norm": 1.797847613227407, "learning_rate": 1.6784897103633401e-06, "loss": 0.6725, "step": 24123 }, { "epoch": 0.7393649626088022, "grad_norm": 1.5774448046423182, "learning_rate": 1.678118746896637e-06, "loss": 0.4932, "step": 24124 }, { "epoch": 0.7393956111315435, "grad_norm": 1.6855004215805103, "learning_rate": 1.6777478161612781e-06, "loss": 0.617, "step": 24125 }, { "epoch": 0.7394262596542847, "grad_norm": 1.6431411529972224, "learning_rate": 1.6773769181609201e-06, "loss": 0.4547, "step": 24126 }, { "epoch": 0.7394569081770259, "grad_norm": 0.8247397482241303, "learning_rate": 1.6770060528992194e-06, "loss": 0.4156, "step": 24127 }, { "epoch": 0.7394875566997671, "grad_norm": 1.7850023007926634, "learning_rate": 1.676635220379826e-06, "loss": 0.5368, "step": 24128 }, { "epoch": 0.7395182052225083, "grad_norm": 1.7732554476151263, "learning_rate": 1.6762644206063967e-06, "loss": 0.6028, "step": 24129 }, { "epoch": 0.7395488537452495, "grad_norm": 1.637207421399197, "learning_rate": 1.6758936535825853e-06, "loss": 0.5027, "step": 24130 }, { "epoch": 0.7395795022679906, "grad_norm": 0.8253674810815347, "learning_rate": 1.6755229193120437e-06, "loss": 0.4258, "step": 24131 }, { "epoch": 0.7396101507907319, "grad_norm": 0.788935433581776, "learning_rate": 1.6751522177984264e-06, "loss": 0.4001, "step": 24132 }, { "epoch": 0.739640799313473, "grad_norm": 1.9573198018727667, "learning_rate": 1.6747815490453816e-06, "loss": 0.6156, "step": 24133 }, { "epoch": 0.7396714478362143, "grad_norm": 1.8655001018409787, "learning_rate": 1.6744109130565684e-06, "loss": 0.6395, "step": 24134 }, { "epoch": 0.7397020963589555, "grad_norm": 0.849143879977456, "learning_rate": 1.6740403098356357e-06, "loss": 0.3903, "step": 24135 }, { "epoch": 0.7397327448816967, "grad_norm": 1.8940380683949292, "learning_rate": 1.6736697393862328e-06, "loss": 0.5729, "step": 24136 }, { "epoch": 0.7397633934044379, "grad_norm": 2.016673908922913, "learning_rate": 1.673299201712013e-06, "loss": 0.5788, "step": 24137 }, { "epoch": 0.7397940419271791, "grad_norm": 1.9725412738233832, "learning_rate": 1.6729286968166291e-06, "loss": 0.5634, "step": 24138 }, { "epoch": 0.7398246904499203, "grad_norm": 1.5970140048230188, "learning_rate": 1.672558224703728e-06, "loss": 0.5918, "step": 24139 }, { "epoch": 0.7398553389726615, "grad_norm": 2.082901820620476, "learning_rate": 1.6721877853769624e-06, "loss": 0.6617, "step": 24140 }, { "epoch": 0.7398859874954027, "grad_norm": 1.6171484817693647, "learning_rate": 1.6718173788399822e-06, "loss": 0.4959, "step": 24141 }, { "epoch": 0.739916636018144, "grad_norm": 2.090888046654332, "learning_rate": 1.6714470050964387e-06, "loss": 0.6275, "step": 24142 }, { "epoch": 0.7399472845408851, "grad_norm": 1.761833263682472, "learning_rate": 1.6710766641499793e-06, "loss": 0.5591, "step": 24143 }, { "epoch": 0.7399779330636264, "grad_norm": 1.8758378617745404, "learning_rate": 1.6707063560042497e-06, "loss": 0.5279, "step": 24144 }, { "epoch": 0.7400085815863675, "grad_norm": 0.7894765759511093, "learning_rate": 1.6703360806629055e-06, "loss": 0.4071, "step": 24145 }, { "epoch": 0.7400392301091088, "grad_norm": 1.815398128242508, "learning_rate": 1.6699658381295919e-06, "loss": 0.5387, "step": 24146 }, { "epoch": 0.7400698786318499, "grad_norm": 2.1844226249953493, "learning_rate": 1.6695956284079557e-06, "loss": 0.6147, "step": 24147 }, { "epoch": 0.7401005271545912, "grad_norm": 0.8129615095365459, "learning_rate": 1.6692254515016455e-06, "loss": 0.4131, "step": 24148 }, { "epoch": 0.7401311756773323, "grad_norm": 1.7386662642334927, "learning_rate": 1.668855307414311e-06, "loss": 0.5021, "step": 24149 }, { "epoch": 0.7401618242000736, "grad_norm": 1.78752165874345, "learning_rate": 1.6684851961495956e-06, "loss": 0.5586, "step": 24150 }, { "epoch": 0.7401924727228147, "grad_norm": 1.8568503465927733, "learning_rate": 1.6681151177111482e-06, "loss": 0.5517, "step": 24151 }, { "epoch": 0.740223121245556, "grad_norm": 1.9031700833934044, "learning_rate": 1.667745072102615e-06, "loss": 0.5708, "step": 24152 }, { "epoch": 0.7402537697682972, "grad_norm": 1.9326690040003038, "learning_rate": 1.6673750593276433e-06, "loss": 0.5873, "step": 24153 }, { "epoch": 0.7402844182910384, "grad_norm": 1.7599279858888774, "learning_rate": 1.6670050793898785e-06, "loss": 0.5906, "step": 24154 }, { "epoch": 0.7403150668137796, "grad_norm": 1.8736572442021195, "learning_rate": 1.6666351322929618e-06, "loss": 0.5639, "step": 24155 }, { "epoch": 0.7403457153365208, "grad_norm": 1.811254105526503, "learning_rate": 1.6662652180405458e-06, "loss": 0.5279, "step": 24156 }, { "epoch": 0.740376363859262, "grad_norm": 1.8392648011322967, "learning_rate": 1.6658953366362713e-06, "loss": 0.6369, "step": 24157 }, { "epoch": 0.7404070123820032, "grad_norm": 1.728125527095768, "learning_rate": 1.6655254880837812e-06, "loss": 0.5986, "step": 24158 }, { "epoch": 0.7404376609047444, "grad_norm": 1.830146849712259, "learning_rate": 1.6651556723867219e-06, "loss": 0.5563, "step": 24159 }, { "epoch": 0.7404683094274856, "grad_norm": 2.0322158486860062, "learning_rate": 1.6647858895487368e-06, "loss": 0.5548, "step": 24160 }, { "epoch": 0.7404989579502268, "grad_norm": 1.8161469505059624, "learning_rate": 1.6644161395734715e-06, "loss": 0.6417, "step": 24161 }, { "epoch": 0.740529606472968, "grad_norm": 0.7531913264220548, "learning_rate": 1.6640464224645657e-06, "loss": 0.3997, "step": 24162 }, { "epoch": 0.7405602549957092, "grad_norm": 1.8980081403201592, "learning_rate": 1.6636767382256641e-06, "loss": 0.6282, "step": 24163 }, { "epoch": 0.7405909035184504, "grad_norm": 1.9298081432984497, "learning_rate": 1.6633070868604107e-06, "loss": 0.563, "step": 24164 }, { "epoch": 0.7406215520411916, "grad_norm": 0.849624859426378, "learning_rate": 1.6629374683724465e-06, "loss": 0.4132, "step": 24165 }, { "epoch": 0.7406522005639328, "grad_norm": 0.776519284136179, "learning_rate": 1.6625678827654102e-06, "loss": 0.3916, "step": 24166 }, { "epoch": 0.740682849086674, "grad_norm": 1.6652539929351595, "learning_rate": 1.6621983300429495e-06, "loss": 0.5366, "step": 24167 }, { "epoch": 0.7407134976094152, "grad_norm": 1.8829882706715957, "learning_rate": 1.6618288102087026e-06, "loss": 0.5649, "step": 24168 }, { "epoch": 0.7407441461321564, "grad_norm": 1.7735075319957023, "learning_rate": 1.6614593232663089e-06, "loss": 0.5164, "step": 24169 }, { "epoch": 0.7407747946548976, "grad_norm": 1.5919993477873893, "learning_rate": 1.6610898692194106e-06, "loss": 0.5047, "step": 24170 }, { "epoch": 0.7408054431776389, "grad_norm": 1.8482714773652458, "learning_rate": 1.6607204480716483e-06, "loss": 0.609, "step": 24171 }, { "epoch": 0.74083609170038, "grad_norm": 1.7860345546713032, "learning_rate": 1.6603510598266631e-06, "loss": 0.6342, "step": 24172 }, { "epoch": 0.7408667402231213, "grad_norm": 1.9409836918173038, "learning_rate": 1.6599817044880923e-06, "loss": 0.6567, "step": 24173 }, { "epoch": 0.7408973887458624, "grad_norm": 1.7290076323442716, "learning_rate": 1.659612382059576e-06, "loss": 0.5574, "step": 24174 }, { "epoch": 0.7409280372686037, "grad_norm": 1.9652687153377029, "learning_rate": 1.6592430925447557e-06, "loss": 0.6518, "step": 24175 }, { "epoch": 0.7409586857913448, "grad_norm": 1.8866220408075132, "learning_rate": 1.6588738359472672e-06, "loss": 0.6983, "step": 24176 }, { "epoch": 0.7409893343140861, "grad_norm": 2.0017912707071934, "learning_rate": 1.6585046122707489e-06, "loss": 0.6626, "step": 24177 }, { "epoch": 0.7410199828368272, "grad_norm": 0.8623277572024906, "learning_rate": 1.658135421518839e-06, "loss": 0.4213, "step": 24178 }, { "epoch": 0.7410506313595685, "grad_norm": 1.8427770407992852, "learning_rate": 1.6577662636951758e-06, "loss": 0.6615, "step": 24179 }, { "epoch": 0.7410812798823097, "grad_norm": 1.8423158420937706, "learning_rate": 1.6573971388033989e-06, "loss": 0.6999, "step": 24180 }, { "epoch": 0.7411119284050509, "grad_norm": 0.850092647730154, "learning_rate": 1.6570280468471412e-06, "loss": 0.4103, "step": 24181 }, { "epoch": 0.7411425769277921, "grad_norm": 1.720942781501406, "learning_rate": 1.6566589878300416e-06, "loss": 0.5811, "step": 24182 }, { "epoch": 0.7411732254505333, "grad_norm": 1.8663969134795149, "learning_rate": 1.6562899617557377e-06, "loss": 0.6886, "step": 24183 }, { "epoch": 0.7412038739732745, "grad_norm": 2.191049375239189, "learning_rate": 1.6559209686278648e-06, "loss": 0.6133, "step": 24184 }, { "epoch": 0.7412345224960157, "grad_norm": 1.8652217288801451, "learning_rate": 1.655552008450055e-06, "loss": 0.472, "step": 24185 }, { "epoch": 0.7412651710187569, "grad_norm": 0.814557886969701, "learning_rate": 1.6551830812259494e-06, "loss": 0.4112, "step": 24186 }, { "epoch": 0.7412958195414981, "grad_norm": 1.746553246550039, "learning_rate": 1.65481418695918e-06, "loss": 0.5385, "step": 24187 }, { "epoch": 0.7413264680642393, "grad_norm": 1.9104602542920937, "learning_rate": 1.6544453256533838e-06, "loss": 0.6537, "step": 24188 }, { "epoch": 0.7413571165869806, "grad_norm": 1.7084233085744944, "learning_rate": 1.654076497312192e-06, "loss": 0.5884, "step": 24189 }, { "epoch": 0.7413877651097217, "grad_norm": 1.9014348792551758, "learning_rate": 1.6537077019392406e-06, "loss": 0.5051, "step": 24190 }, { "epoch": 0.741418413632463, "grad_norm": 1.9388751178853816, "learning_rate": 1.653338939538165e-06, "loss": 0.6378, "step": 24191 }, { "epoch": 0.7414490621552041, "grad_norm": 0.7661092399573848, "learning_rate": 1.6529702101125955e-06, "loss": 0.4048, "step": 24192 }, { "epoch": 0.7414797106779453, "grad_norm": 1.7680805147287453, "learning_rate": 1.6526015136661666e-06, "loss": 0.6302, "step": 24193 }, { "epoch": 0.7415103592006865, "grad_norm": 1.8353069551274914, "learning_rate": 1.6522328502025137e-06, "loss": 0.6561, "step": 24194 }, { "epoch": 0.7415410077234277, "grad_norm": 1.7844305436521022, "learning_rate": 1.6518642197252666e-06, "loss": 0.6237, "step": 24195 }, { "epoch": 0.7415716562461689, "grad_norm": 0.8488845638172405, "learning_rate": 1.6514956222380552e-06, "loss": 0.4055, "step": 24196 }, { "epoch": 0.7416023047689101, "grad_norm": 1.8103731692631, "learning_rate": 1.6511270577445171e-06, "loss": 0.549, "step": 24197 }, { "epoch": 0.7416329532916514, "grad_norm": 1.8245091793462564, "learning_rate": 1.650758526248279e-06, "loss": 0.521, "step": 24198 }, { "epoch": 0.7416636018143925, "grad_norm": 1.7095690382304052, "learning_rate": 1.6503900277529761e-06, "loss": 0.4977, "step": 24199 }, { "epoch": 0.7416942503371338, "grad_norm": 1.909902947475136, "learning_rate": 1.6500215622622356e-06, "loss": 0.6123, "step": 24200 }, { "epoch": 0.7417248988598749, "grad_norm": 1.9255899530557103, "learning_rate": 1.6496531297796902e-06, "loss": 0.5994, "step": 24201 }, { "epoch": 0.7417555473826162, "grad_norm": 0.8060436833963406, "learning_rate": 1.649284730308971e-06, "loss": 0.3956, "step": 24202 }, { "epoch": 0.7417861959053573, "grad_norm": 1.7509613941438307, "learning_rate": 1.6489163638537048e-06, "loss": 0.6218, "step": 24203 }, { "epoch": 0.7418168444280986, "grad_norm": 1.7223848273886915, "learning_rate": 1.6485480304175232e-06, "loss": 0.4976, "step": 24204 }, { "epoch": 0.7418474929508397, "grad_norm": 1.8070522253721197, "learning_rate": 1.648179730004057e-06, "loss": 0.6155, "step": 24205 }, { "epoch": 0.741878141473581, "grad_norm": 0.7933377649684685, "learning_rate": 1.6478114626169322e-06, "loss": 0.4157, "step": 24206 }, { "epoch": 0.7419087899963221, "grad_norm": 0.8135516387951294, "learning_rate": 1.6474432282597784e-06, "loss": 0.4074, "step": 24207 }, { "epoch": 0.7419394385190634, "grad_norm": 1.808994212163182, "learning_rate": 1.6470750269362263e-06, "loss": 0.6506, "step": 24208 }, { "epoch": 0.7419700870418046, "grad_norm": 1.6566832720158342, "learning_rate": 1.6467068586498997e-06, "loss": 0.481, "step": 24209 }, { "epoch": 0.7420007355645458, "grad_norm": 1.8835746118013175, "learning_rate": 1.6463387234044303e-06, "loss": 0.5837, "step": 24210 }, { "epoch": 0.742031384087287, "grad_norm": 1.8479277194401922, "learning_rate": 1.6459706212034421e-06, "loss": 0.5806, "step": 24211 }, { "epoch": 0.7420620326100282, "grad_norm": 1.733586150309773, "learning_rate": 1.6456025520505631e-06, "loss": 0.5522, "step": 24212 }, { "epoch": 0.7420926811327694, "grad_norm": 2.0446646016276753, "learning_rate": 1.6452345159494222e-06, "loss": 0.6114, "step": 24213 }, { "epoch": 0.7421233296555106, "grad_norm": 0.8094135077733344, "learning_rate": 1.6448665129036423e-06, "loss": 0.3921, "step": 24214 }, { "epoch": 0.7421539781782518, "grad_norm": 1.8090437584845198, "learning_rate": 1.6444985429168514e-06, "loss": 0.6164, "step": 24215 }, { "epoch": 0.742184626700993, "grad_norm": 0.8124044411692609, "learning_rate": 1.6441306059926765e-06, "loss": 0.4332, "step": 24216 }, { "epoch": 0.7422152752237342, "grad_norm": 1.5935798987002117, "learning_rate": 1.643762702134739e-06, "loss": 0.617, "step": 24217 }, { "epoch": 0.7422459237464755, "grad_norm": 0.7874337301698, "learning_rate": 1.6433948313466675e-06, "loss": 0.4022, "step": 24218 }, { "epoch": 0.7422765722692166, "grad_norm": 1.7265561083928682, "learning_rate": 1.6430269936320864e-06, "loss": 0.5677, "step": 24219 }, { "epoch": 0.7423072207919579, "grad_norm": 1.9214615118424627, "learning_rate": 1.6426591889946176e-06, "loss": 0.6306, "step": 24220 }, { "epoch": 0.742337869314699, "grad_norm": 1.9095133549993666, "learning_rate": 1.642291417437889e-06, "loss": 0.6937, "step": 24221 }, { "epoch": 0.7423685178374403, "grad_norm": 1.7699191958023064, "learning_rate": 1.6419236789655202e-06, "loss": 0.5844, "step": 24222 }, { "epoch": 0.7423991663601814, "grad_norm": 2.0510711373536767, "learning_rate": 1.6415559735811365e-06, "loss": 0.6042, "step": 24223 }, { "epoch": 0.7424298148829226, "grad_norm": 1.9470676938955167, "learning_rate": 1.641188301288363e-06, "loss": 0.6174, "step": 24224 }, { "epoch": 0.7424604634056639, "grad_norm": 1.7192546872407772, "learning_rate": 1.6408206620908185e-06, "loss": 0.4799, "step": 24225 }, { "epoch": 0.742491111928405, "grad_norm": 1.657014250842389, "learning_rate": 1.6404530559921279e-06, "loss": 0.5462, "step": 24226 }, { "epoch": 0.7425217604511463, "grad_norm": 1.8831344641523768, "learning_rate": 1.640085482995914e-06, "loss": 0.607, "step": 24227 }, { "epoch": 0.7425524089738874, "grad_norm": 1.5866225750162803, "learning_rate": 1.6397179431057965e-06, "loss": 0.6203, "step": 24228 }, { "epoch": 0.7425830574966287, "grad_norm": 1.8364506110821164, "learning_rate": 1.6393504363253986e-06, "loss": 0.5789, "step": 24229 }, { "epoch": 0.7426137060193698, "grad_norm": 2.090955871066241, "learning_rate": 1.6389829626583404e-06, "loss": 0.5707, "step": 24230 }, { "epoch": 0.7426443545421111, "grad_norm": 1.9213924113127523, "learning_rate": 1.6386155221082422e-06, "loss": 0.6005, "step": 24231 }, { "epoch": 0.7426750030648522, "grad_norm": 1.701092093988014, "learning_rate": 1.6382481146787272e-06, "loss": 0.6813, "step": 24232 }, { "epoch": 0.7427056515875935, "grad_norm": 2.0467077871549257, "learning_rate": 1.6378807403734115e-06, "loss": 0.6894, "step": 24233 }, { "epoch": 0.7427363001103346, "grad_norm": 1.92579645798093, "learning_rate": 1.6375133991959174e-06, "loss": 0.6131, "step": 24234 }, { "epoch": 0.7427669486330759, "grad_norm": 1.7032873296121482, "learning_rate": 1.637146091149866e-06, "loss": 0.5614, "step": 24235 }, { "epoch": 0.7427975971558171, "grad_norm": 1.7497895655358036, "learning_rate": 1.6367788162388732e-06, "loss": 0.5153, "step": 24236 }, { "epoch": 0.7428282456785583, "grad_norm": 1.803728666953605, "learning_rate": 1.6364115744665588e-06, "loss": 0.6004, "step": 24237 }, { "epoch": 0.7428588942012995, "grad_norm": 1.8779210714489125, "learning_rate": 1.6360443658365433e-06, "loss": 0.5841, "step": 24238 }, { "epoch": 0.7428895427240407, "grad_norm": 0.7741960833215877, "learning_rate": 1.6356771903524416e-06, "loss": 0.3962, "step": 24239 }, { "epoch": 0.7429201912467819, "grad_norm": 0.7805286052390958, "learning_rate": 1.6353100480178756e-06, "loss": 0.3982, "step": 24240 }, { "epoch": 0.7429508397695231, "grad_norm": 1.6680989099778234, "learning_rate": 1.6349429388364568e-06, "loss": 0.5704, "step": 24241 }, { "epoch": 0.7429814882922643, "grad_norm": 1.8485027003918197, "learning_rate": 1.6345758628118096e-06, "loss": 0.5934, "step": 24242 }, { "epoch": 0.7430121368150056, "grad_norm": 1.6951427507123809, "learning_rate": 1.6342088199475475e-06, "loss": 0.6339, "step": 24243 }, { "epoch": 0.7430427853377467, "grad_norm": 1.3852841652959413, "learning_rate": 1.6338418102472857e-06, "loss": 0.5011, "step": 24244 }, { "epoch": 0.743073433860488, "grad_norm": 2.046969934082676, "learning_rate": 1.6334748337146417e-06, "loss": 0.5692, "step": 24245 }, { "epoch": 0.7431040823832291, "grad_norm": 1.9462280472359281, "learning_rate": 1.6331078903532332e-06, "loss": 0.6385, "step": 24246 }, { "epoch": 0.7431347309059704, "grad_norm": 0.8118441522760563, "learning_rate": 1.6327409801666722e-06, "loss": 0.3911, "step": 24247 }, { "epoch": 0.7431653794287115, "grad_norm": 1.7163216866516242, "learning_rate": 1.632374103158576e-06, "loss": 0.5693, "step": 24248 }, { "epoch": 0.7431960279514528, "grad_norm": 1.9359365996464906, "learning_rate": 1.6320072593325608e-06, "loss": 0.5652, "step": 24249 }, { "epoch": 0.7432266764741939, "grad_norm": 1.8867002399917792, "learning_rate": 1.631640448692239e-06, "loss": 0.6133, "step": 24250 }, { "epoch": 0.7432573249969352, "grad_norm": 1.9178933052486085, "learning_rate": 1.6312736712412264e-06, "loss": 0.616, "step": 24251 }, { "epoch": 0.7432879735196763, "grad_norm": 0.755938150159709, "learning_rate": 1.6309069269831334e-06, "loss": 0.3876, "step": 24252 }, { "epoch": 0.7433186220424176, "grad_norm": 1.8264055534548036, "learning_rate": 1.6305402159215799e-06, "loss": 0.6136, "step": 24253 }, { "epoch": 0.7433492705651588, "grad_norm": 1.6176531655592468, "learning_rate": 1.6301735380601751e-06, "loss": 0.6309, "step": 24254 }, { "epoch": 0.7433799190878999, "grad_norm": 1.6189965892544331, "learning_rate": 1.6298068934025318e-06, "loss": 0.4862, "step": 24255 }, { "epoch": 0.7434105676106412, "grad_norm": 1.6932536577991302, "learning_rate": 1.6294402819522632e-06, "loss": 0.5737, "step": 24256 }, { "epoch": 0.7434412161333823, "grad_norm": 1.9952910788130322, "learning_rate": 1.6290737037129834e-06, "loss": 0.6189, "step": 24257 }, { "epoch": 0.7434718646561236, "grad_norm": 1.860948961295285, "learning_rate": 1.6287071586883014e-06, "loss": 0.5657, "step": 24258 }, { "epoch": 0.7435025131788647, "grad_norm": 1.8100401486570614, "learning_rate": 1.6283406468818303e-06, "loss": 0.4752, "step": 24259 }, { "epoch": 0.743533161701606, "grad_norm": 1.971362835161339, "learning_rate": 1.627974168297181e-06, "loss": 0.5502, "step": 24260 }, { "epoch": 0.7435638102243471, "grad_norm": 1.9102748785623431, "learning_rate": 1.6276077229379672e-06, "loss": 0.5341, "step": 24261 }, { "epoch": 0.7435944587470884, "grad_norm": 1.9119228089750515, "learning_rate": 1.6272413108077973e-06, "loss": 0.5916, "step": 24262 }, { "epoch": 0.7436251072698296, "grad_norm": 2.0020253332496427, "learning_rate": 1.6268749319102784e-06, "loss": 0.6902, "step": 24263 }, { "epoch": 0.7436557557925708, "grad_norm": 0.7740583559059347, "learning_rate": 1.626508586249027e-06, "loss": 0.4198, "step": 24264 }, { "epoch": 0.743686404315312, "grad_norm": 1.8429869607187526, "learning_rate": 1.62614227382765e-06, "loss": 0.639, "step": 24265 }, { "epoch": 0.7437170528380532, "grad_norm": 1.5878503753115574, "learning_rate": 1.6257759946497542e-06, "loss": 0.4545, "step": 24266 }, { "epoch": 0.7437477013607944, "grad_norm": 1.5811471835547861, "learning_rate": 1.6254097487189513e-06, "loss": 0.5965, "step": 24267 }, { "epoch": 0.7437783498835356, "grad_norm": 0.8636557553631324, "learning_rate": 1.6250435360388494e-06, "loss": 0.4073, "step": 24268 }, { "epoch": 0.7438089984062768, "grad_norm": 1.5990888972407737, "learning_rate": 1.624677356613059e-06, "loss": 0.4972, "step": 24269 }, { "epoch": 0.743839646929018, "grad_norm": 1.6947268212102808, "learning_rate": 1.624311210445184e-06, "loss": 0.4985, "step": 24270 }, { "epoch": 0.7438702954517592, "grad_norm": 2.068615590142298, "learning_rate": 1.623945097538835e-06, "loss": 0.6969, "step": 24271 }, { "epoch": 0.7439009439745005, "grad_norm": 1.790476618274568, "learning_rate": 1.62357901789762e-06, "loss": 0.5223, "step": 24272 }, { "epoch": 0.7439315924972416, "grad_norm": 2.041739646365549, "learning_rate": 1.6232129715251449e-06, "loss": 0.5245, "step": 24273 }, { "epoch": 0.7439622410199829, "grad_norm": 1.700463823864323, "learning_rate": 1.6228469584250151e-06, "loss": 0.5742, "step": 24274 }, { "epoch": 0.743992889542724, "grad_norm": 1.8164953434746791, "learning_rate": 1.6224809786008377e-06, "loss": 0.5417, "step": 24275 }, { "epoch": 0.7440235380654653, "grad_norm": 1.8449769484998253, "learning_rate": 1.6221150320562212e-06, "loss": 0.5472, "step": 24276 }, { "epoch": 0.7440541865882064, "grad_norm": 1.6872568905991072, "learning_rate": 1.6217491187947682e-06, "loss": 0.5585, "step": 24277 }, { "epoch": 0.7440848351109477, "grad_norm": 1.8650482893887061, "learning_rate": 1.621383238820085e-06, "loss": 0.6079, "step": 24278 }, { "epoch": 0.7441154836336888, "grad_norm": 1.6335628796958934, "learning_rate": 1.6210173921357775e-06, "loss": 0.5178, "step": 24279 }, { "epoch": 0.7441461321564301, "grad_norm": 1.8473026224228022, "learning_rate": 1.6206515787454518e-06, "loss": 0.5885, "step": 24280 }, { "epoch": 0.7441767806791713, "grad_norm": 1.5967575822021365, "learning_rate": 1.620285798652711e-06, "loss": 0.4836, "step": 24281 }, { "epoch": 0.7442074292019125, "grad_norm": 1.8080328839104696, "learning_rate": 1.6199200518611553e-06, "loss": 0.5426, "step": 24282 }, { "epoch": 0.7442380777246537, "grad_norm": 1.8365183296230996, "learning_rate": 1.6195543383743956e-06, "loss": 0.529, "step": 24283 }, { "epoch": 0.7442687262473949, "grad_norm": 1.9050641750872874, "learning_rate": 1.619188658196032e-06, "loss": 0.5519, "step": 24284 }, { "epoch": 0.7442993747701361, "grad_norm": 1.6164904052314664, "learning_rate": 1.618823011329666e-06, "loss": 0.4712, "step": 24285 }, { "epoch": 0.7443300232928772, "grad_norm": 1.9913634811649283, "learning_rate": 1.6184573977789014e-06, "loss": 0.5691, "step": 24286 }, { "epoch": 0.7443606718156185, "grad_norm": 0.7823933237411925, "learning_rate": 1.618091817547342e-06, "loss": 0.3767, "step": 24287 }, { "epoch": 0.7443913203383596, "grad_norm": 1.7815395761422337, "learning_rate": 1.6177262706385904e-06, "loss": 0.6673, "step": 24288 }, { "epoch": 0.7444219688611009, "grad_norm": 1.7998692767803333, "learning_rate": 1.617360757056246e-06, "loss": 0.4531, "step": 24289 }, { "epoch": 0.744452617383842, "grad_norm": 1.8629732373673678, "learning_rate": 1.616995276803911e-06, "loss": 0.5383, "step": 24290 }, { "epoch": 0.7444832659065833, "grad_norm": 1.5977757845815477, "learning_rate": 1.616629829885189e-06, "loss": 0.5282, "step": 24291 }, { "epoch": 0.7445139144293245, "grad_norm": 2.042471315904534, "learning_rate": 1.6162644163036795e-06, "loss": 0.652, "step": 24292 }, { "epoch": 0.7445445629520657, "grad_norm": 0.7947588268794535, "learning_rate": 1.6158990360629783e-06, "loss": 0.4009, "step": 24293 }, { "epoch": 0.7445752114748069, "grad_norm": 2.0109888134488125, "learning_rate": 1.6155336891666935e-06, "loss": 0.5694, "step": 24294 }, { "epoch": 0.7446058599975481, "grad_norm": 1.9199873713019242, "learning_rate": 1.6151683756184193e-06, "loss": 0.6809, "step": 24295 }, { "epoch": 0.7446365085202893, "grad_norm": 0.7661132173888433, "learning_rate": 1.6148030954217592e-06, "loss": 0.3928, "step": 24296 }, { "epoch": 0.7446671570430305, "grad_norm": 0.788739996449937, "learning_rate": 1.6144378485803086e-06, "loss": 0.4028, "step": 24297 }, { "epoch": 0.7446978055657717, "grad_norm": 1.722439231043296, "learning_rate": 1.6140726350976683e-06, "loss": 0.5728, "step": 24298 }, { "epoch": 0.744728454088513, "grad_norm": 1.938736089885582, "learning_rate": 1.613707454977438e-06, "loss": 0.6152, "step": 24299 }, { "epoch": 0.7447591026112541, "grad_norm": 1.8807557888894526, "learning_rate": 1.6133423082232131e-06, "loss": 0.6593, "step": 24300 }, { "epoch": 0.7447897511339954, "grad_norm": 0.7656949822209946, "learning_rate": 1.6129771948385926e-06, "loss": 0.3932, "step": 24301 }, { "epoch": 0.7448203996567365, "grad_norm": 1.8890684285580719, "learning_rate": 1.612612114827176e-06, "loss": 0.5917, "step": 24302 }, { "epoch": 0.7448510481794778, "grad_norm": 1.9568732743264596, "learning_rate": 1.6122470681925594e-06, "loss": 0.6344, "step": 24303 }, { "epoch": 0.7448816967022189, "grad_norm": 1.8228344541528216, "learning_rate": 1.6118820549383358e-06, "loss": 0.6262, "step": 24304 }, { "epoch": 0.7449123452249602, "grad_norm": 1.756614005170314, "learning_rate": 1.611517075068108e-06, "loss": 0.5687, "step": 24305 }, { "epoch": 0.7449429937477013, "grad_norm": 2.001584125202152, "learning_rate": 1.6111521285854687e-06, "loss": 0.5759, "step": 24306 }, { "epoch": 0.7449736422704426, "grad_norm": 0.7587031264369603, "learning_rate": 1.6107872154940152e-06, "loss": 0.4141, "step": 24307 }, { "epoch": 0.7450042907931838, "grad_norm": 1.8897733405515849, "learning_rate": 1.6104223357973414e-06, "loss": 0.6521, "step": 24308 }, { "epoch": 0.745034939315925, "grad_norm": 1.8540328078920265, "learning_rate": 1.6100574894990433e-06, "loss": 0.5911, "step": 24309 }, { "epoch": 0.7450655878386662, "grad_norm": 1.920341277962546, "learning_rate": 1.6096926766027183e-06, "loss": 0.5617, "step": 24310 }, { "epoch": 0.7450962363614074, "grad_norm": 1.6198919915557983, "learning_rate": 1.6093278971119569e-06, "loss": 0.4879, "step": 24311 }, { "epoch": 0.7451268848841486, "grad_norm": 1.87099326718208, "learning_rate": 1.608963151030355e-06, "loss": 0.5285, "step": 24312 }, { "epoch": 0.7451575334068898, "grad_norm": 1.9499799268342393, "learning_rate": 1.6085984383615084e-06, "loss": 0.5797, "step": 24313 }, { "epoch": 0.745188181929631, "grad_norm": 1.9535101497883527, "learning_rate": 1.608233759109008e-06, "loss": 0.6315, "step": 24314 }, { "epoch": 0.7452188304523722, "grad_norm": 1.5855461272343965, "learning_rate": 1.6078691132764478e-06, "loss": 0.5485, "step": 24315 }, { "epoch": 0.7452494789751134, "grad_norm": 1.785665384640759, "learning_rate": 1.6075045008674228e-06, "loss": 0.6612, "step": 24316 }, { "epoch": 0.7452801274978545, "grad_norm": 1.8101738454170442, "learning_rate": 1.6071399218855222e-06, "loss": 0.52, "step": 24317 }, { "epoch": 0.7453107760205958, "grad_norm": 1.9815419169184407, "learning_rate": 1.606775376334342e-06, "loss": 0.689, "step": 24318 }, { "epoch": 0.745341424543337, "grad_norm": 1.7675516441538162, "learning_rate": 1.6064108642174702e-06, "loss": 0.5623, "step": 24319 }, { "epoch": 0.7453720730660782, "grad_norm": 0.9760449807258725, "learning_rate": 1.6060463855385005e-06, "loss": 0.4102, "step": 24320 }, { "epoch": 0.7454027215888194, "grad_norm": 1.7621810697215874, "learning_rate": 1.6056819403010265e-06, "loss": 0.5963, "step": 24321 }, { "epoch": 0.7454333701115606, "grad_norm": 1.8666783806064449, "learning_rate": 1.6053175285086341e-06, "loss": 0.5923, "step": 24322 }, { "epoch": 0.7454640186343018, "grad_norm": 1.8879684894781383, "learning_rate": 1.6049531501649173e-06, "loss": 0.5975, "step": 24323 }, { "epoch": 0.745494667157043, "grad_norm": 1.9035263828003683, "learning_rate": 1.6045888052734676e-06, "loss": 0.6123, "step": 24324 }, { "epoch": 0.7455253156797842, "grad_norm": 1.8047970217942624, "learning_rate": 1.6042244938378709e-06, "loss": 0.6477, "step": 24325 }, { "epoch": 0.7455559642025255, "grad_norm": 2.022527798466008, "learning_rate": 1.6038602158617211e-06, "loss": 0.6297, "step": 24326 }, { "epoch": 0.7455866127252666, "grad_norm": 2.0952317855688927, "learning_rate": 1.6034959713486043e-06, "loss": 0.661, "step": 24327 }, { "epoch": 0.7456172612480079, "grad_norm": 1.8453775899341511, "learning_rate": 1.6031317603021101e-06, "loss": 0.551, "step": 24328 }, { "epoch": 0.745647909770749, "grad_norm": 1.8033937741105688, "learning_rate": 1.6027675827258294e-06, "loss": 0.5626, "step": 24329 }, { "epoch": 0.7456785582934903, "grad_norm": 2.0102034442643433, "learning_rate": 1.6024034386233477e-06, "loss": 0.6282, "step": 24330 }, { "epoch": 0.7457092068162314, "grad_norm": 1.8919232174283342, "learning_rate": 1.6020393279982539e-06, "loss": 0.547, "step": 24331 }, { "epoch": 0.7457398553389727, "grad_norm": 1.6905373198226097, "learning_rate": 1.6016752508541377e-06, "loss": 0.6058, "step": 24332 }, { "epoch": 0.7457705038617138, "grad_norm": 1.6968366323115958, "learning_rate": 1.6013112071945835e-06, "loss": 0.609, "step": 24333 }, { "epoch": 0.7458011523844551, "grad_norm": 2.1675851268677526, "learning_rate": 1.6009471970231793e-06, "loss": 0.5335, "step": 24334 }, { "epoch": 0.7458318009071963, "grad_norm": 2.095639775929135, "learning_rate": 1.6005832203435135e-06, "loss": 0.6368, "step": 24335 }, { "epoch": 0.7458624494299375, "grad_norm": 1.8633463010420261, "learning_rate": 1.6002192771591697e-06, "loss": 0.6264, "step": 24336 }, { "epoch": 0.7458930979526787, "grad_norm": 0.7599336631673017, "learning_rate": 1.5998553674737365e-06, "loss": 0.4035, "step": 24337 }, { "epoch": 0.7459237464754199, "grad_norm": 1.8123622360972969, "learning_rate": 1.5994914912907973e-06, "loss": 0.6095, "step": 24338 }, { "epoch": 0.7459543949981611, "grad_norm": 1.7543388962766462, "learning_rate": 1.599127648613938e-06, "loss": 0.48, "step": 24339 }, { "epoch": 0.7459850435209023, "grad_norm": 1.8914996201555507, "learning_rate": 1.5987638394467454e-06, "loss": 0.6304, "step": 24340 }, { "epoch": 0.7460156920436435, "grad_norm": 1.5815108985108028, "learning_rate": 1.598400063792802e-06, "loss": 0.5141, "step": 24341 }, { "epoch": 0.7460463405663847, "grad_norm": 1.8275269827408704, "learning_rate": 1.5980363216556926e-06, "loss": 0.5952, "step": 24342 }, { "epoch": 0.7460769890891259, "grad_norm": 1.726552967760059, "learning_rate": 1.5976726130390036e-06, "loss": 0.5184, "step": 24343 }, { "epoch": 0.7461076376118672, "grad_norm": 1.743367078880319, "learning_rate": 1.5973089379463152e-06, "loss": 0.6081, "step": 24344 }, { "epoch": 0.7461382861346083, "grad_norm": 1.7718412751806505, "learning_rate": 1.5969452963812126e-06, "loss": 0.5784, "step": 24345 }, { "epoch": 0.7461689346573496, "grad_norm": 1.7840907508599595, "learning_rate": 1.5965816883472807e-06, "loss": 0.5681, "step": 24346 }, { "epoch": 0.7461995831800907, "grad_norm": 1.7593460339764606, "learning_rate": 1.5962181138480981e-06, "loss": 0.5757, "step": 24347 }, { "epoch": 0.7462302317028319, "grad_norm": 1.8394624998061326, "learning_rate": 1.595854572887251e-06, "loss": 0.5287, "step": 24348 }, { "epoch": 0.7462608802255731, "grad_norm": 1.8672077933352735, "learning_rate": 1.595491065468318e-06, "loss": 0.5678, "step": 24349 }, { "epoch": 0.7462915287483143, "grad_norm": 1.6567575254785534, "learning_rate": 1.5951275915948827e-06, "loss": 0.5271, "step": 24350 }, { "epoch": 0.7463221772710555, "grad_norm": 1.8812894976326064, "learning_rate": 1.5947641512705282e-06, "loss": 0.6069, "step": 24351 }, { "epoch": 0.7463528257937967, "grad_norm": 0.7918080936275257, "learning_rate": 1.5944007444988318e-06, "loss": 0.4031, "step": 24352 }, { "epoch": 0.746383474316538, "grad_norm": 1.862571335701285, "learning_rate": 1.5940373712833768e-06, "loss": 0.631, "step": 24353 }, { "epoch": 0.7464141228392791, "grad_norm": 1.80166663778883, "learning_rate": 1.5936740316277444e-06, "loss": 0.6387, "step": 24354 }, { "epoch": 0.7464447713620204, "grad_norm": 1.7208989185363615, "learning_rate": 1.5933107255355113e-06, "loss": 0.5181, "step": 24355 }, { "epoch": 0.7464754198847615, "grad_norm": 1.8027097334650586, "learning_rate": 1.5929474530102596e-06, "loss": 0.645, "step": 24356 }, { "epoch": 0.7465060684075028, "grad_norm": 1.6324180438212748, "learning_rate": 1.5925842140555704e-06, "loss": 0.5068, "step": 24357 }, { "epoch": 0.7465367169302439, "grad_norm": 1.8600575458932187, "learning_rate": 1.5922210086750183e-06, "loss": 0.5678, "step": 24358 }, { "epoch": 0.7465673654529852, "grad_norm": 1.7728321149331994, "learning_rate": 1.5918578368721865e-06, "loss": 0.4846, "step": 24359 }, { "epoch": 0.7465980139757263, "grad_norm": 1.892698120407675, "learning_rate": 1.5914946986506502e-06, "loss": 0.6614, "step": 24360 }, { "epoch": 0.7466286624984676, "grad_norm": 1.779638018284253, "learning_rate": 1.5911315940139883e-06, "loss": 0.5713, "step": 24361 }, { "epoch": 0.7466593110212087, "grad_norm": 1.7511235806874261, "learning_rate": 1.590768522965781e-06, "loss": 0.5658, "step": 24362 }, { "epoch": 0.74668995954395, "grad_norm": 1.8888192016428904, "learning_rate": 1.5904054855096019e-06, "loss": 0.6208, "step": 24363 }, { "epoch": 0.7467206080666912, "grad_norm": 1.9481354881930824, "learning_rate": 1.5900424816490295e-06, "loss": 0.7048, "step": 24364 }, { "epoch": 0.7467512565894324, "grad_norm": 1.644624794049074, "learning_rate": 1.5896795113876435e-06, "loss": 0.5084, "step": 24365 }, { "epoch": 0.7467819051121736, "grad_norm": 1.880616072379981, "learning_rate": 1.5893165747290156e-06, "loss": 0.4932, "step": 24366 }, { "epoch": 0.7468125536349148, "grad_norm": 1.8710763587496209, "learning_rate": 1.5889536716767246e-06, "loss": 0.6612, "step": 24367 }, { "epoch": 0.746843202157656, "grad_norm": 1.6630290747702565, "learning_rate": 1.5885908022343454e-06, "loss": 0.542, "step": 24368 }, { "epoch": 0.7468738506803972, "grad_norm": 1.8729171040152772, "learning_rate": 1.5882279664054557e-06, "loss": 0.5599, "step": 24369 }, { "epoch": 0.7469044992031384, "grad_norm": 1.7318082273794044, "learning_rate": 1.5878651641936283e-06, "loss": 0.6363, "step": 24370 }, { "epoch": 0.7469351477258797, "grad_norm": 1.8906851309644526, "learning_rate": 1.5875023956024377e-06, "loss": 0.494, "step": 24371 }, { "epoch": 0.7469657962486208, "grad_norm": 1.8660462997785057, "learning_rate": 1.5871396606354584e-06, "loss": 0.5191, "step": 24372 }, { "epoch": 0.7469964447713621, "grad_norm": 0.7911380941692518, "learning_rate": 1.5867769592962673e-06, "loss": 0.4059, "step": 24373 }, { "epoch": 0.7470270932941032, "grad_norm": 0.8073431258247442, "learning_rate": 1.586414291588434e-06, "loss": 0.4113, "step": 24374 }, { "epoch": 0.7470577418168445, "grad_norm": 1.7529733281481688, "learning_rate": 1.586051657515535e-06, "loss": 0.6123, "step": 24375 }, { "epoch": 0.7470883903395856, "grad_norm": 1.8479411657892533, "learning_rate": 1.5856890570811433e-06, "loss": 0.6269, "step": 24376 }, { "epoch": 0.7471190388623269, "grad_norm": 1.9055635895263505, "learning_rate": 1.5853264902888294e-06, "loss": 0.5568, "step": 24377 }, { "epoch": 0.747149687385068, "grad_norm": 1.644671508102661, "learning_rate": 1.5849639571421693e-06, "loss": 0.6139, "step": 24378 }, { "epoch": 0.7471803359078092, "grad_norm": 1.590810238766832, "learning_rate": 1.5846014576447294e-06, "loss": 0.5359, "step": 24379 }, { "epoch": 0.7472109844305505, "grad_norm": 0.7917303375394957, "learning_rate": 1.5842389918000888e-06, "loss": 0.4116, "step": 24380 }, { "epoch": 0.7472416329532916, "grad_norm": 1.9979909708581187, "learning_rate": 1.583876559611815e-06, "loss": 0.5593, "step": 24381 }, { "epoch": 0.7472722814760329, "grad_norm": 0.7590969209104466, "learning_rate": 1.583514161083478e-06, "loss": 0.3875, "step": 24382 }, { "epoch": 0.747302929998774, "grad_norm": 2.0052099360057793, "learning_rate": 1.5831517962186505e-06, "loss": 0.6054, "step": 24383 }, { "epoch": 0.7473335785215153, "grad_norm": 1.7496503429638675, "learning_rate": 1.582789465020904e-06, "loss": 0.6204, "step": 24384 }, { "epoch": 0.7473642270442564, "grad_norm": 1.759670771831446, "learning_rate": 1.5824271674938057e-06, "loss": 0.5294, "step": 24385 }, { "epoch": 0.7473948755669977, "grad_norm": 2.0201455772049846, "learning_rate": 1.5820649036409269e-06, "loss": 0.6422, "step": 24386 }, { "epoch": 0.7474255240897388, "grad_norm": 1.9824056847751497, "learning_rate": 1.5817026734658369e-06, "loss": 0.5366, "step": 24387 }, { "epoch": 0.7474561726124801, "grad_norm": 1.6888044123894463, "learning_rate": 1.581340476972107e-06, "loss": 0.6069, "step": 24388 }, { "epoch": 0.7474868211352212, "grad_norm": 1.7452459971162817, "learning_rate": 1.580978314163304e-06, "loss": 0.6401, "step": 24389 }, { "epoch": 0.7475174696579625, "grad_norm": 1.9254677532102626, "learning_rate": 1.580616185042993e-06, "loss": 0.5816, "step": 24390 }, { "epoch": 0.7475481181807037, "grad_norm": 1.8204310357687157, "learning_rate": 1.580254089614749e-06, "loss": 0.5108, "step": 24391 }, { "epoch": 0.7475787667034449, "grad_norm": 1.6472400416626833, "learning_rate": 1.5798920278821362e-06, "loss": 0.5781, "step": 24392 }, { "epoch": 0.7476094152261861, "grad_norm": 2.0012435933572026, "learning_rate": 1.5795299998487212e-06, "loss": 0.6362, "step": 24393 }, { "epoch": 0.7476400637489273, "grad_norm": 0.7821015052330924, "learning_rate": 1.579168005518072e-06, "loss": 0.4034, "step": 24394 }, { "epoch": 0.7476707122716685, "grad_norm": 1.9985816546839343, "learning_rate": 1.5788060448937554e-06, "loss": 0.6512, "step": 24395 }, { "epoch": 0.7477013607944097, "grad_norm": 1.9169749631223112, "learning_rate": 1.5784441179793402e-06, "loss": 0.5794, "step": 24396 }, { "epoch": 0.7477320093171509, "grad_norm": 1.8411281818979761, "learning_rate": 1.5780822247783888e-06, "loss": 0.6587, "step": 24397 }, { "epoch": 0.7477626578398922, "grad_norm": 2.022875908478752, "learning_rate": 1.577720365294469e-06, "loss": 0.5973, "step": 24398 }, { "epoch": 0.7477933063626333, "grad_norm": 1.59261131674756, "learning_rate": 1.5773585395311474e-06, "loss": 0.5212, "step": 24399 }, { "epoch": 0.7478239548853746, "grad_norm": 0.7892987693682662, "learning_rate": 1.576996747491988e-06, "loss": 0.4099, "step": 24400 }, { "epoch": 0.7478546034081157, "grad_norm": 1.6552061010909127, "learning_rate": 1.5766349891805521e-06, "loss": 0.5254, "step": 24401 }, { "epoch": 0.747885251930857, "grad_norm": 1.7438998643546193, "learning_rate": 1.5762732646004109e-06, "loss": 0.5323, "step": 24402 }, { "epoch": 0.7479159004535981, "grad_norm": 1.6953087341897746, "learning_rate": 1.575911573755125e-06, "loss": 0.4992, "step": 24403 }, { "epoch": 0.7479465489763394, "grad_norm": 0.7690725561863851, "learning_rate": 1.5755499166482568e-06, "loss": 0.3973, "step": 24404 }, { "epoch": 0.7479771974990805, "grad_norm": 1.5399041056620424, "learning_rate": 1.5751882932833717e-06, "loss": 0.6282, "step": 24405 }, { "epoch": 0.7480078460218218, "grad_norm": 1.661137947347663, "learning_rate": 1.574826703664033e-06, "loss": 0.5459, "step": 24406 }, { "epoch": 0.748038494544563, "grad_norm": 1.7409596537753218, "learning_rate": 1.574465147793804e-06, "loss": 0.5348, "step": 24407 }, { "epoch": 0.7480691430673042, "grad_norm": 0.8210418303245343, "learning_rate": 1.5741036256762455e-06, "loss": 0.4126, "step": 24408 }, { "epoch": 0.7480997915900454, "grad_norm": 2.028082986070615, "learning_rate": 1.5737421373149198e-06, "loss": 0.6213, "step": 24409 }, { "epoch": 0.7481304401127865, "grad_norm": 1.9060914295290614, "learning_rate": 1.5733806827133913e-06, "loss": 0.6381, "step": 24410 }, { "epoch": 0.7481610886355278, "grad_norm": 2.0330118380323565, "learning_rate": 1.57301926187522e-06, "loss": 0.6145, "step": 24411 }, { "epoch": 0.7481917371582689, "grad_norm": 0.8453235404704115, "learning_rate": 1.572657874803965e-06, "loss": 0.4301, "step": 24412 }, { "epoch": 0.7482223856810102, "grad_norm": 0.7925653726519702, "learning_rate": 1.5722965215031888e-06, "loss": 0.4212, "step": 24413 }, { "epoch": 0.7482530342037513, "grad_norm": 1.7828693118531094, "learning_rate": 1.5719352019764516e-06, "loss": 0.5649, "step": 24414 }, { "epoch": 0.7482836827264926, "grad_norm": 1.7280035535895941, "learning_rate": 1.5715739162273163e-06, "loss": 0.5386, "step": 24415 }, { "epoch": 0.7483143312492337, "grad_norm": 1.7154919699926026, "learning_rate": 1.5712126642593385e-06, "loss": 0.5771, "step": 24416 }, { "epoch": 0.748344979771975, "grad_norm": 1.8823423268268953, "learning_rate": 1.5708514460760794e-06, "loss": 0.6267, "step": 24417 }, { "epoch": 0.7483756282947162, "grad_norm": 1.8136087982074505, "learning_rate": 1.5704902616811002e-06, "loss": 0.5977, "step": 24418 }, { "epoch": 0.7484062768174574, "grad_norm": 2.043385190146566, "learning_rate": 1.5701291110779565e-06, "loss": 0.6116, "step": 24419 }, { "epoch": 0.7484369253401986, "grad_norm": 0.8109373617522732, "learning_rate": 1.5697679942702077e-06, "loss": 0.4151, "step": 24420 }, { "epoch": 0.7484675738629398, "grad_norm": 1.7969953858285148, "learning_rate": 1.5694069112614146e-06, "loss": 0.6114, "step": 24421 }, { "epoch": 0.748498222385681, "grad_norm": 1.8592831042635263, "learning_rate": 1.5690458620551313e-06, "loss": 0.5343, "step": 24422 }, { "epoch": 0.7485288709084222, "grad_norm": 1.653705351801182, "learning_rate": 1.5686848466549182e-06, "loss": 0.5702, "step": 24423 }, { "epoch": 0.7485595194311634, "grad_norm": 0.7961570336327392, "learning_rate": 1.5683238650643302e-06, "loss": 0.4053, "step": 24424 }, { "epoch": 0.7485901679539047, "grad_norm": 1.8714786427176189, "learning_rate": 1.567962917286925e-06, "loss": 0.5434, "step": 24425 }, { "epoch": 0.7486208164766458, "grad_norm": 1.6630761238068648, "learning_rate": 1.5676020033262606e-06, "loss": 0.5716, "step": 24426 }, { "epoch": 0.7486514649993871, "grad_norm": 1.8473531361422744, "learning_rate": 1.5672411231858903e-06, "loss": 0.6016, "step": 24427 }, { "epoch": 0.7486821135221282, "grad_norm": 1.8120377622971628, "learning_rate": 1.566880276869372e-06, "loss": 0.6514, "step": 24428 }, { "epoch": 0.7487127620448695, "grad_norm": 1.9498844951064023, "learning_rate": 1.5665194643802617e-06, "loss": 0.6441, "step": 24429 }, { "epoch": 0.7487434105676106, "grad_norm": 1.8319188683963374, "learning_rate": 1.5661586857221139e-06, "loss": 0.5417, "step": 24430 }, { "epoch": 0.7487740590903519, "grad_norm": 2.0052262828400065, "learning_rate": 1.5657979408984803e-06, "loss": 0.6073, "step": 24431 }, { "epoch": 0.748804707613093, "grad_norm": 2.0018235558817414, "learning_rate": 1.5654372299129212e-06, "loss": 0.5748, "step": 24432 }, { "epoch": 0.7488353561358343, "grad_norm": 1.7485608951072453, "learning_rate": 1.565076552768986e-06, "loss": 0.5681, "step": 24433 }, { "epoch": 0.7488660046585754, "grad_norm": 1.8820455559907003, "learning_rate": 1.5647159094702325e-06, "loss": 0.6095, "step": 24434 }, { "epoch": 0.7488966531813167, "grad_norm": 1.6957068690965946, "learning_rate": 1.56435530002021e-06, "loss": 0.5351, "step": 24435 }, { "epoch": 0.7489273017040579, "grad_norm": 1.8285620263300206, "learning_rate": 1.563994724422474e-06, "loss": 0.5695, "step": 24436 }, { "epoch": 0.7489579502267991, "grad_norm": 1.8329245486406684, "learning_rate": 1.5636341826805783e-06, "loss": 0.6374, "step": 24437 }, { "epoch": 0.7489885987495403, "grad_norm": 1.8161722932820163, "learning_rate": 1.563273674798073e-06, "loss": 0.5744, "step": 24438 }, { "epoch": 0.7490192472722815, "grad_norm": 1.7934021535151567, "learning_rate": 1.562913200778512e-06, "loss": 0.5277, "step": 24439 }, { "epoch": 0.7490498957950227, "grad_norm": 1.850143524942469, "learning_rate": 1.5625527606254477e-06, "loss": 0.5809, "step": 24440 }, { "epoch": 0.7490805443177638, "grad_norm": 1.7483901908703865, "learning_rate": 1.5621923543424288e-06, "loss": 0.5434, "step": 24441 }, { "epoch": 0.7491111928405051, "grad_norm": 1.9069633969180189, "learning_rate": 1.5618319819330086e-06, "loss": 0.5745, "step": 24442 }, { "epoch": 0.7491418413632462, "grad_norm": 2.141317733258504, "learning_rate": 1.5614716434007393e-06, "loss": 0.5917, "step": 24443 }, { "epoch": 0.7491724898859875, "grad_norm": 2.1890308444503135, "learning_rate": 1.5611113387491678e-06, "loss": 0.6365, "step": 24444 }, { "epoch": 0.7492031384087287, "grad_norm": 1.651274692259101, "learning_rate": 1.5607510679818478e-06, "loss": 0.5049, "step": 24445 }, { "epoch": 0.7492337869314699, "grad_norm": 1.846276399278453, "learning_rate": 1.5603908311023258e-06, "loss": 0.5623, "step": 24446 }, { "epoch": 0.7492644354542111, "grad_norm": 2.402557525526752, "learning_rate": 1.560030628114153e-06, "loss": 0.6268, "step": 24447 }, { "epoch": 0.7492950839769523, "grad_norm": 1.7880961815162462, "learning_rate": 1.5596704590208806e-06, "loss": 0.5969, "step": 24448 }, { "epoch": 0.7493257324996935, "grad_norm": 1.8328626858285644, "learning_rate": 1.5593103238260531e-06, "loss": 0.6556, "step": 24449 }, { "epoch": 0.7493563810224347, "grad_norm": 1.8307913625754573, "learning_rate": 1.558950222533222e-06, "loss": 0.6807, "step": 24450 }, { "epoch": 0.7493870295451759, "grad_norm": 2.1129965245243767, "learning_rate": 1.558590155145936e-06, "loss": 0.5633, "step": 24451 }, { "epoch": 0.7494176780679171, "grad_norm": 0.8158529492627402, "learning_rate": 1.5582301216677399e-06, "loss": 0.3978, "step": 24452 }, { "epoch": 0.7494483265906583, "grad_norm": 1.767669226339956, "learning_rate": 1.5578701221021835e-06, "loss": 0.5404, "step": 24453 }, { "epoch": 0.7494789751133996, "grad_norm": 1.7712143297409972, "learning_rate": 1.557510156452815e-06, "loss": 0.5125, "step": 24454 }, { "epoch": 0.7495096236361407, "grad_norm": 1.9384708041714072, "learning_rate": 1.5571502247231778e-06, "loss": 0.458, "step": 24455 }, { "epoch": 0.749540272158882, "grad_norm": 2.253021362220373, "learning_rate": 1.5567903269168222e-06, "loss": 0.6008, "step": 24456 }, { "epoch": 0.7495709206816231, "grad_norm": 1.9506885681398658, "learning_rate": 1.5564304630372912e-06, "loss": 0.6716, "step": 24457 }, { "epoch": 0.7496015692043644, "grad_norm": 1.827711714446942, "learning_rate": 1.5560706330881313e-06, "loss": 0.5669, "step": 24458 }, { "epoch": 0.7496322177271055, "grad_norm": 1.6314627738569039, "learning_rate": 1.5557108370728908e-06, "loss": 0.5963, "step": 24459 }, { "epoch": 0.7496628662498468, "grad_norm": 2.027852318020845, "learning_rate": 1.5553510749951106e-06, "loss": 0.5974, "step": 24460 }, { "epoch": 0.7496935147725879, "grad_norm": 1.5714387032006805, "learning_rate": 1.5549913468583378e-06, "loss": 0.5783, "step": 24461 }, { "epoch": 0.7497241632953292, "grad_norm": 2.1029279170013266, "learning_rate": 1.554631652666118e-06, "loss": 0.6299, "step": 24462 }, { "epoch": 0.7497548118180704, "grad_norm": 2.0925110955244595, "learning_rate": 1.5542719924219928e-06, "loss": 0.6691, "step": 24463 }, { "epoch": 0.7497854603408116, "grad_norm": 2.0238917386589743, "learning_rate": 1.553912366129509e-06, "loss": 0.5932, "step": 24464 }, { "epoch": 0.7498161088635528, "grad_norm": 1.9484277610174672, "learning_rate": 1.5535527737922068e-06, "loss": 0.5843, "step": 24465 }, { "epoch": 0.749846757386294, "grad_norm": 1.878488835642931, "learning_rate": 1.5531932154136308e-06, "loss": 0.6267, "step": 24466 }, { "epoch": 0.7498774059090352, "grad_norm": 1.5680618718894357, "learning_rate": 1.5528336909973258e-06, "loss": 0.5774, "step": 24467 }, { "epoch": 0.7499080544317764, "grad_norm": 2.1043616826762697, "learning_rate": 1.5524742005468302e-06, "loss": 0.7187, "step": 24468 }, { "epoch": 0.7499387029545176, "grad_norm": 1.8139112973813076, "learning_rate": 1.5521147440656887e-06, "loss": 0.6011, "step": 24469 }, { "epoch": 0.7499693514772588, "grad_norm": 1.7897334607972653, "learning_rate": 1.5517553215574444e-06, "loss": 0.5658, "step": 24470 }, { "epoch": 0.75, "grad_norm": 1.6229820948025304, "learning_rate": 1.5513959330256357e-06, "loss": 0.5602, "step": 24471 }, { "epoch": 0.7500306485227412, "grad_norm": 2.13113796843086, "learning_rate": 1.5510365784738052e-06, "loss": 0.6035, "step": 24472 }, { "epoch": 0.7500612970454824, "grad_norm": 2.0322740895730247, "learning_rate": 1.5506772579054952e-06, "loss": 0.629, "step": 24473 }, { "epoch": 0.7500919455682236, "grad_norm": 0.7658545653733213, "learning_rate": 1.5503179713242432e-06, "loss": 0.3916, "step": 24474 }, { "epoch": 0.7501225940909648, "grad_norm": 1.9359813711674, "learning_rate": 1.549958718733593e-06, "loss": 0.5769, "step": 24475 }, { "epoch": 0.750153242613706, "grad_norm": 1.8013542376412384, "learning_rate": 1.5495995001370784e-06, "loss": 0.5525, "step": 24476 }, { "epoch": 0.7501838911364472, "grad_norm": 1.9813763900001828, "learning_rate": 1.5492403155382462e-06, "loss": 0.576, "step": 24477 }, { "epoch": 0.7502145396591884, "grad_norm": 1.8769631268813425, "learning_rate": 1.5488811649406322e-06, "loss": 0.6734, "step": 24478 }, { "epoch": 0.7502451881819296, "grad_norm": 1.7798893763906878, "learning_rate": 1.5485220483477731e-06, "loss": 0.6315, "step": 24479 }, { "epoch": 0.7502758367046708, "grad_norm": 1.933286086306785, "learning_rate": 1.5481629657632096e-06, "loss": 0.6803, "step": 24480 }, { "epoch": 0.7503064852274121, "grad_norm": 2.145750614647648, "learning_rate": 1.5478039171904813e-06, "loss": 0.5713, "step": 24481 }, { "epoch": 0.7503371337501532, "grad_norm": 2.0341657117987486, "learning_rate": 1.5474449026331222e-06, "loss": 0.635, "step": 24482 }, { "epoch": 0.7503677822728945, "grad_norm": 1.6664275488630371, "learning_rate": 1.5470859220946722e-06, "loss": 0.494, "step": 24483 }, { "epoch": 0.7503984307956356, "grad_norm": 0.7701918625922152, "learning_rate": 1.5467269755786695e-06, "loss": 0.4002, "step": 24484 }, { "epoch": 0.7504290793183769, "grad_norm": 0.8101886093514586, "learning_rate": 1.5463680630886486e-06, "loss": 0.4245, "step": 24485 }, { "epoch": 0.750459727841118, "grad_norm": 1.7851013812456324, "learning_rate": 1.5460091846281477e-06, "loss": 0.6528, "step": 24486 }, { "epoch": 0.7504903763638593, "grad_norm": 1.6606797914271947, "learning_rate": 1.5456503402006989e-06, "loss": 0.5773, "step": 24487 }, { "epoch": 0.7505210248866004, "grad_norm": 1.6965378455045854, "learning_rate": 1.545291529809844e-06, "loss": 0.5037, "step": 24488 }, { "epoch": 0.7505516734093417, "grad_norm": 1.9181893332121651, "learning_rate": 1.5449327534591164e-06, "loss": 0.4787, "step": 24489 }, { "epoch": 0.7505823219320829, "grad_norm": 0.8116273547633057, "learning_rate": 1.5445740111520486e-06, "loss": 0.4065, "step": 24490 }, { "epoch": 0.7506129704548241, "grad_norm": 1.9332917664261486, "learning_rate": 1.5442153028921768e-06, "loss": 0.6123, "step": 24491 }, { "epoch": 0.7506436189775653, "grad_norm": 0.7471680024768039, "learning_rate": 1.5438566286830376e-06, "loss": 0.3888, "step": 24492 }, { "epoch": 0.7506742675003065, "grad_norm": 1.7827956535369334, "learning_rate": 1.5434979885281615e-06, "loss": 0.6249, "step": 24493 }, { "epoch": 0.7507049160230477, "grad_norm": 2.058083034472991, "learning_rate": 1.5431393824310847e-06, "loss": 0.5573, "step": 24494 }, { "epoch": 0.7507355645457889, "grad_norm": 1.9932973315629696, "learning_rate": 1.5427808103953396e-06, "loss": 0.4972, "step": 24495 }, { "epoch": 0.7507662130685301, "grad_norm": 0.7734390905277421, "learning_rate": 1.5424222724244615e-06, "loss": 0.4103, "step": 24496 }, { "epoch": 0.7507968615912713, "grad_norm": 0.7901425826093396, "learning_rate": 1.5420637685219814e-06, "loss": 0.4086, "step": 24497 }, { "epoch": 0.7508275101140125, "grad_norm": 1.6804009220908531, "learning_rate": 1.5417052986914283e-06, "loss": 0.5735, "step": 24498 }, { "epoch": 0.7508581586367538, "grad_norm": 1.7185055520924466, "learning_rate": 1.541346862936341e-06, "loss": 0.6634, "step": 24499 }, { "epoch": 0.7508888071594949, "grad_norm": 1.8121137277871302, "learning_rate": 1.5409884612602482e-06, "loss": 0.6176, "step": 24500 }, { "epoch": 0.7509194556822362, "grad_norm": 1.7665495832578506, "learning_rate": 1.5406300936666796e-06, "loss": 0.5403, "step": 24501 }, { "epoch": 0.7509501042049773, "grad_norm": 1.9008058765849458, "learning_rate": 1.5402717601591676e-06, "loss": 0.5451, "step": 24502 }, { "epoch": 0.7509807527277185, "grad_norm": 1.9004686152049928, "learning_rate": 1.5399134607412447e-06, "loss": 0.5814, "step": 24503 }, { "epoch": 0.7510114012504597, "grad_norm": 1.9806004444324792, "learning_rate": 1.5395551954164383e-06, "loss": 0.6432, "step": 24504 }, { "epoch": 0.7510420497732009, "grad_norm": 1.6910823478986405, "learning_rate": 1.5391969641882798e-06, "loss": 0.522, "step": 24505 }, { "epoch": 0.7510726982959421, "grad_norm": 0.832537712591996, "learning_rate": 1.5388387670602995e-06, "loss": 0.4148, "step": 24506 }, { "epoch": 0.7511033468186833, "grad_norm": 1.961484227580124, "learning_rate": 1.5384806040360272e-06, "loss": 0.7051, "step": 24507 }, { "epoch": 0.7511339953414246, "grad_norm": 1.6540569175260493, "learning_rate": 1.538122475118992e-06, "loss": 0.5257, "step": 24508 }, { "epoch": 0.7511646438641657, "grad_norm": 1.9420103960446717, "learning_rate": 1.5377643803127196e-06, "loss": 0.574, "step": 24509 }, { "epoch": 0.751195292386907, "grad_norm": 1.8769486690322112, "learning_rate": 1.5374063196207411e-06, "loss": 0.5987, "step": 24510 }, { "epoch": 0.7512259409096481, "grad_norm": 1.7831970758145943, "learning_rate": 1.5370482930465858e-06, "loss": 0.5948, "step": 24511 }, { "epoch": 0.7512565894323894, "grad_norm": 1.9411836571523136, "learning_rate": 1.5366903005937777e-06, "loss": 0.5847, "step": 24512 }, { "epoch": 0.7512872379551305, "grad_norm": 1.8619906843515923, "learning_rate": 1.536332342265846e-06, "loss": 0.5513, "step": 24513 }, { "epoch": 0.7513178864778718, "grad_norm": 2.157706064501577, "learning_rate": 1.5359744180663184e-06, "loss": 0.6636, "step": 24514 }, { "epoch": 0.7513485350006129, "grad_norm": 1.787280416933499, "learning_rate": 1.5356165279987222e-06, "loss": 0.5635, "step": 24515 }, { "epoch": 0.7513791835233542, "grad_norm": 1.8339434335690443, "learning_rate": 1.5352586720665835e-06, "loss": 0.5426, "step": 24516 }, { "epoch": 0.7514098320460953, "grad_norm": 1.8462749133289285, "learning_rate": 1.5349008502734242e-06, "loss": 0.6054, "step": 24517 }, { "epoch": 0.7514404805688366, "grad_norm": 0.7757833678202719, "learning_rate": 1.5345430626227763e-06, "loss": 0.406, "step": 24518 }, { "epoch": 0.7514711290915778, "grad_norm": 1.8096113681274562, "learning_rate": 1.5341853091181624e-06, "loss": 0.5309, "step": 24519 }, { "epoch": 0.751501777614319, "grad_norm": 1.799434421611656, "learning_rate": 1.533827589763106e-06, "loss": 0.6423, "step": 24520 }, { "epoch": 0.7515324261370602, "grad_norm": 0.8273771284524293, "learning_rate": 1.533469904561133e-06, "loss": 0.4007, "step": 24521 }, { "epoch": 0.7515630746598014, "grad_norm": 0.8281640230668735, "learning_rate": 1.5331122535157677e-06, "loss": 0.4103, "step": 24522 }, { "epoch": 0.7515937231825426, "grad_norm": 1.823928053945998, "learning_rate": 1.5327546366305368e-06, "loss": 0.5652, "step": 24523 }, { "epoch": 0.7516243717052838, "grad_norm": 1.9952518328786117, "learning_rate": 1.5323970539089595e-06, "loss": 0.5533, "step": 24524 }, { "epoch": 0.751655020228025, "grad_norm": 0.7846753383149855, "learning_rate": 1.5320395053545612e-06, "loss": 0.4015, "step": 24525 }, { "epoch": 0.7516856687507663, "grad_norm": 1.905806162489496, "learning_rate": 1.5316819909708668e-06, "loss": 0.5584, "step": 24526 }, { "epoch": 0.7517163172735074, "grad_norm": 1.687976589939438, "learning_rate": 1.5313245107613967e-06, "loss": 0.6606, "step": 24527 }, { "epoch": 0.7517469657962487, "grad_norm": 1.924508944831169, "learning_rate": 1.530967064729671e-06, "loss": 0.6069, "step": 24528 }, { "epoch": 0.7517776143189898, "grad_norm": 0.7972916714606927, "learning_rate": 1.5306096528792175e-06, "loss": 0.4007, "step": 24529 }, { "epoch": 0.7518082628417311, "grad_norm": 1.8711136101036363, "learning_rate": 1.5302522752135546e-06, "loss": 0.4907, "step": 24530 }, { "epoch": 0.7518389113644722, "grad_norm": 0.8129300081491276, "learning_rate": 1.5298949317362022e-06, "loss": 0.394, "step": 24531 }, { "epoch": 0.7518695598872135, "grad_norm": 1.9303039445022376, "learning_rate": 1.529537622450683e-06, "loss": 0.483, "step": 24532 }, { "epoch": 0.7519002084099546, "grad_norm": 1.861846154133219, "learning_rate": 1.5291803473605176e-06, "loss": 0.6125, "step": 24533 }, { "epoch": 0.7519308569326958, "grad_norm": 2.0501510924802107, "learning_rate": 1.5288231064692277e-06, "loss": 0.5711, "step": 24534 }, { "epoch": 0.751961505455437, "grad_norm": 1.8574862167985062, "learning_rate": 1.528465899780331e-06, "loss": 0.57, "step": 24535 }, { "epoch": 0.7519921539781782, "grad_norm": 2.121910298002044, "learning_rate": 1.5281087272973471e-06, "loss": 0.637, "step": 24536 }, { "epoch": 0.7520228025009195, "grad_norm": 1.7860494603222457, "learning_rate": 1.527751589023798e-06, "loss": 0.5866, "step": 24537 }, { "epoch": 0.7520534510236606, "grad_norm": 2.0680304733555896, "learning_rate": 1.527394484963201e-06, "loss": 0.5925, "step": 24538 }, { "epoch": 0.7520840995464019, "grad_norm": 1.779877133148587, "learning_rate": 1.527037415119072e-06, "loss": 0.5673, "step": 24539 }, { "epoch": 0.752114748069143, "grad_norm": 1.7782342098069286, "learning_rate": 1.5266803794949343e-06, "loss": 0.6399, "step": 24540 }, { "epoch": 0.7521453965918843, "grad_norm": 1.9953131262226698, "learning_rate": 1.5263233780943027e-06, "loss": 0.5412, "step": 24541 }, { "epoch": 0.7521760451146254, "grad_norm": 2.004246837523395, "learning_rate": 1.5259664109206966e-06, "loss": 0.6156, "step": 24542 }, { "epoch": 0.7522066936373667, "grad_norm": 1.958977276744303, "learning_rate": 1.5256094779776308e-06, "loss": 0.4956, "step": 24543 }, { "epoch": 0.7522373421601078, "grad_norm": 1.6954657409904663, "learning_rate": 1.525252579268624e-06, "loss": 0.5033, "step": 24544 }, { "epoch": 0.7522679906828491, "grad_norm": 1.7732880265681235, "learning_rate": 1.5248957147971939e-06, "loss": 0.6731, "step": 24545 }, { "epoch": 0.7522986392055903, "grad_norm": 0.78448538397984, "learning_rate": 1.5245388845668541e-06, "loss": 0.4043, "step": 24546 }, { "epoch": 0.7523292877283315, "grad_norm": 2.0707697869364172, "learning_rate": 1.524182088581122e-06, "loss": 0.614, "step": 24547 }, { "epoch": 0.7523599362510727, "grad_norm": 2.1507607317377984, "learning_rate": 1.5238253268435143e-06, "loss": 0.5648, "step": 24548 }, { "epoch": 0.7523905847738139, "grad_norm": 2.049157393297596, "learning_rate": 1.5234685993575439e-06, "loss": 0.6302, "step": 24549 }, { "epoch": 0.7524212332965551, "grad_norm": 1.9608925197116385, "learning_rate": 1.5231119061267268e-06, "loss": 0.5852, "step": 24550 }, { "epoch": 0.7524518818192963, "grad_norm": 1.7359251585269881, "learning_rate": 1.5227552471545793e-06, "loss": 0.5906, "step": 24551 }, { "epoch": 0.7524825303420375, "grad_norm": 1.8059216136818705, "learning_rate": 1.5223986224446124e-06, "loss": 0.548, "step": 24552 }, { "epoch": 0.7525131788647788, "grad_norm": 1.7355859271256955, "learning_rate": 1.5220420320003433e-06, "loss": 0.5448, "step": 24553 }, { "epoch": 0.7525438273875199, "grad_norm": 2.062321529205591, "learning_rate": 1.521685475825282e-06, "loss": 0.6594, "step": 24554 }, { "epoch": 0.7525744759102612, "grad_norm": 2.0444687440783844, "learning_rate": 1.5213289539229443e-06, "loss": 0.6066, "step": 24555 }, { "epoch": 0.7526051244330023, "grad_norm": 1.710679173983627, "learning_rate": 1.5209724662968433e-06, "loss": 0.5578, "step": 24556 }, { "epoch": 0.7526357729557436, "grad_norm": 2.0316624761259523, "learning_rate": 1.52061601295049e-06, "loss": 0.6569, "step": 24557 }, { "epoch": 0.7526664214784847, "grad_norm": 1.845352935519372, "learning_rate": 1.5202595938873965e-06, "loss": 0.5568, "step": 24558 }, { "epoch": 0.752697070001226, "grad_norm": 0.7637750340001729, "learning_rate": 1.5199032091110777e-06, "loss": 0.3901, "step": 24559 }, { "epoch": 0.7527277185239671, "grad_norm": 0.8605433319335132, "learning_rate": 1.5195468586250412e-06, "loss": 0.4241, "step": 24560 }, { "epoch": 0.7527583670467084, "grad_norm": 1.7855041978504462, "learning_rate": 1.5191905424328019e-06, "loss": 0.5978, "step": 24561 }, { "epoch": 0.7527890155694495, "grad_norm": 0.7994506259643128, "learning_rate": 1.518834260537867e-06, "loss": 0.4101, "step": 24562 }, { "epoch": 0.7528196640921908, "grad_norm": 1.9052479552496109, "learning_rate": 1.5184780129437487e-06, "loss": 0.6255, "step": 24563 }, { "epoch": 0.752850312614932, "grad_norm": 0.7891597805602222, "learning_rate": 1.5181217996539589e-06, "loss": 0.39, "step": 24564 }, { "epoch": 0.7528809611376731, "grad_norm": 1.9362609479483113, "learning_rate": 1.5177656206720043e-06, "loss": 0.5531, "step": 24565 }, { "epoch": 0.7529116096604144, "grad_norm": 1.684339880703327, "learning_rate": 1.5174094760013963e-06, "loss": 0.5331, "step": 24566 }, { "epoch": 0.7529422581831555, "grad_norm": 0.8115545251402779, "learning_rate": 1.517053365645645e-06, "loss": 0.3925, "step": 24567 }, { "epoch": 0.7529729067058968, "grad_norm": 1.83106865645937, "learning_rate": 1.5166972896082565e-06, "loss": 0.6235, "step": 24568 }, { "epoch": 0.7530035552286379, "grad_norm": 1.701346452760213, "learning_rate": 1.5163412478927408e-06, "loss": 0.428, "step": 24569 }, { "epoch": 0.7530342037513792, "grad_norm": 1.7651198147571365, "learning_rate": 1.5159852405026082e-06, "loss": 0.6191, "step": 24570 }, { "epoch": 0.7530648522741203, "grad_norm": 1.7155230543633946, "learning_rate": 1.5156292674413625e-06, "loss": 0.5752, "step": 24571 }, { "epoch": 0.7530955007968616, "grad_norm": 1.992003426069001, "learning_rate": 1.515273328712515e-06, "loss": 0.5864, "step": 24572 }, { "epoch": 0.7531261493196028, "grad_norm": 2.04321227350391, "learning_rate": 1.5149174243195692e-06, "loss": 0.6136, "step": 24573 }, { "epoch": 0.753156797842344, "grad_norm": 1.8935870710237561, "learning_rate": 1.5145615542660335e-06, "loss": 0.5951, "step": 24574 }, { "epoch": 0.7531874463650852, "grad_norm": 1.9769899756180673, "learning_rate": 1.5142057185554165e-06, "loss": 0.6278, "step": 24575 }, { "epoch": 0.7532180948878264, "grad_norm": 1.7846572368909155, "learning_rate": 1.5138499171912208e-06, "loss": 0.5351, "step": 24576 }, { "epoch": 0.7532487434105676, "grad_norm": 1.9667204609952107, "learning_rate": 1.5134941501769534e-06, "loss": 0.5577, "step": 24577 }, { "epoch": 0.7532793919333088, "grad_norm": 1.7489712392061976, "learning_rate": 1.5131384175161223e-06, "loss": 0.5948, "step": 24578 }, { "epoch": 0.75331004045605, "grad_norm": 2.0023650103358963, "learning_rate": 1.512782719212229e-06, "loss": 0.6298, "step": 24579 }, { "epoch": 0.7533406889787913, "grad_norm": 1.9473604477866115, "learning_rate": 1.5124270552687798e-06, "loss": 0.6443, "step": 24580 }, { "epoch": 0.7533713375015324, "grad_norm": 1.6065296779120832, "learning_rate": 1.5120714256892804e-06, "loss": 0.539, "step": 24581 }, { "epoch": 0.7534019860242737, "grad_norm": 1.7992376163212112, "learning_rate": 1.5117158304772322e-06, "loss": 0.5427, "step": 24582 }, { "epoch": 0.7534326345470148, "grad_norm": 1.6562916735531263, "learning_rate": 1.5113602696361419e-06, "loss": 0.5771, "step": 24583 }, { "epoch": 0.7534632830697561, "grad_norm": 1.7957723234460214, "learning_rate": 1.51100474316951e-06, "loss": 0.5544, "step": 24584 }, { "epoch": 0.7534939315924972, "grad_norm": 1.8252938382524768, "learning_rate": 1.5106492510808413e-06, "loss": 0.5633, "step": 24585 }, { "epoch": 0.7535245801152385, "grad_norm": 1.8245205393426052, "learning_rate": 1.5102937933736394e-06, "loss": 0.5615, "step": 24586 }, { "epoch": 0.7535552286379796, "grad_norm": 1.8765582437048653, "learning_rate": 1.5099383700514047e-06, "loss": 0.5186, "step": 24587 }, { "epoch": 0.7535858771607209, "grad_norm": 1.9361595965483611, "learning_rate": 1.5095829811176399e-06, "loss": 0.5735, "step": 24588 }, { "epoch": 0.753616525683462, "grad_norm": 1.8374442946773772, "learning_rate": 1.5092276265758483e-06, "loss": 0.5656, "step": 24589 }, { "epoch": 0.7536471742062033, "grad_norm": 1.6861531931629605, "learning_rate": 1.5088723064295291e-06, "loss": 0.5965, "step": 24590 }, { "epoch": 0.7536778227289445, "grad_norm": 2.0092615168279613, "learning_rate": 1.5085170206821836e-06, "loss": 0.6202, "step": 24591 }, { "epoch": 0.7537084712516857, "grad_norm": 1.7579759276146978, "learning_rate": 1.5081617693373151e-06, "loss": 0.4976, "step": 24592 }, { "epoch": 0.7537391197744269, "grad_norm": 2.235599886984649, "learning_rate": 1.5078065523984208e-06, "loss": 0.6496, "step": 24593 }, { "epoch": 0.7537697682971681, "grad_norm": 1.7939657950890446, "learning_rate": 1.5074513698690036e-06, "loss": 0.5474, "step": 24594 }, { "epoch": 0.7538004168199093, "grad_norm": 0.8013894982193677, "learning_rate": 1.5070962217525582e-06, "loss": 0.4064, "step": 24595 }, { "epoch": 0.7538310653426504, "grad_norm": 1.6412286955265247, "learning_rate": 1.5067411080525907e-06, "loss": 0.5494, "step": 24596 }, { "epoch": 0.7538617138653917, "grad_norm": 0.8387074966619215, "learning_rate": 1.5063860287725968e-06, "loss": 0.4026, "step": 24597 }, { "epoch": 0.7538923623881328, "grad_norm": 1.6675305686489004, "learning_rate": 1.5060309839160737e-06, "loss": 0.488, "step": 24598 }, { "epoch": 0.7539230109108741, "grad_norm": 1.8883604499333047, "learning_rate": 1.5056759734865218e-06, "loss": 0.4514, "step": 24599 }, { "epoch": 0.7539536594336153, "grad_norm": 1.9570228851867995, "learning_rate": 1.5053209974874393e-06, "loss": 0.6233, "step": 24600 }, { "epoch": 0.7539843079563565, "grad_norm": 2.0506520912995256, "learning_rate": 1.5049660559223223e-06, "loss": 0.6445, "step": 24601 }, { "epoch": 0.7540149564790977, "grad_norm": 1.882827742430076, "learning_rate": 1.5046111487946685e-06, "loss": 0.6602, "step": 24602 }, { "epoch": 0.7540456050018389, "grad_norm": 0.7649830379036856, "learning_rate": 1.5042562761079755e-06, "loss": 0.3985, "step": 24603 }, { "epoch": 0.7540762535245801, "grad_norm": 1.9096140306312108, "learning_rate": 1.5039014378657413e-06, "loss": 0.6066, "step": 24604 }, { "epoch": 0.7541069020473213, "grad_norm": 1.8844078130184316, "learning_rate": 1.503546634071461e-06, "loss": 0.5748, "step": 24605 }, { "epoch": 0.7541375505700625, "grad_norm": 1.8048841478331228, "learning_rate": 1.5031918647286287e-06, "loss": 0.5522, "step": 24606 }, { "epoch": 0.7541681990928037, "grad_norm": 2.0802882449985636, "learning_rate": 1.5028371298407418e-06, "loss": 0.6355, "step": 24607 }, { "epoch": 0.7541988476155449, "grad_norm": 1.8829586030638779, "learning_rate": 1.5024824294112967e-06, "loss": 0.5565, "step": 24608 }, { "epoch": 0.7542294961382862, "grad_norm": 1.726905934695489, "learning_rate": 1.5021277634437858e-06, "loss": 0.5963, "step": 24609 }, { "epoch": 0.7542601446610273, "grad_norm": 1.821053868040754, "learning_rate": 1.501773131941705e-06, "loss": 0.6274, "step": 24610 }, { "epoch": 0.7542907931837686, "grad_norm": 1.8446959249498835, "learning_rate": 1.5014185349085498e-06, "loss": 0.6404, "step": 24611 }, { "epoch": 0.7543214417065097, "grad_norm": 1.75966247588415, "learning_rate": 1.5010639723478115e-06, "loss": 0.5845, "step": 24612 }, { "epoch": 0.754352090229251, "grad_norm": 1.9524306135485754, "learning_rate": 1.500709444262987e-06, "loss": 0.6496, "step": 24613 }, { "epoch": 0.7543827387519921, "grad_norm": 1.783664660326747, "learning_rate": 1.5003549506575643e-06, "loss": 0.5485, "step": 24614 }, { "epoch": 0.7544133872747334, "grad_norm": 1.8558487382565578, "learning_rate": 1.500000491535043e-06, "loss": 0.666, "step": 24615 }, { "epoch": 0.7544440357974745, "grad_norm": 1.6828429011145951, "learning_rate": 1.499646066898912e-06, "loss": 0.5071, "step": 24616 }, { "epoch": 0.7544746843202158, "grad_norm": 1.9167852254077964, "learning_rate": 1.4992916767526627e-06, "loss": 0.6202, "step": 24617 }, { "epoch": 0.754505332842957, "grad_norm": 1.7852682556523873, "learning_rate": 1.4989373210997882e-06, "loss": 0.5712, "step": 24618 }, { "epoch": 0.7545359813656982, "grad_norm": 1.6846633826252777, "learning_rate": 1.4985829999437813e-06, "loss": 0.5566, "step": 24619 }, { "epoch": 0.7545666298884394, "grad_norm": 1.8836462959815303, "learning_rate": 1.4982287132881307e-06, "loss": 0.6, "step": 24620 }, { "epoch": 0.7545972784111806, "grad_norm": 1.8087283331064656, "learning_rate": 1.4978744611363284e-06, "loss": 0.5818, "step": 24621 }, { "epoch": 0.7546279269339218, "grad_norm": 1.8993798713132366, "learning_rate": 1.4975202434918652e-06, "loss": 0.6676, "step": 24622 }, { "epoch": 0.754658575456663, "grad_norm": 1.8004965870030085, "learning_rate": 1.4971660603582328e-06, "loss": 0.5562, "step": 24623 }, { "epoch": 0.7546892239794042, "grad_norm": 1.6858035726104197, "learning_rate": 1.4968119117389197e-06, "loss": 0.5322, "step": 24624 }, { "epoch": 0.7547198725021455, "grad_norm": 1.7599461214154362, "learning_rate": 1.4964577976374112e-06, "loss": 0.5252, "step": 24625 }, { "epoch": 0.7547505210248866, "grad_norm": 1.7597030897085342, "learning_rate": 1.496103718057204e-06, "loss": 0.5854, "step": 24626 }, { "epoch": 0.7547811695476278, "grad_norm": 1.9088011107841938, "learning_rate": 1.4957496730017834e-06, "loss": 0.615, "step": 24627 }, { "epoch": 0.754811818070369, "grad_norm": 0.814187208503512, "learning_rate": 1.4953956624746369e-06, "loss": 0.3825, "step": 24628 }, { "epoch": 0.7548424665931102, "grad_norm": 1.9241490816539855, "learning_rate": 1.4950416864792528e-06, "loss": 0.5627, "step": 24629 }, { "epoch": 0.7548731151158514, "grad_norm": 1.9078801639641187, "learning_rate": 1.49468774501912e-06, "loss": 0.7265, "step": 24630 }, { "epoch": 0.7549037636385926, "grad_norm": 2.039436164189281, "learning_rate": 1.4943338380977274e-06, "loss": 0.6087, "step": 24631 }, { "epoch": 0.7549344121613338, "grad_norm": 1.6818688378674682, "learning_rate": 1.4939799657185589e-06, "loss": 0.4951, "step": 24632 }, { "epoch": 0.754965060684075, "grad_norm": 0.8071884081236316, "learning_rate": 1.4936261278851028e-06, "loss": 0.3929, "step": 24633 }, { "epoch": 0.7549957092068162, "grad_norm": 1.8218229979756644, "learning_rate": 1.4932723246008468e-06, "loss": 0.5823, "step": 24634 }, { "epoch": 0.7550263577295574, "grad_norm": 0.7972968359222046, "learning_rate": 1.4929185558692767e-06, "loss": 0.4074, "step": 24635 }, { "epoch": 0.7550570062522987, "grad_norm": 2.057382287057678, "learning_rate": 1.4925648216938732e-06, "loss": 0.5834, "step": 24636 }, { "epoch": 0.7550876547750398, "grad_norm": 2.094321980225072, "learning_rate": 1.4922111220781299e-06, "loss": 0.4844, "step": 24637 }, { "epoch": 0.7551183032977811, "grad_norm": 1.6821710304897564, "learning_rate": 1.4918574570255272e-06, "loss": 0.5638, "step": 24638 }, { "epoch": 0.7551489518205222, "grad_norm": 1.972173049990898, "learning_rate": 1.4915038265395498e-06, "loss": 0.6116, "step": 24639 }, { "epoch": 0.7551796003432635, "grad_norm": 1.8886458254555554, "learning_rate": 1.4911502306236825e-06, "loss": 0.5684, "step": 24640 }, { "epoch": 0.7552102488660046, "grad_norm": 1.8480488117207803, "learning_rate": 1.4907966692814096e-06, "loss": 0.6763, "step": 24641 }, { "epoch": 0.7552408973887459, "grad_norm": 2.008678460019266, "learning_rate": 1.4904431425162164e-06, "loss": 0.6087, "step": 24642 }, { "epoch": 0.755271545911487, "grad_norm": 2.1865655520675347, "learning_rate": 1.4900896503315836e-06, "loss": 0.617, "step": 24643 }, { "epoch": 0.7553021944342283, "grad_norm": 1.8644434835560069, "learning_rate": 1.4897361927309955e-06, "loss": 0.6455, "step": 24644 }, { "epoch": 0.7553328429569695, "grad_norm": 1.8740278061939053, "learning_rate": 1.489382769717937e-06, "loss": 0.6427, "step": 24645 }, { "epoch": 0.7553634914797107, "grad_norm": 1.8882230752259594, "learning_rate": 1.4890293812958873e-06, "loss": 0.6105, "step": 24646 }, { "epoch": 0.7553941400024519, "grad_norm": 1.7199419335933874, "learning_rate": 1.488676027468327e-06, "loss": 0.5787, "step": 24647 }, { "epoch": 0.7554247885251931, "grad_norm": 1.9464411499130705, "learning_rate": 1.488322708238743e-06, "loss": 0.5726, "step": 24648 }, { "epoch": 0.7554554370479343, "grad_norm": 1.9027533893433901, "learning_rate": 1.4879694236106124e-06, "loss": 0.6001, "step": 24649 }, { "epoch": 0.7554860855706755, "grad_norm": 1.8952258409116556, "learning_rate": 1.487616173587419e-06, "loss": 0.5243, "step": 24650 }, { "epoch": 0.7555167340934167, "grad_norm": 0.8355216516772395, "learning_rate": 1.4872629581726406e-06, "loss": 0.4265, "step": 24651 }, { "epoch": 0.755547382616158, "grad_norm": 1.7608579484380258, "learning_rate": 1.4869097773697594e-06, "loss": 0.5603, "step": 24652 }, { "epoch": 0.7555780311388991, "grad_norm": 1.722713707710728, "learning_rate": 1.4865566311822565e-06, "loss": 0.5923, "step": 24653 }, { "epoch": 0.7556086796616404, "grad_norm": 1.8030583082914682, "learning_rate": 1.4862035196136083e-06, "loss": 0.5761, "step": 24654 }, { "epoch": 0.7556393281843815, "grad_norm": 1.8831265900227094, "learning_rate": 1.485850442667296e-06, "loss": 0.6644, "step": 24655 }, { "epoch": 0.7556699767071228, "grad_norm": 1.7729355853591025, "learning_rate": 1.4854974003467997e-06, "loss": 0.5414, "step": 24656 }, { "epoch": 0.7557006252298639, "grad_norm": 1.6845946914677505, "learning_rate": 1.4851443926555953e-06, "loss": 0.5593, "step": 24657 }, { "epoch": 0.7557312737526051, "grad_norm": 1.5270843089942887, "learning_rate": 1.484791419597164e-06, "loss": 0.4976, "step": 24658 }, { "epoch": 0.7557619222753463, "grad_norm": 1.941704419449683, "learning_rate": 1.4844384811749812e-06, "loss": 0.5941, "step": 24659 }, { "epoch": 0.7557925707980875, "grad_norm": 1.6623236475302583, "learning_rate": 1.484085577392525e-06, "loss": 0.6312, "step": 24660 }, { "epoch": 0.7558232193208287, "grad_norm": 1.6510136024669113, "learning_rate": 1.4837327082532755e-06, "loss": 0.543, "step": 24661 }, { "epoch": 0.7558538678435699, "grad_norm": 1.7278670017857733, "learning_rate": 1.4833798737607046e-06, "loss": 0.5917, "step": 24662 }, { "epoch": 0.7558845163663112, "grad_norm": 0.7480996805562427, "learning_rate": 1.4830270739182922e-06, "loss": 0.4007, "step": 24663 }, { "epoch": 0.7559151648890523, "grad_norm": 1.5762614393889536, "learning_rate": 1.482674308729516e-06, "loss": 0.5283, "step": 24664 }, { "epoch": 0.7559458134117936, "grad_norm": 1.5985406049571464, "learning_rate": 1.482321578197849e-06, "loss": 0.5523, "step": 24665 }, { "epoch": 0.7559764619345347, "grad_norm": 1.8991352668612984, "learning_rate": 1.4819688823267647e-06, "loss": 0.5195, "step": 24666 }, { "epoch": 0.756007110457276, "grad_norm": 1.7304596023809136, "learning_rate": 1.4816162211197438e-06, "loss": 0.5748, "step": 24667 }, { "epoch": 0.7560377589800171, "grad_norm": 0.7847328273762514, "learning_rate": 1.4812635945802574e-06, "loss": 0.3931, "step": 24668 }, { "epoch": 0.7560684075027584, "grad_norm": 0.7904517230552177, "learning_rate": 1.480911002711783e-06, "loss": 0.4096, "step": 24669 }, { "epoch": 0.7560990560254995, "grad_norm": 2.1794738805740206, "learning_rate": 1.4805584455177908e-06, "loss": 0.5743, "step": 24670 }, { "epoch": 0.7561297045482408, "grad_norm": 0.7848361595015962, "learning_rate": 1.4802059230017568e-06, "loss": 0.3973, "step": 24671 }, { "epoch": 0.756160353070982, "grad_norm": 0.8425311661011755, "learning_rate": 1.4798534351671556e-06, "loss": 0.4158, "step": 24672 }, { "epoch": 0.7561910015937232, "grad_norm": 1.7250533851804233, "learning_rate": 1.4795009820174584e-06, "loss": 0.6371, "step": 24673 }, { "epoch": 0.7562216501164644, "grad_norm": 1.9245865574842744, "learning_rate": 1.4791485635561382e-06, "loss": 0.6118, "step": 24674 }, { "epoch": 0.7562522986392056, "grad_norm": 0.7929902593928305, "learning_rate": 1.4787961797866695e-06, "loss": 0.3968, "step": 24675 }, { "epoch": 0.7562829471619468, "grad_norm": 1.9012771767476844, "learning_rate": 1.478443830712522e-06, "loss": 0.5759, "step": 24676 }, { "epoch": 0.756313595684688, "grad_norm": 1.7831055500284172, "learning_rate": 1.4780915163371685e-06, "loss": 0.5448, "step": 24677 }, { "epoch": 0.7563442442074292, "grad_norm": 1.7239954430426363, "learning_rate": 1.4777392366640813e-06, "loss": 0.5104, "step": 24678 }, { "epoch": 0.7563748927301704, "grad_norm": 1.7983766665302785, "learning_rate": 1.4773869916967292e-06, "loss": 0.5869, "step": 24679 }, { "epoch": 0.7564055412529116, "grad_norm": 1.8382744057723133, "learning_rate": 1.4770347814385865e-06, "loss": 0.5653, "step": 24680 }, { "epoch": 0.7564361897756529, "grad_norm": 1.6405046609067837, "learning_rate": 1.4766826058931195e-06, "loss": 0.6224, "step": 24681 }, { "epoch": 0.756466838298394, "grad_norm": 1.6852507378364316, "learning_rate": 1.4763304650638e-06, "loss": 0.6316, "step": 24682 }, { "epoch": 0.7564974868211353, "grad_norm": 2.117075172253822, "learning_rate": 1.4759783589541e-06, "loss": 0.5764, "step": 24683 }, { "epoch": 0.7565281353438764, "grad_norm": 1.7980098201707089, "learning_rate": 1.4756262875674855e-06, "loss": 0.5066, "step": 24684 }, { "epoch": 0.7565587838666177, "grad_norm": 1.8188649425688874, "learning_rate": 1.4752742509074265e-06, "loss": 0.5666, "step": 24685 }, { "epoch": 0.7565894323893588, "grad_norm": 0.8033286801307418, "learning_rate": 1.474922248977394e-06, "loss": 0.4194, "step": 24686 }, { "epoch": 0.7566200809121001, "grad_norm": 1.738430694952628, "learning_rate": 1.4745702817808527e-06, "loss": 0.5387, "step": 24687 }, { "epoch": 0.7566507294348412, "grad_norm": 1.6741450785489254, "learning_rate": 1.4742183493212726e-06, "loss": 0.6702, "step": 24688 }, { "epoch": 0.7566813779575824, "grad_norm": 1.9267989532397496, "learning_rate": 1.4738664516021228e-06, "loss": 0.6499, "step": 24689 }, { "epoch": 0.7567120264803237, "grad_norm": 2.0183848136441425, "learning_rate": 1.473514588626867e-06, "loss": 0.7615, "step": 24690 }, { "epoch": 0.7567426750030648, "grad_norm": 0.8030391933350816, "learning_rate": 1.4731627603989762e-06, "loss": 0.4089, "step": 24691 }, { "epoch": 0.7567733235258061, "grad_norm": 1.9477816111756059, "learning_rate": 1.4728109669219137e-06, "loss": 0.6188, "step": 24692 }, { "epoch": 0.7568039720485472, "grad_norm": 1.8437384362086209, "learning_rate": 1.4724592081991473e-06, "loss": 0.5897, "step": 24693 }, { "epoch": 0.7568346205712885, "grad_norm": 0.774854524660453, "learning_rate": 1.4721074842341444e-06, "loss": 0.4026, "step": 24694 }, { "epoch": 0.7568652690940296, "grad_norm": 1.703005427570352, "learning_rate": 1.4717557950303669e-06, "loss": 0.6248, "step": 24695 }, { "epoch": 0.7568959176167709, "grad_norm": 1.8359933388741076, "learning_rate": 1.4714041405912828e-06, "loss": 0.5919, "step": 24696 }, { "epoch": 0.756926566139512, "grad_norm": 2.0751143095412687, "learning_rate": 1.471052520920358e-06, "loss": 0.5808, "step": 24697 }, { "epoch": 0.7569572146622533, "grad_norm": 1.806057008838536, "learning_rate": 1.4707009360210544e-06, "loss": 0.5747, "step": 24698 }, { "epoch": 0.7569878631849944, "grad_norm": 1.8723842858505315, "learning_rate": 1.470349385896837e-06, "loss": 0.5969, "step": 24699 }, { "epoch": 0.7570185117077357, "grad_norm": 1.7660501912102315, "learning_rate": 1.469997870551172e-06, "loss": 0.5984, "step": 24700 }, { "epoch": 0.7570491602304769, "grad_norm": 1.8301619814637122, "learning_rate": 1.4696463899875196e-06, "loss": 0.6579, "step": 24701 }, { "epoch": 0.7570798087532181, "grad_norm": 1.760177316722251, "learning_rate": 1.4692949442093462e-06, "loss": 0.5114, "step": 24702 }, { "epoch": 0.7571104572759593, "grad_norm": 1.936463545654551, "learning_rate": 1.4689435332201113e-06, "loss": 0.5564, "step": 24703 }, { "epoch": 0.7571411057987005, "grad_norm": 1.711193763553575, "learning_rate": 1.4685921570232797e-06, "loss": 0.5568, "step": 24704 }, { "epoch": 0.7571717543214417, "grad_norm": 2.0063907975828577, "learning_rate": 1.4682408156223144e-06, "loss": 0.6441, "step": 24705 }, { "epoch": 0.7572024028441829, "grad_norm": 1.8290375603813638, "learning_rate": 1.467889509020674e-06, "loss": 0.5596, "step": 24706 }, { "epoch": 0.7572330513669241, "grad_norm": 2.108986627724803, "learning_rate": 1.4675382372218227e-06, "loss": 0.5842, "step": 24707 }, { "epoch": 0.7572636998896654, "grad_norm": 1.71338857161079, "learning_rate": 1.4671870002292221e-06, "loss": 0.6954, "step": 24708 }, { "epoch": 0.7572943484124065, "grad_norm": 1.7973196237630913, "learning_rate": 1.4668357980463305e-06, "loss": 0.5638, "step": 24709 }, { "epoch": 0.7573249969351478, "grad_norm": 2.1886221873410716, "learning_rate": 1.4664846306766112e-06, "loss": 0.6743, "step": 24710 }, { "epoch": 0.7573556454578889, "grad_norm": 0.7472112088605755, "learning_rate": 1.4661334981235215e-06, "loss": 0.3879, "step": 24711 }, { "epoch": 0.7573862939806302, "grad_norm": 1.7766886912585755, "learning_rate": 1.4657824003905225e-06, "loss": 0.4694, "step": 24712 }, { "epoch": 0.7574169425033713, "grad_norm": 1.851418519151221, "learning_rate": 1.4654313374810752e-06, "loss": 0.6269, "step": 24713 }, { "epoch": 0.7574475910261126, "grad_norm": 1.9277424404619412, "learning_rate": 1.465080309398636e-06, "loss": 0.5925, "step": 24714 }, { "epoch": 0.7574782395488537, "grad_norm": 1.9350892763498928, "learning_rate": 1.4647293161466652e-06, "loss": 0.6088, "step": 24715 }, { "epoch": 0.757508888071595, "grad_norm": 1.7171351865481506, "learning_rate": 1.4643783577286225e-06, "loss": 0.5771, "step": 24716 }, { "epoch": 0.7575395365943361, "grad_norm": 1.845653795195688, "learning_rate": 1.4640274341479621e-06, "loss": 0.5826, "step": 24717 }, { "epoch": 0.7575701851170774, "grad_norm": 1.9913968440621408, "learning_rate": 1.4636765454081447e-06, "loss": 0.6053, "step": 24718 }, { "epoch": 0.7576008336398186, "grad_norm": 1.7960180871040636, "learning_rate": 1.4633256915126287e-06, "loss": 0.5633, "step": 24719 }, { "epoch": 0.7576314821625597, "grad_norm": 1.8328809737150422, "learning_rate": 1.462974872464868e-06, "loss": 0.6381, "step": 24720 }, { "epoch": 0.757662130685301, "grad_norm": 1.8393922586018288, "learning_rate": 1.4626240882683224e-06, "loss": 0.5282, "step": 24721 }, { "epoch": 0.7576927792080421, "grad_norm": 1.9178077743594095, "learning_rate": 1.4622733389264438e-06, "loss": 0.6187, "step": 24722 }, { "epoch": 0.7577234277307834, "grad_norm": 1.6211556606329525, "learning_rate": 1.4619226244426938e-06, "loss": 0.5677, "step": 24723 }, { "epoch": 0.7577540762535245, "grad_norm": 1.8342053955457527, "learning_rate": 1.4615719448205257e-06, "loss": 0.5897, "step": 24724 }, { "epoch": 0.7577847247762658, "grad_norm": 1.9020701964444686, "learning_rate": 1.4612213000633929e-06, "loss": 0.5903, "step": 24725 }, { "epoch": 0.757815373299007, "grad_norm": 2.0804448492378222, "learning_rate": 1.4608706901747527e-06, "loss": 0.6823, "step": 24726 }, { "epoch": 0.7578460218217482, "grad_norm": 1.5903663243662705, "learning_rate": 1.46052011515806e-06, "loss": 0.6373, "step": 24727 }, { "epoch": 0.7578766703444894, "grad_norm": 0.7893823247481232, "learning_rate": 1.4601695750167667e-06, "loss": 0.3838, "step": 24728 }, { "epoch": 0.7579073188672306, "grad_norm": 1.8042012649200998, "learning_rate": 1.4598190697543286e-06, "loss": 0.6062, "step": 24729 }, { "epoch": 0.7579379673899718, "grad_norm": 1.8962805143439276, "learning_rate": 1.4594685993741987e-06, "loss": 0.6552, "step": 24730 }, { "epoch": 0.757968615912713, "grad_norm": 1.859063883801317, "learning_rate": 1.4591181638798318e-06, "loss": 0.6068, "step": 24731 }, { "epoch": 0.7579992644354542, "grad_norm": 1.7753169121031556, "learning_rate": 1.4587677632746805e-06, "loss": 0.5204, "step": 24732 }, { "epoch": 0.7580299129581954, "grad_norm": 1.7214253701443523, "learning_rate": 1.458417397562193e-06, "loss": 0.712, "step": 24733 }, { "epoch": 0.7580605614809366, "grad_norm": 1.960270196262818, "learning_rate": 1.458067066745828e-06, "loss": 0.5919, "step": 24734 }, { "epoch": 0.7580912100036779, "grad_norm": 1.979938192989257, "learning_rate": 1.457716770829034e-06, "loss": 0.613, "step": 24735 }, { "epoch": 0.758121858526419, "grad_norm": 2.4137558307553753, "learning_rate": 1.4573665098152622e-06, "loss": 0.6192, "step": 24736 }, { "epoch": 0.7581525070491603, "grad_norm": 1.712240524353611, "learning_rate": 1.4570162837079644e-06, "loss": 0.6219, "step": 24737 }, { "epoch": 0.7581831555719014, "grad_norm": 0.8222432565568124, "learning_rate": 1.456666092510593e-06, "loss": 0.4109, "step": 24738 }, { "epoch": 0.7582138040946427, "grad_norm": 0.8010690553773673, "learning_rate": 1.456315936226596e-06, "loss": 0.3942, "step": 24739 }, { "epoch": 0.7582444526173838, "grad_norm": 0.7932475611541061, "learning_rate": 1.4559658148594246e-06, "loss": 0.3917, "step": 24740 }, { "epoch": 0.7582751011401251, "grad_norm": 1.7872839978604598, "learning_rate": 1.4556157284125283e-06, "loss": 0.5204, "step": 24741 }, { "epoch": 0.7583057496628662, "grad_norm": 1.7533517601116677, "learning_rate": 1.4552656768893593e-06, "loss": 0.5188, "step": 24742 }, { "epoch": 0.7583363981856075, "grad_norm": 0.8342079608546024, "learning_rate": 1.4549156602933644e-06, "loss": 0.4009, "step": 24743 }, { "epoch": 0.7583670467083486, "grad_norm": 1.921266397765172, "learning_rate": 1.4545656786279894e-06, "loss": 0.6322, "step": 24744 }, { "epoch": 0.7583976952310899, "grad_norm": 1.8427788100838607, "learning_rate": 1.4542157318966894e-06, "loss": 0.5749, "step": 24745 }, { "epoch": 0.7584283437538311, "grad_norm": 1.7092636020844945, "learning_rate": 1.4538658201029082e-06, "loss": 0.6577, "step": 24746 }, { "epoch": 0.7584589922765723, "grad_norm": 0.7901998474229884, "learning_rate": 1.4535159432500933e-06, "loss": 0.3881, "step": 24747 }, { "epoch": 0.7584896407993135, "grad_norm": 1.8142924347668354, "learning_rate": 1.4531661013416932e-06, "loss": 0.5254, "step": 24748 }, { "epoch": 0.7585202893220547, "grad_norm": 1.7644307108045698, "learning_rate": 1.4528162943811552e-06, "loss": 0.5341, "step": 24749 }, { "epoch": 0.7585509378447959, "grad_norm": 1.6993125548785044, "learning_rate": 1.452466522371927e-06, "loss": 0.6167, "step": 24750 }, { "epoch": 0.758581586367537, "grad_norm": 1.9781871558042705, "learning_rate": 1.4521167853174523e-06, "loss": 0.5762, "step": 24751 }, { "epoch": 0.7586122348902783, "grad_norm": 1.9430667217809898, "learning_rate": 1.4517670832211789e-06, "loss": 0.539, "step": 24752 }, { "epoch": 0.7586428834130194, "grad_norm": 1.7566465705242382, "learning_rate": 1.4514174160865541e-06, "loss": 0.5449, "step": 24753 }, { "epoch": 0.7586735319357607, "grad_norm": 1.9351791427970815, "learning_rate": 1.4510677839170213e-06, "loss": 0.5468, "step": 24754 }, { "epoch": 0.7587041804585019, "grad_norm": 1.980453251820995, "learning_rate": 1.4507181867160237e-06, "loss": 0.6119, "step": 24755 }, { "epoch": 0.7587348289812431, "grad_norm": 2.3391244161032994, "learning_rate": 1.450368624487008e-06, "loss": 0.6103, "step": 24756 }, { "epoch": 0.7587654775039843, "grad_norm": 0.8203580643571775, "learning_rate": 1.4500190972334188e-06, "loss": 0.4117, "step": 24757 }, { "epoch": 0.7587961260267255, "grad_norm": 1.8953479057452098, "learning_rate": 1.449669604958701e-06, "loss": 0.541, "step": 24758 }, { "epoch": 0.7588267745494667, "grad_norm": 1.8487752483531885, "learning_rate": 1.4493201476662956e-06, "loss": 0.504, "step": 24759 }, { "epoch": 0.7588574230722079, "grad_norm": 1.8384662370655451, "learning_rate": 1.4489707253596468e-06, "loss": 0.6134, "step": 24760 }, { "epoch": 0.7588880715949491, "grad_norm": 1.6937491886795397, "learning_rate": 1.4486213380422003e-06, "loss": 0.6426, "step": 24761 }, { "epoch": 0.7589187201176903, "grad_norm": 1.9042606326552984, "learning_rate": 1.4482719857173961e-06, "loss": 0.6454, "step": 24762 }, { "epoch": 0.7589493686404315, "grad_norm": 0.8244348025938641, "learning_rate": 1.4479226683886731e-06, "loss": 0.3775, "step": 24763 }, { "epoch": 0.7589800171631728, "grad_norm": 2.0917963475755244, "learning_rate": 1.447573386059481e-06, "loss": 0.6319, "step": 24764 }, { "epoch": 0.7590106656859139, "grad_norm": 1.6374510639404336, "learning_rate": 1.4472241387332565e-06, "loss": 0.5725, "step": 24765 }, { "epoch": 0.7590413142086552, "grad_norm": 2.038303793043331, "learning_rate": 1.4468749264134401e-06, "loss": 0.5514, "step": 24766 }, { "epoch": 0.7590719627313963, "grad_norm": 1.6284843356956964, "learning_rate": 1.4465257491034746e-06, "loss": 0.4299, "step": 24767 }, { "epoch": 0.7591026112541376, "grad_norm": 1.7713901250292368, "learning_rate": 1.4461766068068e-06, "loss": 0.5889, "step": 24768 }, { "epoch": 0.7591332597768787, "grad_norm": 1.8633236429393083, "learning_rate": 1.4458274995268578e-06, "loss": 0.627, "step": 24769 }, { "epoch": 0.75916390829962, "grad_norm": 1.7907910346405518, "learning_rate": 1.4454784272670851e-06, "loss": 0.6282, "step": 24770 }, { "epoch": 0.7591945568223611, "grad_norm": 1.778010742258258, "learning_rate": 1.4451293900309233e-06, "loss": 0.5667, "step": 24771 }, { "epoch": 0.7592252053451024, "grad_norm": 1.7305411993952287, "learning_rate": 1.4447803878218126e-06, "loss": 0.5888, "step": 24772 }, { "epoch": 0.7592558538678436, "grad_norm": 1.9789453531059946, "learning_rate": 1.4444314206431904e-06, "loss": 0.5956, "step": 24773 }, { "epoch": 0.7592865023905848, "grad_norm": 0.7852149191820815, "learning_rate": 1.4440824884984917e-06, "loss": 0.3978, "step": 24774 }, { "epoch": 0.759317150913326, "grad_norm": 1.8946451484719578, "learning_rate": 1.4437335913911615e-06, "loss": 0.6011, "step": 24775 }, { "epoch": 0.7593477994360672, "grad_norm": 1.9356736434699473, "learning_rate": 1.4433847293246322e-06, "loss": 0.5875, "step": 24776 }, { "epoch": 0.7593784479588084, "grad_norm": 1.9811152827722696, "learning_rate": 1.443035902302345e-06, "loss": 0.637, "step": 24777 }, { "epoch": 0.7594090964815496, "grad_norm": 1.8096242296074294, "learning_rate": 1.4426871103277334e-06, "loss": 0.6052, "step": 24778 }, { "epoch": 0.7594397450042908, "grad_norm": 1.7564603669580228, "learning_rate": 1.4423383534042361e-06, "loss": 0.551, "step": 24779 }, { "epoch": 0.759470393527032, "grad_norm": 1.8457087650723463, "learning_rate": 1.441989631535291e-06, "loss": 0.566, "step": 24780 }, { "epoch": 0.7595010420497732, "grad_norm": 0.7832115793019261, "learning_rate": 1.4416409447243307e-06, "loss": 0.391, "step": 24781 }, { "epoch": 0.7595316905725144, "grad_norm": 1.8371786961142225, "learning_rate": 1.4412922929747925e-06, "loss": 0.5425, "step": 24782 }, { "epoch": 0.7595623390952556, "grad_norm": 1.564121631914217, "learning_rate": 1.4409436762901135e-06, "loss": 0.4737, "step": 24783 }, { "epoch": 0.7595929876179968, "grad_norm": 1.6030723367643902, "learning_rate": 1.4405950946737258e-06, "loss": 0.5089, "step": 24784 }, { "epoch": 0.759623636140738, "grad_norm": 1.9539386283465006, "learning_rate": 1.4402465481290646e-06, "loss": 0.6093, "step": 24785 }, { "epoch": 0.7596542846634792, "grad_norm": 2.035306341948478, "learning_rate": 1.4398980366595672e-06, "loss": 0.6813, "step": 24786 }, { "epoch": 0.7596849331862204, "grad_norm": 2.020153089297788, "learning_rate": 1.4395495602686631e-06, "loss": 0.5841, "step": 24787 }, { "epoch": 0.7597155817089616, "grad_norm": 1.7304206892961354, "learning_rate": 1.4392011189597903e-06, "loss": 0.6136, "step": 24788 }, { "epoch": 0.7597462302317028, "grad_norm": 0.782238169184541, "learning_rate": 1.4388527127363784e-06, "loss": 0.3897, "step": 24789 }, { "epoch": 0.759776878754444, "grad_norm": 1.770648198402609, "learning_rate": 1.4385043416018618e-06, "loss": 0.6552, "step": 24790 }, { "epoch": 0.7598075272771853, "grad_norm": 1.6556922088465034, "learning_rate": 1.438156005559675e-06, "loss": 0.5668, "step": 24791 }, { "epoch": 0.7598381757999264, "grad_norm": 0.7725598137860805, "learning_rate": 1.437807704613246e-06, "loss": 0.4067, "step": 24792 }, { "epoch": 0.7598688243226677, "grad_norm": 0.8367428778761083, "learning_rate": 1.4374594387660096e-06, "loss": 0.3946, "step": 24793 }, { "epoch": 0.7598994728454088, "grad_norm": 1.8077585379617718, "learning_rate": 1.437111208021399e-06, "loss": 0.6129, "step": 24794 }, { "epoch": 0.7599301213681501, "grad_norm": 1.521918441181058, "learning_rate": 1.4367630123828407e-06, "loss": 0.5651, "step": 24795 }, { "epoch": 0.7599607698908912, "grad_norm": 0.824561363487711, "learning_rate": 1.4364148518537685e-06, "loss": 0.4165, "step": 24796 }, { "epoch": 0.7599914184136325, "grad_norm": 0.8145242630841576, "learning_rate": 1.4360667264376144e-06, "loss": 0.4024, "step": 24797 }, { "epoch": 0.7600220669363736, "grad_norm": 1.7878674821972458, "learning_rate": 1.435718636137805e-06, "loss": 0.5923, "step": 24798 }, { "epoch": 0.7600527154591149, "grad_norm": 0.7908658724696284, "learning_rate": 1.4353705809577735e-06, "loss": 0.391, "step": 24799 }, { "epoch": 0.760083363981856, "grad_norm": 1.7497627549476622, "learning_rate": 1.435022560900946e-06, "loss": 0.4982, "step": 24800 }, { "epoch": 0.7601140125045973, "grad_norm": 0.7644629105469888, "learning_rate": 1.4346745759707531e-06, "loss": 0.3999, "step": 24801 }, { "epoch": 0.7601446610273385, "grad_norm": 1.7188363113895992, "learning_rate": 1.4343266261706257e-06, "loss": 0.5263, "step": 24802 }, { "epoch": 0.7601753095500797, "grad_norm": 1.9575518426543839, "learning_rate": 1.4339787115039887e-06, "loss": 0.5641, "step": 24803 }, { "epoch": 0.7602059580728209, "grad_norm": 0.7663223554994002, "learning_rate": 1.433630831974272e-06, "loss": 0.4022, "step": 24804 }, { "epoch": 0.7602366065955621, "grad_norm": 1.6644154594106835, "learning_rate": 1.4332829875849047e-06, "loss": 0.4906, "step": 24805 }, { "epoch": 0.7602672551183033, "grad_norm": 1.853901642435731, "learning_rate": 1.4329351783393114e-06, "loss": 0.5756, "step": 24806 }, { "epoch": 0.7602979036410445, "grad_norm": 1.7012388673246064, "learning_rate": 1.4325874042409215e-06, "loss": 0.5242, "step": 24807 }, { "epoch": 0.7603285521637857, "grad_norm": 2.113676732905704, "learning_rate": 1.4322396652931592e-06, "loss": 0.6477, "step": 24808 }, { "epoch": 0.760359200686527, "grad_norm": 0.8251993609303115, "learning_rate": 1.4318919614994525e-06, "loss": 0.4005, "step": 24809 }, { "epoch": 0.7603898492092681, "grad_norm": 0.7813391839662355, "learning_rate": 1.431544292863229e-06, "loss": 0.3859, "step": 24810 }, { "epoch": 0.7604204977320094, "grad_norm": 1.678479217918481, "learning_rate": 1.4311966593879106e-06, "loss": 0.5148, "step": 24811 }, { "epoch": 0.7604511462547505, "grad_norm": 0.8143410180170565, "learning_rate": 1.430849061076925e-06, "loss": 0.3975, "step": 24812 }, { "epoch": 0.7604817947774917, "grad_norm": 1.8140844601682906, "learning_rate": 1.4305014979336983e-06, "loss": 0.6019, "step": 24813 }, { "epoch": 0.7605124433002329, "grad_norm": 0.778638496331109, "learning_rate": 1.4301539699616523e-06, "loss": 0.411, "step": 24814 }, { "epoch": 0.7605430918229741, "grad_norm": 1.8397520277852732, "learning_rate": 1.4298064771642123e-06, "loss": 0.5794, "step": 24815 }, { "epoch": 0.7605737403457153, "grad_norm": 2.1350998047518766, "learning_rate": 1.4294590195448039e-06, "loss": 0.6065, "step": 24816 }, { "epoch": 0.7606043888684565, "grad_norm": 0.8148430868815184, "learning_rate": 1.4291115971068482e-06, "loss": 0.4232, "step": 24817 }, { "epoch": 0.7606350373911978, "grad_norm": 1.7961943100148297, "learning_rate": 1.4287642098537707e-06, "loss": 0.6231, "step": 24818 }, { "epoch": 0.7606656859139389, "grad_norm": 1.7310450321840625, "learning_rate": 1.4284168577889924e-06, "loss": 0.594, "step": 24819 }, { "epoch": 0.7606963344366802, "grad_norm": 2.134739374410096, "learning_rate": 1.4280695409159357e-06, "loss": 0.6113, "step": 24820 }, { "epoch": 0.7607269829594213, "grad_norm": 2.233427196526304, "learning_rate": 1.4277222592380259e-06, "loss": 0.6675, "step": 24821 }, { "epoch": 0.7607576314821626, "grad_norm": 0.7965259999138242, "learning_rate": 1.4273750127586811e-06, "loss": 0.3832, "step": 24822 }, { "epoch": 0.7607882800049037, "grad_norm": 1.9408997830210273, "learning_rate": 1.4270278014813244e-06, "loss": 0.5511, "step": 24823 }, { "epoch": 0.760818928527645, "grad_norm": 1.957989102643923, "learning_rate": 1.426680625409378e-06, "loss": 0.595, "step": 24824 }, { "epoch": 0.7608495770503861, "grad_norm": 1.974418949804892, "learning_rate": 1.4263334845462606e-06, "loss": 0.6068, "step": 24825 }, { "epoch": 0.7608802255731274, "grad_norm": 1.7594658962763288, "learning_rate": 1.4259863788953938e-06, "loss": 0.5689, "step": 24826 }, { "epoch": 0.7609108740958686, "grad_norm": 1.8073737300056343, "learning_rate": 1.4256393084601987e-06, "loss": 0.5536, "step": 24827 }, { "epoch": 0.7609415226186098, "grad_norm": 1.7493487369252028, "learning_rate": 1.4252922732440926e-06, "loss": 0.5955, "step": 24828 }, { "epoch": 0.760972171141351, "grad_norm": 0.7906862577784701, "learning_rate": 1.4249452732504987e-06, "loss": 0.397, "step": 24829 }, { "epoch": 0.7610028196640922, "grad_norm": 1.8354956849987498, "learning_rate": 1.4245983084828296e-06, "loss": 0.5829, "step": 24830 }, { "epoch": 0.7610334681868334, "grad_norm": 0.7627335762876938, "learning_rate": 1.4242513789445117e-06, "loss": 0.4112, "step": 24831 }, { "epoch": 0.7610641167095746, "grad_norm": 0.8317372577339103, "learning_rate": 1.42390448463896e-06, "loss": 0.386, "step": 24832 }, { "epoch": 0.7610947652323158, "grad_norm": 1.7492095230029094, "learning_rate": 1.423557625569591e-06, "loss": 0.5422, "step": 24833 }, { "epoch": 0.761125413755057, "grad_norm": 0.7514528705665021, "learning_rate": 1.4232108017398232e-06, "loss": 0.3749, "step": 24834 }, { "epoch": 0.7611560622777982, "grad_norm": 1.9307100781188056, "learning_rate": 1.4228640131530764e-06, "loss": 0.5152, "step": 24835 }, { "epoch": 0.7611867108005395, "grad_norm": 1.8200023435394361, "learning_rate": 1.4225172598127645e-06, "loss": 0.6405, "step": 24836 }, { "epoch": 0.7612173593232806, "grad_norm": 1.8531711441765313, "learning_rate": 1.4221705417223047e-06, "loss": 0.6496, "step": 24837 }, { "epoch": 0.7612480078460219, "grad_norm": 1.6195091896491216, "learning_rate": 1.4218238588851147e-06, "loss": 0.5242, "step": 24838 }, { "epoch": 0.761278656368763, "grad_norm": 1.8224569150178365, "learning_rate": 1.421477211304611e-06, "loss": 0.5698, "step": 24839 }, { "epoch": 0.7613093048915043, "grad_norm": 1.9490161469337277, "learning_rate": 1.4211305989842079e-06, "loss": 0.5948, "step": 24840 }, { "epoch": 0.7613399534142454, "grad_norm": 1.9633288945671508, "learning_rate": 1.4207840219273172e-06, "loss": 0.588, "step": 24841 }, { "epoch": 0.7613706019369867, "grad_norm": 1.837340614340622, "learning_rate": 1.420437480137361e-06, "loss": 0.3886, "step": 24842 }, { "epoch": 0.7614012504597278, "grad_norm": 1.607728553270485, "learning_rate": 1.4200909736177497e-06, "loss": 0.5569, "step": 24843 }, { "epoch": 0.761431898982469, "grad_norm": 2.470892865979138, "learning_rate": 1.419744502371897e-06, "loss": 0.6172, "step": 24844 }, { "epoch": 0.7614625475052103, "grad_norm": 2.072498506200291, "learning_rate": 1.4193980664032176e-06, "loss": 0.5541, "step": 24845 }, { "epoch": 0.7614931960279514, "grad_norm": 1.7408814855017278, "learning_rate": 1.4190516657151266e-06, "loss": 0.5833, "step": 24846 }, { "epoch": 0.7615238445506927, "grad_norm": 1.8169459880995604, "learning_rate": 1.4187053003110341e-06, "loss": 0.5726, "step": 24847 }, { "epoch": 0.7615544930734338, "grad_norm": 1.8851596592223785, "learning_rate": 1.4183589701943545e-06, "loss": 0.6063, "step": 24848 }, { "epoch": 0.7615851415961751, "grad_norm": 1.7660484446943578, "learning_rate": 1.4180126753685008e-06, "loss": 0.578, "step": 24849 }, { "epoch": 0.7616157901189162, "grad_norm": 1.8445433053567581, "learning_rate": 1.4176664158368857e-06, "loss": 0.6038, "step": 24850 }, { "epoch": 0.7616464386416575, "grad_norm": 1.832939456536706, "learning_rate": 1.4173201916029206e-06, "loss": 0.5757, "step": 24851 }, { "epoch": 0.7616770871643986, "grad_norm": 1.8365424999973972, "learning_rate": 1.4169740026700146e-06, "loss": 0.59, "step": 24852 }, { "epoch": 0.7617077356871399, "grad_norm": 1.7005357371855532, "learning_rate": 1.4166278490415802e-06, "loss": 0.5794, "step": 24853 }, { "epoch": 0.761738384209881, "grad_norm": 1.7602093771464948, "learning_rate": 1.4162817307210303e-06, "loss": 0.5338, "step": 24854 }, { "epoch": 0.7617690327326223, "grad_norm": 2.0513266136399984, "learning_rate": 1.4159356477117714e-06, "loss": 0.6258, "step": 24855 }, { "epoch": 0.7617996812553635, "grad_norm": 1.8379121534505374, "learning_rate": 1.4155896000172164e-06, "loss": 0.6696, "step": 24856 }, { "epoch": 0.7618303297781047, "grad_norm": 1.7129515427300948, "learning_rate": 1.4152435876407733e-06, "loss": 0.5297, "step": 24857 }, { "epoch": 0.7618609783008459, "grad_norm": 1.8524648750452837, "learning_rate": 1.4148976105858548e-06, "loss": 0.5596, "step": 24858 }, { "epoch": 0.7618916268235871, "grad_norm": 1.911838077409093, "learning_rate": 1.4145516688558669e-06, "loss": 0.5268, "step": 24859 }, { "epoch": 0.7619222753463283, "grad_norm": 1.7829234786685924, "learning_rate": 1.4142057624542156e-06, "loss": 0.6509, "step": 24860 }, { "epoch": 0.7619529238690695, "grad_norm": 1.8111287699714333, "learning_rate": 1.4138598913843154e-06, "loss": 0.5419, "step": 24861 }, { "epoch": 0.7619835723918107, "grad_norm": 1.9486554362360693, "learning_rate": 1.413514055649572e-06, "loss": 0.6206, "step": 24862 }, { "epoch": 0.762014220914552, "grad_norm": 1.9116549083595824, "learning_rate": 1.41316825525339e-06, "loss": 0.616, "step": 24863 }, { "epoch": 0.7620448694372931, "grad_norm": 1.8744680397346953, "learning_rate": 1.4128224901991788e-06, "loss": 0.688, "step": 24864 }, { "epoch": 0.7620755179600344, "grad_norm": 1.688018071807526, "learning_rate": 1.4124767604903472e-06, "loss": 0.5936, "step": 24865 }, { "epoch": 0.7621061664827755, "grad_norm": 2.0575074275335363, "learning_rate": 1.412131066130299e-06, "loss": 0.5957, "step": 24866 }, { "epoch": 0.7621368150055168, "grad_norm": 1.8706998932558057, "learning_rate": 1.4117854071224408e-06, "loss": 0.575, "step": 24867 }, { "epoch": 0.7621674635282579, "grad_norm": 1.8142252958157645, "learning_rate": 1.4114397834701788e-06, "loss": 0.5987, "step": 24868 }, { "epoch": 0.7621981120509992, "grad_norm": 1.6803621728130749, "learning_rate": 1.4110941951769208e-06, "loss": 0.5675, "step": 24869 }, { "epoch": 0.7622287605737403, "grad_norm": 1.9016181449495553, "learning_rate": 1.4107486422460698e-06, "loss": 0.6602, "step": 24870 }, { "epoch": 0.7622594090964816, "grad_norm": 1.862390536293591, "learning_rate": 1.4104031246810278e-06, "loss": 0.6097, "step": 24871 }, { "epoch": 0.7622900576192227, "grad_norm": 1.9153575412322905, "learning_rate": 1.4100576424852052e-06, "loss": 0.5965, "step": 24872 }, { "epoch": 0.762320706141964, "grad_norm": 2.030390655144899, "learning_rate": 1.4097121956620036e-06, "loss": 0.6209, "step": 24873 }, { "epoch": 0.7623513546647052, "grad_norm": 1.6499079821264857, "learning_rate": 1.4093667842148245e-06, "loss": 0.5878, "step": 24874 }, { "epoch": 0.7623820031874463, "grad_norm": 0.8382257019697181, "learning_rate": 1.4090214081470726e-06, "loss": 0.4023, "step": 24875 }, { "epoch": 0.7624126517101876, "grad_norm": 1.6823903597411678, "learning_rate": 1.4086760674621525e-06, "loss": 0.5896, "step": 24876 }, { "epoch": 0.7624433002329287, "grad_norm": 1.7707289787206142, "learning_rate": 1.4083307621634673e-06, "loss": 0.5531, "step": 24877 }, { "epoch": 0.76247394875567, "grad_norm": 2.023966947816422, "learning_rate": 1.407985492254416e-06, "loss": 0.5443, "step": 24878 }, { "epoch": 0.7625045972784111, "grad_norm": 1.9497429049593025, "learning_rate": 1.407640257738403e-06, "loss": 0.7019, "step": 24879 }, { "epoch": 0.7625352458011524, "grad_norm": 2.018745094943233, "learning_rate": 1.4072950586188316e-06, "loss": 0.5569, "step": 24880 }, { "epoch": 0.7625658943238935, "grad_norm": 1.9865936534014355, "learning_rate": 1.4069498948991007e-06, "loss": 0.5767, "step": 24881 }, { "epoch": 0.7625965428466348, "grad_norm": 1.5147020880424993, "learning_rate": 1.4066047665826089e-06, "loss": 0.513, "step": 24882 }, { "epoch": 0.762627191369376, "grad_norm": 1.7779868110485133, "learning_rate": 1.406259673672763e-06, "loss": 0.6013, "step": 24883 }, { "epoch": 0.7626578398921172, "grad_norm": 1.9107065378707624, "learning_rate": 1.405914616172958e-06, "loss": 0.5394, "step": 24884 }, { "epoch": 0.7626884884148584, "grad_norm": 1.8953103274434866, "learning_rate": 1.4055695940865983e-06, "loss": 0.6444, "step": 24885 }, { "epoch": 0.7627191369375996, "grad_norm": 1.949382250160696, "learning_rate": 1.405224607417079e-06, "loss": 0.5751, "step": 24886 }, { "epoch": 0.7627497854603408, "grad_norm": 1.8139128050511866, "learning_rate": 1.4048796561678012e-06, "loss": 0.6092, "step": 24887 }, { "epoch": 0.762780433983082, "grad_norm": 2.0030579117961325, "learning_rate": 1.4045347403421656e-06, "loss": 0.6021, "step": 24888 }, { "epoch": 0.7628110825058232, "grad_norm": 1.9322257169094863, "learning_rate": 1.4041898599435678e-06, "loss": 0.6244, "step": 24889 }, { "epoch": 0.7628417310285645, "grad_norm": 1.9079506309442842, "learning_rate": 1.4038450149754074e-06, "loss": 0.6374, "step": 24890 }, { "epoch": 0.7628723795513056, "grad_norm": 0.7751866672068466, "learning_rate": 1.4035002054410835e-06, "loss": 0.3769, "step": 24891 }, { "epoch": 0.7629030280740469, "grad_norm": 1.9289707920804815, "learning_rate": 1.4031554313439921e-06, "loss": 0.6611, "step": 24892 }, { "epoch": 0.762933676596788, "grad_norm": 1.7605791458647393, "learning_rate": 1.402810692687528e-06, "loss": 0.5367, "step": 24893 }, { "epoch": 0.7629643251195293, "grad_norm": 1.7971450227666037, "learning_rate": 1.402465989475093e-06, "loss": 0.517, "step": 24894 }, { "epoch": 0.7629949736422704, "grad_norm": 1.835132311164271, "learning_rate": 1.4021213217100805e-06, "loss": 0.5406, "step": 24895 }, { "epoch": 0.7630256221650117, "grad_norm": 1.9413492059729383, "learning_rate": 1.4017766893958878e-06, "loss": 0.6367, "step": 24896 }, { "epoch": 0.7630562706877528, "grad_norm": 2.0068634059008104, "learning_rate": 1.4014320925359086e-06, "loss": 0.6407, "step": 24897 }, { "epoch": 0.7630869192104941, "grad_norm": 2.009243072679063, "learning_rate": 1.4010875311335398e-06, "loss": 0.5591, "step": 24898 }, { "epoch": 0.7631175677332352, "grad_norm": 0.7946735595618257, "learning_rate": 1.4007430051921783e-06, "loss": 0.4045, "step": 24899 }, { "epoch": 0.7631482162559765, "grad_norm": 0.7926675857560941, "learning_rate": 1.400398514715215e-06, "loss": 0.4131, "step": 24900 }, { "epoch": 0.7631788647787177, "grad_norm": 0.8254913957398085, "learning_rate": 1.4000540597060463e-06, "loss": 0.42, "step": 24901 }, { "epoch": 0.7632095133014589, "grad_norm": 1.7655095874184261, "learning_rate": 1.3997096401680672e-06, "loss": 0.6443, "step": 24902 }, { "epoch": 0.7632401618242001, "grad_norm": 1.811723545465187, "learning_rate": 1.399365256104669e-06, "loss": 0.5312, "step": 24903 }, { "epoch": 0.7632708103469413, "grad_norm": 1.640536256960796, "learning_rate": 1.3990209075192473e-06, "loss": 0.4824, "step": 24904 }, { "epoch": 0.7633014588696825, "grad_norm": 2.0396752712735777, "learning_rate": 1.3986765944151932e-06, "loss": 0.6559, "step": 24905 }, { "epoch": 0.7633321073924236, "grad_norm": 1.8155598877926213, "learning_rate": 1.3983323167958996e-06, "loss": 0.6331, "step": 24906 }, { "epoch": 0.7633627559151649, "grad_norm": 1.6742470289880542, "learning_rate": 1.3979880746647607e-06, "loss": 0.6241, "step": 24907 }, { "epoch": 0.763393404437906, "grad_norm": 0.8016619679732142, "learning_rate": 1.3976438680251652e-06, "loss": 0.4129, "step": 24908 }, { "epoch": 0.7634240529606473, "grad_norm": 1.786936972968552, "learning_rate": 1.397299696880507e-06, "loss": 0.4815, "step": 24909 }, { "epoch": 0.7634547014833885, "grad_norm": 0.7904584006646125, "learning_rate": 1.3969555612341773e-06, "loss": 0.3966, "step": 24910 }, { "epoch": 0.7634853500061297, "grad_norm": 0.7664196777056085, "learning_rate": 1.3966114610895653e-06, "loss": 0.4025, "step": 24911 }, { "epoch": 0.7635159985288709, "grad_norm": 1.7386995880128968, "learning_rate": 1.3962673964500629e-06, "loss": 0.5752, "step": 24912 }, { "epoch": 0.7635466470516121, "grad_norm": 1.9574897453463687, "learning_rate": 1.3959233673190608e-06, "loss": 0.6648, "step": 24913 }, { "epoch": 0.7635772955743533, "grad_norm": 1.8033543790682454, "learning_rate": 1.3955793736999468e-06, "loss": 0.5454, "step": 24914 }, { "epoch": 0.7636079440970945, "grad_norm": 1.7921021169841689, "learning_rate": 1.3952354155961128e-06, "loss": 0.5919, "step": 24915 }, { "epoch": 0.7636385926198357, "grad_norm": 1.7940200854921244, "learning_rate": 1.3948914930109453e-06, "loss": 0.5398, "step": 24916 }, { "epoch": 0.763669241142577, "grad_norm": 1.8419970316868077, "learning_rate": 1.3945476059478336e-06, "loss": 0.6725, "step": 24917 }, { "epoch": 0.7636998896653181, "grad_norm": 1.7989633078344427, "learning_rate": 1.394203754410169e-06, "loss": 0.5062, "step": 24918 }, { "epoch": 0.7637305381880594, "grad_norm": 1.689869252045394, "learning_rate": 1.3938599384013357e-06, "loss": 0.5243, "step": 24919 }, { "epoch": 0.7637611867108005, "grad_norm": 1.7406261108926826, "learning_rate": 1.393516157924723e-06, "loss": 0.5876, "step": 24920 }, { "epoch": 0.7637918352335418, "grad_norm": 1.8659111949616365, "learning_rate": 1.3931724129837205e-06, "loss": 0.6036, "step": 24921 }, { "epoch": 0.7638224837562829, "grad_norm": 1.7840587962010874, "learning_rate": 1.3928287035817105e-06, "loss": 0.5602, "step": 24922 }, { "epoch": 0.7638531322790242, "grad_norm": 1.6784079624662644, "learning_rate": 1.3924850297220831e-06, "loss": 0.5253, "step": 24923 }, { "epoch": 0.7638837808017653, "grad_norm": 0.7978713679239939, "learning_rate": 1.3921413914082249e-06, "loss": 0.3901, "step": 24924 }, { "epoch": 0.7639144293245066, "grad_norm": 1.7761228431199294, "learning_rate": 1.391797788643519e-06, "loss": 0.552, "step": 24925 }, { "epoch": 0.7639450778472477, "grad_norm": 2.195269470754084, "learning_rate": 1.3914542214313547e-06, "loss": 0.7443, "step": 24926 }, { "epoch": 0.763975726369989, "grad_norm": 1.8290910732605856, "learning_rate": 1.391110689775113e-06, "loss": 0.4554, "step": 24927 }, { "epoch": 0.7640063748927302, "grad_norm": 1.6365743622737143, "learning_rate": 1.3907671936781814e-06, "loss": 0.4741, "step": 24928 }, { "epoch": 0.7640370234154714, "grad_norm": 0.7661233031689668, "learning_rate": 1.3904237331439457e-06, "loss": 0.3822, "step": 24929 }, { "epoch": 0.7640676719382126, "grad_norm": 1.7707143726336574, "learning_rate": 1.3900803081757875e-06, "loss": 0.5674, "step": 24930 }, { "epoch": 0.7640983204609538, "grad_norm": 1.8854113527872722, "learning_rate": 1.389736918777091e-06, "loss": 0.6393, "step": 24931 }, { "epoch": 0.764128968983695, "grad_norm": 0.7853501345542698, "learning_rate": 1.3893935649512419e-06, "loss": 0.382, "step": 24932 }, { "epoch": 0.7641596175064362, "grad_norm": 1.6968349376170786, "learning_rate": 1.3890502467016204e-06, "loss": 0.5008, "step": 24933 }, { "epoch": 0.7641902660291774, "grad_norm": 1.8135977750663692, "learning_rate": 1.3887069640316104e-06, "loss": 0.5901, "step": 24934 }, { "epoch": 0.7642209145519187, "grad_norm": 1.9253519340496636, "learning_rate": 1.3883637169445967e-06, "loss": 0.5547, "step": 24935 }, { "epoch": 0.7642515630746598, "grad_norm": 1.8723802923178783, "learning_rate": 1.3880205054439578e-06, "loss": 0.5727, "step": 24936 }, { "epoch": 0.764282211597401, "grad_norm": 1.6641551069489189, "learning_rate": 1.3876773295330782e-06, "loss": 0.5606, "step": 24937 }, { "epoch": 0.7643128601201422, "grad_norm": 2.186230490885531, "learning_rate": 1.3873341892153353e-06, "loss": 0.5894, "step": 24938 }, { "epoch": 0.7643435086428834, "grad_norm": 1.99684740516089, "learning_rate": 1.386991084494116e-06, "loss": 0.6365, "step": 24939 }, { "epoch": 0.7643741571656246, "grad_norm": 1.948341414064031, "learning_rate": 1.3866480153727978e-06, "loss": 0.5975, "step": 24940 }, { "epoch": 0.7644048056883658, "grad_norm": 2.082488809506079, "learning_rate": 1.3863049818547604e-06, "loss": 0.6778, "step": 24941 }, { "epoch": 0.764435454211107, "grad_norm": 1.8905383719837394, "learning_rate": 1.3859619839433836e-06, "loss": 0.5496, "step": 24942 }, { "epoch": 0.7644661027338482, "grad_norm": 1.679522309459, "learning_rate": 1.3856190216420501e-06, "loss": 0.5348, "step": 24943 }, { "epoch": 0.7644967512565894, "grad_norm": 1.9591898774045122, "learning_rate": 1.385276094954136e-06, "loss": 0.5959, "step": 24944 }, { "epoch": 0.7645273997793306, "grad_norm": 2.0046289658243404, "learning_rate": 1.384933203883021e-06, "loss": 0.6088, "step": 24945 }, { "epoch": 0.7645580483020719, "grad_norm": 1.6515472559803983, "learning_rate": 1.3845903484320855e-06, "loss": 0.595, "step": 24946 }, { "epoch": 0.764588696824813, "grad_norm": 1.87356018261785, "learning_rate": 1.3842475286047047e-06, "loss": 0.5425, "step": 24947 }, { "epoch": 0.7646193453475543, "grad_norm": 1.8574832173746858, "learning_rate": 1.3839047444042603e-06, "loss": 0.5503, "step": 24948 }, { "epoch": 0.7646499938702954, "grad_norm": 1.815715113601585, "learning_rate": 1.3835619958341257e-06, "loss": 0.5175, "step": 24949 }, { "epoch": 0.7646806423930367, "grad_norm": 1.7053597681107722, "learning_rate": 1.3832192828976798e-06, "loss": 0.647, "step": 24950 }, { "epoch": 0.7647112909157778, "grad_norm": 1.8839369254082865, "learning_rate": 1.3828766055983018e-06, "loss": 0.5997, "step": 24951 }, { "epoch": 0.7647419394385191, "grad_norm": 2.1259932020469527, "learning_rate": 1.3825339639393642e-06, "loss": 0.6395, "step": 24952 }, { "epoch": 0.7647725879612602, "grad_norm": 1.9538880732306139, "learning_rate": 1.3821913579242453e-06, "loss": 0.62, "step": 24953 }, { "epoch": 0.7648032364840015, "grad_norm": 1.8954641269447183, "learning_rate": 1.3818487875563218e-06, "loss": 0.599, "step": 24954 }, { "epoch": 0.7648338850067427, "grad_norm": 1.8581479147567135, "learning_rate": 1.3815062528389667e-06, "loss": 0.6401, "step": 24955 }, { "epoch": 0.7648645335294839, "grad_norm": 1.7585899772996905, "learning_rate": 1.381163753775558e-06, "loss": 0.5093, "step": 24956 }, { "epoch": 0.7648951820522251, "grad_norm": 1.9368746571468498, "learning_rate": 1.380821290369465e-06, "loss": 0.6063, "step": 24957 }, { "epoch": 0.7649258305749663, "grad_norm": 1.8212893547183602, "learning_rate": 1.3804788626240695e-06, "loss": 0.6481, "step": 24958 }, { "epoch": 0.7649564790977075, "grad_norm": 1.8471943574814345, "learning_rate": 1.3801364705427417e-06, "loss": 0.611, "step": 24959 }, { "epoch": 0.7649871276204487, "grad_norm": 1.6366354055195504, "learning_rate": 1.379794114128854e-06, "loss": 0.5077, "step": 24960 }, { "epoch": 0.7650177761431899, "grad_norm": 1.9886167066066627, "learning_rate": 1.379451793385781e-06, "loss": 0.5573, "step": 24961 }, { "epoch": 0.7650484246659311, "grad_norm": 1.7052721162589617, "learning_rate": 1.379109508316897e-06, "loss": 0.4718, "step": 24962 }, { "epoch": 0.7650790731886723, "grad_norm": 1.6573305060744423, "learning_rate": 1.3787672589255724e-06, "loss": 0.5949, "step": 24963 }, { "epoch": 0.7651097217114136, "grad_norm": 1.8623359259163002, "learning_rate": 1.3784250452151804e-06, "loss": 0.5605, "step": 24964 }, { "epoch": 0.7651403702341547, "grad_norm": 2.138568955772286, "learning_rate": 1.3780828671890933e-06, "loss": 0.5955, "step": 24965 }, { "epoch": 0.765171018756896, "grad_norm": 1.770606758917957, "learning_rate": 1.3777407248506835e-06, "loss": 0.5609, "step": 24966 }, { "epoch": 0.7652016672796371, "grad_norm": 1.6219610658808195, "learning_rate": 1.3773986182033216e-06, "loss": 0.6007, "step": 24967 }, { "epoch": 0.7652323158023783, "grad_norm": 2.062042401434007, "learning_rate": 1.377056547250375e-06, "loss": 0.5887, "step": 24968 }, { "epoch": 0.7652629643251195, "grad_norm": 1.7281666571258993, "learning_rate": 1.3767145119952196e-06, "loss": 0.578, "step": 24969 }, { "epoch": 0.7652936128478607, "grad_norm": 1.695549173866923, "learning_rate": 1.3763725124412236e-06, "loss": 0.5596, "step": 24970 }, { "epoch": 0.7653242613706019, "grad_norm": 0.8358313109499685, "learning_rate": 1.3760305485917553e-06, "loss": 0.3874, "step": 24971 }, { "epoch": 0.7653549098933431, "grad_norm": 1.9773760412023451, "learning_rate": 1.3756886204501845e-06, "loss": 0.6383, "step": 24972 }, { "epoch": 0.7653855584160844, "grad_norm": 1.776180197983091, "learning_rate": 1.375346728019883e-06, "loss": 0.5114, "step": 24973 }, { "epoch": 0.7654162069388255, "grad_norm": 1.7206429052393233, "learning_rate": 1.3750048713042159e-06, "loss": 0.5974, "step": 24974 }, { "epoch": 0.7654468554615668, "grad_norm": 1.8109806738816918, "learning_rate": 1.3746630503065532e-06, "loss": 0.5745, "step": 24975 }, { "epoch": 0.7654775039843079, "grad_norm": 1.6376184225140737, "learning_rate": 1.3743212650302629e-06, "loss": 0.4595, "step": 24976 }, { "epoch": 0.7655081525070492, "grad_norm": 1.9030023885816116, "learning_rate": 1.373979515478715e-06, "loss": 0.5463, "step": 24977 }, { "epoch": 0.7655388010297903, "grad_norm": 1.9851312509309034, "learning_rate": 1.3736378016552743e-06, "loss": 0.5474, "step": 24978 }, { "epoch": 0.7655694495525316, "grad_norm": 2.0011422820728906, "learning_rate": 1.3732961235633047e-06, "loss": 0.5119, "step": 24979 }, { "epoch": 0.7656000980752727, "grad_norm": 1.886319637574287, "learning_rate": 1.37295448120618e-06, "loss": 0.5238, "step": 24980 }, { "epoch": 0.765630746598014, "grad_norm": 1.835427342535112, "learning_rate": 1.3726128745872625e-06, "loss": 0.5593, "step": 24981 }, { "epoch": 0.7656613951207552, "grad_norm": 2.0234757492932007, "learning_rate": 1.3722713037099173e-06, "loss": 0.6276, "step": 24982 }, { "epoch": 0.7656920436434964, "grad_norm": 1.7706466914682515, "learning_rate": 1.3719297685775108e-06, "loss": 0.5303, "step": 24983 }, { "epoch": 0.7657226921662376, "grad_norm": 1.7069671747858945, "learning_rate": 1.3715882691934086e-06, "loss": 0.5875, "step": 24984 }, { "epoch": 0.7657533406889788, "grad_norm": 1.7221889681825104, "learning_rate": 1.3712468055609774e-06, "loss": 0.6354, "step": 24985 }, { "epoch": 0.76578398921172, "grad_norm": 1.7149581213550205, "learning_rate": 1.3709053776835778e-06, "loss": 0.5285, "step": 24986 }, { "epoch": 0.7658146377344612, "grad_norm": 0.7867504339292976, "learning_rate": 1.3705639855645764e-06, "loss": 0.3962, "step": 24987 }, { "epoch": 0.7658452862572024, "grad_norm": 1.9498518748650309, "learning_rate": 1.3702226292073385e-06, "loss": 0.6176, "step": 24988 }, { "epoch": 0.7658759347799436, "grad_norm": 1.810154040592565, "learning_rate": 1.3698813086152252e-06, "loss": 0.5494, "step": 24989 }, { "epoch": 0.7659065833026848, "grad_norm": 2.0106717950931112, "learning_rate": 1.3695400237915974e-06, "loss": 0.6785, "step": 24990 }, { "epoch": 0.7659372318254261, "grad_norm": 1.9206931515427932, "learning_rate": 1.369198774739824e-06, "loss": 0.6312, "step": 24991 }, { "epoch": 0.7659678803481672, "grad_norm": 1.8213263704124822, "learning_rate": 1.3688575614632616e-06, "loss": 0.5899, "step": 24992 }, { "epoch": 0.7659985288709085, "grad_norm": 1.925406979545765, "learning_rate": 1.368516383965277e-06, "loss": 0.594, "step": 24993 }, { "epoch": 0.7660291773936496, "grad_norm": 1.027416616118653, "learning_rate": 1.3681752422492279e-06, "loss": 0.397, "step": 24994 }, { "epoch": 0.7660598259163909, "grad_norm": 1.9375022515189784, "learning_rate": 1.3678341363184772e-06, "loss": 0.589, "step": 24995 }, { "epoch": 0.766090474439132, "grad_norm": 1.6775654046842896, "learning_rate": 1.367493066176388e-06, "loss": 0.5655, "step": 24996 }, { "epoch": 0.7661211229618733, "grad_norm": 2.0143985826993713, "learning_rate": 1.367152031826317e-06, "loss": 0.5462, "step": 24997 }, { "epoch": 0.7661517714846144, "grad_norm": 1.7436908061512446, "learning_rate": 1.3668110332716272e-06, "loss": 0.535, "step": 24998 }, { "epoch": 0.7661824200073556, "grad_norm": 1.7780124882162225, "learning_rate": 1.3664700705156792e-06, "loss": 0.5654, "step": 24999 }, { "epoch": 0.7662130685300969, "grad_norm": 1.8359017665130042, "learning_rate": 1.3661291435618307e-06, "loss": 0.5372, "step": 25000 }, { "epoch": 0.766243717052838, "grad_norm": 0.7599362439109247, "learning_rate": 1.3657882524134409e-06, "loss": 0.3919, "step": 25001 }, { "epoch": 0.7662743655755793, "grad_norm": 1.950051300816729, "learning_rate": 1.3654473970738684e-06, "loss": 0.617, "step": 25002 }, { "epoch": 0.7663050140983204, "grad_norm": 1.6064222133867156, "learning_rate": 1.3651065775464734e-06, "loss": 0.5919, "step": 25003 }, { "epoch": 0.7663356626210617, "grad_norm": 0.8083131392172285, "learning_rate": 1.3647657938346143e-06, "loss": 0.3939, "step": 25004 }, { "epoch": 0.7663663111438028, "grad_norm": 1.99104240718104, "learning_rate": 1.3644250459416464e-06, "loss": 0.5931, "step": 25005 }, { "epoch": 0.7663969596665441, "grad_norm": 1.849676338954872, "learning_rate": 1.3640843338709292e-06, "loss": 0.539, "step": 25006 }, { "epoch": 0.7664276081892852, "grad_norm": 1.9300284558530951, "learning_rate": 1.3637436576258206e-06, "loss": 0.6219, "step": 25007 }, { "epoch": 0.7664582567120265, "grad_norm": 1.787486475835619, "learning_rate": 1.3634030172096762e-06, "loss": 0.5299, "step": 25008 }, { "epoch": 0.7664889052347676, "grad_norm": 2.719130083603761, "learning_rate": 1.363062412625849e-06, "loss": 0.5836, "step": 25009 }, { "epoch": 0.7665195537575089, "grad_norm": 1.7826575998061274, "learning_rate": 1.3627218438777017e-06, "loss": 0.5106, "step": 25010 }, { "epoch": 0.7665502022802501, "grad_norm": 1.9679967817907902, "learning_rate": 1.3623813109685852e-06, "loss": 0.5539, "step": 25011 }, { "epoch": 0.7665808508029913, "grad_norm": 0.7929543058464603, "learning_rate": 1.3620408139018575e-06, "loss": 0.4067, "step": 25012 }, { "epoch": 0.7666114993257325, "grad_norm": 1.7411468072653273, "learning_rate": 1.3617003526808714e-06, "loss": 0.572, "step": 25013 }, { "epoch": 0.7666421478484737, "grad_norm": 1.9857293080083227, "learning_rate": 1.3613599273089828e-06, "loss": 0.6323, "step": 25014 }, { "epoch": 0.7666727963712149, "grad_norm": 1.760997552350771, "learning_rate": 1.361019537789547e-06, "loss": 0.532, "step": 25015 }, { "epoch": 0.7667034448939561, "grad_norm": 1.7303941977071142, "learning_rate": 1.3606791841259153e-06, "loss": 0.5672, "step": 25016 }, { "epoch": 0.7667340934166973, "grad_norm": 0.8585467630504745, "learning_rate": 1.360338866321443e-06, "loss": 0.42, "step": 25017 }, { "epoch": 0.7667647419394386, "grad_norm": 1.9813884293843076, "learning_rate": 1.359998584379484e-06, "loss": 0.7092, "step": 25018 }, { "epoch": 0.7667953904621797, "grad_norm": 1.912273989338425, "learning_rate": 1.3596583383033906e-06, "loss": 0.64, "step": 25019 }, { "epoch": 0.766826038984921, "grad_norm": 1.7765105189569934, "learning_rate": 1.3593181280965124e-06, "loss": 0.6446, "step": 25020 }, { "epoch": 0.7668566875076621, "grad_norm": 1.9998521420385176, "learning_rate": 1.3589779537622066e-06, "loss": 0.6255, "step": 25021 }, { "epoch": 0.7668873360304034, "grad_norm": 1.7671518244230278, "learning_rate": 1.3586378153038215e-06, "loss": 0.5569, "step": 25022 }, { "epoch": 0.7669179845531445, "grad_norm": 1.8177763045662474, "learning_rate": 1.358297712724711e-06, "loss": 0.5463, "step": 25023 }, { "epoch": 0.7669486330758858, "grad_norm": 0.7983133599028462, "learning_rate": 1.3579576460282234e-06, "loss": 0.3969, "step": 25024 }, { "epoch": 0.7669792815986269, "grad_norm": 1.5572272332400088, "learning_rate": 1.3576176152177112e-06, "loss": 0.5055, "step": 25025 }, { "epoch": 0.7670099301213682, "grad_norm": 1.6476886122946852, "learning_rate": 1.357277620296526e-06, "loss": 0.5769, "step": 25026 }, { "epoch": 0.7670405786441093, "grad_norm": 1.788360369831937, "learning_rate": 1.3569376612680146e-06, "loss": 0.5462, "step": 25027 }, { "epoch": 0.7670712271668506, "grad_norm": 2.0967009328534143, "learning_rate": 1.3565977381355288e-06, "loss": 0.6125, "step": 25028 }, { "epoch": 0.7671018756895918, "grad_norm": 0.8086664896128498, "learning_rate": 1.3562578509024194e-06, "loss": 0.3955, "step": 25029 }, { "epoch": 0.7671325242123329, "grad_norm": 2.2900778282330663, "learning_rate": 1.3559179995720318e-06, "loss": 0.6044, "step": 25030 }, { "epoch": 0.7671631727350742, "grad_norm": 1.8253323207265895, "learning_rate": 1.3555781841477167e-06, "loss": 0.5957, "step": 25031 }, { "epoch": 0.7671938212578153, "grad_norm": 1.990042988511186, "learning_rate": 1.3552384046328238e-06, "loss": 0.5792, "step": 25032 }, { "epoch": 0.7672244697805566, "grad_norm": 1.8001337975455454, "learning_rate": 1.354898661030698e-06, "loss": 0.5668, "step": 25033 }, { "epoch": 0.7672551183032977, "grad_norm": 1.8162724777850392, "learning_rate": 1.3545589533446897e-06, "loss": 0.6343, "step": 25034 }, { "epoch": 0.767285766826039, "grad_norm": 1.7864769511953467, "learning_rate": 1.354219281578143e-06, "loss": 0.5381, "step": 25035 }, { "epoch": 0.7673164153487801, "grad_norm": 2.0212939857628704, "learning_rate": 1.3538796457344066e-06, "loss": 0.5639, "step": 25036 }, { "epoch": 0.7673470638715214, "grad_norm": 1.8078154619502598, "learning_rate": 1.3535400458168292e-06, "loss": 0.6313, "step": 25037 }, { "epoch": 0.7673777123942626, "grad_norm": 1.619304183263403, "learning_rate": 1.3532004818287525e-06, "loss": 0.5847, "step": 25038 }, { "epoch": 0.7674083609170038, "grad_norm": 1.717259727056899, "learning_rate": 1.3528609537735244e-06, "loss": 0.592, "step": 25039 }, { "epoch": 0.767439009439745, "grad_norm": 1.9390771563359432, "learning_rate": 1.3525214616544924e-06, "loss": 0.656, "step": 25040 }, { "epoch": 0.7674696579624862, "grad_norm": 1.9650551100575124, "learning_rate": 1.3521820054749979e-06, "loss": 0.6183, "step": 25041 }, { "epoch": 0.7675003064852274, "grad_norm": 0.8305294997430789, "learning_rate": 1.3518425852383876e-06, "loss": 0.4051, "step": 25042 }, { "epoch": 0.7675309550079686, "grad_norm": 1.8285480704083557, "learning_rate": 1.3515032009480072e-06, "loss": 0.5441, "step": 25043 }, { "epoch": 0.7675616035307098, "grad_norm": 1.8739123266019768, "learning_rate": 1.351163852607198e-06, "loss": 0.5983, "step": 25044 }, { "epoch": 0.767592252053451, "grad_norm": 1.921740929509455, "learning_rate": 1.3508245402193065e-06, "loss": 0.6149, "step": 25045 }, { "epoch": 0.7676229005761922, "grad_norm": 1.762178114112741, "learning_rate": 1.3504852637876725e-06, "loss": 0.5213, "step": 25046 }, { "epoch": 0.7676535490989335, "grad_norm": 1.9797529596866856, "learning_rate": 1.350146023315641e-06, "loss": 0.5582, "step": 25047 }, { "epoch": 0.7676841976216746, "grad_norm": 1.860893228261341, "learning_rate": 1.3498068188065566e-06, "loss": 0.608, "step": 25048 }, { "epoch": 0.7677148461444159, "grad_norm": 1.8422089062812523, "learning_rate": 1.3494676502637577e-06, "loss": 0.6117, "step": 25049 }, { "epoch": 0.767745494667157, "grad_norm": 1.7808569612585983, "learning_rate": 1.3491285176905882e-06, "loss": 0.6052, "step": 25050 }, { "epoch": 0.7677761431898983, "grad_norm": 0.7685529704192198, "learning_rate": 1.3487894210903907e-06, "loss": 0.393, "step": 25051 }, { "epoch": 0.7678067917126394, "grad_norm": 2.075117551483464, "learning_rate": 1.3484503604665038e-06, "loss": 0.5736, "step": 25052 }, { "epoch": 0.7678374402353807, "grad_norm": 0.7569987195139367, "learning_rate": 1.3481113358222718e-06, "loss": 0.3773, "step": 25053 }, { "epoch": 0.7678680887581218, "grad_norm": 0.7638078820690204, "learning_rate": 1.3477723471610315e-06, "loss": 0.4005, "step": 25054 }, { "epoch": 0.7678987372808631, "grad_norm": 2.013138505678402, "learning_rate": 1.3474333944861245e-06, "loss": 0.65, "step": 25055 }, { "epoch": 0.7679293858036043, "grad_norm": 2.008261487119537, "learning_rate": 1.347094477800892e-06, "loss": 0.6294, "step": 25056 }, { "epoch": 0.7679600343263455, "grad_norm": 1.7038356978473588, "learning_rate": 1.3467555971086715e-06, "loss": 0.5537, "step": 25057 }, { "epoch": 0.7679906828490867, "grad_norm": 1.8802323208424283, "learning_rate": 1.3464167524128025e-06, "loss": 0.5806, "step": 25058 }, { "epoch": 0.7680213313718279, "grad_norm": 2.113386761652852, "learning_rate": 1.3460779437166255e-06, "loss": 0.6419, "step": 25059 }, { "epoch": 0.7680519798945691, "grad_norm": 1.886147002869556, "learning_rate": 1.3457391710234757e-06, "loss": 0.6225, "step": 25060 }, { "epoch": 0.7680826284173102, "grad_norm": 1.8252843183226588, "learning_rate": 1.3454004343366932e-06, "loss": 0.5984, "step": 25061 }, { "epoch": 0.7681132769400515, "grad_norm": 1.853061330908429, "learning_rate": 1.3450617336596166e-06, "loss": 0.6196, "step": 25062 }, { "epoch": 0.7681439254627926, "grad_norm": 1.8127145024461528, "learning_rate": 1.3447230689955803e-06, "loss": 0.6087, "step": 25063 }, { "epoch": 0.7681745739855339, "grad_norm": 2.2738084993950016, "learning_rate": 1.3443844403479244e-06, "loss": 0.6186, "step": 25064 }, { "epoch": 0.768205222508275, "grad_norm": 1.71226716776165, "learning_rate": 1.3440458477199813e-06, "loss": 0.6042, "step": 25065 }, { "epoch": 0.7682358710310163, "grad_norm": 3.195409046615833, "learning_rate": 1.3437072911150927e-06, "loss": 0.5162, "step": 25066 }, { "epoch": 0.7682665195537575, "grad_norm": 1.6488650195804755, "learning_rate": 1.3433687705365917e-06, "loss": 0.4884, "step": 25067 }, { "epoch": 0.7682971680764987, "grad_norm": 1.7740319310596602, "learning_rate": 1.3430302859878119e-06, "loss": 0.6398, "step": 25068 }, { "epoch": 0.7683278165992399, "grad_norm": 1.9768840022584226, "learning_rate": 1.3426918374720904e-06, "loss": 0.5974, "step": 25069 }, { "epoch": 0.7683584651219811, "grad_norm": 2.0338426040367183, "learning_rate": 1.342353424992764e-06, "loss": 0.6033, "step": 25070 }, { "epoch": 0.7683891136447223, "grad_norm": 1.8781727426325283, "learning_rate": 1.342015048553163e-06, "loss": 0.5501, "step": 25071 }, { "epoch": 0.7684197621674635, "grad_norm": 2.0023400400374993, "learning_rate": 1.3416767081566244e-06, "loss": 0.6245, "step": 25072 }, { "epoch": 0.7684504106902047, "grad_norm": 1.9989378005295733, "learning_rate": 1.3413384038064826e-06, "loss": 0.5749, "step": 25073 }, { "epoch": 0.768481059212946, "grad_norm": 0.7806923124017259, "learning_rate": 1.3410001355060676e-06, "loss": 0.3941, "step": 25074 }, { "epoch": 0.7685117077356871, "grad_norm": 1.7957500311022772, "learning_rate": 1.340661903258716e-06, "loss": 0.581, "step": 25075 }, { "epoch": 0.7685423562584284, "grad_norm": 1.6993892467560625, "learning_rate": 1.340323707067756e-06, "loss": 0.5803, "step": 25076 }, { "epoch": 0.7685730047811695, "grad_norm": 1.8703205882867806, "learning_rate": 1.3399855469365264e-06, "loss": 0.5944, "step": 25077 }, { "epoch": 0.7686036533039108, "grad_norm": 1.7396213374482372, "learning_rate": 1.3396474228683554e-06, "loss": 0.5411, "step": 25078 }, { "epoch": 0.7686343018266519, "grad_norm": 0.8706865333779221, "learning_rate": 1.3393093348665732e-06, "loss": 0.3932, "step": 25079 }, { "epoch": 0.7686649503493932, "grad_norm": 1.9011549408745643, "learning_rate": 1.338971282934513e-06, "loss": 0.5766, "step": 25080 }, { "epoch": 0.7686955988721343, "grad_norm": 2.0004899350120904, "learning_rate": 1.3386332670755065e-06, "loss": 0.581, "step": 25081 }, { "epoch": 0.7687262473948756, "grad_norm": 1.925578895948059, "learning_rate": 1.3382952872928823e-06, "loss": 0.6468, "step": 25082 }, { "epoch": 0.7687568959176168, "grad_norm": 1.6865581073122644, "learning_rate": 1.3379573435899713e-06, "loss": 0.5501, "step": 25083 }, { "epoch": 0.768787544440358, "grad_norm": 1.7667782829708678, "learning_rate": 1.3376194359701034e-06, "loss": 0.5419, "step": 25084 }, { "epoch": 0.7688181929630992, "grad_norm": 0.8015191028894406, "learning_rate": 1.33728156443661e-06, "loss": 0.3769, "step": 25085 }, { "epoch": 0.7688488414858404, "grad_norm": 1.6295311830599521, "learning_rate": 1.3369437289928184e-06, "loss": 0.6247, "step": 25086 }, { "epoch": 0.7688794900085816, "grad_norm": 0.8354130804991641, "learning_rate": 1.336605929642056e-06, "loss": 0.3883, "step": 25087 }, { "epoch": 0.7689101385313228, "grad_norm": 1.7274948630613522, "learning_rate": 1.3362681663876526e-06, "loss": 0.5009, "step": 25088 }, { "epoch": 0.768940787054064, "grad_norm": 2.028807509612038, "learning_rate": 1.3359304392329374e-06, "loss": 0.629, "step": 25089 }, { "epoch": 0.7689714355768053, "grad_norm": 1.625224182401249, "learning_rate": 1.3355927481812364e-06, "loss": 0.5199, "step": 25090 }, { "epoch": 0.7690020840995464, "grad_norm": 1.815427830371705, "learning_rate": 1.335255093235877e-06, "loss": 0.5125, "step": 25091 }, { "epoch": 0.7690327326222876, "grad_norm": 1.919661129742888, "learning_rate": 1.3349174744001875e-06, "loss": 0.6936, "step": 25092 }, { "epoch": 0.7690633811450288, "grad_norm": 0.7881825747876735, "learning_rate": 1.334579891677495e-06, "loss": 0.395, "step": 25093 }, { "epoch": 0.76909402966777, "grad_norm": 1.966538782669029, "learning_rate": 1.3342423450711246e-06, "loss": 0.5661, "step": 25094 }, { "epoch": 0.7691246781905112, "grad_norm": 0.8403403723897411, "learning_rate": 1.3339048345843992e-06, "loss": 0.4286, "step": 25095 }, { "epoch": 0.7691553267132524, "grad_norm": 1.5687302660532834, "learning_rate": 1.3335673602206506e-06, "loss": 0.5659, "step": 25096 }, { "epoch": 0.7691859752359936, "grad_norm": 1.8524029452859812, "learning_rate": 1.3332299219832011e-06, "loss": 0.5341, "step": 25097 }, { "epoch": 0.7692166237587348, "grad_norm": 1.8367280180236798, "learning_rate": 1.3328925198753739e-06, "loss": 0.6201, "step": 25098 }, { "epoch": 0.769247272281476, "grad_norm": 1.8121280535976945, "learning_rate": 1.332555153900495e-06, "loss": 0.5387, "step": 25099 }, { "epoch": 0.7692779208042172, "grad_norm": 1.7398149466410464, "learning_rate": 1.3322178240618893e-06, "loss": 0.525, "step": 25100 }, { "epoch": 0.7693085693269585, "grad_norm": 1.8021509807465161, "learning_rate": 1.331880530362879e-06, "loss": 0.5587, "step": 25101 }, { "epoch": 0.7693392178496996, "grad_norm": 0.8214413065993182, "learning_rate": 1.3315432728067884e-06, "loss": 0.4166, "step": 25102 }, { "epoch": 0.7693698663724409, "grad_norm": 1.746528440108579, "learning_rate": 1.3312060513969399e-06, "loss": 0.5537, "step": 25103 }, { "epoch": 0.769400514895182, "grad_norm": 2.0468201883701673, "learning_rate": 1.3308688661366592e-06, "loss": 0.4943, "step": 25104 }, { "epoch": 0.7694311634179233, "grad_norm": 1.940709993400627, "learning_rate": 1.3305317170292658e-06, "loss": 0.6103, "step": 25105 }, { "epoch": 0.7694618119406644, "grad_norm": 1.609875934352975, "learning_rate": 1.3301946040780794e-06, "loss": 0.5031, "step": 25106 }, { "epoch": 0.7694924604634057, "grad_norm": 2.074506825863726, "learning_rate": 1.3298575272864277e-06, "loss": 0.6479, "step": 25107 }, { "epoch": 0.7695231089861468, "grad_norm": 0.7651906106961331, "learning_rate": 1.3295204866576289e-06, "loss": 0.4197, "step": 25108 }, { "epoch": 0.7695537575088881, "grad_norm": 1.915782533030814, "learning_rate": 1.3291834821950017e-06, "loss": 0.5425, "step": 25109 }, { "epoch": 0.7695844060316293, "grad_norm": 1.762282345589327, "learning_rate": 1.3288465139018696e-06, "loss": 0.6507, "step": 25110 }, { "epoch": 0.7696150545543705, "grad_norm": 0.8029195064117757, "learning_rate": 1.3285095817815518e-06, "loss": 0.3927, "step": 25111 }, { "epoch": 0.7696457030771117, "grad_norm": 1.9333618870473308, "learning_rate": 1.32817268583737e-06, "loss": 0.5521, "step": 25112 }, { "epoch": 0.7696763515998529, "grad_norm": 1.790951473767084, "learning_rate": 1.3278358260726409e-06, "loss": 0.6472, "step": 25113 }, { "epoch": 0.7697070001225941, "grad_norm": 1.9455523266378874, "learning_rate": 1.3274990024906847e-06, "loss": 0.5794, "step": 25114 }, { "epoch": 0.7697376486453353, "grad_norm": 1.875695449893981, "learning_rate": 1.3271622150948222e-06, "loss": 0.5504, "step": 25115 }, { "epoch": 0.7697682971680765, "grad_norm": 0.8183999305818805, "learning_rate": 1.32682546388837e-06, "loss": 0.4152, "step": 25116 }, { "epoch": 0.7697989456908177, "grad_norm": 1.7975350093648113, "learning_rate": 1.326488748874643e-06, "loss": 0.6007, "step": 25117 }, { "epoch": 0.7698295942135589, "grad_norm": 0.8110545878491089, "learning_rate": 1.3261520700569658e-06, "loss": 0.3898, "step": 25118 }, { "epoch": 0.7698602427363002, "grad_norm": 1.7414977385531847, "learning_rate": 1.3258154274386502e-06, "loss": 0.4696, "step": 25119 }, { "epoch": 0.7698908912590413, "grad_norm": 0.7932710059629492, "learning_rate": 1.3254788210230175e-06, "loss": 0.4128, "step": 25120 }, { "epoch": 0.7699215397817826, "grad_norm": 1.7691356688390245, "learning_rate": 1.3251422508133805e-06, "loss": 0.6126, "step": 25121 }, { "epoch": 0.7699521883045237, "grad_norm": 1.4792288323854283, "learning_rate": 1.3248057168130567e-06, "loss": 0.4834, "step": 25122 }, { "epoch": 0.7699828368272649, "grad_norm": 0.8204129790629034, "learning_rate": 1.3244692190253644e-06, "loss": 0.4004, "step": 25123 }, { "epoch": 0.7700134853500061, "grad_norm": 1.784837306127818, "learning_rate": 1.3241327574536157e-06, "loss": 0.5451, "step": 25124 }, { "epoch": 0.7700441338727473, "grad_norm": 2.021496175885507, "learning_rate": 1.3237963321011272e-06, "loss": 0.5835, "step": 25125 }, { "epoch": 0.7700747823954885, "grad_norm": 1.8109332020422575, "learning_rate": 1.3234599429712159e-06, "loss": 0.5138, "step": 25126 }, { "epoch": 0.7701054309182297, "grad_norm": 1.9754487999011936, "learning_rate": 1.323123590067194e-06, "loss": 0.608, "step": 25127 }, { "epoch": 0.770136079440971, "grad_norm": 1.832800986833745, "learning_rate": 1.3227872733923736e-06, "loss": 0.6143, "step": 25128 }, { "epoch": 0.7701667279637121, "grad_norm": 1.8035724319220423, "learning_rate": 1.322450992950074e-06, "loss": 0.5332, "step": 25129 }, { "epoch": 0.7701973764864534, "grad_norm": 2.1915920190995686, "learning_rate": 1.3221147487436036e-06, "loss": 0.6606, "step": 25130 }, { "epoch": 0.7702280250091945, "grad_norm": 1.8691302055977495, "learning_rate": 1.3217785407762801e-06, "loss": 0.5488, "step": 25131 }, { "epoch": 0.7702586735319358, "grad_norm": 1.927916313368889, "learning_rate": 1.3214423690514117e-06, "loss": 0.5455, "step": 25132 }, { "epoch": 0.7702893220546769, "grad_norm": 1.5863815787030369, "learning_rate": 1.3211062335723128e-06, "loss": 0.561, "step": 25133 }, { "epoch": 0.7703199705774182, "grad_norm": 1.7923042079264104, "learning_rate": 1.3207701343422968e-06, "loss": 0.6026, "step": 25134 }, { "epoch": 0.7703506191001593, "grad_norm": 1.5633297656603813, "learning_rate": 1.320434071364673e-06, "loss": 0.5888, "step": 25135 }, { "epoch": 0.7703812676229006, "grad_norm": 1.9189576524866003, "learning_rate": 1.3200980446427536e-06, "loss": 0.6122, "step": 25136 }, { "epoch": 0.7704119161456418, "grad_norm": 1.509399055705542, "learning_rate": 1.3197620541798512e-06, "loss": 0.4344, "step": 25137 }, { "epoch": 0.770442564668383, "grad_norm": 1.9333745857123787, "learning_rate": 1.319426099979273e-06, "loss": 0.5742, "step": 25138 }, { "epoch": 0.7704732131911242, "grad_norm": 2.0369775517964985, "learning_rate": 1.319090182044333e-06, "loss": 0.5803, "step": 25139 }, { "epoch": 0.7705038617138654, "grad_norm": 1.7864622844327716, "learning_rate": 1.3187543003783383e-06, "loss": 0.5486, "step": 25140 }, { "epoch": 0.7705345102366066, "grad_norm": 2.037153688783522, "learning_rate": 1.3184184549845985e-06, "loss": 0.5967, "step": 25141 }, { "epoch": 0.7705651587593478, "grad_norm": 1.89530205627128, "learning_rate": 1.3180826458664253e-06, "loss": 0.5977, "step": 25142 }, { "epoch": 0.770595807282089, "grad_norm": 1.9854668209791273, "learning_rate": 1.3177468730271247e-06, "loss": 0.5757, "step": 25143 }, { "epoch": 0.7706264558048302, "grad_norm": 2.0809534690920857, "learning_rate": 1.317411136470006e-06, "loss": 0.5248, "step": 25144 }, { "epoch": 0.7706571043275714, "grad_norm": 1.8484533812370452, "learning_rate": 1.317075436198379e-06, "loss": 0.5758, "step": 25145 }, { "epoch": 0.7706877528503127, "grad_norm": 1.7770667989146467, "learning_rate": 1.316739772215549e-06, "loss": 0.534, "step": 25146 }, { "epoch": 0.7707184013730538, "grad_norm": 1.506135802539274, "learning_rate": 1.3164041445248244e-06, "loss": 0.4542, "step": 25147 }, { "epoch": 0.7707490498957951, "grad_norm": 0.7690890612129693, "learning_rate": 1.3160685531295132e-06, "loss": 0.3907, "step": 25148 }, { "epoch": 0.7707796984185362, "grad_norm": 2.0235112769192782, "learning_rate": 1.3157329980329204e-06, "loss": 0.6204, "step": 25149 }, { "epoch": 0.7708103469412775, "grad_norm": 1.7597133967173135, "learning_rate": 1.315397479238354e-06, "loss": 0.5186, "step": 25150 }, { "epoch": 0.7708409954640186, "grad_norm": 1.9863582323859232, "learning_rate": 1.3150619967491173e-06, "loss": 0.5904, "step": 25151 }, { "epoch": 0.7708716439867599, "grad_norm": 0.791352680816449, "learning_rate": 1.314726550568518e-06, "loss": 0.4154, "step": 25152 }, { "epoch": 0.770902292509501, "grad_norm": 0.8473609365074987, "learning_rate": 1.3143911406998621e-06, "loss": 0.3945, "step": 25153 }, { "epoch": 0.7709329410322422, "grad_norm": 0.8225721666440826, "learning_rate": 1.3140557671464522e-06, "loss": 0.4036, "step": 25154 }, { "epoch": 0.7709635895549835, "grad_norm": 1.8896029275394945, "learning_rate": 1.3137204299115935e-06, "loss": 0.5708, "step": 25155 }, { "epoch": 0.7709942380777246, "grad_norm": 1.8649557654145839, "learning_rate": 1.3133851289985922e-06, "loss": 0.5994, "step": 25156 }, { "epoch": 0.7710248866004659, "grad_norm": 0.7971353070794513, "learning_rate": 1.3130498644107492e-06, "loss": 0.4011, "step": 25157 }, { "epoch": 0.771055535123207, "grad_norm": 1.7942397781359911, "learning_rate": 1.3127146361513687e-06, "loss": 0.6507, "step": 25158 }, { "epoch": 0.7710861836459483, "grad_norm": 2.2917177853131223, "learning_rate": 1.3123794442237564e-06, "loss": 0.5664, "step": 25159 }, { "epoch": 0.7711168321686894, "grad_norm": 1.7080906310100712, "learning_rate": 1.3120442886312113e-06, "loss": 0.5749, "step": 25160 }, { "epoch": 0.7711474806914307, "grad_norm": 2.0310268340319273, "learning_rate": 1.311709169377039e-06, "loss": 0.5817, "step": 25161 }, { "epoch": 0.7711781292141718, "grad_norm": 1.6217881013690547, "learning_rate": 1.311374086464538e-06, "loss": 0.5429, "step": 25162 }, { "epoch": 0.7712087777369131, "grad_norm": 0.7896038262247373, "learning_rate": 1.3110390398970124e-06, "loss": 0.3994, "step": 25163 }, { "epoch": 0.7712394262596542, "grad_norm": 1.9672898781066392, "learning_rate": 1.3107040296777645e-06, "loss": 0.6162, "step": 25164 }, { "epoch": 0.7712700747823955, "grad_norm": 1.7744119921002077, "learning_rate": 1.3103690558100918e-06, "loss": 0.6073, "step": 25165 }, { "epoch": 0.7713007233051367, "grad_norm": 1.833354219029408, "learning_rate": 1.310034118297297e-06, "loss": 0.5396, "step": 25166 }, { "epoch": 0.7713313718278779, "grad_norm": 1.6388928764327004, "learning_rate": 1.309699217142682e-06, "loss": 0.5945, "step": 25167 }, { "epoch": 0.7713620203506191, "grad_norm": 1.782935290583931, "learning_rate": 1.3093643523495432e-06, "loss": 0.468, "step": 25168 }, { "epoch": 0.7713926688733603, "grad_norm": 2.038814206302105, "learning_rate": 1.3090295239211815e-06, "loss": 0.5833, "step": 25169 }, { "epoch": 0.7714233173961015, "grad_norm": 2.0092808887208506, "learning_rate": 1.3086947318608977e-06, "loss": 0.5617, "step": 25170 }, { "epoch": 0.7714539659188427, "grad_norm": 1.7690182965060646, "learning_rate": 1.3083599761719878e-06, "loss": 0.6258, "step": 25171 }, { "epoch": 0.7714846144415839, "grad_norm": 2.0182097956848852, "learning_rate": 1.3080252568577534e-06, "loss": 0.637, "step": 25172 }, { "epoch": 0.7715152629643252, "grad_norm": 0.8025354782342626, "learning_rate": 1.3076905739214873e-06, "loss": 0.4046, "step": 25173 }, { "epoch": 0.7715459114870663, "grad_norm": 1.8497369097816874, "learning_rate": 1.3073559273664937e-06, "loss": 0.63, "step": 25174 }, { "epoch": 0.7715765600098076, "grad_norm": 2.13286544703499, "learning_rate": 1.3070213171960672e-06, "loss": 0.6645, "step": 25175 }, { "epoch": 0.7716072085325487, "grad_norm": 0.8363528542835371, "learning_rate": 1.3066867434135033e-06, "loss": 0.3978, "step": 25176 }, { "epoch": 0.77163785705529, "grad_norm": 0.8049792058780855, "learning_rate": 1.3063522060220995e-06, "loss": 0.4244, "step": 25177 }, { "epoch": 0.7716685055780311, "grad_norm": 1.5755743430991784, "learning_rate": 1.3060177050251537e-06, "loss": 0.5443, "step": 25178 }, { "epoch": 0.7716991541007724, "grad_norm": 1.7294873312009889, "learning_rate": 1.3056832404259596e-06, "loss": 0.5953, "step": 25179 }, { "epoch": 0.7717298026235135, "grad_norm": 1.6418347677673977, "learning_rate": 1.3053488122278136e-06, "loss": 0.5997, "step": 25180 }, { "epoch": 0.7717604511462548, "grad_norm": 1.9289396557148661, "learning_rate": 1.3050144204340127e-06, "loss": 0.579, "step": 25181 }, { "epoch": 0.771791099668996, "grad_norm": 1.9233917959961448, "learning_rate": 1.3046800650478487e-06, "loss": 0.5682, "step": 25182 }, { "epoch": 0.7718217481917372, "grad_norm": 1.8942675680410215, "learning_rate": 1.304345746072619e-06, "loss": 0.5506, "step": 25183 }, { "epoch": 0.7718523967144784, "grad_norm": 1.957887222545395, "learning_rate": 1.3040114635116141e-06, "loss": 0.6125, "step": 25184 }, { "epoch": 0.7718830452372195, "grad_norm": 0.7642759241438386, "learning_rate": 1.3036772173681306e-06, "loss": 0.3944, "step": 25185 }, { "epoch": 0.7719136937599608, "grad_norm": 1.7021915654102604, "learning_rate": 1.3033430076454623e-06, "loss": 0.5466, "step": 25186 }, { "epoch": 0.7719443422827019, "grad_norm": 1.9473032555941399, "learning_rate": 1.3030088343469e-06, "loss": 0.6465, "step": 25187 }, { "epoch": 0.7719749908054432, "grad_norm": 1.7494982120386964, "learning_rate": 1.3026746974757376e-06, "loss": 0.642, "step": 25188 }, { "epoch": 0.7720056393281843, "grad_norm": 0.7908975972930995, "learning_rate": 1.3023405970352688e-06, "loss": 0.4152, "step": 25189 }, { "epoch": 0.7720362878509256, "grad_norm": 1.8476623029710508, "learning_rate": 1.3020065330287823e-06, "loss": 0.6144, "step": 25190 }, { "epoch": 0.7720669363736667, "grad_norm": 0.7830730121747156, "learning_rate": 1.3016725054595737e-06, "loss": 0.3953, "step": 25191 }, { "epoch": 0.772097584896408, "grad_norm": 1.9213237452983285, "learning_rate": 1.3013385143309287e-06, "loss": 0.5621, "step": 25192 }, { "epoch": 0.7721282334191492, "grad_norm": 0.7936744575656439, "learning_rate": 1.3010045596461451e-06, "loss": 0.408, "step": 25193 }, { "epoch": 0.7721588819418904, "grad_norm": 0.8207347864617894, "learning_rate": 1.3006706414085096e-06, "loss": 0.3972, "step": 25194 }, { "epoch": 0.7721895304646316, "grad_norm": 0.8657235775280551, "learning_rate": 1.3003367596213113e-06, "loss": 0.3939, "step": 25195 }, { "epoch": 0.7722201789873728, "grad_norm": 0.792065730121645, "learning_rate": 1.3000029142878417e-06, "loss": 0.4046, "step": 25196 }, { "epoch": 0.772250827510114, "grad_norm": 1.9022495239892507, "learning_rate": 1.2996691054113913e-06, "loss": 0.7143, "step": 25197 }, { "epoch": 0.7722814760328552, "grad_norm": 1.9848827833093392, "learning_rate": 1.2993353329952468e-06, "loss": 0.5737, "step": 25198 }, { "epoch": 0.7723121245555964, "grad_norm": 1.5425531534981816, "learning_rate": 1.2990015970426984e-06, "loss": 0.4954, "step": 25199 }, { "epoch": 0.7723427730783377, "grad_norm": 1.6903082681663735, "learning_rate": 1.298667897557035e-06, "loss": 0.6065, "step": 25200 }, { "epoch": 0.7723734216010788, "grad_norm": 2.1602511735600536, "learning_rate": 1.298334234541543e-06, "loss": 0.5396, "step": 25201 }, { "epoch": 0.7724040701238201, "grad_norm": 1.9329211569858877, "learning_rate": 1.2980006079995117e-06, "loss": 0.7009, "step": 25202 }, { "epoch": 0.7724347186465612, "grad_norm": 1.922467576349303, "learning_rate": 1.2976670179342248e-06, "loss": 0.6093, "step": 25203 }, { "epoch": 0.7724653671693025, "grad_norm": 1.8552669638655526, "learning_rate": 1.2973334643489754e-06, "loss": 0.5468, "step": 25204 }, { "epoch": 0.7724960156920436, "grad_norm": 2.1404570232685574, "learning_rate": 1.2969999472470468e-06, "loss": 0.6461, "step": 25205 }, { "epoch": 0.7725266642147849, "grad_norm": 1.9919472544290657, "learning_rate": 1.2966664666317237e-06, "loss": 0.5756, "step": 25206 }, { "epoch": 0.772557312737526, "grad_norm": 0.814252881528571, "learning_rate": 1.2963330225062925e-06, "loss": 0.4051, "step": 25207 }, { "epoch": 0.7725879612602673, "grad_norm": 1.6621817585924008, "learning_rate": 1.2959996148740423e-06, "loss": 0.5053, "step": 25208 }, { "epoch": 0.7726186097830084, "grad_norm": 1.7943528800810638, "learning_rate": 1.2956662437382534e-06, "loss": 0.5495, "step": 25209 }, { "epoch": 0.7726492583057497, "grad_norm": 1.8072410650464157, "learning_rate": 1.2953329091022131e-06, "loss": 0.6299, "step": 25210 }, { "epoch": 0.7726799068284909, "grad_norm": 1.787450650374374, "learning_rate": 1.2949996109692053e-06, "loss": 0.5822, "step": 25211 }, { "epoch": 0.7727105553512321, "grad_norm": 0.8058785729101846, "learning_rate": 1.294666349342516e-06, "loss": 0.4152, "step": 25212 }, { "epoch": 0.7727412038739733, "grad_norm": 1.816776852496896, "learning_rate": 1.294333124225427e-06, "loss": 0.626, "step": 25213 }, { "epoch": 0.7727718523967145, "grad_norm": 1.8179558327302203, "learning_rate": 1.2939999356212191e-06, "loss": 0.6287, "step": 25214 }, { "epoch": 0.7728025009194557, "grad_norm": 1.8180649546110772, "learning_rate": 1.2936667835331813e-06, "loss": 0.5782, "step": 25215 }, { "epoch": 0.7728331494421968, "grad_norm": 1.818864871990556, "learning_rate": 1.2933336679645925e-06, "loss": 0.5422, "step": 25216 }, { "epoch": 0.7728637979649381, "grad_norm": 1.8181561033079656, "learning_rate": 1.2930005889187342e-06, "loss": 0.4776, "step": 25217 }, { "epoch": 0.7728944464876792, "grad_norm": 1.9485803077646444, "learning_rate": 1.2926675463988898e-06, "loss": 0.5706, "step": 25218 }, { "epoch": 0.7729250950104205, "grad_norm": 0.7789209170748856, "learning_rate": 1.2923345404083398e-06, "loss": 0.3908, "step": 25219 }, { "epoch": 0.7729557435331617, "grad_norm": 1.9105703394485591, "learning_rate": 1.2920015709503687e-06, "loss": 0.604, "step": 25220 }, { "epoch": 0.7729863920559029, "grad_norm": 0.777427508181924, "learning_rate": 1.2916686380282528e-06, "loss": 0.391, "step": 25221 }, { "epoch": 0.7730170405786441, "grad_norm": 1.6448175896252384, "learning_rate": 1.291335741645275e-06, "loss": 0.574, "step": 25222 }, { "epoch": 0.7730476891013853, "grad_norm": 1.8387549576572724, "learning_rate": 1.291002881804716e-06, "loss": 0.5435, "step": 25223 }, { "epoch": 0.7730783376241265, "grad_norm": 1.8875114360246985, "learning_rate": 1.2906700585098548e-06, "loss": 0.5016, "step": 25224 }, { "epoch": 0.7731089861468677, "grad_norm": 1.7058954994968027, "learning_rate": 1.2903372717639678e-06, "loss": 0.5734, "step": 25225 }, { "epoch": 0.7731396346696089, "grad_norm": 1.786136175965757, "learning_rate": 1.2900045215703394e-06, "loss": 0.5358, "step": 25226 }, { "epoch": 0.7731702831923501, "grad_norm": 1.9684329110091126, "learning_rate": 1.2896718079322462e-06, "loss": 0.6747, "step": 25227 }, { "epoch": 0.7732009317150913, "grad_norm": 1.7206181169045496, "learning_rate": 1.289339130852964e-06, "loss": 0.5797, "step": 25228 }, { "epoch": 0.7732315802378326, "grad_norm": 1.7528130864379334, "learning_rate": 1.2890064903357729e-06, "loss": 0.6324, "step": 25229 }, { "epoch": 0.7732622287605737, "grad_norm": 2.0509375556338236, "learning_rate": 1.288673886383951e-06, "loss": 0.611, "step": 25230 }, { "epoch": 0.773292877283315, "grad_norm": 1.8337476666732782, "learning_rate": 1.2883413190007753e-06, "loss": 0.6476, "step": 25231 }, { "epoch": 0.7733235258060561, "grad_norm": 1.9951921845912537, "learning_rate": 1.2880087881895214e-06, "loss": 0.6809, "step": 25232 }, { "epoch": 0.7733541743287974, "grad_norm": 1.8042477484942085, "learning_rate": 1.2876762939534665e-06, "loss": 0.615, "step": 25233 }, { "epoch": 0.7733848228515385, "grad_norm": 2.0341547173097174, "learning_rate": 1.2873438362958884e-06, "loss": 0.5872, "step": 25234 }, { "epoch": 0.7734154713742798, "grad_norm": 2.2777187782612116, "learning_rate": 1.2870114152200618e-06, "loss": 0.6361, "step": 25235 }, { "epoch": 0.773446119897021, "grad_norm": 1.8146033558463273, "learning_rate": 1.2866790307292599e-06, "loss": 0.5845, "step": 25236 }, { "epoch": 0.7734767684197622, "grad_norm": 1.8932909267567732, "learning_rate": 1.2863466828267596e-06, "loss": 0.5802, "step": 25237 }, { "epoch": 0.7735074169425034, "grad_norm": 0.7817662657365145, "learning_rate": 1.2860143715158359e-06, "loss": 0.409, "step": 25238 }, { "epoch": 0.7735380654652446, "grad_norm": 1.8155609237017565, "learning_rate": 1.2856820967997642e-06, "loss": 0.5975, "step": 25239 }, { "epoch": 0.7735687139879858, "grad_norm": 1.876652946486727, "learning_rate": 1.2853498586818154e-06, "loss": 0.7456, "step": 25240 }, { "epoch": 0.773599362510727, "grad_norm": 1.6920544955522263, "learning_rate": 1.285017657165265e-06, "loss": 0.5758, "step": 25241 }, { "epoch": 0.7736300110334682, "grad_norm": 1.919777428759319, "learning_rate": 1.2846854922533874e-06, "loss": 0.5455, "step": 25242 }, { "epoch": 0.7736606595562094, "grad_norm": 1.9111518964808316, "learning_rate": 1.284353363949455e-06, "loss": 0.5183, "step": 25243 }, { "epoch": 0.7736913080789506, "grad_norm": 2.0135418690561044, "learning_rate": 1.2840212722567359e-06, "loss": 0.6276, "step": 25244 }, { "epoch": 0.7737219566016919, "grad_norm": 1.7470352431300553, "learning_rate": 1.2836892171785093e-06, "loss": 0.5764, "step": 25245 }, { "epoch": 0.773752605124433, "grad_norm": 1.8917869028761416, "learning_rate": 1.2833571987180421e-06, "loss": 0.5708, "step": 25246 }, { "epoch": 0.7737832536471742, "grad_norm": 2.076981307285056, "learning_rate": 1.2830252168786089e-06, "loss": 0.6708, "step": 25247 }, { "epoch": 0.7738139021699154, "grad_norm": 1.7170823716808667, "learning_rate": 1.2826932716634776e-06, "loss": 0.537, "step": 25248 }, { "epoch": 0.7738445506926566, "grad_norm": 1.7740738075697295, "learning_rate": 1.2823613630759208e-06, "loss": 0.5929, "step": 25249 }, { "epoch": 0.7738751992153978, "grad_norm": 1.7927700762344878, "learning_rate": 1.2820294911192098e-06, "loss": 0.5623, "step": 25250 }, { "epoch": 0.773905847738139, "grad_norm": 1.9892673815479138, "learning_rate": 1.2816976557966127e-06, "loss": 0.5299, "step": 25251 }, { "epoch": 0.7739364962608802, "grad_norm": 1.7470520616335359, "learning_rate": 1.2813658571113997e-06, "loss": 0.5345, "step": 25252 }, { "epoch": 0.7739671447836214, "grad_norm": 0.8225756679398746, "learning_rate": 1.2810340950668415e-06, "loss": 0.4172, "step": 25253 }, { "epoch": 0.7739977933063626, "grad_norm": 1.6894990542942971, "learning_rate": 1.2807023696662063e-06, "loss": 0.5354, "step": 25254 }, { "epoch": 0.7740284418291038, "grad_norm": 1.895398619092306, "learning_rate": 1.280370680912759e-06, "loss": 0.6181, "step": 25255 }, { "epoch": 0.7740590903518451, "grad_norm": 1.718641634572524, "learning_rate": 1.2800390288097742e-06, "loss": 0.5901, "step": 25256 }, { "epoch": 0.7740897388745862, "grad_norm": 1.130993285187929, "learning_rate": 1.2797074133605153e-06, "loss": 0.3686, "step": 25257 }, { "epoch": 0.7741203873973275, "grad_norm": 1.9686195385264997, "learning_rate": 1.2793758345682522e-06, "loss": 0.5534, "step": 25258 }, { "epoch": 0.7741510359200686, "grad_norm": 1.9613323250470733, "learning_rate": 1.27904429243625e-06, "loss": 0.5095, "step": 25259 }, { "epoch": 0.7741816844428099, "grad_norm": 1.8704695127895958, "learning_rate": 1.2787127869677762e-06, "loss": 0.611, "step": 25260 }, { "epoch": 0.774212332965551, "grad_norm": 1.7949462487341816, "learning_rate": 1.2783813181660986e-06, "loss": 0.4778, "step": 25261 }, { "epoch": 0.7742429814882923, "grad_norm": 1.9985538212616563, "learning_rate": 1.2780498860344814e-06, "loss": 0.5741, "step": 25262 }, { "epoch": 0.7742736300110334, "grad_norm": 1.6040422998571389, "learning_rate": 1.2777184905761901e-06, "loss": 0.5466, "step": 25263 }, { "epoch": 0.7743042785337747, "grad_norm": 1.854474507376789, "learning_rate": 1.277387131794493e-06, "loss": 0.5805, "step": 25264 }, { "epoch": 0.7743349270565159, "grad_norm": 2.020062704296817, "learning_rate": 1.2770558096926512e-06, "loss": 0.5338, "step": 25265 }, { "epoch": 0.7743655755792571, "grad_norm": 1.8341802432223435, "learning_rate": 1.2767245242739313e-06, "loss": 0.549, "step": 25266 }, { "epoch": 0.7743962241019983, "grad_norm": 1.7959106539442113, "learning_rate": 1.2763932755415986e-06, "loss": 0.5137, "step": 25267 }, { "epoch": 0.7744268726247395, "grad_norm": 1.6909191994609682, "learning_rate": 1.2760620634989141e-06, "loss": 0.6269, "step": 25268 }, { "epoch": 0.7744575211474807, "grad_norm": 1.8925000893930675, "learning_rate": 1.2757308881491449e-06, "loss": 0.6067, "step": 25269 }, { "epoch": 0.7744881696702219, "grad_norm": 1.7101274725105085, "learning_rate": 1.27539974949555e-06, "loss": 0.5376, "step": 25270 }, { "epoch": 0.7745188181929631, "grad_norm": 1.78923587671887, "learning_rate": 1.2750686475413948e-06, "loss": 0.6256, "step": 25271 }, { "epoch": 0.7745494667157043, "grad_norm": 2.0210020846419097, "learning_rate": 1.2747375822899421e-06, "loss": 0.5761, "step": 25272 }, { "epoch": 0.7745801152384455, "grad_norm": 1.7171821013686475, "learning_rate": 1.2744065537444522e-06, "loss": 0.524, "step": 25273 }, { "epoch": 0.7746107637611868, "grad_norm": 1.992358608396205, "learning_rate": 1.2740755619081879e-06, "loss": 0.6392, "step": 25274 }, { "epoch": 0.7746414122839279, "grad_norm": 2.0015306688239694, "learning_rate": 1.2737446067844116e-06, "loss": 0.6211, "step": 25275 }, { "epoch": 0.7746720608066692, "grad_norm": 1.6324967087995519, "learning_rate": 1.2734136883763821e-06, "loss": 0.5093, "step": 25276 }, { "epoch": 0.7747027093294103, "grad_norm": 1.7848078347902825, "learning_rate": 1.2730828066873603e-06, "loss": 0.6145, "step": 25277 }, { "epoch": 0.7747333578521515, "grad_norm": 0.7776938714782584, "learning_rate": 1.272751961720609e-06, "loss": 0.3777, "step": 25278 }, { "epoch": 0.7747640063748927, "grad_norm": 2.03274346446264, "learning_rate": 1.2724211534793851e-06, "loss": 0.6023, "step": 25279 }, { "epoch": 0.7747946548976339, "grad_norm": 1.9286150186933662, "learning_rate": 1.2720903819669506e-06, "loss": 0.6144, "step": 25280 }, { "epoch": 0.7748253034203751, "grad_norm": 1.8336707729715807, "learning_rate": 1.2717596471865619e-06, "loss": 0.5631, "step": 25281 }, { "epoch": 0.7748559519431163, "grad_norm": 1.9972918892116343, "learning_rate": 1.271428949141479e-06, "loss": 0.6162, "step": 25282 }, { "epoch": 0.7748866004658576, "grad_norm": 2.107573825334399, "learning_rate": 1.2710982878349621e-06, "loss": 0.5954, "step": 25283 }, { "epoch": 0.7749172489885987, "grad_norm": 1.7993660879749453, "learning_rate": 1.2707676632702665e-06, "loss": 0.529, "step": 25284 }, { "epoch": 0.77494789751134, "grad_norm": 2.085631879949078, "learning_rate": 1.2704370754506517e-06, "loss": 0.5799, "step": 25285 }, { "epoch": 0.7749785460340811, "grad_norm": 1.8120555771246496, "learning_rate": 1.270106524379376e-06, "loss": 0.5906, "step": 25286 }, { "epoch": 0.7750091945568224, "grad_norm": 1.8432488203949169, "learning_rate": 1.2697760100596929e-06, "loss": 0.5222, "step": 25287 }, { "epoch": 0.7750398430795635, "grad_norm": 1.7051093136846425, "learning_rate": 1.2694455324948634e-06, "loss": 0.5972, "step": 25288 }, { "epoch": 0.7750704916023048, "grad_norm": 1.831272144929833, "learning_rate": 1.26911509168814e-06, "loss": 0.6101, "step": 25289 }, { "epoch": 0.7751011401250459, "grad_norm": 1.7097674654098982, "learning_rate": 1.2687846876427801e-06, "loss": 0.5059, "step": 25290 }, { "epoch": 0.7751317886477872, "grad_norm": 1.5382498649742855, "learning_rate": 1.2684543203620402e-06, "loss": 0.4345, "step": 25291 }, { "epoch": 0.7751624371705284, "grad_norm": 1.9998179109044913, "learning_rate": 1.2681239898491743e-06, "loss": 0.6977, "step": 25292 }, { "epoch": 0.7751930856932696, "grad_norm": 1.9585158173651553, "learning_rate": 1.2677936961074366e-06, "loss": 0.643, "step": 25293 }, { "epoch": 0.7752237342160108, "grad_norm": 1.7921862824349994, "learning_rate": 1.2674634391400848e-06, "loss": 0.5479, "step": 25294 }, { "epoch": 0.775254382738752, "grad_norm": 1.9226192298101776, "learning_rate": 1.2671332189503688e-06, "loss": 0.5289, "step": 25295 }, { "epoch": 0.7752850312614932, "grad_norm": 1.8201316096085443, "learning_rate": 1.2668030355415446e-06, "loss": 0.6129, "step": 25296 }, { "epoch": 0.7753156797842344, "grad_norm": 1.6731136902335593, "learning_rate": 1.266472888916866e-06, "loss": 0.5316, "step": 25297 }, { "epoch": 0.7753463283069756, "grad_norm": 0.7969630070158952, "learning_rate": 1.2661427790795844e-06, "loss": 0.4118, "step": 25298 }, { "epoch": 0.7753769768297168, "grad_norm": 1.8064197021558224, "learning_rate": 1.265812706032955e-06, "loss": 0.5049, "step": 25299 }, { "epoch": 0.775407625352458, "grad_norm": 0.7905502788868985, "learning_rate": 1.2654826697802253e-06, "loss": 0.3927, "step": 25300 }, { "epoch": 0.7754382738751993, "grad_norm": 1.9192786448076788, "learning_rate": 1.2651526703246531e-06, "loss": 0.5357, "step": 25301 }, { "epoch": 0.7754689223979404, "grad_norm": 1.8596200178246636, "learning_rate": 1.2648227076694875e-06, "loss": 0.4399, "step": 25302 }, { "epoch": 0.7754995709206817, "grad_norm": 1.8283189969820335, "learning_rate": 1.2644927818179775e-06, "loss": 0.5518, "step": 25303 }, { "epoch": 0.7755302194434228, "grad_norm": 0.802368631759564, "learning_rate": 1.2641628927733768e-06, "loss": 0.3991, "step": 25304 }, { "epoch": 0.7755608679661641, "grad_norm": 1.724023963756258, "learning_rate": 1.2638330405389354e-06, "loss": 0.5379, "step": 25305 }, { "epoch": 0.7755915164889052, "grad_norm": 0.7547238587436349, "learning_rate": 1.2635032251179025e-06, "loss": 0.3876, "step": 25306 }, { "epoch": 0.7756221650116465, "grad_norm": 0.7608818872276577, "learning_rate": 1.2631734465135275e-06, "loss": 0.3712, "step": 25307 }, { "epoch": 0.7756528135343876, "grad_norm": 1.8510300967514126, "learning_rate": 1.2628437047290626e-06, "loss": 0.5421, "step": 25308 }, { "epoch": 0.7756834620571288, "grad_norm": 1.7719947329206853, "learning_rate": 1.2625139997677533e-06, "loss": 0.5129, "step": 25309 }, { "epoch": 0.77571411057987, "grad_norm": 2.0072088889745636, "learning_rate": 1.2621843316328513e-06, "loss": 0.5871, "step": 25310 }, { "epoch": 0.7757447591026112, "grad_norm": 1.9025011906800222, "learning_rate": 1.2618547003276005e-06, "loss": 0.4625, "step": 25311 }, { "epoch": 0.7757754076253525, "grad_norm": 0.8202184619863335, "learning_rate": 1.2615251058552547e-06, "loss": 0.4087, "step": 25312 }, { "epoch": 0.7758060561480936, "grad_norm": 1.817311890100346, "learning_rate": 1.2611955482190586e-06, "loss": 0.5715, "step": 25313 }, { "epoch": 0.7758367046708349, "grad_norm": 1.9169985771460851, "learning_rate": 1.2608660274222578e-06, "loss": 0.6151, "step": 25314 }, { "epoch": 0.775867353193576, "grad_norm": 1.776116332281693, "learning_rate": 1.260536543468101e-06, "loss": 0.6563, "step": 25315 }, { "epoch": 0.7758980017163173, "grad_norm": 1.9487064398797667, "learning_rate": 1.2602070963598356e-06, "loss": 0.5801, "step": 25316 }, { "epoch": 0.7759286502390584, "grad_norm": 0.7931286484098226, "learning_rate": 1.259877686100705e-06, "loss": 0.3984, "step": 25317 }, { "epoch": 0.7759592987617997, "grad_norm": 0.8041105904820227, "learning_rate": 1.2595483126939572e-06, "loss": 0.4185, "step": 25318 }, { "epoch": 0.7759899472845408, "grad_norm": 1.7087688356522794, "learning_rate": 1.2592189761428364e-06, "loss": 0.4905, "step": 25319 }, { "epoch": 0.7760205958072821, "grad_norm": 1.7022540180484564, "learning_rate": 1.2588896764505893e-06, "loss": 0.6791, "step": 25320 }, { "epoch": 0.7760512443300233, "grad_norm": 0.8053104042592474, "learning_rate": 1.2585604136204599e-06, "loss": 0.4167, "step": 25321 }, { "epoch": 0.7760818928527645, "grad_norm": 1.891763931893259, "learning_rate": 1.258231187655689e-06, "loss": 0.5552, "step": 25322 }, { "epoch": 0.7761125413755057, "grad_norm": 1.9623142754801122, "learning_rate": 1.2579019985595264e-06, "loss": 0.5589, "step": 25323 }, { "epoch": 0.7761431898982469, "grad_norm": 1.937262640992157, "learning_rate": 1.2575728463352127e-06, "loss": 0.5932, "step": 25324 }, { "epoch": 0.7761738384209881, "grad_norm": 1.8977387165786994, "learning_rate": 1.2572437309859902e-06, "loss": 0.5329, "step": 25325 }, { "epoch": 0.7762044869437293, "grad_norm": 1.7454570981341135, "learning_rate": 1.2569146525151027e-06, "loss": 0.5152, "step": 25326 }, { "epoch": 0.7762351354664705, "grad_norm": 1.9495109415222576, "learning_rate": 1.2565856109257929e-06, "loss": 0.5369, "step": 25327 }, { "epoch": 0.7762657839892118, "grad_norm": 1.8161792407848905, "learning_rate": 1.2562566062213044e-06, "loss": 0.5627, "step": 25328 }, { "epoch": 0.7762964325119529, "grad_norm": 1.9805110181574592, "learning_rate": 1.2559276384048758e-06, "loss": 0.6716, "step": 25329 }, { "epoch": 0.7763270810346942, "grad_norm": 1.738269012873017, "learning_rate": 1.2555987074797499e-06, "loss": 0.6027, "step": 25330 }, { "epoch": 0.7763577295574353, "grad_norm": 1.934544469707654, "learning_rate": 1.2552698134491697e-06, "loss": 0.6784, "step": 25331 }, { "epoch": 0.7763883780801766, "grad_norm": 1.8157098971330752, "learning_rate": 1.2549409563163744e-06, "loss": 0.6376, "step": 25332 }, { "epoch": 0.7764190266029177, "grad_norm": 2.0484374172200663, "learning_rate": 1.2546121360846025e-06, "loss": 0.6873, "step": 25333 }, { "epoch": 0.776449675125659, "grad_norm": 1.7105750837038973, "learning_rate": 1.2542833527570952e-06, "loss": 0.5697, "step": 25334 }, { "epoch": 0.7764803236484001, "grad_norm": 2.3111662345460253, "learning_rate": 1.2539546063370944e-06, "loss": 0.6097, "step": 25335 }, { "epoch": 0.7765109721711414, "grad_norm": 1.6480416369592399, "learning_rate": 1.2536258968278352e-06, "loss": 0.4708, "step": 25336 }, { "epoch": 0.7765416206938826, "grad_norm": 1.7985314634343588, "learning_rate": 1.2532972242325593e-06, "loss": 0.5332, "step": 25337 }, { "epoch": 0.7765722692166238, "grad_norm": 0.8083928812172182, "learning_rate": 1.252968588554504e-06, "loss": 0.4005, "step": 25338 }, { "epoch": 0.776602917739365, "grad_norm": 1.822583118570753, "learning_rate": 1.2526399897969093e-06, "loss": 0.5069, "step": 25339 }, { "epoch": 0.7766335662621061, "grad_norm": 1.853646030615274, "learning_rate": 1.2523114279630122e-06, "loss": 0.5654, "step": 25340 }, { "epoch": 0.7766642147848474, "grad_norm": 1.9409014406724165, "learning_rate": 1.251982903056046e-06, "loss": 0.6031, "step": 25341 }, { "epoch": 0.7766948633075885, "grad_norm": 1.8686295733322724, "learning_rate": 1.2516544150792543e-06, "loss": 0.6297, "step": 25342 }, { "epoch": 0.7767255118303298, "grad_norm": 1.7487449305567455, "learning_rate": 1.2513259640358705e-06, "loss": 0.5612, "step": 25343 }, { "epoch": 0.7767561603530709, "grad_norm": 0.812825450614452, "learning_rate": 1.25099754992913e-06, "loss": 0.3802, "step": 25344 }, { "epoch": 0.7767868088758122, "grad_norm": 2.1044893611569946, "learning_rate": 1.2506691727622699e-06, "loss": 0.6122, "step": 25345 }, { "epoch": 0.7768174573985533, "grad_norm": 1.8850833552421216, "learning_rate": 1.2503408325385251e-06, "loss": 0.5294, "step": 25346 }, { "epoch": 0.7768481059212946, "grad_norm": 0.7760911259876242, "learning_rate": 1.2500125292611336e-06, "loss": 0.3944, "step": 25347 }, { "epoch": 0.7768787544440358, "grad_norm": 1.725636729161641, "learning_rate": 1.2496842629333267e-06, "loss": 0.5345, "step": 25348 }, { "epoch": 0.776909402966777, "grad_norm": 1.7458941271342976, "learning_rate": 1.2493560335583399e-06, "loss": 0.5209, "step": 25349 }, { "epoch": 0.7769400514895182, "grad_norm": 1.8769287658014004, "learning_rate": 1.2490278411394097e-06, "loss": 0.6155, "step": 25350 }, { "epoch": 0.7769707000122594, "grad_norm": 1.8074976340874356, "learning_rate": 1.2486996856797673e-06, "loss": 0.5401, "step": 25351 }, { "epoch": 0.7770013485350006, "grad_norm": 1.9767158747257745, "learning_rate": 1.248371567182644e-06, "loss": 0.6054, "step": 25352 }, { "epoch": 0.7770319970577418, "grad_norm": 1.8325752084471494, "learning_rate": 1.2480434856512786e-06, "loss": 0.5026, "step": 25353 }, { "epoch": 0.777062645580483, "grad_norm": 2.019743986234257, "learning_rate": 1.2477154410888992e-06, "loss": 0.582, "step": 25354 }, { "epoch": 0.7770932941032243, "grad_norm": 1.8460944439941191, "learning_rate": 1.2473874334987412e-06, "loss": 0.6565, "step": 25355 }, { "epoch": 0.7771239426259654, "grad_norm": 1.9111494657449934, "learning_rate": 1.2470594628840333e-06, "loss": 0.5718, "step": 25356 }, { "epoch": 0.7771545911487067, "grad_norm": 1.9510687255927208, "learning_rate": 1.2467315292480093e-06, "loss": 0.619, "step": 25357 }, { "epoch": 0.7771852396714478, "grad_norm": 1.8597262661461846, "learning_rate": 1.2464036325939004e-06, "loss": 0.5522, "step": 25358 }, { "epoch": 0.7772158881941891, "grad_norm": 1.8436897624777175, "learning_rate": 1.2460757729249363e-06, "loss": 0.5583, "step": 25359 }, { "epoch": 0.7772465367169302, "grad_norm": 1.851483874675521, "learning_rate": 1.2457479502443475e-06, "loss": 0.5337, "step": 25360 }, { "epoch": 0.7772771852396715, "grad_norm": 0.760840968491998, "learning_rate": 1.2454201645553665e-06, "loss": 0.4112, "step": 25361 }, { "epoch": 0.7773078337624126, "grad_norm": 1.9941469045519022, "learning_rate": 1.245092415861221e-06, "loss": 0.6048, "step": 25362 }, { "epoch": 0.7773384822851539, "grad_norm": 2.1904106120785247, "learning_rate": 1.2447647041651378e-06, "loss": 0.6736, "step": 25363 }, { "epoch": 0.777369130807895, "grad_norm": 2.1484654862127144, "learning_rate": 1.2444370294703517e-06, "loss": 0.6415, "step": 25364 }, { "epoch": 0.7773997793306363, "grad_norm": 0.812470811178368, "learning_rate": 1.2441093917800872e-06, "loss": 0.4039, "step": 25365 }, { "epoch": 0.7774304278533775, "grad_norm": 1.9318706019906557, "learning_rate": 1.2437817910975752e-06, "loss": 0.518, "step": 25366 }, { "epoch": 0.7774610763761187, "grad_norm": 1.922408244397969, "learning_rate": 1.2434542274260408e-06, "loss": 0.6069, "step": 25367 }, { "epoch": 0.7774917248988599, "grad_norm": 2.0606088035647825, "learning_rate": 1.2431267007687132e-06, "loss": 0.6023, "step": 25368 }, { "epoch": 0.7775223734216011, "grad_norm": 1.7233354575354172, "learning_rate": 1.2427992111288206e-06, "loss": 0.6311, "step": 25369 }, { "epoch": 0.7775530219443423, "grad_norm": 1.6981404967861848, "learning_rate": 1.2424717585095875e-06, "loss": 0.5294, "step": 25370 }, { "epoch": 0.7775836704670834, "grad_norm": 1.8556908595806878, "learning_rate": 1.2421443429142415e-06, "loss": 0.5337, "step": 25371 }, { "epoch": 0.7776143189898247, "grad_norm": 1.7331465275776896, "learning_rate": 1.2418169643460098e-06, "loss": 0.5086, "step": 25372 }, { "epoch": 0.7776449675125658, "grad_norm": 1.9210509800205942, "learning_rate": 1.2414896228081164e-06, "loss": 0.6452, "step": 25373 }, { "epoch": 0.7776756160353071, "grad_norm": 1.7695798393783349, "learning_rate": 1.2411623183037869e-06, "loss": 0.6436, "step": 25374 }, { "epoch": 0.7777062645580483, "grad_norm": 1.702054265987618, "learning_rate": 1.2408350508362489e-06, "loss": 0.5533, "step": 25375 }, { "epoch": 0.7777369130807895, "grad_norm": 1.7192983636714543, "learning_rate": 1.2405078204087228e-06, "loss": 0.6091, "step": 25376 }, { "epoch": 0.7777675616035307, "grad_norm": 1.789733867106938, "learning_rate": 1.2401806270244366e-06, "loss": 0.5728, "step": 25377 }, { "epoch": 0.7777982101262719, "grad_norm": 1.701162609974035, "learning_rate": 1.2398534706866116e-06, "loss": 0.505, "step": 25378 }, { "epoch": 0.7778288586490131, "grad_norm": 1.7414557273770013, "learning_rate": 1.2395263513984724e-06, "loss": 0.5366, "step": 25379 }, { "epoch": 0.7778595071717543, "grad_norm": 1.9343268877848896, "learning_rate": 1.239199269163243e-06, "loss": 0.6392, "step": 25380 }, { "epoch": 0.7778901556944955, "grad_norm": 1.885311505465103, "learning_rate": 1.238872223984145e-06, "loss": 0.546, "step": 25381 }, { "epoch": 0.7779208042172367, "grad_norm": 1.6377387267159267, "learning_rate": 1.2385452158644006e-06, "loss": 0.5743, "step": 25382 }, { "epoch": 0.7779514527399779, "grad_norm": 1.9368952544905405, "learning_rate": 1.2382182448072344e-06, "loss": 0.6174, "step": 25383 }, { "epoch": 0.7779821012627192, "grad_norm": 2.1000674673295543, "learning_rate": 1.2378913108158647e-06, "loss": 0.6276, "step": 25384 }, { "epoch": 0.7780127497854603, "grad_norm": 1.7817332464466766, "learning_rate": 1.2375644138935156e-06, "loss": 0.6589, "step": 25385 }, { "epoch": 0.7780433983082016, "grad_norm": 1.8527029187346424, "learning_rate": 1.2372375540434063e-06, "loss": 0.5409, "step": 25386 }, { "epoch": 0.7780740468309427, "grad_norm": 1.7414443977052712, "learning_rate": 1.2369107312687572e-06, "loss": 0.5048, "step": 25387 }, { "epoch": 0.778104695353684, "grad_norm": 1.872538123951027, "learning_rate": 1.2365839455727919e-06, "loss": 0.5558, "step": 25388 }, { "epoch": 0.7781353438764251, "grad_norm": 0.791247579822726, "learning_rate": 1.2362571969587255e-06, "loss": 0.4069, "step": 25389 }, { "epoch": 0.7781659923991664, "grad_norm": 1.8938733214705368, "learning_rate": 1.235930485429781e-06, "loss": 0.5476, "step": 25390 }, { "epoch": 0.7781966409219075, "grad_norm": 0.8409964509056679, "learning_rate": 1.235603810989177e-06, "loss": 0.4002, "step": 25391 }, { "epoch": 0.7782272894446488, "grad_norm": 1.9894083715284836, "learning_rate": 1.235277173640131e-06, "loss": 0.7125, "step": 25392 }, { "epoch": 0.77825793796739, "grad_norm": 1.9262133860926256, "learning_rate": 1.2349505733858618e-06, "loss": 0.5257, "step": 25393 }, { "epoch": 0.7782885864901312, "grad_norm": 1.822744010385604, "learning_rate": 1.2346240102295898e-06, "loss": 0.5117, "step": 25394 }, { "epoch": 0.7783192350128724, "grad_norm": 1.8320206872986333, "learning_rate": 1.2342974841745292e-06, "loss": 0.5884, "step": 25395 }, { "epoch": 0.7783498835356136, "grad_norm": 1.8194392834305875, "learning_rate": 1.2339709952239003e-06, "loss": 0.5785, "step": 25396 }, { "epoch": 0.7783805320583548, "grad_norm": 1.8957438037947927, "learning_rate": 1.2336445433809175e-06, "loss": 0.5237, "step": 25397 }, { "epoch": 0.778411180581096, "grad_norm": 0.7875151742625159, "learning_rate": 1.2333181286487982e-06, "loss": 0.3944, "step": 25398 }, { "epoch": 0.7784418291038372, "grad_norm": 1.9587257928770543, "learning_rate": 1.2329917510307616e-06, "loss": 0.6039, "step": 25399 }, { "epoch": 0.7784724776265785, "grad_norm": 0.7635292185652287, "learning_rate": 1.232665410530019e-06, "loss": 0.393, "step": 25400 }, { "epoch": 0.7785031261493196, "grad_norm": 1.9615660915633764, "learning_rate": 1.2323391071497882e-06, "loss": 0.5713, "step": 25401 }, { "epoch": 0.7785337746720608, "grad_norm": 1.7197916162379667, "learning_rate": 1.2320128408932852e-06, "loss": 0.4431, "step": 25402 }, { "epoch": 0.778564423194802, "grad_norm": 1.9897406636950623, "learning_rate": 1.2316866117637226e-06, "loss": 0.5964, "step": 25403 }, { "epoch": 0.7785950717175432, "grad_norm": 1.6846051172857406, "learning_rate": 1.2313604197643158e-06, "loss": 0.5678, "step": 25404 }, { "epoch": 0.7786257202402844, "grad_norm": 1.9583763377811043, "learning_rate": 1.231034264898281e-06, "loss": 0.6224, "step": 25405 }, { "epoch": 0.7786563687630256, "grad_norm": 1.8777913215341846, "learning_rate": 1.2307081471688282e-06, "loss": 0.5747, "step": 25406 }, { "epoch": 0.7786870172857668, "grad_norm": 0.7501813696350947, "learning_rate": 1.2303820665791739e-06, "loss": 0.407, "step": 25407 }, { "epoch": 0.778717665808508, "grad_norm": 1.818189393531593, "learning_rate": 1.2300560231325275e-06, "loss": 0.566, "step": 25408 }, { "epoch": 0.7787483143312492, "grad_norm": 1.8255245295258535, "learning_rate": 1.2297300168321047e-06, "loss": 0.5625, "step": 25409 }, { "epoch": 0.7787789628539904, "grad_norm": 0.8323670239602011, "learning_rate": 1.2294040476811176e-06, "loss": 0.4097, "step": 25410 }, { "epoch": 0.7788096113767317, "grad_norm": 2.0342163635273085, "learning_rate": 1.2290781156827758e-06, "loss": 0.5887, "step": 25411 }, { "epoch": 0.7788402598994728, "grad_norm": 1.8894564132650773, "learning_rate": 1.228752220840292e-06, "loss": 0.6458, "step": 25412 }, { "epoch": 0.7788709084222141, "grad_norm": 1.747882029277539, "learning_rate": 1.2284263631568794e-06, "loss": 0.437, "step": 25413 }, { "epoch": 0.7789015569449552, "grad_norm": 1.9258543228996736, "learning_rate": 1.228100542635745e-06, "loss": 0.5402, "step": 25414 }, { "epoch": 0.7789322054676965, "grad_norm": 2.0530471263487415, "learning_rate": 1.227774759280101e-06, "loss": 0.6782, "step": 25415 }, { "epoch": 0.7789628539904376, "grad_norm": 1.9519002588382304, "learning_rate": 1.2274490130931593e-06, "loss": 0.6459, "step": 25416 }, { "epoch": 0.7789935025131789, "grad_norm": 1.8990302856724728, "learning_rate": 1.227123304078126e-06, "loss": 0.4983, "step": 25417 }, { "epoch": 0.77902415103592, "grad_norm": 1.758998878819253, "learning_rate": 1.2267976322382136e-06, "loss": 0.5425, "step": 25418 }, { "epoch": 0.7790547995586613, "grad_norm": 1.8518793109081388, "learning_rate": 1.2264719975766266e-06, "loss": 0.6056, "step": 25419 }, { "epoch": 0.7790854480814025, "grad_norm": 2.0381785939431274, "learning_rate": 1.2261464000965795e-06, "loss": 0.6036, "step": 25420 }, { "epoch": 0.7791160966041437, "grad_norm": 1.9189421665156938, "learning_rate": 1.2258208398012772e-06, "loss": 0.6112, "step": 25421 }, { "epoch": 0.7791467451268849, "grad_norm": 1.941675103282926, "learning_rate": 1.2254953166939266e-06, "loss": 0.5643, "step": 25422 }, { "epoch": 0.7791773936496261, "grad_norm": 1.737690235458732, "learning_rate": 1.2251698307777365e-06, "loss": 0.6594, "step": 25423 }, { "epoch": 0.7792080421723673, "grad_norm": 1.7306574579581848, "learning_rate": 1.2248443820559154e-06, "loss": 0.5897, "step": 25424 }, { "epoch": 0.7792386906951085, "grad_norm": 2.0174886937805163, "learning_rate": 1.2245189705316668e-06, "loss": 0.5501, "step": 25425 }, { "epoch": 0.7792693392178497, "grad_norm": 0.8356482676973969, "learning_rate": 1.2241935962081991e-06, "loss": 0.4135, "step": 25426 }, { "epoch": 0.779299987740591, "grad_norm": 1.9315939449686104, "learning_rate": 1.2238682590887174e-06, "loss": 0.6204, "step": 25427 }, { "epoch": 0.7793306362633321, "grad_norm": 1.6130614677028885, "learning_rate": 1.2235429591764303e-06, "loss": 0.5186, "step": 25428 }, { "epoch": 0.7793612847860734, "grad_norm": 1.9429803606484508, "learning_rate": 1.22321769647454e-06, "loss": 0.5706, "step": 25429 }, { "epoch": 0.7793919333088145, "grad_norm": 2.024996929745456, "learning_rate": 1.2228924709862506e-06, "loss": 0.5662, "step": 25430 }, { "epoch": 0.7794225818315558, "grad_norm": 1.8044253737404448, "learning_rate": 1.2225672827147684e-06, "loss": 0.5964, "step": 25431 }, { "epoch": 0.7794532303542969, "grad_norm": 1.732217927777637, "learning_rate": 1.2222421316632981e-06, "loss": 0.5955, "step": 25432 }, { "epoch": 0.7794838788770381, "grad_norm": 1.8845158384912615, "learning_rate": 1.221917017835042e-06, "loss": 0.6026, "step": 25433 }, { "epoch": 0.7795145273997793, "grad_norm": 1.9784903413055368, "learning_rate": 1.2215919412332038e-06, "loss": 0.5471, "step": 25434 }, { "epoch": 0.7795451759225205, "grad_norm": 1.82179905895946, "learning_rate": 1.2212669018609884e-06, "loss": 0.6175, "step": 25435 }, { "epoch": 0.7795758244452617, "grad_norm": 1.933068690110983, "learning_rate": 1.2209418997215955e-06, "loss": 0.5662, "step": 25436 }, { "epoch": 0.7796064729680029, "grad_norm": 1.8525790206068695, "learning_rate": 1.2206169348182307e-06, "loss": 0.5337, "step": 25437 }, { "epoch": 0.7796371214907442, "grad_norm": 0.7853302917462658, "learning_rate": 1.2202920071540913e-06, "loss": 0.4123, "step": 25438 }, { "epoch": 0.7796677700134853, "grad_norm": 1.846643205111925, "learning_rate": 1.2199671167323846e-06, "loss": 0.5902, "step": 25439 }, { "epoch": 0.7796984185362266, "grad_norm": 1.7339089366496419, "learning_rate": 1.2196422635563093e-06, "loss": 0.521, "step": 25440 }, { "epoch": 0.7797290670589677, "grad_norm": 2.1780951106946658, "learning_rate": 1.2193174476290643e-06, "loss": 0.6461, "step": 25441 }, { "epoch": 0.779759715581709, "grad_norm": 1.7457641721396748, "learning_rate": 1.2189926689538516e-06, "loss": 0.5026, "step": 25442 }, { "epoch": 0.7797903641044501, "grad_norm": 1.989509152831434, "learning_rate": 1.2186679275338737e-06, "loss": 0.6035, "step": 25443 }, { "epoch": 0.7798210126271914, "grad_norm": 1.8246065542092784, "learning_rate": 1.2183432233723263e-06, "loss": 0.5751, "step": 25444 }, { "epoch": 0.7798516611499325, "grad_norm": 1.8372226162859056, "learning_rate": 1.2180185564724106e-06, "loss": 0.6589, "step": 25445 }, { "epoch": 0.7798823096726738, "grad_norm": 1.7280877437785749, "learning_rate": 1.2176939268373255e-06, "loss": 0.5783, "step": 25446 }, { "epoch": 0.779912958195415, "grad_norm": 1.9280244575706036, "learning_rate": 1.217369334470272e-06, "loss": 0.5806, "step": 25447 }, { "epoch": 0.7799436067181562, "grad_norm": 0.7765963781512538, "learning_rate": 1.217044779374446e-06, "loss": 0.4151, "step": 25448 }, { "epoch": 0.7799742552408974, "grad_norm": 1.7149005189530309, "learning_rate": 1.2167202615530427e-06, "loss": 0.5377, "step": 25449 }, { "epoch": 0.7800049037636386, "grad_norm": 1.7731851006927295, "learning_rate": 1.2163957810092659e-06, "loss": 0.5076, "step": 25450 }, { "epoch": 0.7800355522863798, "grad_norm": 1.8818508354390218, "learning_rate": 1.216071337746309e-06, "loss": 0.4762, "step": 25451 }, { "epoch": 0.780066200809121, "grad_norm": 1.9867193716166152, "learning_rate": 1.2157469317673682e-06, "loss": 0.6121, "step": 25452 }, { "epoch": 0.7800968493318622, "grad_norm": 1.812192112751662, "learning_rate": 1.2154225630756411e-06, "loss": 0.5053, "step": 25453 }, { "epoch": 0.7801274978546034, "grad_norm": 1.8091194556448476, "learning_rate": 1.2150982316743236e-06, "loss": 0.5879, "step": 25454 }, { "epoch": 0.7801581463773446, "grad_norm": 1.8824417612061444, "learning_rate": 1.2147739375666134e-06, "loss": 0.601, "step": 25455 }, { "epoch": 0.7801887949000859, "grad_norm": 1.6892237373430392, "learning_rate": 1.2144496807557027e-06, "loss": 0.5358, "step": 25456 }, { "epoch": 0.780219443422827, "grad_norm": 1.6274763685982774, "learning_rate": 1.2141254612447877e-06, "loss": 0.4946, "step": 25457 }, { "epoch": 0.7802500919455683, "grad_norm": 1.7994442011329137, "learning_rate": 1.2138012790370645e-06, "loss": 0.5721, "step": 25458 }, { "epoch": 0.7802807404683094, "grad_norm": 0.7977928735146252, "learning_rate": 1.2134771341357266e-06, "loss": 0.3942, "step": 25459 }, { "epoch": 0.7803113889910507, "grad_norm": 1.933528026001969, "learning_rate": 1.2131530265439639e-06, "loss": 0.4928, "step": 25460 }, { "epoch": 0.7803420375137918, "grad_norm": 1.8225866325846856, "learning_rate": 1.2128289562649765e-06, "loss": 0.6441, "step": 25461 }, { "epoch": 0.7803726860365331, "grad_norm": 1.8746863168092436, "learning_rate": 1.2125049233019543e-06, "loss": 0.5018, "step": 25462 }, { "epoch": 0.7804033345592742, "grad_norm": 1.7554330864764605, "learning_rate": 1.2121809276580887e-06, "loss": 0.6212, "step": 25463 }, { "epoch": 0.7804339830820154, "grad_norm": 2.1339794320039287, "learning_rate": 1.2118569693365733e-06, "loss": 0.5719, "step": 25464 }, { "epoch": 0.7804646316047567, "grad_norm": 1.5776675550990054, "learning_rate": 1.2115330483406006e-06, "loss": 0.4695, "step": 25465 }, { "epoch": 0.7804952801274978, "grad_norm": 1.87853211243902, "learning_rate": 1.2112091646733636e-06, "loss": 0.581, "step": 25466 }, { "epoch": 0.7805259286502391, "grad_norm": 0.7895801879639803, "learning_rate": 1.2108853183380509e-06, "loss": 0.4116, "step": 25467 }, { "epoch": 0.7805565771729802, "grad_norm": 1.7407730981760288, "learning_rate": 1.2105615093378543e-06, "loss": 0.5082, "step": 25468 }, { "epoch": 0.7805872256957215, "grad_norm": 1.9344075471974018, "learning_rate": 1.210237737675966e-06, "loss": 0.5645, "step": 25469 }, { "epoch": 0.7806178742184626, "grad_norm": 2.1448290439075137, "learning_rate": 1.209914003355575e-06, "loss": 0.5927, "step": 25470 }, { "epoch": 0.7806485227412039, "grad_norm": 1.837392835047438, "learning_rate": 1.2095903063798687e-06, "loss": 0.5248, "step": 25471 }, { "epoch": 0.780679171263945, "grad_norm": 2.237309111093012, "learning_rate": 1.2092666467520415e-06, "loss": 0.6072, "step": 25472 }, { "epoch": 0.7807098197866863, "grad_norm": 1.8209682588190519, "learning_rate": 1.2089430244752782e-06, "loss": 0.5111, "step": 25473 }, { "epoch": 0.7807404683094274, "grad_norm": 1.659558957838385, "learning_rate": 1.2086194395527712e-06, "loss": 0.5619, "step": 25474 }, { "epoch": 0.7807711168321687, "grad_norm": 0.7909896316008342, "learning_rate": 1.2082958919877052e-06, "loss": 0.3752, "step": 25475 }, { "epoch": 0.7808017653549099, "grad_norm": 1.7648403608842196, "learning_rate": 1.20797238178327e-06, "loss": 0.5555, "step": 25476 }, { "epoch": 0.7808324138776511, "grad_norm": 1.972471201922596, "learning_rate": 1.2076489089426545e-06, "loss": 0.5657, "step": 25477 }, { "epoch": 0.7808630624003923, "grad_norm": 1.8800960173443666, "learning_rate": 1.2073254734690433e-06, "loss": 0.6147, "step": 25478 }, { "epoch": 0.7808937109231335, "grad_norm": 1.7525478315352658, "learning_rate": 1.2070020753656248e-06, "loss": 0.5577, "step": 25479 }, { "epoch": 0.7809243594458747, "grad_norm": 1.9821668662475895, "learning_rate": 1.2066787146355863e-06, "loss": 0.63, "step": 25480 }, { "epoch": 0.7809550079686159, "grad_norm": 2.0202215932811356, "learning_rate": 1.2063553912821118e-06, "loss": 0.5609, "step": 25481 }, { "epoch": 0.7809856564913571, "grad_norm": 1.858146002127331, "learning_rate": 1.2060321053083895e-06, "loss": 0.5536, "step": 25482 }, { "epoch": 0.7810163050140984, "grad_norm": 1.7848485781591836, "learning_rate": 1.2057088567176024e-06, "loss": 0.5039, "step": 25483 }, { "epoch": 0.7810469535368395, "grad_norm": 1.8761269346742995, "learning_rate": 1.2053856455129365e-06, "loss": 0.6767, "step": 25484 }, { "epoch": 0.7810776020595808, "grad_norm": 0.8257464960588935, "learning_rate": 1.2050624716975785e-06, "loss": 0.3992, "step": 25485 }, { "epoch": 0.7811082505823219, "grad_norm": 1.9256334965623039, "learning_rate": 1.2047393352747095e-06, "loss": 0.6018, "step": 25486 }, { "epoch": 0.7811388991050632, "grad_norm": 1.9334689059523567, "learning_rate": 1.2044162362475148e-06, "loss": 0.5282, "step": 25487 }, { "epoch": 0.7811695476278043, "grad_norm": 1.8267974315617903, "learning_rate": 1.2040931746191792e-06, "loss": 0.5294, "step": 25488 }, { "epoch": 0.7812001961505456, "grad_norm": 1.7339696218781457, "learning_rate": 1.203770150392885e-06, "loss": 0.5793, "step": 25489 }, { "epoch": 0.7812308446732867, "grad_norm": 1.7303123767524313, "learning_rate": 1.2034471635718121e-06, "loss": 0.5344, "step": 25490 }, { "epoch": 0.781261493196028, "grad_norm": 1.6443644008979679, "learning_rate": 1.203124214159148e-06, "loss": 0.482, "step": 25491 }, { "epoch": 0.7812921417187692, "grad_norm": 1.8374435251226766, "learning_rate": 1.202801302158072e-06, "loss": 0.6, "step": 25492 }, { "epoch": 0.7813227902415104, "grad_norm": 1.7034430114329309, "learning_rate": 1.202478427571767e-06, "loss": 0.5184, "step": 25493 }, { "epoch": 0.7813534387642516, "grad_norm": 1.7842832281681396, "learning_rate": 1.2021555904034127e-06, "loss": 0.6012, "step": 25494 }, { "epoch": 0.7813840872869927, "grad_norm": 0.7936134037844215, "learning_rate": 1.2018327906561911e-06, "loss": 0.4076, "step": 25495 }, { "epoch": 0.781414735809734, "grad_norm": 1.932145823769056, "learning_rate": 1.2015100283332838e-06, "loss": 0.6626, "step": 25496 }, { "epoch": 0.7814453843324751, "grad_norm": 1.8451534591215044, "learning_rate": 1.201187303437869e-06, "loss": 0.5927, "step": 25497 }, { "epoch": 0.7814760328552164, "grad_norm": 1.995984506727971, "learning_rate": 1.2008646159731274e-06, "loss": 0.5546, "step": 25498 }, { "epoch": 0.7815066813779575, "grad_norm": 1.9678483874722457, "learning_rate": 1.2005419659422401e-06, "loss": 0.6436, "step": 25499 }, { "epoch": 0.7815373299006988, "grad_norm": 1.7880660055591715, "learning_rate": 1.2002193533483842e-06, "loss": 0.5253, "step": 25500 }, { "epoch": 0.78156797842344, "grad_norm": 1.8864221811059234, "learning_rate": 1.1998967781947385e-06, "loss": 0.6138, "step": 25501 }, { "epoch": 0.7815986269461812, "grad_norm": 1.9273178655052015, "learning_rate": 1.199574240484484e-06, "loss": 0.5433, "step": 25502 }, { "epoch": 0.7816292754689224, "grad_norm": 1.8869990693445389, "learning_rate": 1.1992517402207954e-06, "loss": 0.628, "step": 25503 }, { "epoch": 0.7816599239916636, "grad_norm": 0.8080072928369175, "learning_rate": 1.1989292774068533e-06, "loss": 0.3932, "step": 25504 }, { "epoch": 0.7816905725144048, "grad_norm": 1.614899788860525, "learning_rate": 1.1986068520458322e-06, "loss": 0.5923, "step": 25505 }, { "epoch": 0.781721221037146, "grad_norm": 1.8068276178559246, "learning_rate": 1.1982844641409103e-06, "loss": 0.5722, "step": 25506 }, { "epoch": 0.7817518695598872, "grad_norm": 1.7473165258909782, "learning_rate": 1.1979621136952657e-06, "loss": 0.53, "step": 25507 }, { "epoch": 0.7817825180826284, "grad_norm": 2.006655676254433, "learning_rate": 1.1976398007120715e-06, "loss": 0.5616, "step": 25508 }, { "epoch": 0.7818131666053696, "grad_norm": 0.8347117606606713, "learning_rate": 1.1973175251945058e-06, "loss": 0.4109, "step": 25509 }, { "epoch": 0.7818438151281109, "grad_norm": 1.8382675845348582, "learning_rate": 1.1969952871457442e-06, "loss": 0.545, "step": 25510 }, { "epoch": 0.781874463650852, "grad_norm": 2.0196631876433613, "learning_rate": 1.1966730865689602e-06, "loss": 0.5981, "step": 25511 }, { "epoch": 0.7819051121735933, "grad_norm": 1.9371876156549523, "learning_rate": 1.1963509234673293e-06, "loss": 0.6722, "step": 25512 }, { "epoch": 0.7819357606963344, "grad_norm": 1.5558954022796738, "learning_rate": 1.196028797844027e-06, "loss": 0.5948, "step": 25513 }, { "epoch": 0.7819664092190757, "grad_norm": 1.8560942891451793, "learning_rate": 1.1957067097022252e-06, "loss": 0.622, "step": 25514 }, { "epoch": 0.7819970577418168, "grad_norm": 1.870811770979526, "learning_rate": 1.1953846590451002e-06, "loss": 0.5651, "step": 25515 }, { "epoch": 0.7820277062645581, "grad_norm": 1.9604185193880619, "learning_rate": 1.1950626458758218e-06, "loss": 0.6349, "step": 25516 }, { "epoch": 0.7820583547872992, "grad_norm": 1.893724071205314, "learning_rate": 1.194740670197565e-06, "loss": 0.6211, "step": 25517 }, { "epoch": 0.7820890033100405, "grad_norm": 1.9354084483623535, "learning_rate": 1.1944187320135031e-06, "loss": 0.5596, "step": 25518 }, { "epoch": 0.7821196518327816, "grad_norm": 1.9922043492384196, "learning_rate": 1.1940968313268058e-06, "loss": 0.5911, "step": 25519 }, { "epoch": 0.7821503003555229, "grad_norm": 1.9532987588015658, "learning_rate": 1.1937749681406464e-06, "loss": 0.634, "step": 25520 }, { "epoch": 0.7821809488782641, "grad_norm": 1.8634415273675662, "learning_rate": 1.1934531424581973e-06, "loss": 0.5593, "step": 25521 }, { "epoch": 0.7822115974010053, "grad_norm": 1.671472742444399, "learning_rate": 1.1931313542826268e-06, "loss": 0.5899, "step": 25522 }, { "epoch": 0.7822422459237465, "grad_norm": 1.8880155959396399, "learning_rate": 1.1928096036171072e-06, "loss": 0.5392, "step": 25523 }, { "epoch": 0.7822728944464877, "grad_norm": 1.9614411281607347, "learning_rate": 1.19248789046481e-06, "loss": 0.6063, "step": 25524 }, { "epoch": 0.7823035429692289, "grad_norm": 1.740684483794386, "learning_rate": 1.1921662148289027e-06, "loss": 0.5174, "step": 25525 }, { "epoch": 0.78233419149197, "grad_norm": 1.9716302209940941, "learning_rate": 1.1918445767125575e-06, "loss": 0.5812, "step": 25526 }, { "epoch": 0.7823648400147113, "grad_norm": 1.876050699096946, "learning_rate": 1.19152297611894e-06, "loss": 0.6121, "step": 25527 }, { "epoch": 0.7823954885374524, "grad_norm": 0.7682922449127667, "learning_rate": 1.1912014130512216e-06, "loss": 0.3974, "step": 25528 }, { "epoch": 0.7824261370601937, "grad_norm": 1.8794290590526108, "learning_rate": 1.1908798875125715e-06, "loss": 0.5157, "step": 25529 }, { "epoch": 0.7824567855829349, "grad_norm": 2.038987256035558, "learning_rate": 1.1905583995061548e-06, "loss": 0.6629, "step": 25530 }, { "epoch": 0.7824874341056761, "grad_norm": 2.113537004067147, "learning_rate": 1.1902369490351412e-06, "loss": 0.6091, "step": 25531 }, { "epoch": 0.7825180826284173, "grad_norm": 1.980263786878026, "learning_rate": 1.1899155361026992e-06, "loss": 0.5527, "step": 25532 }, { "epoch": 0.7825487311511585, "grad_norm": 2.236010638807427, "learning_rate": 1.1895941607119926e-06, "loss": 0.6702, "step": 25533 }, { "epoch": 0.7825793796738997, "grad_norm": 2.136739899966405, "learning_rate": 1.189272822866191e-06, "loss": 0.5792, "step": 25534 }, { "epoch": 0.7826100281966409, "grad_norm": 1.5794125407944957, "learning_rate": 1.1889515225684583e-06, "loss": 0.6001, "step": 25535 }, { "epoch": 0.7826406767193821, "grad_norm": 1.999162170812214, "learning_rate": 1.1886302598219607e-06, "loss": 0.5843, "step": 25536 }, { "epoch": 0.7826713252421234, "grad_norm": 1.8239291708473673, "learning_rate": 1.1883090346298665e-06, "loss": 0.6468, "step": 25537 }, { "epoch": 0.7827019737648645, "grad_norm": 2.0081445296308416, "learning_rate": 1.1879878469953366e-06, "loss": 0.6397, "step": 25538 }, { "epoch": 0.7827326222876058, "grad_norm": 1.7677449537829244, "learning_rate": 1.1876666969215384e-06, "loss": 0.5529, "step": 25539 }, { "epoch": 0.7827632708103469, "grad_norm": 1.921878653011173, "learning_rate": 1.1873455844116366e-06, "loss": 0.539, "step": 25540 }, { "epoch": 0.7827939193330882, "grad_norm": 1.8805462215730675, "learning_rate": 1.1870245094687926e-06, "loss": 0.5599, "step": 25541 }, { "epoch": 0.7828245678558293, "grad_norm": 1.8426211609772019, "learning_rate": 1.1867034720961722e-06, "loss": 0.6152, "step": 25542 }, { "epoch": 0.7828552163785706, "grad_norm": 1.8864192086484388, "learning_rate": 1.1863824722969396e-06, "loss": 0.5954, "step": 25543 }, { "epoch": 0.7828858649013117, "grad_norm": 1.929996317013276, "learning_rate": 1.1860615100742546e-06, "loss": 0.5664, "step": 25544 }, { "epoch": 0.782916513424053, "grad_norm": 0.8091621079768189, "learning_rate": 1.1857405854312832e-06, "loss": 0.3935, "step": 25545 }, { "epoch": 0.7829471619467941, "grad_norm": 2.036799757478781, "learning_rate": 1.1854196983711823e-06, "loss": 0.5287, "step": 25546 }, { "epoch": 0.7829778104695354, "grad_norm": 1.7352745772650258, "learning_rate": 1.1850988488971205e-06, "loss": 0.5589, "step": 25547 }, { "epoch": 0.7830084589922766, "grad_norm": 1.8517170527735176, "learning_rate": 1.1847780370122552e-06, "loss": 0.5149, "step": 25548 }, { "epoch": 0.7830391075150178, "grad_norm": 2.016789227163648, "learning_rate": 1.184457262719747e-06, "loss": 0.6227, "step": 25549 }, { "epoch": 0.783069756037759, "grad_norm": 1.884364346409054, "learning_rate": 1.1841365260227578e-06, "loss": 0.5971, "step": 25550 }, { "epoch": 0.7831004045605002, "grad_norm": 2.0733367121535577, "learning_rate": 1.1838158269244488e-06, "loss": 0.566, "step": 25551 }, { "epoch": 0.7831310530832414, "grad_norm": 1.7607742147154741, "learning_rate": 1.1834951654279775e-06, "loss": 0.6521, "step": 25552 }, { "epoch": 0.7831617016059826, "grad_norm": 1.7936697605314413, "learning_rate": 1.1831745415365054e-06, "loss": 0.4597, "step": 25553 }, { "epoch": 0.7831923501287238, "grad_norm": 1.8015753744187202, "learning_rate": 1.1828539552531903e-06, "loss": 0.6069, "step": 25554 }, { "epoch": 0.783222998651465, "grad_norm": 1.8851216349676905, "learning_rate": 1.182533406581194e-06, "loss": 0.5534, "step": 25555 }, { "epoch": 0.7832536471742062, "grad_norm": 0.8124082533278855, "learning_rate": 1.1822128955236722e-06, "loss": 0.3968, "step": 25556 }, { "epoch": 0.7832842956969474, "grad_norm": 1.8099481058511488, "learning_rate": 1.1818924220837812e-06, "loss": 0.6114, "step": 25557 }, { "epoch": 0.7833149442196886, "grad_norm": 1.8329388460698022, "learning_rate": 1.1815719862646835e-06, "loss": 0.567, "step": 25558 }, { "epoch": 0.7833455927424298, "grad_norm": 2.036050647272853, "learning_rate": 1.1812515880695342e-06, "loss": 0.6352, "step": 25559 }, { "epoch": 0.783376241265171, "grad_norm": 2.0918945066827925, "learning_rate": 1.180931227501489e-06, "loss": 0.6527, "step": 25560 }, { "epoch": 0.7834068897879122, "grad_norm": 2.054677305368272, "learning_rate": 1.1806109045637048e-06, "loss": 0.5543, "step": 25561 }, { "epoch": 0.7834375383106534, "grad_norm": 2.0919218887449875, "learning_rate": 1.1802906192593404e-06, "loss": 0.5872, "step": 25562 }, { "epoch": 0.7834681868333946, "grad_norm": 1.6119745156130327, "learning_rate": 1.1799703715915485e-06, "loss": 0.5887, "step": 25563 }, { "epoch": 0.7834988353561358, "grad_norm": 1.8524861435443771, "learning_rate": 1.179650161563486e-06, "loss": 0.6166, "step": 25564 }, { "epoch": 0.783529483878877, "grad_norm": 1.7106221456885078, "learning_rate": 1.1793299891783078e-06, "loss": 0.557, "step": 25565 }, { "epoch": 0.7835601324016183, "grad_norm": 1.796679861139894, "learning_rate": 1.1790098544391699e-06, "loss": 0.5403, "step": 25566 }, { "epoch": 0.7835907809243594, "grad_norm": 2.02425143792035, "learning_rate": 1.1786897573492262e-06, "loss": 0.6641, "step": 25567 }, { "epoch": 0.7836214294471007, "grad_norm": 1.9950176920864904, "learning_rate": 1.1783696979116265e-06, "loss": 0.5543, "step": 25568 }, { "epoch": 0.7836520779698418, "grad_norm": 1.7686315843244333, "learning_rate": 1.1780496761295312e-06, "loss": 0.5972, "step": 25569 }, { "epoch": 0.7836827264925831, "grad_norm": 2.298778537065116, "learning_rate": 1.1777296920060905e-06, "loss": 0.6085, "step": 25570 }, { "epoch": 0.7837133750153242, "grad_norm": 2.6051507364299757, "learning_rate": 1.1774097455444554e-06, "loss": 0.6383, "step": 25571 }, { "epoch": 0.7837440235380655, "grad_norm": 0.7874421508022017, "learning_rate": 1.17708983674778e-06, "loss": 0.3852, "step": 25572 }, { "epoch": 0.7837746720608066, "grad_norm": 1.9644427767609698, "learning_rate": 1.1767699656192172e-06, "loss": 0.6202, "step": 25573 }, { "epoch": 0.7838053205835479, "grad_norm": 1.903226753773476, "learning_rate": 1.1764501321619186e-06, "loss": 0.5639, "step": 25574 }, { "epoch": 0.783835969106289, "grad_norm": 2.105942463013197, "learning_rate": 1.1761303363790343e-06, "loss": 0.6155, "step": 25575 }, { "epoch": 0.7838666176290303, "grad_norm": 2.280672470701753, "learning_rate": 1.1758105782737167e-06, "loss": 0.5418, "step": 25576 }, { "epoch": 0.7838972661517715, "grad_norm": 0.789093146168813, "learning_rate": 1.1754908578491164e-06, "loss": 0.3977, "step": 25577 }, { "epoch": 0.7839279146745127, "grad_norm": 1.7344315528393568, "learning_rate": 1.175171175108384e-06, "loss": 0.5232, "step": 25578 }, { "epoch": 0.7839585631972539, "grad_norm": 1.7309331097547225, "learning_rate": 1.1748515300546666e-06, "loss": 0.5679, "step": 25579 }, { "epoch": 0.7839892117199951, "grad_norm": 1.7707555286689194, "learning_rate": 1.174531922691116e-06, "loss": 0.5838, "step": 25580 }, { "epoch": 0.7840198602427363, "grad_norm": 1.7464958530403942, "learning_rate": 1.174212353020881e-06, "loss": 0.7272, "step": 25581 }, { "epoch": 0.7840505087654775, "grad_norm": 1.6774384943974456, "learning_rate": 1.1738928210471124e-06, "loss": 0.5187, "step": 25582 }, { "epoch": 0.7840811572882187, "grad_norm": 0.7947921045594606, "learning_rate": 1.173573326772955e-06, "loss": 0.38, "step": 25583 }, { "epoch": 0.78411180581096, "grad_norm": 1.9163016123761394, "learning_rate": 1.173253870201559e-06, "loss": 0.6579, "step": 25584 }, { "epoch": 0.7841424543337011, "grad_norm": 1.743981943428278, "learning_rate": 1.172934451336073e-06, "loss": 0.5601, "step": 25585 }, { "epoch": 0.7841731028564424, "grad_norm": 1.8519655907563166, "learning_rate": 1.172615070179643e-06, "loss": 0.5645, "step": 25586 }, { "epoch": 0.7842037513791835, "grad_norm": 1.864466337722945, "learning_rate": 1.172295726735413e-06, "loss": 0.6248, "step": 25587 }, { "epoch": 0.7842343999019247, "grad_norm": 0.8801204673574002, "learning_rate": 1.1719764210065354e-06, "loss": 0.4203, "step": 25588 }, { "epoch": 0.7842650484246659, "grad_norm": 1.4942644745866673, "learning_rate": 1.1716571529961535e-06, "loss": 0.5876, "step": 25589 }, { "epoch": 0.7842956969474071, "grad_norm": 1.7428638811368102, "learning_rate": 1.1713379227074123e-06, "loss": 0.5534, "step": 25590 }, { "epoch": 0.7843263454701483, "grad_norm": 1.9780117399772432, "learning_rate": 1.1710187301434578e-06, "loss": 0.617, "step": 25591 }, { "epoch": 0.7843569939928895, "grad_norm": 2.096028775460724, "learning_rate": 1.1706995753074352e-06, "loss": 0.6393, "step": 25592 }, { "epoch": 0.7843876425156308, "grad_norm": 2.0320856457821783, "learning_rate": 1.1703804582024914e-06, "loss": 0.6143, "step": 25593 }, { "epoch": 0.7844182910383719, "grad_norm": 1.8374403695072348, "learning_rate": 1.1700613788317666e-06, "loss": 0.561, "step": 25594 }, { "epoch": 0.7844489395611132, "grad_norm": 0.7838493448541929, "learning_rate": 1.1697423371984079e-06, "loss": 0.4164, "step": 25595 }, { "epoch": 0.7844795880838543, "grad_norm": 1.8064642389454857, "learning_rate": 1.169423333305559e-06, "loss": 0.5279, "step": 25596 }, { "epoch": 0.7845102366065956, "grad_norm": 1.8978374979007542, "learning_rate": 1.1691043671563619e-06, "loss": 0.5784, "step": 25597 }, { "epoch": 0.7845408851293367, "grad_norm": 1.854611296622981, "learning_rate": 1.1687854387539566e-06, "loss": 0.585, "step": 25598 }, { "epoch": 0.784571533652078, "grad_norm": 1.6479425642055983, "learning_rate": 1.1684665481014922e-06, "loss": 0.627, "step": 25599 }, { "epoch": 0.7846021821748191, "grad_norm": 1.784048312730774, "learning_rate": 1.1681476952021054e-06, "loss": 0.5573, "step": 25600 }, { "epoch": 0.7846328306975604, "grad_norm": 1.9047477551453642, "learning_rate": 1.167828880058941e-06, "loss": 0.5734, "step": 25601 }, { "epoch": 0.7846634792203016, "grad_norm": 1.8507187216918959, "learning_rate": 1.1675101026751378e-06, "loss": 0.5987, "step": 25602 }, { "epoch": 0.7846941277430428, "grad_norm": 1.7482305462331633, "learning_rate": 1.1671913630538384e-06, "loss": 0.5552, "step": 25603 }, { "epoch": 0.784724776265784, "grad_norm": 1.8396251199802252, "learning_rate": 1.1668726611981846e-06, "loss": 0.6695, "step": 25604 }, { "epoch": 0.7847554247885252, "grad_norm": 1.8594267289883233, "learning_rate": 1.1665539971113138e-06, "loss": 0.5321, "step": 25605 }, { "epoch": 0.7847860733112664, "grad_norm": 1.846242104861063, "learning_rate": 1.166235370796367e-06, "loss": 0.5763, "step": 25606 }, { "epoch": 0.7848167218340076, "grad_norm": 2.035663560784449, "learning_rate": 1.165916782256486e-06, "loss": 0.6692, "step": 25607 }, { "epoch": 0.7848473703567488, "grad_norm": 1.5814053939183326, "learning_rate": 1.165598231494806e-06, "loss": 0.4151, "step": 25608 }, { "epoch": 0.78487801887949, "grad_norm": 0.7733966431608071, "learning_rate": 1.1652797185144677e-06, "loss": 0.3884, "step": 25609 }, { "epoch": 0.7849086674022312, "grad_norm": 1.9579217296039682, "learning_rate": 1.1649612433186108e-06, "loss": 0.5532, "step": 25610 }, { "epoch": 0.7849393159249725, "grad_norm": 1.9216209946107115, "learning_rate": 1.1646428059103709e-06, "loss": 0.6234, "step": 25611 }, { "epoch": 0.7849699644477136, "grad_norm": 1.8703794677942303, "learning_rate": 1.1643244062928881e-06, "loss": 0.6068, "step": 25612 }, { "epoch": 0.7850006129704549, "grad_norm": 1.7620639674454057, "learning_rate": 1.1640060444692968e-06, "loss": 0.5024, "step": 25613 }, { "epoch": 0.785031261493196, "grad_norm": 1.736769132345883, "learning_rate": 1.163687720442736e-06, "loss": 0.5395, "step": 25614 }, { "epoch": 0.7850619100159373, "grad_norm": 1.9396663405503274, "learning_rate": 1.1633694342163426e-06, "loss": 0.5702, "step": 25615 }, { "epoch": 0.7850925585386784, "grad_norm": 0.810204543308388, "learning_rate": 1.1630511857932504e-06, "loss": 0.382, "step": 25616 }, { "epoch": 0.7851232070614197, "grad_norm": 0.7920873845855663, "learning_rate": 1.1627329751765964e-06, "loss": 0.3925, "step": 25617 }, { "epoch": 0.7851538555841608, "grad_norm": 1.7810224970426516, "learning_rate": 1.1624148023695175e-06, "loss": 0.5086, "step": 25618 }, { "epoch": 0.785184504106902, "grad_norm": 1.7210807027014947, "learning_rate": 1.1620966673751466e-06, "loss": 0.5752, "step": 25619 }, { "epoch": 0.7852151526296433, "grad_norm": 1.9310199608444223, "learning_rate": 1.1617785701966188e-06, "loss": 0.6486, "step": 25620 }, { "epoch": 0.7852458011523844, "grad_norm": 1.8172119448385147, "learning_rate": 1.1614605108370703e-06, "loss": 0.5279, "step": 25621 }, { "epoch": 0.7852764496751257, "grad_norm": 1.8890912312441974, "learning_rate": 1.1611424892996327e-06, "loss": 0.5947, "step": 25622 }, { "epoch": 0.7853070981978668, "grad_norm": 1.7160535239908234, "learning_rate": 1.1608245055874407e-06, "loss": 0.5251, "step": 25623 }, { "epoch": 0.7853377467206081, "grad_norm": 1.901824710017792, "learning_rate": 1.1605065597036264e-06, "loss": 0.6681, "step": 25624 }, { "epoch": 0.7853683952433492, "grad_norm": 0.8192769736573641, "learning_rate": 1.1601886516513234e-06, "loss": 0.399, "step": 25625 }, { "epoch": 0.7853990437660905, "grad_norm": 1.7409619243828682, "learning_rate": 1.159870781433665e-06, "loss": 0.4774, "step": 25626 }, { "epoch": 0.7854296922888316, "grad_norm": 1.7972995488960952, "learning_rate": 1.1595529490537815e-06, "loss": 0.5403, "step": 25627 }, { "epoch": 0.7854603408115729, "grad_norm": 1.8038974189988566, "learning_rate": 1.1592351545148051e-06, "loss": 0.5704, "step": 25628 }, { "epoch": 0.785490989334314, "grad_norm": 2.2232571704118715, "learning_rate": 1.1589173978198687e-06, "loss": 0.6018, "step": 25629 }, { "epoch": 0.7855216378570553, "grad_norm": 1.8566855808104021, "learning_rate": 1.1585996789721004e-06, "loss": 0.6753, "step": 25630 }, { "epoch": 0.7855522863797965, "grad_norm": 1.8481001183865218, "learning_rate": 1.1582819979746347e-06, "loss": 0.5467, "step": 25631 }, { "epoch": 0.7855829349025377, "grad_norm": 2.044876785417199, "learning_rate": 1.157964354830597e-06, "loss": 0.5894, "step": 25632 }, { "epoch": 0.7856135834252789, "grad_norm": 1.9169288522844508, "learning_rate": 1.1576467495431199e-06, "loss": 0.6327, "step": 25633 }, { "epoch": 0.7856442319480201, "grad_norm": 0.8233247976709562, "learning_rate": 1.1573291821153338e-06, "loss": 0.4077, "step": 25634 }, { "epoch": 0.7856748804707613, "grad_norm": 1.9168976009429755, "learning_rate": 1.157011652550365e-06, "loss": 0.5323, "step": 25635 }, { "epoch": 0.7857055289935025, "grad_norm": 1.909237428416549, "learning_rate": 1.1566941608513438e-06, "loss": 0.5957, "step": 25636 }, { "epoch": 0.7857361775162437, "grad_norm": 1.7452430288705314, "learning_rate": 1.1563767070214e-06, "loss": 0.5879, "step": 25637 }, { "epoch": 0.785766826038985, "grad_norm": 1.9610915986712318, "learning_rate": 1.1560592910636582e-06, "loss": 0.5363, "step": 25638 }, { "epoch": 0.7857974745617261, "grad_norm": 1.8776103481367277, "learning_rate": 1.155741912981248e-06, "loss": 0.6578, "step": 25639 }, { "epoch": 0.7858281230844674, "grad_norm": 1.9726636985994037, "learning_rate": 1.1554245727772978e-06, "loss": 0.5366, "step": 25640 }, { "epoch": 0.7858587716072085, "grad_norm": 1.7854971899740806, "learning_rate": 1.1551072704549309e-06, "loss": 0.5414, "step": 25641 }, { "epoch": 0.7858894201299498, "grad_norm": 1.808059718543966, "learning_rate": 1.1547900060172779e-06, "loss": 0.5848, "step": 25642 }, { "epoch": 0.7859200686526909, "grad_norm": 1.892252903165144, "learning_rate": 1.154472779467461e-06, "loss": 0.5797, "step": 25643 }, { "epoch": 0.7859507171754322, "grad_norm": 1.8710268183950054, "learning_rate": 1.1541555908086077e-06, "loss": 0.5317, "step": 25644 }, { "epoch": 0.7859813656981733, "grad_norm": 1.825259335821043, "learning_rate": 1.1538384400438451e-06, "loss": 0.6045, "step": 25645 }, { "epoch": 0.7860120142209146, "grad_norm": 2.207541899416707, "learning_rate": 1.153521327176295e-06, "loss": 0.5714, "step": 25646 }, { "epoch": 0.7860426627436558, "grad_norm": 1.9086537868483093, "learning_rate": 1.153204252209083e-06, "loss": 0.5953, "step": 25647 }, { "epoch": 0.786073311266397, "grad_norm": 1.9165482012498356, "learning_rate": 1.1528872151453357e-06, "loss": 0.6281, "step": 25648 }, { "epoch": 0.7861039597891382, "grad_norm": 1.8888487125199735, "learning_rate": 1.1525702159881735e-06, "loss": 0.6435, "step": 25649 }, { "epoch": 0.7861346083118793, "grad_norm": 2.2097493200317313, "learning_rate": 1.1522532547407212e-06, "loss": 0.6534, "step": 25650 }, { "epoch": 0.7861652568346206, "grad_norm": 1.870642914067741, "learning_rate": 1.1519363314061033e-06, "loss": 0.6118, "step": 25651 }, { "epoch": 0.7861959053573617, "grad_norm": 2.1239714317486573, "learning_rate": 1.1516194459874403e-06, "loss": 0.6082, "step": 25652 }, { "epoch": 0.786226553880103, "grad_norm": 1.7120261010487192, "learning_rate": 1.1513025984878567e-06, "loss": 0.513, "step": 25653 }, { "epoch": 0.7862572024028441, "grad_norm": 1.9982386603093907, "learning_rate": 1.1509857889104704e-06, "loss": 0.688, "step": 25654 }, { "epoch": 0.7862878509255854, "grad_norm": 1.895794079719645, "learning_rate": 1.150669017258409e-06, "loss": 0.5812, "step": 25655 }, { "epoch": 0.7863184994483265, "grad_norm": 1.9360601672573337, "learning_rate": 1.1503522835347908e-06, "loss": 0.5117, "step": 25656 }, { "epoch": 0.7863491479710678, "grad_norm": 1.8521998634602193, "learning_rate": 1.1500355877427348e-06, "loss": 0.5741, "step": 25657 }, { "epoch": 0.786379796493809, "grad_norm": 1.8928892426322224, "learning_rate": 1.1497189298853634e-06, "loss": 0.6195, "step": 25658 }, { "epoch": 0.7864104450165502, "grad_norm": 1.9231033131899737, "learning_rate": 1.1494023099657975e-06, "loss": 0.6278, "step": 25659 }, { "epoch": 0.7864410935392914, "grad_norm": 1.9099769669206585, "learning_rate": 1.1490857279871548e-06, "loss": 0.5317, "step": 25660 }, { "epoch": 0.7864717420620326, "grad_norm": 0.7691002765438859, "learning_rate": 1.1487691839525561e-06, "loss": 0.3908, "step": 25661 }, { "epoch": 0.7865023905847738, "grad_norm": 1.8384381731152966, "learning_rate": 1.1484526778651195e-06, "loss": 0.5875, "step": 25662 }, { "epoch": 0.786533039107515, "grad_norm": 1.8255319955939604, "learning_rate": 1.1481362097279653e-06, "loss": 0.5592, "step": 25663 }, { "epoch": 0.7865636876302562, "grad_norm": 1.613420647317439, "learning_rate": 1.147819779544211e-06, "loss": 0.5552, "step": 25664 }, { "epoch": 0.7865943361529975, "grad_norm": 1.5722181669108917, "learning_rate": 1.1475033873169728e-06, "loss": 0.483, "step": 25665 }, { "epoch": 0.7866249846757386, "grad_norm": 2.0358690913923856, "learning_rate": 1.147187033049369e-06, "loss": 0.6762, "step": 25666 }, { "epoch": 0.7866556331984799, "grad_norm": 1.9592261409302976, "learning_rate": 1.1468707167445187e-06, "loss": 0.6596, "step": 25667 }, { "epoch": 0.786686281721221, "grad_norm": 0.7772816235770293, "learning_rate": 1.1465544384055355e-06, "loss": 0.3903, "step": 25668 }, { "epoch": 0.7867169302439623, "grad_norm": 1.9236085203289683, "learning_rate": 1.1462381980355381e-06, "loss": 0.5277, "step": 25669 }, { "epoch": 0.7867475787667034, "grad_norm": 2.0494583849894346, "learning_rate": 1.1459219956376421e-06, "loss": 0.6267, "step": 25670 }, { "epoch": 0.7867782272894447, "grad_norm": 1.8277934090628583, "learning_rate": 1.145605831214962e-06, "loss": 0.5744, "step": 25671 }, { "epoch": 0.7868088758121858, "grad_norm": 1.7411982741222904, "learning_rate": 1.145289704770614e-06, "loss": 0.5325, "step": 25672 }, { "epoch": 0.7868395243349271, "grad_norm": 2.5590662801631696, "learning_rate": 1.1449736163077125e-06, "loss": 0.6953, "step": 25673 }, { "epoch": 0.7868701728576682, "grad_norm": 1.8170873177262017, "learning_rate": 1.144657565829374e-06, "loss": 0.5843, "step": 25674 }, { "epoch": 0.7869008213804095, "grad_norm": 2.027625266066971, "learning_rate": 1.1443415533387103e-06, "loss": 0.6305, "step": 25675 }, { "epoch": 0.7869314699031507, "grad_norm": 2.083860343337145, "learning_rate": 1.144025578838835e-06, "loss": 0.6226, "step": 25676 }, { "epoch": 0.7869621184258919, "grad_norm": 1.6389142243359496, "learning_rate": 1.143709642332862e-06, "loss": 0.5542, "step": 25677 }, { "epoch": 0.7869927669486331, "grad_norm": 1.590653846139505, "learning_rate": 1.1433937438239062e-06, "loss": 0.5283, "step": 25678 }, { "epoch": 0.7870234154713743, "grad_norm": 1.88684274259642, "learning_rate": 1.1430778833150768e-06, "loss": 0.5857, "step": 25679 }, { "epoch": 0.7870540639941155, "grad_norm": 2.014558046828614, "learning_rate": 1.1427620608094881e-06, "loss": 0.6237, "step": 25680 }, { "epoch": 0.7870847125168566, "grad_norm": 1.6692505618068267, "learning_rate": 1.1424462763102517e-06, "loss": 0.6487, "step": 25681 }, { "epoch": 0.7871153610395979, "grad_norm": 1.7579977676517642, "learning_rate": 1.1421305298204805e-06, "loss": 0.5309, "step": 25682 }, { "epoch": 0.787146009562339, "grad_norm": 2.0632654077345647, "learning_rate": 1.1418148213432846e-06, "loss": 0.5395, "step": 25683 }, { "epoch": 0.7871766580850803, "grad_norm": 1.7621415318802536, "learning_rate": 1.1414991508817713e-06, "loss": 0.6224, "step": 25684 }, { "epoch": 0.7872073066078215, "grad_norm": 1.879171496025268, "learning_rate": 1.1411835184390569e-06, "loss": 0.5838, "step": 25685 }, { "epoch": 0.7872379551305627, "grad_norm": 1.8561768619943328, "learning_rate": 1.1408679240182485e-06, "loss": 0.5106, "step": 25686 }, { "epoch": 0.7872686036533039, "grad_norm": 1.7868907852963172, "learning_rate": 1.1405523676224551e-06, "loss": 0.5079, "step": 25687 }, { "epoch": 0.7872992521760451, "grad_norm": 1.969347006674886, "learning_rate": 1.140236849254786e-06, "loss": 0.6648, "step": 25688 }, { "epoch": 0.7873299006987863, "grad_norm": 1.8199204756381435, "learning_rate": 1.1399213689183509e-06, "loss": 0.52, "step": 25689 }, { "epoch": 0.7873605492215275, "grad_norm": 1.9717954536909663, "learning_rate": 1.1396059266162596e-06, "loss": 0.7031, "step": 25690 }, { "epoch": 0.7873911977442687, "grad_norm": 0.8004217296433636, "learning_rate": 1.1392905223516175e-06, "loss": 0.4143, "step": 25691 }, { "epoch": 0.78742184626701, "grad_norm": 1.724391063391672, "learning_rate": 1.1389751561275336e-06, "loss": 0.5379, "step": 25692 }, { "epoch": 0.7874524947897511, "grad_norm": 2.1216363250935917, "learning_rate": 1.1386598279471174e-06, "loss": 0.6701, "step": 25693 }, { "epoch": 0.7874831433124924, "grad_norm": 2.0158827376263218, "learning_rate": 1.1383445378134734e-06, "loss": 0.5946, "step": 25694 }, { "epoch": 0.7875137918352335, "grad_norm": 2.3827908408844944, "learning_rate": 1.1380292857297053e-06, "loss": 0.6098, "step": 25695 }, { "epoch": 0.7875444403579748, "grad_norm": 0.783131588281687, "learning_rate": 1.1377140716989265e-06, "loss": 0.4058, "step": 25696 }, { "epoch": 0.7875750888807159, "grad_norm": 1.8794484468869455, "learning_rate": 1.1373988957242388e-06, "loss": 0.6268, "step": 25697 }, { "epoch": 0.7876057374034572, "grad_norm": 1.769229335724374, "learning_rate": 1.1370837578087468e-06, "loss": 0.4851, "step": 25698 }, { "epoch": 0.7876363859261983, "grad_norm": 1.9029736601124614, "learning_rate": 1.1367686579555565e-06, "loss": 0.5931, "step": 25699 }, { "epoch": 0.7876670344489396, "grad_norm": 1.8929493620774591, "learning_rate": 1.1364535961677736e-06, "loss": 0.4703, "step": 25700 }, { "epoch": 0.7876976829716807, "grad_norm": 1.8945231235020152, "learning_rate": 1.136138572448503e-06, "loss": 0.6285, "step": 25701 }, { "epoch": 0.787728331494422, "grad_norm": 1.8734663908225966, "learning_rate": 1.1358235868008466e-06, "loss": 0.5932, "step": 25702 }, { "epoch": 0.7877589800171632, "grad_norm": 1.9468322588618536, "learning_rate": 1.1355086392279085e-06, "loss": 0.5681, "step": 25703 }, { "epoch": 0.7877896285399044, "grad_norm": 1.8929176246178465, "learning_rate": 1.1351937297327942e-06, "loss": 0.5468, "step": 25704 }, { "epoch": 0.7878202770626456, "grad_norm": 2.0484879189298324, "learning_rate": 1.1348788583186054e-06, "loss": 0.6176, "step": 25705 }, { "epoch": 0.7878509255853868, "grad_norm": 1.9039432736286452, "learning_rate": 1.134564024988441e-06, "loss": 0.5631, "step": 25706 }, { "epoch": 0.787881574108128, "grad_norm": 1.8904853051051402, "learning_rate": 1.134249229745409e-06, "loss": 0.6112, "step": 25707 }, { "epoch": 0.7879122226308692, "grad_norm": 2.0356131291070927, "learning_rate": 1.133934472592607e-06, "loss": 0.592, "step": 25708 }, { "epoch": 0.7879428711536104, "grad_norm": 2.0630740374864556, "learning_rate": 1.1336197535331395e-06, "loss": 0.6012, "step": 25709 }, { "epoch": 0.7879735196763517, "grad_norm": 1.5946304613919993, "learning_rate": 1.133305072570104e-06, "loss": 0.6115, "step": 25710 }, { "epoch": 0.7880041681990928, "grad_norm": 1.9195050315291362, "learning_rate": 1.132990429706603e-06, "loss": 0.5041, "step": 25711 }, { "epoch": 0.788034816721834, "grad_norm": 1.772918169131188, "learning_rate": 1.1326758249457387e-06, "loss": 0.6134, "step": 25712 }, { "epoch": 0.7880654652445752, "grad_norm": 1.9733430501304678, "learning_rate": 1.1323612582906069e-06, "loss": 0.5447, "step": 25713 }, { "epoch": 0.7880961137673164, "grad_norm": 1.6339319566928525, "learning_rate": 1.1320467297443094e-06, "loss": 0.4575, "step": 25714 }, { "epoch": 0.7881267622900576, "grad_norm": 2.034848614908111, "learning_rate": 1.1317322393099468e-06, "loss": 0.5606, "step": 25715 }, { "epoch": 0.7881574108127988, "grad_norm": 1.7022424047544413, "learning_rate": 1.1314177869906163e-06, "loss": 0.6203, "step": 25716 }, { "epoch": 0.78818805933554, "grad_norm": 1.8693575653713563, "learning_rate": 1.1311033727894144e-06, "loss": 0.5776, "step": 25717 }, { "epoch": 0.7882187078582812, "grad_norm": 1.8895149063166807, "learning_rate": 1.130788996709441e-06, "loss": 0.5613, "step": 25718 }, { "epoch": 0.7882493563810224, "grad_norm": 1.7998053082651104, "learning_rate": 1.1304746587537935e-06, "loss": 0.5621, "step": 25719 }, { "epoch": 0.7882800049037636, "grad_norm": 1.7630972169982786, "learning_rate": 1.1301603589255705e-06, "loss": 0.5259, "step": 25720 }, { "epoch": 0.7883106534265049, "grad_norm": 1.6532125854427955, "learning_rate": 1.1298460972278663e-06, "loss": 0.5105, "step": 25721 }, { "epoch": 0.788341301949246, "grad_norm": 1.780206920737253, "learning_rate": 1.129531873663779e-06, "loss": 0.5464, "step": 25722 }, { "epoch": 0.7883719504719873, "grad_norm": 0.8190016879983659, "learning_rate": 1.129217688236406e-06, "loss": 0.4034, "step": 25723 }, { "epoch": 0.7884025989947284, "grad_norm": 1.8865081209012018, "learning_rate": 1.1289035409488391e-06, "loss": 0.4262, "step": 25724 }, { "epoch": 0.7884332475174697, "grad_norm": 0.7798930817471839, "learning_rate": 1.1285894318041769e-06, "loss": 0.4213, "step": 25725 }, { "epoch": 0.7884638960402108, "grad_norm": 1.6608144921281376, "learning_rate": 1.1282753608055152e-06, "loss": 0.5891, "step": 25726 }, { "epoch": 0.7884945445629521, "grad_norm": 1.7505975668992235, "learning_rate": 1.127961327955945e-06, "loss": 0.5386, "step": 25727 }, { "epoch": 0.7885251930856932, "grad_norm": 1.7812592246947043, "learning_rate": 1.127647333258564e-06, "loss": 0.4731, "step": 25728 }, { "epoch": 0.7885558416084345, "grad_norm": 1.9482627547634486, "learning_rate": 1.1273333767164634e-06, "loss": 0.5401, "step": 25729 }, { "epoch": 0.7885864901311757, "grad_norm": 2.0121553373332683, "learning_rate": 1.127019458332738e-06, "loss": 0.5778, "step": 25730 }, { "epoch": 0.7886171386539169, "grad_norm": 1.8268850698348729, "learning_rate": 1.126705578110482e-06, "loss": 0.6526, "step": 25731 }, { "epoch": 0.7886477871766581, "grad_norm": 2.0793170214438033, "learning_rate": 1.126391736052786e-06, "loss": 0.5874, "step": 25732 }, { "epoch": 0.7886784356993993, "grad_norm": 2.020186068571469, "learning_rate": 1.1260779321627429e-06, "loss": 0.6141, "step": 25733 }, { "epoch": 0.7887090842221405, "grad_norm": 1.9098481423915588, "learning_rate": 1.1257641664434466e-06, "loss": 0.6906, "step": 25734 }, { "epoch": 0.7887397327448817, "grad_norm": 1.95257480091386, "learning_rate": 1.1254504388979859e-06, "loss": 0.6436, "step": 25735 }, { "epoch": 0.7887703812676229, "grad_norm": 2.0329813520974724, "learning_rate": 1.125136749529453e-06, "loss": 0.598, "step": 25736 }, { "epoch": 0.7888010297903641, "grad_norm": 1.7883237279501585, "learning_rate": 1.1248230983409409e-06, "loss": 0.5157, "step": 25737 }, { "epoch": 0.7888316783131053, "grad_norm": 1.8996986247474337, "learning_rate": 1.124509485335537e-06, "loss": 0.5697, "step": 25738 }, { "epoch": 0.7888623268358466, "grad_norm": 2.019509430173143, "learning_rate": 1.124195910516334e-06, "loss": 0.5473, "step": 25739 }, { "epoch": 0.7888929753585877, "grad_norm": 1.696329292018439, "learning_rate": 1.123882373886419e-06, "loss": 0.5886, "step": 25740 }, { "epoch": 0.788923623881329, "grad_norm": 0.8229543816059511, "learning_rate": 1.1235688754488828e-06, "loss": 0.4023, "step": 25741 }, { "epoch": 0.7889542724040701, "grad_norm": 2.104088856655647, "learning_rate": 1.1232554152068154e-06, "loss": 0.5416, "step": 25742 }, { "epoch": 0.7889849209268113, "grad_norm": 0.7841715084486532, "learning_rate": 1.122941993163303e-06, "loss": 0.388, "step": 25743 }, { "epoch": 0.7890155694495525, "grad_norm": 1.7177865928951856, "learning_rate": 1.122628609321435e-06, "loss": 0.6317, "step": 25744 }, { "epoch": 0.7890462179722937, "grad_norm": 0.8256783996254407, "learning_rate": 1.1223152636843016e-06, "loss": 0.4238, "step": 25745 }, { "epoch": 0.789076866495035, "grad_norm": 1.9202985172585352, "learning_rate": 1.1220019562549856e-06, "loss": 0.5675, "step": 25746 }, { "epoch": 0.7891075150177761, "grad_norm": 2.1099399686904308, "learning_rate": 1.1216886870365774e-06, "loss": 0.6294, "step": 25747 }, { "epoch": 0.7891381635405174, "grad_norm": 1.697674146117691, "learning_rate": 1.1213754560321638e-06, "loss": 0.5844, "step": 25748 }, { "epoch": 0.7891688120632585, "grad_norm": 1.7384654081829347, "learning_rate": 1.1210622632448287e-06, "loss": 0.4928, "step": 25749 }, { "epoch": 0.7891994605859998, "grad_norm": 1.9512946671436373, "learning_rate": 1.1207491086776613e-06, "loss": 0.4977, "step": 25750 }, { "epoch": 0.7892301091087409, "grad_norm": 1.8921650776999708, "learning_rate": 1.1204359923337437e-06, "loss": 0.5606, "step": 25751 }, { "epoch": 0.7892607576314822, "grad_norm": 0.7975247440459553, "learning_rate": 1.1201229142161634e-06, "loss": 0.4042, "step": 25752 }, { "epoch": 0.7892914061542233, "grad_norm": 1.88367691200478, "learning_rate": 1.1198098743280056e-06, "loss": 0.5642, "step": 25753 }, { "epoch": 0.7893220546769646, "grad_norm": 1.9449373997845831, "learning_rate": 1.1194968726723533e-06, "loss": 0.5972, "step": 25754 }, { "epoch": 0.7893527031997057, "grad_norm": 2.056291962809844, "learning_rate": 1.1191839092522904e-06, "loss": 0.6399, "step": 25755 }, { "epoch": 0.789383351722447, "grad_norm": 1.8060432114924956, "learning_rate": 1.1188709840709028e-06, "loss": 0.4402, "step": 25756 }, { "epoch": 0.7894140002451882, "grad_norm": 1.888446910168868, "learning_rate": 1.1185580971312716e-06, "loss": 0.5668, "step": 25757 }, { "epoch": 0.7894446487679294, "grad_norm": 1.7997957559713742, "learning_rate": 1.1182452484364803e-06, "loss": 0.6154, "step": 25758 }, { "epoch": 0.7894752972906706, "grad_norm": 1.743806328802246, "learning_rate": 1.117932437989613e-06, "loss": 0.601, "step": 25759 }, { "epoch": 0.7895059458134118, "grad_norm": 1.8709187502374023, "learning_rate": 1.1176196657937495e-06, "loss": 0.6054, "step": 25760 }, { "epoch": 0.789536594336153, "grad_norm": 2.1180724632424717, "learning_rate": 1.117306931851974e-06, "loss": 0.5711, "step": 25761 }, { "epoch": 0.7895672428588942, "grad_norm": 1.92079012190885, "learning_rate": 1.1169942361673651e-06, "loss": 0.5694, "step": 25762 }, { "epoch": 0.7895978913816354, "grad_norm": 1.889918489608592, "learning_rate": 1.1166815787430062e-06, "loss": 0.5559, "step": 25763 }, { "epoch": 0.7896285399043766, "grad_norm": 1.5785438586385963, "learning_rate": 1.1163689595819783e-06, "loss": 0.5499, "step": 25764 }, { "epoch": 0.7896591884271178, "grad_norm": 0.8384167901571851, "learning_rate": 1.1160563786873591e-06, "loss": 0.4115, "step": 25765 }, { "epoch": 0.7896898369498591, "grad_norm": 1.7460366510302678, "learning_rate": 1.1157438360622302e-06, "loss": 0.4978, "step": 25766 }, { "epoch": 0.7897204854726002, "grad_norm": 1.8602432058312177, "learning_rate": 1.1154313317096726e-06, "loss": 0.5462, "step": 25767 }, { "epoch": 0.7897511339953415, "grad_norm": 1.997887482501697, "learning_rate": 1.1151188656327627e-06, "loss": 0.6422, "step": 25768 }, { "epoch": 0.7897817825180826, "grad_norm": 1.9982100291073461, "learning_rate": 1.1148064378345825e-06, "loss": 0.6328, "step": 25769 }, { "epoch": 0.7898124310408239, "grad_norm": 0.7720116881616541, "learning_rate": 1.1144940483182064e-06, "loss": 0.3988, "step": 25770 }, { "epoch": 0.789843079563565, "grad_norm": 1.8369675688925569, "learning_rate": 1.1141816970867148e-06, "loss": 0.5795, "step": 25771 }, { "epoch": 0.7898737280863063, "grad_norm": 2.1515489961881493, "learning_rate": 1.1138693841431864e-06, "loss": 0.6281, "step": 25772 }, { "epoch": 0.7899043766090474, "grad_norm": 1.907252552102599, "learning_rate": 1.1135571094906961e-06, "loss": 0.6189, "step": 25773 }, { "epoch": 0.7899350251317886, "grad_norm": 1.7167953944747814, "learning_rate": 1.113244873132322e-06, "loss": 0.5744, "step": 25774 }, { "epoch": 0.7899656736545299, "grad_norm": 2.224734786882194, "learning_rate": 1.1129326750711417e-06, "loss": 0.6224, "step": 25775 }, { "epoch": 0.789996322177271, "grad_norm": 1.8149214392905249, "learning_rate": 1.1126205153102293e-06, "loss": 0.5398, "step": 25776 }, { "epoch": 0.7900269707000123, "grad_norm": 2.0422294710112063, "learning_rate": 1.1123083938526612e-06, "loss": 0.5958, "step": 25777 }, { "epoch": 0.7900576192227534, "grad_norm": 2.1876939167671505, "learning_rate": 1.1119963107015153e-06, "loss": 0.6027, "step": 25778 }, { "epoch": 0.7900882677454947, "grad_norm": 0.7706934551011383, "learning_rate": 1.1116842658598626e-06, "loss": 0.3712, "step": 25779 }, { "epoch": 0.7901189162682358, "grad_norm": 1.734122933614931, "learning_rate": 1.1113722593307813e-06, "loss": 0.5696, "step": 25780 }, { "epoch": 0.7901495647909771, "grad_norm": 0.8018352011407547, "learning_rate": 1.1110602911173413e-06, "loss": 0.3821, "step": 25781 }, { "epoch": 0.7901802133137182, "grad_norm": 1.9077320969606766, "learning_rate": 1.1107483612226216e-06, "loss": 0.5725, "step": 25782 }, { "epoch": 0.7902108618364595, "grad_norm": 1.7679086568229316, "learning_rate": 1.1104364696496938e-06, "loss": 0.522, "step": 25783 }, { "epoch": 0.7902415103592006, "grad_norm": 1.9172514240917384, "learning_rate": 1.1101246164016295e-06, "loss": 0.6317, "step": 25784 }, { "epoch": 0.7902721588819419, "grad_norm": 1.6991185258611734, "learning_rate": 1.109812801481502e-06, "loss": 0.4657, "step": 25785 }, { "epoch": 0.7903028074046831, "grad_norm": 1.8027631799983699, "learning_rate": 1.1095010248923859e-06, "loss": 0.5856, "step": 25786 }, { "epoch": 0.7903334559274243, "grad_norm": 2.030345779974763, "learning_rate": 1.1091892866373506e-06, "loss": 0.5743, "step": 25787 }, { "epoch": 0.7903641044501655, "grad_norm": 1.8135802231994402, "learning_rate": 1.1088775867194684e-06, "loss": 0.5607, "step": 25788 }, { "epoch": 0.7903947529729067, "grad_norm": 1.9344182617543486, "learning_rate": 1.1085659251418113e-06, "loss": 0.6434, "step": 25789 }, { "epoch": 0.7904254014956479, "grad_norm": 2.0327770669780976, "learning_rate": 1.108254301907451e-06, "loss": 0.6951, "step": 25790 }, { "epoch": 0.7904560500183891, "grad_norm": 1.9880141083828557, "learning_rate": 1.1079427170194568e-06, "loss": 0.5271, "step": 25791 }, { "epoch": 0.7904866985411303, "grad_norm": 2.16869261457797, "learning_rate": 1.1076311704808957e-06, "loss": 0.636, "step": 25792 }, { "epoch": 0.7905173470638716, "grad_norm": 1.7968405316406504, "learning_rate": 1.107319662294844e-06, "loss": 0.5726, "step": 25793 }, { "epoch": 0.7905479955866127, "grad_norm": 1.776102426565221, "learning_rate": 1.1070081924643672e-06, "loss": 0.5315, "step": 25794 }, { "epoch": 0.790578644109354, "grad_norm": 1.7969389217529164, "learning_rate": 1.1066967609925333e-06, "loss": 0.6192, "step": 25795 }, { "epoch": 0.7906092926320951, "grad_norm": 1.7646871326932818, "learning_rate": 1.1063853678824127e-06, "loss": 0.5417, "step": 25796 }, { "epoch": 0.7906399411548364, "grad_norm": 1.838908124490944, "learning_rate": 1.1060740131370744e-06, "loss": 0.6514, "step": 25797 }, { "epoch": 0.7906705896775775, "grad_norm": 2.034824490542588, "learning_rate": 1.105762696759584e-06, "loss": 0.6176, "step": 25798 }, { "epoch": 0.7907012382003188, "grad_norm": 1.8398918213418438, "learning_rate": 1.1054514187530102e-06, "loss": 0.6038, "step": 25799 }, { "epoch": 0.7907318867230599, "grad_norm": 2.0355405428750233, "learning_rate": 1.1051401791204197e-06, "loss": 0.6327, "step": 25800 }, { "epoch": 0.7907625352458012, "grad_norm": 1.9855550022465438, "learning_rate": 1.1048289778648814e-06, "loss": 0.5569, "step": 25801 }, { "epoch": 0.7907931837685424, "grad_norm": 1.8886383385497958, "learning_rate": 1.1045178149894592e-06, "loss": 0.6492, "step": 25802 }, { "epoch": 0.7908238322912836, "grad_norm": 1.8140558314968835, "learning_rate": 1.104206690497217e-06, "loss": 0.5273, "step": 25803 }, { "epoch": 0.7908544808140248, "grad_norm": 1.9148488040239386, "learning_rate": 1.1038956043912264e-06, "loss": 0.5449, "step": 25804 }, { "epoch": 0.7908851293367659, "grad_norm": 0.7870000838118864, "learning_rate": 1.103584556674549e-06, "loss": 0.402, "step": 25805 }, { "epoch": 0.7909157778595072, "grad_norm": 1.9256904893713043, "learning_rate": 1.103273547350248e-06, "loss": 0.5324, "step": 25806 }, { "epoch": 0.7909464263822483, "grad_norm": 2.0622070044386183, "learning_rate": 1.1029625764213903e-06, "loss": 0.5827, "step": 25807 }, { "epoch": 0.7909770749049896, "grad_norm": 0.8181791845763883, "learning_rate": 1.1026516438910396e-06, "loss": 0.4053, "step": 25808 }, { "epoch": 0.7910077234277307, "grad_norm": 1.6598400980886387, "learning_rate": 1.1023407497622601e-06, "loss": 0.5503, "step": 25809 }, { "epoch": 0.791038371950472, "grad_norm": 1.8924688453168308, "learning_rate": 1.1020298940381135e-06, "loss": 0.5337, "step": 25810 }, { "epoch": 0.7910690204732131, "grad_norm": 2.134440408882015, "learning_rate": 1.101719076721664e-06, "loss": 0.5863, "step": 25811 }, { "epoch": 0.7910996689959544, "grad_norm": 1.8537086827096976, "learning_rate": 1.101408297815975e-06, "loss": 0.5447, "step": 25812 }, { "epoch": 0.7911303175186956, "grad_norm": 0.9841333921619142, "learning_rate": 1.1010975573241073e-06, "loss": 0.4059, "step": 25813 }, { "epoch": 0.7911609660414368, "grad_norm": 1.5938240125924656, "learning_rate": 1.1007868552491219e-06, "loss": 0.5001, "step": 25814 }, { "epoch": 0.791191614564178, "grad_norm": 1.7035010606255123, "learning_rate": 1.100476191594081e-06, "loss": 0.491, "step": 25815 }, { "epoch": 0.7912222630869192, "grad_norm": 1.6710275977491071, "learning_rate": 1.1001655663620463e-06, "loss": 0.5939, "step": 25816 }, { "epoch": 0.7912529116096604, "grad_norm": 1.8476087089363569, "learning_rate": 1.0998549795560791e-06, "loss": 0.6268, "step": 25817 }, { "epoch": 0.7912835601324016, "grad_norm": 1.6785639972174, "learning_rate": 1.0995444311792374e-06, "loss": 0.582, "step": 25818 }, { "epoch": 0.7913142086551428, "grad_norm": 2.0809384478052375, "learning_rate": 1.0992339212345827e-06, "loss": 0.5313, "step": 25819 }, { "epoch": 0.791344857177884, "grad_norm": 1.891315227032959, "learning_rate": 1.098923449725175e-06, "loss": 0.5818, "step": 25820 }, { "epoch": 0.7913755057006252, "grad_norm": 0.8083399499010081, "learning_rate": 1.098613016654073e-06, "loss": 0.3941, "step": 25821 }, { "epoch": 0.7914061542233665, "grad_norm": 1.818311901723467, "learning_rate": 1.0983026220243326e-06, "loss": 0.5485, "step": 25822 }, { "epoch": 0.7914368027461076, "grad_norm": 1.8952718833394377, "learning_rate": 1.097992265839017e-06, "loss": 0.5722, "step": 25823 }, { "epoch": 0.7914674512688489, "grad_norm": 1.9967638672897579, "learning_rate": 1.0976819481011824e-06, "loss": 0.6122, "step": 25824 }, { "epoch": 0.79149809979159, "grad_norm": 0.8178962455093635, "learning_rate": 1.0973716688138847e-06, "loss": 0.3944, "step": 25825 }, { "epoch": 0.7915287483143313, "grad_norm": 2.2092735604270834, "learning_rate": 1.0970614279801823e-06, "loss": 0.5654, "step": 25826 }, { "epoch": 0.7915593968370724, "grad_norm": 1.7646726086309157, "learning_rate": 1.0967512256031322e-06, "loss": 0.5912, "step": 25827 }, { "epoch": 0.7915900453598137, "grad_norm": 1.9348892433228961, "learning_rate": 1.0964410616857924e-06, "loss": 0.5803, "step": 25828 }, { "epoch": 0.7916206938825548, "grad_norm": 1.8660775211065936, "learning_rate": 1.0961309362312162e-06, "loss": 0.6262, "step": 25829 }, { "epoch": 0.7916513424052961, "grad_norm": 1.630218610115692, "learning_rate": 1.0958208492424605e-06, "loss": 0.5096, "step": 25830 }, { "epoch": 0.7916819909280373, "grad_norm": 1.8414183617182622, "learning_rate": 1.0955108007225828e-06, "loss": 0.5709, "step": 25831 }, { "epoch": 0.7917126394507785, "grad_norm": 1.7837005391562606, "learning_rate": 1.095200790674636e-06, "loss": 0.6266, "step": 25832 }, { "epoch": 0.7917432879735197, "grad_norm": 1.9003660153701558, "learning_rate": 1.0948908191016716e-06, "loss": 0.5887, "step": 25833 }, { "epoch": 0.7917739364962609, "grad_norm": 1.9358025143380722, "learning_rate": 1.0945808860067502e-06, "loss": 0.5811, "step": 25834 }, { "epoch": 0.7918045850190021, "grad_norm": 1.895704627261191, "learning_rate": 1.0942709913929216e-06, "loss": 0.5546, "step": 25835 }, { "epoch": 0.7918352335417432, "grad_norm": 1.9139216543701274, "learning_rate": 1.0939611352632413e-06, "loss": 0.6032, "step": 25836 }, { "epoch": 0.7918658820644845, "grad_norm": 2.0242225649232606, "learning_rate": 1.0936513176207603e-06, "loss": 0.5708, "step": 25837 }, { "epoch": 0.7918965305872256, "grad_norm": 2.0656175197337925, "learning_rate": 1.093341538468532e-06, "loss": 0.6766, "step": 25838 }, { "epoch": 0.7919271791099669, "grad_norm": 1.74844369930058, "learning_rate": 1.0930317978096106e-06, "loss": 0.6053, "step": 25839 }, { "epoch": 0.7919578276327081, "grad_norm": 1.619258970720239, "learning_rate": 1.0927220956470446e-06, "loss": 0.5499, "step": 25840 }, { "epoch": 0.7919884761554493, "grad_norm": 0.7946987675412542, "learning_rate": 1.092412431983888e-06, "loss": 0.3984, "step": 25841 }, { "epoch": 0.7920191246781905, "grad_norm": 1.9417019566703928, "learning_rate": 1.092102806823193e-06, "loss": 0.5596, "step": 25842 }, { "epoch": 0.7920497732009317, "grad_norm": 1.7448725160492466, "learning_rate": 1.0917932201680075e-06, "loss": 0.6284, "step": 25843 }, { "epoch": 0.7920804217236729, "grad_norm": 2.206720235875401, "learning_rate": 1.0914836720213835e-06, "loss": 0.6402, "step": 25844 }, { "epoch": 0.7921110702464141, "grad_norm": 1.8272605384777896, "learning_rate": 1.091174162386372e-06, "loss": 0.5525, "step": 25845 }, { "epoch": 0.7921417187691553, "grad_norm": 1.921453328499941, "learning_rate": 1.0908646912660204e-06, "loss": 0.6145, "step": 25846 }, { "epoch": 0.7921723672918966, "grad_norm": 2.7925708414827097, "learning_rate": 1.0905552586633804e-06, "loss": 0.5334, "step": 25847 }, { "epoch": 0.7922030158146377, "grad_norm": 1.9339812611417075, "learning_rate": 1.0902458645814984e-06, "loss": 0.6422, "step": 25848 }, { "epoch": 0.792233664337379, "grad_norm": 1.649497130898711, "learning_rate": 1.0899365090234244e-06, "loss": 0.5901, "step": 25849 }, { "epoch": 0.7922643128601201, "grad_norm": 1.921526298784752, "learning_rate": 1.0896271919922074e-06, "loss": 0.5331, "step": 25850 }, { "epoch": 0.7922949613828614, "grad_norm": 1.8103876554331297, "learning_rate": 1.0893179134908932e-06, "loss": 0.5722, "step": 25851 }, { "epoch": 0.7923256099056025, "grad_norm": 1.7039781787743051, "learning_rate": 1.0890086735225296e-06, "loss": 0.5742, "step": 25852 }, { "epoch": 0.7923562584283438, "grad_norm": 2.0016189488584746, "learning_rate": 1.088699472090166e-06, "loss": 0.5104, "step": 25853 }, { "epoch": 0.7923869069510849, "grad_norm": 1.9681233981690147, "learning_rate": 1.0883903091968461e-06, "loss": 0.6633, "step": 25854 }, { "epoch": 0.7924175554738262, "grad_norm": 1.9904552244551041, "learning_rate": 1.0880811848456169e-06, "loss": 0.5422, "step": 25855 }, { "epoch": 0.7924482039965673, "grad_norm": 2.201774504906676, "learning_rate": 1.0877720990395263e-06, "loss": 0.6652, "step": 25856 }, { "epoch": 0.7924788525193086, "grad_norm": 1.8861051906655923, "learning_rate": 1.087463051781617e-06, "loss": 0.6124, "step": 25857 }, { "epoch": 0.7925095010420498, "grad_norm": 1.9408065102790715, "learning_rate": 1.0871540430749366e-06, "loss": 0.5135, "step": 25858 }, { "epoch": 0.792540149564791, "grad_norm": 2.1217754598717278, "learning_rate": 1.0868450729225272e-06, "loss": 0.6575, "step": 25859 }, { "epoch": 0.7925707980875322, "grad_norm": 1.875832284526621, "learning_rate": 1.086536141327434e-06, "loss": 0.5733, "step": 25860 }, { "epoch": 0.7926014466102734, "grad_norm": 2.147801848771749, "learning_rate": 1.0862272482927033e-06, "loss": 0.6392, "step": 25861 }, { "epoch": 0.7926320951330146, "grad_norm": 1.7081125461156976, "learning_rate": 1.0859183938213759e-06, "loss": 0.5026, "step": 25862 }, { "epoch": 0.7926627436557558, "grad_norm": 1.8814889444927552, "learning_rate": 1.0856095779164955e-06, "loss": 0.4914, "step": 25863 }, { "epoch": 0.792693392178497, "grad_norm": 1.9526520449331002, "learning_rate": 1.0853008005811067e-06, "loss": 0.5424, "step": 25864 }, { "epoch": 0.7927240407012383, "grad_norm": 1.8840800400307685, "learning_rate": 1.0849920618182496e-06, "loss": 0.5463, "step": 25865 }, { "epoch": 0.7927546892239794, "grad_norm": 1.5776287334246981, "learning_rate": 1.0846833616309687e-06, "loss": 0.491, "step": 25866 }, { "epoch": 0.7927853377467206, "grad_norm": 1.9402568797200148, "learning_rate": 1.084374700022303e-06, "loss": 0.6231, "step": 25867 }, { "epoch": 0.7928159862694618, "grad_norm": 1.7690488734115346, "learning_rate": 1.084066076995296e-06, "loss": 0.5779, "step": 25868 }, { "epoch": 0.792846634792203, "grad_norm": 1.8216260712534555, "learning_rate": 1.0837574925529887e-06, "loss": 0.5218, "step": 25869 }, { "epoch": 0.7928772833149442, "grad_norm": 2.007774289005818, "learning_rate": 1.0834489466984193e-06, "loss": 0.5782, "step": 25870 }, { "epoch": 0.7929079318376854, "grad_norm": 1.8735357925918872, "learning_rate": 1.0831404394346294e-06, "loss": 0.605, "step": 25871 }, { "epoch": 0.7929385803604266, "grad_norm": 2.071166039841302, "learning_rate": 1.0828319707646606e-06, "loss": 0.5779, "step": 25872 }, { "epoch": 0.7929692288831678, "grad_norm": 2.127612430682575, "learning_rate": 1.082523540691549e-06, "loss": 0.7186, "step": 25873 }, { "epoch": 0.792999877405909, "grad_norm": 0.8152545277844255, "learning_rate": 1.0822151492183359e-06, "loss": 0.4042, "step": 25874 }, { "epoch": 0.7930305259286502, "grad_norm": 1.808931852428712, "learning_rate": 1.08190679634806e-06, "loss": 0.6175, "step": 25875 }, { "epoch": 0.7930611744513915, "grad_norm": 0.9036355033712701, "learning_rate": 1.0815984820837577e-06, "loss": 0.4079, "step": 25876 }, { "epoch": 0.7930918229741326, "grad_norm": 1.6542373500862808, "learning_rate": 1.0812902064284697e-06, "loss": 0.4807, "step": 25877 }, { "epoch": 0.7931224714968739, "grad_norm": 1.8104677646841503, "learning_rate": 1.0809819693852308e-06, "loss": 0.5932, "step": 25878 }, { "epoch": 0.793153120019615, "grad_norm": 0.8146735844086849, "learning_rate": 1.0806737709570786e-06, "loss": 0.3923, "step": 25879 }, { "epoch": 0.7931837685423563, "grad_norm": 2.075829639056029, "learning_rate": 1.0803656111470523e-06, "loss": 0.6813, "step": 25880 }, { "epoch": 0.7932144170650974, "grad_norm": 0.8076094335479616, "learning_rate": 1.080057489958185e-06, "loss": 0.3997, "step": 25881 }, { "epoch": 0.7932450655878387, "grad_norm": 1.8273266964088521, "learning_rate": 1.0797494073935143e-06, "loss": 0.5045, "step": 25882 }, { "epoch": 0.7932757141105798, "grad_norm": 2.0424645290178858, "learning_rate": 1.0794413634560775e-06, "loss": 0.5825, "step": 25883 }, { "epoch": 0.7933063626333211, "grad_norm": 1.9595095404646925, "learning_rate": 1.0791333581489061e-06, "loss": 0.566, "step": 25884 }, { "epoch": 0.7933370111560623, "grad_norm": 1.9007469101032297, "learning_rate": 1.0788253914750373e-06, "loss": 0.5363, "step": 25885 }, { "epoch": 0.7933676596788035, "grad_norm": 1.5917228879619256, "learning_rate": 1.0785174634375068e-06, "loss": 0.5035, "step": 25886 }, { "epoch": 0.7933983082015447, "grad_norm": 1.9991443927939958, "learning_rate": 1.078209574039345e-06, "loss": 0.5862, "step": 25887 }, { "epoch": 0.7934289567242859, "grad_norm": 2.118536077422673, "learning_rate": 1.0779017232835893e-06, "loss": 0.5313, "step": 25888 }, { "epoch": 0.7934596052470271, "grad_norm": 1.9366073382139253, "learning_rate": 1.0775939111732692e-06, "loss": 0.6205, "step": 25889 }, { "epoch": 0.7934902537697683, "grad_norm": 1.8479617722140982, "learning_rate": 1.077286137711422e-06, "loss": 0.5795, "step": 25890 }, { "epoch": 0.7935209022925095, "grad_norm": 2.030219969178157, "learning_rate": 1.0769784029010783e-06, "loss": 0.5962, "step": 25891 }, { "epoch": 0.7935515508152508, "grad_norm": 1.6101033114275618, "learning_rate": 1.0766707067452692e-06, "loss": 0.4461, "step": 25892 }, { "epoch": 0.7935821993379919, "grad_norm": 1.9523532713297347, "learning_rate": 1.0763630492470267e-06, "loss": 0.543, "step": 25893 }, { "epoch": 0.7936128478607332, "grad_norm": 1.9947389779204554, "learning_rate": 1.076055430409385e-06, "loss": 0.5836, "step": 25894 }, { "epoch": 0.7936434963834743, "grad_norm": 1.6024874331635282, "learning_rate": 1.0757478502353713e-06, "loss": 0.4679, "step": 25895 }, { "epoch": 0.7936741449062156, "grad_norm": 1.7492948404265276, "learning_rate": 1.0754403087280185e-06, "loss": 0.5645, "step": 25896 }, { "epoch": 0.7937047934289567, "grad_norm": 1.917619573529259, "learning_rate": 1.0751328058903576e-06, "loss": 0.5513, "step": 25897 }, { "epoch": 0.7937354419516979, "grad_norm": 1.8626110806310638, "learning_rate": 1.0748253417254162e-06, "loss": 0.554, "step": 25898 }, { "epoch": 0.7937660904744391, "grad_norm": 1.910147154114865, "learning_rate": 1.0745179162362263e-06, "loss": 0.5422, "step": 25899 }, { "epoch": 0.7937967389971803, "grad_norm": 1.764864558900632, "learning_rate": 1.0742105294258126e-06, "loss": 0.5494, "step": 25900 }, { "epoch": 0.7938273875199215, "grad_norm": 2.047504217528488, "learning_rate": 1.07390318129721e-06, "loss": 0.6341, "step": 25901 }, { "epoch": 0.7938580360426627, "grad_norm": 1.8037700561950087, "learning_rate": 1.0735958718534445e-06, "loss": 0.5257, "step": 25902 }, { "epoch": 0.793888684565404, "grad_norm": 1.9262975995276992, "learning_rate": 1.0732886010975414e-06, "loss": 0.5904, "step": 25903 }, { "epoch": 0.7939193330881451, "grad_norm": 1.9018028746415085, "learning_rate": 1.0729813690325307e-06, "loss": 0.6341, "step": 25904 }, { "epoch": 0.7939499816108864, "grad_norm": 2.1114348777166616, "learning_rate": 1.0726741756614406e-06, "loss": 0.7043, "step": 25905 }, { "epoch": 0.7939806301336275, "grad_norm": 1.9596036817627798, "learning_rate": 1.0723670209872956e-06, "loss": 0.5435, "step": 25906 }, { "epoch": 0.7940112786563688, "grad_norm": 0.7823141929826044, "learning_rate": 1.0720599050131225e-06, "loss": 0.3933, "step": 25907 }, { "epoch": 0.7940419271791099, "grad_norm": 1.8584348729521527, "learning_rate": 1.0717528277419491e-06, "loss": 0.6454, "step": 25908 }, { "epoch": 0.7940725757018512, "grad_norm": 1.6457504977077222, "learning_rate": 1.071445789176801e-06, "loss": 0.5523, "step": 25909 }, { "epoch": 0.7941032242245923, "grad_norm": 1.5730878410506381, "learning_rate": 1.0711387893207026e-06, "loss": 0.5269, "step": 25910 }, { "epoch": 0.7941338727473336, "grad_norm": 2.082524824800221, "learning_rate": 1.0708318281766784e-06, "loss": 0.6448, "step": 25911 }, { "epoch": 0.7941645212700748, "grad_norm": 0.8136065375205893, "learning_rate": 1.0705249057477524e-06, "loss": 0.4006, "step": 25912 }, { "epoch": 0.794195169792816, "grad_norm": 1.8529854253804243, "learning_rate": 1.070218022036952e-06, "loss": 0.627, "step": 25913 }, { "epoch": 0.7942258183155572, "grad_norm": 2.1475883114926404, "learning_rate": 1.0699111770472976e-06, "loss": 0.7147, "step": 25914 }, { "epoch": 0.7942564668382984, "grad_norm": 1.8630111557446898, "learning_rate": 1.0696043707818132e-06, "loss": 0.6563, "step": 25915 }, { "epoch": 0.7942871153610396, "grad_norm": 1.8422842697171837, "learning_rate": 1.0692976032435232e-06, "loss": 0.5795, "step": 25916 }, { "epoch": 0.7943177638837808, "grad_norm": 0.7827271017555331, "learning_rate": 1.068990874435451e-06, "loss": 0.4022, "step": 25917 }, { "epoch": 0.794348412406522, "grad_norm": 0.7882953081892908, "learning_rate": 1.0686841843606171e-06, "loss": 0.4042, "step": 25918 }, { "epoch": 0.7943790609292632, "grad_norm": 2.0062812127930907, "learning_rate": 1.0683775330220414e-06, "loss": 0.5839, "step": 25919 }, { "epoch": 0.7944097094520044, "grad_norm": 1.769268883656798, "learning_rate": 1.0680709204227507e-06, "loss": 0.6036, "step": 25920 }, { "epoch": 0.7944403579747457, "grad_norm": 1.8588561208423622, "learning_rate": 1.0677643465657628e-06, "loss": 0.5704, "step": 25921 }, { "epoch": 0.7944710064974868, "grad_norm": 0.8550623641047733, "learning_rate": 1.0674578114540979e-06, "loss": 0.3958, "step": 25922 }, { "epoch": 0.7945016550202281, "grad_norm": 1.7873603695493288, "learning_rate": 1.0671513150907776e-06, "loss": 0.5436, "step": 25923 }, { "epoch": 0.7945323035429692, "grad_norm": 1.7459529594972651, "learning_rate": 1.066844857478822e-06, "loss": 0.5486, "step": 25924 }, { "epoch": 0.7945629520657105, "grad_norm": 1.9438268961390945, "learning_rate": 1.0665384386212501e-06, "loss": 0.5956, "step": 25925 }, { "epoch": 0.7945936005884516, "grad_norm": 1.8991529616361236, "learning_rate": 1.0662320585210806e-06, "loss": 0.5912, "step": 25926 }, { "epoch": 0.7946242491111929, "grad_norm": 2.2453047838628106, "learning_rate": 1.0659257171813337e-06, "loss": 0.6397, "step": 25927 }, { "epoch": 0.794654897633934, "grad_norm": 1.797332577133649, "learning_rate": 1.0656194146050281e-06, "loss": 0.4891, "step": 25928 }, { "epoch": 0.7946855461566752, "grad_norm": 1.7102190629705996, "learning_rate": 1.0653131507951814e-06, "loss": 0.4871, "step": 25929 }, { "epoch": 0.7947161946794165, "grad_norm": 1.8707337659523076, "learning_rate": 1.0650069257548084e-06, "loss": 0.6478, "step": 25930 }, { "epoch": 0.7947468432021576, "grad_norm": 1.6005410993167772, "learning_rate": 1.0647007394869313e-06, "loss": 0.5104, "step": 25931 }, { "epoch": 0.7947774917248989, "grad_norm": 1.6831803572750714, "learning_rate": 1.0643945919945652e-06, "loss": 0.4944, "step": 25932 }, { "epoch": 0.79480814024764, "grad_norm": 1.8002530795961846, "learning_rate": 1.064088483280724e-06, "loss": 0.5627, "step": 25933 }, { "epoch": 0.7948387887703813, "grad_norm": 1.9208468676742594, "learning_rate": 1.063782413348427e-06, "loss": 0.5312, "step": 25934 }, { "epoch": 0.7948694372931224, "grad_norm": 0.7874364193818946, "learning_rate": 1.0634763822006883e-06, "loss": 0.4098, "step": 25935 }, { "epoch": 0.7949000858158637, "grad_norm": 1.8755754927208699, "learning_rate": 1.0631703898405255e-06, "loss": 0.5891, "step": 25936 }, { "epoch": 0.7949307343386048, "grad_norm": 1.829917132975799, "learning_rate": 1.0628644362709512e-06, "loss": 0.5203, "step": 25937 }, { "epoch": 0.7949613828613461, "grad_norm": 1.8616354740385925, "learning_rate": 1.0625585214949802e-06, "loss": 0.6192, "step": 25938 }, { "epoch": 0.7949920313840872, "grad_norm": 2.2072050692676153, "learning_rate": 1.0622526455156297e-06, "loss": 0.59, "step": 25939 }, { "epoch": 0.7950226799068285, "grad_norm": 1.7973763097692645, "learning_rate": 1.0619468083359107e-06, "loss": 0.563, "step": 25940 }, { "epoch": 0.7950533284295697, "grad_norm": 0.8024034477102129, "learning_rate": 1.0616410099588349e-06, "loss": 0.4001, "step": 25941 }, { "epoch": 0.7950839769523109, "grad_norm": 0.7639607522656413, "learning_rate": 1.0613352503874209e-06, "loss": 0.3976, "step": 25942 }, { "epoch": 0.7951146254750521, "grad_norm": 0.8012403082875711, "learning_rate": 1.061029529624677e-06, "loss": 0.3954, "step": 25943 }, { "epoch": 0.7951452739977933, "grad_norm": 1.564893110865973, "learning_rate": 1.060723847673618e-06, "loss": 0.5734, "step": 25944 }, { "epoch": 0.7951759225205345, "grad_norm": 1.7146803972843896, "learning_rate": 1.0604182045372535e-06, "loss": 0.4893, "step": 25945 }, { "epoch": 0.7952065710432757, "grad_norm": 1.8204465891539194, "learning_rate": 1.060112600218597e-06, "loss": 0.597, "step": 25946 }, { "epoch": 0.7952372195660169, "grad_norm": 1.8680081009826284, "learning_rate": 1.0598070347206595e-06, "loss": 0.5425, "step": 25947 }, { "epoch": 0.7952678680887582, "grad_norm": 0.7891064522925632, "learning_rate": 1.059501508046451e-06, "loss": 0.4126, "step": 25948 }, { "epoch": 0.7952985166114993, "grad_norm": 0.7996226174796336, "learning_rate": 1.0591960201989821e-06, "loss": 0.3886, "step": 25949 }, { "epoch": 0.7953291651342406, "grad_norm": 1.9106810791541327, "learning_rate": 1.0588905711812641e-06, "loss": 0.5954, "step": 25950 }, { "epoch": 0.7953598136569817, "grad_norm": 0.8396902705572098, "learning_rate": 1.058585160996306e-06, "loss": 0.4168, "step": 25951 }, { "epoch": 0.795390462179723, "grad_norm": 1.8258253352905986, "learning_rate": 1.0582797896471143e-06, "loss": 0.6103, "step": 25952 }, { "epoch": 0.7954211107024641, "grad_norm": 1.6454569546712199, "learning_rate": 1.0579744571367023e-06, "loss": 0.5508, "step": 25953 }, { "epoch": 0.7954517592252054, "grad_norm": 1.9512526739112341, "learning_rate": 1.0576691634680758e-06, "loss": 0.6436, "step": 25954 }, { "epoch": 0.7954824077479465, "grad_norm": 2.0657018839694667, "learning_rate": 1.057363908644245e-06, "loss": 0.6254, "step": 25955 }, { "epoch": 0.7955130562706878, "grad_norm": 1.7809464273906204, "learning_rate": 1.0570586926682153e-06, "loss": 0.5392, "step": 25956 }, { "epoch": 0.795543704793429, "grad_norm": 0.7830664200829626, "learning_rate": 1.0567535155429947e-06, "loss": 0.3914, "step": 25957 }, { "epoch": 0.7955743533161702, "grad_norm": 1.7102686677617935, "learning_rate": 1.0564483772715922e-06, "loss": 0.5046, "step": 25958 }, { "epoch": 0.7956050018389114, "grad_norm": 1.874538231779064, "learning_rate": 1.0561432778570113e-06, "loss": 0.5861, "step": 25959 }, { "epoch": 0.7956356503616525, "grad_norm": 1.738255880265191, "learning_rate": 1.0558382173022596e-06, "loss": 0.5951, "step": 25960 }, { "epoch": 0.7956662988843938, "grad_norm": 1.756065316178672, "learning_rate": 1.0555331956103448e-06, "loss": 0.6938, "step": 25961 }, { "epoch": 0.7956969474071349, "grad_norm": 1.8707062815465851, "learning_rate": 1.0552282127842694e-06, "loss": 0.6379, "step": 25962 }, { "epoch": 0.7957275959298762, "grad_norm": 1.819761009070845, "learning_rate": 1.054923268827041e-06, "loss": 0.4404, "step": 25963 }, { "epoch": 0.7957582444526173, "grad_norm": 1.6588154829921478, "learning_rate": 1.0546183637416612e-06, "loss": 0.5583, "step": 25964 }, { "epoch": 0.7957888929753586, "grad_norm": 0.7870858308767446, "learning_rate": 1.054313497531137e-06, "loss": 0.3931, "step": 25965 }, { "epoch": 0.7958195414980997, "grad_norm": 1.8365058036012263, "learning_rate": 1.054008670198472e-06, "loss": 0.555, "step": 25966 }, { "epoch": 0.795850190020841, "grad_norm": 1.7792989964976111, "learning_rate": 1.0537038817466682e-06, "loss": 0.5613, "step": 25967 }, { "epoch": 0.7958808385435822, "grad_norm": 1.8121226165326696, "learning_rate": 1.05339913217873e-06, "loss": 0.6186, "step": 25968 }, { "epoch": 0.7959114870663234, "grad_norm": 0.8055070106780433, "learning_rate": 1.0530944214976608e-06, "loss": 0.4061, "step": 25969 }, { "epoch": 0.7959421355890646, "grad_norm": 1.7886380610534551, "learning_rate": 1.052789749706461e-06, "loss": 0.6141, "step": 25970 }, { "epoch": 0.7959727841118058, "grad_norm": 1.6933957902906949, "learning_rate": 1.0524851168081341e-06, "loss": 0.5464, "step": 25971 }, { "epoch": 0.796003432634547, "grad_norm": 1.8199869398999662, "learning_rate": 1.052180522805682e-06, "loss": 0.6278, "step": 25972 }, { "epoch": 0.7960340811572882, "grad_norm": 1.782937362215698, "learning_rate": 1.0518759677021046e-06, "loss": 0.6037, "step": 25973 }, { "epoch": 0.7960647296800294, "grad_norm": 2.1217435576565085, "learning_rate": 1.0515714515004043e-06, "loss": 0.6837, "step": 25974 }, { "epoch": 0.7960953782027707, "grad_norm": 1.6873008535746354, "learning_rate": 1.0512669742035798e-06, "loss": 0.5034, "step": 25975 }, { "epoch": 0.7961260267255118, "grad_norm": 1.9407082097226136, "learning_rate": 1.050962535814632e-06, "loss": 0.6545, "step": 25976 }, { "epoch": 0.7961566752482531, "grad_norm": 1.7854605637542695, "learning_rate": 1.0506581363365615e-06, "loss": 0.6531, "step": 25977 }, { "epoch": 0.7961873237709942, "grad_norm": 1.9558923377623683, "learning_rate": 1.0503537757723664e-06, "loss": 0.636, "step": 25978 }, { "epoch": 0.7962179722937355, "grad_norm": 1.6828694814928027, "learning_rate": 1.0500494541250455e-06, "loss": 0.5239, "step": 25979 }, { "epoch": 0.7962486208164766, "grad_norm": 2.432279834713902, "learning_rate": 1.0497451713975997e-06, "loss": 0.538, "step": 25980 }, { "epoch": 0.7962792693392179, "grad_norm": 1.983266601030714, "learning_rate": 1.049440927593024e-06, "loss": 0.525, "step": 25981 }, { "epoch": 0.796309917861959, "grad_norm": 1.8692831588331311, "learning_rate": 1.0491367227143173e-06, "loss": 0.6633, "step": 25982 }, { "epoch": 0.7963405663847003, "grad_norm": 0.8131243113643114, "learning_rate": 1.0488325567644792e-06, "loss": 0.4288, "step": 25983 }, { "epoch": 0.7963712149074414, "grad_norm": 1.889756181477782, "learning_rate": 1.0485284297465032e-06, "loss": 0.6101, "step": 25984 }, { "epoch": 0.7964018634301827, "grad_norm": 2.0107740039701825, "learning_rate": 1.048224341663389e-06, "loss": 0.5348, "step": 25985 }, { "epoch": 0.7964325119529239, "grad_norm": 1.7783909031528875, "learning_rate": 1.0479202925181303e-06, "loss": 0.5977, "step": 25986 }, { "epoch": 0.7964631604756651, "grad_norm": 1.6219140866544677, "learning_rate": 1.0476162823137238e-06, "loss": 0.5178, "step": 25987 }, { "epoch": 0.7964938089984063, "grad_norm": 1.5270029057843097, "learning_rate": 1.047312311053167e-06, "loss": 0.5411, "step": 25988 }, { "epoch": 0.7965244575211475, "grad_norm": 2.1121674661491077, "learning_rate": 1.047008378739452e-06, "loss": 0.6727, "step": 25989 }, { "epoch": 0.7965551060438887, "grad_norm": 0.752912156539349, "learning_rate": 1.0467044853755741e-06, "loss": 0.4054, "step": 25990 }, { "epoch": 0.7965857545666298, "grad_norm": 0.75714799726157, "learning_rate": 1.0464006309645308e-06, "loss": 0.3974, "step": 25991 }, { "epoch": 0.7966164030893711, "grad_norm": 1.706465949970194, "learning_rate": 1.046096815509312e-06, "loss": 0.6009, "step": 25992 }, { "epoch": 0.7966470516121122, "grad_norm": 2.1530107191219643, "learning_rate": 1.0457930390129129e-06, "loss": 0.6938, "step": 25993 }, { "epoch": 0.7966777001348535, "grad_norm": 0.8367988423434436, "learning_rate": 1.045489301478328e-06, "loss": 0.4237, "step": 25994 }, { "epoch": 0.7967083486575947, "grad_norm": 0.7849988294240116, "learning_rate": 1.0451856029085473e-06, "loss": 0.3989, "step": 25995 }, { "epoch": 0.7967389971803359, "grad_norm": 1.8013982609075272, "learning_rate": 1.044881943306566e-06, "loss": 0.5722, "step": 25996 }, { "epoch": 0.7967696457030771, "grad_norm": 1.7966897930805648, "learning_rate": 1.0445783226753725e-06, "loss": 0.5285, "step": 25997 }, { "epoch": 0.7968002942258183, "grad_norm": 1.5150830207109018, "learning_rate": 1.0442747410179633e-06, "loss": 0.4986, "step": 25998 }, { "epoch": 0.7968309427485595, "grad_norm": 1.8717052356590318, "learning_rate": 1.0439711983373275e-06, "loss": 0.6492, "step": 25999 }, { "epoch": 0.7968615912713007, "grad_norm": 1.8637418433901038, "learning_rate": 1.0436676946364544e-06, "loss": 0.5879, "step": 26000 }, { "epoch": 0.7968922397940419, "grad_norm": 2.132964941314673, "learning_rate": 1.0433642299183355e-06, "loss": 0.633, "step": 26001 }, { "epoch": 0.7969228883167832, "grad_norm": 1.9595822998414234, "learning_rate": 1.0430608041859624e-06, "loss": 0.5633, "step": 26002 }, { "epoch": 0.7969535368395243, "grad_norm": 1.7866456147669543, "learning_rate": 1.042757417442322e-06, "loss": 0.5602, "step": 26003 }, { "epoch": 0.7969841853622656, "grad_norm": 1.6535474444051013, "learning_rate": 1.042454069690406e-06, "loss": 0.616, "step": 26004 }, { "epoch": 0.7970148338850067, "grad_norm": 1.8155733645744634, "learning_rate": 1.0421507609332038e-06, "loss": 0.5372, "step": 26005 }, { "epoch": 0.797045482407748, "grad_norm": 1.8195430130496222, "learning_rate": 1.0418474911737014e-06, "loss": 0.5767, "step": 26006 }, { "epoch": 0.7970761309304891, "grad_norm": 1.8102206894781827, "learning_rate": 1.0415442604148896e-06, "loss": 0.5081, "step": 26007 }, { "epoch": 0.7971067794532304, "grad_norm": 1.8149017082953716, "learning_rate": 1.0412410686597542e-06, "loss": 0.5848, "step": 26008 }, { "epoch": 0.7971374279759715, "grad_norm": 1.6901568113245087, "learning_rate": 1.0409379159112826e-06, "loss": 0.6525, "step": 26009 }, { "epoch": 0.7971680764987128, "grad_norm": 1.6780743543714027, "learning_rate": 1.0406348021724645e-06, "loss": 0.5794, "step": 26010 }, { "epoch": 0.797198725021454, "grad_norm": 1.773130312409526, "learning_rate": 1.0403317274462833e-06, "loss": 0.5299, "step": 26011 }, { "epoch": 0.7972293735441952, "grad_norm": 1.8797275004123073, "learning_rate": 1.0400286917357267e-06, "loss": 0.6071, "step": 26012 }, { "epoch": 0.7972600220669364, "grad_norm": 1.824750540110969, "learning_rate": 1.0397256950437822e-06, "loss": 0.5209, "step": 26013 }, { "epoch": 0.7972906705896776, "grad_norm": 2.036468693138532, "learning_rate": 1.0394227373734322e-06, "loss": 0.5847, "step": 26014 }, { "epoch": 0.7973213191124188, "grad_norm": 1.6678492048650346, "learning_rate": 1.0391198187276646e-06, "loss": 0.5037, "step": 26015 }, { "epoch": 0.79735196763516, "grad_norm": 1.8854588812028343, "learning_rate": 1.03881693910946e-06, "loss": 0.5808, "step": 26016 }, { "epoch": 0.7973826161579012, "grad_norm": 1.9099180595621135, "learning_rate": 1.0385140985218085e-06, "loss": 0.6297, "step": 26017 }, { "epoch": 0.7974132646806424, "grad_norm": 2.2827380935475654, "learning_rate": 1.038211296967691e-06, "loss": 0.6009, "step": 26018 }, { "epoch": 0.7974439132033836, "grad_norm": 0.8530809395939623, "learning_rate": 1.0379085344500905e-06, "loss": 0.4061, "step": 26019 }, { "epoch": 0.7974745617261249, "grad_norm": 1.654384323252351, "learning_rate": 1.0376058109719906e-06, "loss": 0.6581, "step": 26020 }, { "epoch": 0.797505210248866, "grad_norm": 1.6219164714084497, "learning_rate": 1.037303126536376e-06, "loss": 0.5467, "step": 26021 }, { "epoch": 0.7975358587716072, "grad_norm": 1.91810911603109, "learning_rate": 1.0370004811462258e-06, "loss": 0.5702, "step": 26022 }, { "epoch": 0.7975665072943484, "grad_norm": 1.8542668241097078, "learning_rate": 1.0366978748045236e-06, "loss": 0.5821, "step": 26023 }, { "epoch": 0.7975971558170896, "grad_norm": 2.110365008351913, "learning_rate": 1.0363953075142519e-06, "loss": 0.5558, "step": 26024 }, { "epoch": 0.7976278043398308, "grad_norm": 1.9874631805795586, "learning_rate": 1.0360927792783925e-06, "loss": 0.6541, "step": 26025 }, { "epoch": 0.797658452862572, "grad_norm": 1.9072067418808345, "learning_rate": 1.0357902900999256e-06, "loss": 0.6326, "step": 26026 }, { "epoch": 0.7976891013853132, "grad_norm": 1.9040083867626232, "learning_rate": 1.035487839981828e-06, "loss": 0.5997, "step": 26027 }, { "epoch": 0.7977197499080544, "grad_norm": 1.7374403788860255, "learning_rate": 1.0351854289270857e-06, "loss": 0.5532, "step": 26028 }, { "epoch": 0.7977503984307956, "grad_norm": 1.6236163370658117, "learning_rate": 1.0348830569386764e-06, "loss": 0.5148, "step": 26029 }, { "epoch": 0.7977810469535368, "grad_norm": 2.078446786262601, "learning_rate": 1.034580724019577e-06, "loss": 0.6514, "step": 26030 }, { "epoch": 0.7978116954762781, "grad_norm": 1.7420933983474365, "learning_rate": 1.0342784301727688e-06, "loss": 0.517, "step": 26031 }, { "epoch": 0.7978423439990192, "grad_norm": 0.7866787880647522, "learning_rate": 1.0339761754012307e-06, "loss": 0.3882, "step": 26032 }, { "epoch": 0.7978729925217605, "grad_norm": 1.8603604881460427, "learning_rate": 1.0336739597079387e-06, "loss": 0.6794, "step": 26033 }, { "epoch": 0.7979036410445016, "grad_norm": 1.8553193984197682, "learning_rate": 1.0333717830958729e-06, "loss": 0.5091, "step": 26034 }, { "epoch": 0.7979342895672429, "grad_norm": 2.12302684811726, "learning_rate": 1.0330696455680089e-06, "loss": 0.6314, "step": 26035 }, { "epoch": 0.797964938089984, "grad_norm": 1.7492633847903012, "learning_rate": 1.0327675471273262e-06, "loss": 0.5294, "step": 26036 }, { "epoch": 0.7979955866127253, "grad_norm": 1.8410278773437136, "learning_rate": 1.0324654877768003e-06, "loss": 0.5036, "step": 26037 }, { "epoch": 0.7980262351354664, "grad_norm": 1.8316322847685231, "learning_rate": 1.0321634675194042e-06, "loss": 0.5629, "step": 26038 }, { "epoch": 0.7980568836582077, "grad_norm": 1.8919986670734477, "learning_rate": 1.0318614863581188e-06, "loss": 0.6126, "step": 26039 }, { "epoch": 0.7980875321809489, "grad_norm": 1.7700748628592318, "learning_rate": 1.0315595442959182e-06, "loss": 0.4748, "step": 26040 }, { "epoch": 0.7981181807036901, "grad_norm": 2.281831099801764, "learning_rate": 1.0312576413357755e-06, "loss": 0.5557, "step": 26041 }, { "epoch": 0.7981488292264313, "grad_norm": 1.5189443555213993, "learning_rate": 1.0309557774806662e-06, "loss": 0.5558, "step": 26042 }, { "epoch": 0.7981794777491725, "grad_norm": 1.8263371691211765, "learning_rate": 1.0306539527335652e-06, "loss": 0.6064, "step": 26043 }, { "epoch": 0.7982101262719137, "grad_norm": 1.8715885364833527, "learning_rate": 1.0303521670974481e-06, "loss": 0.5092, "step": 26044 }, { "epoch": 0.7982407747946549, "grad_norm": 0.8044169165864871, "learning_rate": 1.0300504205752853e-06, "loss": 0.419, "step": 26045 }, { "epoch": 0.7982714233173961, "grad_norm": 1.918465961024517, "learning_rate": 1.0297487131700512e-06, "loss": 0.6405, "step": 26046 }, { "epoch": 0.7983020718401374, "grad_norm": 0.7784603079180578, "learning_rate": 1.0294470448847204e-06, "loss": 0.4021, "step": 26047 }, { "epoch": 0.7983327203628785, "grad_norm": 1.783006551125467, "learning_rate": 1.029145415722264e-06, "loss": 0.5502, "step": 26048 }, { "epoch": 0.7983633688856198, "grad_norm": 1.9650486533100116, "learning_rate": 1.028843825685651e-06, "loss": 0.6102, "step": 26049 }, { "epoch": 0.7983940174083609, "grad_norm": 2.016234607805312, "learning_rate": 1.0285422747778583e-06, "loss": 0.587, "step": 26050 }, { "epoch": 0.7984246659311022, "grad_norm": 1.679658872130664, "learning_rate": 1.028240763001855e-06, "loss": 0.5592, "step": 26051 }, { "epoch": 0.7984553144538433, "grad_norm": 1.804055179795286, "learning_rate": 1.02793929036061e-06, "loss": 0.607, "step": 26052 }, { "epoch": 0.7984859629765845, "grad_norm": 1.9255819415070292, "learning_rate": 1.0276378568570955e-06, "loss": 0.6644, "step": 26053 }, { "epoch": 0.7985166114993257, "grad_norm": 1.8931568539559667, "learning_rate": 1.0273364624942816e-06, "loss": 0.5662, "step": 26054 }, { "epoch": 0.7985472600220669, "grad_norm": 2.102157569515186, "learning_rate": 1.0270351072751394e-06, "loss": 0.5857, "step": 26055 }, { "epoch": 0.7985779085448081, "grad_norm": 1.9998062503935907, "learning_rate": 1.026733791202636e-06, "loss": 0.5143, "step": 26056 }, { "epoch": 0.7986085570675493, "grad_norm": 2.06049708678213, "learning_rate": 1.0264325142797405e-06, "loss": 0.6227, "step": 26057 }, { "epoch": 0.7986392055902906, "grad_norm": 1.9150531802775768, "learning_rate": 1.026131276509424e-06, "loss": 0.5179, "step": 26058 }, { "epoch": 0.7986698541130317, "grad_norm": 1.759222650809232, "learning_rate": 1.0258300778946522e-06, "loss": 0.6154, "step": 26059 }, { "epoch": 0.798700502635773, "grad_norm": 1.9456466003343982, "learning_rate": 1.0255289184383921e-06, "loss": 0.5278, "step": 26060 }, { "epoch": 0.7987311511585141, "grad_norm": 1.8218682073431542, "learning_rate": 1.0252277981436131e-06, "loss": 0.5936, "step": 26061 }, { "epoch": 0.7987617996812554, "grad_norm": 1.7611774840661472, "learning_rate": 1.024926717013281e-06, "loss": 0.5973, "step": 26062 }, { "epoch": 0.7987924482039965, "grad_norm": 1.9448502693565306, "learning_rate": 1.0246256750503648e-06, "loss": 0.5539, "step": 26063 }, { "epoch": 0.7988230967267378, "grad_norm": 0.7658759223642343, "learning_rate": 1.024324672257827e-06, "loss": 0.3881, "step": 26064 }, { "epoch": 0.7988537452494789, "grad_norm": 1.9280811091014367, "learning_rate": 1.0240237086386363e-06, "loss": 0.5887, "step": 26065 }, { "epoch": 0.7988843937722202, "grad_norm": 1.9741591480031835, "learning_rate": 1.023722784195758e-06, "loss": 0.5559, "step": 26066 }, { "epoch": 0.7989150422949614, "grad_norm": 1.9890358395991263, "learning_rate": 1.0234218989321564e-06, "loss": 0.5456, "step": 26067 }, { "epoch": 0.7989456908177026, "grad_norm": 0.7768655811073157, "learning_rate": 1.023121052850794e-06, "loss": 0.3968, "step": 26068 }, { "epoch": 0.7989763393404438, "grad_norm": 1.6861323997356252, "learning_rate": 1.0228202459546398e-06, "loss": 0.5446, "step": 26069 }, { "epoch": 0.799006987863185, "grad_norm": 1.9160051815893373, "learning_rate": 1.0225194782466546e-06, "loss": 0.6236, "step": 26070 }, { "epoch": 0.7990376363859262, "grad_norm": 1.8873551316504142, "learning_rate": 1.0222187497298037e-06, "loss": 0.5898, "step": 26071 }, { "epoch": 0.7990682849086674, "grad_norm": 2.0782895289569483, "learning_rate": 1.0219180604070472e-06, "loss": 0.6236, "step": 26072 }, { "epoch": 0.7990989334314086, "grad_norm": 1.9204161823535848, "learning_rate": 1.0216174102813504e-06, "loss": 0.4801, "step": 26073 }, { "epoch": 0.7991295819541498, "grad_norm": 1.9293885904553625, "learning_rate": 1.0213167993556767e-06, "loss": 0.6284, "step": 26074 }, { "epoch": 0.799160230476891, "grad_norm": 1.876909454839717, "learning_rate": 1.021016227632985e-06, "loss": 0.5713, "step": 26075 }, { "epoch": 0.7991908789996323, "grad_norm": 1.9142746320543116, "learning_rate": 1.0207156951162384e-06, "loss": 0.5941, "step": 26076 }, { "epoch": 0.7992215275223734, "grad_norm": 1.8112365728283166, "learning_rate": 1.0204152018083995e-06, "loss": 0.6095, "step": 26077 }, { "epoch": 0.7992521760451147, "grad_norm": 1.9547686905664496, "learning_rate": 1.0201147477124284e-06, "loss": 0.5493, "step": 26078 }, { "epoch": 0.7992828245678558, "grad_norm": 1.9697808137387518, "learning_rate": 1.0198143328312816e-06, "loss": 0.5075, "step": 26079 }, { "epoch": 0.7993134730905971, "grad_norm": 0.8185680474137734, "learning_rate": 1.0195139571679258e-06, "loss": 0.396, "step": 26080 }, { "epoch": 0.7993441216133382, "grad_norm": 1.8025465054399923, "learning_rate": 1.0192136207253156e-06, "loss": 0.6123, "step": 26081 }, { "epoch": 0.7993747701360795, "grad_norm": 1.8648609581089357, "learning_rate": 1.0189133235064135e-06, "loss": 0.5348, "step": 26082 }, { "epoch": 0.7994054186588206, "grad_norm": 1.9879418694023487, "learning_rate": 1.0186130655141763e-06, "loss": 0.5867, "step": 26083 }, { "epoch": 0.7994360671815618, "grad_norm": 1.920920448155268, "learning_rate": 1.0183128467515625e-06, "loss": 0.5911, "step": 26084 }, { "epoch": 0.799466715704303, "grad_norm": 1.8374864884109359, "learning_rate": 1.018012667221533e-06, "loss": 0.5829, "step": 26085 }, { "epoch": 0.7994973642270442, "grad_norm": 1.9668754752359796, "learning_rate": 1.0177125269270415e-06, "loss": 0.5284, "step": 26086 }, { "epoch": 0.7995280127497855, "grad_norm": 1.8452806084392948, "learning_rate": 1.0174124258710477e-06, "loss": 0.5635, "step": 26087 }, { "epoch": 0.7995586612725266, "grad_norm": 1.6035415231382866, "learning_rate": 1.0171123640565095e-06, "loss": 0.5452, "step": 26088 }, { "epoch": 0.7995893097952679, "grad_norm": 1.8687356320623076, "learning_rate": 1.0168123414863813e-06, "loss": 0.6277, "step": 26089 }, { "epoch": 0.799619958318009, "grad_norm": 1.686271094312086, "learning_rate": 1.01651235816362e-06, "loss": 0.6015, "step": 26090 }, { "epoch": 0.7996506068407503, "grad_norm": 1.8479716792946173, "learning_rate": 1.0162124140911827e-06, "loss": 0.5894, "step": 26091 }, { "epoch": 0.7996812553634914, "grad_norm": 0.8213219912088767, "learning_rate": 1.015912509272023e-06, "loss": 0.3983, "step": 26092 }, { "epoch": 0.7997119038862327, "grad_norm": 1.7379752679168352, "learning_rate": 1.0156126437090973e-06, "loss": 0.6808, "step": 26093 }, { "epoch": 0.7997425524089739, "grad_norm": 2.0494180685390524, "learning_rate": 1.015312817405359e-06, "loss": 0.5828, "step": 26094 }, { "epoch": 0.7997732009317151, "grad_norm": 1.7592458207140953, "learning_rate": 1.0150130303637628e-06, "loss": 0.5785, "step": 26095 }, { "epoch": 0.7998038494544563, "grad_norm": 1.8580981049667475, "learning_rate": 1.0147132825872641e-06, "loss": 0.6111, "step": 26096 }, { "epoch": 0.7998344979771975, "grad_norm": 1.8989358380582781, "learning_rate": 1.0144135740788142e-06, "loss": 0.552, "step": 26097 }, { "epoch": 0.7998651464999387, "grad_norm": 2.014848883280252, "learning_rate": 1.014113904841366e-06, "loss": 0.5785, "step": 26098 }, { "epoch": 0.7998957950226799, "grad_norm": 2.016258257636917, "learning_rate": 1.0138142748778756e-06, "loss": 0.6527, "step": 26099 }, { "epoch": 0.7999264435454211, "grad_norm": 2.0763789601660823, "learning_rate": 1.0135146841912918e-06, "loss": 0.5493, "step": 26100 }, { "epoch": 0.7999570920681623, "grad_norm": 1.884396345123129, "learning_rate": 1.0132151327845674e-06, "loss": 0.5802, "step": 26101 }, { "epoch": 0.7999877405909035, "grad_norm": 1.9710871627195854, "learning_rate": 1.012915620660656e-06, "loss": 0.5227, "step": 26102 }, { "epoch": 0.8000183891136448, "grad_norm": 1.6737243812277465, "learning_rate": 1.0126161478225055e-06, "loss": 0.4852, "step": 26103 }, { "epoch": 0.8000490376363859, "grad_norm": 0.8122184856665697, "learning_rate": 1.01231671427307e-06, "loss": 0.4058, "step": 26104 }, { "epoch": 0.8000796861591272, "grad_norm": 2.153752984553308, "learning_rate": 1.0120173200152967e-06, "loss": 0.5546, "step": 26105 }, { "epoch": 0.8001103346818683, "grad_norm": 1.9300925166206475, "learning_rate": 1.011717965052137e-06, "loss": 0.5859, "step": 26106 }, { "epoch": 0.8001409832046096, "grad_norm": 1.7353092336238305, "learning_rate": 1.011418649386542e-06, "loss": 0.5766, "step": 26107 }, { "epoch": 0.8001716317273507, "grad_norm": 0.8738421806085811, "learning_rate": 1.0111193730214581e-06, "loss": 0.4086, "step": 26108 }, { "epoch": 0.800202280250092, "grad_norm": 1.7625668441217093, "learning_rate": 1.0108201359598357e-06, "loss": 0.5108, "step": 26109 }, { "epoch": 0.8002329287728331, "grad_norm": 1.9705520365666656, "learning_rate": 1.0105209382046244e-06, "loss": 0.5557, "step": 26110 }, { "epoch": 0.8002635772955744, "grad_norm": 1.8989137892119694, "learning_rate": 1.01022177975877e-06, "loss": 0.5349, "step": 26111 }, { "epoch": 0.8002942258183156, "grad_norm": 1.6156468800394446, "learning_rate": 1.0099226606252226e-06, "loss": 0.4823, "step": 26112 }, { "epoch": 0.8003248743410568, "grad_norm": 1.7503513424842825, "learning_rate": 1.0096235808069266e-06, "loss": 0.5967, "step": 26113 }, { "epoch": 0.800355522863798, "grad_norm": 1.787610838067192, "learning_rate": 1.0093245403068308e-06, "loss": 0.6103, "step": 26114 }, { "epoch": 0.8003861713865391, "grad_norm": 1.6854541778601533, "learning_rate": 1.0090255391278819e-06, "loss": 0.5759, "step": 26115 }, { "epoch": 0.8004168199092804, "grad_norm": 1.726295719646213, "learning_rate": 1.008726577273025e-06, "loss": 0.486, "step": 26116 }, { "epoch": 0.8004474684320215, "grad_norm": 1.8101769031890214, "learning_rate": 1.008427654745206e-06, "loss": 0.5445, "step": 26117 }, { "epoch": 0.8004781169547628, "grad_norm": 1.8573565168849522, "learning_rate": 1.008128771547372e-06, "loss": 0.6069, "step": 26118 }, { "epoch": 0.8005087654775039, "grad_norm": 2.1360355279108454, "learning_rate": 1.007829927682465e-06, "loss": 0.6015, "step": 26119 }, { "epoch": 0.8005394140002452, "grad_norm": 2.086306428578305, "learning_rate": 1.0075311231534314e-06, "loss": 0.5229, "step": 26120 }, { "epoch": 0.8005700625229863, "grad_norm": 1.7633849845792613, "learning_rate": 1.0072323579632165e-06, "loss": 0.6318, "step": 26121 }, { "epoch": 0.8006007110457276, "grad_norm": 1.856986754421337, "learning_rate": 1.0069336321147616e-06, "loss": 0.5835, "step": 26122 }, { "epoch": 0.8006313595684688, "grad_norm": 1.9233028407954684, "learning_rate": 1.006634945611012e-06, "loss": 0.5832, "step": 26123 }, { "epoch": 0.80066200809121, "grad_norm": 1.8789933686314473, "learning_rate": 1.006336298454908e-06, "loss": 0.5727, "step": 26124 }, { "epoch": 0.8006926566139512, "grad_norm": 0.7878722981572625, "learning_rate": 1.0060376906493968e-06, "loss": 0.3889, "step": 26125 }, { "epoch": 0.8007233051366924, "grad_norm": 1.928541056829663, "learning_rate": 1.0057391221974178e-06, "loss": 0.5339, "step": 26126 }, { "epoch": 0.8007539536594336, "grad_norm": 1.9009837087860721, "learning_rate": 1.0054405931019124e-06, "loss": 0.5132, "step": 26127 }, { "epoch": 0.8007846021821748, "grad_norm": 0.7884926151227881, "learning_rate": 1.0051421033658226e-06, "loss": 0.3849, "step": 26128 }, { "epoch": 0.800815250704916, "grad_norm": 2.027840013978195, "learning_rate": 1.0048436529920908e-06, "loss": 0.5818, "step": 26129 }, { "epoch": 0.8008458992276573, "grad_norm": 1.6466222263003327, "learning_rate": 1.004545241983656e-06, "loss": 0.6384, "step": 26130 }, { "epoch": 0.8008765477503984, "grad_norm": 1.8764523237956545, "learning_rate": 1.0042468703434588e-06, "loss": 0.5933, "step": 26131 }, { "epoch": 0.8009071962731397, "grad_norm": 1.8563893057283993, "learning_rate": 1.0039485380744408e-06, "loss": 0.6296, "step": 26132 }, { "epoch": 0.8009378447958808, "grad_norm": 2.0077577626253924, "learning_rate": 1.0036502451795393e-06, "loss": 0.5694, "step": 26133 }, { "epoch": 0.8009684933186221, "grad_norm": 1.8625991833908435, "learning_rate": 1.0033519916616958e-06, "loss": 0.5874, "step": 26134 }, { "epoch": 0.8009991418413632, "grad_norm": 1.918837678011883, "learning_rate": 1.0030537775238447e-06, "loss": 0.5885, "step": 26135 }, { "epoch": 0.8010297903641045, "grad_norm": 1.774614278415829, "learning_rate": 1.00275560276893e-06, "loss": 0.581, "step": 26136 }, { "epoch": 0.8010604388868456, "grad_norm": 1.7867448814839093, "learning_rate": 1.0024574673998871e-06, "loss": 0.5668, "step": 26137 }, { "epoch": 0.8010910874095869, "grad_norm": 1.8981238082515532, "learning_rate": 1.0021593714196525e-06, "loss": 0.6153, "step": 26138 }, { "epoch": 0.801121735932328, "grad_norm": 1.70014390109954, "learning_rate": 1.0018613148311646e-06, "loss": 0.4931, "step": 26139 }, { "epoch": 0.8011523844550693, "grad_norm": 2.0073713129705677, "learning_rate": 1.0015632976373612e-06, "loss": 0.6461, "step": 26140 }, { "epoch": 0.8011830329778105, "grad_norm": 0.8298734246945647, "learning_rate": 1.0012653198411765e-06, "loss": 0.3924, "step": 26141 }, { "epoch": 0.8012136815005517, "grad_norm": 1.874746741224655, "learning_rate": 1.0009673814455478e-06, "loss": 0.513, "step": 26142 }, { "epoch": 0.8012443300232929, "grad_norm": 1.9373197021592123, "learning_rate": 1.0006694824534109e-06, "loss": 0.6044, "step": 26143 }, { "epoch": 0.8012749785460341, "grad_norm": 0.7680307389356031, "learning_rate": 1.0003716228677018e-06, "loss": 0.3952, "step": 26144 }, { "epoch": 0.8013056270687753, "grad_norm": 1.723050579758409, "learning_rate": 1.0000738026913542e-06, "loss": 0.5878, "step": 26145 }, { "epoch": 0.8013362755915164, "grad_norm": 1.8191972215395507, "learning_rate": 9.997760219273006e-07, "loss": 0.5716, "step": 26146 }, { "epoch": 0.8013669241142577, "grad_norm": 1.898059632192553, "learning_rate": 9.994782805784798e-07, "loss": 0.5534, "step": 26147 }, { "epoch": 0.8013975726369988, "grad_norm": 0.8331844465084138, "learning_rate": 9.99180578647823e-07, "loss": 0.4274, "step": 26148 }, { "epoch": 0.8014282211597401, "grad_norm": 1.7391552526974166, "learning_rate": 9.98882916138263e-07, "loss": 0.6173, "step": 26149 }, { "epoch": 0.8014588696824813, "grad_norm": 1.8556868302716203, "learning_rate": 9.985852930527329e-07, "loss": 0.6098, "step": 26150 }, { "epoch": 0.8014895182052225, "grad_norm": 1.7606834523200425, "learning_rate": 9.982877093941655e-07, "loss": 0.5563, "step": 26151 }, { "epoch": 0.8015201667279637, "grad_norm": 0.7925331232453956, "learning_rate": 9.979901651654944e-07, "loss": 0.3966, "step": 26152 }, { "epoch": 0.8015508152507049, "grad_norm": 1.8881959315365682, "learning_rate": 9.97692660369649e-07, "loss": 0.6156, "step": 26153 }, { "epoch": 0.8015814637734461, "grad_norm": 1.8502194597257016, "learning_rate": 9.973951950095624e-07, "loss": 0.6049, "step": 26154 }, { "epoch": 0.8016121122961873, "grad_norm": 0.7962988045953375, "learning_rate": 9.970977690881656e-07, "loss": 0.4005, "step": 26155 }, { "epoch": 0.8016427608189285, "grad_norm": 1.9782990692260882, "learning_rate": 9.968003826083889e-07, "loss": 0.4479, "step": 26156 }, { "epoch": 0.8016734093416698, "grad_norm": 1.728587039673, "learning_rate": 9.965030355731614e-07, "loss": 0.5398, "step": 26157 }, { "epoch": 0.8017040578644109, "grad_norm": 1.8885025777162254, "learning_rate": 9.962057279854132e-07, "loss": 0.5979, "step": 26158 }, { "epoch": 0.8017347063871522, "grad_norm": 2.2026228345520886, "learning_rate": 9.959084598480762e-07, "loss": 0.6565, "step": 26159 }, { "epoch": 0.8017653549098933, "grad_norm": 1.8345478848191348, "learning_rate": 9.956112311640758e-07, "loss": 0.5704, "step": 26160 }, { "epoch": 0.8017960034326346, "grad_norm": 2.1220426648335384, "learning_rate": 9.953140419363433e-07, "loss": 0.616, "step": 26161 }, { "epoch": 0.8018266519553757, "grad_norm": 1.7128513377643748, "learning_rate": 9.950168921678056e-07, "loss": 0.5542, "step": 26162 }, { "epoch": 0.801857300478117, "grad_norm": 0.7597129868241397, "learning_rate": 9.947197818613923e-07, "loss": 0.4058, "step": 26163 }, { "epoch": 0.8018879490008581, "grad_norm": 1.7152632707324793, "learning_rate": 9.944227110200305e-07, "loss": 0.5329, "step": 26164 }, { "epoch": 0.8019185975235994, "grad_norm": 1.8118879722032344, "learning_rate": 9.941256796466432e-07, "loss": 0.5937, "step": 26165 }, { "epoch": 0.8019492460463405, "grad_norm": 2.0322270182760653, "learning_rate": 9.938286877441639e-07, "loss": 0.5735, "step": 26166 }, { "epoch": 0.8019798945690818, "grad_norm": 1.6733979523549105, "learning_rate": 9.93531735315515e-07, "loss": 0.5088, "step": 26167 }, { "epoch": 0.802010543091823, "grad_norm": 0.7799536862483254, "learning_rate": 9.932348223636217e-07, "loss": 0.4106, "step": 26168 }, { "epoch": 0.8020411916145642, "grad_norm": 0.8012456504520188, "learning_rate": 9.92937948891411e-07, "loss": 0.3979, "step": 26169 }, { "epoch": 0.8020718401373054, "grad_norm": 2.1176645205591997, "learning_rate": 9.92641114901808e-07, "loss": 0.6682, "step": 26170 }, { "epoch": 0.8021024886600466, "grad_norm": 1.833546434773196, "learning_rate": 9.92344320397739e-07, "loss": 0.5713, "step": 26171 }, { "epoch": 0.8021331371827878, "grad_norm": 1.8922760634190028, "learning_rate": 9.920475653821248e-07, "loss": 0.5642, "step": 26172 }, { "epoch": 0.802163785705529, "grad_norm": 1.882000557135685, "learning_rate": 9.91750849857892e-07, "loss": 0.5962, "step": 26173 }, { "epoch": 0.8021944342282702, "grad_norm": 2.003508979572098, "learning_rate": 9.914541738279648e-07, "loss": 0.5075, "step": 26174 }, { "epoch": 0.8022250827510115, "grad_norm": 1.891745633152644, "learning_rate": 9.911575372952653e-07, "loss": 0.6289, "step": 26175 }, { "epoch": 0.8022557312737526, "grad_norm": 1.823837881711338, "learning_rate": 9.908609402627135e-07, "loss": 0.5596, "step": 26176 }, { "epoch": 0.8022863797964938, "grad_norm": 1.9172256993348644, "learning_rate": 9.905643827332373e-07, "loss": 0.5306, "step": 26177 }, { "epoch": 0.802317028319235, "grad_norm": 1.7924971589513377, "learning_rate": 9.902678647097547e-07, "loss": 0.6609, "step": 26178 }, { "epoch": 0.8023476768419762, "grad_norm": 1.8732564103747447, "learning_rate": 9.899713861951905e-07, "loss": 0.5888, "step": 26179 }, { "epoch": 0.8023783253647174, "grad_norm": 1.7263223086229078, "learning_rate": 9.896749471924627e-07, "loss": 0.5299, "step": 26180 }, { "epoch": 0.8024089738874586, "grad_norm": 1.7731963419750416, "learning_rate": 9.893785477044936e-07, "loss": 0.5347, "step": 26181 }, { "epoch": 0.8024396224101998, "grad_norm": 1.9617102021623796, "learning_rate": 9.89082187734205e-07, "loss": 0.6023, "step": 26182 }, { "epoch": 0.802470270932941, "grad_norm": 1.948681447724386, "learning_rate": 9.887858672845146e-07, "loss": 0.5438, "step": 26183 }, { "epoch": 0.8025009194556822, "grad_norm": 1.774014650634734, "learning_rate": 9.884895863583437e-07, "loss": 0.4343, "step": 26184 }, { "epoch": 0.8025315679784234, "grad_norm": 2.033381487822126, "learning_rate": 9.881933449586123e-07, "loss": 0.5873, "step": 26185 }, { "epoch": 0.8025622165011647, "grad_norm": 1.7532509973431196, "learning_rate": 9.878971430882388e-07, "loss": 0.5169, "step": 26186 }, { "epoch": 0.8025928650239058, "grad_norm": 1.7942593081941955, "learning_rate": 9.87600980750138e-07, "loss": 0.628, "step": 26187 }, { "epoch": 0.8026235135466471, "grad_norm": 1.7282220687965761, "learning_rate": 9.873048579472344e-07, "loss": 0.5492, "step": 26188 }, { "epoch": 0.8026541620693882, "grad_norm": 1.6256882030775344, "learning_rate": 9.870087746824414e-07, "loss": 0.5115, "step": 26189 }, { "epoch": 0.8026848105921295, "grad_norm": 1.9521805443056934, "learning_rate": 9.86712730958679e-07, "loss": 0.5954, "step": 26190 }, { "epoch": 0.8027154591148706, "grad_norm": 1.8432392394557024, "learning_rate": 9.864167267788615e-07, "loss": 0.5374, "step": 26191 }, { "epoch": 0.8027461076376119, "grad_norm": 1.7965245853361593, "learning_rate": 9.861207621459068e-07, "loss": 0.5466, "step": 26192 }, { "epoch": 0.802776756160353, "grad_norm": 1.9699125447670376, "learning_rate": 9.858248370627327e-07, "loss": 0.6078, "step": 26193 }, { "epoch": 0.8028074046830943, "grad_norm": 2.0454755936965543, "learning_rate": 9.855289515322524e-07, "loss": 0.6278, "step": 26194 }, { "epoch": 0.8028380532058355, "grad_norm": 1.9380326999281836, "learning_rate": 9.852331055573822e-07, "loss": 0.5767, "step": 26195 }, { "epoch": 0.8028687017285767, "grad_norm": 1.870714868748047, "learning_rate": 9.849372991410388e-07, "loss": 0.5705, "step": 26196 }, { "epoch": 0.8028993502513179, "grad_norm": 1.7785790037146638, "learning_rate": 9.84641532286134e-07, "loss": 0.5181, "step": 26197 }, { "epoch": 0.8029299987740591, "grad_norm": 1.957437394997724, "learning_rate": 9.843458049955839e-07, "loss": 0.6728, "step": 26198 }, { "epoch": 0.8029606472968003, "grad_norm": 2.1847302182840025, "learning_rate": 9.840501172723033e-07, "loss": 0.6107, "step": 26199 }, { "epoch": 0.8029912958195415, "grad_norm": 1.812210836445899, "learning_rate": 9.83754469119203e-07, "loss": 0.5841, "step": 26200 }, { "epoch": 0.8030219443422827, "grad_norm": 1.824109626985059, "learning_rate": 9.834588605391988e-07, "loss": 0.5698, "step": 26201 }, { "epoch": 0.803052592865024, "grad_norm": 1.8737032397963795, "learning_rate": 9.831632915352013e-07, "loss": 0.5654, "step": 26202 }, { "epoch": 0.8030832413877651, "grad_norm": 1.8998754070060928, "learning_rate": 9.828677621101229e-07, "loss": 0.5878, "step": 26203 }, { "epoch": 0.8031138899105064, "grad_norm": 1.9042096224704, "learning_rate": 9.82572272266878e-07, "loss": 0.5906, "step": 26204 }, { "epoch": 0.8031445384332475, "grad_norm": 1.9604465407271352, "learning_rate": 9.822768220083751e-07, "loss": 0.6018, "step": 26205 }, { "epoch": 0.8031751869559888, "grad_norm": 1.9625821446734626, "learning_rate": 9.819814113375264e-07, "loss": 0.5305, "step": 26206 }, { "epoch": 0.8032058354787299, "grad_norm": 1.8950482547931602, "learning_rate": 9.816860402572442e-07, "loss": 0.5624, "step": 26207 }, { "epoch": 0.8032364840014711, "grad_norm": 1.7290411591442736, "learning_rate": 9.813907087704366e-07, "loss": 0.5442, "step": 26208 }, { "epoch": 0.8032671325242123, "grad_norm": 1.702032191988799, "learning_rate": 9.810954168800157e-07, "loss": 0.4893, "step": 26209 }, { "epoch": 0.8032977810469535, "grad_norm": 1.8350904990417867, "learning_rate": 9.808001645888888e-07, "loss": 0.6298, "step": 26210 }, { "epoch": 0.8033284295696947, "grad_norm": 1.772604286327237, "learning_rate": 9.80504951899966e-07, "loss": 0.4855, "step": 26211 }, { "epoch": 0.8033590780924359, "grad_norm": 1.9244431988099782, "learning_rate": 9.802097788161574e-07, "loss": 0.6504, "step": 26212 }, { "epoch": 0.8033897266151772, "grad_norm": 1.8724956407471003, "learning_rate": 9.799146453403696e-07, "loss": 0.4837, "step": 26213 }, { "epoch": 0.8034203751379183, "grad_norm": 1.6741921575849945, "learning_rate": 9.796195514755107e-07, "loss": 0.5836, "step": 26214 }, { "epoch": 0.8034510236606596, "grad_norm": 1.761782172275186, "learning_rate": 9.79324497224491e-07, "loss": 0.539, "step": 26215 }, { "epoch": 0.8034816721834007, "grad_norm": 1.5932967327757592, "learning_rate": 9.790294825902141e-07, "loss": 0.5722, "step": 26216 }, { "epoch": 0.803512320706142, "grad_norm": 1.936130383038261, "learning_rate": 9.78734507575589e-07, "loss": 0.6017, "step": 26217 }, { "epoch": 0.8035429692288831, "grad_norm": 1.873026075605795, "learning_rate": 9.784395721835222e-07, "loss": 0.6104, "step": 26218 }, { "epoch": 0.8035736177516244, "grad_norm": 1.762881754968436, "learning_rate": 9.781446764169184e-07, "loss": 0.5999, "step": 26219 }, { "epoch": 0.8036042662743655, "grad_norm": 1.913632225597898, "learning_rate": 9.778498202786858e-07, "loss": 0.5713, "step": 26220 }, { "epoch": 0.8036349147971068, "grad_norm": 0.8219142036181154, "learning_rate": 9.775550037717263e-07, "loss": 0.3935, "step": 26221 }, { "epoch": 0.803665563319848, "grad_norm": 2.0026608643448225, "learning_rate": 9.772602268989462e-07, "loss": 0.5857, "step": 26222 }, { "epoch": 0.8036962118425892, "grad_norm": 1.4777919048282195, "learning_rate": 9.769654896632524e-07, "loss": 0.4253, "step": 26223 }, { "epoch": 0.8037268603653304, "grad_norm": 1.8371669026781667, "learning_rate": 9.76670792067545e-07, "loss": 0.5762, "step": 26224 }, { "epoch": 0.8037575088880716, "grad_norm": 2.023513656475765, "learning_rate": 9.763761341147299e-07, "loss": 0.5707, "step": 26225 }, { "epoch": 0.8037881574108128, "grad_norm": 1.5928168825081994, "learning_rate": 9.76081515807712e-07, "loss": 0.6121, "step": 26226 }, { "epoch": 0.803818805933554, "grad_norm": 2.4658270592255733, "learning_rate": 9.757869371493906e-07, "loss": 0.5905, "step": 26227 }, { "epoch": 0.8038494544562952, "grad_norm": 1.998195029205417, "learning_rate": 9.754923981426706e-07, "loss": 0.5523, "step": 26228 }, { "epoch": 0.8038801029790364, "grad_norm": 1.896078880724743, "learning_rate": 9.751978987904547e-07, "loss": 0.5119, "step": 26229 }, { "epoch": 0.8039107515017776, "grad_norm": 1.741814861416227, "learning_rate": 9.749034390956424e-07, "loss": 0.5444, "step": 26230 }, { "epoch": 0.8039414000245189, "grad_norm": 1.866981501015517, "learning_rate": 9.74609019061138e-07, "loss": 0.6335, "step": 26231 }, { "epoch": 0.80397204854726, "grad_norm": 1.9415553270997574, "learning_rate": 9.74314638689839e-07, "loss": 0.59, "step": 26232 }, { "epoch": 0.8040026970700013, "grad_norm": 2.0092616324969246, "learning_rate": 9.74020297984648e-07, "loss": 0.6024, "step": 26233 }, { "epoch": 0.8040333455927424, "grad_norm": 1.7905019084473464, "learning_rate": 9.73725996948467e-07, "loss": 0.6474, "step": 26234 }, { "epoch": 0.8040639941154837, "grad_norm": 2.052272109527024, "learning_rate": 9.734317355841922e-07, "loss": 0.6526, "step": 26235 }, { "epoch": 0.8040946426382248, "grad_norm": 2.0168103327617644, "learning_rate": 9.731375138947246e-07, "loss": 0.6404, "step": 26236 }, { "epoch": 0.8041252911609661, "grad_norm": 1.782115093888291, "learning_rate": 9.72843331882965e-07, "loss": 0.508, "step": 26237 }, { "epoch": 0.8041559396837072, "grad_norm": 1.8302360381456209, "learning_rate": 9.725491895518092e-07, "loss": 0.5685, "step": 26238 }, { "epoch": 0.8041865882064484, "grad_norm": 1.9008263916115187, "learning_rate": 9.722550869041563e-07, "loss": 0.5649, "step": 26239 }, { "epoch": 0.8042172367291897, "grad_norm": 1.9739616912490416, "learning_rate": 9.719610239429062e-07, "loss": 0.7112, "step": 26240 }, { "epoch": 0.8042478852519308, "grad_norm": 2.0295059722682947, "learning_rate": 9.716670006709533e-07, "loss": 0.5436, "step": 26241 }, { "epoch": 0.8042785337746721, "grad_norm": 1.7472044059198801, "learning_rate": 9.713730170911973e-07, "loss": 0.4824, "step": 26242 }, { "epoch": 0.8043091822974132, "grad_norm": 1.8625606856347658, "learning_rate": 9.71079073206531e-07, "loss": 0.5441, "step": 26243 }, { "epoch": 0.8043398308201545, "grad_norm": 1.724699010871267, "learning_rate": 9.707851690198565e-07, "loss": 0.5396, "step": 26244 }, { "epoch": 0.8043704793428956, "grad_norm": 1.8011271159355218, "learning_rate": 9.704913045340664e-07, "loss": 0.5005, "step": 26245 }, { "epoch": 0.8044011278656369, "grad_norm": 0.8432712198232949, "learning_rate": 9.701974797520553e-07, "loss": 0.4073, "step": 26246 }, { "epoch": 0.804431776388378, "grad_norm": 1.9436238702327526, "learning_rate": 9.6990369467672e-07, "loss": 0.5883, "step": 26247 }, { "epoch": 0.8044624249111193, "grad_norm": 0.9286851455587088, "learning_rate": 9.69609949310955e-07, "loss": 0.4122, "step": 26248 }, { "epoch": 0.8044930734338605, "grad_norm": 2.1578477812081402, "learning_rate": 9.693162436576537e-07, "loss": 0.6495, "step": 26249 }, { "epoch": 0.8045237219566017, "grad_norm": 1.8772487226038959, "learning_rate": 9.690225777197104e-07, "loss": 0.6311, "step": 26250 }, { "epoch": 0.8045543704793429, "grad_norm": 1.7928331039755163, "learning_rate": 9.687289515000192e-07, "loss": 0.6016, "step": 26251 }, { "epoch": 0.8045850190020841, "grad_norm": 1.8025644020653475, "learning_rate": 9.684353650014749e-07, "loss": 0.5709, "step": 26252 }, { "epoch": 0.8046156675248253, "grad_norm": 1.9625767332241548, "learning_rate": 9.681418182269682e-07, "loss": 0.5641, "step": 26253 }, { "epoch": 0.8046463160475665, "grad_norm": 1.7833714723608445, "learning_rate": 9.678483111793896e-07, "loss": 0.513, "step": 26254 }, { "epoch": 0.8046769645703077, "grad_norm": 1.9061165942589224, "learning_rate": 9.67554843861634e-07, "loss": 0.5651, "step": 26255 }, { "epoch": 0.804707613093049, "grad_norm": 1.7577822752764571, "learning_rate": 9.672614162765936e-07, "loss": 0.5453, "step": 26256 }, { "epoch": 0.8047382616157901, "grad_norm": 1.8823108243714546, "learning_rate": 9.66968028427157e-07, "loss": 0.5788, "step": 26257 }, { "epoch": 0.8047689101385314, "grad_norm": 1.7800551274617664, "learning_rate": 9.666746803162163e-07, "loss": 0.5573, "step": 26258 }, { "epoch": 0.8047995586612725, "grad_norm": 0.8281288007835865, "learning_rate": 9.663813719466631e-07, "loss": 0.4194, "step": 26259 }, { "epoch": 0.8048302071840138, "grad_norm": 0.7855870355386062, "learning_rate": 9.660881033213847e-07, "loss": 0.394, "step": 26260 }, { "epoch": 0.8048608557067549, "grad_norm": 1.6461751672476506, "learning_rate": 9.657948744432743e-07, "loss": 0.5001, "step": 26261 }, { "epoch": 0.8048915042294962, "grad_norm": 1.7330623440745248, "learning_rate": 9.65501685315216e-07, "loss": 0.5287, "step": 26262 }, { "epoch": 0.8049221527522373, "grad_norm": 1.7144524325201715, "learning_rate": 9.652085359401047e-07, "loss": 0.4938, "step": 26263 }, { "epoch": 0.8049528012749786, "grad_norm": 0.7839410377798828, "learning_rate": 9.64915426320826e-07, "loss": 0.3988, "step": 26264 }, { "epoch": 0.8049834497977197, "grad_norm": 2.0415632558354257, "learning_rate": 9.64622356460267e-07, "loss": 0.58, "step": 26265 }, { "epoch": 0.805014098320461, "grad_norm": 0.8455080108304754, "learning_rate": 9.643293263613162e-07, "loss": 0.3856, "step": 26266 }, { "epoch": 0.8050447468432022, "grad_norm": 2.1527136846432913, "learning_rate": 9.640363360268623e-07, "loss": 0.6343, "step": 26267 }, { "epoch": 0.8050753953659434, "grad_norm": 1.924386175000749, "learning_rate": 9.6374338545979e-07, "loss": 0.5601, "step": 26268 }, { "epoch": 0.8051060438886846, "grad_norm": 1.7869571195497487, "learning_rate": 9.634504746629863e-07, "loss": 0.5707, "step": 26269 }, { "epoch": 0.8051366924114257, "grad_norm": 1.7234956257541394, "learning_rate": 9.631576036393386e-07, "loss": 0.4538, "step": 26270 }, { "epoch": 0.805167340934167, "grad_norm": 1.81999470294471, "learning_rate": 9.628647723917329e-07, "loss": 0.5097, "step": 26271 }, { "epoch": 0.8051979894569081, "grad_norm": 1.6597095646547755, "learning_rate": 9.625719809230532e-07, "loss": 0.5331, "step": 26272 }, { "epoch": 0.8052286379796494, "grad_norm": 1.8466803005193464, "learning_rate": 9.622792292361827e-07, "loss": 0.5072, "step": 26273 }, { "epoch": 0.8052592865023905, "grad_norm": 1.8804555342670435, "learning_rate": 9.619865173340105e-07, "loss": 0.6231, "step": 26274 }, { "epoch": 0.8052899350251318, "grad_norm": 1.6560936410867864, "learning_rate": 9.61693845219418e-07, "loss": 0.622, "step": 26275 }, { "epoch": 0.805320583547873, "grad_norm": 1.4911852674553374, "learning_rate": 9.614012128952888e-07, "loss": 0.478, "step": 26276 }, { "epoch": 0.8053512320706142, "grad_norm": 1.7372019802317107, "learning_rate": 9.61108620364506e-07, "loss": 0.5712, "step": 26277 }, { "epoch": 0.8053818805933554, "grad_norm": 1.7807459510926913, "learning_rate": 9.608160676299534e-07, "loss": 0.5301, "step": 26278 }, { "epoch": 0.8054125291160966, "grad_norm": 1.8247704277900039, "learning_rate": 9.605235546945152e-07, "loss": 0.5181, "step": 26279 }, { "epoch": 0.8054431776388378, "grad_norm": 1.7222265388790243, "learning_rate": 9.602310815610705e-07, "loss": 0.4751, "step": 26280 }, { "epoch": 0.805473826161579, "grad_norm": 2.1025485460105564, "learning_rate": 9.599386482325024e-07, "loss": 0.5176, "step": 26281 }, { "epoch": 0.8055044746843202, "grad_norm": 1.7041471793900134, "learning_rate": 9.596462547116942e-07, "loss": 0.5321, "step": 26282 }, { "epoch": 0.8055351232070614, "grad_norm": 1.985194756622553, "learning_rate": 9.593539010015245e-07, "loss": 0.5932, "step": 26283 }, { "epoch": 0.8055657717298026, "grad_norm": 1.9225659402347681, "learning_rate": 9.59061587104873e-07, "loss": 0.5213, "step": 26284 }, { "epoch": 0.8055964202525439, "grad_norm": 1.717187126036837, "learning_rate": 9.587693130246235e-07, "loss": 0.5913, "step": 26285 }, { "epoch": 0.805627068775285, "grad_norm": 1.8359607097881212, "learning_rate": 9.584770787636543e-07, "loss": 0.5912, "step": 26286 }, { "epoch": 0.8056577172980263, "grad_norm": 1.7550955108337, "learning_rate": 9.58184884324843e-07, "loss": 0.5037, "step": 26287 }, { "epoch": 0.8056883658207674, "grad_norm": 1.9779285663426873, "learning_rate": 9.578927297110701e-07, "loss": 0.6559, "step": 26288 }, { "epoch": 0.8057190143435087, "grad_norm": 1.9013527174378035, "learning_rate": 9.576006149252148e-07, "loss": 0.5013, "step": 26289 }, { "epoch": 0.8057496628662498, "grad_norm": 0.840385278175887, "learning_rate": 9.573085399701558e-07, "loss": 0.3957, "step": 26290 }, { "epoch": 0.8057803113889911, "grad_norm": 1.9617551651764797, "learning_rate": 9.57016504848769e-07, "loss": 0.4532, "step": 26291 }, { "epoch": 0.8058109599117322, "grad_norm": 1.884749962492417, "learning_rate": 9.567245095639333e-07, "loss": 0.5629, "step": 26292 }, { "epoch": 0.8058416084344735, "grad_norm": 1.8054500748529694, "learning_rate": 9.564325541185266e-07, "loss": 0.6619, "step": 26293 }, { "epoch": 0.8058722569572146, "grad_norm": 1.8838234249244548, "learning_rate": 9.561406385154243e-07, "loss": 0.5734, "step": 26294 }, { "epoch": 0.8059029054799559, "grad_norm": 1.7542934997389954, "learning_rate": 9.558487627575002e-07, "loss": 0.5862, "step": 26295 }, { "epoch": 0.8059335540026971, "grad_norm": 1.8135774377359308, "learning_rate": 9.55556926847636e-07, "loss": 0.569, "step": 26296 }, { "epoch": 0.8059642025254383, "grad_norm": 0.7652951485104869, "learning_rate": 9.552651307887028e-07, "loss": 0.3884, "step": 26297 }, { "epoch": 0.8059948510481795, "grad_norm": 1.8215311586135017, "learning_rate": 9.549733745835787e-07, "loss": 0.5447, "step": 26298 }, { "epoch": 0.8060254995709207, "grad_norm": 2.0440272760440457, "learning_rate": 9.546816582351354e-07, "loss": 0.5715, "step": 26299 }, { "epoch": 0.8060561480936619, "grad_norm": 1.7846096321237972, "learning_rate": 9.543899817462488e-07, "loss": 0.5577, "step": 26300 }, { "epoch": 0.806086796616403, "grad_norm": 2.0954333878326334, "learning_rate": 9.540983451197939e-07, "loss": 0.5909, "step": 26301 }, { "epoch": 0.8061174451391443, "grad_norm": 2.1029546954457476, "learning_rate": 9.538067483586422e-07, "loss": 0.5789, "step": 26302 }, { "epoch": 0.8061480936618854, "grad_norm": 0.8046464688774853, "learning_rate": 9.535151914656676e-07, "loss": 0.4084, "step": 26303 }, { "epoch": 0.8061787421846267, "grad_norm": 2.0581127377905664, "learning_rate": 9.532236744437445e-07, "loss": 0.6385, "step": 26304 }, { "epoch": 0.8062093907073679, "grad_norm": 2.0599512073308825, "learning_rate": 9.529321972957428e-07, "loss": 0.5972, "step": 26305 }, { "epoch": 0.8062400392301091, "grad_norm": 1.7524532228537022, "learning_rate": 9.526407600245369e-07, "loss": 0.5789, "step": 26306 }, { "epoch": 0.8062706877528503, "grad_norm": 1.9688377083593853, "learning_rate": 9.523493626329961e-07, "loss": 0.5656, "step": 26307 }, { "epoch": 0.8063013362755915, "grad_norm": 2.125030041056799, "learning_rate": 9.520580051239925e-07, "loss": 0.6175, "step": 26308 }, { "epoch": 0.8063319847983327, "grad_norm": 1.6833146497650555, "learning_rate": 9.517666875003978e-07, "loss": 0.5201, "step": 26309 }, { "epoch": 0.8063626333210739, "grad_norm": 1.8816176996023481, "learning_rate": 9.514754097650813e-07, "loss": 0.623, "step": 26310 }, { "epoch": 0.8063932818438151, "grad_norm": 2.06620570357826, "learning_rate": 9.51184171920913e-07, "loss": 0.5355, "step": 26311 }, { "epoch": 0.8064239303665564, "grad_norm": 0.7900476026228758, "learning_rate": 9.508929739707639e-07, "loss": 0.4014, "step": 26312 }, { "epoch": 0.8064545788892975, "grad_norm": 2.0595555671351136, "learning_rate": 9.506018159175029e-07, "loss": 0.547, "step": 26313 }, { "epoch": 0.8064852274120388, "grad_norm": 1.8847705673228858, "learning_rate": 9.503106977639959e-07, "loss": 0.5472, "step": 26314 }, { "epoch": 0.8065158759347799, "grad_norm": 1.7335330025587286, "learning_rate": 9.500196195131156e-07, "loss": 0.5475, "step": 26315 }, { "epoch": 0.8065465244575212, "grad_norm": 1.974993912703293, "learning_rate": 9.497285811677276e-07, "loss": 0.5732, "step": 26316 }, { "epoch": 0.8065771729802623, "grad_norm": 1.9659255578705228, "learning_rate": 9.494375827307012e-07, "loss": 0.6067, "step": 26317 }, { "epoch": 0.8066078215030036, "grad_norm": 1.945740007827795, "learning_rate": 9.491466242049014e-07, "loss": 0.537, "step": 26318 }, { "epoch": 0.8066384700257447, "grad_norm": 1.8164286200457658, "learning_rate": 9.488557055931963e-07, "loss": 0.6193, "step": 26319 }, { "epoch": 0.806669118548486, "grad_norm": 1.9996814211963863, "learning_rate": 9.485648268984538e-07, "loss": 0.5613, "step": 26320 }, { "epoch": 0.8066997670712271, "grad_norm": 1.7263898385301102, "learning_rate": 9.482739881235375e-07, "loss": 0.5216, "step": 26321 }, { "epoch": 0.8067304155939684, "grad_norm": 1.9711259113089057, "learning_rate": 9.479831892713143e-07, "loss": 0.5021, "step": 26322 }, { "epoch": 0.8067610641167096, "grad_norm": 1.800129882579945, "learning_rate": 9.476924303446505e-07, "loss": 0.5662, "step": 26323 }, { "epoch": 0.8067917126394508, "grad_norm": 1.759640779933341, "learning_rate": 9.47401711346409e-07, "loss": 0.5155, "step": 26324 }, { "epoch": 0.806822361162192, "grad_norm": 2.0309990883640334, "learning_rate": 9.471110322794552e-07, "loss": 0.524, "step": 26325 }, { "epoch": 0.8068530096849332, "grad_norm": 1.9755056795982435, "learning_rate": 9.468203931466546e-07, "loss": 0.6387, "step": 26326 }, { "epoch": 0.8068836582076744, "grad_norm": 1.8720369853973924, "learning_rate": 9.465297939508688e-07, "loss": 0.5592, "step": 26327 }, { "epoch": 0.8069143067304156, "grad_norm": 1.8279501507106737, "learning_rate": 9.462392346949629e-07, "loss": 0.6006, "step": 26328 }, { "epoch": 0.8069449552531568, "grad_norm": 2.068855138317394, "learning_rate": 9.459487153817981e-07, "loss": 0.4927, "step": 26329 }, { "epoch": 0.806975603775898, "grad_norm": 1.9939692325819405, "learning_rate": 9.456582360142375e-07, "loss": 0.5543, "step": 26330 }, { "epoch": 0.8070062522986392, "grad_norm": 2.0068042283590466, "learning_rate": 9.453677965951452e-07, "loss": 0.4733, "step": 26331 }, { "epoch": 0.8070369008213804, "grad_norm": 1.8779786372396379, "learning_rate": 9.450773971273797e-07, "loss": 0.531, "step": 26332 }, { "epoch": 0.8070675493441216, "grad_norm": 0.8394409800139712, "learning_rate": 9.447870376138047e-07, "loss": 0.3876, "step": 26333 }, { "epoch": 0.8070981978668628, "grad_norm": 1.9959265934745012, "learning_rate": 9.444967180572817e-07, "loss": 0.5955, "step": 26334 }, { "epoch": 0.807128846389604, "grad_norm": 1.9424118302882558, "learning_rate": 9.442064384606687e-07, "loss": 0.5332, "step": 26335 }, { "epoch": 0.8071594949123452, "grad_norm": 1.7550557708305683, "learning_rate": 9.439161988268275e-07, "loss": 0.5221, "step": 26336 }, { "epoch": 0.8071901434350864, "grad_norm": 0.8029449512480945, "learning_rate": 9.436259991586188e-07, "loss": 0.3856, "step": 26337 }, { "epoch": 0.8072207919578276, "grad_norm": 1.8736763512116281, "learning_rate": 9.433358394589003e-07, "loss": 0.6575, "step": 26338 }, { "epoch": 0.8072514404805688, "grad_norm": 1.8374312253823764, "learning_rate": 9.430457197305326e-07, "loss": 0.6318, "step": 26339 }, { "epoch": 0.80728208900331, "grad_norm": 1.88788148418415, "learning_rate": 9.427556399763721e-07, "loss": 0.5803, "step": 26340 }, { "epoch": 0.8073127375260513, "grad_norm": 1.7744558106832462, "learning_rate": 9.424656001992788e-07, "loss": 0.563, "step": 26341 }, { "epoch": 0.8073433860487924, "grad_norm": 1.753291461108071, "learning_rate": 9.42175600402111e-07, "loss": 0.6326, "step": 26342 }, { "epoch": 0.8073740345715337, "grad_norm": 1.8005207184953473, "learning_rate": 9.418856405877241e-07, "loss": 0.5891, "step": 26343 }, { "epoch": 0.8074046830942748, "grad_norm": 1.7326144694204832, "learning_rate": 9.415957207589765e-07, "loss": 0.4994, "step": 26344 }, { "epoch": 0.8074353316170161, "grad_norm": 1.9849169788212653, "learning_rate": 9.413058409187254e-07, "loss": 0.4772, "step": 26345 }, { "epoch": 0.8074659801397572, "grad_norm": 0.7863856362872214, "learning_rate": 9.410160010698255e-07, "loss": 0.3972, "step": 26346 }, { "epoch": 0.8074966286624985, "grad_norm": 1.8456406073617395, "learning_rate": 9.40726201215133e-07, "loss": 0.6639, "step": 26347 }, { "epoch": 0.8075272771852396, "grad_norm": 0.8068290155866257, "learning_rate": 9.404364413575057e-07, "loss": 0.4018, "step": 26348 }, { "epoch": 0.8075579257079809, "grad_norm": 1.763876438048496, "learning_rate": 9.401467214997956e-07, "loss": 0.5527, "step": 26349 }, { "epoch": 0.8075885742307221, "grad_norm": 1.9551632529338463, "learning_rate": 9.398570416448593e-07, "loss": 0.5212, "step": 26350 }, { "epoch": 0.8076192227534633, "grad_norm": 1.7560686883511174, "learning_rate": 9.395674017955492e-07, "loss": 0.4723, "step": 26351 }, { "epoch": 0.8076498712762045, "grad_norm": 1.9705612890638555, "learning_rate": 9.392778019547205e-07, "loss": 0.6223, "step": 26352 }, { "epoch": 0.8076805197989457, "grad_norm": 2.147624280080635, "learning_rate": 9.389882421252284e-07, "loss": 0.6441, "step": 26353 }, { "epoch": 0.8077111683216869, "grad_norm": 1.726760630708477, "learning_rate": 9.386987223099225e-07, "loss": 0.5557, "step": 26354 }, { "epoch": 0.8077418168444281, "grad_norm": 0.7913451335845388, "learning_rate": 9.38409242511657e-07, "loss": 0.3789, "step": 26355 }, { "epoch": 0.8077724653671693, "grad_norm": 1.973165834106145, "learning_rate": 9.381198027332861e-07, "loss": 0.5849, "step": 26356 }, { "epoch": 0.8078031138899106, "grad_norm": 1.9075005453550173, "learning_rate": 9.378304029776586e-07, "loss": 0.6703, "step": 26357 }, { "epoch": 0.8078337624126517, "grad_norm": 2.0434296859880066, "learning_rate": 9.37541043247629e-07, "loss": 0.5666, "step": 26358 }, { "epoch": 0.807864410935393, "grad_norm": 1.9750672887672518, "learning_rate": 9.372517235460437e-07, "loss": 0.6726, "step": 26359 }, { "epoch": 0.8078950594581341, "grad_norm": 0.7558115809218715, "learning_rate": 9.369624438757597e-07, "loss": 0.4081, "step": 26360 }, { "epoch": 0.8079257079808754, "grad_norm": 1.8187641767655014, "learning_rate": 9.366732042396243e-07, "loss": 0.4905, "step": 26361 }, { "epoch": 0.8079563565036165, "grad_norm": 1.9327128447613875, "learning_rate": 9.363840046404865e-07, "loss": 0.6092, "step": 26362 }, { "epoch": 0.8079870050263577, "grad_norm": 2.1076040808743284, "learning_rate": 9.360948450811963e-07, "loss": 0.5612, "step": 26363 }, { "epoch": 0.8080176535490989, "grad_norm": 1.8561764607093938, "learning_rate": 9.358057255646047e-07, "loss": 0.5133, "step": 26364 }, { "epoch": 0.8080483020718401, "grad_norm": 1.9897836625336922, "learning_rate": 9.355166460935583e-07, "loss": 0.6312, "step": 26365 }, { "epoch": 0.8080789505945813, "grad_norm": 1.7734903955870736, "learning_rate": 9.352276066709059e-07, "loss": 0.508, "step": 26366 }, { "epoch": 0.8081095991173225, "grad_norm": 1.8953738490140228, "learning_rate": 9.349386072994976e-07, "loss": 0.665, "step": 26367 }, { "epoch": 0.8081402476400638, "grad_norm": 1.8508859393690822, "learning_rate": 9.346496479821776e-07, "loss": 0.5083, "step": 26368 }, { "epoch": 0.8081708961628049, "grad_norm": 1.7947429720352746, "learning_rate": 9.343607287217959e-07, "loss": 0.5159, "step": 26369 }, { "epoch": 0.8082015446855462, "grad_norm": 1.6317696853512484, "learning_rate": 9.340718495211965e-07, "loss": 0.4873, "step": 26370 }, { "epoch": 0.8082321932082873, "grad_norm": 1.7692432849150495, "learning_rate": 9.337830103832291e-07, "loss": 0.5779, "step": 26371 }, { "epoch": 0.8082628417310286, "grad_norm": 1.9927879275733564, "learning_rate": 9.334942113107387e-07, "loss": 0.5368, "step": 26372 }, { "epoch": 0.8082934902537697, "grad_norm": 2.1923664437376273, "learning_rate": 9.332054523065686e-07, "loss": 0.5944, "step": 26373 }, { "epoch": 0.808324138776511, "grad_norm": 1.8949056817585108, "learning_rate": 9.329167333735661e-07, "loss": 0.5996, "step": 26374 }, { "epoch": 0.8083547872992521, "grad_norm": 1.8089036952465434, "learning_rate": 9.326280545145766e-07, "loss": 0.5385, "step": 26375 }, { "epoch": 0.8083854358219934, "grad_norm": 2.0428068053915536, "learning_rate": 9.323394157324422e-07, "loss": 0.6103, "step": 26376 }, { "epoch": 0.8084160843447346, "grad_norm": 1.767100984245416, "learning_rate": 9.320508170300085e-07, "loss": 0.5319, "step": 26377 }, { "epoch": 0.8084467328674758, "grad_norm": 1.981208083069247, "learning_rate": 9.317622584101194e-07, "loss": 0.5878, "step": 26378 }, { "epoch": 0.808477381390217, "grad_norm": 1.7493992267944227, "learning_rate": 9.31473739875618e-07, "loss": 0.578, "step": 26379 }, { "epoch": 0.8085080299129582, "grad_norm": 1.9993581818098458, "learning_rate": 9.311852614293476e-07, "loss": 0.6454, "step": 26380 }, { "epoch": 0.8085386784356994, "grad_norm": 1.8182869332195684, "learning_rate": 9.308968230741467e-07, "loss": 0.4986, "step": 26381 }, { "epoch": 0.8085693269584406, "grad_norm": 1.813603413234137, "learning_rate": 9.306084248128638e-07, "loss": 0.5524, "step": 26382 }, { "epoch": 0.8085999754811818, "grad_norm": 1.754964382635899, "learning_rate": 9.303200666483364e-07, "loss": 0.6506, "step": 26383 }, { "epoch": 0.808630624003923, "grad_norm": 0.811038844389649, "learning_rate": 9.30031748583406e-07, "loss": 0.3935, "step": 26384 }, { "epoch": 0.8086612725266642, "grad_norm": 2.204214320629235, "learning_rate": 9.297434706209141e-07, "loss": 0.6102, "step": 26385 }, { "epoch": 0.8086919210494055, "grad_norm": 1.9716153691684584, "learning_rate": 9.294552327637025e-07, "loss": 0.5555, "step": 26386 }, { "epoch": 0.8087225695721466, "grad_norm": 0.8007162992089707, "learning_rate": 9.291670350146087e-07, "loss": 0.428, "step": 26387 }, { "epoch": 0.8087532180948879, "grad_norm": 1.7411227643226441, "learning_rate": 9.288788773764734e-07, "loss": 0.4808, "step": 26388 }, { "epoch": 0.808783866617629, "grad_norm": 1.7931411862505335, "learning_rate": 9.285907598521359e-07, "loss": 0.5463, "step": 26389 }, { "epoch": 0.8088145151403703, "grad_norm": 2.010556078852886, "learning_rate": 9.283026824444374e-07, "loss": 0.5926, "step": 26390 }, { "epoch": 0.8088451636631114, "grad_norm": 1.8677679207681461, "learning_rate": 9.280146451562139e-07, "loss": 0.5071, "step": 26391 }, { "epoch": 0.8088758121858527, "grad_norm": 1.7116468597394994, "learning_rate": 9.27726647990303e-07, "loss": 0.618, "step": 26392 }, { "epoch": 0.8089064607085938, "grad_norm": 0.7553239703095436, "learning_rate": 9.274386909495431e-07, "loss": 0.3985, "step": 26393 }, { "epoch": 0.808937109231335, "grad_norm": 1.8507710391001866, "learning_rate": 9.27150774036773e-07, "loss": 0.4873, "step": 26394 }, { "epoch": 0.8089677577540763, "grad_norm": 0.8353300319945084, "learning_rate": 9.268628972548272e-07, "loss": 0.4113, "step": 26395 }, { "epoch": 0.8089984062768174, "grad_norm": 1.8416658279657776, "learning_rate": 9.265750606065438e-07, "loss": 0.6802, "step": 26396 }, { "epoch": 0.8090290547995587, "grad_norm": 1.9062137718618664, "learning_rate": 9.262872640947579e-07, "loss": 0.5909, "step": 26397 }, { "epoch": 0.8090597033222998, "grad_norm": 1.8789253788007927, "learning_rate": 9.259995077223077e-07, "loss": 0.6413, "step": 26398 }, { "epoch": 0.8090903518450411, "grad_norm": 2.109039490794495, "learning_rate": 9.257117914920249e-07, "loss": 0.6449, "step": 26399 }, { "epoch": 0.8091210003677822, "grad_norm": 1.7891726739373328, "learning_rate": 9.254241154067467e-07, "loss": 0.5111, "step": 26400 }, { "epoch": 0.8091516488905235, "grad_norm": 1.8162977342649314, "learning_rate": 9.251364794693085e-07, "loss": 0.4934, "step": 26401 }, { "epoch": 0.8091822974132646, "grad_norm": 1.8937482840417892, "learning_rate": 9.248488836825431e-07, "loss": 0.5539, "step": 26402 }, { "epoch": 0.8092129459360059, "grad_norm": 2.4319153376809406, "learning_rate": 9.245613280492833e-07, "loss": 0.6298, "step": 26403 }, { "epoch": 0.809243594458747, "grad_norm": 1.6930940294838606, "learning_rate": 9.242738125723633e-07, "loss": 0.5368, "step": 26404 }, { "epoch": 0.8092742429814883, "grad_norm": 1.7804313828722087, "learning_rate": 9.239863372546159e-07, "loss": 0.5789, "step": 26405 }, { "epoch": 0.8093048915042295, "grad_norm": 1.8409592184058172, "learning_rate": 9.236989020988757e-07, "loss": 0.5342, "step": 26406 }, { "epoch": 0.8093355400269707, "grad_norm": 2.1933448554938435, "learning_rate": 9.234115071079713e-07, "loss": 0.6751, "step": 26407 }, { "epoch": 0.8093661885497119, "grad_norm": 1.904211877848435, "learning_rate": 9.231241522847373e-07, "loss": 0.5891, "step": 26408 }, { "epoch": 0.8093968370724531, "grad_norm": 2.1294492799025693, "learning_rate": 9.228368376320046e-07, "loss": 0.5927, "step": 26409 }, { "epoch": 0.8094274855951943, "grad_norm": 2.113748991876853, "learning_rate": 9.225495631526044e-07, "loss": 0.5615, "step": 26410 }, { "epoch": 0.8094581341179355, "grad_norm": 1.5217046380710724, "learning_rate": 9.222623288493637e-07, "loss": 0.4973, "step": 26411 }, { "epoch": 0.8094887826406767, "grad_norm": 1.9863043028669853, "learning_rate": 9.219751347251183e-07, "loss": 0.632, "step": 26412 }, { "epoch": 0.809519431163418, "grad_norm": 2.090630062825337, "learning_rate": 9.216879807826951e-07, "loss": 0.5863, "step": 26413 }, { "epoch": 0.8095500796861591, "grad_norm": 1.9621711054033064, "learning_rate": 9.214008670249225e-07, "loss": 0.6475, "step": 26414 }, { "epoch": 0.8095807282089004, "grad_norm": 1.8723600706391115, "learning_rate": 9.211137934546304e-07, "loss": 0.5797, "step": 26415 }, { "epoch": 0.8096113767316415, "grad_norm": 1.7206111564549267, "learning_rate": 9.208267600746479e-07, "loss": 0.6091, "step": 26416 }, { "epoch": 0.8096420252543828, "grad_norm": 1.8089467358861415, "learning_rate": 9.205397668878046e-07, "loss": 0.5889, "step": 26417 }, { "epoch": 0.8096726737771239, "grad_norm": 0.7796867707382262, "learning_rate": 9.202528138969252e-07, "loss": 0.4098, "step": 26418 }, { "epoch": 0.8097033222998652, "grad_norm": 1.6043990376504147, "learning_rate": 9.199659011048389e-07, "loss": 0.4616, "step": 26419 }, { "epoch": 0.8097339708226063, "grad_norm": 0.8012528557636576, "learning_rate": 9.196790285143736e-07, "loss": 0.4072, "step": 26420 }, { "epoch": 0.8097646193453476, "grad_norm": 1.8694004837526812, "learning_rate": 9.193921961283552e-07, "loss": 0.5353, "step": 26421 }, { "epoch": 0.8097952678680888, "grad_norm": 2.086002710790227, "learning_rate": 9.191054039496067e-07, "loss": 0.6237, "step": 26422 }, { "epoch": 0.80982591639083, "grad_norm": 1.8634571952388037, "learning_rate": 9.188186519809594e-07, "loss": 0.615, "step": 26423 }, { "epoch": 0.8098565649135712, "grad_norm": 1.7419994862968435, "learning_rate": 9.185319402252346e-07, "loss": 0.4913, "step": 26424 }, { "epoch": 0.8098872134363123, "grad_norm": 2.1897435601221678, "learning_rate": 9.182452686852605e-07, "loss": 0.6218, "step": 26425 }, { "epoch": 0.8099178619590536, "grad_norm": 2.0896677924714826, "learning_rate": 9.179586373638588e-07, "loss": 0.6083, "step": 26426 }, { "epoch": 0.8099485104817947, "grad_norm": 1.7572719178860123, "learning_rate": 9.176720462638549e-07, "loss": 0.55, "step": 26427 }, { "epoch": 0.809979159004536, "grad_norm": 1.5692708717071824, "learning_rate": 9.173854953880745e-07, "loss": 0.5454, "step": 26428 }, { "epoch": 0.8100098075272771, "grad_norm": 1.9759988627911556, "learning_rate": 9.170989847393375e-07, "loss": 0.6053, "step": 26429 }, { "epoch": 0.8100404560500184, "grad_norm": 1.8475503634894075, "learning_rate": 9.168125143204692e-07, "loss": 0.5622, "step": 26430 }, { "epoch": 0.8100711045727595, "grad_norm": 2.0311331291711907, "learning_rate": 9.165260841342933e-07, "loss": 0.5798, "step": 26431 }, { "epoch": 0.8101017530955008, "grad_norm": 0.7902616485108538, "learning_rate": 9.162396941836293e-07, "loss": 0.4179, "step": 26432 }, { "epoch": 0.810132401618242, "grad_norm": 2.010939927342932, "learning_rate": 9.159533444713003e-07, "loss": 0.6334, "step": 26433 }, { "epoch": 0.8101630501409832, "grad_norm": 2.1398079166925212, "learning_rate": 9.156670350001295e-07, "loss": 0.6215, "step": 26434 }, { "epoch": 0.8101936986637244, "grad_norm": 1.9082780002239472, "learning_rate": 9.153807657729352e-07, "loss": 0.634, "step": 26435 }, { "epoch": 0.8102243471864656, "grad_norm": 1.84686522898894, "learning_rate": 9.150945367925407e-07, "loss": 0.5818, "step": 26436 }, { "epoch": 0.8102549957092068, "grad_norm": 1.7612001686145295, "learning_rate": 9.148083480617631e-07, "loss": 0.5604, "step": 26437 }, { "epoch": 0.810285644231948, "grad_norm": 0.7956326627861707, "learning_rate": 9.145221995834247e-07, "loss": 0.3956, "step": 26438 }, { "epoch": 0.8103162927546892, "grad_norm": 1.8637785424077487, "learning_rate": 9.142360913603449e-07, "loss": 0.5283, "step": 26439 }, { "epoch": 0.8103469412774305, "grad_norm": 2.066202300687306, "learning_rate": 9.139500233953419e-07, "loss": 0.6449, "step": 26440 }, { "epoch": 0.8103775898001716, "grad_norm": 1.7664424703587438, "learning_rate": 9.136639956912341e-07, "loss": 0.56, "step": 26441 }, { "epoch": 0.8104082383229129, "grad_norm": 2.0610359428624467, "learning_rate": 9.13378008250842e-07, "loss": 0.5916, "step": 26442 }, { "epoch": 0.810438886845654, "grad_norm": 1.817246564664792, "learning_rate": 9.130920610769806e-07, "loss": 0.602, "step": 26443 }, { "epoch": 0.8104695353683953, "grad_norm": 1.9166427761196645, "learning_rate": 9.128061541724704e-07, "loss": 0.5678, "step": 26444 }, { "epoch": 0.8105001838911364, "grad_norm": 0.7630192916981438, "learning_rate": 9.125202875401251e-07, "loss": 0.3994, "step": 26445 }, { "epoch": 0.8105308324138777, "grad_norm": 1.9684935423847185, "learning_rate": 9.122344611827639e-07, "loss": 0.6287, "step": 26446 }, { "epoch": 0.8105614809366188, "grad_norm": 0.7926983234763548, "learning_rate": 9.119486751032031e-07, "loss": 0.3939, "step": 26447 }, { "epoch": 0.8105921294593601, "grad_norm": 0.7656997748710983, "learning_rate": 9.116629293042567e-07, "loss": 0.3942, "step": 26448 }, { "epoch": 0.8106227779821013, "grad_norm": 0.7775152114579147, "learning_rate": 9.11377223788742e-07, "loss": 0.4036, "step": 26449 }, { "epoch": 0.8106534265048425, "grad_norm": 1.8246651260297069, "learning_rate": 9.110915585594748e-07, "loss": 0.6035, "step": 26450 }, { "epoch": 0.8106840750275837, "grad_norm": 2.027045943294949, "learning_rate": 9.10805933619267e-07, "loss": 0.5507, "step": 26451 }, { "epoch": 0.8107147235503249, "grad_norm": 0.7638878266693246, "learning_rate": 9.105203489709353e-07, "loss": 0.3766, "step": 26452 }, { "epoch": 0.8107453720730661, "grad_norm": 2.067012536783393, "learning_rate": 9.102348046172937e-07, "loss": 0.5242, "step": 26453 }, { "epoch": 0.8107760205958073, "grad_norm": 1.8476097629884858, "learning_rate": 9.099493005611537e-07, "loss": 0.5584, "step": 26454 }, { "epoch": 0.8108066691185485, "grad_norm": 1.6612161777799652, "learning_rate": 9.096638368053312e-07, "loss": 0.6059, "step": 26455 }, { "epoch": 0.8108373176412896, "grad_norm": 1.9549241267556, "learning_rate": 9.093784133526357e-07, "loss": 0.5102, "step": 26456 }, { "epoch": 0.8108679661640309, "grad_norm": 1.9081998626958692, "learning_rate": 9.09093030205882e-07, "loss": 0.6002, "step": 26457 }, { "epoch": 0.810898614686772, "grad_norm": 1.8214767784001098, "learning_rate": 9.088076873678825e-07, "loss": 0.5714, "step": 26458 }, { "epoch": 0.8109292632095133, "grad_norm": 1.9417695591361783, "learning_rate": 9.08522384841446e-07, "loss": 0.5382, "step": 26459 }, { "epoch": 0.8109599117322545, "grad_norm": 1.9609287273350555, "learning_rate": 9.082371226293856e-07, "loss": 0.6044, "step": 26460 }, { "epoch": 0.8109905602549957, "grad_norm": 1.899300351480911, "learning_rate": 9.079519007345128e-07, "loss": 0.5667, "step": 26461 }, { "epoch": 0.8110212087777369, "grad_norm": 1.5703835451836219, "learning_rate": 9.076667191596355e-07, "loss": 0.5849, "step": 26462 }, { "epoch": 0.8110518573004781, "grad_norm": 2.1578915588780014, "learning_rate": 9.073815779075657e-07, "loss": 0.612, "step": 26463 }, { "epoch": 0.8110825058232193, "grad_norm": 1.6296550630209607, "learning_rate": 9.070964769811131e-07, "loss": 0.5528, "step": 26464 }, { "epoch": 0.8111131543459605, "grad_norm": 1.9299833346384947, "learning_rate": 9.068114163830854e-07, "loss": 0.6569, "step": 26465 }, { "epoch": 0.8111438028687017, "grad_norm": 1.794850647566138, "learning_rate": 9.065263961162929e-07, "loss": 0.5095, "step": 26466 }, { "epoch": 0.811174451391443, "grad_norm": 1.8023504039065226, "learning_rate": 9.06241416183542e-07, "loss": 0.6079, "step": 26467 }, { "epoch": 0.8112050999141841, "grad_norm": 1.933160771754221, "learning_rate": 9.059564765876417e-07, "loss": 0.5966, "step": 26468 }, { "epoch": 0.8112357484369254, "grad_norm": 1.858042190424, "learning_rate": 9.056715773314012e-07, "loss": 0.5637, "step": 26469 }, { "epoch": 0.8112663969596665, "grad_norm": 1.9860868196663208, "learning_rate": 9.053867184176252e-07, "loss": 0.5494, "step": 26470 }, { "epoch": 0.8112970454824078, "grad_norm": 0.7738587279189221, "learning_rate": 9.051018998491212e-07, "loss": 0.4105, "step": 26471 }, { "epoch": 0.8113276940051489, "grad_norm": 1.7939136306553893, "learning_rate": 9.048171216286971e-07, "loss": 0.5763, "step": 26472 }, { "epoch": 0.8113583425278902, "grad_norm": 1.8366770864550543, "learning_rate": 9.045323837591569e-07, "loss": 0.603, "step": 26473 }, { "epoch": 0.8113889910506313, "grad_norm": 1.8415084730056048, "learning_rate": 9.042476862433064e-07, "loss": 0.6278, "step": 26474 }, { "epoch": 0.8114196395733726, "grad_norm": 2.0692826207512405, "learning_rate": 9.039630290839529e-07, "loss": 0.6314, "step": 26475 }, { "epoch": 0.8114502880961137, "grad_norm": 1.7749378505394342, "learning_rate": 9.036784122838987e-07, "loss": 0.5923, "step": 26476 }, { "epoch": 0.811480936618855, "grad_norm": 2.0542851663268475, "learning_rate": 9.033938358459504e-07, "loss": 0.6141, "step": 26477 }, { "epoch": 0.8115115851415962, "grad_norm": 1.6875747867519975, "learning_rate": 9.03109299772908e-07, "loss": 0.5081, "step": 26478 }, { "epoch": 0.8115422336643374, "grad_norm": 2.0102589646674955, "learning_rate": 9.028248040675802e-07, "loss": 0.5371, "step": 26479 }, { "epoch": 0.8115728821870786, "grad_norm": 1.8765793586540982, "learning_rate": 9.025403487327683e-07, "loss": 0.5341, "step": 26480 }, { "epoch": 0.8116035307098198, "grad_norm": 1.9866288962365055, "learning_rate": 9.022559337712733e-07, "loss": 0.6444, "step": 26481 }, { "epoch": 0.811634179232561, "grad_norm": 2.0746399743581376, "learning_rate": 9.01971559185899e-07, "loss": 0.5979, "step": 26482 }, { "epoch": 0.8116648277553022, "grad_norm": 1.8268201255686478, "learning_rate": 9.01687224979449e-07, "loss": 0.5903, "step": 26483 }, { "epoch": 0.8116954762780434, "grad_norm": 1.8921982098829726, "learning_rate": 9.014029311547223e-07, "loss": 0.5555, "step": 26484 }, { "epoch": 0.8117261248007847, "grad_norm": 1.9884520914684813, "learning_rate": 9.011186777145209e-07, "loss": 0.5509, "step": 26485 }, { "epoch": 0.8117567733235258, "grad_norm": 2.0529192718572973, "learning_rate": 9.008344646616457e-07, "loss": 0.6788, "step": 26486 }, { "epoch": 0.811787421846267, "grad_norm": 1.8675721358347317, "learning_rate": 9.005502919988984e-07, "loss": 0.6472, "step": 26487 }, { "epoch": 0.8118180703690082, "grad_norm": 1.8610013309506732, "learning_rate": 9.002661597290785e-07, "loss": 0.4743, "step": 26488 }, { "epoch": 0.8118487188917494, "grad_norm": 0.8157836992984859, "learning_rate": 8.999820678549836e-07, "loss": 0.4144, "step": 26489 }, { "epoch": 0.8118793674144906, "grad_norm": 1.4968906919266287, "learning_rate": 8.996980163794145e-07, "loss": 0.4564, "step": 26490 }, { "epoch": 0.8119100159372318, "grad_norm": 1.8445084977194364, "learning_rate": 8.994140053051715e-07, "loss": 0.5615, "step": 26491 }, { "epoch": 0.811940664459973, "grad_norm": 0.8027761209309102, "learning_rate": 8.991300346350495e-07, "loss": 0.405, "step": 26492 }, { "epoch": 0.8119713129827142, "grad_norm": 1.948310385062276, "learning_rate": 8.988461043718489e-07, "loss": 0.5939, "step": 26493 }, { "epoch": 0.8120019615054554, "grad_norm": 1.8789034442442618, "learning_rate": 8.985622145183687e-07, "loss": 0.5484, "step": 26494 }, { "epoch": 0.8120326100281966, "grad_norm": 2.128569501793506, "learning_rate": 8.982783650774024e-07, "loss": 0.6018, "step": 26495 }, { "epoch": 0.8120632585509379, "grad_norm": 1.8218008999769815, "learning_rate": 8.979945560517506e-07, "loss": 0.5862, "step": 26496 }, { "epoch": 0.812093907073679, "grad_norm": 1.9846836724150319, "learning_rate": 8.977107874442048e-07, "loss": 0.519, "step": 26497 }, { "epoch": 0.8121245555964203, "grad_norm": 0.7691028310399245, "learning_rate": 8.974270592575673e-07, "loss": 0.3892, "step": 26498 }, { "epoch": 0.8121552041191614, "grad_norm": 1.6958140360841079, "learning_rate": 8.97143371494631e-07, "loss": 0.4981, "step": 26499 }, { "epoch": 0.8121858526419027, "grad_norm": 1.6632786389318053, "learning_rate": 8.968597241581889e-07, "loss": 0.5559, "step": 26500 }, { "epoch": 0.8122165011646438, "grad_norm": 1.9271231202274393, "learning_rate": 8.965761172510379e-07, "loss": 0.6052, "step": 26501 }, { "epoch": 0.8122471496873851, "grad_norm": 1.9922678899881174, "learning_rate": 8.96292550775974e-07, "loss": 0.5098, "step": 26502 }, { "epoch": 0.8122777982101262, "grad_norm": 0.8245725601654554, "learning_rate": 8.960090247357878e-07, "loss": 0.3911, "step": 26503 }, { "epoch": 0.8123084467328675, "grad_norm": 1.901999246957141, "learning_rate": 8.957255391332748e-07, "loss": 0.656, "step": 26504 }, { "epoch": 0.8123390952556087, "grad_norm": 1.9309444927593877, "learning_rate": 8.954420939712283e-07, "loss": 0.6139, "step": 26505 }, { "epoch": 0.8123697437783499, "grad_norm": 1.6198376897535887, "learning_rate": 8.951586892524422e-07, "loss": 0.4969, "step": 26506 }, { "epoch": 0.8124003923010911, "grad_norm": 1.9236675444755629, "learning_rate": 8.948753249797082e-07, "loss": 0.6765, "step": 26507 }, { "epoch": 0.8124310408238323, "grad_norm": 1.720132855557755, "learning_rate": 8.945920011558152e-07, "loss": 0.5609, "step": 26508 }, { "epoch": 0.8124616893465735, "grad_norm": 1.7585022721620394, "learning_rate": 8.943087177835602e-07, "loss": 0.51, "step": 26509 }, { "epoch": 0.8124923378693147, "grad_norm": 2.186849940184145, "learning_rate": 8.940254748657317e-07, "loss": 0.597, "step": 26510 }, { "epoch": 0.8125229863920559, "grad_norm": 0.7830577455961969, "learning_rate": 8.937422724051193e-07, "loss": 0.3838, "step": 26511 }, { "epoch": 0.8125536349147972, "grad_norm": 1.9560521111232492, "learning_rate": 8.934591104045154e-07, "loss": 0.6525, "step": 26512 }, { "epoch": 0.8125842834375383, "grad_norm": 2.158465658480585, "learning_rate": 8.931759888667096e-07, "loss": 0.5755, "step": 26513 }, { "epoch": 0.8126149319602796, "grad_norm": 0.7745351283846718, "learning_rate": 8.928929077944925e-07, "loss": 0.3996, "step": 26514 }, { "epoch": 0.8126455804830207, "grad_norm": 1.7209981737246653, "learning_rate": 8.926098671906514e-07, "loss": 0.4976, "step": 26515 }, { "epoch": 0.812676229005762, "grad_norm": 2.015154919196892, "learning_rate": 8.92326867057976e-07, "loss": 0.6463, "step": 26516 }, { "epoch": 0.8127068775285031, "grad_norm": 1.8408217602694985, "learning_rate": 8.920439073992565e-07, "loss": 0.5687, "step": 26517 }, { "epoch": 0.8127375260512444, "grad_norm": 0.762163865264814, "learning_rate": 8.91760988217279e-07, "loss": 0.4068, "step": 26518 }, { "epoch": 0.8127681745739855, "grad_norm": 1.7535516093093568, "learning_rate": 8.914781095148294e-07, "loss": 0.5687, "step": 26519 }, { "epoch": 0.8127988230967267, "grad_norm": 1.7206615461680987, "learning_rate": 8.911952712946997e-07, "loss": 0.5542, "step": 26520 }, { "epoch": 0.812829471619468, "grad_norm": 1.757189611438309, "learning_rate": 8.909124735596741e-07, "loss": 0.5385, "step": 26521 }, { "epoch": 0.8128601201422091, "grad_norm": 1.7209854344296622, "learning_rate": 8.906297163125382e-07, "loss": 0.5331, "step": 26522 }, { "epoch": 0.8128907686649504, "grad_norm": 1.726609174801975, "learning_rate": 8.903469995560792e-07, "loss": 0.5444, "step": 26523 }, { "epoch": 0.8129214171876915, "grad_norm": 1.919921547787409, "learning_rate": 8.900643232930827e-07, "loss": 0.6438, "step": 26524 }, { "epoch": 0.8129520657104328, "grad_norm": 1.9412833737899144, "learning_rate": 8.897816875263348e-07, "loss": 0.5456, "step": 26525 }, { "epoch": 0.8129827142331739, "grad_norm": 1.7393923286511976, "learning_rate": 8.894990922586189e-07, "loss": 0.6221, "step": 26526 }, { "epoch": 0.8130133627559152, "grad_norm": 1.6130794788375875, "learning_rate": 8.892165374927198e-07, "loss": 0.5457, "step": 26527 }, { "epoch": 0.8130440112786563, "grad_norm": 1.840113943335541, "learning_rate": 8.889340232314236e-07, "loss": 0.5288, "step": 26528 }, { "epoch": 0.8130746598013976, "grad_norm": 0.809292794811565, "learning_rate": 8.886515494775122e-07, "loss": 0.3978, "step": 26529 }, { "epoch": 0.8131053083241387, "grad_norm": 2.0369448289890983, "learning_rate": 8.88369116233766e-07, "loss": 0.5705, "step": 26530 }, { "epoch": 0.81313595684688, "grad_norm": 1.948787834333352, "learning_rate": 8.880867235029739e-07, "loss": 0.5549, "step": 26531 }, { "epoch": 0.8131666053696212, "grad_norm": 0.8259099999843044, "learning_rate": 8.878043712879142e-07, "loss": 0.4055, "step": 26532 }, { "epoch": 0.8131972538923624, "grad_norm": 1.6638296386242668, "learning_rate": 8.875220595913714e-07, "loss": 0.6059, "step": 26533 }, { "epoch": 0.8132279024151036, "grad_norm": 1.7650501794130204, "learning_rate": 8.872397884161244e-07, "loss": 0.5498, "step": 26534 }, { "epoch": 0.8132585509378448, "grad_norm": 1.7527572619584435, "learning_rate": 8.869575577649564e-07, "loss": 0.5784, "step": 26535 }, { "epoch": 0.813289199460586, "grad_norm": 1.763989697597091, "learning_rate": 8.866753676406486e-07, "loss": 0.4626, "step": 26536 }, { "epoch": 0.8133198479833272, "grad_norm": 1.8221231263962463, "learning_rate": 8.8639321804598e-07, "loss": 0.6267, "step": 26537 }, { "epoch": 0.8133504965060684, "grad_norm": 1.7151716622375597, "learning_rate": 8.861111089837315e-07, "loss": 0.4799, "step": 26538 }, { "epoch": 0.8133811450288096, "grad_norm": 1.7693551585102656, "learning_rate": 8.858290404566844e-07, "loss": 0.5041, "step": 26539 }, { "epoch": 0.8134117935515508, "grad_norm": 1.8098390072148707, "learning_rate": 8.855470124676152e-07, "loss": 0.4956, "step": 26540 }, { "epoch": 0.8134424420742921, "grad_norm": 1.7067273384987163, "learning_rate": 8.852650250193045e-07, "loss": 0.4943, "step": 26541 }, { "epoch": 0.8134730905970332, "grad_norm": 1.8816504525156963, "learning_rate": 8.849830781145297e-07, "loss": 0.5526, "step": 26542 }, { "epoch": 0.8135037391197745, "grad_norm": 1.9479439966905445, "learning_rate": 8.847011717560694e-07, "loss": 0.615, "step": 26543 }, { "epoch": 0.8135343876425156, "grad_norm": 1.9367369856809802, "learning_rate": 8.844193059467027e-07, "loss": 0.4829, "step": 26544 }, { "epoch": 0.8135650361652569, "grad_norm": 2.056680081051303, "learning_rate": 8.841374806892039e-07, "loss": 0.613, "step": 26545 }, { "epoch": 0.813595684687998, "grad_norm": 0.8067597937664143, "learning_rate": 8.83855695986352e-07, "loss": 0.423, "step": 26546 }, { "epoch": 0.8136263332107393, "grad_norm": 1.9351365477163203, "learning_rate": 8.835739518409242e-07, "loss": 0.6029, "step": 26547 }, { "epoch": 0.8136569817334804, "grad_norm": 0.780605084802672, "learning_rate": 8.832922482556961e-07, "loss": 0.3915, "step": 26548 }, { "epoch": 0.8136876302562217, "grad_norm": 0.8034712788005591, "learning_rate": 8.830105852334392e-07, "loss": 0.4147, "step": 26549 }, { "epoch": 0.8137182787789629, "grad_norm": 1.9314347717270925, "learning_rate": 8.827289627769358e-07, "loss": 0.6053, "step": 26550 }, { "epoch": 0.813748927301704, "grad_norm": 1.8910468315139852, "learning_rate": 8.824473808889555e-07, "loss": 0.6205, "step": 26551 }, { "epoch": 0.8137795758244453, "grad_norm": 1.9674391510743425, "learning_rate": 8.82165839572276e-07, "loss": 0.6078, "step": 26552 }, { "epoch": 0.8138102243471864, "grad_norm": 1.8478834227252021, "learning_rate": 8.818843388296694e-07, "loss": 0.5997, "step": 26553 }, { "epoch": 0.8138408728699277, "grad_norm": 1.8477656630136587, "learning_rate": 8.816028786639097e-07, "loss": 0.6106, "step": 26554 }, { "epoch": 0.8138715213926688, "grad_norm": 1.8059865651350702, "learning_rate": 8.813214590777713e-07, "loss": 0.6584, "step": 26555 }, { "epoch": 0.8139021699154101, "grad_norm": 2.030772742406324, "learning_rate": 8.81040080074026e-07, "loss": 0.6348, "step": 26556 }, { "epoch": 0.8139328184381512, "grad_norm": 1.7825765271637897, "learning_rate": 8.807587416554464e-07, "loss": 0.5615, "step": 26557 }, { "epoch": 0.8139634669608925, "grad_norm": 2.243687882365001, "learning_rate": 8.80477443824806e-07, "loss": 0.6416, "step": 26558 }, { "epoch": 0.8139941154836337, "grad_norm": 2.371235578015242, "learning_rate": 8.80196186584874e-07, "loss": 0.6051, "step": 26559 }, { "epoch": 0.8140247640063749, "grad_norm": 1.808606079031358, "learning_rate": 8.79914969938423e-07, "loss": 0.6168, "step": 26560 }, { "epoch": 0.8140554125291161, "grad_norm": 1.9184416062811394, "learning_rate": 8.796337938882254e-07, "loss": 0.5496, "step": 26561 }, { "epoch": 0.8140860610518573, "grad_norm": 1.7748993378075912, "learning_rate": 8.793526584370493e-07, "loss": 0.5636, "step": 26562 }, { "epoch": 0.8141167095745985, "grad_norm": 1.9455530880047245, "learning_rate": 8.790715635876667e-07, "loss": 0.6285, "step": 26563 }, { "epoch": 0.8141473580973397, "grad_norm": 0.788117360021848, "learning_rate": 8.78790509342845e-07, "loss": 0.3894, "step": 26564 }, { "epoch": 0.8141780066200809, "grad_norm": 1.8511633392698963, "learning_rate": 8.785094957053552e-07, "loss": 0.5267, "step": 26565 }, { "epoch": 0.8142086551428221, "grad_norm": 2.2100913535197653, "learning_rate": 8.782285226779669e-07, "loss": 0.6407, "step": 26566 }, { "epoch": 0.8142393036655633, "grad_norm": 1.8728733784292284, "learning_rate": 8.779475902634466e-07, "loss": 0.6313, "step": 26567 }, { "epoch": 0.8142699521883046, "grad_norm": 0.8047512057743897, "learning_rate": 8.776666984645632e-07, "loss": 0.396, "step": 26568 }, { "epoch": 0.8143006007110457, "grad_norm": 1.8901396309044531, "learning_rate": 8.773858472840857e-07, "loss": 0.5749, "step": 26569 }, { "epoch": 0.814331249233787, "grad_norm": 1.8810082183900751, "learning_rate": 8.771050367247791e-07, "loss": 0.5847, "step": 26570 }, { "epoch": 0.8143618977565281, "grad_norm": 2.014701011992346, "learning_rate": 8.768242667894112e-07, "loss": 0.5286, "step": 26571 }, { "epoch": 0.8143925462792694, "grad_norm": 2.0900621020310117, "learning_rate": 8.765435374807501e-07, "loss": 0.6418, "step": 26572 }, { "epoch": 0.8144231948020105, "grad_norm": 0.7922627472413012, "learning_rate": 8.762628488015596e-07, "loss": 0.4351, "step": 26573 }, { "epoch": 0.8144538433247518, "grad_norm": 1.9042522838293217, "learning_rate": 8.759822007546076e-07, "loss": 0.5351, "step": 26574 }, { "epoch": 0.8144844918474929, "grad_norm": 1.876603024364398, "learning_rate": 8.757015933426566e-07, "loss": 0.5719, "step": 26575 }, { "epoch": 0.8145151403702342, "grad_norm": 0.7918835757661257, "learning_rate": 8.754210265684732e-07, "loss": 0.3851, "step": 26576 }, { "epoch": 0.8145457888929754, "grad_norm": 2.013969349363403, "learning_rate": 8.751405004348229e-07, "loss": 0.6073, "step": 26577 }, { "epoch": 0.8145764374157166, "grad_norm": 1.9846279755358707, "learning_rate": 8.748600149444674e-07, "loss": 0.4878, "step": 26578 }, { "epoch": 0.8146070859384578, "grad_norm": 1.8747234728764777, "learning_rate": 8.745795701001719e-07, "loss": 0.5693, "step": 26579 }, { "epoch": 0.814637734461199, "grad_norm": 2.0091566519914896, "learning_rate": 8.742991659047006e-07, "loss": 0.5035, "step": 26580 }, { "epoch": 0.8146683829839402, "grad_norm": 1.822786573103614, "learning_rate": 8.740188023608137e-07, "loss": 0.5775, "step": 26581 }, { "epoch": 0.8146990315066813, "grad_norm": 0.8121495350380058, "learning_rate": 8.737384794712755e-07, "loss": 0.3871, "step": 26582 }, { "epoch": 0.8147296800294226, "grad_norm": 0.8359572586960695, "learning_rate": 8.73458197238849e-07, "loss": 0.3952, "step": 26583 }, { "epoch": 0.8147603285521637, "grad_norm": 1.7222873980946396, "learning_rate": 8.731779556662934e-07, "loss": 0.6577, "step": 26584 }, { "epoch": 0.814790977074905, "grad_norm": 1.7074648502885919, "learning_rate": 8.728977547563727e-07, "loss": 0.5061, "step": 26585 }, { "epoch": 0.8148216255976461, "grad_norm": 0.7971964649849852, "learning_rate": 8.726175945118449e-07, "loss": 0.3974, "step": 26586 }, { "epoch": 0.8148522741203874, "grad_norm": 1.7822876561790397, "learning_rate": 8.723374749354719e-07, "loss": 0.5238, "step": 26587 }, { "epoch": 0.8148829226431286, "grad_norm": 1.7064816305601227, "learning_rate": 8.720573960300155e-07, "loss": 0.4786, "step": 26588 }, { "epoch": 0.8149135711658698, "grad_norm": 1.941494606852312, "learning_rate": 8.717773577982325e-07, "loss": 0.5956, "step": 26589 }, { "epoch": 0.814944219688611, "grad_norm": 1.8312682416315127, "learning_rate": 8.714973602428828e-07, "loss": 0.5821, "step": 26590 }, { "epoch": 0.8149748682113522, "grad_norm": 1.8412463913811572, "learning_rate": 8.712174033667281e-07, "loss": 0.5816, "step": 26591 }, { "epoch": 0.8150055167340934, "grad_norm": 0.7922266264285495, "learning_rate": 8.70937487172523e-07, "loss": 0.3775, "step": 26592 }, { "epoch": 0.8150361652568346, "grad_norm": 1.5745773413136328, "learning_rate": 8.706576116630283e-07, "loss": 0.5056, "step": 26593 }, { "epoch": 0.8150668137795758, "grad_norm": 1.8304334047044508, "learning_rate": 8.703777768409999e-07, "loss": 0.5167, "step": 26594 }, { "epoch": 0.815097462302317, "grad_norm": 1.8277038579114735, "learning_rate": 8.700979827091954e-07, "loss": 0.6135, "step": 26595 }, { "epoch": 0.8151281108250582, "grad_norm": 1.7364009354437542, "learning_rate": 8.698182292703738e-07, "loss": 0.4876, "step": 26596 }, { "epoch": 0.8151587593477995, "grad_norm": 1.67257208046431, "learning_rate": 8.695385165272884e-07, "loss": 0.5798, "step": 26597 }, { "epoch": 0.8151894078705406, "grad_norm": 2.050487891744971, "learning_rate": 8.692588444826972e-07, "loss": 0.6631, "step": 26598 }, { "epoch": 0.8152200563932819, "grad_norm": 0.7984244247163753, "learning_rate": 8.689792131393566e-07, "loss": 0.4008, "step": 26599 }, { "epoch": 0.815250704916023, "grad_norm": 1.9275070814213564, "learning_rate": 8.686996225000194e-07, "loss": 0.5586, "step": 26600 }, { "epoch": 0.8152813534387643, "grad_norm": 1.8869470919841422, "learning_rate": 8.684200725674419e-07, "loss": 0.633, "step": 26601 }, { "epoch": 0.8153120019615054, "grad_norm": 1.6788378059254434, "learning_rate": 8.681405633443795e-07, "loss": 0.4888, "step": 26602 }, { "epoch": 0.8153426504842467, "grad_norm": 1.9630216509906946, "learning_rate": 8.678610948335847e-07, "loss": 0.6361, "step": 26603 }, { "epoch": 0.8153732990069879, "grad_norm": 1.8686909361042208, "learning_rate": 8.675816670378123e-07, "loss": 0.6086, "step": 26604 }, { "epoch": 0.8154039475297291, "grad_norm": 2.0199289426587868, "learning_rate": 8.67302279959813e-07, "loss": 0.5722, "step": 26605 }, { "epoch": 0.8154345960524703, "grad_norm": 1.9451738034735884, "learning_rate": 8.670229336023445e-07, "loss": 0.5669, "step": 26606 }, { "epoch": 0.8154652445752115, "grad_norm": 1.6622381675321631, "learning_rate": 8.667436279681563e-07, "loss": 0.5968, "step": 26607 }, { "epoch": 0.8154958930979527, "grad_norm": 2.3529656718603102, "learning_rate": 8.664643630599989e-07, "loss": 0.7075, "step": 26608 }, { "epoch": 0.8155265416206939, "grad_norm": 0.7641437074932086, "learning_rate": 8.661851388806264e-07, "loss": 0.38, "step": 26609 }, { "epoch": 0.8155571901434351, "grad_norm": 1.7458652455094983, "learning_rate": 8.659059554327904e-07, "loss": 0.5819, "step": 26610 }, { "epoch": 0.8155878386661763, "grad_norm": 1.9821641375884884, "learning_rate": 8.656268127192397e-07, "loss": 0.6608, "step": 26611 }, { "epoch": 0.8156184871889175, "grad_norm": 1.944149039888491, "learning_rate": 8.653477107427255e-07, "loss": 0.5729, "step": 26612 }, { "epoch": 0.8156491357116586, "grad_norm": 2.0973152581086447, "learning_rate": 8.650686495059984e-07, "loss": 0.687, "step": 26613 }, { "epoch": 0.8156797842343999, "grad_norm": 1.8928654912453768, "learning_rate": 8.64789629011809e-07, "loss": 0.5201, "step": 26614 }, { "epoch": 0.8157104327571411, "grad_norm": 1.7302370346571514, "learning_rate": 8.645106492629057e-07, "loss": 0.4959, "step": 26615 }, { "epoch": 0.8157410812798823, "grad_norm": 1.8568172102823648, "learning_rate": 8.642317102620346e-07, "loss": 0.566, "step": 26616 }, { "epoch": 0.8157717298026235, "grad_norm": 1.7197484682364903, "learning_rate": 8.639528120119489e-07, "loss": 0.5164, "step": 26617 }, { "epoch": 0.8158023783253647, "grad_norm": 1.923656609040762, "learning_rate": 8.636739545153944e-07, "loss": 0.5534, "step": 26618 }, { "epoch": 0.8158330268481059, "grad_norm": 0.8046668587131148, "learning_rate": 8.633951377751176e-07, "loss": 0.4142, "step": 26619 }, { "epoch": 0.8158636753708471, "grad_norm": 1.8072702172204407, "learning_rate": 8.631163617938665e-07, "loss": 0.5176, "step": 26620 }, { "epoch": 0.8158943238935883, "grad_norm": 1.8511846321994476, "learning_rate": 8.628376265743898e-07, "loss": 0.5316, "step": 26621 }, { "epoch": 0.8159249724163296, "grad_norm": 1.9324340531624886, "learning_rate": 8.625589321194317e-07, "loss": 0.5578, "step": 26622 }, { "epoch": 0.8159556209390707, "grad_norm": 1.863264324521101, "learning_rate": 8.622802784317385e-07, "loss": 0.5682, "step": 26623 }, { "epoch": 0.815986269461812, "grad_norm": 2.090704490250569, "learning_rate": 8.620016655140567e-07, "loss": 0.6042, "step": 26624 }, { "epoch": 0.8160169179845531, "grad_norm": 0.7990383836490678, "learning_rate": 8.617230933691329e-07, "loss": 0.3814, "step": 26625 }, { "epoch": 0.8160475665072944, "grad_norm": 1.8344705481155064, "learning_rate": 8.614445619997097e-07, "loss": 0.6456, "step": 26626 }, { "epoch": 0.8160782150300355, "grad_norm": 2.1696660357916357, "learning_rate": 8.611660714085296e-07, "loss": 0.5484, "step": 26627 }, { "epoch": 0.8161088635527768, "grad_norm": 1.9361690967142764, "learning_rate": 8.608876215983419e-07, "loss": 0.6616, "step": 26628 }, { "epoch": 0.8161395120755179, "grad_norm": 1.9203518351528803, "learning_rate": 8.606092125718873e-07, "loss": 0.5492, "step": 26629 }, { "epoch": 0.8161701605982592, "grad_norm": 1.7897678742118088, "learning_rate": 8.603308443319081e-07, "loss": 0.6627, "step": 26630 }, { "epoch": 0.8162008091210003, "grad_norm": 1.9088966220239427, "learning_rate": 8.600525168811485e-07, "loss": 0.5384, "step": 26631 }, { "epoch": 0.8162314576437416, "grad_norm": 0.7577930408475277, "learning_rate": 8.597742302223505e-07, "loss": 0.373, "step": 26632 }, { "epoch": 0.8162621061664828, "grad_norm": 1.920432153207324, "learning_rate": 8.594959843582573e-07, "loss": 0.5383, "step": 26633 }, { "epoch": 0.816292754689224, "grad_norm": 2.10629663419144, "learning_rate": 8.592177792916084e-07, "loss": 0.5705, "step": 26634 }, { "epoch": 0.8163234032119652, "grad_norm": 1.7481022037815672, "learning_rate": 8.589396150251467e-07, "loss": 0.5141, "step": 26635 }, { "epoch": 0.8163540517347064, "grad_norm": 1.8495641409703938, "learning_rate": 8.586614915616131e-07, "loss": 0.6509, "step": 26636 }, { "epoch": 0.8163847002574476, "grad_norm": 1.6991360488608092, "learning_rate": 8.583834089037479e-07, "loss": 0.5851, "step": 26637 }, { "epoch": 0.8164153487801888, "grad_norm": 1.7540403766409096, "learning_rate": 8.581053670542894e-07, "loss": 0.466, "step": 26638 }, { "epoch": 0.81644599730293, "grad_norm": 1.9453368918735787, "learning_rate": 8.57827366015978e-07, "loss": 0.6104, "step": 26639 }, { "epoch": 0.8164766458256713, "grad_norm": 1.8651487391322596, "learning_rate": 8.57549405791554e-07, "loss": 0.5383, "step": 26640 }, { "epoch": 0.8165072943484124, "grad_norm": 2.026769289316973, "learning_rate": 8.572714863837567e-07, "loss": 0.5228, "step": 26641 }, { "epoch": 0.8165379428711537, "grad_norm": 1.9965606082924567, "learning_rate": 8.569936077953217e-07, "loss": 0.5443, "step": 26642 }, { "epoch": 0.8165685913938948, "grad_norm": 2.024475068501668, "learning_rate": 8.567157700289891e-07, "loss": 0.6675, "step": 26643 }, { "epoch": 0.816599239916636, "grad_norm": 1.7799071731769691, "learning_rate": 8.564379730874972e-07, "loss": 0.6198, "step": 26644 }, { "epoch": 0.8166298884393772, "grad_norm": 1.7646405660472675, "learning_rate": 8.561602169735822e-07, "loss": 0.6084, "step": 26645 }, { "epoch": 0.8166605369621184, "grad_norm": 0.7864286408960919, "learning_rate": 8.558825016899785e-07, "loss": 0.3979, "step": 26646 }, { "epoch": 0.8166911854848596, "grad_norm": 1.7797476236433116, "learning_rate": 8.556048272394274e-07, "loss": 0.5778, "step": 26647 }, { "epoch": 0.8167218340076008, "grad_norm": 1.7682671563652046, "learning_rate": 8.553271936246621e-07, "loss": 0.5863, "step": 26648 }, { "epoch": 0.816752482530342, "grad_norm": 1.732314913716593, "learning_rate": 8.550496008484171e-07, "loss": 0.5017, "step": 26649 }, { "epoch": 0.8167831310530832, "grad_norm": 1.594650111103803, "learning_rate": 8.547720489134287e-07, "loss": 0.4448, "step": 26650 }, { "epoch": 0.8168137795758245, "grad_norm": 1.9963256523019524, "learning_rate": 8.544945378224323e-07, "loss": 0.6291, "step": 26651 }, { "epoch": 0.8168444280985656, "grad_norm": 1.9695106106411933, "learning_rate": 8.542170675781631e-07, "loss": 0.5284, "step": 26652 }, { "epoch": 0.8168750766213069, "grad_norm": 0.7698638940215241, "learning_rate": 8.539396381833526e-07, "loss": 0.3982, "step": 26653 }, { "epoch": 0.816905725144048, "grad_norm": 0.8059463933739092, "learning_rate": 8.536622496407354e-07, "loss": 0.4137, "step": 26654 }, { "epoch": 0.8169363736667893, "grad_norm": 0.8000233727304206, "learning_rate": 8.533849019530466e-07, "loss": 0.4081, "step": 26655 }, { "epoch": 0.8169670221895304, "grad_norm": 1.803550195644055, "learning_rate": 8.531075951230172e-07, "loss": 0.6483, "step": 26656 }, { "epoch": 0.8169976707122717, "grad_norm": 1.8154228065192723, "learning_rate": 8.528303291533774e-07, "loss": 0.5851, "step": 26657 }, { "epoch": 0.8170283192350128, "grad_norm": 0.7866460465819936, "learning_rate": 8.525531040468632e-07, "loss": 0.3729, "step": 26658 }, { "epoch": 0.8170589677577541, "grad_norm": 1.8631094387285008, "learning_rate": 8.522759198062036e-07, "loss": 0.6368, "step": 26659 }, { "epoch": 0.8170896162804953, "grad_norm": 1.7095657945432623, "learning_rate": 8.51998776434132e-07, "loss": 0.5808, "step": 26660 }, { "epoch": 0.8171202648032365, "grad_norm": 1.9008967990660826, "learning_rate": 8.517216739333767e-07, "loss": 0.5446, "step": 26661 }, { "epoch": 0.8171509133259777, "grad_norm": 0.8239723719002295, "learning_rate": 8.514446123066689e-07, "loss": 0.4096, "step": 26662 }, { "epoch": 0.8171815618487189, "grad_norm": 1.8474122806122573, "learning_rate": 8.511675915567402e-07, "loss": 0.5821, "step": 26663 }, { "epoch": 0.8172122103714601, "grad_norm": 1.729175689928521, "learning_rate": 8.508906116863169e-07, "loss": 0.5096, "step": 26664 }, { "epoch": 0.8172428588942013, "grad_norm": 1.9755580991163706, "learning_rate": 8.506136726981307e-07, "loss": 0.5556, "step": 26665 }, { "epoch": 0.8172735074169425, "grad_norm": 2.48976422910093, "learning_rate": 8.503367745949103e-07, "loss": 0.5032, "step": 26666 }, { "epoch": 0.8173041559396838, "grad_norm": 1.6897206910657316, "learning_rate": 8.500599173793828e-07, "loss": 0.5079, "step": 26667 }, { "epoch": 0.8173348044624249, "grad_norm": 1.8354920579038578, "learning_rate": 8.497831010542762e-07, "loss": 0.6001, "step": 26668 }, { "epoch": 0.8173654529851662, "grad_norm": 1.7468793942818994, "learning_rate": 8.495063256223201e-07, "loss": 0.5606, "step": 26669 }, { "epoch": 0.8173961015079073, "grad_norm": 1.7814642704669408, "learning_rate": 8.492295910862386e-07, "loss": 0.5737, "step": 26670 }, { "epoch": 0.8174267500306486, "grad_norm": 1.7974264481948452, "learning_rate": 8.489528974487615e-07, "loss": 0.5146, "step": 26671 }, { "epoch": 0.8174573985533897, "grad_norm": 1.8999838357943553, "learning_rate": 8.486762447126123e-07, "loss": 0.6354, "step": 26672 }, { "epoch": 0.817488047076131, "grad_norm": 1.7452930221426, "learning_rate": 8.483996328805183e-07, "loss": 0.5527, "step": 26673 }, { "epoch": 0.8175186955988721, "grad_norm": 0.7810201966017759, "learning_rate": 8.481230619552061e-07, "loss": 0.4192, "step": 26674 }, { "epoch": 0.8175493441216133, "grad_norm": 1.9670617986199006, "learning_rate": 8.478465319393986e-07, "loss": 0.5054, "step": 26675 }, { "epoch": 0.8175799926443545, "grad_norm": 1.7662240655636576, "learning_rate": 8.475700428358213e-07, "loss": 0.5342, "step": 26676 }, { "epoch": 0.8176106411670957, "grad_norm": 1.938701917891031, "learning_rate": 8.472935946472e-07, "loss": 0.6362, "step": 26677 }, { "epoch": 0.817641289689837, "grad_norm": 1.9789453464166797, "learning_rate": 8.470171873762561e-07, "loss": 0.6451, "step": 26678 }, { "epoch": 0.8176719382125781, "grad_norm": 1.7160970031792764, "learning_rate": 8.46740821025715e-07, "loss": 0.4999, "step": 26679 }, { "epoch": 0.8177025867353194, "grad_norm": 1.9131317058155275, "learning_rate": 8.464644955983004e-07, "loss": 0.627, "step": 26680 }, { "epoch": 0.8177332352580605, "grad_norm": 1.952675342058686, "learning_rate": 8.461882110967323e-07, "loss": 0.5014, "step": 26681 }, { "epoch": 0.8177638837808018, "grad_norm": 1.8251944206296682, "learning_rate": 8.459119675237354e-07, "loss": 0.5122, "step": 26682 }, { "epoch": 0.8177945323035429, "grad_norm": 1.7683258581692498, "learning_rate": 8.456357648820302e-07, "loss": 0.551, "step": 26683 }, { "epoch": 0.8178251808262842, "grad_norm": 2.4453897368572504, "learning_rate": 8.453596031743388e-07, "loss": 0.5576, "step": 26684 }, { "epoch": 0.8178558293490253, "grad_norm": 2.029143652898224, "learning_rate": 8.450834824033832e-07, "loss": 0.6573, "step": 26685 }, { "epoch": 0.8178864778717666, "grad_norm": 1.7798335497523834, "learning_rate": 8.448074025718816e-07, "loss": 0.6214, "step": 26686 }, { "epoch": 0.8179171263945078, "grad_norm": 0.8075122601553669, "learning_rate": 8.445313636825564e-07, "loss": 0.3895, "step": 26687 }, { "epoch": 0.817947774917249, "grad_norm": 0.8325934420446706, "learning_rate": 8.442553657381275e-07, "loss": 0.4056, "step": 26688 }, { "epoch": 0.8179784234399902, "grad_norm": 1.8422369982212616, "learning_rate": 8.439794087413133e-07, "loss": 0.5249, "step": 26689 }, { "epoch": 0.8180090719627314, "grad_norm": 1.9145669920941515, "learning_rate": 8.43703492694834e-07, "loss": 0.5214, "step": 26690 }, { "epoch": 0.8180397204854726, "grad_norm": 2.062975903693594, "learning_rate": 8.434276176014067e-07, "loss": 0.6706, "step": 26691 }, { "epoch": 0.8180703690082138, "grad_norm": 1.9098267341110131, "learning_rate": 8.431517834637504e-07, "loss": 0.583, "step": 26692 }, { "epoch": 0.818101017530955, "grad_norm": 2.1124820860539124, "learning_rate": 8.428759902845846e-07, "loss": 0.5926, "step": 26693 }, { "epoch": 0.8181316660536962, "grad_norm": 1.9535714290182369, "learning_rate": 8.426002380666237e-07, "loss": 0.5314, "step": 26694 }, { "epoch": 0.8181623145764374, "grad_norm": 2.0151375274757135, "learning_rate": 8.423245268125862e-07, "loss": 0.6599, "step": 26695 }, { "epoch": 0.8181929630991787, "grad_norm": 1.7469554831676775, "learning_rate": 8.420488565251911e-07, "loss": 0.5964, "step": 26696 }, { "epoch": 0.8182236116219198, "grad_norm": 1.6371175260174957, "learning_rate": 8.417732272071505e-07, "loss": 0.5191, "step": 26697 }, { "epoch": 0.8182542601446611, "grad_norm": 1.8723996464956039, "learning_rate": 8.414976388611823e-07, "loss": 0.6378, "step": 26698 }, { "epoch": 0.8182849086674022, "grad_norm": 1.6340591763630972, "learning_rate": 8.412220914900032e-07, "loss": 0.5813, "step": 26699 }, { "epoch": 0.8183155571901435, "grad_norm": 1.8258177279425782, "learning_rate": 8.409465850963255e-07, "loss": 0.5834, "step": 26700 }, { "epoch": 0.8183462057128846, "grad_norm": 1.8409364102723285, "learning_rate": 8.406711196828671e-07, "loss": 0.5684, "step": 26701 }, { "epoch": 0.8183768542356259, "grad_norm": 1.750931252659344, "learning_rate": 8.403956952523384e-07, "loss": 0.5197, "step": 26702 }, { "epoch": 0.818407502758367, "grad_norm": 1.8245439199482394, "learning_rate": 8.401203118074558e-07, "loss": 0.567, "step": 26703 }, { "epoch": 0.8184381512811083, "grad_norm": 2.268865105077229, "learning_rate": 8.398449693509325e-07, "loss": 0.5937, "step": 26704 }, { "epoch": 0.8184687998038495, "grad_norm": 1.7978044936431377, "learning_rate": 8.395696678854809e-07, "loss": 0.4472, "step": 26705 }, { "epoch": 0.8184994483265906, "grad_norm": 1.6562823344586788, "learning_rate": 8.392944074138132e-07, "loss": 0.5303, "step": 26706 }, { "epoch": 0.8185300968493319, "grad_norm": 2.05955930060119, "learning_rate": 8.390191879386439e-07, "loss": 0.546, "step": 26707 }, { "epoch": 0.818560745372073, "grad_norm": 2.153264393844235, "learning_rate": 8.387440094626815e-07, "loss": 0.592, "step": 26708 }, { "epoch": 0.8185913938948143, "grad_norm": 1.6270573836816247, "learning_rate": 8.384688719886391e-07, "loss": 0.5547, "step": 26709 }, { "epoch": 0.8186220424175554, "grad_norm": 1.89767298617084, "learning_rate": 8.381937755192293e-07, "loss": 0.5303, "step": 26710 }, { "epoch": 0.8186526909402967, "grad_norm": 1.8559305345816641, "learning_rate": 8.379187200571598e-07, "loss": 0.5552, "step": 26711 }, { "epoch": 0.8186833394630378, "grad_norm": 1.7169568160325308, "learning_rate": 8.376437056051429e-07, "loss": 0.4882, "step": 26712 }, { "epoch": 0.8187139879857791, "grad_norm": 0.8018944313732368, "learning_rate": 8.373687321658853e-07, "loss": 0.3815, "step": 26713 }, { "epoch": 0.8187446365085203, "grad_norm": 2.089152009974782, "learning_rate": 8.370937997421014e-07, "loss": 0.6183, "step": 26714 }, { "epoch": 0.8187752850312615, "grad_norm": 0.7730109224197056, "learning_rate": 8.368189083364969e-07, "loss": 0.3798, "step": 26715 }, { "epoch": 0.8188059335540027, "grad_norm": 1.9844247261109582, "learning_rate": 8.365440579517803e-07, "loss": 0.6528, "step": 26716 }, { "epoch": 0.8188365820767439, "grad_norm": 0.7679134142381215, "learning_rate": 8.362692485906599e-07, "loss": 0.4149, "step": 26717 }, { "epoch": 0.8188672305994851, "grad_norm": 1.828566717386852, "learning_rate": 8.35994480255845e-07, "loss": 0.5547, "step": 26718 }, { "epoch": 0.8188978791222263, "grad_norm": 0.8344417066549572, "learning_rate": 8.35719752950041e-07, "loss": 0.3973, "step": 26719 }, { "epoch": 0.8189285276449675, "grad_norm": 1.9222084418996386, "learning_rate": 8.354450666759556e-07, "loss": 0.5963, "step": 26720 }, { "epoch": 0.8189591761677087, "grad_norm": 1.773752744990228, "learning_rate": 8.351704214362955e-07, "loss": 0.5894, "step": 26721 }, { "epoch": 0.8189898246904499, "grad_norm": 1.9355170938304906, "learning_rate": 8.348958172337684e-07, "loss": 0.6201, "step": 26722 }, { "epoch": 0.8190204732131912, "grad_norm": 0.7671000962123068, "learning_rate": 8.34621254071078e-07, "loss": 0.4033, "step": 26723 }, { "epoch": 0.8190511217359323, "grad_norm": 1.9052136138896487, "learning_rate": 8.34346731950928e-07, "loss": 0.5108, "step": 26724 }, { "epoch": 0.8190817702586736, "grad_norm": 0.8341539326597527, "learning_rate": 8.34072250876028e-07, "loss": 0.4029, "step": 26725 }, { "epoch": 0.8191124187814147, "grad_norm": 1.7863861476868006, "learning_rate": 8.337978108490797e-07, "loss": 0.544, "step": 26726 }, { "epoch": 0.819143067304156, "grad_norm": 1.974640815368191, "learning_rate": 8.335234118727864e-07, "loss": 0.5169, "step": 26727 }, { "epoch": 0.8191737158268971, "grad_norm": 0.7596912932758496, "learning_rate": 8.332490539498533e-07, "loss": 0.3977, "step": 26728 }, { "epoch": 0.8192043643496384, "grad_norm": 1.7925429724644808, "learning_rate": 8.329747370829844e-07, "loss": 0.5605, "step": 26729 }, { "epoch": 0.8192350128723795, "grad_norm": 1.6893538714449776, "learning_rate": 8.327004612748802e-07, "loss": 0.5544, "step": 26730 }, { "epoch": 0.8192656613951208, "grad_norm": 2.1201574450618543, "learning_rate": 8.324262265282446e-07, "loss": 0.5366, "step": 26731 }, { "epoch": 0.819296309917862, "grad_norm": 1.7919578836158088, "learning_rate": 8.3215203284578e-07, "loss": 0.5641, "step": 26732 }, { "epoch": 0.8193269584406032, "grad_norm": 1.8790018010534575, "learning_rate": 8.318778802301891e-07, "loss": 0.5851, "step": 26733 }, { "epoch": 0.8193576069633444, "grad_norm": 1.9965441661197467, "learning_rate": 8.316037686841716e-07, "loss": 0.5151, "step": 26734 }, { "epoch": 0.8193882554860856, "grad_norm": 1.9438077464410968, "learning_rate": 8.313296982104274e-07, "loss": 0.6057, "step": 26735 }, { "epoch": 0.8194189040088268, "grad_norm": 0.7657630571831957, "learning_rate": 8.310556688116583e-07, "loss": 0.3844, "step": 26736 }, { "epoch": 0.8194495525315679, "grad_norm": 0.8027519143686455, "learning_rate": 8.307816804905661e-07, "loss": 0.4048, "step": 26737 }, { "epoch": 0.8194802010543092, "grad_norm": 1.8414485119286692, "learning_rate": 8.305077332498468e-07, "loss": 0.5109, "step": 26738 }, { "epoch": 0.8195108495770503, "grad_norm": 1.7564157441024273, "learning_rate": 8.302338270922022e-07, "loss": 0.5887, "step": 26739 }, { "epoch": 0.8195414980997916, "grad_norm": 1.8872685056940586, "learning_rate": 8.299599620203303e-07, "loss": 0.5584, "step": 26740 }, { "epoch": 0.8195721466225327, "grad_norm": 1.7068634548971988, "learning_rate": 8.296861380369308e-07, "loss": 0.629, "step": 26741 }, { "epoch": 0.819602795145274, "grad_norm": 1.745252307458341, "learning_rate": 8.294123551447014e-07, "loss": 0.5583, "step": 26742 }, { "epoch": 0.8196334436680152, "grad_norm": 1.9806372698750172, "learning_rate": 8.291386133463363e-07, "loss": 0.5324, "step": 26743 }, { "epoch": 0.8196640921907564, "grad_norm": 2.2201762869095956, "learning_rate": 8.288649126445381e-07, "loss": 0.5366, "step": 26744 }, { "epoch": 0.8196947407134976, "grad_norm": 1.8485362070916371, "learning_rate": 8.285912530420015e-07, "loss": 0.6075, "step": 26745 }, { "epoch": 0.8197253892362388, "grad_norm": 1.7442101325196053, "learning_rate": 8.283176345414212e-07, "loss": 0.5822, "step": 26746 }, { "epoch": 0.81975603775898, "grad_norm": 0.8003681330624549, "learning_rate": 8.280440571454945e-07, "loss": 0.4002, "step": 26747 }, { "epoch": 0.8197866862817212, "grad_norm": 1.7958579448806726, "learning_rate": 8.277705208569181e-07, "loss": 0.4819, "step": 26748 }, { "epoch": 0.8198173348044624, "grad_norm": 0.7820914256962461, "learning_rate": 8.274970256783854e-07, "loss": 0.3816, "step": 26749 }, { "epoch": 0.8198479833272037, "grad_norm": 1.7472986377833533, "learning_rate": 8.272235716125921e-07, "loss": 0.5517, "step": 26750 }, { "epoch": 0.8198786318499448, "grad_norm": 1.9249019963602192, "learning_rate": 8.269501586622325e-07, "loss": 0.5807, "step": 26751 }, { "epoch": 0.8199092803726861, "grad_norm": 1.5975701429215652, "learning_rate": 8.266767868300019e-07, "loss": 0.5475, "step": 26752 }, { "epoch": 0.8199399288954272, "grad_norm": 3.1623165677981055, "learning_rate": 8.264034561185924e-07, "loss": 0.6022, "step": 26753 }, { "epoch": 0.8199705774181685, "grad_norm": 2.0372622501229833, "learning_rate": 8.261301665306959e-07, "loss": 0.6057, "step": 26754 }, { "epoch": 0.8200012259409096, "grad_norm": 1.995940963202813, "learning_rate": 8.258569180690085e-07, "loss": 0.6533, "step": 26755 }, { "epoch": 0.8200318744636509, "grad_norm": 1.7347260798433213, "learning_rate": 8.255837107362213e-07, "loss": 0.5795, "step": 26756 }, { "epoch": 0.820062522986392, "grad_norm": 2.076935387572919, "learning_rate": 8.253105445350245e-07, "loss": 0.6366, "step": 26757 }, { "epoch": 0.8200931715091333, "grad_norm": 2.0170889073375244, "learning_rate": 8.25037419468111e-07, "loss": 0.5975, "step": 26758 }, { "epoch": 0.8201238200318745, "grad_norm": 1.8502941231716001, "learning_rate": 8.247643355381718e-07, "loss": 0.5254, "step": 26759 }, { "epoch": 0.8201544685546157, "grad_norm": 1.813215384025071, "learning_rate": 8.244912927478992e-07, "loss": 0.5777, "step": 26760 }, { "epoch": 0.8201851170773569, "grad_norm": 1.7136564109589645, "learning_rate": 8.242182910999813e-07, "loss": 0.5465, "step": 26761 }, { "epoch": 0.8202157656000981, "grad_norm": 1.9012223188061617, "learning_rate": 8.239453305971091e-07, "loss": 0.5245, "step": 26762 }, { "epoch": 0.8202464141228393, "grad_norm": 1.7286669800271637, "learning_rate": 8.236724112419731e-07, "loss": 0.6687, "step": 26763 }, { "epoch": 0.8202770626455805, "grad_norm": 1.9334465600293445, "learning_rate": 8.233995330372613e-07, "loss": 0.5784, "step": 26764 }, { "epoch": 0.8203077111683217, "grad_norm": 1.7773064095482802, "learning_rate": 8.231266959856599e-07, "loss": 0.5611, "step": 26765 }, { "epoch": 0.820338359691063, "grad_norm": 1.8601324280244345, "learning_rate": 8.228539000898627e-07, "loss": 0.6051, "step": 26766 }, { "epoch": 0.8203690082138041, "grad_norm": 2.0411955787733196, "learning_rate": 8.225811453525534e-07, "loss": 0.6034, "step": 26767 }, { "epoch": 0.8203996567365452, "grad_norm": 1.9459830227237074, "learning_rate": 8.223084317764219e-07, "loss": 0.5422, "step": 26768 }, { "epoch": 0.8204303052592865, "grad_norm": 1.91878999848975, "learning_rate": 8.220357593641532e-07, "loss": 0.6173, "step": 26769 }, { "epoch": 0.8204609537820277, "grad_norm": 1.6816162477563732, "learning_rate": 8.217631281184352e-07, "loss": 0.6435, "step": 26770 }, { "epoch": 0.8204916023047689, "grad_norm": 1.6768262439896517, "learning_rate": 8.214905380419557e-07, "loss": 0.5535, "step": 26771 }, { "epoch": 0.8205222508275101, "grad_norm": 1.9639468941615037, "learning_rate": 8.212179891373979e-07, "loss": 0.5933, "step": 26772 }, { "epoch": 0.8205528993502513, "grad_norm": 1.859934969440138, "learning_rate": 8.20945481407448e-07, "loss": 0.529, "step": 26773 }, { "epoch": 0.8205835478729925, "grad_norm": 1.903699582014121, "learning_rate": 8.206730148547926e-07, "loss": 0.5985, "step": 26774 }, { "epoch": 0.8206141963957337, "grad_norm": 1.800556521803119, "learning_rate": 8.204005894821154e-07, "loss": 0.4785, "step": 26775 }, { "epoch": 0.8206448449184749, "grad_norm": 2.1313628498845874, "learning_rate": 8.201282052920984e-07, "loss": 0.6696, "step": 26776 }, { "epoch": 0.8206754934412162, "grad_norm": 0.8290045596994083, "learning_rate": 8.198558622874303e-07, "loss": 0.4157, "step": 26777 }, { "epoch": 0.8207061419639573, "grad_norm": 1.847366575062571, "learning_rate": 8.195835604707903e-07, "loss": 0.5894, "step": 26778 }, { "epoch": 0.8207367904866986, "grad_norm": 1.6897999343036556, "learning_rate": 8.193112998448644e-07, "loss": 0.585, "step": 26779 }, { "epoch": 0.8207674390094397, "grad_norm": 1.7461062032752317, "learning_rate": 8.190390804123327e-07, "loss": 0.5574, "step": 26780 }, { "epoch": 0.820798087532181, "grad_norm": 2.276259083310716, "learning_rate": 8.187669021758788e-07, "loss": 0.5786, "step": 26781 }, { "epoch": 0.8208287360549221, "grad_norm": 1.9400858697419505, "learning_rate": 8.184947651381853e-07, "loss": 0.4588, "step": 26782 }, { "epoch": 0.8208593845776634, "grad_norm": 1.7769930737481792, "learning_rate": 8.182226693019318e-07, "loss": 0.5755, "step": 26783 }, { "epoch": 0.8208900331004045, "grad_norm": 1.9033869576730749, "learning_rate": 8.179506146698008e-07, "loss": 0.6212, "step": 26784 }, { "epoch": 0.8209206816231458, "grad_norm": 1.7922663973889725, "learning_rate": 8.176786012444727e-07, "loss": 0.5383, "step": 26785 }, { "epoch": 0.820951330145887, "grad_norm": 1.7904265618789983, "learning_rate": 8.17406629028627e-07, "loss": 0.5341, "step": 26786 }, { "epoch": 0.8209819786686282, "grad_norm": 2.046905229370849, "learning_rate": 8.171346980249445e-07, "loss": 0.5795, "step": 26787 }, { "epoch": 0.8210126271913694, "grad_norm": 2.0360597665169684, "learning_rate": 8.168628082361035e-07, "loss": 0.5629, "step": 26788 }, { "epoch": 0.8210432757141106, "grad_norm": 1.738323766581816, "learning_rate": 8.165909596647831e-07, "loss": 0.5582, "step": 26789 }, { "epoch": 0.8210739242368518, "grad_norm": 2.075429578883557, "learning_rate": 8.163191523136638e-07, "loss": 0.5148, "step": 26790 }, { "epoch": 0.821104572759593, "grad_norm": 1.925194142522436, "learning_rate": 8.160473861854206e-07, "loss": 0.4905, "step": 26791 }, { "epoch": 0.8211352212823342, "grad_norm": 1.910007724072935, "learning_rate": 8.157756612827334e-07, "loss": 0.5616, "step": 26792 }, { "epoch": 0.8211658698050754, "grad_norm": 1.8618612497314289, "learning_rate": 8.155039776082807e-07, "loss": 0.5908, "step": 26793 }, { "epoch": 0.8211965183278166, "grad_norm": 1.8937879247373468, "learning_rate": 8.152323351647362e-07, "loss": 0.5268, "step": 26794 }, { "epoch": 0.8212271668505579, "grad_norm": 1.7072357669311216, "learning_rate": 8.149607339547788e-07, "loss": 0.4349, "step": 26795 }, { "epoch": 0.821257815373299, "grad_norm": 1.9103027206527832, "learning_rate": 8.146891739810847e-07, "loss": 0.5469, "step": 26796 }, { "epoch": 0.8212884638960403, "grad_norm": 2.1156127326475076, "learning_rate": 8.144176552463285e-07, "loss": 0.6219, "step": 26797 }, { "epoch": 0.8213191124187814, "grad_norm": 1.8509093641782755, "learning_rate": 8.141461777531867e-07, "loss": 0.5274, "step": 26798 }, { "epoch": 0.8213497609415226, "grad_norm": 1.8508064547125798, "learning_rate": 8.138747415043324e-07, "loss": 0.5176, "step": 26799 }, { "epoch": 0.8213804094642638, "grad_norm": 1.8424731430197818, "learning_rate": 8.136033465024417e-07, "loss": 0.5032, "step": 26800 }, { "epoch": 0.821411057987005, "grad_norm": 1.8191058203853683, "learning_rate": 8.133319927501893e-07, "loss": 0.4764, "step": 26801 }, { "epoch": 0.8214417065097462, "grad_norm": 2.0524650757812344, "learning_rate": 8.130606802502467e-07, "loss": 0.5061, "step": 26802 }, { "epoch": 0.8214723550324874, "grad_norm": 1.7621188108086145, "learning_rate": 8.127894090052884e-07, "loss": 0.4932, "step": 26803 }, { "epoch": 0.8215030035552287, "grad_norm": 1.6807028974759437, "learning_rate": 8.125181790179892e-07, "loss": 0.5417, "step": 26804 }, { "epoch": 0.8215336520779698, "grad_norm": 1.806931279554893, "learning_rate": 8.12246990291018e-07, "loss": 0.5138, "step": 26805 }, { "epoch": 0.8215643006007111, "grad_norm": 1.8083027940169654, "learning_rate": 8.119758428270491e-07, "loss": 0.5193, "step": 26806 }, { "epoch": 0.8215949491234522, "grad_norm": 1.8291966623445661, "learning_rate": 8.117047366287545e-07, "loss": 0.5215, "step": 26807 }, { "epoch": 0.8216255976461935, "grad_norm": 1.9236759958911602, "learning_rate": 8.114336716988041e-07, "loss": 0.5967, "step": 26808 }, { "epoch": 0.8216562461689346, "grad_norm": 1.7765522422723692, "learning_rate": 8.111626480398705e-07, "loss": 0.4731, "step": 26809 }, { "epoch": 0.8216868946916759, "grad_norm": 0.7864492324755724, "learning_rate": 8.108916656546218e-07, "loss": 0.3934, "step": 26810 }, { "epoch": 0.821717543214417, "grad_norm": 1.777641493897877, "learning_rate": 8.106207245457293e-07, "loss": 0.566, "step": 26811 }, { "epoch": 0.8217481917371583, "grad_norm": 0.8535573933538726, "learning_rate": 8.103498247158636e-07, "loss": 0.3941, "step": 26812 }, { "epoch": 0.8217788402598994, "grad_norm": 2.007155641600989, "learning_rate": 8.100789661676922e-07, "loss": 0.5021, "step": 26813 }, { "epoch": 0.8218094887826407, "grad_norm": 1.9096743828995661, "learning_rate": 8.098081489038845e-07, "loss": 0.6304, "step": 26814 }, { "epoch": 0.8218401373053819, "grad_norm": 2.177888734475586, "learning_rate": 8.095373729271111e-07, "loss": 0.6201, "step": 26815 }, { "epoch": 0.8218707858281231, "grad_norm": 1.9829046906373484, "learning_rate": 8.09266638240036e-07, "loss": 0.557, "step": 26816 }, { "epoch": 0.8219014343508643, "grad_norm": 1.9452398459392795, "learning_rate": 8.0899594484533e-07, "loss": 0.5844, "step": 26817 }, { "epoch": 0.8219320828736055, "grad_norm": 1.8992749974354752, "learning_rate": 8.087252927456596e-07, "loss": 0.6174, "step": 26818 }, { "epoch": 0.8219627313963467, "grad_norm": 2.012008493258843, "learning_rate": 8.084546819436906e-07, "loss": 0.5572, "step": 26819 }, { "epoch": 0.8219933799190879, "grad_norm": 1.684307622647225, "learning_rate": 8.081841124420909e-07, "loss": 0.5786, "step": 26820 }, { "epoch": 0.8220240284418291, "grad_norm": 1.84615134762561, "learning_rate": 8.079135842435232e-07, "loss": 0.5394, "step": 26821 }, { "epoch": 0.8220546769645704, "grad_norm": 1.9742648179559388, "learning_rate": 8.076430973506583e-07, "loss": 0.5459, "step": 26822 }, { "epoch": 0.8220853254873115, "grad_norm": 1.9540235379100022, "learning_rate": 8.073726517661584e-07, "loss": 0.5596, "step": 26823 }, { "epoch": 0.8221159740100528, "grad_norm": 1.9442156389628402, "learning_rate": 8.071022474926876e-07, "loss": 0.6139, "step": 26824 }, { "epoch": 0.8221466225327939, "grad_norm": 0.7849868592408685, "learning_rate": 8.06831884532911e-07, "loss": 0.401, "step": 26825 }, { "epoch": 0.8221772710555352, "grad_norm": 1.8610562200201561, "learning_rate": 8.065615628894935e-07, "loss": 0.6004, "step": 26826 }, { "epoch": 0.8222079195782763, "grad_norm": 1.9019300842699387, "learning_rate": 8.06291282565097e-07, "loss": 0.6202, "step": 26827 }, { "epoch": 0.8222385681010176, "grad_norm": 1.7009406034588883, "learning_rate": 8.060210435623855e-07, "loss": 0.6361, "step": 26828 }, { "epoch": 0.8222692166237587, "grad_norm": 2.0912822451570983, "learning_rate": 8.05750845884023e-07, "loss": 0.5844, "step": 26829 }, { "epoch": 0.8222998651464999, "grad_norm": 0.787410672388245, "learning_rate": 8.054806895326695e-07, "loss": 0.3857, "step": 26830 }, { "epoch": 0.8223305136692411, "grad_norm": 1.917987337946808, "learning_rate": 8.052105745109889e-07, "loss": 0.6343, "step": 26831 }, { "epoch": 0.8223611621919823, "grad_norm": 2.009646355820297, "learning_rate": 8.049405008216405e-07, "loss": 0.6022, "step": 26832 }, { "epoch": 0.8223918107147236, "grad_norm": 1.8473222967494418, "learning_rate": 8.046704684672868e-07, "loss": 0.5883, "step": 26833 }, { "epoch": 0.8224224592374647, "grad_norm": 1.8496943577492957, "learning_rate": 8.044004774505898e-07, "loss": 0.5218, "step": 26834 }, { "epoch": 0.822453107760206, "grad_norm": 1.8059605162308439, "learning_rate": 8.041305277742073e-07, "loss": 0.5155, "step": 26835 }, { "epoch": 0.8224837562829471, "grad_norm": 1.7424460185654613, "learning_rate": 8.038606194408e-07, "loss": 0.5566, "step": 26836 }, { "epoch": 0.8225144048056884, "grad_norm": 0.7974104718209553, "learning_rate": 8.035907524530289e-07, "loss": 0.3946, "step": 26837 }, { "epoch": 0.8225450533284295, "grad_norm": 2.0306201017831493, "learning_rate": 8.033209268135506e-07, "loss": 0.5279, "step": 26838 }, { "epoch": 0.8225757018511708, "grad_norm": 1.835403736703498, "learning_rate": 8.03051142525026e-07, "loss": 0.5885, "step": 26839 }, { "epoch": 0.8226063503739119, "grad_norm": 0.8035346738997002, "learning_rate": 8.027813995901101e-07, "loss": 0.3951, "step": 26840 }, { "epoch": 0.8226369988966532, "grad_norm": 1.907973160487721, "learning_rate": 8.025116980114656e-07, "loss": 0.5931, "step": 26841 }, { "epoch": 0.8226676474193944, "grad_norm": 1.8071108629594332, "learning_rate": 8.022420377917467e-07, "loss": 0.5573, "step": 26842 }, { "epoch": 0.8226982959421356, "grad_norm": 1.8730823330601734, "learning_rate": 8.019724189336103e-07, "loss": 0.5147, "step": 26843 }, { "epoch": 0.8227289444648768, "grad_norm": 1.8184862424841985, "learning_rate": 8.017028414397138e-07, "loss": 0.5522, "step": 26844 }, { "epoch": 0.822759592987618, "grad_norm": 0.8056130869882842, "learning_rate": 8.014333053127144e-07, "loss": 0.3981, "step": 26845 }, { "epoch": 0.8227902415103592, "grad_norm": 1.6742951402989183, "learning_rate": 8.011638105552655e-07, "loss": 0.4766, "step": 26846 }, { "epoch": 0.8228208900331004, "grad_norm": 1.7575086033362006, "learning_rate": 8.008943571700245e-07, "loss": 0.5917, "step": 26847 }, { "epoch": 0.8228515385558416, "grad_norm": 1.870480313710315, "learning_rate": 8.006249451596454e-07, "loss": 0.572, "step": 26848 }, { "epoch": 0.8228821870785828, "grad_norm": 1.8479360857037048, "learning_rate": 8.003555745267844e-07, "loss": 0.5633, "step": 26849 }, { "epoch": 0.822912835601324, "grad_norm": 1.6978010643248487, "learning_rate": 8.000862452740943e-07, "loss": 0.5478, "step": 26850 }, { "epoch": 0.8229434841240653, "grad_norm": 1.9225419514351576, "learning_rate": 7.998169574042269e-07, "loss": 0.4767, "step": 26851 }, { "epoch": 0.8229741326468064, "grad_norm": 1.8546254998514176, "learning_rate": 7.995477109198397e-07, "loss": 0.5785, "step": 26852 }, { "epoch": 0.8230047811695477, "grad_norm": 2.0781878427546263, "learning_rate": 7.992785058235841e-07, "loss": 0.5502, "step": 26853 }, { "epoch": 0.8230354296922888, "grad_norm": 1.8371299766167235, "learning_rate": 7.990093421181106e-07, "loss": 0.6349, "step": 26854 }, { "epoch": 0.8230660782150301, "grad_norm": 1.8577605359497362, "learning_rate": 7.987402198060734e-07, "loss": 0.5538, "step": 26855 }, { "epoch": 0.8230967267377712, "grad_norm": 1.9038222665252367, "learning_rate": 7.984711388901246e-07, "loss": 0.5803, "step": 26856 }, { "epoch": 0.8231273752605125, "grad_norm": 1.942178559353822, "learning_rate": 7.982020993729134e-07, "loss": 0.5514, "step": 26857 }, { "epoch": 0.8231580237832536, "grad_norm": 1.860200135425837, "learning_rate": 7.979331012570923e-07, "loss": 0.5962, "step": 26858 }, { "epoch": 0.8231886723059949, "grad_norm": 1.8775390030405705, "learning_rate": 7.97664144545311e-07, "loss": 0.5218, "step": 26859 }, { "epoch": 0.8232193208287361, "grad_norm": 2.152579857834674, "learning_rate": 7.973952292402215e-07, "loss": 0.6041, "step": 26860 }, { "epoch": 0.8232499693514772, "grad_norm": 2.0536632621508364, "learning_rate": 7.971263553444725e-07, "loss": 0.5535, "step": 26861 }, { "epoch": 0.8232806178742185, "grad_norm": 1.9088456745602655, "learning_rate": 7.9685752286071e-07, "loss": 0.5333, "step": 26862 }, { "epoch": 0.8233112663969596, "grad_norm": 1.9147093703719176, "learning_rate": 7.965887317915882e-07, "loss": 0.5155, "step": 26863 }, { "epoch": 0.8233419149197009, "grad_norm": 1.8557092402602502, "learning_rate": 7.963199821397533e-07, "loss": 0.5144, "step": 26864 }, { "epoch": 0.823372563442442, "grad_norm": 2.09498466985198, "learning_rate": 7.960512739078519e-07, "loss": 0.5893, "step": 26865 }, { "epoch": 0.8234032119651833, "grad_norm": 1.6769172436910593, "learning_rate": 7.957826070985331e-07, "loss": 0.5954, "step": 26866 }, { "epoch": 0.8234338604879244, "grad_norm": 1.9538364632332874, "learning_rate": 7.955139817144436e-07, "loss": 0.487, "step": 26867 }, { "epoch": 0.8234645090106657, "grad_norm": 1.7957855537262792, "learning_rate": 7.952453977582325e-07, "loss": 0.5633, "step": 26868 }, { "epoch": 0.8234951575334069, "grad_norm": 1.9893228728286627, "learning_rate": 7.949768552325426e-07, "loss": 0.6763, "step": 26869 }, { "epoch": 0.8235258060561481, "grad_norm": 2.0009662371936563, "learning_rate": 7.947083541400213e-07, "loss": 0.6338, "step": 26870 }, { "epoch": 0.8235564545788893, "grad_norm": 1.9630302009566136, "learning_rate": 7.944398944833165e-07, "loss": 0.5878, "step": 26871 }, { "epoch": 0.8235871031016305, "grad_norm": 1.645076228892164, "learning_rate": 7.941714762650709e-07, "loss": 0.5366, "step": 26872 }, { "epoch": 0.8236177516243717, "grad_norm": 1.9109970894909116, "learning_rate": 7.939030994879277e-07, "loss": 0.5903, "step": 26873 }, { "epoch": 0.8236484001471129, "grad_norm": 2.041998199618427, "learning_rate": 7.936347641545356e-07, "loss": 0.5119, "step": 26874 }, { "epoch": 0.8236790486698541, "grad_norm": 0.8012178996349157, "learning_rate": 7.933664702675354e-07, "loss": 0.3931, "step": 26875 }, { "epoch": 0.8237096971925953, "grad_norm": 1.7759613800804153, "learning_rate": 7.930982178295732e-07, "loss": 0.6378, "step": 26876 }, { "epoch": 0.8237403457153365, "grad_norm": 1.607181132414293, "learning_rate": 7.928300068432887e-07, "loss": 0.4423, "step": 26877 }, { "epoch": 0.8237709942380778, "grad_norm": 1.6998491088438734, "learning_rate": 7.925618373113275e-07, "loss": 0.4569, "step": 26878 }, { "epoch": 0.8238016427608189, "grad_norm": 1.9344235916904151, "learning_rate": 7.922937092363315e-07, "loss": 0.5877, "step": 26879 }, { "epoch": 0.8238322912835602, "grad_norm": 1.7996216790268418, "learning_rate": 7.920256226209411e-07, "loss": 0.6045, "step": 26880 }, { "epoch": 0.8238629398063013, "grad_norm": 0.8029985849723281, "learning_rate": 7.917575774677994e-07, "loss": 0.3988, "step": 26881 }, { "epoch": 0.8238935883290426, "grad_norm": 1.9248492590090052, "learning_rate": 7.914895737795475e-07, "loss": 0.5625, "step": 26882 }, { "epoch": 0.8239242368517837, "grad_norm": 1.7280297777506046, "learning_rate": 7.912216115588262e-07, "loss": 0.4764, "step": 26883 }, { "epoch": 0.823954885374525, "grad_norm": 1.8253467316460903, "learning_rate": 7.909536908082737e-07, "loss": 0.6128, "step": 26884 }, { "epoch": 0.8239855338972661, "grad_norm": 2.1002767228306665, "learning_rate": 7.906858115305316e-07, "loss": 0.5569, "step": 26885 }, { "epoch": 0.8240161824200074, "grad_norm": 1.8760804687814179, "learning_rate": 7.904179737282392e-07, "loss": 0.5049, "step": 26886 }, { "epoch": 0.8240468309427486, "grad_norm": 1.8446002894575675, "learning_rate": 7.901501774040366e-07, "loss": 0.5188, "step": 26887 }, { "epoch": 0.8240774794654898, "grad_norm": 2.077106855416514, "learning_rate": 7.8988242256056e-07, "loss": 0.5716, "step": 26888 }, { "epoch": 0.824108127988231, "grad_norm": 0.9419687618676986, "learning_rate": 7.896147092004497e-07, "loss": 0.3816, "step": 26889 }, { "epoch": 0.8241387765109722, "grad_norm": 2.2482786781642394, "learning_rate": 7.893470373263434e-07, "loss": 0.6281, "step": 26890 }, { "epoch": 0.8241694250337134, "grad_norm": 2.148644291959295, "learning_rate": 7.890794069408786e-07, "loss": 0.5737, "step": 26891 }, { "epoch": 0.8242000735564545, "grad_norm": 1.7509267755985187, "learning_rate": 7.888118180466897e-07, "loss": 0.5502, "step": 26892 }, { "epoch": 0.8242307220791958, "grad_norm": 1.7379259900269002, "learning_rate": 7.885442706464175e-07, "loss": 0.6597, "step": 26893 }, { "epoch": 0.8242613706019369, "grad_norm": 1.8325900364785652, "learning_rate": 7.882767647426947e-07, "loss": 0.589, "step": 26894 }, { "epoch": 0.8242920191246782, "grad_norm": 2.0241875912410405, "learning_rate": 7.8800930033816e-07, "loss": 0.5842, "step": 26895 }, { "epoch": 0.8243226676474193, "grad_norm": 1.9466493407339007, "learning_rate": 7.877418774354462e-07, "loss": 0.5172, "step": 26896 }, { "epoch": 0.8243533161701606, "grad_norm": 1.9869569449811317, "learning_rate": 7.874744960371894e-07, "loss": 0.6444, "step": 26897 }, { "epoch": 0.8243839646929018, "grad_norm": 0.778012342893351, "learning_rate": 7.872071561460254e-07, "loss": 0.3896, "step": 26898 }, { "epoch": 0.824414613215643, "grad_norm": 1.8872456452510094, "learning_rate": 7.869398577645859e-07, "loss": 0.4959, "step": 26899 }, { "epoch": 0.8244452617383842, "grad_norm": 1.7071138013713334, "learning_rate": 7.866726008955062e-07, "loss": 0.5556, "step": 26900 }, { "epoch": 0.8244759102611254, "grad_norm": 0.7852652684502421, "learning_rate": 7.8640538554142e-07, "loss": 0.4015, "step": 26901 }, { "epoch": 0.8245065587838666, "grad_norm": 1.976555774045705, "learning_rate": 7.861382117049599e-07, "loss": 0.6232, "step": 26902 }, { "epoch": 0.8245372073066078, "grad_norm": 0.7931974059180377, "learning_rate": 7.858710793887558e-07, "loss": 0.3942, "step": 26903 }, { "epoch": 0.824567855829349, "grad_norm": 1.8176513829960987, "learning_rate": 7.856039885954447e-07, "loss": 0.5408, "step": 26904 }, { "epoch": 0.8245985043520903, "grad_norm": 1.7350243330265205, "learning_rate": 7.85336939327655e-07, "loss": 0.5336, "step": 26905 }, { "epoch": 0.8246291528748314, "grad_norm": 2.088809710537145, "learning_rate": 7.850699315880195e-07, "loss": 0.6044, "step": 26906 }, { "epoch": 0.8246598013975727, "grad_norm": 1.9100731483549718, "learning_rate": 7.848029653791673e-07, "loss": 0.5226, "step": 26907 }, { "epoch": 0.8246904499203138, "grad_norm": 0.7991406517915709, "learning_rate": 7.8453604070373e-07, "loss": 0.3887, "step": 26908 }, { "epoch": 0.8247210984430551, "grad_norm": 0.7894578429870722, "learning_rate": 7.842691575643385e-07, "loss": 0.3891, "step": 26909 }, { "epoch": 0.8247517469657962, "grad_norm": 0.8480039071327186, "learning_rate": 7.840023159636206e-07, "loss": 0.4124, "step": 26910 }, { "epoch": 0.8247823954885375, "grad_norm": 1.8840281674110542, "learning_rate": 7.83735515904207e-07, "loss": 0.6009, "step": 26911 }, { "epoch": 0.8248130440112786, "grad_norm": 1.9886196022800324, "learning_rate": 7.834687573887273e-07, "loss": 0.6006, "step": 26912 }, { "epoch": 0.8248436925340199, "grad_norm": 1.9624505577549967, "learning_rate": 7.832020404198071e-07, "loss": 0.543, "step": 26913 }, { "epoch": 0.824874341056761, "grad_norm": 2.1185450536495596, "learning_rate": 7.829353650000765e-07, "loss": 0.5048, "step": 26914 }, { "epoch": 0.8249049895795023, "grad_norm": 1.8552391891057385, "learning_rate": 7.826687311321635e-07, "loss": 0.5613, "step": 26915 }, { "epoch": 0.8249356381022435, "grad_norm": 1.814244611713134, "learning_rate": 7.824021388186936e-07, "loss": 0.5331, "step": 26916 }, { "epoch": 0.8249662866249847, "grad_norm": 0.7658985626415958, "learning_rate": 7.82135588062296e-07, "loss": 0.3987, "step": 26917 }, { "epoch": 0.8249969351477259, "grad_norm": 2.003392581153981, "learning_rate": 7.818690788655941e-07, "loss": 0.6623, "step": 26918 }, { "epoch": 0.8250275836704671, "grad_norm": 1.7866429218088336, "learning_rate": 7.816026112312159e-07, "loss": 0.5308, "step": 26919 }, { "epoch": 0.8250582321932083, "grad_norm": 0.7714152281222768, "learning_rate": 7.813361851617873e-07, "loss": 0.3908, "step": 26920 }, { "epoch": 0.8250888807159495, "grad_norm": 1.775889896371896, "learning_rate": 7.810698006599316e-07, "loss": 0.5607, "step": 26921 }, { "epoch": 0.8251195292386907, "grad_norm": 1.8258298015389747, "learning_rate": 7.808034577282741e-07, "loss": 0.5714, "step": 26922 }, { "epoch": 0.8251501777614318, "grad_norm": 1.74157015769171, "learning_rate": 7.805371563694413e-07, "loss": 0.501, "step": 26923 }, { "epoch": 0.8251808262841731, "grad_norm": 0.7540863107203393, "learning_rate": 7.802708965860545e-07, "loss": 0.3848, "step": 26924 }, { "epoch": 0.8252114748069143, "grad_norm": 0.7667060274227697, "learning_rate": 7.80004678380738e-07, "loss": 0.4013, "step": 26925 }, { "epoch": 0.8252421233296555, "grad_norm": 1.988562671014952, "learning_rate": 7.79738501756116e-07, "loss": 0.6094, "step": 26926 }, { "epoch": 0.8252727718523967, "grad_norm": 1.9029140219158294, "learning_rate": 7.794723667148097e-07, "loss": 0.589, "step": 26927 }, { "epoch": 0.8253034203751379, "grad_norm": 2.1441318097461313, "learning_rate": 7.79206273259443e-07, "loss": 0.5543, "step": 26928 }, { "epoch": 0.8253340688978791, "grad_norm": 2.04394021508776, "learning_rate": 7.789402213926356e-07, "loss": 0.55, "step": 26929 }, { "epoch": 0.8253647174206203, "grad_norm": 1.807884830258026, "learning_rate": 7.786742111170104e-07, "loss": 0.5546, "step": 26930 }, { "epoch": 0.8253953659433615, "grad_norm": 1.9999383696623938, "learning_rate": 7.784082424351891e-07, "loss": 0.5505, "step": 26931 }, { "epoch": 0.8254260144661028, "grad_norm": 1.8855594992568152, "learning_rate": 7.781423153497908e-07, "loss": 0.4899, "step": 26932 }, { "epoch": 0.8254566629888439, "grad_norm": 1.9070810680118948, "learning_rate": 7.778764298634361e-07, "loss": 0.5133, "step": 26933 }, { "epoch": 0.8254873115115852, "grad_norm": 2.1481024133070883, "learning_rate": 7.776105859787464e-07, "loss": 0.5823, "step": 26934 }, { "epoch": 0.8255179600343263, "grad_norm": 1.8866287159623882, "learning_rate": 7.773447836983388e-07, "loss": 0.5339, "step": 26935 }, { "epoch": 0.8255486085570676, "grad_norm": 1.6463555401640373, "learning_rate": 7.770790230248349e-07, "loss": 0.493, "step": 26936 }, { "epoch": 0.8255792570798087, "grad_norm": 1.728509578801977, "learning_rate": 7.768133039608506e-07, "loss": 0.5596, "step": 26937 }, { "epoch": 0.82560990560255, "grad_norm": 1.9802161785722272, "learning_rate": 7.765476265090049e-07, "loss": 0.5888, "step": 26938 }, { "epoch": 0.8256405541252911, "grad_norm": 1.969241387180846, "learning_rate": 7.762819906719177e-07, "loss": 0.604, "step": 26939 }, { "epoch": 0.8256712026480324, "grad_norm": 1.8515333938131775, "learning_rate": 7.760163964522033e-07, "loss": 0.547, "step": 26940 }, { "epoch": 0.8257018511707735, "grad_norm": 1.691540862519807, "learning_rate": 7.757508438524803e-07, "loss": 0.4382, "step": 26941 }, { "epoch": 0.8257324996935148, "grad_norm": 1.873290094260009, "learning_rate": 7.754853328753664e-07, "loss": 0.6489, "step": 26942 }, { "epoch": 0.825763148216256, "grad_norm": 1.7209913288933314, "learning_rate": 7.752198635234748e-07, "loss": 0.5373, "step": 26943 }, { "epoch": 0.8257937967389972, "grad_norm": 1.8507040611040477, "learning_rate": 7.749544357994232e-07, "loss": 0.6253, "step": 26944 }, { "epoch": 0.8258244452617384, "grad_norm": 0.7799396220796081, "learning_rate": 7.746890497058273e-07, "loss": 0.3974, "step": 26945 }, { "epoch": 0.8258550937844796, "grad_norm": 1.7936012435140065, "learning_rate": 7.744237052453007e-07, "loss": 0.5479, "step": 26946 }, { "epoch": 0.8258857423072208, "grad_norm": 1.845181697178688, "learning_rate": 7.741584024204596e-07, "loss": 0.6191, "step": 26947 }, { "epoch": 0.825916390829962, "grad_norm": 1.7140088662237145, "learning_rate": 7.73893141233914e-07, "loss": 0.4785, "step": 26948 }, { "epoch": 0.8259470393527032, "grad_norm": 1.7481273230310437, "learning_rate": 7.736279216882836e-07, "loss": 0.5211, "step": 26949 }, { "epoch": 0.8259776878754445, "grad_norm": 1.7101046629793866, "learning_rate": 7.733627437861784e-07, "loss": 0.4934, "step": 26950 }, { "epoch": 0.8260083363981856, "grad_norm": 1.949814465862903, "learning_rate": 7.730976075302099e-07, "loss": 0.5239, "step": 26951 }, { "epoch": 0.8260389849209269, "grad_norm": 1.659473044025143, "learning_rate": 7.72832512922993e-07, "loss": 0.5011, "step": 26952 }, { "epoch": 0.826069633443668, "grad_norm": 1.6243861418363668, "learning_rate": 7.725674599671395e-07, "loss": 0.4941, "step": 26953 }, { "epoch": 0.8261002819664092, "grad_norm": 1.8421307375544915, "learning_rate": 7.723024486652598e-07, "loss": 0.5424, "step": 26954 }, { "epoch": 0.8261309304891504, "grad_norm": 1.9743868710437258, "learning_rate": 7.720374790199653e-07, "loss": 0.606, "step": 26955 }, { "epoch": 0.8261615790118916, "grad_norm": 1.9578534483234502, "learning_rate": 7.717725510338686e-07, "loss": 0.5977, "step": 26956 }, { "epoch": 0.8261922275346328, "grad_norm": 1.9818897888541382, "learning_rate": 7.715076647095776e-07, "loss": 0.6282, "step": 26957 }, { "epoch": 0.826222876057374, "grad_norm": 1.8053150186755627, "learning_rate": 7.712428200497047e-07, "loss": 0.6074, "step": 26958 }, { "epoch": 0.8262535245801153, "grad_norm": 1.879326523598657, "learning_rate": 7.709780170568559e-07, "loss": 0.6571, "step": 26959 }, { "epoch": 0.8262841731028564, "grad_norm": 2.0003145843184695, "learning_rate": 7.70713255733645e-07, "loss": 0.5773, "step": 26960 }, { "epoch": 0.8263148216255977, "grad_norm": 1.806875016859727, "learning_rate": 7.704485360826785e-07, "loss": 0.5976, "step": 26961 }, { "epoch": 0.8263454701483388, "grad_norm": 0.7819641057548848, "learning_rate": 7.701838581065635e-07, "loss": 0.4002, "step": 26962 }, { "epoch": 0.8263761186710801, "grad_norm": 0.7805665990544595, "learning_rate": 7.699192218079093e-07, "loss": 0.3789, "step": 26963 }, { "epoch": 0.8264067671938212, "grad_norm": 2.034019541089921, "learning_rate": 7.696546271893252e-07, "loss": 0.5336, "step": 26964 }, { "epoch": 0.8264374157165625, "grad_norm": 1.817739836831582, "learning_rate": 7.693900742534144e-07, "loss": 0.6288, "step": 26965 }, { "epoch": 0.8264680642393036, "grad_norm": 2.0536500502653374, "learning_rate": 7.691255630027855e-07, "loss": 0.5468, "step": 26966 }, { "epoch": 0.8264987127620449, "grad_norm": 1.8022837208206255, "learning_rate": 7.688610934400453e-07, "loss": 0.4753, "step": 26967 }, { "epoch": 0.826529361284786, "grad_norm": 1.9439092687506352, "learning_rate": 7.685966655678006e-07, "loss": 0.5425, "step": 26968 }, { "epoch": 0.8265600098075273, "grad_norm": 1.8411421508783359, "learning_rate": 7.683322793886555e-07, "loss": 0.5326, "step": 26969 }, { "epoch": 0.8265906583302685, "grad_norm": 1.9148238217491245, "learning_rate": 7.680679349052128e-07, "loss": 0.6381, "step": 26970 }, { "epoch": 0.8266213068530097, "grad_norm": 1.934191357567882, "learning_rate": 7.678036321200821e-07, "loss": 0.6373, "step": 26971 }, { "epoch": 0.8266519553757509, "grad_norm": 1.888934070162858, "learning_rate": 7.675393710358647e-07, "loss": 0.6318, "step": 26972 }, { "epoch": 0.8266826038984921, "grad_norm": 1.9955076052044105, "learning_rate": 7.672751516551641e-07, "loss": 0.6654, "step": 26973 }, { "epoch": 0.8267132524212333, "grad_norm": 1.6480930773789375, "learning_rate": 7.670109739805842e-07, "loss": 0.515, "step": 26974 }, { "epoch": 0.8267439009439745, "grad_norm": 1.8403101720099797, "learning_rate": 7.667468380147281e-07, "loss": 0.561, "step": 26975 }, { "epoch": 0.8267745494667157, "grad_norm": 0.7441589819258513, "learning_rate": 7.664827437601996e-07, "loss": 0.3887, "step": 26976 }, { "epoch": 0.826805197989457, "grad_norm": 0.8036851061367859, "learning_rate": 7.66218691219599e-07, "loss": 0.4031, "step": 26977 }, { "epoch": 0.8268358465121981, "grad_norm": 1.9181068270865833, "learning_rate": 7.659546803955287e-07, "loss": 0.6152, "step": 26978 }, { "epoch": 0.8268664950349394, "grad_norm": 1.8047623120330862, "learning_rate": 7.656907112905915e-07, "loss": 0.478, "step": 26979 }, { "epoch": 0.8268971435576805, "grad_norm": 1.8621653866361234, "learning_rate": 7.65426783907387e-07, "loss": 0.6111, "step": 26980 }, { "epoch": 0.8269277920804218, "grad_norm": 0.8141558243322671, "learning_rate": 7.651628982485149e-07, "loss": 0.4081, "step": 26981 }, { "epoch": 0.8269584406031629, "grad_norm": 1.7508126236744666, "learning_rate": 7.648990543165757e-07, "loss": 0.5225, "step": 26982 }, { "epoch": 0.8269890891259042, "grad_norm": 0.8206753514282721, "learning_rate": 7.646352521141715e-07, "loss": 0.381, "step": 26983 }, { "epoch": 0.8270197376486453, "grad_norm": 0.9338947109298809, "learning_rate": 7.643714916438982e-07, "loss": 0.3773, "step": 26984 }, { "epoch": 0.8270503861713865, "grad_norm": 1.906266947311379, "learning_rate": 7.641077729083568e-07, "loss": 0.6147, "step": 26985 }, { "epoch": 0.8270810346941277, "grad_norm": 0.74424609747635, "learning_rate": 7.638440959101451e-07, "loss": 0.3755, "step": 26986 }, { "epoch": 0.8271116832168689, "grad_norm": 2.08730646319104, "learning_rate": 7.635804606518626e-07, "loss": 0.7024, "step": 26987 }, { "epoch": 0.8271423317396102, "grad_norm": 1.9720189782103228, "learning_rate": 7.633168671361058e-07, "loss": 0.5877, "step": 26988 }, { "epoch": 0.8271729802623513, "grad_norm": 1.9521482067098421, "learning_rate": 7.630533153654695e-07, "loss": 0.5996, "step": 26989 }, { "epoch": 0.8272036287850926, "grad_norm": 0.8014719830194103, "learning_rate": 7.627898053425553e-07, "loss": 0.3899, "step": 26990 }, { "epoch": 0.8272342773078337, "grad_norm": 1.782507896871469, "learning_rate": 7.62526337069957e-07, "loss": 0.5498, "step": 26991 }, { "epoch": 0.827264925830575, "grad_norm": 1.9462970070767065, "learning_rate": 7.622629105502704e-07, "loss": 0.5014, "step": 26992 }, { "epoch": 0.8272955743533161, "grad_norm": 2.1778689519624423, "learning_rate": 7.619995257860913e-07, "loss": 0.593, "step": 26993 }, { "epoch": 0.8273262228760574, "grad_norm": 1.9340635391034064, "learning_rate": 7.617361827800152e-07, "loss": 0.5569, "step": 26994 }, { "epoch": 0.8273568713987985, "grad_norm": 1.8157152629845656, "learning_rate": 7.614728815346378e-07, "loss": 0.5533, "step": 26995 }, { "epoch": 0.8273875199215398, "grad_norm": 1.7899626249999645, "learning_rate": 7.612096220525517e-07, "loss": 0.6011, "step": 26996 }, { "epoch": 0.827418168444281, "grad_norm": 2.056100007066122, "learning_rate": 7.609464043363513e-07, "loss": 0.6266, "step": 26997 }, { "epoch": 0.8274488169670222, "grad_norm": 1.7596797807265294, "learning_rate": 7.606832283886323e-07, "loss": 0.5422, "step": 26998 }, { "epoch": 0.8274794654897634, "grad_norm": 1.6096922527433786, "learning_rate": 7.604200942119861e-07, "loss": 0.4807, "step": 26999 }, { "epoch": 0.8275101140125046, "grad_norm": 2.036359801870182, "learning_rate": 7.601570018090027e-07, "loss": 0.6103, "step": 27000 }, { "epoch": 0.8275407625352458, "grad_norm": 1.6417034874689387, "learning_rate": 7.5989395118228e-07, "loss": 0.6127, "step": 27001 }, { "epoch": 0.827571411057987, "grad_norm": 1.7360696346559754, "learning_rate": 7.596309423344055e-07, "loss": 0.4807, "step": 27002 }, { "epoch": 0.8276020595807282, "grad_norm": 1.9575558663100905, "learning_rate": 7.593679752679733e-07, "loss": 0.5932, "step": 27003 }, { "epoch": 0.8276327081034694, "grad_norm": 2.0242778159816663, "learning_rate": 7.591050499855729e-07, "loss": 0.5824, "step": 27004 }, { "epoch": 0.8276633566262106, "grad_norm": 1.8835197486971325, "learning_rate": 7.588421664897949e-07, "loss": 0.5354, "step": 27005 }, { "epoch": 0.8276940051489519, "grad_norm": 1.7957669858931316, "learning_rate": 7.585793247832318e-07, "loss": 0.5252, "step": 27006 }, { "epoch": 0.827724653671693, "grad_norm": 1.8601854885903688, "learning_rate": 7.583165248684704e-07, "loss": 0.5342, "step": 27007 }, { "epoch": 0.8277553021944343, "grad_norm": 1.7293395137046594, "learning_rate": 7.580537667481019e-07, "loss": 0.5792, "step": 27008 }, { "epoch": 0.8277859507171754, "grad_norm": 1.9778257814641649, "learning_rate": 7.57791050424716e-07, "loss": 0.5662, "step": 27009 }, { "epoch": 0.8278165992399167, "grad_norm": 1.9798038268654716, "learning_rate": 7.575283759009e-07, "loss": 0.5843, "step": 27010 }, { "epoch": 0.8278472477626578, "grad_norm": 1.9055418104643642, "learning_rate": 7.572657431792402e-07, "loss": 0.5269, "step": 27011 }, { "epoch": 0.8278778962853991, "grad_norm": 1.6937037529965946, "learning_rate": 7.570031522623289e-07, "loss": 0.5381, "step": 27012 }, { "epoch": 0.8279085448081402, "grad_norm": 2.009742098225345, "learning_rate": 7.567406031527502e-07, "loss": 0.6465, "step": 27013 }, { "epoch": 0.8279391933308815, "grad_norm": 2.116618695450478, "learning_rate": 7.564780958530932e-07, "loss": 0.6011, "step": 27014 }, { "epoch": 0.8279698418536227, "grad_norm": 0.795047682922662, "learning_rate": 7.562156303659419e-07, "loss": 0.4023, "step": 27015 }, { "epoch": 0.8280004903763638, "grad_norm": 1.874664512443071, "learning_rate": 7.55953206693884e-07, "loss": 0.5884, "step": 27016 }, { "epoch": 0.8280311388991051, "grad_norm": 1.8060129581842579, "learning_rate": 7.556908248395062e-07, "loss": 0.5775, "step": 27017 }, { "epoch": 0.8280617874218462, "grad_norm": 0.8503135813539435, "learning_rate": 7.554284848053911e-07, "loss": 0.4152, "step": 27018 }, { "epoch": 0.8280924359445875, "grad_norm": 1.8503663945029998, "learning_rate": 7.551661865941257e-07, "loss": 0.4702, "step": 27019 }, { "epoch": 0.8281230844673286, "grad_norm": 2.046090898499313, "learning_rate": 7.54903930208295e-07, "loss": 0.592, "step": 27020 }, { "epoch": 0.8281537329900699, "grad_norm": 1.9344933524765133, "learning_rate": 7.546417156504804e-07, "loss": 0.667, "step": 27021 }, { "epoch": 0.828184381512811, "grad_norm": 2.1046744649205222, "learning_rate": 7.543795429232686e-07, "loss": 0.5893, "step": 27022 }, { "epoch": 0.8282150300355523, "grad_norm": 2.111230067285639, "learning_rate": 7.541174120292405e-07, "loss": 0.5974, "step": 27023 }, { "epoch": 0.8282456785582935, "grad_norm": 2.100836040232562, "learning_rate": 7.538553229709799e-07, "loss": 0.5015, "step": 27024 }, { "epoch": 0.8282763270810347, "grad_norm": 1.6418926995130538, "learning_rate": 7.535932757510705e-07, "loss": 0.5444, "step": 27025 }, { "epoch": 0.8283069756037759, "grad_norm": 1.6970302472799477, "learning_rate": 7.533312703720913e-07, "loss": 0.5652, "step": 27026 }, { "epoch": 0.8283376241265171, "grad_norm": 1.882851657560995, "learning_rate": 7.530693068366263e-07, "loss": 0.5362, "step": 27027 }, { "epoch": 0.8283682726492583, "grad_norm": 2.00796655706747, "learning_rate": 7.528073851472567e-07, "loss": 0.6293, "step": 27028 }, { "epoch": 0.8283989211719995, "grad_norm": 1.9511972888024642, "learning_rate": 7.525455053065617e-07, "loss": 0.6143, "step": 27029 }, { "epoch": 0.8284295696947407, "grad_norm": 1.8390753538633913, "learning_rate": 7.522836673171224e-07, "loss": 0.5797, "step": 27030 }, { "epoch": 0.828460218217482, "grad_norm": 1.928667399163506, "learning_rate": 7.520218711815202e-07, "loss": 0.6472, "step": 27031 }, { "epoch": 0.8284908667402231, "grad_norm": 1.7050114220992099, "learning_rate": 7.517601169023326e-07, "loss": 0.6267, "step": 27032 }, { "epoch": 0.8285215152629644, "grad_norm": 1.715570189875828, "learning_rate": 7.514984044821405e-07, "loss": 0.5294, "step": 27033 }, { "epoch": 0.8285521637857055, "grad_norm": 1.8697644865134395, "learning_rate": 7.512367339235205e-07, "loss": 0.5584, "step": 27034 }, { "epoch": 0.8285828123084468, "grad_norm": 0.8481464351877073, "learning_rate": 7.509751052290515e-07, "loss": 0.4189, "step": 27035 }, { "epoch": 0.8286134608311879, "grad_norm": 2.2249360210201767, "learning_rate": 7.507135184013137e-07, "loss": 0.5094, "step": 27036 }, { "epoch": 0.8286441093539292, "grad_norm": 1.7757487964820025, "learning_rate": 7.504519734428817e-07, "loss": 0.5372, "step": 27037 }, { "epoch": 0.8286747578766703, "grad_norm": 1.8118449220462185, "learning_rate": 7.501904703563334e-07, "loss": 0.4987, "step": 27038 }, { "epoch": 0.8287054063994116, "grad_norm": 1.8768924074289537, "learning_rate": 7.499290091442468e-07, "loss": 0.6534, "step": 27039 }, { "epoch": 0.8287360549221527, "grad_norm": 1.9492956184277552, "learning_rate": 7.496675898091965e-07, "loss": 0.634, "step": 27040 }, { "epoch": 0.828766703444894, "grad_norm": 1.7235938402020363, "learning_rate": 7.494062123537588e-07, "loss": 0.5663, "step": 27041 }, { "epoch": 0.8287973519676352, "grad_norm": 2.0549126567459717, "learning_rate": 7.491448767805098e-07, "loss": 0.5888, "step": 27042 }, { "epoch": 0.8288280004903764, "grad_norm": 1.8942006711848494, "learning_rate": 7.488835830920232e-07, "loss": 0.617, "step": 27043 }, { "epoch": 0.8288586490131176, "grad_norm": 1.7788201892561764, "learning_rate": 7.486223312908758e-07, "loss": 0.5591, "step": 27044 }, { "epoch": 0.8288892975358588, "grad_norm": 1.9172860328840091, "learning_rate": 7.483611213796388e-07, "loss": 0.5561, "step": 27045 }, { "epoch": 0.8289199460586, "grad_norm": 1.8795427717282334, "learning_rate": 7.480999533608874e-07, "loss": 0.6275, "step": 27046 }, { "epoch": 0.8289505945813411, "grad_norm": 1.9099408556357937, "learning_rate": 7.478388272371967e-07, "loss": 0.5634, "step": 27047 }, { "epoch": 0.8289812431040824, "grad_norm": 2.106794912602991, "learning_rate": 7.475777430111364e-07, "loss": 0.6228, "step": 27048 }, { "epoch": 0.8290118916268235, "grad_norm": 2.2177144885322986, "learning_rate": 7.473167006852805e-07, "loss": 0.6408, "step": 27049 }, { "epoch": 0.8290425401495648, "grad_norm": 0.7923239185535111, "learning_rate": 7.470557002622031e-07, "loss": 0.3805, "step": 27050 }, { "epoch": 0.829073188672306, "grad_norm": 1.9896291860210613, "learning_rate": 7.46794741744472e-07, "loss": 0.5651, "step": 27051 }, { "epoch": 0.8291038371950472, "grad_norm": 1.7655781898481773, "learning_rate": 7.465338251346616e-07, "loss": 0.5944, "step": 27052 }, { "epoch": 0.8291344857177884, "grad_norm": 1.657977206935957, "learning_rate": 7.462729504353422e-07, "loss": 0.5562, "step": 27053 }, { "epoch": 0.8291651342405296, "grad_norm": 2.006003647997409, "learning_rate": 7.460121176490826e-07, "loss": 0.6315, "step": 27054 }, { "epoch": 0.8291957827632708, "grad_norm": 1.7008234923090066, "learning_rate": 7.45751326778455e-07, "loss": 0.4715, "step": 27055 }, { "epoch": 0.829226431286012, "grad_norm": 1.9683124782697226, "learning_rate": 7.454905778260263e-07, "loss": 0.5464, "step": 27056 }, { "epoch": 0.8292570798087532, "grad_norm": 1.8407874586069586, "learning_rate": 7.452298707943694e-07, "loss": 0.5002, "step": 27057 }, { "epoch": 0.8292877283314944, "grad_norm": 2.0613495619806823, "learning_rate": 7.449692056860513e-07, "loss": 0.7331, "step": 27058 }, { "epoch": 0.8293183768542356, "grad_norm": 1.9119974480547943, "learning_rate": 7.447085825036393e-07, "loss": 0.6012, "step": 27059 }, { "epoch": 0.8293490253769769, "grad_norm": 1.7202059854404148, "learning_rate": 7.444480012497024e-07, "loss": 0.5027, "step": 27060 }, { "epoch": 0.829379673899718, "grad_norm": 1.744587332642698, "learning_rate": 7.441874619268091e-07, "loss": 0.5449, "step": 27061 }, { "epoch": 0.8294103224224593, "grad_norm": 0.7468764551214591, "learning_rate": 7.439269645375246e-07, "loss": 0.389, "step": 27062 }, { "epoch": 0.8294409709452004, "grad_norm": 1.8634346505894825, "learning_rate": 7.436665090844169e-07, "loss": 0.5943, "step": 27063 }, { "epoch": 0.8294716194679417, "grad_norm": 2.143608391334463, "learning_rate": 7.434060955700534e-07, "loss": 0.6646, "step": 27064 }, { "epoch": 0.8295022679906828, "grad_norm": 1.866921029968988, "learning_rate": 7.431457239969969e-07, "loss": 0.5511, "step": 27065 }, { "epoch": 0.8295329165134241, "grad_norm": 0.7578479424297278, "learning_rate": 7.428853943678166e-07, "loss": 0.3937, "step": 27066 }, { "epoch": 0.8295635650361652, "grad_norm": 1.8611308568233118, "learning_rate": 7.426251066850742e-07, "loss": 0.5536, "step": 27067 }, { "epoch": 0.8295942135589065, "grad_norm": 2.0105700403726217, "learning_rate": 7.423648609513356e-07, "loss": 0.5906, "step": 27068 }, { "epoch": 0.8296248620816477, "grad_norm": 1.9373454800553849, "learning_rate": 7.42104657169167e-07, "loss": 0.5129, "step": 27069 }, { "epoch": 0.8296555106043889, "grad_norm": 2.0041265026227135, "learning_rate": 7.418444953411297e-07, "loss": 0.6814, "step": 27070 }, { "epoch": 0.8296861591271301, "grad_norm": 1.7012229673804204, "learning_rate": 7.415843754697876e-07, "loss": 0.569, "step": 27071 }, { "epoch": 0.8297168076498713, "grad_norm": 1.7917862694136741, "learning_rate": 7.413242975577056e-07, "loss": 0.6013, "step": 27072 }, { "epoch": 0.8297474561726125, "grad_norm": 0.7884683239896739, "learning_rate": 7.410642616074437e-07, "loss": 0.3964, "step": 27073 }, { "epoch": 0.8297781046953537, "grad_norm": 1.8069163471439214, "learning_rate": 7.408042676215665e-07, "loss": 0.5516, "step": 27074 }, { "epoch": 0.8298087532180949, "grad_norm": 1.9425966099887115, "learning_rate": 7.405443156026327e-07, "loss": 0.5409, "step": 27075 }, { "epoch": 0.8298394017408361, "grad_norm": 0.7601482081145735, "learning_rate": 7.402844055532072e-07, "loss": 0.3872, "step": 27076 }, { "epoch": 0.8298700502635773, "grad_norm": 2.074613392161602, "learning_rate": 7.400245374758496e-07, "loss": 0.6123, "step": 27077 }, { "epoch": 0.8299006987863184, "grad_norm": 0.8103017984853688, "learning_rate": 7.397647113731194e-07, "loss": 0.4058, "step": 27078 }, { "epoch": 0.8299313473090597, "grad_norm": 2.0377052482511835, "learning_rate": 7.395049272475769e-07, "loss": 0.56, "step": 27079 }, { "epoch": 0.8299619958318009, "grad_norm": 1.7261101330475286, "learning_rate": 7.39245185101784e-07, "loss": 0.5236, "step": 27080 }, { "epoch": 0.8299926443545421, "grad_norm": 1.935853372259889, "learning_rate": 7.389854849382972e-07, "loss": 0.5285, "step": 27081 }, { "epoch": 0.8300232928772833, "grad_norm": 1.7648472476347952, "learning_rate": 7.38725826759677e-07, "loss": 0.5554, "step": 27082 }, { "epoch": 0.8300539414000245, "grad_norm": 2.0133963401944395, "learning_rate": 7.38466210568482e-07, "loss": 0.5439, "step": 27083 }, { "epoch": 0.8300845899227657, "grad_norm": 2.503945128851562, "learning_rate": 7.382066363672691e-07, "loss": 0.5403, "step": 27084 }, { "epoch": 0.8301152384455069, "grad_norm": 1.7074228580416035, "learning_rate": 7.379471041585979e-07, "loss": 0.5933, "step": 27085 }, { "epoch": 0.8301458869682481, "grad_norm": 2.069379097013741, "learning_rate": 7.376876139450217e-07, "loss": 0.6167, "step": 27086 }, { "epoch": 0.8301765354909894, "grad_norm": 1.7488548639355102, "learning_rate": 7.374281657291022e-07, "loss": 0.5211, "step": 27087 }, { "epoch": 0.8302071840137305, "grad_norm": 1.9956195445521938, "learning_rate": 7.371687595133942e-07, "loss": 0.5865, "step": 27088 }, { "epoch": 0.8302378325364718, "grad_norm": 1.8179270014484072, "learning_rate": 7.369093953004513e-07, "loss": 0.594, "step": 27089 }, { "epoch": 0.8302684810592129, "grad_norm": 1.8718349321444825, "learning_rate": 7.366500730928311e-07, "loss": 0.5745, "step": 27090 }, { "epoch": 0.8302991295819542, "grad_norm": 0.8199555911142876, "learning_rate": 7.363907928930903e-07, "loss": 0.4074, "step": 27091 }, { "epoch": 0.8303297781046953, "grad_norm": 1.7307927147637407, "learning_rate": 7.3613155470378e-07, "loss": 0.5387, "step": 27092 }, { "epoch": 0.8303604266274366, "grad_norm": 1.9158758129434357, "learning_rate": 7.358723585274569e-07, "loss": 0.5967, "step": 27093 }, { "epoch": 0.8303910751501777, "grad_norm": 2.06954386778551, "learning_rate": 7.356132043666741e-07, "loss": 0.5837, "step": 27094 }, { "epoch": 0.830421723672919, "grad_norm": 2.030652243092432, "learning_rate": 7.353540922239865e-07, "loss": 0.5604, "step": 27095 }, { "epoch": 0.8304523721956601, "grad_norm": 1.670532815559836, "learning_rate": 7.350950221019471e-07, "loss": 0.3932, "step": 27096 }, { "epoch": 0.8304830207184014, "grad_norm": 1.8700469814655827, "learning_rate": 7.348359940031046e-07, "loss": 0.5258, "step": 27097 }, { "epoch": 0.8305136692411426, "grad_norm": 2.0449181208840863, "learning_rate": 7.345770079300168e-07, "loss": 0.6434, "step": 27098 }, { "epoch": 0.8305443177638838, "grad_norm": 1.8170790559143042, "learning_rate": 7.343180638852332e-07, "loss": 0.5445, "step": 27099 }, { "epoch": 0.830574966286625, "grad_norm": 2.0357713594272617, "learning_rate": 7.340591618713039e-07, "loss": 0.6805, "step": 27100 }, { "epoch": 0.8306056148093662, "grad_norm": 2.090572491004008, "learning_rate": 7.338003018907808e-07, "loss": 0.5884, "step": 27101 }, { "epoch": 0.8306362633321074, "grad_norm": 2.2747747012101525, "learning_rate": 7.335414839462157e-07, "loss": 0.5826, "step": 27102 }, { "epoch": 0.8306669118548486, "grad_norm": 1.9210149694479501, "learning_rate": 7.332827080401584e-07, "loss": 0.6157, "step": 27103 }, { "epoch": 0.8306975603775898, "grad_norm": 1.7832703269832895, "learning_rate": 7.330239741751577e-07, "loss": 0.574, "step": 27104 }, { "epoch": 0.830728208900331, "grad_norm": 2.2186549601705714, "learning_rate": 7.327652823537628e-07, "loss": 0.5367, "step": 27105 }, { "epoch": 0.8307588574230722, "grad_norm": 1.7838312493728787, "learning_rate": 7.325066325785252e-07, "loss": 0.5449, "step": 27106 }, { "epoch": 0.8307895059458135, "grad_norm": 1.7312787923555664, "learning_rate": 7.322480248519915e-07, "loss": 0.5252, "step": 27107 }, { "epoch": 0.8308201544685546, "grad_norm": 1.8179298725468607, "learning_rate": 7.319894591767074e-07, "loss": 0.6424, "step": 27108 }, { "epoch": 0.8308508029912958, "grad_norm": 1.8253414511010373, "learning_rate": 7.317309355552254e-07, "loss": 0.5611, "step": 27109 }, { "epoch": 0.830881451514037, "grad_norm": 2.2667182472662684, "learning_rate": 7.314724539900913e-07, "loss": 0.6132, "step": 27110 }, { "epoch": 0.8309121000367782, "grad_norm": 1.7626361548017964, "learning_rate": 7.312140144838493e-07, "loss": 0.5564, "step": 27111 }, { "epoch": 0.8309427485595194, "grad_norm": 1.8790943136242626, "learning_rate": 7.30955617039048e-07, "loss": 0.558, "step": 27112 }, { "epoch": 0.8309733970822606, "grad_norm": 1.8257798635278786, "learning_rate": 7.306972616582336e-07, "loss": 0.5481, "step": 27113 }, { "epoch": 0.8310040456050019, "grad_norm": 1.9520604617656356, "learning_rate": 7.304389483439528e-07, "loss": 0.5966, "step": 27114 }, { "epoch": 0.831034694127743, "grad_norm": 1.9878898088402088, "learning_rate": 7.30180677098748e-07, "loss": 0.6142, "step": 27115 }, { "epoch": 0.8310653426504843, "grad_norm": 2.021501326336515, "learning_rate": 7.299224479251649e-07, "loss": 0.5919, "step": 27116 }, { "epoch": 0.8310959911732254, "grad_norm": 1.9464411983853434, "learning_rate": 7.296642608257503e-07, "loss": 0.6097, "step": 27117 }, { "epoch": 0.8311266396959667, "grad_norm": 1.9896298887228674, "learning_rate": 7.294061158030463e-07, "loss": 0.5885, "step": 27118 }, { "epoch": 0.8311572882187078, "grad_norm": 2.0731514018461192, "learning_rate": 7.291480128595951e-07, "loss": 0.5807, "step": 27119 }, { "epoch": 0.8311879367414491, "grad_norm": 2.0994682914604432, "learning_rate": 7.288899519979414e-07, "loss": 0.5559, "step": 27120 }, { "epoch": 0.8312185852641902, "grad_norm": 2.0856576568892327, "learning_rate": 7.286319332206276e-07, "loss": 0.5435, "step": 27121 }, { "epoch": 0.8312492337869315, "grad_norm": 1.8120001719419105, "learning_rate": 7.283739565301978e-07, "loss": 0.5564, "step": 27122 }, { "epoch": 0.8312798823096726, "grad_norm": 1.9263578967002972, "learning_rate": 7.281160219291911e-07, "loss": 0.5964, "step": 27123 }, { "epoch": 0.8313105308324139, "grad_norm": 0.8007490941791009, "learning_rate": 7.278581294201504e-07, "loss": 0.3952, "step": 27124 }, { "epoch": 0.8313411793551551, "grad_norm": 1.8539562289201372, "learning_rate": 7.276002790056175e-07, "loss": 0.556, "step": 27125 }, { "epoch": 0.8313718278778963, "grad_norm": 1.946409109670411, "learning_rate": 7.273424706881321e-07, "loss": 0.6542, "step": 27126 }, { "epoch": 0.8314024764006375, "grad_norm": 1.7279657218888103, "learning_rate": 7.270847044702322e-07, "loss": 0.6122, "step": 27127 }, { "epoch": 0.8314331249233787, "grad_norm": 1.9457108315561733, "learning_rate": 7.268269803544625e-07, "loss": 0.5664, "step": 27128 }, { "epoch": 0.8314637734461199, "grad_norm": 2.030195268683539, "learning_rate": 7.265692983433586e-07, "loss": 0.5485, "step": 27129 }, { "epoch": 0.8314944219688611, "grad_norm": 1.753175881135227, "learning_rate": 7.26311658439462e-07, "loss": 0.535, "step": 27130 }, { "epoch": 0.8315250704916023, "grad_norm": 1.7754014614832887, "learning_rate": 7.260540606453092e-07, "loss": 0.5679, "step": 27131 }, { "epoch": 0.8315557190143436, "grad_norm": 1.7872076583506076, "learning_rate": 7.257965049634391e-07, "loss": 0.4957, "step": 27132 }, { "epoch": 0.8315863675370847, "grad_norm": 1.8254578908015906, "learning_rate": 7.255389913963906e-07, "loss": 0.5975, "step": 27133 }, { "epoch": 0.831617016059826, "grad_norm": 1.864864359762946, "learning_rate": 7.252815199466994e-07, "loss": 0.4786, "step": 27134 }, { "epoch": 0.8316476645825671, "grad_norm": 1.9276790864530593, "learning_rate": 7.250240906169026e-07, "loss": 0.6146, "step": 27135 }, { "epoch": 0.8316783131053084, "grad_norm": 0.7793348567330975, "learning_rate": 7.247667034095385e-07, "loss": 0.4083, "step": 27136 }, { "epoch": 0.8317089616280495, "grad_norm": 1.8647357233504123, "learning_rate": 7.245093583271423e-07, "loss": 0.4812, "step": 27137 }, { "epoch": 0.8317396101507908, "grad_norm": 1.8970471538652114, "learning_rate": 7.242520553722466e-07, "loss": 0.6408, "step": 27138 }, { "epoch": 0.8317702586735319, "grad_norm": 1.6535594075325504, "learning_rate": 7.239947945473919e-07, "loss": 0.5253, "step": 27139 }, { "epoch": 0.8318009071962731, "grad_norm": 1.6591934144715157, "learning_rate": 7.237375758551096e-07, "loss": 0.5436, "step": 27140 }, { "epoch": 0.8318315557190143, "grad_norm": 0.7930853165369188, "learning_rate": 7.234803992979356e-07, "loss": 0.4151, "step": 27141 }, { "epoch": 0.8318622042417555, "grad_norm": 1.867869179658435, "learning_rate": 7.232232648784026e-07, "loss": 0.6369, "step": 27142 }, { "epoch": 0.8318928527644968, "grad_norm": 1.806361300529666, "learning_rate": 7.229661725990455e-07, "loss": 0.5179, "step": 27143 }, { "epoch": 0.8319235012872379, "grad_norm": 1.999394655185593, "learning_rate": 7.227091224623978e-07, "loss": 0.5245, "step": 27144 }, { "epoch": 0.8319541498099792, "grad_norm": 1.930972287255039, "learning_rate": 7.2245211447099e-07, "loss": 0.6381, "step": 27145 }, { "epoch": 0.8319847983327203, "grad_norm": 1.9490083237405118, "learning_rate": 7.221951486273566e-07, "loss": 0.5974, "step": 27146 }, { "epoch": 0.8320154468554616, "grad_norm": 2.065628333859209, "learning_rate": 7.219382249340296e-07, "loss": 0.5507, "step": 27147 }, { "epoch": 0.8320460953782027, "grad_norm": 1.883571525781592, "learning_rate": 7.216813433935388e-07, "loss": 0.6131, "step": 27148 }, { "epoch": 0.832076743900944, "grad_norm": 1.736149501924709, "learning_rate": 7.214245040084167e-07, "loss": 0.5946, "step": 27149 }, { "epoch": 0.8321073924236851, "grad_norm": 0.7875802094176125, "learning_rate": 7.211677067811945e-07, "loss": 0.4152, "step": 27150 }, { "epoch": 0.8321380409464264, "grad_norm": 0.8082260063558707, "learning_rate": 7.209109517144008e-07, "loss": 0.3989, "step": 27151 }, { "epoch": 0.8321686894691676, "grad_norm": 2.064988187477585, "learning_rate": 7.206542388105675e-07, "loss": 0.5382, "step": 27152 }, { "epoch": 0.8321993379919088, "grad_norm": 1.671177281757867, "learning_rate": 7.203975680722214e-07, "loss": 0.4914, "step": 27153 }, { "epoch": 0.83222998651465, "grad_norm": 0.8039508548345711, "learning_rate": 7.201409395018932e-07, "loss": 0.394, "step": 27154 }, { "epoch": 0.8322606350373912, "grad_norm": 1.6807950003418424, "learning_rate": 7.198843531021127e-07, "loss": 0.4554, "step": 27155 }, { "epoch": 0.8322912835601324, "grad_norm": 1.814605986436508, "learning_rate": 7.196278088754055e-07, "loss": 0.5893, "step": 27156 }, { "epoch": 0.8323219320828736, "grad_norm": 1.9912561042792816, "learning_rate": 7.193713068243007e-07, "loss": 0.6927, "step": 27157 }, { "epoch": 0.8323525806056148, "grad_norm": 1.946453615392313, "learning_rate": 7.191148469513265e-07, "loss": 0.5308, "step": 27158 }, { "epoch": 0.832383229128356, "grad_norm": 1.8054663365512238, "learning_rate": 7.188584292590084e-07, "loss": 0.5054, "step": 27159 }, { "epoch": 0.8324138776510972, "grad_norm": 1.7709216349279377, "learning_rate": 7.186020537498733e-07, "loss": 0.5289, "step": 27160 }, { "epoch": 0.8324445261738385, "grad_norm": 1.5197208381096825, "learning_rate": 7.183457204264488e-07, "loss": 0.5458, "step": 27161 }, { "epoch": 0.8324751746965796, "grad_norm": 1.7753841027319355, "learning_rate": 7.180894292912582e-07, "loss": 0.545, "step": 27162 }, { "epoch": 0.8325058232193209, "grad_norm": 1.977794013385123, "learning_rate": 7.178331803468292e-07, "loss": 0.6129, "step": 27163 }, { "epoch": 0.832536471742062, "grad_norm": 2.0680395207512663, "learning_rate": 7.175769735956844e-07, "loss": 0.6657, "step": 27164 }, { "epoch": 0.8325671202648033, "grad_norm": 1.6474868905101014, "learning_rate": 7.173208090403494e-07, "loss": 0.5431, "step": 27165 }, { "epoch": 0.8325977687875444, "grad_norm": 1.8302118108842047, "learning_rate": 7.170646866833491e-07, "loss": 0.5146, "step": 27166 }, { "epoch": 0.8326284173102857, "grad_norm": 1.5793151946496935, "learning_rate": 7.168086065272056e-07, "loss": 0.6292, "step": 27167 }, { "epoch": 0.8326590658330268, "grad_norm": 1.649982988017637, "learning_rate": 7.165525685744429e-07, "loss": 0.512, "step": 27168 }, { "epoch": 0.8326897143557681, "grad_norm": 2.105864401438965, "learning_rate": 7.162965728275844e-07, "loss": 0.6308, "step": 27169 }, { "epoch": 0.8327203628785093, "grad_norm": 1.9288706386079035, "learning_rate": 7.160406192891505e-07, "loss": 0.6194, "step": 27170 }, { "epoch": 0.8327510114012504, "grad_norm": 1.664259153899821, "learning_rate": 7.157847079616658e-07, "loss": 0.5632, "step": 27171 }, { "epoch": 0.8327816599239917, "grad_norm": 1.7347129321409573, "learning_rate": 7.1552883884765e-07, "loss": 0.5425, "step": 27172 }, { "epoch": 0.8328123084467328, "grad_norm": 1.9046965146823098, "learning_rate": 7.152730119496243e-07, "loss": 0.5944, "step": 27173 }, { "epoch": 0.8328429569694741, "grad_norm": 1.8917289593733175, "learning_rate": 7.150172272701111e-07, "loss": 0.508, "step": 27174 }, { "epoch": 0.8328736054922152, "grad_norm": 1.862695207672162, "learning_rate": 7.14761484811628e-07, "loss": 0.4917, "step": 27175 }, { "epoch": 0.8329042540149565, "grad_norm": 2.1003692592653587, "learning_rate": 7.145057845766967e-07, "loss": 0.6349, "step": 27176 }, { "epoch": 0.8329349025376976, "grad_norm": 1.8651624999535692, "learning_rate": 7.142501265678376e-07, "loss": 0.6283, "step": 27177 }, { "epoch": 0.8329655510604389, "grad_norm": 0.7730036837484717, "learning_rate": 7.139945107875673e-07, "loss": 0.3882, "step": 27178 }, { "epoch": 0.83299619958318, "grad_norm": 1.9844132523920655, "learning_rate": 7.137389372384063e-07, "loss": 0.541, "step": 27179 }, { "epoch": 0.8330268481059213, "grad_norm": 1.8998314420312776, "learning_rate": 7.134834059228729e-07, "loss": 0.5764, "step": 27180 }, { "epoch": 0.8330574966286625, "grad_norm": 2.1140805536411946, "learning_rate": 7.13227916843483e-07, "loss": 0.5656, "step": 27181 }, { "epoch": 0.8330881451514037, "grad_norm": 1.782205151091349, "learning_rate": 7.129724700027562e-07, "loss": 0.5623, "step": 27182 }, { "epoch": 0.8331187936741449, "grad_norm": 2.183339607432549, "learning_rate": 7.127170654032068e-07, "loss": 0.4354, "step": 27183 }, { "epoch": 0.8331494421968861, "grad_norm": 1.8177434274691884, "learning_rate": 7.124617030473552e-07, "loss": 0.516, "step": 27184 }, { "epoch": 0.8331800907196273, "grad_norm": 0.8138781703184309, "learning_rate": 7.122063829377151e-07, "loss": 0.404, "step": 27185 }, { "epoch": 0.8332107392423685, "grad_norm": 2.0868799760811596, "learning_rate": 7.119511050768019e-07, "loss": 0.5801, "step": 27186 }, { "epoch": 0.8332413877651097, "grad_norm": 1.7071083759045285, "learning_rate": 7.116958694671316e-07, "loss": 0.5716, "step": 27187 }, { "epoch": 0.833272036287851, "grad_norm": 2.0426216864576374, "learning_rate": 7.114406761112197e-07, "loss": 0.6412, "step": 27188 }, { "epoch": 0.8333026848105921, "grad_norm": 1.8190189966529262, "learning_rate": 7.111855250115795e-07, "loss": 0.4448, "step": 27189 }, { "epoch": 0.8333333333333334, "grad_norm": 0.7693631923291128, "learning_rate": 7.109304161707254e-07, "loss": 0.4013, "step": 27190 }, { "epoch": 0.8333639818560745, "grad_norm": 0.8749379288949245, "learning_rate": 7.106753495911727e-07, "loss": 0.395, "step": 27191 }, { "epoch": 0.8333946303788158, "grad_norm": 2.109228270510071, "learning_rate": 7.104203252754322e-07, "loss": 0.5677, "step": 27192 }, { "epoch": 0.8334252789015569, "grad_norm": 1.852229847912426, "learning_rate": 7.101653432260186e-07, "loss": 0.5654, "step": 27193 }, { "epoch": 0.8334559274242982, "grad_norm": 1.8781337720008306, "learning_rate": 7.099104034454413e-07, "loss": 0.5221, "step": 27194 }, { "epoch": 0.8334865759470393, "grad_norm": 1.8401838279397698, "learning_rate": 7.096555059362164e-07, "loss": 0.6366, "step": 27195 }, { "epoch": 0.8335172244697806, "grad_norm": 1.88015983749179, "learning_rate": 7.094006507008539e-07, "loss": 0.5662, "step": 27196 }, { "epoch": 0.8335478729925218, "grad_norm": 1.7231195044526868, "learning_rate": 7.091458377418636e-07, "loss": 0.6122, "step": 27197 }, { "epoch": 0.833578521515263, "grad_norm": 1.6652703442562318, "learning_rate": 7.088910670617572e-07, "loss": 0.5262, "step": 27198 }, { "epoch": 0.8336091700380042, "grad_norm": 1.8515555730266526, "learning_rate": 7.086363386630457e-07, "loss": 0.5788, "step": 27199 }, { "epoch": 0.8336398185607454, "grad_norm": 1.7118144473627257, "learning_rate": 7.083816525482373e-07, "loss": 0.4739, "step": 27200 }, { "epoch": 0.8336704670834866, "grad_norm": 1.9906714848885438, "learning_rate": 7.081270087198428e-07, "loss": 0.6243, "step": 27201 }, { "epoch": 0.8337011156062277, "grad_norm": 1.6947994786905953, "learning_rate": 7.078724071803711e-07, "loss": 0.4703, "step": 27202 }, { "epoch": 0.833731764128969, "grad_norm": 1.9035828979819267, "learning_rate": 7.07617847932332e-07, "loss": 0.5243, "step": 27203 }, { "epoch": 0.8337624126517101, "grad_norm": 1.9150572377698336, "learning_rate": 7.073633309782319e-07, "loss": 0.5494, "step": 27204 }, { "epoch": 0.8337930611744514, "grad_norm": 1.8305992896122196, "learning_rate": 7.071088563205774e-07, "loss": 0.5394, "step": 27205 }, { "epoch": 0.8338237096971925, "grad_norm": 0.7823471062067606, "learning_rate": 7.068544239618802e-07, "loss": 0.3904, "step": 27206 }, { "epoch": 0.8338543582199338, "grad_norm": 1.8639509179829747, "learning_rate": 7.066000339046442e-07, "loss": 0.5885, "step": 27207 }, { "epoch": 0.833885006742675, "grad_norm": 1.7151981317768796, "learning_rate": 7.063456861513756e-07, "loss": 0.6436, "step": 27208 }, { "epoch": 0.8339156552654162, "grad_norm": 1.8883357288634397, "learning_rate": 7.060913807045816e-07, "loss": 0.5089, "step": 27209 }, { "epoch": 0.8339463037881574, "grad_norm": 1.5699571946928634, "learning_rate": 7.058371175667683e-07, "loss": 0.5062, "step": 27210 }, { "epoch": 0.8339769523108986, "grad_norm": 5.936052494293772, "learning_rate": 7.055828967404415e-07, "loss": 0.614, "step": 27211 }, { "epoch": 0.8340076008336398, "grad_norm": 2.0474363114250704, "learning_rate": 7.053287182281038e-07, "loss": 0.5813, "step": 27212 }, { "epoch": 0.834038249356381, "grad_norm": 1.8971442853007554, "learning_rate": 7.050745820322613e-07, "loss": 0.5402, "step": 27213 }, { "epoch": 0.8340688978791222, "grad_norm": 2.05452754491487, "learning_rate": 7.048204881554188e-07, "loss": 0.4354, "step": 27214 }, { "epoch": 0.8340995464018635, "grad_norm": 0.7903643229529651, "learning_rate": 7.045664366000787e-07, "loss": 0.405, "step": 27215 }, { "epoch": 0.8341301949246046, "grad_norm": 1.9768778762570782, "learning_rate": 7.043124273687441e-07, "loss": 0.6225, "step": 27216 }, { "epoch": 0.8341608434473459, "grad_norm": 0.7486962573427021, "learning_rate": 7.040584604639178e-07, "loss": 0.3691, "step": 27217 }, { "epoch": 0.834191491970087, "grad_norm": 0.7857064343197719, "learning_rate": 7.038045358881041e-07, "loss": 0.3911, "step": 27218 }, { "epoch": 0.8342221404928283, "grad_norm": 2.174428448677365, "learning_rate": 7.03550653643802e-07, "loss": 0.6611, "step": 27219 }, { "epoch": 0.8342527890155694, "grad_norm": 1.9265422599014839, "learning_rate": 7.03296813733515e-07, "loss": 0.5263, "step": 27220 }, { "epoch": 0.8342834375383107, "grad_norm": 1.7148267479984358, "learning_rate": 7.030430161597435e-07, "loss": 0.5438, "step": 27221 }, { "epoch": 0.8343140860610518, "grad_norm": 1.9600182838978506, "learning_rate": 7.027892609249903e-07, "loss": 0.6981, "step": 27222 }, { "epoch": 0.8343447345837931, "grad_norm": 1.8621479780501682, "learning_rate": 7.025355480317536e-07, "loss": 0.5499, "step": 27223 }, { "epoch": 0.8343753831065343, "grad_norm": 1.9880005395875777, "learning_rate": 7.022818774825313e-07, "loss": 0.6346, "step": 27224 }, { "epoch": 0.8344060316292755, "grad_norm": 1.8460165916292897, "learning_rate": 7.020282492798275e-07, "loss": 0.5667, "step": 27225 }, { "epoch": 0.8344366801520167, "grad_norm": 2.0047130077884194, "learning_rate": 7.017746634261391e-07, "loss": 0.524, "step": 27226 }, { "epoch": 0.8344673286747579, "grad_norm": 1.697822669763398, "learning_rate": 7.015211199239641e-07, "loss": 0.4613, "step": 27227 }, { "epoch": 0.8344979771974991, "grad_norm": 1.7298512019821846, "learning_rate": 7.012676187758006e-07, "loss": 0.5516, "step": 27228 }, { "epoch": 0.8345286257202403, "grad_norm": 1.924618470870334, "learning_rate": 7.010141599841474e-07, "loss": 0.6476, "step": 27229 }, { "epoch": 0.8345592742429815, "grad_norm": 1.8727421156678832, "learning_rate": 7.00760743551503e-07, "loss": 0.6073, "step": 27230 }, { "epoch": 0.8345899227657227, "grad_norm": 2.3762161366162298, "learning_rate": 7.005073694803615e-07, "loss": 0.6143, "step": 27231 }, { "epoch": 0.8346205712884639, "grad_norm": 2.0148510671722577, "learning_rate": 7.002540377732215e-07, "loss": 0.6151, "step": 27232 }, { "epoch": 0.834651219811205, "grad_norm": 1.8819219553480333, "learning_rate": 7.000007484325788e-07, "loss": 0.5414, "step": 27233 }, { "epoch": 0.8346818683339463, "grad_norm": 2.031529198326926, "learning_rate": 6.997475014609295e-07, "loss": 0.5707, "step": 27234 }, { "epoch": 0.8347125168566875, "grad_norm": 1.9809638227501372, "learning_rate": 6.994942968607665e-07, "loss": 0.7184, "step": 27235 }, { "epoch": 0.8347431653794287, "grad_norm": 1.6923570532500285, "learning_rate": 6.992411346345884e-07, "loss": 0.4776, "step": 27236 }, { "epoch": 0.8347738139021699, "grad_norm": 1.90625586661087, "learning_rate": 6.989880147848865e-07, "loss": 0.5784, "step": 27237 }, { "epoch": 0.8348044624249111, "grad_norm": 0.7693777101695197, "learning_rate": 6.987349373141572e-07, "loss": 0.3943, "step": 27238 }, { "epoch": 0.8348351109476523, "grad_norm": 1.9876387242254074, "learning_rate": 6.984819022248923e-07, "loss": 0.5759, "step": 27239 }, { "epoch": 0.8348657594703935, "grad_norm": 2.128371977969533, "learning_rate": 6.982289095195855e-07, "loss": 0.6689, "step": 27240 }, { "epoch": 0.8348964079931347, "grad_norm": 1.8253893514179806, "learning_rate": 6.979759592007312e-07, "loss": 0.5725, "step": 27241 }, { "epoch": 0.834927056515876, "grad_norm": 1.7878063230573176, "learning_rate": 6.977230512708194e-07, "loss": 0.5866, "step": 27242 }, { "epoch": 0.8349577050386171, "grad_norm": 0.8652139336508525, "learning_rate": 6.974701857323429e-07, "loss": 0.4061, "step": 27243 }, { "epoch": 0.8349883535613584, "grad_norm": 1.830943706130405, "learning_rate": 6.972173625877949e-07, "loss": 0.6755, "step": 27244 }, { "epoch": 0.8350190020840995, "grad_norm": 1.8100938231711683, "learning_rate": 6.969645818396654e-07, "loss": 0.5419, "step": 27245 }, { "epoch": 0.8350496506068408, "grad_norm": 0.740892813518741, "learning_rate": 6.967118434904424e-07, "loss": 0.3695, "step": 27246 }, { "epoch": 0.8350802991295819, "grad_norm": 1.9851882180861564, "learning_rate": 6.964591475426208e-07, "loss": 0.512, "step": 27247 }, { "epoch": 0.8351109476523232, "grad_norm": 1.7829387792386227, "learning_rate": 6.962064939986868e-07, "loss": 0.5993, "step": 27248 }, { "epoch": 0.8351415961750643, "grad_norm": 2.025217885887236, "learning_rate": 6.959538828611329e-07, "loss": 0.5003, "step": 27249 }, { "epoch": 0.8351722446978056, "grad_norm": 1.6188055320392496, "learning_rate": 6.95701314132446e-07, "loss": 0.4774, "step": 27250 }, { "epoch": 0.8352028932205467, "grad_norm": 1.7106268833908407, "learning_rate": 6.954487878151145e-07, "loss": 0.52, "step": 27251 }, { "epoch": 0.835233541743288, "grad_norm": 0.8243267006094858, "learning_rate": 6.951963039116288e-07, "loss": 0.3941, "step": 27252 }, { "epoch": 0.8352641902660292, "grad_norm": 1.809520145881829, "learning_rate": 6.949438624244748e-07, "loss": 0.5505, "step": 27253 }, { "epoch": 0.8352948387887704, "grad_norm": 2.0819951441040265, "learning_rate": 6.946914633561397e-07, "loss": 0.5934, "step": 27254 }, { "epoch": 0.8353254873115116, "grad_norm": 2.017684426229085, "learning_rate": 6.944391067091127e-07, "loss": 0.5042, "step": 27255 }, { "epoch": 0.8353561358342528, "grad_norm": 1.7281857635098408, "learning_rate": 6.941867924858775e-07, "loss": 0.4863, "step": 27256 }, { "epoch": 0.835386784356994, "grad_norm": 1.8598532023914183, "learning_rate": 6.93934520688922e-07, "loss": 0.4608, "step": 27257 }, { "epoch": 0.8354174328797352, "grad_norm": 0.7587459074892586, "learning_rate": 6.936822913207319e-07, "loss": 0.3775, "step": 27258 }, { "epoch": 0.8354480814024764, "grad_norm": 1.8433183231578194, "learning_rate": 6.93430104383791e-07, "loss": 0.5048, "step": 27259 }, { "epoch": 0.8354787299252177, "grad_norm": 0.8635099911774682, "learning_rate": 6.931779598805865e-07, "loss": 0.4021, "step": 27260 }, { "epoch": 0.8355093784479588, "grad_norm": 1.8801751386233048, "learning_rate": 6.929258578136005e-07, "loss": 0.5814, "step": 27261 }, { "epoch": 0.8355400269707001, "grad_norm": 0.8061463077394829, "learning_rate": 6.926737981853177e-07, "loss": 0.4234, "step": 27262 }, { "epoch": 0.8355706754934412, "grad_norm": 1.9337414848145036, "learning_rate": 6.924217809982231e-07, "loss": 0.5576, "step": 27263 }, { "epoch": 0.8356013240161824, "grad_norm": 1.8044130033690924, "learning_rate": 6.921698062547983e-07, "loss": 0.6371, "step": 27264 }, { "epoch": 0.8356319725389236, "grad_norm": 1.901211422836862, "learning_rate": 6.919178739575261e-07, "loss": 0.5315, "step": 27265 }, { "epoch": 0.8356626210616648, "grad_norm": 2.060088795472128, "learning_rate": 6.916659841088908e-07, "loss": 0.5958, "step": 27266 }, { "epoch": 0.835693269584406, "grad_norm": 2.0204329426626675, "learning_rate": 6.914141367113714e-07, "loss": 0.5986, "step": 27267 }, { "epoch": 0.8357239181071472, "grad_norm": 1.8634070846804855, "learning_rate": 6.911623317674521e-07, "loss": 0.5855, "step": 27268 }, { "epoch": 0.8357545666298885, "grad_norm": 0.7819448911523152, "learning_rate": 6.90910569279612e-07, "loss": 0.4182, "step": 27269 }, { "epoch": 0.8357852151526296, "grad_norm": 1.861173236318662, "learning_rate": 6.906588492503325e-07, "loss": 0.486, "step": 27270 }, { "epoch": 0.8358158636753709, "grad_norm": 0.7803463319905231, "learning_rate": 6.904071716820948e-07, "loss": 0.3831, "step": 27271 }, { "epoch": 0.835846512198112, "grad_norm": 2.281489035477965, "learning_rate": 6.901555365773766e-07, "loss": 0.5714, "step": 27272 }, { "epoch": 0.8358771607208533, "grad_norm": 0.8305102209394917, "learning_rate": 6.89903943938659e-07, "loss": 0.3915, "step": 27273 }, { "epoch": 0.8359078092435944, "grad_norm": 1.8514609187250444, "learning_rate": 6.896523937684219e-07, "loss": 0.5401, "step": 27274 }, { "epoch": 0.8359384577663357, "grad_norm": 2.0189552645983966, "learning_rate": 6.894008860691415e-07, "loss": 0.6578, "step": 27275 }, { "epoch": 0.8359691062890768, "grad_norm": 2.0978559795693372, "learning_rate": 6.891494208432964e-07, "loss": 0.6903, "step": 27276 }, { "epoch": 0.8359997548118181, "grad_norm": 1.92175896739592, "learning_rate": 6.888979980933669e-07, "loss": 0.5037, "step": 27277 }, { "epoch": 0.8360304033345592, "grad_norm": 1.8244348546493103, "learning_rate": 6.88646617821827e-07, "loss": 0.6405, "step": 27278 }, { "epoch": 0.8360610518573005, "grad_norm": 1.9639379690025092, "learning_rate": 6.883952800311561e-07, "loss": 0.636, "step": 27279 }, { "epoch": 0.8360917003800417, "grad_norm": 0.8114342124163293, "learning_rate": 6.881439847238292e-07, "loss": 0.3922, "step": 27280 }, { "epoch": 0.8361223489027829, "grad_norm": 1.8200758059699478, "learning_rate": 6.878927319023221e-07, "loss": 0.5953, "step": 27281 }, { "epoch": 0.8361529974255241, "grad_norm": 1.726198001229524, "learning_rate": 6.876415215691124e-07, "loss": 0.5792, "step": 27282 }, { "epoch": 0.8361836459482653, "grad_norm": 2.176184216131103, "learning_rate": 6.873903537266735e-07, "loss": 0.6028, "step": 27283 }, { "epoch": 0.8362142944710065, "grad_norm": 0.7992713667114314, "learning_rate": 6.871392283774808e-07, "loss": 0.3865, "step": 27284 }, { "epoch": 0.8362449429937477, "grad_norm": 1.7728877811509014, "learning_rate": 6.868881455240095e-07, "loss": 0.5161, "step": 27285 }, { "epoch": 0.8362755915164889, "grad_norm": 2.1243300740249222, "learning_rate": 6.866371051687321e-07, "loss": 0.5824, "step": 27286 }, { "epoch": 0.8363062400392302, "grad_norm": 2.045954629707224, "learning_rate": 6.863861073141226e-07, "loss": 0.5713, "step": 27287 }, { "epoch": 0.8363368885619713, "grad_norm": 1.9372624434638461, "learning_rate": 6.861351519626558e-07, "loss": 0.5527, "step": 27288 }, { "epoch": 0.8363675370847126, "grad_norm": 1.7611734908483645, "learning_rate": 6.858842391168019e-07, "loss": 0.5291, "step": 27289 }, { "epoch": 0.8363981856074537, "grad_norm": 1.6298337331750052, "learning_rate": 6.856333687790357e-07, "loss": 0.6243, "step": 27290 }, { "epoch": 0.836428834130195, "grad_norm": 1.7815040078148647, "learning_rate": 6.853825409518266e-07, "loss": 0.5369, "step": 27291 }, { "epoch": 0.8364594826529361, "grad_norm": 0.7834252973006502, "learning_rate": 6.851317556376469e-07, "loss": 0.4016, "step": 27292 }, { "epoch": 0.8364901311756774, "grad_norm": 2.166208801109267, "learning_rate": 6.848810128389699e-07, "loss": 0.5118, "step": 27293 }, { "epoch": 0.8365207796984185, "grad_norm": 1.9901589809572249, "learning_rate": 6.846303125582626e-07, "loss": 0.6316, "step": 27294 }, { "epoch": 0.8365514282211597, "grad_norm": 1.99488589213486, "learning_rate": 6.84379654797997e-07, "loss": 0.6641, "step": 27295 }, { "epoch": 0.836582076743901, "grad_norm": 2.5065148888324202, "learning_rate": 6.841290395606443e-07, "loss": 0.6066, "step": 27296 }, { "epoch": 0.8366127252666421, "grad_norm": 1.9185496787234433, "learning_rate": 6.838784668486708e-07, "loss": 0.6355, "step": 27297 }, { "epoch": 0.8366433737893834, "grad_norm": 0.7567154542075465, "learning_rate": 6.836279366645477e-07, "loss": 0.3908, "step": 27298 }, { "epoch": 0.8366740223121245, "grad_norm": 1.6715213278577057, "learning_rate": 6.833774490107437e-07, "loss": 0.5536, "step": 27299 }, { "epoch": 0.8367046708348658, "grad_norm": 2.538747918141515, "learning_rate": 6.831270038897253e-07, "loss": 0.5706, "step": 27300 }, { "epoch": 0.8367353193576069, "grad_norm": 2.0251945906884368, "learning_rate": 6.828766013039617e-07, "loss": 0.6604, "step": 27301 }, { "epoch": 0.8367659678803482, "grad_norm": 1.918182011984863, "learning_rate": 6.826262412559176e-07, "loss": 0.5871, "step": 27302 }, { "epoch": 0.8367966164030893, "grad_norm": 0.830961535416196, "learning_rate": 6.823759237480643e-07, "loss": 0.4214, "step": 27303 }, { "epoch": 0.8368272649258306, "grad_norm": 1.7373247061174752, "learning_rate": 6.821256487828654e-07, "loss": 0.5275, "step": 27304 }, { "epoch": 0.8368579134485717, "grad_norm": 2.1980689779000095, "learning_rate": 6.818754163627861e-07, "loss": 0.5796, "step": 27305 }, { "epoch": 0.836888561971313, "grad_norm": 2.041854791923555, "learning_rate": 6.816252264902934e-07, "loss": 0.5353, "step": 27306 }, { "epoch": 0.8369192104940542, "grad_norm": 1.7810256433312575, "learning_rate": 6.813750791678531e-07, "loss": 0.5995, "step": 27307 }, { "epoch": 0.8369498590167954, "grad_norm": 1.8909829079860319, "learning_rate": 6.811249743979281e-07, "loss": 0.529, "step": 27308 }, { "epoch": 0.8369805075395366, "grad_norm": 1.6485860832398513, "learning_rate": 6.808749121829839e-07, "loss": 0.5376, "step": 27309 }, { "epoch": 0.8370111560622778, "grad_norm": 1.6345686734578417, "learning_rate": 6.806248925254844e-07, "loss": 0.5602, "step": 27310 }, { "epoch": 0.837041804585019, "grad_norm": 0.807778138943022, "learning_rate": 6.803749154278938e-07, "loss": 0.398, "step": 27311 }, { "epoch": 0.8370724531077602, "grad_norm": 0.7883831991622529, "learning_rate": 6.801249808926741e-07, "loss": 0.3731, "step": 27312 }, { "epoch": 0.8371031016305014, "grad_norm": 1.5428479154102488, "learning_rate": 6.798750889222877e-07, "loss": 0.5195, "step": 27313 }, { "epoch": 0.8371337501532427, "grad_norm": 1.8283788543743011, "learning_rate": 6.796252395191971e-07, "loss": 0.5824, "step": 27314 }, { "epoch": 0.8371643986759838, "grad_norm": 0.7734648020491443, "learning_rate": 6.793754326858659e-07, "loss": 0.3824, "step": 27315 }, { "epoch": 0.8371950471987251, "grad_norm": 0.8336272436279156, "learning_rate": 6.791256684247521e-07, "loss": 0.4006, "step": 27316 }, { "epoch": 0.8372256957214662, "grad_norm": 1.9509350994970835, "learning_rate": 6.788759467383194e-07, "loss": 0.5799, "step": 27317 }, { "epoch": 0.8372563442442075, "grad_norm": 1.8542881465617056, "learning_rate": 6.786262676290284e-07, "loss": 0.6215, "step": 27318 }, { "epoch": 0.8372869927669486, "grad_norm": 1.8497585155777663, "learning_rate": 6.783766310993378e-07, "loss": 0.6138, "step": 27319 }, { "epoch": 0.8373176412896899, "grad_norm": 1.962658246279553, "learning_rate": 6.781270371517084e-07, "loss": 0.6404, "step": 27320 }, { "epoch": 0.837348289812431, "grad_norm": 1.8641449296869432, "learning_rate": 6.778774857885973e-07, "loss": 0.6323, "step": 27321 }, { "epoch": 0.8373789383351723, "grad_norm": 1.8529135336133296, "learning_rate": 6.776279770124677e-07, "loss": 0.5511, "step": 27322 }, { "epoch": 0.8374095868579134, "grad_norm": 2.061693010652835, "learning_rate": 6.773785108257752e-07, "loss": 0.5703, "step": 27323 }, { "epoch": 0.8374402353806547, "grad_norm": 0.7675139055937162, "learning_rate": 6.771290872309771e-07, "loss": 0.4008, "step": 27324 }, { "epoch": 0.8374708839033959, "grad_norm": 0.8343115887320661, "learning_rate": 6.768797062305321e-07, "loss": 0.4154, "step": 27325 }, { "epoch": 0.837501532426137, "grad_norm": 1.6991443270136999, "learning_rate": 6.766303678268987e-07, "loss": 0.633, "step": 27326 }, { "epoch": 0.8375321809488783, "grad_norm": 2.080483574177966, "learning_rate": 6.763810720225311e-07, "loss": 0.5038, "step": 27327 }, { "epoch": 0.8375628294716194, "grad_norm": 0.7521936685693151, "learning_rate": 6.761318188198873e-07, "loss": 0.3749, "step": 27328 }, { "epoch": 0.8375934779943607, "grad_norm": 0.8050233488027767, "learning_rate": 6.758826082214232e-07, "loss": 0.4112, "step": 27329 }, { "epoch": 0.8376241265171018, "grad_norm": 1.8430827250123227, "learning_rate": 6.75633440229595e-07, "loss": 0.57, "step": 27330 }, { "epoch": 0.8376547750398431, "grad_norm": 1.9486956565320945, "learning_rate": 6.75384314846857e-07, "loss": 0.6776, "step": 27331 }, { "epoch": 0.8376854235625842, "grad_norm": 0.7948440744340078, "learning_rate": 6.751352320756615e-07, "loss": 0.3947, "step": 27332 }, { "epoch": 0.8377160720853255, "grad_norm": 1.9720075952074678, "learning_rate": 6.748861919184673e-07, "loss": 0.5968, "step": 27333 }, { "epoch": 0.8377467206080667, "grad_norm": 1.8370532529321006, "learning_rate": 6.746371943777263e-07, "loss": 0.5163, "step": 27334 }, { "epoch": 0.8377773691308079, "grad_norm": 1.8004812107481303, "learning_rate": 6.743882394558903e-07, "loss": 0.5412, "step": 27335 }, { "epoch": 0.8378080176535491, "grad_norm": 1.6357612635358842, "learning_rate": 6.741393271554142e-07, "loss": 0.4821, "step": 27336 }, { "epoch": 0.8378386661762903, "grad_norm": 0.8648215648132047, "learning_rate": 6.738904574787497e-07, "loss": 0.3895, "step": 27337 }, { "epoch": 0.8378693146990315, "grad_norm": 1.7152649351925995, "learning_rate": 6.73641630428351e-07, "loss": 0.5608, "step": 27338 }, { "epoch": 0.8378999632217727, "grad_norm": 1.8170270811254734, "learning_rate": 6.733928460066669e-07, "loss": 0.6068, "step": 27339 }, { "epoch": 0.8379306117445139, "grad_norm": 1.8624441836623333, "learning_rate": 6.731441042161496e-07, "loss": 0.5853, "step": 27340 }, { "epoch": 0.8379612602672551, "grad_norm": 1.8157389030888604, "learning_rate": 6.728954050592523e-07, "loss": 0.6393, "step": 27341 }, { "epoch": 0.8379919087899963, "grad_norm": 0.7765985259349343, "learning_rate": 6.726467485384236e-07, "loss": 0.378, "step": 27342 }, { "epoch": 0.8380225573127376, "grad_norm": 1.7125716872988415, "learning_rate": 6.723981346561115e-07, "loss": 0.5318, "step": 27343 }, { "epoch": 0.8380532058354787, "grad_norm": 2.134616715211792, "learning_rate": 6.721495634147696e-07, "loss": 0.5697, "step": 27344 }, { "epoch": 0.83808385435822, "grad_norm": 1.7786747036261148, "learning_rate": 6.719010348168459e-07, "loss": 0.5718, "step": 27345 }, { "epoch": 0.8381145028809611, "grad_norm": 2.0091284642709617, "learning_rate": 6.716525488647874e-07, "loss": 0.624, "step": 27346 }, { "epoch": 0.8381451514037024, "grad_norm": 1.843261768290642, "learning_rate": 6.714041055610437e-07, "loss": 0.6434, "step": 27347 }, { "epoch": 0.8381757999264435, "grad_norm": 2.1253017418804236, "learning_rate": 6.71155704908063e-07, "loss": 0.6353, "step": 27348 }, { "epoch": 0.8382064484491848, "grad_norm": 1.8823654545390067, "learning_rate": 6.709073469082938e-07, "loss": 0.491, "step": 27349 }, { "epoch": 0.8382370969719259, "grad_norm": 1.8416724732626844, "learning_rate": 6.706590315641809e-07, "loss": 0.5448, "step": 27350 }, { "epoch": 0.8382677454946672, "grad_norm": 1.9523878127805037, "learning_rate": 6.704107588781722e-07, "loss": 0.5858, "step": 27351 }, { "epoch": 0.8382983940174084, "grad_norm": 1.9190466930017434, "learning_rate": 6.701625288527152e-07, "loss": 0.6258, "step": 27352 }, { "epoch": 0.8383290425401496, "grad_norm": 1.8209947339703387, "learning_rate": 6.699143414902548e-07, "loss": 0.5611, "step": 27353 }, { "epoch": 0.8383596910628908, "grad_norm": 1.6885898346480057, "learning_rate": 6.696661967932344e-07, "loss": 0.5874, "step": 27354 }, { "epoch": 0.838390339585632, "grad_norm": 2.000887543005191, "learning_rate": 6.694180947641027e-07, "loss": 0.6009, "step": 27355 }, { "epoch": 0.8384209881083732, "grad_norm": 1.8642869318858795, "learning_rate": 6.691700354053016e-07, "loss": 0.5743, "step": 27356 }, { "epoch": 0.8384516366311143, "grad_norm": 1.9062556011103744, "learning_rate": 6.689220187192774e-07, "loss": 0.5536, "step": 27357 }, { "epoch": 0.8384822851538556, "grad_norm": 1.764445343052996, "learning_rate": 6.686740447084711e-07, "loss": 0.4657, "step": 27358 }, { "epoch": 0.8385129336765967, "grad_norm": 1.856235564544759, "learning_rate": 6.684261133753283e-07, "loss": 0.5677, "step": 27359 }, { "epoch": 0.838543582199338, "grad_norm": 1.9481585950167368, "learning_rate": 6.681782247222923e-07, "loss": 0.5841, "step": 27360 }, { "epoch": 0.8385742307220792, "grad_norm": 1.8914919246171034, "learning_rate": 6.679303787518032e-07, "loss": 0.5548, "step": 27361 }, { "epoch": 0.8386048792448204, "grad_norm": 1.912325355667656, "learning_rate": 6.676825754663046e-07, "loss": 0.557, "step": 27362 }, { "epoch": 0.8386355277675616, "grad_norm": 0.79939172376257, "learning_rate": 6.674348148682391e-07, "loss": 0.3974, "step": 27363 }, { "epoch": 0.8386661762903028, "grad_norm": 1.9858427626946327, "learning_rate": 6.671870969600458e-07, "loss": 0.6826, "step": 27364 }, { "epoch": 0.838696824813044, "grad_norm": 0.7863002808539167, "learning_rate": 6.669394217441677e-07, "loss": 0.4034, "step": 27365 }, { "epoch": 0.8387274733357852, "grad_norm": 2.168434527181981, "learning_rate": 6.666917892230434e-07, "loss": 0.5727, "step": 27366 }, { "epoch": 0.8387581218585264, "grad_norm": 2.0065262243225246, "learning_rate": 6.664441993991134e-07, "loss": 0.7276, "step": 27367 }, { "epoch": 0.8387887703812676, "grad_norm": 1.9176305749408051, "learning_rate": 6.661966522748187e-07, "loss": 0.4956, "step": 27368 }, { "epoch": 0.8388194189040088, "grad_norm": 1.9222356284327002, "learning_rate": 6.659491478525959e-07, "loss": 0.6156, "step": 27369 }, { "epoch": 0.8388500674267501, "grad_norm": 1.7459982212876128, "learning_rate": 6.657016861348852e-07, "loss": 0.6049, "step": 27370 }, { "epoch": 0.8388807159494912, "grad_norm": 1.796519241856739, "learning_rate": 6.654542671241254e-07, "loss": 0.542, "step": 27371 }, { "epoch": 0.8389113644722325, "grad_norm": 0.7913699514589612, "learning_rate": 6.652068908227543e-07, "loss": 0.3895, "step": 27372 }, { "epoch": 0.8389420129949736, "grad_norm": 1.9236671468039603, "learning_rate": 6.649595572332068e-07, "loss": 0.6079, "step": 27373 }, { "epoch": 0.8389726615177149, "grad_norm": 1.6156435766912853, "learning_rate": 6.647122663579236e-07, "loss": 0.5525, "step": 27374 }, { "epoch": 0.839003310040456, "grad_norm": 2.0045537873839256, "learning_rate": 6.644650181993389e-07, "loss": 0.5461, "step": 27375 }, { "epoch": 0.8390339585631973, "grad_norm": 1.8291969424122552, "learning_rate": 6.642178127598908e-07, "loss": 0.6734, "step": 27376 }, { "epoch": 0.8390646070859384, "grad_norm": 1.9531069775571328, "learning_rate": 6.639706500420124e-07, "loss": 0.5587, "step": 27377 }, { "epoch": 0.8390952556086797, "grad_norm": 2.141781412740373, "learning_rate": 6.637235300481409e-07, "loss": 0.5682, "step": 27378 }, { "epoch": 0.8391259041314209, "grad_norm": 2.0066956445099766, "learning_rate": 6.63476452780712e-07, "loss": 0.4861, "step": 27379 }, { "epoch": 0.8391565526541621, "grad_norm": 0.7935202617263462, "learning_rate": 6.632294182421584e-07, "loss": 0.4089, "step": 27380 }, { "epoch": 0.8391872011769033, "grad_norm": 1.7359260860023258, "learning_rate": 6.629824264349144e-07, "loss": 0.549, "step": 27381 }, { "epoch": 0.8392178496996445, "grad_norm": 1.9802353152777172, "learning_rate": 6.627354773614159e-07, "loss": 0.5691, "step": 27382 }, { "epoch": 0.8392484982223857, "grad_norm": 1.7427574329022775, "learning_rate": 6.624885710240931e-07, "loss": 0.6125, "step": 27383 }, { "epoch": 0.8392791467451269, "grad_norm": 1.8855209724902138, "learning_rate": 6.6224170742538e-07, "loss": 0.5418, "step": 27384 }, { "epoch": 0.8393097952678681, "grad_norm": 1.8350375124018412, "learning_rate": 6.619948865677107e-07, "loss": 0.5469, "step": 27385 }, { "epoch": 0.8393404437906093, "grad_norm": 1.8116306657963976, "learning_rate": 6.617481084535143e-07, "loss": 0.5606, "step": 27386 }, { "epoch": 0.8393710923133505, "grad_norm": 1.9609173820468575, "learning_rate": 6.615013730852249e-07, "loss": 0.5922, "step": 27387 }, { "epoch": 0.8394017408360916, "grad_norm": 0.772749483657047, "learning_rate": 6.612546804652714e-07, "loss": 0.3857, "step": 27388 }, { "epoch": 0.8394323893588329, "grad_norm": 0.8127450458393577, "learning_rate": 6.610080305960859e-07, "loss": 0.4057, "step": 27389 }, { "epoch": 0.8394630378815741, "grad_norm": 2.2104939807673216, "learning_rate": 6.607614234800997e-07, "loss": 0.5583, "step": 27390 }, { "epoch": 0.8394936864043153, "grad_norm": 1.7835263929923608, "learning_rate": 6.605148591197396e-07, "loss": 0.5412, "step": 27391 }, { "epoch": 0.8395243349270565, "grad_norm": 0.791329892590439, "learning_rate": 6.602683375174374e-07, "loss": 0.4135, "step": 27392 }, { "epoch": 0.8395549834497977, "grad_norm": 1.915585301940192, "learning_rate": 6.600218586756229e-07, "loss": 0.5877, "step": 27393 }, { "epoch": 0.8395856319725389, "grad_norm": 1.8853566902697259, "learning_rate": 6.597754225967223e-07, "loss": 0.7321, "step": 27394 }, { "epoch": 0.8396162804952801, "grad_norm": 0.7963256207833367, "learning_rate": 6.595290292831647e-07, "loss": 0.4051, "step": 27395 }, { "epoch": 0.8396469290180213, "grad_norm": 1.8244341840850933, "learning_rate": 6.592826787373791e-07, "loss": 0.5662, "step": 27396 }, { "epoch": 0.8396775775407626, "grad_norm": 2.1386053791827853, "learning_rate": 6.590363709617914e-07, "loss": 0.5256, "step": 27397 }, { "epoch": 0.8397082260635037, "grad_norm": 1.7348184988387505, "learning_rate": 6.587901059588298e-07, "loss": 0.5182, "step": 27398 }, { "epoch": 0.839738874586245, "grad_norm": 1.7285350512352684, "learning_rate": 6.585438837309188e-07, "loss": 0.5555, "step": 27399 }, { "epoch": 0.8397695231089861, "grad_norm": 0.7978463804530903, "learning_rate": 6.582977042804856e-07, "loss": 0.4012, "step": 27400 }, { "epoch": 0.8398001716317274, "grad_norm": 1.9070956411467663, "learning_rate": 6.580515676099575e-07, "loss": 0.5583, "step": 27401 }, { "epoch": 0.8398308201544685, "grad_norm": 0.7831108992053814, "learning_rate": 6.578054737217565e-07, "loss": 0.3772, "step": 27402 }, { "epoch": 0.8398614686772098, "grad_norm": 1.7779749631443789, "learning_rate": 6.575594226183096e-07, "loss": 0.499, "step": 27403 }, { "epoch": 0.8398921171999509, "grad_norm": 1.7680222486002204, "learning_rate": 6.573134143020421e-07, "loss": 0.5617, "step": 27404 }, { "epoch": 0.8399227657226922, "grad_norm": 1.964429662978589, "learning_rate": 6.570674487753753e-07, "loss": 0.5575, "step": 27405 }, { "epoch": 0.8399534142454333, "grad_norm": 0.8042011276669911, "learning_rate": 6.56821526040734e-07, "loss": 0.3953, "step": 27406 }, { "epoch": 0.8399840627681746, "grad_norm": 1.9372380242466556, "learning_rate": 6.565756461005429e-07, "loss": 0.592, "step": 27407 }, { "epoch": 0.8400147112909158, "grad_norm": 2.162517674610064, "learning_rate": 6.563298089572218e-07, "loss": 0.6165, "step": 27408 }, { "epoch": 0.840045359813657, "grad_norm": 1.7905504060237747, "learning_rate": 6.560840146131958e-07, "loss": 0.5477, "step": 27409 }, { "epoch": 0.8400760083363982, "grad_norm": 1.707195996207775, "learning_rate": 6.558382630708843e-07, "loss": 0.4856, "step": 27410 }, { "epoch": 0.8401066568591394, "grad_norm": 1.5352117670947798, "learning_rate": 6.555925543327097e-07, "loss": 0.5033, "step": 27411 }, { "epoch": 0.8401373053818806, "grad_norm": 1.777530707465699, "learning_rate": 6.553468884010949e-07, "loss": 0.5389, "step": 27412 }, { "epoch": 0.8401679539046218, "grad_norm": 1.8069718778110655, "learning_rate": 6.551012652784572e-07, "loss": 0.4622, "step": 27413 }, { "epoch": 0.840198602427363, "grad_norm": 1.9965163345924586, "learning_rate": 6.548556849672183e-07, "loss": 0.5806, "step": 27414 }, { "epoch": 0.8402292509501043, "grad_norm": 1.8919004655157114, "learning_rate": 6.546101474697996e-07, "loss": 0.5458, "step": 27415 }, { "epoch": 0.8402598994728454, "grad_norm": 0.795070923671218, "learning_rate": 6.543646527886177e-07, "loss": 0.3977, "step": 27416 }, { "epoch": 0.8402905479955867, "grad_norm": 1.884297951120508, "learning_rate": 6.541192009260938e-07, "loss": 0.6213, "step": 27417 }, { "epoch": 0.8403211965183278, "grad_norm": 2.1793846029586086, "learning_rate": 6.538737918846444e-07, "loss": 0.5362, "step": 27418 }, { "epoch": 0.840351845041069, "grad_norm": 1.7771914318396982, "learning_rate": 6.536284256666881e-07, "loss": 0.6596, "step": 27419 }, { "epoch": 0.8403824935638102, "grad_norm": 1.6728708630166385, "learning_rate": 6.533831022746445e-07, "loss": 0.5394, "step": 27420 }, { "epoch": 0.8404131420865514, "grad_norm": 2.0387323489757976, "learning_rate": 6.53137821710928e-07, "loss": 0.5276, "step": 27421 }, { "epoch": 0.8404437906092926, "grad_norm": 2.086171277655997, "learning_rate": 6.52892583977957e-07, "loss": 0.7222, "step": 27422 }, { "epoch": 0.8404744391320338, "grad_norm": 1.7681511073467933, "learning_rate": 6.526473890781487e-07, "loss": 0.5767, "step": 27423 }, { "epoch": 0.840505087654775, "grad_norm": 1.9336985255745196, "learning_rate": 6.524022370139166e-07, "loss": 0.6991, "step": 27424 }, { "epoch": 0.8405357361775162, "grad_norm": 1.965203245359283, "learning_rate": 6.521571277876782e-07, "loss": 0.5929, "step": 27425 }, { "epoch": 0.8405663847002575, "grad_norm": 1.985933590434382, "learning_rate": 6.519120614018487e-07, "loss": 0.5514, "step": 27426 }, { "epoch": 0.8405970332229986, "grad_norm": 1.9902124779378663, "learning_rate": 6.516670378588413e-07, "loss": 0.5209, "step": 27427 }, { "epoch": 0.8406276817457399, "grad_norm": 1.7617708687176135, "learning_rate": 6.514220571610719e-07, "loss": 0.5937, "step": 27428 }, { "epoch": 0.840658330268481, "grad_norm": 1.8660434761101825, "learning_rate": 6.511771193109517e-07, "loss": 0.5864, "step": 27429 }, { "epoch": 0.8406889787912223, "grad_norm": 0.8431198173393178, "learning_rate": 6.509322243108978e-07, "loss": 0.4087, "step": 27430 }, { "epoch": 0.8407196273139634, "grad_norm": 1.9045697337191614, "learning_rate": 6.506873721633223e-07, "loss": 0.587, "step": 27431 }, { "epoch": 0.8407502758367047, "grad_norm": 2.0307215211836653, "learning_rate": 6.504425628706352e-07, "loss": 0.6056, "step": 27432 }, { "epoch": 0.8407809243594458, "grad_norm": 2.1153164138370775, "learning_rate": 6.501977964352502e-07, "loss": 0.6121, "step": 27433 }, { "epoch": 0.8408115728821871, "grad_norm": 1.6799820432327828, "learning_rate": 6.499530728595809e-07, "loss": 0.531, "step": 27434 }, { "epoch": 0.8408422214049283, "grad_norm": 1.9274633926437361, "learning_rate": 6.497083921460356e-07, "loss": 0.5128, "step": 27435 }, { "epoch": 0.8408728699276695, "grad_norm": 1.7080989595693472, "learning_rate": 6.494637542970267e-07, "loss": 0.539, "step": 27436 }, { "epoch": 0.8409035184504107, "grad_norm": 2.0027935538651755, "learning_rate": 6.492191593149643e-07, "loss": 0.61, "step": 27437 }, { "epoch": 0.8409341669731519, "grad_norm": 2.2992715411142184, "learning_rate": 6.489746072022601e-07, "loss": 0.5774, "step": 27438 }, { "epoch": 0.8409648154958931, "grad_norm": 1.8068702944195778, "learning_rate": 6.487300979613226e-07, "loss": 0.5599, "step": 27439 }, { "epoch": 0.8409954640186343, "grad_norm": 1.9197103898594, "learning_rate": 6.484856315945581e-07, "loss": 0.5698, "step": 27440 }, { "epoch": 0.8410261125413755, "grad_norm": 1.8844364010588295, "learning_rate": 6.482412081043804e-07, "loss": 0.546, "step": 27441 }, { "epoch": 0.8410567610641168, "grad_norm": 1.7782227928068433, "learning_rate": 6.479968274931952e-07, "loss": 0.5241, "step": 27442 }, { "epoch": 0.8410874095868579, "grad_norm": 2.0258376067567974, "learning_rate": 6.477524897634102e-07, "loss": 0.5295, "step": 27443 }, { "epoch": 0.8411180581095992, "grad_norm": 1.9004356503354134, "learning_rate": 6.475081949174334e-07, "loss": 0.5655, "step": 27444 }, { "epoch": 0.8411487066323403, "grad_norm": 1.6303173354175697, "learning_rate": 6.472639429576732e-07, "loss": 0.5419, "step": 27445 }, { "epoch": 0.8411793551550816, "grad_norm": 2.0404304968691407, "learning_rate": 6.470197338865336e-07, "loss": 0.6846, "step": 27446 }, { "epoch": 0.8412100036778227, "grad_norm": 1.6588725643951798, "learning_rate": 6.467755677064225e-07, "loss": 0.5184, "step": 27447 }, { "epoch": 0.841240652200564, "grad_norm": 1.7079431937254517, "learning_rate": 6.465314444197457e-07, "loss": 0.5612, "step": 27448 }, { "epoch": 0.8412713007233051, "grad_norm": 2.1022724557101977, "learning_rate": 6.462873640289091e-07, "loss": 0.6085, "step": 27449 }, { "epoch": 0.8413019492460463, "grad_norm": 1.9606966368315786, "learning_rate": 6.460433265363175e-07, "loss": 0.5878, "step": 27450 }, { "epoch": 0.8413325977687875, "grad_norm": 1.8540245670200104, "learning_rate": 6.457993319443723e-07, "loss": 0.5432, "step": 27451 }, { "epoch": 0.8413632462915287, "grad_norm": 1.798406163570875, "learning_rate": 6.455553802554832e-07, "loss": 0.583, "step": 27452 }, { "epoch": 0.84139389481427, "grad_norm": 2.8597423454045923, "learning_rate": 6.453114714720504e-07, "loss": 0.5739, "step": 27453 }, { "epoch": 0.8414245433370111, "grad_norm": 2.2262065970299134, "learning_rate": 6.450676055964772e-07, "loss": 0.6077, "step": 27454 }, { "epoch": 0.8414551918597524, "grad_norm": 1.9190844785266743, "learning_rate": 6.448237826311671e-07, "loss": 0.4577, "step": 27455 }, { "epoch": 0.8414858403824935, "grad_norm": 0.8386856418913603, "learning_rate": 6.445800025785226e-07, "loss": 0.4162, "step": 27456 }, { "epoch": 0.8415164889052348, "grad_norm": 0.7970353951384314, "learning_rate": 6.443362654409469e-07, "loss": 0.3929, "step": 27457 }, { "epoch": 0.8415471374279759, "grad_norm": 0.8067082261824937, "learning_rate": 6.44092571220839e-07, "loss": 0.4157, "step": 27458 }, { "epoch": 0.8415777859507172, "grad_norm": 2.0316500090301535, "learning_rate": 6.438489199206017e-07, "loss": 0.5069, "step": 27459 }, { "epoch": 0.8416084344734583, "grad_norm": 1.8752670961264835, "learning_rate": 6.436053115426366e-07, "loss": 0.5985, "step": 27460 }, { "epoch": 0.8416390829961996, "grad_norm": 1.8643982376791381, "learning_rate": 6.433617460893432e-07, "loss": 0.5464, "step": 27461 }, { "epoch": 0.8416697315189408, "grad_norm": 1.772741347388297, "learning_rate": 6.431182235631201e-07, "loss": 0.4789, "step": 27462 }, { "epoch": 0.841700380041682, "grad_norm": 1.8668312877347382, "learning_rate": 6.428747439663674e-07, "loss": 0.5727, "step": 27463 }, { "epoch": 0.8417310285644232, "grad_norm": 1.850154427036235, "learning_rate": 6.42631307301485e-07, "loss": 0.5853, "step": 27464 }, { "epoch": 0.8417616770871644, "grad_norm": 1.8481080220330453, "learning_rate": 6.423879135708728e-07, "loss": 0.6709, "step": 27465 }, { "epoch": 0.8417923256099056, "grad_norm": 1.8569052610717052, "learning_rate": 6.421445627769258e-07, "loss": 0.6294, "step": 27466 }, { "epoch": 0.8418229741326468, "grad_norm": 1.8503826943718167, "learning_rate": 6.419012549220433e-07, "loss": 0.6287, "step": 27467 }, { "epoch": 0.841853622655388, "grad_norm": 1.8613813340597456, "learning_rate": 6.41657990008624e-07, "loss": 0.5599, "step": 27468 }, { "epoch": 0.8418842711781293, "grad_norm": 1.809033970289528, "learning_rate": 6.414147680390637e-07, "loss": 0.5188, "step": 27469 }, { "epoch": 0.8419149197008704, "grad_norm": 2.179944460560894, "learning_rate": 6.411715890157572e-07, "loss": 0.6101, "step": 27470 }, { "epoch": 0.8419455682236117, "grad_norm": 0.8011771784099166, "learning_rate": 6.409284529411036e-07, "loss": 0.382, "step": 27471 }, { "epoch": 0.8419762167463528, "grad_norm": 2.025088669750203, "learning_rate": 6.406853598174978e-07, "loss": 0.6747, "step": 27472 }, { "epoch": 0.8420068652690941, "grad_norm": 1.8928489442593897, "learning_rate": 6.404423096473334e-07, "loss": 0.5751, "step": 27473 }, { "epoch": 0.8420375137918352, "grad_norm": 1.8845265359144745, "learning_rate": 6.401993024330061e-07, "loss": 0.5075, "step": 27474 }, { "epoch": 0.8420681623145765, "grad_norm": 1.9594035166949166, "learning_rate": 6.399563381769108e-07, "loss": 0.5779, "step": 27475 }, { "epoch": 0.8420988108373176, "grad_norm": 1.7144015032447628, "learning_rate": 6.397134168814422e-07, "loss": 0.5846, "step": 27476 }, { "epoch": 0.8421294593600589, "grad_norm": 0.7669105144963545, "learning_rate": 6.394705385489925e-07, "loss": 0.3862, "step": 27477 }, { "epoch": 0.8421601078828, "grad_norm": 1.9607181678034347, "learning_rate": 6.392277031819544e-07, "loss": 0.685, "step": 27478 }, { "epoch": 0.8421907564055413, "grad_norm": 1.6600774554730993, "learning_rate": 6.389849107827229e-07, "loss": 0.5602, "step": 27479 }, { "epoch": 0.8422214049282825, "grad_norm": 1.8648859227878074, "learning_rate": 6.387421613536887e-07, "loss": 0.5627, "step": 27480 }, { "epoch": 0.8422520534510236, "grad_norm": 1.7159593181272994, "learning_rate": 6.384994548972423e-07, "loss": 0.5067, "step": 27481 }, { "epoch": 0.8422827019737649, "grad_norm": 0.8108367309107255, "learning_rate": 6.382567914157784e-07, "loss": 0.3912, "step": 27482 }, { "epoch": 0.842313350496506, "grad_norm": 0.7939335460649093, "learning_rate": 6.38014170911685e-07, "loss": 0.3936, "step": 27483 }, { "epoch": 0.8423439990192473, "grad_norm": 2.1736161619311716, "learning_rate": 6.377715933873557e-07, "loss": 0.5314, "step": 27484 }, { "epoch": 0.8423746475419884, "grad_norm": 2.127861971952592, "learning_rate": 6.375290588451777e-07, "loss": 0.5966, "step": 27485 }, { "epoch": 0.8424052960647297, "grad_norm": 1.870583074137742, "learning_rate": 6.372865672875416e-07, "loss": 0.5102, "step": 27486 }, { "epoch": 0.8424359445874708, "grad_norm": 0.8175839675291952, "learning_rate": 6.370441187168392e-07, "loss": 0.4077, "step": 27487 }, { "epoch": 0.8424665931102121, "grad_norm": 1.7852870622406751, "learning_rate": 6.368017131354559e-07, "loss": 0.5738, "step": 27488 }, { "epoch": 0.8424972416329533, "grad_norm": 2.0406956676140244, "learning_rate": 6.365593505457812e-07, "loss": 0.6285, "step": 27489 }, { "epoch": 0.8425278901556945, "grad_norm": 2.082287710749376, "learning_rate": 6.363170309502054e-07, "loss": 0.5409, "step": 27490 }, { "epoch": 0.8425585386784357, "grad_norm": 2.1142005879437726, "learning_rate": 6.360747543511131e-07, "loss": 0.6184, "step": 27491 }, { "epoch": 0.8425891872011769, "grad_norm": 2.174246981570346, "learning_rate": 6.35832520750893e-07, "loss": 0.5231, "step": 27492 }, { "epoch": 0.8426198357239181, "grad_norm": 1.9162158088487409, "learning_rate": 6.355903301519323e-07, "loss": 0.5514, "step": 27493 }, { "epoch": 0.8426504842466593, "grad_norm": 1.8503152689366047, "learning_rate": 6.35348182556616e-07, "loss": 0.5868, "step": 27494 }, { "epoch": 0.8426811327694005, "grad_norm": 1.7803922427864762, "learning_rate": 6.351060779673318e-07, "loss": 0.5423, "step": 27495 }, { "epoch": 0.8427117812921417, "grad_norm": 1.9419240774486495, "learning_rate": 6.348640163864633e-07, "loss": 0.589, "step": 27496 }, { "epoch": 0.8427424298148829, "grad_norm": 0.7852189902059026, "learning_rate": 6.346219978163964e-07, "loss": 0.3992, "step": 27497 }, { "epoch": 0.8427730783376242, "grad_norm": 1.9971739416619594, "learning_rate": 6.34380022259517e-07, "loss": 0.5598, "step": 27498 }, { "epoch": 0.8428037268603653, "grad_norm": 2.043721228229904, "learning_rate": 6.34138089718207e-07, "loss": 0.6432, "step": 27499 }, { "epoch": 0.8428343753831066, "grad_norm": 1.8038696300882253, "learning_rate": 6.338962001948512e-07, "loss": 0.4973, "step": 27500 }, { "epoch": 0.8428650239058477, "grad_norm": 1.663003647222235, "learning_rate": 6.336543536918349e-07, "loss": 0.5528, "step": 27501 }, { "epoch": 0.842895672428589, "grad_norm": 1.7564884590029965, "learning_rate": 6.334125502115385e-07, "loss": 0.5708, "step": 27502 }, { "epoch": 0.8429263209513301, "grad_norm": 1.9223172316652817, "learning_rate": 6.331707897563449e-07, "loss": 0.5349, "step": 27503 }, { "epoch": 0.8429569694740714, "grad_norm": 0.8044549935754616, "learning_rate": 6.32929072328638e-07, "loss": 0.3929, "step": 27504 }, { "epoch": 0.8429876179968125, "grad_norm": 1.833331573234653, "learning_rate": 6.326873979307973e-07, "loss": 0.5357, "step": 27505 }, { "epoch": 0.8430182665195538, "grad_norm": 1.8146689813109003, "learning_rate": 6.324457665652062e-07, "loss": 0.4302, "step": 27506 }, { "epoch": 0.843048915042295, "grad_norm": 1.9684643283681786, "learning_rate": 6.322041782342437e-07, "loss": 0.6375, "step": 27507 }, { "epoch": 0.8430795635650362, "grad_norm": 0.817231055392326, "learning_rate": 6.319626329402906e-07, "loss": 0.3991, "step": 27508 }, { "epoch": 0.8431102120877774, "grad_norm": 1.8359027048941345, "learning_rate": 6.317211306857285e-07, "loss": 0.5674, "step": 27509 }, { "epoch": 0.8431408606105186, "grad_norm": 2.0925590828002574, "learning_rate": 6.314796714729343e-07, "loss": 0.6662, "step": 27510 }, { "epoch": 0.8431715091332598, "grad_norm": 1.8243629591781942, "learning_rate": 6.312382553042889e-07, "loss": 0.5934, "step": 27511 }, { "epoch": 0.8432021576560009, "grad_norm": 1.8624311563878952, "learning_rate": 6.309968821821721e-07, "loss": 0.5275, "step": 27512 }, { "epoch": 0.8432328061787422, "grad_norm": 0.8375342096750676, "learning_rate": 6.307555521089598e-07, "loss": 0.4139, "step": 27513 }, { "epoch": 0.8432634547014833, "grad_norm": 1.972547173337922, "learning_rate": 6.305142650870316e-07, "loss": 0.6349, "step": 27514 }, { "epoch": 0.8432941032242246, "grad_norm": 1.900514348437833, "learning_rate": 6.302730211187635e-07, "loss": 0.5243, "step": 27515 }, { "epoch": 0.8433247517469658, "grad_norm": 1.607309909534858, "learning_rate": 6.300318202065337e-07, "loss": 0.427, "step": 27516 }, { "epoch": 0.843355400269707, "grad_norm": 1.7606678402340918, "learning_rate": 6.297906623527189e-07, "loss": 0.6262, "step": 27517 }, { "epoch": 0.8433860487924482, "grad_norm": 2.0116332733975235, "learning_rate": 6.295495475596941e-07, "loss": 0.6338, "step": 27518 }, { "epoch": 0.8434166973151894, "grad_norm": 1.9947774829309566, "learning_rate": 6.293084758298356e-07, "loss": 0.5532, "step": 27519 }, { "epoch": 0.8434473458379306, "grad_norm": 2.0240460014720147, "learning_rate": 6.290674471655206e-07, "loss": 0.5498, "step": 27520 }, { "epoch": 0.8434779943606718, "grad_norm": 1.7954115428133073, "learning_rate": 6.288264615691209e-07, "loss": 0.5442, "step": 27521 }, { "epoch": 0.843508642883413, "grad_norm": 1.8497955418968104, "learning_rate": 6.285855190430129e-07, "loss": 0.4707, "step": 27522 }, { "epoch": 0.8435392914061542, "grad_norm": 1.8557216015245186, "learning_rate": 6.28344619589571e-07, "loss": 0.5127, "step": 27523 }, { "epoch": 0.8435699399288954, "grad_norm": 1.8160714555645654, "learning_rate": 6.28103763211167e-07, "loss": 0.518, "step": 27524 }, { "epoch": 0.8436005884516367, "grad_norm": 1.8161347316533993, "learning_rate": 6.278629499101763e-07, "loss": 0.5456, "step": 27525 }, { "epoch": 0.8436312369743778, "grad_norm": 1.5201208140370486, "learning_rate": 6.276221796889692e-07, "loss": 0.4868, "step": 27526 }, { "epoch": 0.8436618854971191, "grad_norm": 1.8556156171590505, "learning_rate": 6.2738145254992e-07, "loss": 0.6415, "step": 27527 }, { "epoch": 0.8436925340198602, "grad_norm": 1.7936393992724162, "learning_rate": 6.271407684954012e-07, "loss": 0.5553, "step": 27528 }, { "epoch": 0.8437231825426015, "grad_norm": 0.7845988159658914, "learning_rate": 6.269001275277819e-07, "loss": 0.4005, "step": 27529 }, { "epoch": 0.8437538310653426, "grad_norm": 1.8687103503109002, "learning_rate": 6.266595296494349e-07, "loss": 0.5686, "step": 27530 }, { "epoch": 0.8437844795880839, "grad_norm": 0.7962057042931103, "learning_rate": 6.264189748627314e-07, "loss": 0.4096, "step": 27531 }, { "epoch": 0.843815128110825, "grad_norm": 1.721669496532375, "learning_rate": 6.261784631700396e-07, "loss": 0.5721, "step": 27532 }, { "epoch": 0.8438457766335663, "grad_norm": 1.847543727287113, "learning_rate": 6.25937994573731e-07, "loss": 0.4385, "step": 27533 }, { "epoch": 0.8438764251563075, "grad_norm": 1.941462270260146, "learning_rate": 6.256975690761746e-07, "loss": 0.5649, "step": 27534 }, { "epoch": 0.8439070736790487, "grad_norm": 0.7955177293893299, "learning_rate": 6.254571866797388e-07, "loss": 0.4186, "step": 27535 }, { "epoch": 0.8439377222017899, "grad_norm": 1.9404414350480077, "learning_rate": 6.252168473867937e-07, "loss": 0.5653, "step": 27536 }, { "epoch": 0.8439683707245311, "grad_norm": 2.0715789728419813, "learning_rate": 6.249765511997041e-07, "loss": 0.6262, "step": 27537 }, { "epoch": 0.8439990192472723, "grad_norm": 2.0692076992848647, "learning_rate": 6.247362981208422e-07, "loss": 0.5542, "step": 27538 }, { "epoch": 0.8440296677700135, "grad_norm": 1.6933457998212949, "learning_rate": 6.24496088152573e-07, "loss": 0.4897, "step": 27539 }, { "epoch": 0.8440603162927547, "grad_norm": 0.7768785321986683, "learning_rate": 6.242559212972621e-07, "loss": 0.4078, "step": 27540 }, { "epoch": 0.844090964815496, "grad_norm": 1.9443066250285237, "learning_rate": 6.240157975572775e-07, "loss": 0.5759, "step": 27541 }, { "epoch": 0.8441216133382371, "grad_norm": 2.0166592924501963, "learning_rate": 6.237757169349856e-07, "loss": 0.5735, "step": 27542 }, { "epoch": 0.8441522618609782, "grad_norm": 0.8066772202795817, "learning_rate": 6.235356794327507e-07, "loss": 0.3862, "step": 27543 }, { "epoch": 0.8441829103837195, "grad_norm": 1.9430968274436495, "learning_rate": 6.232956850529381e-07, "loss": 0.6254, "step": 27544 }, { "epoch": 0.8442135589064607, "grad_norm": 0.7748626107654407, "learning_rate": 6.230557337979126e-07, "loss": 0.3799, "step": 27545 }, { "epoch": 0.8442442074292019, "grad_norm": 1.7064225716806198, "learning_rate": 6.228158256700407e-07, "loss": 0.5326, "step": 27546 }, { "epoch": 0.8442748559519431, "grad_norm": 1.8356842866395127, "learning_rate": 6.225759606716841e-07, "loss": 0.5422, "step": 27547 }, { "epoch": 0.8443055044746843, "grad_norm": 1.7467184119284678, "learning_rate": 6.223361388052041e-07, "loss": 0.4946, "step": 27548 }, { "epoch": 0.8443361529974255, "grad_norm": 1.7345143482233214, "learning_rate": 6.22096360072969e-07, "loss": 0.519, "step": 27549 }, { "epoch": 0.8443668015201667, "grad_norm": 1.7027784152136196, "learning_rate": 6.218566244773383e-07, "loss": 0.5512, "step": 27550 }, { "epoch": 0.8443974500429079, "grad_norm": 0.7734516451791712, "learning_rate": 6.216169320206733e-07, "loss": 0.3806, "step": 27551 }, { "epoch": 0.8444280985656492, "grad_norm": 2.1413333256608444, "learning_rate": 6.213772827053366e-07, "loss": 0.5622, "step": 27552 }, { "epoch": 0.8444587470883903, "grad_norm": 1.785124337951291, "learning_rate": 6.211376765336913e-07, "loss": 0.5927, "step": 27553 }, { "epoch": 0.8444893956111316, "grad_norm": 1.9272066982421305, "learning_rate": 6.20898113508096e-07, "loss": 0.5238, "step": 27554 }, { "epoch": 0.8445200441338727, "grad_norm": 1.904375454690035, "learning_rate": 6.206585936309117e-07, "loss": 0.5551, "step": 27555 }, { "epoch": 0.844550692656614, "grad_norm": 2.2703597797438433, "learning_rate": 6.204191169044987e-07, "loss": 0.6663, "step": 27556 }, { "epoch": 0.8445813411793551, "grad_norm": 0.7715704060389049, "learning_rate": 6.201796833312179e-07, "loss": 0.3845, "step": 27557 }, { "epoch": 0.8446119897020964, "grad_norm": 2.015316752215406, "learning_rate": 6.199402929134273e-07, "loss": 0.5298, "step": 27558 }, { "epoch": 0.8446426382248375, "grad_norm": 1.7546331672530653, "learning_rate": 6.197009456534847e-07, "loss": 0.5341, "step": 27559 }, { "epoch": 0.8446732867475788, "grad_norm": 1.919588852069275, "learning_rate": 6.194616415537496e-07, "loss": 0.5916, "step": 27560 }, { "epoch": 0.84470393527032, "grad_norm": 1.995304338694731, "learning_rate": 6.192223806165809e-07, "loss": 0.501, "step": 27561 }, { "epoch": 0.8447345837930612, "grad_norm": 1.8968962647215828, "learning_rate": 6.189831628443333e-07, "loss": 0.5553, "step": 27562 }, { "epoch": 0.8447652323158024, "grad_norm": 1.767804264328688, "learning_rate": 6.187439882393659e-07, "loss": 0.5461, "step": 27563 }, { "epoch": 0.8447958808385436, "grad_norm": 1.6455310329641466, "learning_rate": 6.185048568040347e-07, "loss": 0.5776, "step": 27564 }, { "epoch": 0.8448265293612848, "grad_norm": 1.9393455784610298, "learning_rate": 6.182657685406979e-07, "loss": 0.6124, "step": 27565 }, { "epoch": 0.844857177884026, "grad_norm": 1.9887598796479191, "learning_rate": 6.18026723451709e-07, "loss": 0.6704, "step": 27566 }, { "epoch": 0.8448878264067672, "grad_norm": 2.091697952551731, "learning_rate": 6.177877215394218e-07, "loss": 0.5568, "step": 27567 }, { "epoch": 0.8449184749295084, "grad_norm": 2.1999232881863096, "learning_rate": 6.17548762806196e-07, "loss": 0.6039, "step": 27568 }, { "epoch": 0.8449491234522496, "grad_norm": 2.006647770050431, "learning_rate": 6.173098472543831e-07, "loss": 0.5426, "step": 27569 }, { "epoch": 0.8449797719749909, "grad_norm": 1.7588370152496557, "learning_rate": 6.170709748863368e-07, "loss": 0.5417, "step": 27570 }, { "epoch": 0.845010420497732, "grad_norm": 2.0031671692996813, "learning_rate": 6.168321457044119e-07, "loss": 0.5371, "step": 27571 }, { "epoch": 0.8450410690204733, "grad_norm": 1.8981685981265264, "learning_rate": 6.165933597109608e-07, "loss": 0.5593, "step": 27572 }, { "epoch": 0.8450717175432144, "grad_norm": 1.9378216124561989, "learning_rate": 6.163546169083384e-07, "loss": 0.5249, "step": 27573 }, { "epoch": 0.8451023660659556, "grad_norm": 1.9384873470857777, "learning_rate": 6.161159172988939e-07, "loss": 0.4928, "step": 27574 }, { "epoch": 0.8451330145886968, "grad_norm": 1.866665325213252, "learning_rate": 6.158772608849817e-07, "loss": 0.5105, "step": 27575 }, { "epoch": 0.845163663111438, "grad_norm": 1.7098863931110766, "learning_rate": 6.156386476689529e-07, "loss": 0.5319, "step": 27576 }, { "epoch": 0.8451943116341792, "grad_norm": 0.7861496539998927, "learning_rate": 6.154000776531588e-07, "loss": 0.3794, "step": 27577 }, { "epoch": 0.8452249601569204, "grad_norm": 1.9812826814705173, "learning_rate": 6.151615508399472e-07, "loss": 0.6315, "step": 27578 }, { "epoch": 0.8452556086796617, "grad_norm": 1.9037653716229186, "learning_rate": 6.149230672316731e-07, "loss": 0.6366, "step": 27579 }, { "epoch": 0.8452862572024028, "grad_norm": 2.010433779803031, "learning_rate": 6.146846268306839e-07, "loss": 0.5959, "step": 27580 }, { "epoch": 0.8453169057251441, "grad_norm": 1.8959404245222626, "learning_rate": 6.144462296393277e-07, "loss": 0.5362, "step": 27581 }, { "epoch": 0.8453475542478852, "grad_norm": 1.9338537494654013, "learning_rate": 6.142078756599551e-07, "loss": 0.5963, "step": 27582 }, { "epoch": 0.8453782027706265, "grad_norm": 2.015172596615257, "learning_rate": 6.139695648949145e-07, "loss": 0.5863, "step": 27583 }, { "epoch": 0.8454088512933676, "grad_norm": 0.7384442900369069, "learning_rate": 6.137312973465553e-07, "loss": 0.3864, "step": 27584 }, { "epoch": 0.8454394998161089, "grad_norm": 2.0965166719660524, "learning_rate": 6.134930730172223e-07, "loss": 0.6322, "step": 27585 }, { "epoch": 0.84547014833885, "grad_norm": 1.922037570765306, "learning_rate": 6.132548919092652e-07, "loss": 0.6477, "step": 27586 }, { "epoch": 0.8455007968615913, "grad_norm": 0.7708131377335563, "learning_rate": 6.130167540250304e-07, "loss": 0.3938, "step": 27587 }, { "epoch": 0.8455314453843324, "grad_norm": 1.8525139059829883, "learning_rate": 6.127786593668644e-07, "loss": 0.452, "step": 27588 }, { "epoch": 0.8455620939070737, "grad_norm": 1.9614217755221601, "learning_rate": 6.125406079371104e-07, "loss": 0.5815, "step": 27589 }, { "epoch": 0.8455927424298149, "grad_norm": 4.3277183165194275, "learning_rate": 6.123025997381182e-07, "loss": 0.512, "step": 27590 }, { "epoch": 0.8456233909525561, "grad_norm": 1.6729328106347234, "learning_rate": 6.120646347722304e-07, "loss": 0.5776, "step": 27591 }, { "epoch": 0.8456540394752973, "grad_norm": 1.6851851740494104, "learning_rate": 6.118267130417938e-07, "loss": 0.5496, "step": 27592 }, { "epoch": 0.8456846879980385, "grad_norm": 2.115514995683307, "learning_rate": 6.115888345491499e-07, "loss": 0.5749, "step": 27593 }, { "epoch": 0.8457153365207797, "grad_norm": 0.829417990870581, "learning_rate": 6.113509992966443e-07, "loss": 0.423, "step": 27594 }, { "epoch": 0.8457459850435209, "grad_norm": 1.731030428175851, "learning_rate": 6.11113207286621e-07, "loss": 0.5061, "step": 27595 }, { "epoch": 0.8457766335662621, "grad_norm": 1.7901000183990088, "learning_rate": 6.108754585214211e-07, "loss": 0.5719, "step": 27596 }, { "epoch": 0.8458072820890034, "grad_norm": 1.9819892663192578, "learning_rate": 6.106377530033885e-07, "loss": 0.5507, "step": 27597 }, { "epoch": 0.8458379306117445, "grad_norm": 2.001934381582051, "learning_rate": 6.104000907348662e-07, "loss": 0.6114, "step": 27598 }, { "epoch": 0.8458685791344858, "grad_norm": 1.9060077914395757, "learning_rate": 6.101624717181953e-07, "loss": 0.6113, "step": 27599 }, { "epoch": 0.8458992276572269, "grad_norm": 1.9728557024781357, "learning_rate": 6.099248959557141e-07, "loss": 0.5297, "step": 27600 }, { "epoch": 0.8459298761799682, "grad_norm": 1.8288803764895463, "learning_rate": 6.096873634497685e-07, "loss": 0.5977, "step": 27601 }, { "epoch": 0.8459605247027093, "grad_norm": 0.8005900757223732, "learning_rate": 6.094498742026955e-07, "loss": 0.42, "step": 27602 }, { "epoch": 0.8459911732254506, "grad_norm": 1.8909297338815019, "learning_rate": 6.092124282168377e-07, "loss": 0.4976, "step": 27603 }, { "epoch": 0.8460218217481917, "grad_norm": 1.8881240975982496, "learning_rate": 6.089750254945314e-07, "loss": 0.628, "step": 27604 }, { "epoch": 0.8460524702709329, "grad_norm": 1.875179302820956, "learning_rate": 6.087376660381183e-07, "loss": 0.588, "step": 27605 }, { "epoch": 0.8460831187936741, "grad_norm": 1.7610402783370476, "learning_rate": 6.085003498499376e-07, "loss": 0.4686, "step": 27606 }, { "epoch": 0.8461137673164153, "grad_norm": 2.0261384578231616, "learning_rate": 6.08263076932325e-07, "loss": 0.5739, "step": 27607 }, { "epoch": 0.8461444158391566, "grad_norm": 0.8370044739451207, "learning_rate": 6.080258472876205e-07, "loss": 0.3862, "step": 27608 }, { "epoch": 0.8461750643618977, "grad_norm": 1.9160581058493837, "learning_rate": 6.077886609181621e-07, "loss": 0.6686, "step": 27609 }, { "epoch": 0.846205712884639, "grad_norm": 1.8410109638543368, "learning_rate": 6.075515178262841e-07, "loss": 0.5764, "step": 27610 }, { "epoch": 0.8462363614073801, "grad_norm": 1.6036624371023407, "learning_rate": 6.073144180143265e-07, "loss": 0.5382, "step": 27611 }, { "epoch": 0.8462670099301214, "grad_norm": 1.8537455467066957, "learning_rate": 6.070773614846226e-07, "loss": 0.5075, "step": 27612 }, { "epoch": 0.8462976584528625, "grad_norm": 1.970070845668852, "learning_rate": 6.068403482395097e-07, "loss": 0.5697, "step": 27613 }, { "epoch": 0.8463283069756038, "grad_norm": 1.757485874740266, "learning_rate": 6.066033782813241e-07, "loss": 0.6001, "step": 27614 }, { "epoch": 0.846358955498345, "grad_norm": 1.9340803395502735, "learning_rate": 6.063664516123979e-07, "loss": 0.5651, "step": 27615 }, { "epoch": 0.8463896040210862, "grad_norm": 1.7963325758524114, "learning_rate": 6.061295682350676e-07, "loss": 0.5074, "step": 27616 }, { "epoch": 0.8464202525438274, "grad_norm": 1.832816850847356, "learning_rate": 6.058927281516675e-07, "loss": 0.5702, "step": 27617 }, { "epoch": 0.8464509010665686, "grad_norm": 1.9282537453161148, "learning_rate": 6.056559313645299e-07, "loss": 0.5577, "step": 27618 }, { "epoch": 0.8464815495893098, "grad_norm": 1.8483384062602102, "learning_rate": 6.054191778759888e-07, "loss": 0.5795, "step": 27619 }, { "epoch": 0.846512198112051, "grad_norm": 2.080497655647621, "learning_rate": 6.051824676883777e-07, "loss": 0.5498, "step": 27620 }, { "epoch": 0.8465428466347922, "grad_norm": 2.0845365591663017, "learning_rate": 6.049458008040276e-07, "loss": 0.6552, "step": 27621 }, { "epoch": 0.8465734951575334, "grad_norm": 1.951664809751996, "learning_rate": 6.047091772252716e-07, "loss": 0.5201, "step": 27622 }, { "epoch": 0.8466041436802746, "grad_norm": 1.9252341216911757, "learning_rate": 6.044725969544401e-07, "loss": 0.6039, "step": 27623 }, { "epoch": 0.8466347922030159, "grad_norm": 0.781562638183723, "learning_rate": 6.042360599938646e-07, "loss": 0.3848, "step": 27624 }, { "epoch": 0.846665440725757, "grad_norm": 0.8089965762215849, "learning_rate": 6.039995663458765e-07, "loss": 0.4039, "step": 27625 }, { "epoch": 0.8466960892484983, "grad_norm": 1.6660721245783892, "learning_rate": 6.037631160128049e-07, "loss": 0.5967, "step": 27626 }, { "epoch": 0.8467267377712394, "grad_norm": 1.8918036074783093, "learning_rate": 6.035267089969798e-07, "loss": 0.4804, "step": 27627 }, { "epoch": 0.8467573862939807, "grad_norm": 2.1607844513719248, "learning_rate": 6.032903453007322e-07, "loss": 0.6914, "step": 27628 }, { "epoch": 0.8467880348167218, "grad_norm": 0.7845617010080653, "learning_rate": 6.030540249263889e-07, "loss": 0.3858, "step": 27629 }, { "epoch": 0.8468186833394631, "grad_norm": 1.87790567647957, "learning_rate": 6.028177478762793e-07, "loss": 0.5584, "step": 27630 }, { "epoch": 0.8468493318622042, "grad_norm": 1.7903563664038087, "learning_rate": 6.025815141527319e-07, "loss": 0.4638, "step": 27631 }, { "epoch": 0.8468799803849455, "grad_norm": 1.7157770562525254, "learning_rate": 6.023453237580734e-07, "loss": 0.4863, "step": 27632 }, { "epoch": 0.8469106289076866, "grad_norm": 2.0414261731386256, "learning_rate": 6.021091766946329e-07, "loss": 0.5576, "step": 27633 }, { "epoch": 0.8469412774304279, "grad_norm": 2.0390604284893383, "learning_rate": 6.018730729647343e-07, "loss": 0.6065, "step": 27634 }, { "epoch": 0.8469719259531691, "grad_norm": 1.8570362027288694, "learning_rate": 6.01637012570706e-07, "loss": 0.5348, "step": 27635 }, { "epoch": 0.8470025744759102, "grad_norm": 1.9617465006228685, "learning_rate": 6.014009955148748e-07, "loss": 0.6099, "step": 27636 }, { "epoch": 0.8470332229986515, "grad_norm": 1.7807935330779519, "learning_rate": 6.011650217995634e-07, "loss": 0.5973, "step": 27637 }, { "epoch": 0.8470638715213926, "grad_norm": 0.8193431836334945, "learning_rate": 6.009290914270993e-07, "loss": 0.3967, "step": 27638 }, { "epoch": 0.8470945200441339, "grad_norm": 1.7312810288194294, "learning_rate": 6.00693204399807e-07, "loss": 0.466, "step": 27639 }, { "epoch": 0.847125168566875, "grad_norm": 1.8717714021776446, "learning_rate": 6.004573607200087e-07, "loss": 0.6312, "step": 27640 }, { "epoch": 0.8471558170896163, "grad_norm": 1.9265630103477975, "learning_rate": 6.002215603900302e-07, "loss": 0.6034, "step": 27641 }, { "epoch": 0.8471864656123574, "grad_norm": 1.8929371664261165, "learning_rate": 5.999858034121958e-07, "loss": 0.6114, "step": 27642 }, { "epoch": 0.8472171141350987, "grad_norm": 0.7473659513146266, "learning_rate": 5.997500897888253e-07, "loss": 0.3754, "step": 27643 }, { "epoch": 0.8472477626578399, "grad_norm": 1.8896956139597734, "learning_rate": 5.995144195222447e-07, "loss": 0.479, "step": 27644 }, { "epoch": 0.8472784111805811, "grad_norm": 1.9389748956627195, "learning_rate": 5.992787926147731e-07, "loss": 0.5817, "step": 27645 }, { "epoch": 0.8473090597033223, "grad_norm": 1.7354516961880286, "learning_rate": 5.990432090687332e-07, "loss": 0.6207, "step": 27646 }, { "epoch": 0.8473397082260635, "grad_norm": 1.869896638643898, "learning_rate": 5.98807668886448e-07, "loss": 0.4861, "step": 27647 }, { "epoch": 0.8473703567488047, "grad_norm": 1.7214583412064366, "learning_rate": 5.985721720702359e-07, "loss": 0.5253, "step": 27648 }, { "epoch": 0.8474010052715459, "grad_norm": 0.8621378083086043, "learning_rate": 5.983367186224182e-07, "loss": 0.417, "step": 27649 }, { "epoch": 0.8474316537942871, "grad_norm": 1.6976453504221116, "learning_rate": 5.981013085453158e-07, "loss": 0.5107, "step": 27650 }, { "epoch": 0.8474623023170283, "grad_norm": 0.760471320759579, "learning_rate": 5.978659418412469e-07, "loss": 0.3963, "step": 27651 }, { "epoch": 0.8474929508397695, "grad_norm": 1.8325599094428884, "learning_rate": 5.976306185125314e-07, "loss": 0.5258, "step": 27652 }, { "epoch": 0.8475235993625108, "grad_norm": 1.8367885112450961, "learning_rate": 5.973953385614883e-07, "loss": 0.5462, "step": 27653 }, { "epoch": 0.8475542478852519, "grad_norm": 2.0206085867334647, "learning_rate": 5.971601019904344e-07, "loss": 0.613, "step": 27654 }, { "epoch": 0.8475848964079932, "grad_norm": 1.686938974910816, "learning_rate": 5.969249088016899e-07, "loss": 0.5593, "step": 27655 }, { "epoch": 0.8476155449307343, "grad_norm": 1.8187435945821393, "learning_rate": 5.966897589975695e-07, "loss": 0.5576, "step": 27656 }, { "epoch": 0.8476461934534756, "grad_norm": 0.7575621373263949, "learning_rate": 5.964546525803916e-07, "loss": 0.3937, "step": 27657 }, { "epoch": 0.8476768419762167, "grad_norm": 1.8737697127544015, "learning_rate": 5.962195895524742e-07, "loss": 0.5619, "step": 27658 }, { "epoch": 0.847707490498958, "grad_norm": 1.9150798803780624, "learning_rate": 5.959845699161304e-07, "loss": 0.6112, "step": 27659 }, { "epoch": 0.8477381390216991, "grad_norm": 0.7796086681192073, "learning_rate": 5.957495936736774e-07, "loss": 0.3913, "step": 27660 }, { "epoch": 0.8477687875444404, "grad_norm": 1.9533503278570592, "learning_rate": 5.955146608274321e-07, "loss": 0.5898, "step": 27661 }, { "epoch": 0.8477994360671816, "grad_norm": 1.977721306694795, "learning_rate": 5.952797713797065e-07, "loss": 0.531, "step": 27662 }, { "epoch": 0.8478300845899228, "grad_norm": 0.7541772167425208, "learning_rate": 5.950449253328172e-07, "loss": 0.3822, "step": 27663 }, { "epoch": 0.847860733112664, "grad_norm": 0.8029495508841775, "learning_rate": 5.948101226890752e-07, "loss": 0.3907, "step": 27664 }, { "epoch": 0.8478913816354052, "grad_norm": 1.985752108700521, "learning_rate": 5.945753634507983e-07, "loss": 0.5813, "step": 27665 }, { "epoch": 0.8479220301581464, "grad_norm": 1.57138768543739, "learning_rate": 5.943406476202973e-07, "loss": 0.5264, "step": 27666 }, { "epoch": 0.8479526786808875, "grad_norm": 0.8134743577197276, "learning_rate": 5.941059751998846e-07, "loss": 0.4004, "step": 27667 }, { "epoch": 0.8479833272036288, "grad_norm": 1.6676968559529834, "learning_rate": 5.938713461918727e-07, "loss": 0.536, "step": 27668 }, { "epoch": 0.8480139757263699, "grad_norm": 1.9382991855012124, "learning_rate": 5.936367605985749e-07, "loss": 0.5977, "step": 27669 }, { "epoch": 0.8480446242491112, "grad_norm": 1.9349997090576767, "learning_rate": 5.934022184223004e-07, "loss": 0.5783, "step": 27670 }, { "epoch": 0.8480752727718524, "grad_norm": 0.8158978269215204, "learning_rate": 5.931677196653612e-07, "loss": 0.4327, "step": 27671 }, { "epoch": 0.8481059212945936, "grad_norm": 0.7858958794929292, "learning_rate": 5.929332643300683e-07, "loss": 0.398, "step": 27672 }, { "epoch": 0.8481365698173348, "grad_norm": 1.7875858684432218, "learning_rate": 5.926988524187327e-07, "loss": 0.591, "step": 27673 }, { "epoch": 0.848167218340076, "grad_norm": 1.8293405926772008, "learning_rate": 5.924644839336625e-07, "loss": 0.5541, "step": 27674 }, { "epoch": 0.8481978668628172, "grad_norm": 1.9594127426982566, "learning_rate": 5.922301588771657e-07, "loss": 0.5426, "step": 27675 }, { "epoch": 0.8482285153855584, "grad_norm": 2.1987481779131106, "learning_rate": 5.919958772515549e-07, "loss": 0.6789, "step": 27676 }, { "epoch": 0.8482591639082996, "grad_norm": 1.9427658029369046, "learning_rate": 5.917616390591363e-07, "loss": 0.589, "step": 27677 }, { "epoch": 0.8482898124310408, "grad_norm": 1.7324630105705157, "learning_rate": 5.915274443022179e-07, "loss": 0.5546, "step": 27678 }, { "epoch": 0.848320460953782, "grad_norm": 1.7974715719748426, "learning_rate": 5.912932929831066e-07, "loss": 0.588, "step": 27679 }, { "epoch": 0.8483511094765233, "grad_norm": 2.130370414861186, "learning_rate": 5.91059185104112e-07, "loss": 0.5792, "step": 27680 }, { "epoch": 0.8483817579992644, "grad_norm": 2.179908702489199, "learning_rate": 5.908251206675386e-07, "loss": 0.5539, "step": 27681 }, { "epoch": 0.8484124065220057, "grad_norm": 1.8297738795841578, "learning_rate": 5.90591099675693e-07, "loss": 0.6192, "step": 27682 }, { "epoch": 0.8484430550447468, "grad_norm": 1.7668037855680427, "learning_rate": 5.903571221308813e-07, "loss": 0.6006, "step": 27683 }, { "epoch": 0.8484737035674881, "grad_norm": 1.9368119778971642, "learning_rate": 5.901231880354108e-07, "loss": 0.5766, "step": 27684 }, { "epoch": 0.8485043520902292, "grad_norm": 1.6249033857407953, "learning_rate": 5.898892973915843e-07, "loss": 0.5651, "step": 27685 }, { "epoch": 0.8485350006129705, "grad_norm": 1.9152522835086296, "learning_rate": 5.896554502017049e-07, "loss": 0.553, "step": 27686 }, { "epoch": 0.8485656491357116, "grad_norm": 1.8890159780381086, "learning_rate": 5.894216464680813e-07, "loss": 0.5773, "step": 27687 }, { "epoch": 0.8485962976584529, "grad_norm": 1.9723964830955163, "learning_rate": 5.891878861930139e-07, "loss": 0.5365, "step": 27688 }, { "epoch": 0.848626946181194, "grad_norm": 0.7805097441535271, "learning_rate": 5.889541693788064e-07, "loss": 0.3948, "step": 27689 }, { "epoch": 0.8486575947039353, "grad_norm": 2.145748824492041, "learning_rate": 5.88720496027762e-07, "loss": 0.5096, "step": 27690 }, { "epoch": 0.8486882432266765, "grad_norm": 1.8444495325775205, "learning_rate": 5.884868661421833e-07, "loss": 0.5971, "step": 27691 }, { "epoch": 0.8487188917494177, "grad_norm": 2.058245196465411, "learning_rate": 5.882532797243734e-07, "loss": 0.5204, "step": 27692 }, { "epoch": 0.8487495402721589, "grad_norm": 1.9965831267821343, "learning_rate": 5.88019736776631e-07, "loss": 0.5907, "step": 27693 }, { "epoch": 0.8487801887949001, "grad_norm": 1.8728707905441482, "learning_rate": 5.877862373012599e-07, "loss": 0.4511, "step": 27694 }, { "epoch": 0.8488108373176413, "grad_norm": 1.7730318557425533, "learning_rate": 5.875527813005604e-07, "loss": 0.5906, "step": 27695 }, { "epoch": 0.8488414858403825, "grad_norm": 2.019741080249192, "learning_rate": 5.873193687768325e-07, "loss": 0.5413, "step": 27696 }, { "epoch": 0.8488721343631237, "grad_norm": 1.9559097681121327, "learning_rate": 5.870859997323746e-07, "loss": 0.5497, "step": 27697 }, { "epoch": 0.8489027828858648, "grad_norm": 1.6890043562867523, "learning_rate": 5.868526741694875e-07, "loss": 0.6326, "step": 27698 }, { "epoch": 0.8489334314086061, "grad_norm": 2.5696869501692294, "learning_rate": 5.866193920904706e-07, "loss": 0.626, "step": 27699 }, { "epoch": 0.8489640799313473, "grad_norm": 1.941336131661566, "learning_rate": 5.863861534976228e-07, "loss": 0.6486, "step": 27700 }, { "epoch": 0.8489947284540885, "grad_norm": 1.91176418377531, "learning_rate": 5.861529583932402e-07, "loss": 0.5783, "step": 27701 }, { "epoch": 0.8490253769768297, "grad_norm": 2.097372211310911, "learning_rate": 5.859198067796218e-07, "loss": 0.6981, "step": 27702 }, { "epoch": 0.8490560254995709, "grad_norm": 2.2159431286889806, "learning_rate": 5.856866986590665e-07, "loss": 0.6094, "step": 27703 }, { "epoch": 0.8490866740223121, "grad_norm": 0.7877377993568054, "learning_rate": 5.854536340338685e-07, "loss": 0.3945, "step": 27704 }, { "epoch": 0.8491173225450533, "grad_norm": 1.8939265768962772, "learning_rate": 5.852206129063248e-07, "loss": 0.6005, "step": 27705 }, { "epoch": 0.8491479710677945, "grad_norm": 2.0623161169783324, "learning_rate": 5.849876352787337e-07, "loss": 0.6189, "step": 27706 }, { "epoch": 0.8491786195905358, "grad_norm": 1.5790691688751022, "learning_rate": 5.847547011533882e-07, "loss": 0.4887, "step": 27707 }, { "epoch": 0.8492092681132769, "grad_norm": 2.1152634070765504, "learning_rate": 5.845218105325839e-07, "loss": 0.6171, "step": 27708 }, { "epoch": 0.8492399166360182, "grad_norm": 1.8864797608482688, "learning_rate": 5.842889634186161e-07, "loss": 0.6454, "step": 27709 }, { "epoch": 0.8492705651587593, "grad_norm": 0.7864453389387144, "learning_rate": 5.840561598137784e-07, "loss": 0.3968, "step": 27710 }, { "epoch": 0.8493012136815006, "grad_norm": 1.8675154077361096, "learning_rate": 5.838233997203668e-07, "loss": 0.5499, "step": 27711 }, { "epoch": 0.8493318622042417, "grad_norm": 2.3117395040877264, "learning_rate": 5.835906831406718e-07, "loss": 0.6094, "step": 27712 }, { "epoch": 0.849362510726983, "grad_norm": 1.966234396919756, "learning_rate": 5.833580100769881e-07, "loss": 0.5678, "step": 27713 }, { "epoch": 0.8493931592497241, "grad_norm": 0.7998168801230532, "learning_rate": 5.831253805316084e-07, "loss": 0.3992, "step": 27714 }, { "epoch": 0.8494238077724654, "grad_norm": 0.7720189223307833, "learning_rate": 5.828927945068252e-07, "loss": 0.3861, "step": 27715 }, { "epoch": 0.8494544562952066, "grad_norm": 1.7283859872247707, "learning_rate": 5.826602520049268e-07, "loss": 0.5162, "step": 27716 }, { "epoch": 0.8494851048179478, "grad_norm": 2.2630027352069573, "learning_rate": 5.824277530282096e-07, "loss": 0.6144, "step": 27717 }, { "epoch": 0.849515753340689, "grad_norm": 1.7015692892789716, "learning_rate": 5.821952975789608e-07, "loss": 0.4706, "step": 27718 }, { "epoch": 0.8495464018634302, "grad_norm": 1.9435570944443787, "learning_rate": 5.819628856594733e-07, "loss": 0.5116, "step": 27719 }, { "epoch": 0.8495770503861714, "grad_norm": 1.8650032898682236, "learning_rate": 5.817305172720344e-07, "loss": 0.4974, "step": 27720 }, { "epoch": 0.8496076989089126, "grad_norm": 0.8090217082764023, "learning_rate": 5.814981924189356e-07, "loss": 0.3911, "step": 27721 }, { "epoch": 0.8496383474316538, "grad_norm": 1.9731505933994826, "learning_rate": 5.812659111024666e-07, "loss": 0.6017, "step": 27722 }, { "epoch": 0.849668995954395, "grad_norm": 1.756774795645548, "learning_rate": 5.810336733249139e-07, "loss": 0.4757, "step": 27723 }, { "epoch": 0.8496996444771362, "grad_norm": 0.7914947791145174, "learning_rate": 5.808014790885674e-07, "loss": 0.4151, "step": 27724 }, { "epoch": 0.8497302929998775, "grad_norm": 1.9436554666756147, "learning_rate": 5.805693283957154e-07, "loss": 0.5275, "step": 27725 }, { "epoch": 0.8497609415226186, "grad_norm": 1.9708624403733432, "learning_rate": 5.803372212486436e-07, "loss": 0.5553, "step": 27726 }, { "epoch": 0.8497915900453599, "grad_norm": 1.7876219370599606, "learning_rate": 5.801051576496402e-07, "loss": 0.5472, "step": 27727 }, { "epoch": 0.849822238568101, "grad_norm": 1.897012935871373, "learning_rate": 5.798731376009925e-07, "loss": 0.5601, "step": 27728 }, { "epoch": 0.8498528870908422, "grad_norm": 2.1078340132096995, "learning_rate": 5.796411611049846e-07, "loss": 0.5432, "step": 27729 }, { "epoch": 0.8498835356135834, "grad_norm": 2.064132668800935, "learning_rate": 5.794092281639041e-07, "loss": 0.5694, "step": 27730 }, { "epoch": 0.8499141841363246, "grad_norm": 1.7904944893996957, "learning_rate": 5.791773387800348e-07, "loss": 0.5918, "step": 27731 }, { "epoch": 0.8499448326590658, "grad_norm": 1.7285457631680574, "learning_rate": 5.78945492955662e-07, "loss": 0.5637, "step": 27732 }, { "epoch": 0.849975481181807, "grad_norm": 1.896923839366322, "learning_rate": 5.787136906930719e-07, "loss": 0.5845, "step": 27733 }, { "epoch": 0.8500061297045483, "grad_norm": 0.8015220894243973, "learning_rate": 5.784819319945456e-07, "loss": 0.381, "step": 27734 }, { "epoch": 0.8500367782272894, "grad_norm": 1.756395244557813, "learning_rate": 5.782502168623688e-07, "loss": 0.5116, "step": 27735 }, { "epoch": 0.8500674267500307, "grad_norm": 1.6861575021816086, "learning_rate": 5.780185452988241e-07, "loss": 0.5788, "step": 27736 }, { "epoch": 0.8500980752727718, "grad_norm": 1.7981309282292355, "learning_rate": 5.777869173061939e-07, "loss": 0.6117, "step": 27737 }, { "epoch": 0.8501287237955131, "grad_norm": 1.672176294165713, "learning_rate": 5.7755533288676e-07, "loss": 0.5471, "step": 27738 }, { "epoch": 0.8501593723182542, "grad_norm": 1.717029833574175, "learning_rate": 5.773237920428065e-07, "loss": 0.6148, "step": 27739 }, { "epoch": 0.8501900208409955, "grad_norm": 1.7931120247588148, "learning_rate": 5.770922947766116e-07, "loss": 0.5576, "step": 27740 }, { "epoch": 0.8502206693637366, "grad_norm": 2.1518159046930245, "learning_rate": 5.768608410904597e-07, "loss": 0.5355, "step": 27741 }, { "epoch": 0.8502513178864779, "grad_norm": 1.828389063217523, "learning_rate": 5.766294309866283e-07, "loss": 0.596, "step": 27742 }, { "epoch": 0.850281966409219, "grad_norm": 1.8449576223826951, "learning_rate": 5.763980644673989e-07, "loss": 0.5177, "step": 27743 }, { "epoch": 0.8503126149319603, "grad_norm": 2.1930174819852017, "learning_rate": 5.761667415350519e-07, "loss": 0.5693, "step": 27744 }, { "epoch": 0.8503432634547015, "grad_norm": 2.1933579958853318, "learning_rate": 5.75935462191865e-07, "loss": 0.6383, "step": 27745 }, { "epoch": 0.8503739119774427, "grad_norm": 0.8081677478399539, "learning_rate": 5.757042264401186e-07, "loss": 0.4103, "step": 27746 }, { "epoch": 0.8504045605001839, "grad_norm": 1.9610642117422317, "learning_rate": 5.754730342820908e-07, "loss": 0.5115, "step": 27747 }, { "epoch": 0.8504352090229251, "grad_norm": 2.0655544763299507, "learning_rate": 5.752418857200582e-07, "loss": 0.5507, "step": 27748 }, { "epoch": 0.8504658575456663, "grad_norm": 1.8494014615625292, "learning_rate": 5.75010780756301e-07, "loss": 0.6386, "step": 27749 }, { "epoch": 0.8504965060684075, "grad_norm": 1.9299744669096466, "learning_rate": 5.747797193930932e-07, "loss": 0.6368, "step": 27750 }, { "epoch": 0.8505271545911487, "grad_norm": 2.161730395310195, "learning_rate": 5.745487016327134e-07, "loss": 0.5848, "step": 27751 }, { "epoch": 0.85055780311389, "grad_norm": 1.9192026995203342, "learning_rate": 5.74317727477438e-07, "loss": 0.549, "step": 27752 }, { "epoch": 0.8505884516366311, "grad_norm": 1.9137113481240244, "learning_rate": 5.740867969295422e-07, "loss": 0.5795, "step": 27753 }, { "epoch": 0.8506191001593724, "grad_norm": 2.0231775691357634, "learning_rate": 5.73855909991301e-07, "loss": 0.5634, "step": 27754 }, { "epoch": 0.8506497486821135, "grad_norm": 1.9773692358749357, "learning_rate": 5.736250666649911e-07, "loss": 0.6527, "step": 27755 }, { "epoch": 0.8506803972048548, "grad_norm": 1.9515193552756787, "learning_rate": 5.733942669528852e-07, "loss": 0.5444, "step": 27756 }, { "epoch": 0.8507110457275959, "grad_norm": 1.9953494811265668, "learning_rate": 5.731635108572581e-07, "loss": 0.6217, "step": 27757 }, { "epoch": 0.8507416942503372, "grad_norm": 0.8395444574715296, "learning_rate": 5.729327983803845e-07, "loss": 0.3812, "step": 27758 }, { "epoch": 0.8507723427730783, "grad_norm": 1.6773287132397814, "learning_rate": 5.727021295245356e-07, "loss": 0.5046, "step": 27759 }, { "epoch": 0.8508029912958195, "grad_norm": 1.9971348107740678, "learning_rate": 5.724715042919865e-07, "loss": 0.5628, "step": 27760 }, { "epoch": 0.8508336398185607, "grad_norm": 1.837319224448863, "learning_rate": 5.722409226850078e-07, "loss": 0.4624, "step": 27761 }, { "epoch": 0.8508642883413019, "grad_norm": 1.6956081699957126, "learning_rate": 5.720103847058717e-07, "loss": 0.5775, "step": 27762 }, { "epoch": 0.8508949368640432, "grad_norm": 1.947850540931733, "learning_rate": 5.717798903568517e-07, "loss": 0.5553, "step": 27763 }, { "epoch": 0.8509255853867843, "grad_norm": 2.1600593808953197, "learning_rate": 5.71549439640216e-07, "loss": 0.6475, "step": 27764 }, { "epoch": 0.8509562339095256, "grad_norm": 2.125925037702949, "learning_rate": 5.713190325582374e-07, "loss": 0.6386, "step": 27765 }, { "epoch": 0.8509868824322667, "grad_norm": 1.809697286683746, "learning_rate": 5.710886691131856e-07, "loss": 0.5803, "step": 27766 }, { "epoch": 0.851017530955008, "grad_norm": 1.9443810032609612, "learning_rate": 5.708583493073299e-07, "loss": 0.6381, "step": 27767 }, { "epoch": 0.8510481794777491, "grad_norm": 1.7270270654195097, "learning_rate": 5.706280731429404e-07, "loss": 0.5351, "step": 27768 }, { "epoch": 0.8510788280004904, "grad_norm": 1.6972328817815798, "learning_rate": 5.70397840622286e-07, "loss": 0.5099, "step": 27769 }, { "epoch": 0.8511094765232315, "grad_norm": 1.7457827522850942, "learning_rate": 5.701676517476345e-07, "loss": 0.5213, "step": 27770 }, { "epoch": 0.8511401250459728, "grad_norm": 1.7929237880515023, "learning_rate": 5.699375065212553e-07, "loss": 0.5635, "step": 27771 }, { "epoch": 0.851170773568714, "grad_norm": 1.830615364602478, "learning_rate": 5.697074049454138e-07, "loss": 0.5769, "step": 27772 }, { "epoch": 0.8512014220914552, "grad_norm": 1.7953051829462354, "learning_rate": 5.694773470223807e-07, "loss": 0.5904, "step": 27773 }, { "epoch": 0.8512320706141964, "grad_norm": 2.119037959132655, "learning_rate": 5.692473327544206e-07, "loss": 0.6141, "step": 27774 }, { "epoch": 0.8512627191369376, "grad_norm": 1.8362613671803072, "learning_rate": 5.690173621437995e-07, "loss": 0.5567, "step": 27775 }, { "epoch": 0.8512933676596788, "grad_norm": 1.8176537951830851, "learning_rate": 5.687874351927835e-07, "loss": 0.5645, "step": 27776 }, { "epoch": 0.85132401618242, "grad_norm": 0.7927902154035892, "learning_rate": 5.685575519036402e-07, "loss": 0.3848, "step": 27777 }, { "epoch": 0.8513546647051612, "grad_norm": 1.8722403783018904, "learning_rate": 5.683277122786318e-07, "loss": 0.5853, "step": 27778 }, { "epoch": 0.8513853132279025, "grad_norm": 1.7167918778221172, "learning_rate": 5.680979163200246e-07, "loss": 0.5463, "step": 27779 }, { "epoch": 0.8514159617506436, "grad_norm": 1.9125131729566789, "learning_rate": 5.678681640300837e-07, "loss": 0.6374, "step": 27780 }, { "epoch": 0.8514466102733849, "grad_norm": 1.9205289779174706, "learning_rate": 5.676384554110703e-07, "loss": 0.5481, "step": 27781 }, { "epoch": 0.851477258796126, "grad_norm": 0.790468456300217, "learning_rate": 5.674087904652509e-07, "loss": 0.4055, "step": 27782 }, { "epoch": 0.8515079073188673, "grad_norm": 2.0325037074454406, "learning_rate": 5.671791691948842e-07, "loss": 0.6093, "step": 27783 }, { "epoch": 0.8515385558416084, "grad_norm": 1.7356844276514327, "learning_rate": 5.669495916022377e-07, "loss": 0.5646, "step": 27784 }, { "epoch": 0.8515692043643497, "grad_norm": 1.9218718855143935, "learning_rate": 5.667200576895709e-07, "loss": 0.5519, "step": 27785 }, { "epoch": 0.8515998528870908, "grad_norm": 0.7640853268656586, "learning_rate": 5.664905674591448e-07, "loss": 0.4127, "step": 27786 }, { "epoch": 0.8516305014098321, "grad_norm": 1.8614473191967194, "learning_rate": 5.662611209132219e-07, "loss": 0.5825, "step": 27787 }, { "epoch": 0.8516611499325732, "grad_norm": 1.757102618404352, "learning_rate": 5.660317180540631e-07, "loss": 0.5812, "step": 27788 }, { "epoch": 0.8516917984553145, "grad_norm": 2.1956326880097965, "learning_rate": 5.658023588839273e-07, "loss": 0.6875, "step": 27789 }, { "epoch": 0.8517224469780557, "grad_norm": 2.0117024924554654, "learning_rate": 5.655730434050755e-07, "loss": 0.6074, "step": 27790 }, { "epoch": 0.8517530955007968, "grad_norm": 1.9459883216742373, "learning_rate": 5.653437716197669e-07, "loss": 0.6068, "step": 27791 }, { "epoch": 0.8517837440235381, "grad_norm": 1.8473477264174871, "learning_rate": 5.651145435302618e-07, "loss": 0.5449, "step": 27792 }, { "epoch": 0.8518143925462792, "grad_norm": 2.0201071161387456, "learning_rate": 5.648853591388181e-07, "loss": 0.6104, "step": 27793 }, { "epoch": 0.8518450410690205, "grad_norm": 1.8242840897757, "learning_rate": 5.646562184476928e-07, "loss": 0.6173, "step": 27794 }, { "epoch": 0.8518756895917616, "grad_norm": 1.8208566933467596, "learning_rate": 5.644271214591446e-07, "loss": 0.5493, "step": 27795 }, { "epoch": 0.8519063381145029, "grad_norm": 0.7884240569264828, "learning_rate": 5.641980681754317e-07, "loss": 0.4086, "step": 27796 }, { "epoch": 0.851936986637244, "grad_norm": 2.077287299461355, "learning_rate": 5.639690585988089e-07, "loss": 0.5691, "step": 27797 }, { "epoch": 0.8519676351599853, "grad_norm": 0.8392312516611843, "learning_rate": 5.637400927315339e-07, "loss": 0.3955, "step": 27798 }, { "epoch": 0.8519982836827265, "grad_norm": 1.9064610786950118, "learning_rate": 5.635111705758633e-07, "loss": 0.6334, "step": 27799 }, { "epoch": 0.8520289322054677, "grad_norm": 2.0701834688504857, "learning_rate": 5.63282292134053e-07, "loss": 0.6637, "step": 27800 }, { "epoch": 0.8520595807282089, "grad_norm": 1.9766074162692906, "learning_rate": 5.63053457408358e-07, "loss": 0.5869, "step": 27801 }, { "epoch": 0.8520902292509501, "grad_norm": 2.156063786986325, "learning_rate": 5.6282466640103e-07, "loss": 0.6628, "step": 27802 }, { "epoch": 0.8521208777736913, "grad_norm": 2.083316477214528, "learning_rate": 5.625959191143277e-07, "loss": 0.5047, "step": 27803 }, { "epoch": 0.8521515262964325, "grad_norm": 1.8770767296155761, "learning_rate": 5.623672155505038e-07, "loss": 0.5126, "step": 27804 }, { "epoch": 0.8521821748191737, "grad_norm": 1.8514690295783036, "learning_rate": 5.621385557118097e-07, "loss": 0.554, "step": 27805 }, { "epoch": 0.852212823341915, "grad_norm": 1.9088429207226107, "learning_rate": 5.619099396004996e-07, "loss": 0.6022, "step": 27806 }, { "epoch": 0.8522434718646561, "grad_norm": 1.7885361397767214, "learning_rate": 5.616813672188281e-07, "loss": 0.6301, "step": 27807 }, { "epoch": 0.8522741203873974, "grad_norm": 1.7594441444821023, "learning_rate": 5.614528385690443e-07, "loss": 0.5569, "step": 27808 }, { "epoch": 0.8523047689101385, "grad_norm": 1.9923732895633235, "learning_rate": 5.612243536534012e-07, "loss": 0.6007, "step": 27809 }, { "epoch": 0.8523354174328798, "grad_norm": 1.9150959173194342, "learning_rate": 5.609959124741504e-07, "loss": 0.5872, "step": 27810 }, { "epoch": 0.8523660659556209, "grad_norm": 2.165542196649166, "learning_rate": 5.60767515033544e-07, "loss": 0.6212, "step": 27811 }, { "epoch": 0.8523967144783622, "grad_norm": 1.993020748715085, "learning_rate": 5.605391613338307e-07, "loss": 0.5471, "step": 27812 }, { "epoch": 0.8524273630011033, "grad_norm": 2.1667562204173656, "learning_rate": 5.603108513772587e-07, "loss": 0.5813, "step": 27813 }, { "epoch": 0.8524580115238446, "grad_norm": 1.9680142130038163, "learning_rate": 5.600825851660824e-07, "loss": 0.5959, "step": 27814 }, { "epoch": 0.8524886600465857, "grad_norm": 0.7886970049878786, "learning_rate": 5.598543627025483e-07, "loss": 0.3753, "step": 27815 }, { "epoch": 0.852519308569327, "grad_norm": 1.9075708367362132, "learning_rate": 5.596261839889039e-07, "loss": 0.5573, "step": 27816 }, { "epoch": 0.8525499570920682, "grad_norm": 2.2641239024151925, "learning_rate": 5.593980490273987e-07, "loss": 0.6348, "step": 27817 }, { "epoch": 0.8525806056148094, "grad_norm": 1.9081558075407186, "learning_rate": 5.591699578202808e-07, "loss": 0.6126, "step": 27818 }, { "epoch": 0.8526112541375506, "grad_norm": 2.0559502822101083, "learning_rate": 5.589419103697991e-07, "loss": 0.6236, "step": 27819 }, { "epoch": 0.8526419026602918, "grad_norm": 1.8151178782109465, "learning_rate": 5.587139066781977e-07, "loss": 0.5552, "step": 27820 }, { "epoch": 0.852672551183033, "grad_norm": 1.6349831095245353, "learning_rate": 5.584859467477243e-07, "loss": 0.5097, "step": 27821 }, { "epoch": 0.8527031997057741, "grad_norm": 1.8696042323740845, "learning_rate": 5.582580305806262e-07, "loss": 0.5822, "step": 27822 }, { "epoch": 0.8527338482285154, "grad_norm": 1.869747583150493, "learning_rate": 5.580301581791487e-07, "loss": 0.5713, "step": 27823 }, { "epoch": 0.8527644967512565, "grad_norm": 1.9863493967209165, "learning_rate": 5.578023295455343e-07, "loss": 0.5024, "step": 27824 }, { "epoch": 0.8527951452739978, "grad_norm": 1.963925480583358, "learning_rate": 5.575745446820325e-07, "loss": 0.6021, "step": 27825 }, { "epoch": 0.852825793796739, "grad_norm": 2.0241979907786978, "learning_rate": 5.573468035908835e-07, "loss": 0.5812, "step": 27826 }, { "epoch": 0.8528564423194802, "grad_norm": 2.1646203255761396, "learning_rate": 5.571191062743347e-07, "loss": 0.567, "step": 27827 }, { "epoch": 0.8528870908422214, "grad_norm": 1.8959360072411107, "learning_rate": 5.568914527346269e-07, "loss": 0.5318, "step": 27828 }, { "epoch": 0.8529177393649626, "grad_norm": 1.8303386171377785, "learning_rate": 5.566638429740051e-07, "loss": 0.5302, "step": 27829 }, { "epoch": 0.8529483878877038, "grad_norm": 1.9340454976018588, "learning_rate": 5.564362769947118e-07, "loss": 0.5473, "step": 27830 }, { "epoch": 0.852979036410445, "grad_norm": 2.1754916064431145, "learning_rate": 5.562087547989875e-07, "loss": 0.5402, "step": 27831 }, { "epoch": 0.8530096849331862, "grad_norm": 2.0175078887907043, "learning_rate": 5.559812763890759e-07, "loss": 0.4982, "step": 27832 }, { "epoch": 0.8530403334559274, "grad_norm": 2.005851714490311, "learning_rate": 5.557538417672187e-07, "loss": 0.5636, "step": 27833 }, { "epoch": 0.8530709819786686, "grad_norm": 1.7152815290544514, "learning_rate": 5.555264509356556e-07, "loss": 0.6147, "step": 27834 }, { "epoch": 0.8531016305014099, "grad_norm": 1.6855206546789299, "learning_rate": 5.55299103896626e-07, "loss": 0.5727, "step": 27835 }, { "epoch": 0.853132279024151, "grad_norm": 1.7986609062028074, "learning_rate": 5.550718006523736e-07, "loss": 0.4855, "step": 27836 }, { "epoch": 0.8531629275468923, "grad_norm": 2.1247584034698215, "learning_rate": 5.548445412051345e-07, "loss": 0.6168, "step": 27837 }, { "epoch": 0.8531935760696334, "grad_norm": 2.0695182592930883, "learning_rate": 5.546173255571508e-07, "loss": 0.5734, "step": 27838 }, { "epoch": 0.8532242245923747, "grad_norm": 1.8493165366182538, "learning_rate": 5.543901537106594e-07, "loss": 0.5742, "step": 27839 }, { "epoch": 0.8532548731151158, "grad_norm": 1.8907634785068774, "learning_rate": 5.541630256678987e-07, "loss": 0.4976, "step": 27840 }, { "epoch": 0.8532855216378571, "grad_norm": 1.812383415231276, "learning_rate": 5.539359414311085e-07, "loss": 0.5222, "step": 27841 }, { "epoch": 0.8533161701605982, "grad_norm": 0.8113042710885422, "learning_rate": 5.537089010025237e-07, "loss": 0.4145, "step": 27842 }, { "epoch": 0.8533468186833395, "grad_norm": 1.9426297645160489, "learning_rate": 5.534819043843831e-07, "loss": 0.6, "step": 27843 }, { "epoch": 0.8533774672060807, "grad_norm": 0.8275805172393923, "learning_rate": 5.532549515789237e-07, "loss": 0.4066, "step": 27844 }, { "epoch": 0.8534081157288219, "grad_norm": 1.8579607196750212, "learning_rate": 5.530280425883805e-07, "loss": 0.569, "step": 27845 }, { "epoch": 0.8534387642515631, "grad_norm": 0.8325870518803236, "learning_rate": 5.528011774149905e-07, "loss": 0.3963, "step": 27846 }, { "epoch": 0.8534694127743043, "grad_norm": 1.9957606571384463, "learning_rate": 5.52574356060987e-07, "loss": 0.5985, "step": 27847 }, { "epoch": 0.8535000612970455, "grad_norm": 1.899531688746314, "learning_rate": 5.52347578528607e-07, "loss": 0.5386, "step": 27848 }, { "epoch": 0.8535307098197867, "grad_norm": 1.7263503806897942, "learning_rate": 5.521208448200849e-07, "loss": 0.5088, "step": 27849 }, { "epoch": 0.8535613583425279, "grad_norm": 2.089724923903295, "learning_rate": 5.518941549376527e-07, "loss": 0.5918, "step": 27850 }, { "epoch": 0.8535920068652691, "grad_norm": 1.999550067428268, "learning_rate": 5.51667508883546e-07, "loss": 0.5159, "step": 27851 }, { "epoch": 0.8536226553880103, "grad_norm": 2.103836695924699, "learning_rate": 5.514409066599985e-07, "loss": 0.6178, "step": 27852 }, { "epoch": 0.8536533039107514, "grad_norm": 2.1539035313665686, "learning_rate": 5.512143482692411e-07, "loss": 0.668, "step": 27853 }, { "epoch": 0.8536839524334927, "grad_norm": 1.9684140041291074, "learning_rate": 5.509878337135066e-07, "loss": 0.5407, "step": 27854 }, { "epoch": 0.8537146009562339, "grad_norm": 1.8355732136412206, "learning_rate": 5.507613629950287e-07, "loss": 0.5306, "step": 27855 }, { "epoch": 0.8537452494789751, "grad_norm": 1.633606767640871, "learning_rate": 5.505349361160362e-07, "loss": 0.5352, "step": 27856 }, { "epoch": 0.8537758980017163, "grad_norm": 0.78508854021618, "learning_rate": 5.503085530787628e-07, "loss": 0.3892, "step": 27857 }, { "epoch": 0.8538065465244575, "grad_norm": 1.8497546742364253, "learning_rate": 5.500822138854361e-07, "loss": 0.5692, "step": 27858 }, { "epoch": 0.8538371950471987, "grad_norm": 1.9397445820896986, "learning_rate": 5.498559185382885e-07, "loss": 0.5881, "step": 27859 }, { "epoch": 0.8538678435699399, "grad_norm": 1.7941391038921555, "learning_rate": 5.496296670395501e-07, "loss": 0.492, "step": 27860 }, { "epoch": 0.8538984920926811, "grad_norm": 1.907521962961939, "learning_rate": 5.494034593914476e-07, "loss": 0.5801, "step": 27861 }, { "epoch": 0.8539291406154224, "grad_norm": 2.1256528800900516, "learning_rate": 5.491772955962122e-07, "loss": 0.5739, "step": 27862 }, { "epoch": 0.8539597891381635, "grad_norm": 2.017115499423188, "learning_rate": 5.489511756560728e-07, "loss": 0.6497, "step": 27863 }, { "epoch": 0.8539904376609048, "grad_norm": 0.796535384966754, "learning_rate": 5.487250995732546e-07, "loss": 0.4123, "step": 27864 }, { "epoch": 0.8540210861836459, "grad_norm": 0.8062855180904316, "learning_rate": 5.484990673499874e-07, "loss": 0.4013, "step": 27865 }, { "epoch": 0.8540517347063872, "grad_norm": 1.9680087289351162, "learning_rate": 5.482730789884987e-07, "loss": 0.6, "step": 27866 }, { "epoch": 0.8540823832291283, "grad_norm": 2.0399062010180433, "learning_rate": 5.480471344910137e-07, "loss": 0.6689, "step": 27867 }, { "epoch": 0.8541130317518696, "grad_norm": 1.7847738129274915, "learning_rate": 5.4782123385976e-07, "loss": 0.6276, "step": 27868 }, { "epoch": 0.8541436802746107, "grad_norm": 0.7795414227333223, "learning_rate": 5.475953770969622e-07, "loss": 0.3816, "step": 27869 }, { "epoch": 0.854174328797352, "grad_norm": 2.073391062798166, "learning_rate": 5.47369564204846e-07, "loss": 0.5455, "step": 27870 }, { "epoch": 0.8542049773200932, "grad_norm": 2.219822833701992, "learning_rate": 5.471437951856378e-07, "loss": 0.5232, "step": 27871 }, { "epoch": 0.8542356258428344, "grad_norm": 0.7984988188065977, "learning_rate": 5.469180700415605e-07, "loss": 0.4059, "step": 27872 }, { "epoch": 0.8542662743655756, "grad_norm": 1.9300957037188229, "learning_rate": 5.466923887748382e-07, "loss": 0.5246, "step": 27873 }, { "epoch": 0.8542969228883168, "grad_norm": 1.8436806695081833, "learning_rate": 5.464667513876965e-07, "loss": 0.5477, "step": 27874 }, { "epoch": 0.854327571411058, "grad_norm": 1.868907386329961, "learning_rate": 5.462411578823562e-07, "loss": 0.5655, "step": 27875 }, { "epoch": 0.8543582199337992, "grad_norm": 1.9926627072862129, "learning_rate": 5.460156082610418e-07, "loss": 0.6513, "step": 27876 }, { "epoch": 0.8543888684565404, "grad_norm": 1.7627181181121985, "learning_rate": 5.457901025259759e-07, "loss": 0.5737, "step": 27877 }, { "epoch": 0.8544195169792816, "grad_norm": 1.9299523413434079, "learning_rate": 5.455646406793785e-07, "loss": 0.6263, "step": 27878 }, { "epoch": 0.8544501655020228, "grad_norm": 1.7671843348446428, "learning_rate": 5.453392227234739e-07, "loss": 0.5712, "step": 27879 }, { "epoch": 0.8544808140247641, "grad_norm": 1.7771959184535295, "learning_rate": 5.451138486604796e-07, "loss": 0.5746, "step": 27880 }, { "epoch": 0.8545114625475052, "grad_norm": 1.7932738628796667, "learning_rate": 5.4488851849262e-07, "loss": 0.5622, "step": 27881 }, { "epoch": 0.8545421110702465, "grad_norm": 2.0341942641140984, "learning_rate": 5.44663232222114e-07, "loss": 0.7567, "step": 27882 }, { "epoch": 0.8545727595929876, "grad_norm": 1.9493664703785079, "learning_rate": 5.444379898511803e-07, "loss": 0.6639, "step": 27883 }, { "epoch": 0.8546034081157288, "grad_norm": 1.722647863803046, "learning_rate": 5.442127913820389e-07, "loss": 0.5797, "step": 27884 }, { "epoch": 0.85463405663847, "grad_norm": 1.8556713709568584, "learning_rate": 5.439876368169101e-07, "loss": 0.4877, "step": 27885 }, { "epoch": 0.8546647051612112, "grad_norm": 1.5995461140832214, "learning_rate": 5.437625261580099e-07, "loss": 0.4384, "step": 27886 }, { "epoch": 0.8546953536839524, "grad_norm": 0.8298979604484785, "learning_rate": 5.435374594075576e-07, "loss": 0.3914, "step": 27887 }, { "epoch": 0.8547260022066936, "grad_norm": 0.8104257198756297, "learning_rate": 5.433124365677722e-07, "loss": 0.3987, "step": 27888 }, { "epoch": 0.8547566507294349, "grad_norm": 1.7935441703984205, "learning_rate": 5.43087457640869e-07, "loss": 0.4774, "step": 27889 }, { "epoch": 0.854787299252176, "grad_norm": 1.685431639098922, "learning_rate": 5.428625226290663e-07, "loss": 0.5412, "step": 27890 }, { "epoch": 0.8548179477749173, "grad_norm": 1.9593403432241216, "learning_rate": 5.426376315345783e-07, "loss": 0.5815, "step": 27891 }, { "epoch": 0.8548485962976584, "grad_norm": 2.0230006801075544, "learning_rate": 5.424127843596222e-07, "loss": 0.5723, "step": 27892 }, { "epoch": 0.8548792448203997, "grad_norm": 1.6941926145869581, "learning_rate": 5.421879811064145e-07, "loss": 0.5547, "step": 27893 }, { "epoch": 0.8549098933431408, "grad_norm": 2.027330220764987, "learning_rate": 5.419632217771681e-07, "loss": 0.6236, "step": 27894 }, { "epoch": 0.8549405418658821, "grad_norm": 2.1645184080162427, "learning_rate": 5.417385063740987e-07, "loss": 0.5785, "step": 27895 }, { "epoch": 0.8549711903886232, "grad_norm": 2.0304434141569585, "learning_rate": 5.41513834899422e-07, "loss": 0.538, "step": 27896 }, { "epoch": 0.8550018389113645, "grad_norm": 3.276105329077473, "learning_rate": 5.412892073553489e-07, "loss": 0.4834, "step": 27897 }, { "epoch": 0.8550324874341056, "grad_norm": 1.9161428266863978, "learning_rate": 5.410646237440947e-07, "loss": 0.519, "step": 27898 }, { "epoch": 0.8550631359568469, "grad_norm": 1.8413024360376078, "learning_rate": 5.408400840678701e-07, "loss": 0.5532, "step": 27899 }, { "epoch": 0.8550937844795881, "grad_norm": 1.9225466986953557, "learning_rate": 5.40615588328891e-07, "loss": 0.5342, "step": 27900 }, { "epoch": 0.8551244330023293, "grad_norm": 2.094874859667503, "learning_rate": 5.403911365293674e-07, "loss": 0.6111, "step": 27901 }, { "epoch": 0.8551550815250705, "grad_norm": 1.8016378962967075, "learning_rate": 5.401667286715096e-07, "loss": 0.5363, "step": 27902 }, { "epoch": 0.8551857300478117, "grad_norm": 1.7742498503199464, "learning_rate": 5.399423647575308e-07, "loss": 0.5254, "step": 27903 }, { "epoch": 0.8552163785705529, "grad_norm": 2.2739361215952534, "learning_rate": 5.397180447896416e-07, "loss": 0.6136, "step": 27904 }, { "epoch": 0.8552470270932941, "grad_norm": 1.7430305868874458, "learning_rate": 5.394937687700508e-07, "loss": 0.5511, "step": 27905 }, { "epoch": 0.8552776756160353, "grad_norm": 1.661865721750616, "learning_rate": 5.392695367009693e-07, "loss": 0.4425, "step": 27906 }, { "epoch": 0.8553083241387766, "grad_norm": 1.828349767536251, "learning_rate": 5.390453485846065e-07, "loss": 0.5751, "step": 27907 }, { "epoch": 0.8553389726615177, "grad_norm": 1.9902772195586644, "learning_rate": 5.388212044231716e-07, "loss": 0.5854, "step": 27908 }, { "epoch": 0.855369621184259, "grad_norm": 2.189571026816787, "learning_rate": 5.385971042188736e-07, "loss": 0.5071, "step": 27909 }, { "epoch": 0.8554002697070001, "grad_norm": 2.1432413120331395, "learning_rate": 5.383730479739174e-07, "loss": 0.56, "step": 27910 }, { "epoch": 0.8554309182297414, "grad_norm": 1.9385765944044069, "learning_rate": 5.381490356905155e-07, "loss": 0.5283, "step": 27911 }, { "epoch": 0.8554615667524825, "grad_norm": 0.7979678242890045, "learning_rate": 5.379250673708725e-07, "loss": 0.3904, "step": 27912 }, { "epoch": 0.8554922152752238, "grad_norm": 1.8479023930094045, "learning_rate": 5.377011430171941e-07, "loss": 0.5693, "step": 27913 }, { "epoch": 0.8555228637979649, "grad_norm": 1.9778884379240917, "learning_rate": 5.374772626316887e-07, "loss": 0.6578, "step": 27914 }, { "epoch": 0.8555535123207061, "grad_norm": 0.8239932975746957, "learning_rate": 5.372534262165624e-07, "loss": 0.4041, "step": 27915 }, { "epoch": 0.8555841608434473, "grad_norm": 1.944360286137296, "learning_rate": 5.370296337740188e-07, "loss": 0.587, "step": 27916 }, { "epoch": 0.8556148093661885, "grad_norm": 2.490063793735923, "learning_rate": 5.368058853062641e-07, "loss": 0.5638, "step": 27917 }, { "epoch": 0.8556454578889298, "grad_norm": 2.080106242577037, "learning_rate": 5.36582180815503e-07, "loss": 0.5281, "step": 27918 }, { "epoch": 0.8556761064116709, "grad_norm": 2.048198518563076, "learning_rate": 5.363585203039412e-07, "loss": 0.6477, "step": 27919 }, { "epoch": 0.8557067549344122, "grad_norm": 2.10395122854608, "learning_rate": 5.361349037737801e-07, "loss": 0.5819, "step": 27920 }, { "epoch": 0.8557374034571533, "grad_norm": 0.7750416354897839, "learning_rate": 5.359113312272224e-07, "loss": 0.384, "step": 27921 }, { "epoch": 0.8557680519798946, "grad_norm": 1.8228889505361503, "learning_rate": 5.356878026664747e-07, "loss": 0.614, "step": 27922 }, { "epoch": 0.8557987005026357, "grad_norm": 1.9880748543859117, "learning_rate": 5.354643180937368e-07, "loss": 0.6007, "step": 27923 }, { "epoch": 0.855829349025377, "grad_norm": 2.172241082990024, "learning_rate": 5.352408775112111e-07, "loss": 0.6486, "step": 27924 }, { "epoch": 0.8558599975481181, "grad_norm": 0.9426171135069277, "learning_rate": 5.350174809210989e-07, "loss": 0.4118, "step": 27925 }, { "epoch": 0.8558906460708594, "grad_norm": 2.007169387907579, "learning_rate": 5.347941283256014e-07, "loss": 0.5241, "step": 27926 }, { "epoch": 0.8559212945936006, "grad_norm": 1.8707436460837936, "learning_rate": 5.345708197269217e-07, "loss": 0.5987, "step": 27927 }, { "epoch": 0.8559519431163418, "grad_norm": 1.8692766677517025, "learning_rate": 5.343475551272565e-07, "loss": 0.5991, "step": 27928 }, { "epoch": 0.855982591639083, "grad_norm": 2.090901680817255, "learning_rate": 5.341243345288077e-07, "loss": 0.6291, "step": 27929 }, { "epoch": 0.8560132401618242, "grad_norm": 1.8410801556994723, "learning_rate": 5.339011579337761e-07, "loss": 0.5273, "step": 27930 }, { "epoch": 0.8560438886845654, "grad_norm": 2.018024049746451, "learning_rate": 5.336780253443579e-07, "loss": 0.5894, "step": 27931 }, { "epoch": 0.8560745372073066, "grad_norm": 2.149443959755096, "learning_rate": 5.334549367627518e-07, "loss": 0.501, "step": 27932 }, { "epoch": 0.8561051857300478, "grad_norm": 2.050541225421511, "learning_rate": 5.332318921911589e-07, "loss": 0.5188, "step": 27933 }, { "epoch": 0.856135834252789, "grad_norm": 0.7802440063309151, "learning_rate": 5.33008891631775e-07, "loss": 0.3896, "step": 27934 }, { "epoch": 0.8561664827755302, "grad_norm": 1.4628227088687444, "learning_rate": 5.327859350867959e-07, "loss": 0.4974, "step": 27935 }, { "epoch": 0.8561971312982715, "grad_norm": 1.9470374902621808, "learning_rate": 5.325630225584206e-07, "loss": 0.631, "step": 27936 }, { "epoch": 0.8562277798210126, "grad_norm": 2.1587250398495863, "learning_rate": 5.323401540488443e-07, "loss": 0.5845, "step": 27937 }, { "epoch": 0.8562584283437539, "grad_norm": 2.059076459678811, "learning_rate": 5.32117329560265e-07, "loss": 0.6148, "step": 27938 }, { "epoch": 0.856289076866495, "grad_norm": 1.7492127847236305, "learning_rate": 5.318945490948757e-07, "loss": 0.5612, "step": 27939 }, { "epoch": 0.8563197253892363, "grad_norm": 0.8333108702773458, "learning_rate": 5.316718126548726e-07, "loss": 0.418, "step": 27940 }, { "epoch": 0.8563503739119774, "grad_norm": 1.8508211965771968, "learning_rate": 5.314491202424515e-07, "loss": 0.5642, "step": 27941 }, { "epoch": 0.8563810224347187, "grad_norm": 0.7602896868234157, "learning_rate": 5.312264718598053e-07, "loss": 0.3724, "step": 27942 }, { "epoch": 0.8564116709574598, "grad_norm": 1.9592704772518872, "learning_rate": 5.310038675091273e-07, "loss": 0.648, "step": 27943 }, { "epoch": 0.8564423194802011, "grad_norm": 2.107759974506321, "learning_rate": 5.307813071926116e-07, "loss": 0.523, "step": 27944 }, { "epoch": 0.8564729680029423, "grad_norm": 1.9520881425592007, "learning_rate": 5.30558790912451e-07, "loss": 0.6609, "step": 27945 }, { "epoch": 0.8565036165256834, "grad_norm": 1.9120236761606986, "learning_rate": 5.303363186708394e-07, "loss": 0.6105, "step": 27946 }, { "epoch": 0.8565342650484247, "grad_norm": 1.8367827225603515, "learning_rate": 5.301138904699665e-07, "loss": 0.635, "step": 27947 }, { "epoch": 0.8565649135711658, "grad_norm": 1.905422386722036, "learning_rate": 5.298915063120252e-07, "loss": 0.585, "step": 27948 }, { "epoch": 0.8565955620939071, "grad_norm": 2.6656968157061023, "learning_rate": 5.296691661992081e-07, "loss": 0.61, "step": 27949 }, { "epoch": 0.8566262106166482, "grad_norm": 1.9380365533945436, "learning_rate": 5.294468701337036e-07, "loss": 0.6235, "step": 27950 }, { "epoch": 0.8566568591393895, "grad_norm": 1.798958957565058, "learning_rate": 5.292246181177014e-07, "loss": 0.4886, "step": 27951 }, { "epoch": 0.8566875076621306, "grad_norm": 1.9288485107777564, "learning_rate": 5.290024101533952e-07, "loss": 0.5417, "step": 27952 }, { "epoch": 0.8567181561848719, "grad_norm": 1.7487058200979726, "learning_rate": 5.287802462429708e-07, "loss": 0.5167, "step": 27953 }, { "epoch": 0.856748804707613, "grad_norm": 1.9039849804024467, "learning_rate": 5.285581263886197e-07, "loss": 0.5723, "step": 27954 }, { "epoch": 0.8567794532303543, "grad_norm": 0.7649259339154661, "learning_rate": 5.283360505925283e-07, "loss": 0.3906, "step": 27955 }, { "epoch": 0.8568101017530955, "grad_norm": 1.7863727624170853, "learning_rate": 5.281140188568862e-07, "loss": 0.5739, "step": 27956 }, { "epoch": 0.8568407502758367, "grad_norm": 0.7966182844846535, "learning_rate": 5.27892031183882e-07, "loss": 0.389, "step": 27957 }, { "epoch": 0.8568713987985779, "grad_norm": 1.788915437297957, "learning_rate": 5.276700875757002e-07, "loss": 0.558, "step": 27958 }, { "epoch": 0.8569020473213191, "grad_norm": 1.97104412847294, "learning_rate": 5.274481880345301e-07, "loss": 0.5545, "step": 27959 }, { "epoch": 0.8569326958440603, "grad_norm": 2.07845622994804, "learning_rate": 5.272263325625576e-07, "loss": 0.5825, "step": 27960 }, { "epoch": 0.8569633443668015, "grad_norm": 1.9936001958298706, "learning_rate": 5.27004521161969e-07, "loss": 0.5971, "step": 27961 }, { "epoch": 0.8569939928895427, "grad_norm": 2.102110466882034, "learning_rate": 5.267827538349474e-07, "loss": 0.5428, "step": 27962 }, { "epoch": 0.857024641412284, "grad_norm": 1.9135838833341303, "learning_rate": 5.26561030583681e-07, "loss": 0.6298, "step": 27963 }, { "epoch": 0.8570552899350251, "grad_norm": 1.7966195167504917, "learning_rate": 5.263393514103532e-07, "loss": 0.6068, "step": 27964 }, { "epoch": 0.8570859384577664, "grad_norm": 0.8069327071740833, "learning_rate": 5.261177163171494e-07, "loss": 0.4017, "step": 27965 }, { "epoch": 0.8571165869805075, "grad_norm": 1.9995979981989673, "learning_rate": 5.258961253062512e-07, "loss": 0.5543, "step": 27966 }, { "epoch": 0.8571472355032488, "grad_norm": 2.0323260826849077, "learning_rate": 5.256745783798428e-07, "loss": 0.5861, "step": 27967 }, { "epoch": 0.8571778840259899, "grad_norm": 1.8668628395937394, "learning_rate": 5.254530755401094e-07, "loss": 0.6262, "step": 27968 }, { "epoch": 0.8572085325487312, "grad_norm": 1.9445010022594906, "learning_rate": 5.252316167892301e-07, "loss": 0.5873, "step": 27969 }, { "epoch": 0.8572391810714723, "grad_norm": 1.7937313526493759, "learning_rate": 5.25010202129389e-07, "loss": 0.5257, "step": 27970 }, { "epoch": 0.8572698295942136, "grad_norm": 1.9252890452144158, "learning_rate": 5.24788831562768e-07, "loss": 0.5348, "step": 27971 }, { "epoch": 0.8573004781169548, "grad_norm": 0.8155394872519661, "learning_rate": 5.245675050915467e-07, "loss": 0.3992, "step": 27972 }, { "epoch": 0.857331126639696, "grad_norm": 1.9508139632217607, "learning_rate": 5.243462227179069e-07, "loss": 0.5984, "step": 27973 }, { "epoch": 0.8573617751624372, "grad_norm": 1.6022474841543883, "learning_rate": 5.241249844440299e-07, "loss": 0.4854, "step": 27974 }, { "epoch": 0.8573924236851784, "grad_norm": 1.9474551245827698, "learning_rate": 5.239037902720939e-07, "loss": 0.6314, "step": 27975 }, { "epoch": 0.8574230722079196, "grad_norm": 1.8741228400555137, "learning_rate": 5.2368264020428e-07, "loss": 0.5568, "step": 27976 }, { "epoch": 0.8574537207306607, "grad_norm": 1.8686385414868087, "learning_rate": 5.234615342427651e-07, "loss": 0.4981, "step": 27977 }, { "epoch": 0.857484369253402, "grad_norm": 1.8544679206043544, "learning_rate": 5.232404723897294e-07, "loss": 0.6797, "step": 27978 }, { "epoch": 0.8575150177761431, "grad_norm": 1.8371266798780979, "learning_rate": 5.230194546473516e-07, "loss": 0.6051, "step": 27979 }, { "epoch": 0.8575456662988844, "grad_norm": 1.9827460386186604, "learning_rate": 5.227984810178077e-07, "loss": 0.5554, "step": 27980 }, { "epoch": 0.8575763148216256, "grad_norm": 1.8870746735827173, "learning_rate": 5.22577551503276e-07, "loss": 0.5402, "step": 27981 }, { "epoch": 0.8576069633443668, "grad_norm": 1.697712618568492, "learning_rate": 5.223566661059338e-07, "loss": 0.5526, "step": 27982 }, { "epoch": 0.857637611867108, "grad_norm": 1.8846953184700528, "learning_rate": 5.221358248279568e-07, "loss": 0.6128, "step": 27983 }, { "epoch": 0.8576682603898492, "grad_norm": 1.7390654448429395, "learning_rate": 5.219150276715206e-07, "loss": 0.6056, "step": 27984 }, { "epoch": 0.8576989089125904, "grad_norm": 1.8280243360307538, "learning_rate": 5.216942746388026e-07, "loss": 0.6713, "step": 27985 }, { "epoch": 0.8577295574353316, "grad_norm": 1.761328487959593, "learning_rate": 5.214735657319758e-07, "loss": 0.5473, "step": 27986 }, { "epoch": 0.8577602059580728, "grad_norm": 0.8183739582548301, "learning_rate": 5.212529009532164e-07, "loss": 0.399, "step": 27987 }, { "epoch": 0.857790854480814, "grad_norm": 2.210134957951464, "learning_rate": 5.210322803046974e-07, "loss": 0.5707, "step": 27988 }, { "epoch": 0.8578215030035552, "grad_norm": 2.0281113186261646, "learning_rate": 5.208117037885934e-07, "loss": 0.5633, "step": 27989 }, { "epoch": 0.8578521515262965, "grad_norm": 1.8886666673230068, "learning_rate": 5.205911714070788e-07, "loss": 0.6152, "step": 27990 }, { "epoch": 0.8578828000490376, "grad_norm": 1.9943263588184623, "learning_rate": 5.203706831623245e-07, "loss": 0.4961, "step": 27991 }, { "epoch": 0.8579134485717789, "grad_norm": 1.9675978144262134, "learning_rate": 5.201502390565039e-07, "loss": 0.5368, "step": 27992 }, { "epoch": 0.85794409709452, "grad_norm": 0.8214366760008923, "learning_rate": 5.1992983909179e-07, "loss": 0.3949, "step": 27993 }, { "epoch": 0.8579747456172613, "grad_norm": 0.7768187128326858, "learning_rate": 5.197094832703531e-07, "loss": 0.3859, "step": 27994 }, { "epoch": 0.8580053941400024, "grad_norm": 0.7906105895758537, "learning_rate": 5.194891715943656e-07, "loss": 0.3965, "step": 27995 }, { "epoch": 0.8580360426627437, "grad_norm": 1.8622881904902278, "learning_rate": 5.19268904065997e-07, "loss": 0.5757, "step": 27996 }, { "epoch": 0.8580666911854848, "grad_norm": 1.78805863579303, "learning_rate": 5.190486806874184e-07, "loss": 0.5169, "step": 27997 }, { "epoch": 0.8580973397082261, "grad_norm": 1.8643177015553962, "learning_rate": 5.188285014608002e-07, "loss": 0.5187, "step": 27998 }, { "epoch": 0.8581279882309673, "grad_norm": 1.8032823948638323, "learning_rate": 5.186083663883107e-07, "loss": 0.6008, "step": 27999 }, { "epoch": 0.8581586367537085, "grad_norm": 1.9372514826878737, "learning_rate": 5.183882754721198e-07, "loss": 0.5843, "step": 28000 }, { "epoch": 0.8581892852764497, "grad_norm": 1.9707058647078748, "learning_rate": 5.181682287143963e-07, "loss": 0.6531, "step": 28001 }, { "epoch": 0.8582199337991909, "grad_norm": 0.8096048581480915, "learning_rate": 5.179482261173075e-07, "loss": 0.4017, "step": 28002 }, { "epoch": 0.8582505823219321, "grad_norm": 0.7750232963802235, "learning_rate": 5.177282676830214e-07, "loss": 0.3896, "step": 28003 }, { "epoch": 0.8582812308446733, "grad_norm": 0.7686253265338286, "learning_rate": 5.175083534137065e-07, "loss": 0.3916, "step": 28004 }, { "epoch": 0.8583118793674145, "grad_norm": 2.0217516352393647, "learning_rate": 5.172884833115277e-07, "loss": 0.5907, "step": 28005 }, { "epoch": 0.8583425278901557, "grad_norm": 1.6958262482514337, "learning_rate": 5.170686573786532e-07, "loss": 0.622, "step": 28006 }, { "epoch": 0.8583731764128969, "grad_norm": 0.780664816016095, "learning_rate": 5.168488756172463e-07, "loss": 0.3949, "step": 28007 }, { "epoch": 0.858403824935638, "grad_norm": 1.839513552892754, "learning_rate": 5.166291380294769e-07, "loss": 0.5627, "step": 28008 }, { "epoch": 0.8584344734583793, "grad_norm": 1.6576196646206423, "learning_rate": 5.164094446175072e-07, "loss": 0.5833, "step": 28009 }, { "epoch": 0.8584651219811205, "grad_norm": 2.1802951664964665, "learning_rate": 5.161897953835015e-07, "loss": 0.5971, "step": 28010 }, { "epoch": 0.8584957705038617, "grad_norm": 0.7818560633993816, "learning_rate": 5.159701903296255e-07, "loss": 0.3847, "step": 28011 }, { "epoch": 0.8585264190266029, "grad_norm": 1.6777202790191181, "learning_rate": 5.157506294580428e-07, "loss": 0.5869, "step": 28012 }, { "epoch": 0.8585570675493441, "grad_norm": 1.8577162028044176, "learning_rate": 5.155311127709156e-07, "loss": 0.5834, "step": 28013 }, { "epoch": 0.8585877160720853, "grad_norm": 1.7767847095958023, "learning_rate": 5.153116402704083e-07, "loss": 0.6097, "step": 28014 }, { "epoch": 0.8586183645948265, "grad_norm": 2.0008914838155896, "learning_rate": 5.150922119586832e-07, "loss": 0.6332, "step": 28015 }, { "epoch": 0.8586490131175677, "grad_norm": 1.705689806380536, "learning_rate": 5.148728278379018e-07, "loss": 0.5807, "step": 28016 }, { "epoch": 0.858679661640309, "grad_norm": 0.7969791360930001, "learning_rate": 5.146534879102267e-07, "loss": 0.4123, "step": 28017 }, { "epoch": 0.8587103101630501, "grad_norm": 1.886725413588049, "learning_rate": 5.144341921778162e-07, "loss": 0.5661, "step": 28018 }, { "epoch": 0.8587409586857914, "grad_norm": 1.9852412386038965, "learning_rate": 5.142149406428354e-07, "loss": 0.5764, "step": 28019 }, { "epoch": 0.8587716072085325, "grad_norm": 1.7765242526770706, "learning_rate": 5.139957333074424e-07, "loss": 0.6453, "step": 28020 }, { "epoch": 0.8588022557312738, "grad_norm": 1.917765226844638, "learning_rate": 5.137765701737962e-07, "loss": 0.4987, "step": 28021 }, { "epoch": 0.8588329042540149, "grad_norm": 1.6183597615143417, "learning_rate": 5.135574512440572e-07, "loss": 0.5402, "step": 28022 }, { "epoch": 0.8588635527767562, "grad_norm": 1.832804470171407, "learning_rate": 5.133383765203859e-07, "loss": 0.5563, "step": 28023 }, { "epoch": 0.8588942012994973, "grad_norm": 1.9507951378527155, "learning_rate": 5.131193460049383e-07, "loss": 0.5778, "step": 28024 }, { "epoch": 0.8589248498222386, "grad_norm": 1.5462395116081793, "learning_rate": 5.129003596998738e-07, "loss": 0.4186, "step": 28025 }, { "epoch": 0.8589554983449798, "grad_norm": 1.660866058948592, "learning_rate": 5.126814176073508e-07, "loss": 0.5471, "step": 28026 }, { "epoch": 0.858986146867721, "grad_norm": 1.9631639007499706, "learning_rate": 5.124625197295263e-07, "loss": 0.5952, "step": 28027 }, { "epoch": 0.8590167953904622, "grad_norm": 1.8687316937493652, "learning_rate": 5.122436660685565e-07, "loss": 0.545, "step": 28028 }, { "epoch": 0.8590474439132034, "grad_norm": 0.7963503830325269, "learning_rate": 5.120248566265967e-07, "loss": 0.373, "step": 28029 }, { "epoch": 0.8590780924359446, "grad_norm": 2.0971551325881124, "learning_rate": 5.11806091405806e-07, "loss": 0.5937, "step": 28030 }, { "epoch": 0.8591087409586858, "grad_norm": 1.9234675776433752, "learning_rate": 5.11587370408338e-07, "loss": 0.5395, "step": 28031 }, { "epoch": 0.859139389481427, "grad_norm": 1.7807106507764678, "learning_rate": 5.113686936363477e-07, "loss": 0.4902, "step": 28032 }, { "epoch": 0.8591700380041682, "grad_norm": 1.8802819207271224, "learning_rate": 5.111500610919894e-07, "loss": 0.5467, "step": 28033 }, { "epoch": 0.8592006865269094, "grad_norm": 0.7616008815630758, "learning_rate": 5.109314727774184e-07, "loss": 0.3858, "step": 28034 }, { "epoch": 0.8592313350496507, "grad_norm": 0.8202496384291486, "learning_rate": 5.107129286947893e-07, "loss": 0.4052, "step": 28035 }, { "epoch": 0.8592619835723918, "grad_norm": 1.8186917742431181, "learning_rate": 5.104944288462532e-07, "loss": 0.5723, "step": 28036 }, { "epoch": 0.8592926320951331, "grad_norm": 1.9319803801196285, "learning_rate": 5.10275973233964e-07, "loss": 0.6036, "step": 28037 }, { "epoch": 0.8593232806178742, "grad_norm": 1.9065650437406247, "learning_rate": 5.100575618600756e-07, "loss": 0.5791, "step": 28038 }, { "epoch": 0.8593539291406154, "grad_norm": 1.8923647552406924, "learning_rate": 5.09839194726739e-07, "loss": 0.481, "step": 28039 }, { "epoch": 0.8593845776633566, "grad_norm": 1.7824252739139297, "learning_rate": 5.096208718361045e-07, "loss": 0.5555, "step": 28040 }, { "epoch": 0.8594152261860978, "grad_norm": 1.8613608732376583, "learning_rate": 5.094025931903246e-07, "loss": 0.5569, "step": 28041 }, { "epoch": 0.859445874708839, "grad_norm": 1.762864211106487, "learning_rate": 5.091843587915507e-07, "loss": 0.52, "step": 28042 }, { "epoch": 0.8594765232315802, "grad_norm": 1.678032591701591, "learning_rate": 5.089661686419318e-07, "loss": 0.517, "step": 28043 }, { "epoch": 0.8595071717543215, "grad_norm": 0.8274210212255301, "learning_rate": 5.087480227436176e-07, "loss": 0.4202, "step": 28044 }, { "epoch": 0.8595378202770626, "grad_norm": 1.783205202985396, "learning_rate": 5.085299210987587e-07, "loss": 0.5842, "step": 28045 }, { "epoch": 0.8595684687998039, "grad_norm": 1.839231478592405, "learning_rate": 5.083118637095047e-07, "loss": 0.5553, "step": 28046 }, { "epoch": 0.859599117322545, "grad_norm": 0.8210705737401411, "learning_rate": 5.080938505780031e-07, "loss": 0.4084, "step": 28047 }, { "epoch": 0.8596297658452863, "grad_norm": 2.187858095301255, "learning_rate": 5.078758817064e-07, "loss": 0.65, "step": 28048 }, { "epoch": 0.8596604143680274, "grad_norm": 1.7506601377947013, "learning_rate": 5.076579570968471e-07, "loss": 0.5944, "step": 28049 }, { "epoch": 0.8596910628907687, "grad_norm": 1.9731723893380568, "learning_rate": 5.074400767514898e-07, "loss": 0.6036, "step": 28050 }, { "epoch": 0.8597217114135098, "grad_norm": 1.8036600911538743, "learning_rate": 5.072222406724742e-07, "loss": 0.5121, "step": 28051 }, { "epoch": 0.8597523599362511, "grad_norm": 1.8059418979381807, "learning_rate": 5.070044488619469e-07, "loss": 0.5277, "step": 28052 }, { "epoch": 0.8597830084589922, "grad_norm": 2.2120412778184155, "learning_rate": 5.067867013220551e-07, "loss": 0.654, "step": 28053 }, { "epoch": 0.8598136569817335, "grad_norm": 2.0312631411792514, "learning_rate": 5.065689980549438e-07, "loss": 0.6657, "step": 28054 }, { "epoch": 0.8598443055044747, "grad_norm": 1.8959523276273098, "learning_rate": 5.063513390627572e-07, "loss": 0.5037, "step": 28055 }, { "epoch": 0.8598749540272159, "grad_norm": 1.900442137621867, "learning_rate": 5.061337243476405e-07, "loss": 0.5372, "step": 28056 }, { "epoch": 0.8599056025499571, "grad_norm": 1.8947434557423664, "learning_rate": 5.059161539117391e-07, "loss": 0.5834, "step": 28057 }, { "epoch": 0.8599362510726983, "grad_norm": 1.7589180244181917, "learning_rate": 5.056986277571957e-07, "loss": 0.5168, "step": 28058 }, { "epoch": 0.8599668995954395, "grad_norm": 1.7739199698418475, "learning_rate": 5.05481145886152e-07, "loss": 0.5872, "step": 28059 }, { "epoch": 0.8599975481181807, "grad_norm": 1.8481652138382372, "learning_rate": 5.052637083007539e-07, "loss": 0.5664, "step": 28060 }, { "epoch": 0.8600281966409219, "grad_norm": 1.9339100829307332, "learning_rate": 5.050463150031414e-07, "loss": 0.5942, "step": 28061 }, { "epoch": 0.8600588451636632, "grad_norm": 1.781782030018472, "learning_rate": 5.048289659954591e-07, "loss": 0.5231, "step": 28062 }, { "epoch": 0.8600894936864043, "grad_norm": 1.7900954975867158, "learning_rate": 5.046116612798463e-07, "loss": 0.4891, "step": 28063 }, { "epoch": 0.8601201422091456, "grad_norm": 1.9741281809895268, "learning_rate": 5.04394400858445e-07, "loss": 0.6071, "step": 28064 }, { "epoch": 0.8601507907318867, "grad_norm": 1.9173161628636897, "learning_rate": 5.041771847333965e-07, "loss": 0.6537, "step": 28065 }, { "epoch": 0.860181439254628, "grad_norm": 1.9684812679213175, "learning_rate": 5.039600129068395e-07, "loss": 0.5074, "step": 28066 }, { "epoch": 0.8602120877773691, "grad_norm": 1.9517368003311728, "learning_rate": 5.037428853809151e-07, "loss": 0.5143, "step": 28067 }, { "epoch": 0.8602427363001104, "grad_norm": 1.8814764673866455, "learning_rate": 5.035258021577633e-07, "loss": 0.5782, "step": 28068 }, { "epoch": 0.8602733848228515, "grad_norm": 1.8105218117278212, "learning_rate": 5.033087632395223e-07, "loss": 0.5403, "step": 28069 }, { "epoch": 0.8603040333455927, "grad_norm": 1.867744729180472, "learning_rate": 5.030917686283287e-07, "loss": 0.6015, "step": 28070 }, { "epoch": 0.860334681868334, "grad_norm": 1.9905166860743262, "learning_rate": 5.028748183263243e-07, "loss": 0.6695, "step": 28071 }, { "epoch": 0.8603653303910751, "grad_norm": 1.8957391055013773, "learning_rate": 5.02657912335644e-07, "loss": 0.6151, "step": 28072 }, { "epoch": 0.8603959789138164, "grad_norm": 1.8713143973205264, "learning_rate": 5.024410506584271e-07, "loss": 0.495, "step": 28073 }, { "epoch": 0.8604266274365575, "grad_norm": 1.7766771866923883, "learning_rate": 5.022242332968086e-07, "loss": 0.5398, "step": 28074 }, { "epoch": 0.8604572759592988, "grad_norm": 1.663765956120282, "learning_rate": 5.020074602529251e-07, "loss": 0.5604, "step": 28075 }, { "epoch": 0.8604879244820399, "grad_norm": 1.8406659860029393, "learning_rate": 5.017907315289139e-07, "loss": 0.5457, "step": 28076 }, { "epoch": 0.8605185730047812, "grad_norm": 1.682234509924205, "learning_rate": 5.015740471269087e-07, "loss": 0.524, "step": 28077 }, { "epoch": 0.8605492215275223, "grad_norm": 1.9495255771847164, "learning_rate": 5.013574070490452e-07, "loss": 0.5759, "step": 28078 }, { "epoch": 0.8605798700502636, "grad_norm": 1.8224448101410562, "learning_rate": 5.011408112974592e-07, "loss": 0.6532, "step": 28079 }, { "epoch": 0.8606105185730047, "grad_norm": 1.7551419682423974, "learning_rate": 5.00924259874283e-07, "loss": 0.5473, "step": 28080 }, { "epoch": 0.860641167095746, "grad_norm": 1.9639084312793897, "learning_rate": 5.007077527816512e-07, "loss": 0.5607, "step": 28081 }, { "epoch": 0.8606718156184872, "grad_norm": 1.9950710303452037, "learning_rate": 5.004912900216985e-07, "loss": 0.6289, "step": 28082 }, { "epoch": 0.8607024641412284, "grad_norm": 1.962883755754301, "learning_rate": 5.002748715965549e-07, "loss": 0.627, "step": 28083 }, { "epoch": 0.8607331126639696, "grad_norm": 2.097796663117327, "learning_rate": 5.000584975083556e-07, "loss": 0.6666, "step": 28084 }, { "epoch": 0.8607637611867108, "grad_norm": 1.7374305332282733, "learning_rate": 4.998421677592297e-07, "loss": 0.5493, "step": 28085 }, { "epoch": 0.860794409709452, "grad_norm": 2.0682382866030333, "learning_rate": 4.996258823513106e-07, "loss": 0.6116, "step": 28086 }, { "epoch": 0.8608250582321932, "grad_norm": 1.9707969785973711, "learning_rate": 4.994096412867306e-07, "loss": 0.5961, "step": 28087 }, { "epoch": 0.8608557067549344, "grad_norm": 1.8863836456315133, "learning_rate": 4.991934445676172e-07, "loss": 0.6394, "step": 28088 }, { "epoch": 0.8608863552776757, "grad_norm": 1.7507179116034584, "learning_rate": 4.989772921961029e-07, "loss": 0.5257, "step": 28089 }, { "epoch": 0.8609170038004168, "grad_norm": 1.6503438181040497, "learning_rate": 4.987611841743178e-07, "loss": 0.5968, "step": 28090 }, { "epoch": 0.8609476523231581, "grad_norm": 1.8858256821759296, "learning_rate": 4.985451205043895e-07, "loss": 0.5199, "step": 28091 }, { "epoch": 0.8609783008458992, "grad_norm": 1.688705246853585, "learning_rate": 4.983291011884489e-07, "loss": 0.5479, "step": 28092 }, { "epoch": 0.8610089493686405, "grad_norm": 2.020680052044759, "learning_rate": 4.981131262286226e-07, "loss": 0.6387, "step": 28093 }, { "epoch": 0.8610395978913816, "grad_norm": 2.091936524235682, "learning_rate": 4.978971956270389e-07, "loss": 0.6053, "step": 28094 }, { "epoch": 0.8610702464141229, "grad_norm": 2.244249245927105, "learning_rate": 4.976813093858279e-07, "loss": 0.609, "step": 28095 }, { "epoch": 0.861100894936864, "grad_norm": 1.8188076367301111, "learning_rate": 4.974654675071133e-07, "loss": 0.4059, "step": 28096 }, { "epoch": 0.8611315434596053, "grad_norm": 1.8649206399254523, "learning_rate": 4.972496699930235e-07, "loss": 0.5682, "step": 28097 }, { "epoch": 0.8611621919823464, "grad_norm": 1.8890980430743756, "learning_rate": 4.970339168456861e-07, "loss": 0.5161, "step": 28098 }, { "epoch": 0.8611928405050877, "grad_norm": 0.8329287974304102, "learning_rate": 4.968182080672246e-07, "loss": 0.3962, "step": 28099 }, { "epoch": 0.8612234890278289, "grad_norm": 0.8246341655278457, "learning_rate": 4.966025436597655e-07, "loss": 0.4007, "step": 28100 }, { "epoch": 0.86125413755057, "grad_norm": 2.0947561257292766, "learning_rate": 4.963869236254343e-07, "loss": 0.6092, "step": 28101 }, { "epoch": 0.8612847860733113, "grad_norm": 1.7727152884871116, "learning_rate": 4.961713479663549e-07, "loss": 0.5774, "step": 28102 }, { "epoch": 0.8613154345960524, "grad_norm": 2.2832659260008197, "learning_rate": 4.959558166846518e-07, "loss": 0.5921, "step": 28103 }, { "epoch": 0.8613460831187937, "grad_norm": 1.8652771690610177, "learning_rate": 4.957403297824476e-07, "loss": 0.5207, "step": 28104 }, { "epoch": 0.8613767316415348, "grad_norm": 1.7692729744393572, "learning_rate": 4.955248872618667e-07, "loss": 0.5078, "step": 28105 }, { "epoch": 0.8614073801642761, "grad_norm": 1.8848915928859955, "learning_rate": 4.953094891250326e-07, "loss": 0.5228, "step": 28106 }, { "epoch": 0.8614380286870172, "grad_norm": 0.8097374684490923, "learning_rate": 4.950941353740651e-07, "loss": 0.4144, "step": 28107 }, { "epoch": 0.8614686772097585, "grad_norm": 2.0159108837010704, "learning_rate": 4.948788260110882e-07, "loss": 0.5479, "step": 28108 }, { "epoch": 0.8614993257324997, "grad_norm": 0.8144993891887393, "learning_rate": 4.946635610382239e-07, "loss": 0.3964, "step": 28109 }, { "epoch": 0.8615299742552409, "grad_norm": 0.7684025816778556, "learning_rate": 4.944483404575911e-07, "loss": 0.3991, "step": 28110 }, { "epoch": 0.8615606227779821, "grad_norm": 1.9329224205115538, "learning_rate": 4.942331642713116e-07, "loss": 0.6016, "step": 28111 }, { "epoch": 0.8615912713007233, "grad_norm": 1.8522362878487413, "learning_rate": 4.940180324815069e-07, "loss": 0.538, "step": 28112 }, { "epoch": 0.8616219198234645, "grad_norm": 1.75838743869024, "learning_rate": 4.938029450902943e-07, "loss": 0.6497, "step": 28113 }, { "epoch": 0.8616525683462057, "grad_norm": 0.799549057282173, "learning_rate": 4.935879020997953e-07, "loss": 0.3981, "step": 28114 }, { "epoch": 0.8616832168689469, "grad_norm": 2.1235491199358965, "learning_rate": 4.933729035121266e-07, "loss": 0.5799, "step": 28115 }, { "epoch": 0.8617138653916881, "grad_norm": 0.8044966630061703, "learning_rate": 4.931579493294075e-07, "loss": 0.3903, "step": 28116 }, { "epoch": 0.8617445139144293, "grad_norm": 1.8330210140415613, "learning_rate": 4.929430395537577e-07, "loss": 0.533, "step": 28117 }, { "epoch": 0.8617751624371706, "grad_norm": 1.9414492935210368, "learning_rate": 4.927281741872919e-07, "loss": 0.618, "step": 28118 }, { "epoch": 0.8618058109599117, "grad_norm": 1.8493128389754978, "learning_rate": 4.925133532321285e-07, "loss": 0.5358, "step": 28119 }, { "epoch": 0.861836459482653, "grad_norm": 1.8369775818217906, "learning_rate": 4.922985766903859e-07, "loss": 0.5303, "step": 28120 }, { "epoch": 0.8618671080053941, "grad_norm": 2.072834260558551, "learning_rate": 4.920838445641774e-07, "loss": 0.5874, "step": 28121 }, { "epoch": 0.8618977565281354, "grad_norm": 1.8621695449753632, "learning_rate": 4.918691568556205e-07, "loss": 0.6244, "step": 28122 }, { "epoch": 0.8619284050508765, "grad_norm": 1.8837857569785366, "learning_rate": 4.91654513566831e-07, "loss": 0.5496, "step": 28123 }, { "epoch": 0.8619590535736178, "grad_norm": 2.2442237042155773, "learning_rate": 4.914399146999222e-07, "loss": 0.6372, "step": 28124 }, { "epoch": 0.861989702096359, "grad_norm": 1.8300553226801002, "learning_rate": 4.912253602570105e-07, "loss": 0.5638, "step": 28125 }, { "epoch": 0.8620203506191002, "grad_norm": 1.9643374177131123, "learning_rate": 4.910108502402067e-07, "loss": 0.5599, "step": 28126 }, { "epoch": 0.8620509991418414, "grad_norm": 1.9141215153582245, "learning_rate": 4.907963846516289e-07, "loss": 0.6382, "step": 28127 }, { "epoch": 0.8620816476645826, "grad_norm": 1.8239292465737915, "learning_rate": 4.905819634933878e-07, "loss": 0.5038, "step": 28128 }, { "epoch": 0.8621122961873238, "grad_norm": 1.9840378802948702, "learning_rate": 4.903675867675956e-07, "loss": 0.5854, "step": 28129 }, { "epoch": 0.862142944710065, "grad_norm": 2.113333963400916, "learning_rate": 4.901532544763654e-07, "loss": 0.5674, "step": 28130 }, { "epoch": 0.8621735932328062, "grad_norm": 1.7384007598624565, "learning_rate": 4.899389666218101e-07, "loss": 0.5057, "step": 28131 }, { "epoch": 0.8622042417555473, "grad_norm": 1.7440245073420262, "learning_rate": 4.897247232060392e-07, "loss": 0.5782, "step": 28132 }, { "epoch": 0.8622348902782886, "grad_norm": 1.8919524143913518, "learning_rate": 4.895105242311643e-07, "loss": 0.5322, "step": 28133 }, { "epoch": 0.8622655388010297, "grad_norm": 1.798991394593926, "learning_rate": 4.892963696992964e-07, "loss": 0.5918, "step": 28134 }, { "epoch": 0.862296187323771, "grad_norm": 2.1393770913725865, "learning_rate": 4.890822596125466e-07, "loss": 0.5521, "step": 28135 }, { "epoch": 0.8623268358465122, "grad_norm": 2.2278922880947025, "learning_rate": 4.888681939730233e-07, "loss": 0.6141, "step": 28136 }, { "epoch": 0.8623574843692534, "grad_norm": 2.0477558821873076, "learning_rate": 4.886541727828348e-07, "loss": 0.4943, "step": 28137 }, { "epoch": 0.8623881328919946, "grad_norm": 1.737012157426301, "learning_rate": 4.884401960440915e-07, "loss": 0.5679, "step": 28138 }, { "epoch": 0.8624187814147358, "grad_norm": 1.9589275695670543, "learning_rate": 4.882262637589019e-07, "loss": 0.6101, "step": 28139 }, { "epoch": 0.862449429937477, "grad_norm": 1.9023883483944164, "learning_rate": 4.880123759293725e-07, "loss": 0.559, "step": 28140 }, { "epoch": 0.8624800784602182, "grad_norm": 0.8014925840456232, "learning_rate": 4.877985325576112e-07, "loss": 0.4033, "step": 28141 }, { "epoch": 0.8625107269829594, "grad_norm": 1.9383817613031995, "learning_rate": 4.875847336457268e-07, "loss": 0.5378, "step": 28142 }, { "epoch": 0.8625413755057006, "grad_norm": 1.9325083496215933, "learning_rate": 4.873709791958237e-07, "loss": 0.5252, "step": 28143 }, { "epoch": 0.8625720240284418, "grad_norm": 2.085821882675375, "learning_rate": 4.871572692100096e-07, "loss": 0.5524, "step": 28144 }, { "epoch": 0.8626026725511831, "grad_norm": 1.9930205022879275, "learning_rate": 4.86943603690388e-07, "loss": 0.5176, "step": 28145 }, { "epoch": 0.8626333210739242, "grad_norm": 1.90517259646636, "learning_rate": 4.867299826390676e-07, "loss": 0.5532, "step": 28146 }, { "epoch": 0.8626639695966655, "grad_norm": 1.8684319309386945, "learning_rate": 4.865164060581512e-07, "loss": 0.5734, "step": 28147 }, { "epoch": 0.8626946181194066, "grad_norm": 0.7613466458968321, "learning_rate": 4.863028739497427e-07, "loss": 0.3765, "step": 28148 }, { "epoch": 0.8627252666421479, "grad_norm": 1.8335167734241287, "learning_rate": 4.860893863159471e-07, "loss": 0.6005, "step": 28149 }, { "epoch": 0.862755915164889, "grad_norm": 2.06447231482471, "learning_rate": 4.858759431588683e-07, "loss": 0.5665, "step": 28150 }, { "epoch": 0.8627865636876303, "grad_norm": 2.0271334472350784, "learning_rate": 4.856625444806079e-07, "loss": 0.5937, "step": 28151 }, { "epoch": 0.8628172122103714, "grad_norm": 0.8136733750552472, "learning_rate": 4.854491902832697e-07, "loss": 0.4259, "step": 28152 }, { "epoch": 0.8628478607331127, "grad_norm": 1.971341810857388, "learning_rate": 4.852358805689556e-07, "loss": 0.6691, "step": 28153 }, { "epoch": 0.8628785092558539, "grad_norm": 1.735817161032438, "learning_rate": 4.850226153397686e-07, "loss": 0.5586, "step": 28154 }, { "epoch": 0.8629091577785951, "grad_norm": 0.8136221504119284, "learning_rate": 4.848093945978088e-07, "loss": 0.3802, "step": 28155 }, { "epoch": 0.8629398063013363, "grad_norm": 0.7858901029145279, "learning_rate": 4.845962183451753e-07, "loss": 0.4029, "step": 28156 }, { "epoch": 0.8629704548240775, "grad_norm": 2.2173543817834105, "learning_rate": 4.843830865839727e-07, "loss": 0.5574, "step": 28157 }, { "epoch": 0.8630011033468187, "grad_norm": 2.046596326551819, "learning_rate": 4.841699993162985e-07, "loss": 0.5478, "step": 28158 }, { "epoch": 0.8630317518695599, "grad_norm": 1.6897939970024547, "learning_rate": 4.839569565442525e-07, "loss": 0.5444, "step": 28159 }, { "epoch": 0.8630624003923011, "grad_norm": 2.237653767045334, "learning_rate": 4.837439582699332e-07, "loss": 0.5396, "step": 28160 }, { "epoch": 0.8630930489150423, "grad_norm": 1.7736616177439852, "learning_rate": 4.835310044954411e-07, "loss": 0.652, "step": 28161 }, { "epoch": 0.8631236974377835, "grad_norm": 1.8393004390512842, "learning_rate": 4.833180952228738e-07, "loss": 0.5293, "step": 28162 }, { "epoch": 0.8631543459605246, "grad_norm": 2.087023588449109, "learning_rate": 4.831052304543288e-07, "loss": 0.5641, "step": 28163 }, { "epoch": 0.8631849944832659, "grad_norm": 1.942928082684705, "learning_rate": 4.82892410191903e-07, "loss": 0.5277, "step": 28164 }, { "epoch": 0.8632156430060071, "grad_norm": 2.0471846541480345, "learning_rate": 4.826796344376955e-07, "loss": 0.6514, "step": 28165 }, { "epoch": 0.8632462915287483, "grad_norm": 0.7968092079718122, "learning_rate": 4.824669031938007e-07, "loss": 0.3817, "step": 28166 }, { "epoch": 0.8632769400514895, "grad_norm": 0.7806246088653589, "learning_rate": 4.822542164623139e-07, "loss": 0.3969, "step": 28167 }, { "epoch": 0.8633075885742307, "grad_norm": 1.794409373773614, "learning_rate": 4.820415742453343e-07, "loss": 0.5712, "step": 28168 }, { "epoch": 0.8633382370969719, "grad_norm": 1.5588732111089738, "learning_rate": 4.818289765449546e-07, "loss": 0.4564, "step": 28169 }, { "epoch": 0.8633688856197131, "grad_norm": 1.8993773513613386, "learning_rate": 4.816164233632692e-07, "loss": 0.595, "step": 28170 }, { "epoch": 0.8633995341424543, "grad_norm": 0.7452840670539739, "learning_rate": 4.814039147023736e-07, "loss": 0.382, "step": 28171 }, { "epoch": 0.8634301826651956, "grad_norm": 0.8123720928102718, "learning_rate": 4.811914505643612e-07, "loss": 0.4131, "step": 28172 }, { "epoch": 0.8634608311879367, "grad_norm": 1.837260308564238, "learning_rate": 4.809790309513263e-07, "loss": 0.5117, "step": 28173 }, { "epoch": 0.863491479710678, "grad_norm": 1.9733036548857397, "learning_rate": 4.807666558653601e-07, "loss": 0.6257, "step": 28174 }, { "epoch": 0.8635221282334191, "grad_norm": 2.0555333527972164, "learning_rate": 4.805543253085571e-07, "loss": 0.5803, "step": 28175 }, { "epoch": 0.8635527767561604, "grad_norm": 2.036908845705834, "learning_rate": 4.803420392830089e-07, "loss": 0.5886, "step": 28176 }, { "epoch": 0.8635834252789015, "grad_norm": 1.9627118351737896, "learning_rate": 4.801297977908076e-07, "loss": 0.5321, "step": 28177 }, { "epoch": 0.8636140738016428, "grad_norm": 1.8626416241628079, "learning_rate": 4.799176008340417e-07, "loss": 0.592, "step": 28178 }, { "epoch": 0.8636447223243839, "grad_norm": 1.7795862959471995, "learning_rate": 4.797054484148061e-07, "loss": 0.5539, "step": 28179 }, { "epoch": 0.8636753708471252, "grad_norm": 1.8111866159796257, "learning_rate": 4.794933405351881e-07, "loss": 0.5439, "step": 28180 }, { "epoch": 0.8637060193698664, "grad_norm": 1.9890736164946636, "learning_rate": 4.792812771972799e-07, "loss": 0.6109, "step": 28181 }, { "epoch": 0.8637366678926076, "grad_norm": 1.8960378320980897, "learning_rate": 4.790692584031692e-07, "loss": 0.6165, "step": 28182 }, { "epoch": 0.8637673164153488, "grad_norm": 1.8569296531756903, "learning_rate": 4.788572841549461e-07, "loss": 0.6128, "step": 28183 }, { "epoch": 0.86379796493809, "grad_norm": 1.7474737821164774, "learning_rate": 4.786453544546993e-07, "loss": 0.6257, "step": 28184 }, { "epoch": 0.8638286134608312, "grad_norm": 1.8843845194629454, "learning_rate": 4.784334693045157e-07, "loss": 0.5669, "step": 28185 }, { "epoch": 0.8638592619835724, "grad_norm": 0.8338476953239702, "learning_rate": 4.782216287064845e-07, "loss": 0.4141, "step": 28186 }, { "epoch": 0.8638899105063136, "grad_norm": 2.1722235759282875, "learning_rate": 4.780098326626931e-07, "loss": 0.6634, "step": 28187 }, { "epoch": 0.8639205590290548, "grad_norm": 2.0374339814385674, "learning_rate": 4.77798081175227e-07, "loss": 0.4959, "step": 28188 }, { "epoch": 0.863951207551796, "grad_norm": 1.9351051527851628, "learning_rate": 4.775863742461745e-07, "loss": 0.6001, "step": 28189 }, { "epoch": 0.8639818560745373, "grad_norm": 1.9802778852049971, "learning_rate": 4.773747118776196e-07, "loss": 0.5649, "step": 28190 }, { "epoch": 0.8640125045972784, "grad_norm": 1.728273203518851, "learning_rate": 4.771630940716487e-07, "loss": 0.5564, "step": 28191 }, { "epoch": 0.8640431531200197, "grad_norm": 1.8752614483022505, "learning_rate": 4.769515208303483e-07, "loss": 0.5795, "step": 28192 }, { "epoch": 0.8640738016427608, "grad_norm": 2.1031860073483997, "learning_rate": 4.7673999215580027e-07, "loss": 0.591, "step": 28193 }, { "epoch": 0.864104450165502, "grad_norm": 1.9262960289542805, "learning_rate": 4.7652850805009086e-07, "loss": 0.6185, "step": 28194 }, { "epoch": 0.8641350986882432, "grad_norm": 1.8111907778472693, "learning_rate": 4.763170685153046e-07, "loss": 0.5882, "step": 28195 }, { "epoch": 0.8641657472109844, "grad_norm": 2.449883529498196, "learning_rate": 4.7610567355352356e-07, "loss": 0.4959, "step": 28196 }, { "epoch": 0.8641963957337256, "grad_norm": 2.0192341176627053, "learning_rate": 4.758943231668284e-07, "loss": 0.6948, "step": 28197 }, { "epoch": 0.8642270442564668, "grad_norm": 1.8314397160757372, "learning_rate": 4.7568301735730626e-07, "loss": 0.5412, "step": 28198 }, { "epoch": 0.864257692779208, "grad_norm": 1.8040474981582195, "learning_rate": 4.754717561270361e-07, "loss": 0.531, "step": 28199 }, { "epoch": 0.8642883413019492, "grad_norm": 2.2031288190426253, "learning_rate": 4.7526053947810127e-07, "loss": 0.6148, "step": 28200 }, { "epoch": 0.8643189898246905, "grad_norm": 0.7872286866100286, "learning_rate": 4.750493674125811e-07, "loss": 0.3982, "step": 28201 }, { "epoch": 0.8643496383474316, "grad_norm": 2.111645981154864, "learning_rate": 4.748382399325574e-07, "loss": 0.5213, "step": 28202 }, { "epoch": 0.8643802868701729, "grad_norm": 1.6859279290377065, "learning_rate": 4.746271570401112e-07, "loss": 0.5725, "step": 28203 }, { "epoch": 0.864410935392914, "grad_norm": 2.0971429833919073, "learning_rate": 4.744161187373203e-07, "loss": 0.603, "step": 28204 }, { "epoch": 0.8644415839156553, "grad_norm": 1.9969178777212837, "learning_rate": 4.742051250262658e-07, "loss": 0.5733, "step": 28205 }, { "epoch": 0.8644722324383964, "grad_norm": 0.7599973008324132, "learning_rate": 4.7399417590902663e-07, "loss": 0.3847, "step": 28206 }, { "epoch": 0.8645028809611377, "grad_norm": 1.8443352507358775, "learning_rate": 4.737832713876805e-07, "loss": 0.5357, "step": 28207 }, { "epoch": 0.8645335294838788, "grad_norm": 1.6225661791914907, "learning_rate": 4.7357241146430533e-07, "loss": 0.437, "step": 28208 }, { "epoch": 0.8645641780066201, "grad_norm": 0.7772027524350723, "learning_rate": 4.7336159614098045e-07, "loss": 0.3998, "step": 28209 }, { "epoch": 0.8645948265293613, "grad_norm": 1.9592873945062281, "learning_rate": 4.7315082541978085e-07, "loss": 0.5263, "step": 28210 }, { "epoch": 0.8646254750521025, "grad_norm": 1.96759810687807, "learning_rate": 4.729400993027855e-07, "loss": 0.5011, "step": 28211 }, { "epoch": 0.8646561235748437, "grad_norm": 1.897593770204818, "learning_rate": 4.7272941779206885e-07, "loss": 0.5643, "step": 28212 }, { "epoch": 0.8646867720975849, "grad_norm": 1.8524119676341848, "learning_rate": 4.725187808897075e-07, "loss": 0.5949, "step": 28213 }, { "epoch": 0.8647174206203261, "grad_norm": 1.940473824929808, "learning_rate": 4.723081885977776e-07, "loss": 0.6193, "step": 28214 }, { "epoch": 0.8647480691430673, "grad_norm": 1.6601587255212267, "learning_rate": 4.720976409183531e-07, "loss": 0.5516, "step": 28215 }, { "epoch": 0.8647787176658085, "grad_norm": 1.896504997033576, "learning_rate": 4.718871378535089e-07, "loss": 0.6239, "step": 28216 }, { "epoch": 0.8648093661885498, "grad_norm": 0.8066759616230323, "learning_rate": 4.716766794053201e-07, "loss": 0.4071, "step": 28217 }, { "epoch": 0.8648400147112909, "grad_norm": 2.017502225943168, "learning_rate": 4.714662655758589e-07, "loss": 0.6314, "step": 28218 }, { "epoch": 0.8648706632340322, "grad_norm": 2.099183543042754, "learning_rate": 4.7125589636719925e-07, "loss": 0.6574, "step": 28219 }, { "epoch": 0.8649013117567733, "grad_norm": 1.9651360314775408, "learning_rate": 4.7104557178141495e-07, "loss": 0.5996, "step": 28220 }, { "epoch": 0.8649319602795146, "grad_norm": 1.9061798453329968, "learning_rate": 4.708352918205761e-07, "loss": 0.5205, "step": 28221 }, { "epoch": 0.8649626088022557, "grad_norm": 2.1256594846699555, "learning_rate": 4.706250564867576e-07, "loss": 0.6234, "step": 28222 }, { "epoch": 0.864993257324997, "grad_norm": 2.0817554393734063, "learning_rate": 4.704148657820279e-07, "loss": 0.5708, "step": 28223 }, { "epoch": 0.8650239058477381, "grad_norm": 1.831876933065554, "learning_rate": 4.7020471970845913e-07, "loss": 0.5381, "step": 28224 }, { "epoch": 0.8650545543704793, "grad_norm": 1.6971415631744933, "learning_rate": 4.6999461826812363e-07, "loss": 0.5676, "step": 28225 }, { "epoch": 0.8650852028932206, "grad_norm": 1.753967995931039, "learning_rate": 4.6978456146308915e-07, "loss": 0.559, "step": 28226 }, { "epoch": 0.8651158514159617, "grad_norm": 1.766489821813985, "learning_rate": 4.695745492954268e-07, "loss": 0.5268, "step": 28227 }, { "epoch": 0.865146499938703, "grad_norm": 1.8111713132827794, "learning_rate": 4.6936458176720603e-07, "loss": 0.5091, "step": 28228 }, { "epoch": 0.8651771484614441, "grad_norm": 1.9642752621739976, "learning_rate": 4.691546588804946e-07, "loss": 0.6314, "step": 28229 }, { "epoch": 0.8652077969841854, "grad_norm": 2.020318749316134, "learning_rate": 4.6894478063736147e-07, "loss": 0.6102, "step": 28230 }, { "epoch": 0.8652384455069265, "grad_norm": 1.7608106726520882, "learning_rate": 4.6873494703987555e-07, "loss": 0.5165, "step": 28231 }, { "epoch": 0.8652690940296678, "grad_norm": 1.9139032053972727, "learning_rate": 4.685251580901029e-07, "loss": 0.498, "step": 28232 }, { "epoch": 0.8652997425524089, "grad_norm": 1.7962899653658604, "learning_rate": 4.683154137901125e-07, "loss": 0.5851, "step": 28233 }, { "epoch": 0.8653303910751502, "grad_norm": 1.732187140655045, "learning_rate": 4.6810571414196817e-07, "loss": 0.5823, "step": 28234 }, { "epoch": 0.8653610395978913, "grad_norm": 1.9072288341406034, "learning_rate": 4.6789605914773827e-07, "loss": 0.5769, "step": 28235 }, { "epoch": 0.8653916881206326, "grad_norm": 1.9666781032772709, "learning_rate": 4.67686448809489e-07, "loss": 0.5688, "step": 28236 }, { "epoch": 0.8654223366433738, "grad_norm": 1.949472399003692, "learning_rate": 4.674768831292836e-07, "loss": 0.6122, "step": 28237 }, { "epoch": 0.865452985166115, "grad_norm": 1.7498659490084607, "learning_rate": 4.672673621091883e-07, "loss": 0.5621, "step": 28238 }, { "epoch": 0.8654836336888562, "grad_norm": 1.8519404859070667, "learning_rate": 4.670578857512681e-07, "loss": 0.6421, "step": 28239 }, { "epoch": 0.8655142822115974, "grad_norm": 1.8000147396636992, "learning_rate": 4.668484540575857e-07, "loss": 0.6135, "step": 28240 }, { "epoch": 0.8655449307343386, "grad_norm": 2.169116277145488, "learning_rate": 4.666390670302062e-07, "loss": 0.5259, "step": 28241 }, { "epoch": 0.8655755792570798, "grad_norm": 1.8351958731985154, "learning_rate": 4.664297246711902e-07, "loss": 0.5227, "step": 28242 }, { "epoch": 0.865606227779821, "grad_norm": 2.048249987959955, "learning_rate": 4.662204269826037e-07, "loss": 0.6433, "step": 28243 }, { "epoch": 0.8656368763025623, "grad_norm": 0.8389590099778653, "learning_rate": 4.660111739665074e-07, "loss": 0.403, "step": 28244 }, { "epoch": 0.8656675248253034, "grad_norm": 1.7936629521600025, "learning_rate": 4.658019656249624e-07, "loss": 0.5674, "step": 28245 }, { "epoch": 0.8656981733480447, "grad_norm": 2.0780309332605267, "learning_rate": 4.6559280196003087e-07, "loss": 0.5484, "step": 28246 }, { "epoch": 0.8657288218707858, "grad_norm": 1.9615540696530573, "learning_rate": 4.6538368297377403e-07, "loss": 0.5786, "step": 28247 }, { "epoch": 0.8657594703935271, "grad_norm": 1.7596128845857957, "learning_rate": 4.6517460866825125e-07, "loss": 0.5382, "step": 28248 }, { "epoch": 0.8657901189162682, "grad_norm": 1.7671668336162956, "learning_rate": 4.649655790455232e-07, "loss": 0.6446, "step": 28249 }, { "epoch": 0.8658207674390095, "grad_norm": 1.8647548547665247, "learning_rate": 4.6475659410765097e-07, "loss": 0.5777, "step": 28250 }, { "epoch": 0.8658514159617506, "grad_norm": 1.7626158574997375, "learning_rate": 4.645476538566912e-07, "loss": 0.5349, "step": 28251 }, { "epoch": 0.8658820644844919, "grad_norm": 0.7791847451286066, "learning_rate": 4.643387582947051e-07, "loss": 0.4168, "step": 28252 }, { "epoch": 0.865912713007233, "grad_norm": 1.9776931725968276, "learning_rate": 4.6412990742374766e-07, "loss": 0.5883, "step": 28253 }, { "epoch": 0.8659433615299743, "grad_norm": 1.826158020771826, "learning_rate": 4.6392110124588055e-07, "loss": 0.5192, "step": 28254 }, { "epoch": 0.8659740100527155, "grad_norm": 1.7358558493382628, "learning_rate": 4.6371233976315935e-07, "loss": 0.6253, "step": 28255 }, { "epoch": 0.8660046585754566, "grad_norm": 1.8492103240258269, "learning_rate": 4.635036229776402e-07, "loss": 0.5466, "step": 28256 }, { "epoch": 0.8660353070981979, "grad_norm": 1.944321882177437, "learning_rate": 4.6329495089138086e-07, "loss": 0.5682, "step": 28257 }, { "epoch": 0.866065955620939, "grad_norm": 1.9489769333758236, "learning_rate": 4.6308632350643756e-07, "loss": 0.6269, "step": 28258 }, { "epoch": 0.8660966041436803, "grad_norm": 2.0977504336438586, "learning_rate": 4.6287774082486523e-07, "loss": 0.5464, "step": 28259 }, { "epoch": 0.8661272526664214, "grad_norm": 1.6429431545545745, "learning_rate": 4.62669202848719e-07, "loss": 0.5261, "step": 28260 }, { "epoch": 0.8661579011891627, "grad_norm": 1.9245650760019266, "learning_rate": 4.624607095800543e-07, "loss": 0.6369, "step": 28261 }, { "epoch": 0.8661885497119038, "grad_norm": 1.760652021267608, "learning_rate": 4.622522610209257e-07, "loss": 0.5426, "step": 28262 }, { "epoch": 0.8662191982346451, "grad_norm": 0.792557753706329, "learning_rate": 4.6204385717338705e-07, "loss": 0.4069, "step": 28263 }, { "epoch": 0.8662498467573863, "grad_norm": 1.8848405016821244, "learning_rate": 4.6183549803948903e-07, "loss": 0.487, "step": 28264 }, { "epoch": 0.8662804952801275, "grad_norm": 1.9657120368633791, "learning_rate": 4.6162718362128933e-07, "loss": 0.558, "step": 28265 }, { "epoch": 0.8663111438028687, "grad_norm": 2.0536803814676268, "learning_rate": 4.6141891392083804e-07, "loss": 0.6461, "step": 28266 }, { "epoch": 0.8663417923256099, "grad_norm": 2.0024672851733314, "learning_rate": 4.612106889401863e-07, "loss": 0.5875, "step": 28267 }, { "epoch": 0.8663724408483511, "grad_norm": 1.9333376752755436, "learning_rate": 4.610025086813874e-07, "loss": 0.5405, "step": 28268 }, { "epoch": 0.8664030893710923, "grad_norm": 1.748048995272089, "learning_rate": 4.6079437314649257e-07, "loss": 0.4828, "step": 28269 }, { "epoch": 0.8664337378938335, "grad_norm": 1.9661723815109375, "learning_rate": 4.605862823375512e-07, "loss": 0.5693, "step": 28270 }, { "epoch": 0.8664643864165747, "grad_norm": 1.8817142318773101, "learning_rate": 4.6037823625661504e-07, "loss": 0.5191, "step": 28271 }, { "epoch": 0.8664950349393159, "grad_norm": 1.7828811183366586, "learning_rate": 4.601702349057335e-07, "loss": 0.6342, "step": 28272 }, { "epoch": 0.8665256834620572, "grad_norm": 2.064547336841913, "learning_rate": 4.599622782869573e-07, "loss": 0.6446, "step": 28273 }, { "epoch": 0.8665563319847983, "grad_norm": 1.911624404233292, "learning_rate": 4.5975436640233407e-07, "loss": 0.5149, "step": 28274 }, { "epoch": 0.8665869805075396, "grad_norm": 1.7051572843756109, "learning_rate": 4.5954649925391116e-07, "loss": 0.5562, "step": 28275 }, { "epoch": 0.8666176290302807, "grad_norm": 1.6848807339766592, "learning_rate": 4.593386768437402e-07, "loss": 0.5001, "step": 28276 }, { "epoch": 0.866648277553022, "grad_norm": 0.8104284657301735, "learning_rate": 4.591308991738669e-07, "loss": 0.4067, "step": 28277 }, { "epoch": 0.8666789260757631, "grad_norm": 1.8165296056142135, "learning_rate": 4.589231662463373e-07, "loss": 0.5877, "step": 28278 }, { "epoch": 0.8667095745985044, "grad_norm": 2.1268194303569117, "learning_rate": 4.587154780632003e-07, "loss": 0.5986, "step": 28279 }, { "epoch": 0.8667402231212455, "grad_norm": 1.8182412145092366, "learning_rate": 4.585078346265015e-07, "loss": 0.5986, "step": 28280 }, { "epoch": 0.8667708716439868, "grad_norm": 1.7138757820415467, "learning_rate": 4.5830023593828764e-07, "loss": 0.4622, "step": 28281 }, { "epoch": 0.866801520166728, "grad_norm": 1.857125583442514, "learning_rate": 4.5809268200060265e-07, "loss": 0.5499, "step": 28282 }, { "epoch": 0.8668321686894692, "grad_norm": 2.20058943715322, "learning_rate": 4.578851728154932e-07, "loss": 0.6394, "step": 28283 }, { "epoch": 0.8668628172122104, "grad_norm": 1.8758507167766423, "learning_rate": 4.576777083850037e-07, "loss": 0.6612, "step": 28284 }, { "epoch": 0.8668934657349516, "grad_norm": 1.9719453035416654, "learning_rate": 4.5747028871117815e-07, "loss": 0.6043, "step": 28285 }, { "epoch": 0.8669241142576928, "grad_norm": 0.8187881521823875, "learning_rate": 4.572629137960588e-07, "loss": 0.4186, "step": 28286 }, { "epoch": 0.8669547627804339, "grad_norm": 1.8846806344794786, "learning_rate": 4.570555836416907e-07, "loss": 0.4989, "step": 28287 }, { "epoch": 0.8669854113031752, "grad_norm": 1.957434831831752, "learning_rate": 4.56848298250116e-07, "loss": 0.6199, "step": 28288 }, { "epoch": 0.8670160598259163, "grad_norm": 1.9530635097497462, "learning_rate": 4.566410576233782e-07, "loss": 0.5386, "step": 28289 }, { "epoch": 0.8670467083486576, "grad_norm": 2.1044793452508777, "learning_rate": 4.5643386176351777e-07, "loss": 0.5946, "step": 28290 }, { "epoch": 0.8670773568713988, "grad_norm": 1.9876557348664943, "learning_rate": 4.562267106725776e-07, "loss": 0.6604, "step": 28291 }, { "epoch": 0.86710800539414, "grad_norm": 0.7743718740689581, "learning_rate": 4.560196043525983e-07, "loss": 0.4053, "step": 28292 }, { "epoch": 0.8671386539168812, "grad_norm": 1.863423548221988, "learning_rate": 4.5581254280562094e-07, "loss": 0.6874, "step": 28293 }, { "epoch": 0.8671693024396224, "grad_norm": 1.8739530660224657, "learning_rate": 4.5560552603368334e-07, "loss": 0.5977, "step": 28294 }, { "epoch": 0.8671999509623636, "grad_norm": 1.824773325352809, "learning_rate": 4.5539855403882895e-07, "loss": 0.528, "step": 28295 }, { "epoch": 0.8672305994851048, "grad_norm": 2.0648511043573587, "learning_rate": 4.551916268230955e-07, "loss": 0.6832, "step": 28296 }, { "epoch": 0.867261248007846, "grad_norm": 1.8570721170136453, "learning_rate": 4.549847443885208e-07, "loss": 0.6454, "step": 28297 }, { "epoch": 0.8672918965305872, "grad_norm": 1.8056287328451441, "learning_rate": 4.5477790673714437e-07, "loss": 0.6337, "step": 28298 }, { "epoch": 0.8673225450533284, "grad_norm": 1.9244176130765334, "learning_rate": 4.545711138710046e-07, "loss": 0.4977, "step": 28299 }, { "epoch": 0.8673531935760697, "grad_norm": 0.768658613751991, "learning_rate": 4.543643657921387e-07, "loss": 0.3865, "step": 28300 }, { "epoch": 0.8673838420988108, "grad_norm": 1.9908347052178526, "learning_rate": 4.5415766250258343e-07, "loss": 0.6306, "step": 28301 }, { "epoch": 0.8674144906215521, "grad_norm": 1.796563329561357, "learning_rate": 4.53951004004376e-07, "loss": 0.5126, "step": 28302 }, { "epoch": 0.8674451391442932, "grad_norm": 2.1523382103053246, "learning_rate": 4.5374439029955307e-07, "loss": 0.7131, "step": 28303 }, { "epoch": 0.8674757876670345, "grad_norm": 0.7847170824844732, "learning_rate": 4.535378213901498e-07, "loss": 0.3739, "step": 28304 }, { "epoch": 0.8675064361897756, "grad_norm": 2.0712835247228556, "learning_rate": 4.533312972781995e-07, "loss": 0.5655, "step": 28305 }, { "epoch": 0.8675370847125169, "grad_norm": 0.7752997478808696, "learning_rate": 4.5312481796574157e-07, "loss": 0.3788, "step": 28306 }, { "epoch": 0.867567733235258, "grad_norm": 0.8147656532741624, "learning_rate": 4.529183834548073e-07, "loss": 0.4095, "step": 28307 }, { "epoch": 0.8675983817579993, "grad_norm": 1.9506958486938328, "learning_rate": 4.5271199374743226e-07, "loss": 0.4997, "step": 28308 }, { "epoch": 0.8676290302807405, "grad_norm": 1.73387594056595, "learning_rate": 4.5250564884564864e-07, "loss": 0.5332, "step": 28309 }, { "epoch": 0.8676596788034817, "grad_norm": 1.8065451915877204, "learning_rate": 4.522993487514904e-07, "loss": 0.5625, "step": 28310 }, { "epoch": 0.8676903273262229, "grad_norm": 1.8278682154249914, "learning_rate": 4.5209309346699093e-07, "loss": 0.5557, "step": 28311 }, { "epoch": 0.8677209758489641, "grad_norm": 2.174144037873075, "learning_rate": 4.518868829941814e-07, "loss": 0.5766, "step": 28312 }, { "epoch": 0.8677516243717053, "grad_norm": 1.6714953086003448, "learning_rate": 4.516807173350934e-07, "loss": 0.5717, "step": 28313 }, { "epoch": 0.8677822728944465, "grad_norm": 1.5419402901802692, "learning_rate": 4.514745964917605e-07, "loss": 0.5817, "step": 28314 }, { "epoch": 0.8678129214171877, "grad_norm": 3.196264889646702, "learning_rate": 4.512685204662115e-07, "loss": 0.6384, "step": 28315 }, { "epoch": 0.867843569939929, "grad_norm": 1.8146335150834418, "learning_rate": 4.51062489260477e-07, "loss": 0.6102, "step": 28316 }, { "epoch": 0.8678742184626701, "grad_norm": 2.0606460437631937, "learning_rate": 4.5085650287658875e-07, "loss": 0.5983, "step": 28317 }, { "epoch": 0.8679048669854112, "grad_norm": 1.9487458034276337, "learning_rate": 4.506505613165746e-07, "loss": 0.6123, "step": 28318 }, { "epoch": 0.8679355155081525, "grad_norm": 1.9480493737009146, "learning_rate": 4.5044466458246563e-07, "loss": 0.659, "step": 28319 }, { "epoch": 0.8679661640308937, "grad_norm": 1.6502801989892875, "learning_rate": 4.50238812676288e-07, "loss": 0.5825, "step": 28320 }, { "epoch": 0.8679968125536349, "grad_norm": 1.7300308529628687, "learning_rate": 4.500330056000718e-07, "loss": 0.4266, "step": 28321 }, { "epoch": 0.8680274610763761, "grad_norm": 2.1893154194723623, "learning_rate": 4.498272433558454e-07, "loss": 0.6423, "step": 28322 }, { "epoch": 0.8680581095991173, "grad_norm": 1.9420693446644242, "learning_rate": 4.4962152594563436e-07, "loss": 0.5521, "step": 28323 }, { "epoch": 0.8680887581218585, "grad_norm": 1.9311858366289971, "learning_rate": 4.494158533714665e-07, "loss": 0.6002, "step": 28324 }, { "epoch": 0.8681194066445997, "grad_norm": 2.0525045997512437, "learning_rate": 4.4921022563536974e-07, "loss": 0.5191, "step": 28325 }, { "epoch": 0.8681500551673409, "grad_norm": 2.1431351758776627, "learning_rate": 4.4900464273936793e-07, "loss": 0.6364, "step": 28326 }, { "epoch": 0.8681807036900822, "grad_norm": 1.880914715789851, "learning_rate": 4.487991046854878e-07, "loss": 0.5748, "step": 28327 }, { "epoch": 0.8682113522128233, "grad_norm": 1.7032695007148944, "learning_rate": 4.4859361147575553e-07, "loss": 0.4393, "step": 28328 }, { "epoch": 0.8682420007355646, "grad_norm": 1.9361118642519863, "learning_rate": 4.4838816311219445e-07, "loss": 0.6154, "step": 28329 }, { "epoch": 0.8682726492583057, "grad_norm": 0.7793396077418914, "learning_rate": 4.4818275959682967e-07, "loss": 0.3961, "step": 28330 }, { "epoch": 0.868303297781047, "grad_norm": 2.0684760093531844, "learning_rate": 4.4797740093168395e-07, "loss": 0.5416, "step": 28331 }, { "epoch": 0.8683339463037881, "grad_norm": 0.7723132006702144, "learning_rate": 4.4777208711878186e-07, "loss": 0.3966, "step": 28332 }, { "epoch": 0.8683645948265294, "grad_norm": 1.7140959037220134, "learning_rate": 4.475668181601472e-07, "loss": 0.5889, "step": 28333 }, { "epoch": 0.8683952433492705, "grad_norm": 0.7795889615997111, "learning_rate": 4.473615940578002e-07, "loss": 0.3793, "step": 28334 }, { "epoch": 0.8684258918720118, "grad_norm": 1.8971473310344842, "learning_rate": 4.4715641481376414e-07, "loss": 0.564, "step": 28335 }, { "epoch": 0.868456540394753, "grad_norm": 1.652306317810122, "learning_rate": 4.4695128043006187e-07, "loss": 0.5021, "step": 28336 }, { "epoch": 0.8684871889174942, "grad_norm": 1.965597528748953, "learning_rate": 4.467461909087129e-07, "loss": 0.6458, "step": 28337 }, { "epoch": 0.8685178374402354, "grad_norm": 1.8158411367554645, "learning_rate": 4.465411462517394e-07, "loss": 0.5533, "step": 28338 }, { "epoch": 0.8685484859629766, "grad_norm": 2.169994118816739, "learning_rate": 4.463361464611604e-07, "loss": 0.5951, "step": 28339 }, { "epoch": 0.8685791344857178, "grad_norm": 0.7761031742048309, "learning_rate": 4.46131191538996e-07, "loss": 0.3818, "step": 28340 }, { "epoch": 0.868609783008459, "grad_norm": 1.845896119955678, "learning_rate": 4.459262814872672e-07, "loss": 0.5577, "step": 28341 }, { "epoch": 0.8686404315312002, "grad_norm": 0.7690665166289922, "learning_rate": 4.457214163079915e-07, "loss": 0.3885, "step": 28342 }, { "epoch": 0.8686710800539414, "grad_norm": 1.8469319184559432, "learning_rate": 4.455165960031876e-07, "loss": 0.5589, "step": 28343 }, { "epoch": 0.8687017285766826, "grad_norm": 1.9357732012266298, "learning_rate": 4.4531182057487464e-07, "loss": 0.5855, "step": 28344 }, { "epoch": 0.8687323770994239, "grad_norm": 2.0376982494869496, "learning_rate": 4.4510709002506924e-07, "loss": 0.5641, "step": 28345 }, { "epoch": 0.868763025622165, "grad_norm": 1.9432765580025997, "learning_rate": 4.449024043557887e-07, "loss": 0.6336, "step": 28346 }, { "epoch": 0.8687936741449063, "grad_norm": 1.8204393339523515, "learning_rate": 4.446977635690514e-07, "loss": 0.5443, "step": 28347 }, { "epoch": 0.8688243226676474, "grad_norm": 1.8663011523663247, "learning_rate": 4.4449316766687177e-07, "loss": 0.5353, "step": 28348 }, { "epoch": 0.8688549711903886, "grad_norm": 2.8766479276333783, "learning_rate": 4.442886166512672e-07, "loss": 0.5259, "step": 28349 }, { "epoch": 0.8688856197131298, "grad_norm": 1.914071115175663, "learning_rate": 4.440841105242516e-07, "loss": 0.5639, "step": 28350 }, { "epoch": 0.868916268235871, "grad_norm": 1.739981624364872, "learning_rate": 4.438796492878411e-07, "loss": 0.5658, "step": 28351 }, { "epoch": 0.8689469167586122, "grad_norm": 1.9293437711794323, "learning_rate": 4.436752329440508e-07, "loss": 0.6151, "step": 28352 }, { "epoch": 0.8689775652813534, "grad_norm": 0.7774275950335542, "learning_rate": 4.434708614948935e-07, "loss": 0.3994, "step": 28353 }, { "epoch": 0.8690082138040947, "grad_norm": 1.976724900623219, "learning_rate": 4.432665349423837e-07, "loss": 0.5571, "step": 28354 }, { "epoch": 0.8690388623268358, "grad_norm": 2.0021257768728895, "learning_rate": 4.430622532885354e-07, "loss": 0.6175, "step": 28355 }, { "epoch": 0.8690695108495771, "grad_norm": 1.7872235099940874, "learning_rate": 4.4285801653535964e-07, "loss": 0.6164, "step": 28356 }, { "epoch": 0.8691001593723182, "grad_norm": 1.8022408832447618, "learning_rate": 4.4265382468486993e-07, "loss": 0.6673, "step": 28357 }, { "epoch": 0.8691308078950595, "grad_norm": 1.8848462899020524, "learning_rate": 4.424496777390791e-07, "loss": 0.6125, "step": 28358 }, { "epoch": 0.8691614564178006, "grad_norm": 2.0865939821727704, "learning_rate": 4.4224557569999715e-07, "loss": 0.5096, "step": 28359 }, { "epoch": 0.8691921049405419, "grad_norm": 1.756156620738281, "learning_rate": 4.4204151856963586e-07, "loss": 0.5798, "step": 28360 }, { "epoch": 0.869222753463283, "grad_norm": 1.8292780671049151, "learning_rate": 4.418375063500041e-07, "loss": 0.6038, "step": 28361 }, { "epoch": 0.8692534019860243, "grad_norm": 1.6999361474712384, "learning_rate": 4.416335390431159e-07, "loss": 0.5638, "step": 28362 }, { "epoch": 0.8692840505087654, "grad_norm": 1.8223492498831226, "learning_rate": 4.414296166509785e-07, "loss": 0.5139, "step": 28363 }, { "epoch": 0.8693146990315067, "grad_norm": 1.6822138062758232, "learning_rate": 4.412257391756003e-07, "loss": 0.5349, "step": 28364 }, { "epoch": 0.8693453475542479, "grad_norm": 2.157308368935073, "learning_rate": 4.410219066189919e-07, "loss": 0.5809, "step": 28365 }, { "epoch": 0.8693759960769891, "grad_norm": 1.7719584518800096, "learning_rate": 4.408181189831612e-07, "loss": 0.5514, "step": 28366 }, { "epoch": 0.8694066445997303, "grad_norm": 1.8863934818225854, "learning_rate": 4.4061437627011597e-07, "loss": 0.5935, "step": 28367 }, { "epoch": 0.8694372931224715, "grad_norm": 1.9410762277963207, "learning_rate": 4.4041067848186347e-07, "loss": 0.6213, "step": 28368 }, { "epoch": 0.8694679416452127, "grad_norm": 2.001419266769122, "learning_rate": 4.402070256204111e-07, "loss": 0.5255, "step": 28369 }, { "epoch": 0.8694985901679539, "grad_norm": 0.7773428766707529, "learning_rate": 4.4000341768776654e-07, "loss": 0.4017, "step": 28370 }, { "epoch": 0.8695292386906951, "grad_norm": 2.1492758161559316, "learning_rate": 4.39799854685935e-07, "loss": 0.5456, "step": 28371 }, { "epoch": 0.8695598872134364, "grad_norm": 2.291324891059189, "learning_rate": 4.3959633661692145e-07, "loss": 0.5682, "step": 28372 }, { "epoch": 0.8695905357361775, "grad_norm": 0.8209381561689241, "learning_rate": 4.3939286348273215e-07, "loss": 0.418, "step": 28373 }, { "epoch": 0.8696211842589188, "grad_norm": 2.246373559126548, "learning_rate": 4.391894352853726e-07, "loss": 0.6507, "step": 28374 }, { "epoch": 0.8696518327816599, "grad_norm": 0.7709723903244764, "learning_rate": 4.389860520268457e-07, "loss": 0.3611, "step": 28375 }, { "epoch": 0.8696824813044012, "grad_norm": 2.085490463882064, "learning_rate": 4.3878271370915606e-07, "loss": 0.6171, "step": 28376 }, { "epoch": 0.8697131298271423, "grad_norm": 1.9992563694849683, "learning_rate": 4.3857942033430857e-07, "loss": 0.4708, "step": 28377 }, { "epoch": 0.8697437783498836, "grad_norm": 1.8101787884893628, "learning_rate": 4.3837617190430393e-07, "loss": 0.5177, "step": 28378 }, { "epoch": 0.8697744268726247, "grad_norm": 1.865528534568364, "learning_rate": 4.3817296842114667e-07, "loss": 0.5949, "step": 28379 }, { "epoch": 0.8698050753953659, "grad_norm": 0.7376603636636875, "learning_rate": 4.379698098868368e-07, "loss": 0.3964, "step": 28380 }, { "epoch": 0.8698357239181072, "grad_norm": 1.8880701481828857, "learning_rate": 4.3776669630338e-07, "loss": 0.6349, "step": 28381 }, { "epoch": 0.8698663724408483, "grad_norm": 1.8004986795714322, "learning_rate": 4.375636276727746e-07, "loss": 0.5446, "step": 28382 }, { "epoch": 0.8698970209635896, "grad_norm": 2.00687373668341, "learning_rate": 4.373606039970213e-07, "loss": 0.6044, "step": 28383 }, { "epoch": 0.8699276694863307, "grad_norm": 1.7531425135506729, "learning_rate": 4.3715762527812125e-07, "loss": 0.5686, "step": 28384 }, { "epoch": 0.869958318009072, "grad_norm": 1.7873757036223032, "learning_rate": 4.3695469151807555e-07, "loss": 0.5702, "step": 28385 }, { "epoch": 0.8699889665318131, "grad_norm": 1.7985477524216829, "learning_rate": 4.3675180271888217e-07, "loss": 0.5842, "step": 28386 }, { "epoch": 0.8700196150545544, "grad_norm": 2.072856857923916, "learning_rate": 4.365489588825406e-07, "loss": 0.5167, "step": 28387 }, { "epoch": 0.8700502635772955, "grad_norm": 1.9072052719831805, "learning_rate": 4.3634616001105024e-07, "loss": 0.631, "step": 28388 }, { "epoch": 0.8700809121000368, "grad_norm": 0.7794479466527564, "learning_rate": 4.3614340610640905e-07, "loss": 0.3924, "step": 28389 }, { "epoch": 0.870111560622778, "grad_norm": 1.919609137560368, "learning_rate": 4.3594069717061484e-07, "loss": 0.5096, "step": 28390 }, { "epoch": 0.8701422091455192, "grad_norm": 1.7124113978518103, "learning_rate": 4.3573803320566264e-07, "loss": 0.5394, "step": 28391 }, { "epoch": 0.8701728576682604, "grad_norm": 1.779672062819054, "learning_rate": 4.355354142135537e-07, "loss": 0.546, "step": 28392 }, { "epoch": 0.8702035061910016, "grad_norm": 2.021980314544473, "learning_rate": 4.35332840196282e-07, "loss": 0.5516, "step": 28393 }, { "epoch": 0.8702341547137428, "grad_norm": 1.6582118332978353, "learning_rate": 4.35130311155843e-07, "loss": 0.5009, "step": 28394 }, { "epoch": 0.870264803236484, "grad_norm": 1.78049269131355, "learning_rate": 4.349278270942325e-07, "loss": 0.4851, "step": 28395 }, { "epoch": 0.8702954517592252, "grad_norm": 1.9785830842739236, "learning_rate": 4.347253880134467e-07, "loss": 0.5767, "step": 28396 }, { "epoch": 0.8703261002819664, "grad_norm": 2.0292269423319182, "learning_rate": 4.3452299391548047e-07, "loss": 0.573, "step": 28397 }, { "epoch": 0.8703567488047076, "grad_norm": 1.883271830554814, "learning_rate": 4.343206448023263e-07, "loss": 0.5894, "step": 28398 }, { "epoch": 0.8703873973274489, "grad_norm": 1.9725748063374529, "learning_rate": 4.3411834067597913e-07, "loss": 0.6114, "step": 28399 }, { "epoch": 0.87041804585019, "grad_norm": 1.9507111180805312, "learning_rate": 4.33916081538433e-07, "loss": 0.5184, "step": 28400 }, { "epoch": 0.8704486943729313, "grad_norm": 1.8743131535633903, "learning_rate": 4.3371386739167966e-07, "loss": 0.6432, "step": 28401 }, { "epoch": 0.8704793428956724, "grad_norm": 1.9729234950927246, "learning_rate": 4.335116982377108e-07, "loss": 0.6259, "step": 28402 }, { "epoch": 0.8705099914184137, "grad_norm": 2.0648388703335434, "learning_rate": 4.333095740785209e-07, "loss": 0.5698, "step": 28403 }, { "epoch": 0.8705406399411548, "grad_norm": 1.7716098135557778, "learning_rate": 4.331074949161002e-07, "loss": 0.5485, "step": 28404 }, { "epoch": 0.8705712884638961, "grad_norm": 2.0247368113115343, "learning_rate": 4.329054607524391e-07, "loss": 0.5599, "step": 28405 }, { "epoch": 0.8706019369866372, "grad_norm": 1.8170838418839357, "learning_rate": 4.3270347158952894e-07, "loss": 0.6316, "step": 28406 }, { "epoch": 0.8706325855093785, "grad_norm": 2.1534829158241466, "learning_rate": 4.325015274293598e-07, "loss": 0.6245, "step": 28407 }, { "epoch": 0.8706632340321196, "grad_norm": 1.8297470783816658, "learning_rate": 4.3229962827392336e-07, "loss": 0.5446, "step": 28408 }, { "epoch": 0.8706938825548609, "grad_norm": 1.9108860509776073, "learning_rate": 4.320977741252058e-07, "loss": 0.593, "step": 28409 }, { "epoch": 0.8707245310776021, "grad_norm": 1.900422931730685, "learning_rate": 4.318959649851978e-07, "loss": 0.6213, "step": 28410 }, { "epoch": 0.8707551796003432, "grad_norm": 1.994633054041629, "learning_rate": 4.3169420085588885e-07, "loss": 0.5409, "step": 28411 }, { "epoch": 0.8707858281230845, "grad_norm": 1.9585770581902275, "learning_rate": 4.3149248173926575e-07, "loss": 0.6389, "step": 28412 }, { "epoch": 0.8708164766458256, "grad_norm": 1.502370835837105, "learning_rate": 4.31290807637314e-07, "loss": 0.5713, "step": 28413 }, { "epoch": 0.8708471251685669, "grad_norm": 1.822520557069089, "learning_rate": 4.3108917855202494e-07, "loss": 0.6198, "step": 28414 }, { "epoch": 0.870877773691308, "grad_norm": 1.887797914773563, "learning_rate": 4.308875944853824e-07, "loss": 0.4772, "step": 28415 }, { "epoch": 0.8709084222140493, "grad_norm": 1.7575030146738069, "learning_rate": 4.3068605543937434e-07, "loss": 0.5541, "step": 28416 }, { "epoch": 0.8709390707367904, "grad_norm": 2.271174111306202, "learning_rate": 4.304845614159842e-07, "loss": 0.5924, "step": 28417 }, { "epoch": 0.8709697192595317, "grad_norm": 0.8022515892749192, "learning_rate": 4.3028311241719964e-07, "loss": 0.3681, "step": 28418 }, { "epoch": 0.8710003677822729, "grad_norm": 1.8939310612701872, "learning_rate": 4.3008170844500543e-07, "loss": 0.6034, "step": 28419 }, { "epoch": 0.8710310163050141, "grad_norm": 2.0314588610521724, "learning_rate": 4.2988034950138424e-07, "loss": 0.6652, "step": 28420 }, { "epoch": 0.8710616648277553, "grad_norm": 0.791030585026227, "learning_rate": 4.2967903558832125e-07, "loss": 0.4144, "step": 28421 }, { "epoch": 0.8710923133504965, "grad_norm": 2.0839548032123782, "learning_rate": 4.294777667078015e-07, "loss": 0.5522, "step": 28422 }, { "epoch": 0.8711229618732377, "grad_norm": 1.9054205830552728, "learning_rate": 4.292765428618051e-07, "loss": 0.5842, "step": 28423 }, { "epoch": 0.8711536103959789, "grad_norm": 1.8191745639564987, "learning_rate": 4.2907536405231767e-07, "loss": 0.5732, "step": 28424 }, { "epoch": 0.8711842589187201, "grad_norm": 2.2079690048857783, "learning_rate": 4.288742302813192e-07, "loss": 0.6078, "step": 28425 }, { "epoch": 0.8712149074414614, "grad_norm": 1.8514979881199647, "learning_rate": 4.2867314155079275e-07, "loss": 0.6187, "step": 28426 }, { "epoch": 0.8712455559642025, "grad_norm": 1.7045772736969624, "learning_rate": 4.284720978627205e-07, "loss": 0.5432, "step": 28427 }, { "epoch": 0.8712762044869438, "grad_norm": 1.811597202972157, "learning_rate": 4.282710992190814e-07, "loss": 0.586, "step": 28428 }, { "epoch": 0.8713068530096849, "grad_norm": 1.9541988072946659, "learning_rate": 4.280701456218567e-07, "loss": 0.597, "step": 28429 }, { "epoch": 0.8713375015324262, "grad_norm": 2.112376305248507, "learning_rate": 4.2786923707302755e-07, "loss": 0.6151, "step": 28430 }, { "epoch": 0.8713681500551673, "grad_norm": 1.791375944836535, "learning_rate": 4.2766837357457235e-07, "loss": 0.6283, "step": 28431 }, { "epoch": 0.8713987985779086, "grad_norm": 0.8181718399436338, "learning_rate": 4.2746755512846904e-07, "loss": 0.3914, "step": 28432 }, { "epoch": 0.8714294471006497, "grad_norm": 1.5856801931103683, "learning_rate": 4.2726678173669935e-07, "loss": 0.5391, "step": 28433 }, { "epoch": 0.871460095623391, "grad_norm": 2.026997048161017, "learning_rate": 4.270660534012394e-07, "loss": 0.5611, "step": 28434 }, { "epoch": 0.8714907441461321, "grad_norm": 1.9273653448111776, "learning_rate": 4.2686537012406883e-07, "loss": 0.5726, "step": 28435 }, { "epoch": 0.8715213926688734, "grad_norm": 1.8120418592847507, "learning_rate": 4.2666473190716264e-07, "loss": 0.5523, "step": 28436 }, { "epoch": 0.8715520411916146, "grad_norm": 0.8069922894954162, "learning_rate": 4.2646413875249925e-07, "loss": 0.4113, "step": 28437 }, { "epoch": 0.8715826897143558, "grad_norm": 1.7859287298889504, "learning_rate": 4.2626359066205546e-07, "loss": 0.5367, "step": 28438 }, { "epoch": 0.871613338237097, "grad_norm": 1.8126184426738974, "learning_rate": 4.2606308763780577e-07, "loss": 0.6245, "step": 28439 }, { "epoch": 0.8716439867598382, "grad_norm": 1.8045836389070664, "learning_rate": 4.25862629681727e-07, "loss": 0.5668, "step": 28440 }, { "epoch": 0.8716746352825794, "grad_norm": 1.7620004423417748, "learning_rate": 4.2566221679579524e-07, "loss": 0.512, "step": 28441 }, { "epoch": 0.8717052838053205, "grad_norm": 1.981230268123318, "learning_rate": 4.2546184898198285e-07, "loss": 0.5704, "step": 28442 }, { "epoch": 0.8717359323280618, "grad_norm": 0.7919695371385532, "learning_rate": 4.2526152624226494e-07, "loss": 0.4023, "step": 28443 }, { "epoch": 0.8717665808508029, "grad_norm": 0.8106642882437706, "learning_rate": 4.250612485786171e-07, "loss": 0.3942, "step": 28444 }, { "epoch": 0.8717972293735442, "grad_norm": 1.8911393477179914, "learning_rate": 4.2486101599301054e-07, "loss": 0.5404, "step": 28445 }, { "epoch": 0.8718278778962854, "grad_norm": 0.7994878040324852, "learning_rate": 4.246608284874193e-07, "loss": 0.3992, "step": 28446 }, { "epoch": 0.8718585264190266, "grad_norm": 1.909554752042239, "learning_rate": 4.2446068606381507e-07, "loss": 0.5398, "step": 28447 }, { "epoch": 0.8718891749417678, "grad_norm": 1.8434638920541802, "learning_rate": 4.2426058872417074e-07, "loss": 0.5488, "step": 28448 }, { "epoch": 0.871919823464509, "grad_norm": 1.8397102541615842, "learning_rate": 4.2406053647045807e-07, "loss": 0.6821, "step": 28449 }, { "epoch": 0.8719504719872502, "grad_norm": 1.7337514292440082, "learning_rate": 4.238605293046466e-07, "loss": 0.5948, "step": 28450 }, { "epoch": 0.8719811205099914, "grad_norm": 2.1135940870081424, "learning_rate": 4.2366056722870865e-07, "loss": 0.6076, "step": 28451 }, { "epoch": 0.8720117690327326, "grad_norm": 1.9866123078630697, "learning_rate": 4.234606502446148e-07, "loss": 0.5532, "step": 28452 }, { "epoch": 0.8720424175554738, "grad_norm": 1.7925454268658576, "learning_rate": 4.23260778354333e-07, "loss": 0.6157, "step": 28453 }, { "epoch": 0.872073066078215, "grad_norm": 1.8511481570618884, "learning_rate": 4.2306095155983387e-07, "loss": 0.5837, "step": 28454 }, { "epoch": 0.8721037146009563, "grad_norm": 0.8208886532975138, "learning_rate": 4.2286116986308747e-07, "loss": 0.4042, "step": 28455 }, { "epoch": 0.8721343631236974, "grad_norm": 1.8125427449575247, "learning_rate": 4.2266143326605947e-07, "loss": 0.6246, "step": 28456 }, { "epoch": 0.8721650116464387, "grad_norm": 0.84118287267903, "learning_rate": 4.224617417707211e-07, "loss": 0.4009, "step": 28457 }, { "epoch": 0.8721956601691798, "grad_norm": 2.186176144203049, "learning_rate": 4.222620953790374e-07, "loss": 0.594, "step": 28458 }, { "epoch": 0.8722263086919211, "grad_norm": 1.8565820319183741, "learning_rate": 4.2206249409297627e-07, "loss": 0.4792, "step": 28459 }, { "epoch": 0.8722569572146622, "grad_norm": 1.847845993921749, "learning_rate": 4.218629379145056e-07, "loss": 0.5498, "step": 28460 }, { "epoch": 0.8722876057374035, "grad_norm": 1.9498662337121542, "learning_rate": 4.2166342684558994e-07, "loss": 0.6142, "step": 28461 }, { "epoch": 0.8723182542601446, "grad_norm": 1.7337086978469214, "learning_rate": 4.214639608881965e-07, "loss": 0.4886, "step": 28462 }, { "epoch": 0.8723489027828859, "grad_norm": 1.7956063929466541, "learning_rate": 4.212645400442905e-07, "loss": 0.6163, "step": 28463 }, { "epoch": 0.872379551305627, "grad_norm": 2.202798959570389, "learning_rate": 4.210651643158353e-07, "loss": 0.6395, "step": 28464 }, { "epoch": 0.8724101998283683, "grad_norm": 1.8594391467135425, "learning_rate": 4.2086583370479717e-07, "loss": 0.5995, "step": 28465 }, { "epoch": 0.8724408483511095, "grad_norm": 2.0166845142549175, "learning_rate": 4.2066654821314e-07, "loss": 0.6347, "step": 28466 }, { "epoch": 0.8724714968738507, "grad_norm": 1.8731679535296564, "learning_rate": 4.204673078428267e-07, "loss": 0.5618, "step": 28467 }, { "epoch": 0.8725021453965919, "grad_norm": 2.014228536214626, "learning_rate": 4.202681125958213e-07, "loss": 0.647, "step": 28468 }, { "epoch": 0.8725327939193331, "grad_norm": 1.8275168368119663, "learning_rate": 4.20068962474085e-07, "loss": 0.5922, "step": 28469 }, { "epoch": 0.8725634424420743, "grad_norm": 1.6808810958154117, "learning_rate": 4.198698574795812e-07, "loss": 0.5199, "step": 28470 }, { "epoch": 0.8725940909648155, "grad_norm": 1.8303096831153032, "learning_rate": 4.196707976142722e-07, "loss": 0.6053, "step": 28471 }, { "epoch": 0.8726247394875567, "grad_norm": 1.8378162110328669, "learning_rate": 4.1947178288011815e-07, "loss": 0.569, "step": 28472 }, { "epoch": 0.8726553880102978, "grad_norm": 1.8725087681282977, "learning_rate": 4.1927281327908074e-07, "loss": 0.5609, "step": 28473 }, { "epoch": 0.8726860365330391, "grad_norm": 1.9033147543610138, "learning_rate": 4.1907388881312074e-07, "loss": 0.5692, "step": 28474 }, { "epoch": 0.8727166850557803, "grad_norm": 1.9734838013296043, "learning_rate": 4.1887500948419755e-07, "loss": 0.6413, "step": 28475 }, { "epoch": 0.8727473335785215, "grad_norm": 2.020612821646127, "learning_rate": 4.186761752942714e-07, "loss": 0.5622, "step": 28476 }, { "epoch": 0.8727779821012627, "grad_norm": 1.6598785719686158, "learning_rate": 4.1847738624530007e-07, "loss": 0.5792, "step": 28477 }, { "epoch": 0.8728086306240039, "grad_norm": 1.6988596625873382, "learning_rate": 4.1827864233924374e-07, "loss": 0.5156, "step": 28478 }, { "epoch": 0.8728392791467451, "grad_norm": 1.7937064166418775, "learning_rate": 4.180799435780608e-07, "loss": 0.5725, "step": 28479 }, { "epoch": 0.8728699276694863, "grad_norm": 1.8594297800863606, "learning_rate": 4.1788128996370803e-07, "loss": 0.536, "step": 28480 }, { "epoch": 0.8729005761922275, "grad_norm": 2.067931434074301, "learning_rate": 4.176826814981427e-07, "loss": 0.5862, "step": 28481 }, { "epoch": 0.8729312247149688, "grad_norm": 2.1157266422332914, "learning_rate": 4.174841181833239e-07, "loss": 0.6257, "step": 28482 }, { "epoch": 0.8729618732377099, "grad_norm": 1.7558390528246972, "learning_rate": 4.17285600021205e-07, "loss": 0.5367, "step": 28483 }, { "epoch": 0.8729925217604512, "grad_norm": 2.0567643127280824, "learning_rate": 4.170871270137439e-07, "loss": 0.5877, "step": 28484 }, { "epoch": 0.8730231702831923, "grad_norm": 0.7770677287296838, "learning_rate": 4.168886991628968e-07, "loss": 0.389, "step": 28485 }, { "epoch": 0.8730538188059336, "grad_norm": 1.8259644136071163, "learning_rate": 4.166903164706171e-07, "loss": 0.4971, "step": 28486 }, { "epoch": 0.8730844673286747, "grad_norm": 1.9007211514579296, "learning_rate": 4.164919789388616e-07, "loss": 0.6337, "step": 28487 }, { "epoch": 0.873115115851416, "grad_norm": 1.719444939705008, "learning_rate": 4.16293686569581e-07, "loss": 0.5125, "step": 28488 }, { "epoch": 0.8731457643741571, "grad_norm": 2.0318545608468264, "learning_rate": 4.160954393647337e-07, "loss": 0.5604, "step": 28489 }, { "epoch": 0.8731764128968984, "grad_norm": 1.5685017078028551, "learning_rate": 4.1589723732627094e-07, "loss": 0.5406, "step": 28490 }, { "epoch": 0.8732070614196396, "grad_norm": 0.7854912590025549, "learning_rate": 4.156990804561445e-07, "loss": 0.3843, "step": 28491 }, { "epoch": 0.8732377099423808, "grad_norm": 1.9256879630163608, "learning_rate": 4.155009687563083e-07, "loss": 0.5413, "step": 28492 }, { "epoch": 0.873268358465122, "grad_norm": 1.6581952331449623, "learning_rate": 4.1530290222871474e-07, "loss": 0.5016, "step": 28493 }, { "epoch": 0.8732990069878632, "grad_norm": 1.777733617098108, "learning_rate": 4.151048808753133e-07, "loss": 0.5678, "step": 28494 }, { "epoch": 0.8733296555106044, "grad_norm": 1.8314525513029678, "learning_rate": 4.1490690469805694e-07, "loss": 0.5277, "step": 28495 }, { "epoch": 0.8733603040333456, "grad_norm": 1.7785925142614771, "learning_rate": 4.147089736988963e-07, "loss": 0.444, "step": 28496 }, { "epoch": 0.8733909525560868, "grad_norm": 1.7552580095050128, "learning_rate": 4.14511087879782e-07, "loss": 0.5511, "step": 28497 }, { "epoch": 0.873421601078828, "grad_norm": 1.9851937754686404, "learning_rate": 4.1431324724266306e-07, "loss": 0.6125, "step": 28498 }, { "epoch": 0.8734522496015692, "grad_norm": 1.8070112721260987, "learning_rate": 4.141154517894874e-07, "loss": 0.5121, "step": 28499 }, { "epoch": 0.8734828981243105, "grad_norm": 1.849361587178073, "learning_rate": 4.139177015222073e-07, "loss": 0.514, "step": 28500 }, { "epoch": 0.8735135466470516, "grad_norm": 1.8672816409607664, "learning_rate": 4.137199964427696e-07, "loss": 0.5951, "step": 28501 }, { "epoch": 0.8735441951697929, "grad_norm": 1.9225832680220958, "learning_rate": 4.13522336553121e-07, "loss": 0.5515, "step": 28502 }, { "epoch": 0.873574843692534, "grad_norm": 1.804546700646448, "learning_rate": 4.1332472185521054e-07, "loss": 0.5704, "step": 28503 }, { "epoch": 0.8736054922152752, "grad_norm": 0.7850288899714027, "learning_rate": 4.131271523509861e-07, "loss": 0.3912, "step": 28504 }, { "epoch": 0.8736361407380164, "grad_norm": 1.7529941372354603, "learning_rate": 4.129296280423928e-07, "loss": 0.5131, "step": 28505 }, { "epoch": 0.8736667892607576, "grad_norm": 2.146993243528726, "learning_rate": 4.127321489313768e-07, "loss": 0.5517, "step": 28506 }, { "epoch": 0.8736974377834988, "grad_norm": 1.9676076759959955, "learning_rate": 4.1253471501988495e-07, "loss": 0.5586, "step": 28507 }, { "epoch": 0.87372808630624, "grad_norm": 1.6390550987831647, "learning_rate": 4.1233732630986343e-07, "loss": 0.4864, "step": 28508 }, { "epoch": 0.8737587348289813, "grad_norm": 1.9014859012962506, "learning_rate": 4.121399828032557e-07, "loss": 0.6927, "step": 28509 }, { "epoch": 0.8737893833517224, "grad_norm": 2.0516673028126986, "learning_rate": 4.1194268450200526e-07, "loss": 0.6012, "step": 28510 }, { "epoch": 0.8738200318744637, "grad_norm": 1.9910468706831568, "learning_rate": 4.1174543140805877e-07, "loss": 0.5261, "step": 28511 }, { "epoch": 0.8738506803972048, "grad_norm": 2.0641978615650283, "learning_rate": 4.1154822352335864e-07, "loss": 0.6269, "step": 28512 }, { "epoch": 0.8738813289199461, "grad_norm": 1.883614405056083, "learning_rate": 4.1135106084984724e-07, "loss": 0.6107, "step": 28513 }, { "epoch": 0.8739119774426872, "grad_norm": 1.8965498605039195, "learning_rate": 4.111539433894679e-07, "loss": 0.5727, "step": 28514 }, { "epoch": 0.8739426259654285, "grad_norm": 2.0647318730918514, "learning_rate": 4.109568711441625e-07, "loss": 0.4878, "step": 28515 }, { "epoch": 0.8739732744881696, "grad_norm": 1.9602466390819049, "learning_rate": 4.1075984411587387e-07, "loss": 0.5929, "step": 28516 }, { "epoch": 0.8740039230109109, "grad_norm": 3.5554531550627986, "learning_rate": 4.105628623065422e-07, "loss": 0.5483, "step": 28517 }, { "epoch": 0.874034571533652, "grad_norm": 1.9264598155320019, "learning_rate": 4.1036592571810916e-07, "loss": 0.5593, "step": 28518 }, { "epoch": 0.8740652200563933, "grad_norm": 1.8499360772949482, "learning_rate": 4.1016903435251554e-07, "loss": 0.5518, "step": 28519 }, { "epoch": 0.8740958685791345, "grad_norm": 2.1898438956860353, "learning_rate": 4.099721882117008e-07, "loss": 0.571, "step": 28520 }, { "epoch": 0.8741265171018757, "grad_norm": 1.7835124860448361, "learning_rate": 4.0977538729760344e-07, "loss": 0.578, "step": 28521 }, { "epoch": 0.8741571656246169, "grad_norm": 1.8258993909344359, "learning_rate": 4.0957863161216416e-07, "loss": 0.4309, "step": 28522 }, { "epoch": 0.8741878141473581, "grad_norm": 1.8500655081090234, "learning_rate": 4.0938192115732076e-07, "loss": 0.4575, "step": 28523 }, { "epoch": 0.8742184626700993, "grad_norm": 1.6674994936635172, "learning_rate": 4.091852559350129e-07, "loss": 0.4811, "step": 28524 }, { "epoch": 0.8742491111928405, "grad_norm": 1.6605242702093668, "learning_rate": 4.089886359471762e-07, "loss": 0.48, "step": 28525 }, { "epoch": 0.8742797597155817, "grad_norm": 0.7964520067435842, "learning_rate": 4.087920611957491e-07, "loss": 0.3903, "step": 28526 }, { "epoch": 0.874310408238323, "grad_norm": 1.6790604661681048, "learning_rate": 4.085955316826695e-07, "loss": 0.5116, "step": 28527 }, { "epoch": 0.8743410567610641, "grad_norm": 1.8702888423837958, "learning_rate": 4.083990474098731e-07, "loss": 0.5834, "step": 28528 }, { "epoch": 0.8743717052838054, "grad_norm": 0.8170670005339304, "learning_rate": 4.0820260837929394e-07, "loss": 0.4019, "step": 28529 }, { "epoch": 0.8744023538065465, "grad_norm": 1.6021882059726413, "learning_rate": 4.080062145928709e-07, "loss": 0.5756, "step": 28530 }, { "epoch": 0.8744330023292878, "grad_norm": 1.6061929836709272, "learning_rate": 4.078098660525376e-07, "loss": 0.5157, "step": 28531 }, { "epoch": 0.8744636508520289, "grad_norm": 1.976499150153628, "learning_rate": 4.0761356276022736e-07, "loss": 0.6313, "step": 28532 }, { "epoch": 0.8744942993747702, "grad_norm": 1.7267820645027552, "learning_rate": 4.0741730471787646e-07, "loss": 0.5633, "step": 28533 }, { "epoch": 0.8745249478975113, "grad_norm": 2.153272910782335, "learning_rate": 4.072210919274172e-07, "loss": 0.6828, "step": 28534 }, { "epoch": 0.8745555964202525, "grad_norm": 0.8325228779861654, "learning_rate": 4.0702492439078534e-07, "loss": 0.3918, "step": 28535 }, { "epoch": 0.8745862449429938, "grad_norm": 0.8332539140635591, "learning_rate": 4.068288021099104e-07, "loss": 0.4027, "step": 28536 }, { "epoch": 0.8746168934657349, "grad_norm": 1.8174150013018862, "learning_rate": 4.0663272508672693e-07, "loss": 0.5719, "step": 28537 }, { "epoch": 0.8746475419884762, "grad_norm": 0.7741668866568195, "learning_rate": 4.0643669332316726e-07, "loss": 0.3988, "step": 28538 }, { "epoch": 0.8746781905112173, "grad_norm": 0.7860783648284349, "learning_rate": 4.062407068211621e-07, "loss": 0.3814, "step": 28539 }, { "epoch": 0.8747088390339586, "grad_norm": 1.9392658278813566, "learning_rate": 4.0604476558264106e-07, "loss": 0.6111, "step": 28540 }, { "epoch": 0.8747394875566997, "grad_norm": 1.7845759590409187, "learning_rate": 4.0584886960953806e-07, "loss": 0.4796, "step": 28541 }, { "epoch": 0.874770136079441, "grad_norm": 1.7778729453279085, "learning_rate": 4.0565301890378053e-07, "loss": 0.5178, "step": 28542 }, { "epoch": 0.8748007846021821, "grad_norm": 1.895063310863793, "learning_rate": 4.0545721346730017e-07, "loss": 0.5696, "step": 28543 }, { "epoch": 0.8748314331249234, "grad_norm": 1.8046680833049307, "learning_rate": 4.0526145330202494e-07, "loss": 0.496, "step": 28544 }, { "epoch": 0.8748620816476645, "grad_norm": 1.919196922439976, "learning_rate": 4.0506573840988386e-07, "loss": 0.508, "step": 28545 }, { "epoch": 0.8748927301704058, "grad_norm": 1.85107958419398, "learning_rate": 4.0487006879280646e-07, "loss": 0.4704, "step": 28546 }, { "epoch": 0.874923378693147, "grad_norm": 2.194559984248375, "learning_rate": 4.0467444445271953e-07, "loss": 0.7296, "step": 28547 }, { "epoch": 0.8749540272158882, "grad_norm": 0.8253558819678073, "learning_rate": 4.0447886539155103e-07, "loss": 0.3839, "step": 28548 }, { "epoch": 0.8749846757386294, "grad_norm": 2.0593942717548317, "learning_rate": 4.042833316112288e-07, "loss": 0.595, "step": 28549 }, { "epoch": 0.8750153242613706, "grad_norm": 2.2438360138857774, "learning_rate": 4.04087843113678e-07, "loss": 0.6133, "step": 28550 }, { "epoch": 0.8750459727841118, "grad_norm": 2.05057105855926, "learning_rate": 4.038923999008254e-07, "loss": 0.597, "step": 28551 }, { "epoch": 0.875076621306853, "grad_norm": 1.8368679150254887, "learning_rate": 4.036970019745978e-07, "loss": 0.5196, "step": 28552 }, { "epoch": 0.8751072698295942, "grad_norm": 1.841590001210223, "learning_rate": 4.0350164933691925e-07, "loss": 0.5896, "step": 28553 }, { "epoch": 0.8751379183523355, "grad_norm": 1.8180044665139126, "learning_rate": 4.0330634198971543e-07, "loss": 0.5065, "step": 28554 }, { "epoch": 0.8751685668750766, "grad_norm": 1.9322044454072396, "learning_rate": 4.031110799349097e-07, "loss": 0.5727, "step": 28555 }, { "epoch": 0.8751992153978179, "grad_norm": 1.8206304872380672, "learning_rate": 4.029158631744262e-07, "loss": 0.5521, "step": 28556 }, { "epoch": 0.875229863920559, "grad_norm": 1.6553009952562856, "learning_rate": 4.0272069171019055e-07, "loss": 0.553, "step": 28557 }, { "epoch": 0.8752605124433003, "grad_norm": 2.0818394066172856, "learning_rate": 4.025255655441229e-07, "loss": 0.575, "step": 28558 }, { "epoch": 0.8752911609660414, "grad_norm": 1.904576748172883, "learning_rate": 4.0233048467814727e-07, "loss": 0.5416, "step": 28559 }, { "epoch": 0.8753218094887827, "grad_norm": 1.892772847483345, "learning_rate": 4.0213544911418653e-07, "loss": 0.5221, "step": 28560 }, { "epoch": 0.8753524580115238, "grad_norm": 2.0323713883948837, "learning_rate": 4.0194045885416034e-07, "loss": 0.5666, "step": 28561 }, { "epoch": 0.8753831065342651, "grad_norm": 0.8815213284286773, "learning_rate": 4.017455138999921e-07, "loss": 0.3977, "step": 28562 }, { "epoch": 0.8754137550570062, "grad_norm": 0.7998810106081091, "learning_rate": 4.0155061425360187e-07, "loss": 0.3856, "step": 28563 }, { "epoch": 0.8754444035797475, "grad_norm": 1.7938751603547363, "learning_rate": 4.013557599169099e-07, "loss": 0.5125, "step": 28564 }, { "epoch": 0.8754750521024887, "grad_norm": 0.7907428060443402, "learning_rate": 4.0116095089183684e-07, "loss": 0.4028, "step": 28565 }, { "epoch": 0.8755057006252298, "grad_norm": 1.8887757700831473, "learning_rate": 4.0096618718030055e-07, "loss": 0.5925, "step": 28566 }, { "epoch": 0.8755363491479711, "grad_norm": 1.7889467865510136, "learning_rate": 4.0077146878422126e-07, "loss": 0.5272, "step": 28567 }, { "epoch": 0.8755669976707122, "grad_norm": 1.7910166023257996, "learning_rate": 4.005767957055179e-07, "loss": 0.5074, "step": 28568 }, { "epoch": 0.8755976461934535, "grad_norm": 1.948533413342609, "learning_rate": 4.0038216794610786e-07, "loss": 0.6092, "step": 28569 }, { "epoch": 0.8756282947161946, "grad_norm": 1.755596664035591, "learning_rate": 4.00187585507909e-07, "loss": 0.5087, "step": 28570 }, { "epoch": 0.8756589432389359, "grad_norm": 2.1523898517967077, "learning_rate": 3.999930483928399e-07, "loss": 0.5441, "step": 28571 }, { "epoch": 0.875689591761677, "grad_norm": 0.7710270263608361, "learning_rate": 3.9979855660281505e-07, "loss": 0.3861, "step": 28572 }, { "epoch": 0.8757202402844183, "grad_norm": 1.8835387058408029, "learning_rate": 3.9960411013975296e-07, "loss": 0.6246, "step": 28573 }, { "epoch": 0.8757508888071595, "grad_norm": 1.9634616195432704, "learning_rate": 3.9940970900556766e-07, "loss": 0.5229, "step": 28574 }, { "epoch": 0.8757815373299007, "grad_norm": 0.8378185025721835, "learning_rate": 3.9921535320217583e-07, "loss": 0.4132, "step": 28575 }, { "epoch": 0.8758121858526419, "grad_norm": 0.8249699044916637, "learning_rate": 3.990210427314933e-07, "loss": 0.3988, "step": 28576 }, { "epoch": 0.8758428343753831, "grad_norm": 0.7960869575842036, "learning_rate": 3.9882677759543244e-07, "loss": 0.3842, "step": 28577 }, { "epoch": 0.8758734828981243, "grad_norm": 2.1130744015252305, "learning_rate": 3.986325577959088e-07, "loss": 0.5837, "step": 28578 }, { "epoch": 0.8759041314208655, "grad_norm": 2.0025303384253674, "learning_rate": 3.9843838333483654e-07, "loss": 0.5918, "step": 28579 }, { "epoch": 0.8759347799436067, "grad_norm": 1.9394243672535454, "learning_rate": 3.9824425421412736e-07, "loss": 0.5312, "step": 28580 }, { "epoch": 0.875965428466348, "grad_norm": 1.927388770623665, "learning_rate": 3.980501704356954e-07, "loss": 0.5231, "step": 28581 }, { "epoch": 0.8759960769890891, "grad_norm": 1.8107041828413353, "learning_rate": 3.978561320014529e-07, "loss": 0.5877, "step": 28582 }, { "epoch": 0.8760267255118304, "grad_norm": 1.7130265859009517, "learning_rate": 3.9766213891331116e-07, "loss": 0.5752, "step": 28583 }, { "epoch": 0.8760573740345715, "grad_norm": 1.7194195183626166, "learning_rate": 3.97468191173182e-07, "loss": 0.6878, "step": 28584 }, { "epoch": 0.8760880225573128, "grad_norm": 1.8720298102008681, "learning_rate": 3.9727428878297613e-07, "loss": 0.4849, "step": 28585 }, { "epoch": 0.8761186710800539, "grad_norm": 1.926789568533385, "learning_rate": 3.970804317446042e-07, "loss": 0.5603, "step": 28586 }, { "epoch": 0.8761493196027952, "grad_norm": 1.9024047037174858, "learning_rate": 3.9688662005997747e-07, "loss": 0.6175, "step": 28587 }, { "epoch": 0.8761799681255363, "grad_norm": 2.33860405179312, "learning_rate": 3.966928537310033e-07, "loss": 0.5185, "step": 28588 }, { "epoch": 0.8762106166482776, "grad_norm": 2.090444999443609, "learning_rate": 3.9649913275959295e-07, "loss": 0.5518, "step": 28589 }, { "epoch": 0.8762412651710187, "grad_norm": 1.9467490072500608, "learning_rate": 3.963054571476549e-07, "loss": 0.6936, "step": 28590 }, { "epoch": 0.87627191369376, "grad_norm": 1.7422027858122942, "learning_rate": 3.9611182689709595e-07, "loss": 0.5258, "step": 28591 }, { "epoch": 0.8763025622165012, "grad_norm": 2.1617962363347254, "learning_rate": 3.959182420098256e-07, "loss": 0.6624, "step": 28592 }, { "epoch": 0.8763332107392424, "grad_norm": 0.7766676222837043, "learning_rate": 3.957247024877514e-07, "loss": 0.4038, "step": 28593 }, { "epoch": 0.8763638592619836, "grad_norm": 1.9093880150315368, "learning_rate": 3.955312083327795e-07, "loss": 0.564, "step": 28594 }, { "epoch": 0.8763945077847248, "grad_norm": 2.0893129984500227, "learning_rate": 3.9533775954681664e-07, "loss": 0.6426, "step": 28595 }, { "epoch": 0.876425156307466, "grad_norm": 1.8074033844418291, "learning_rate": 3.9514435613176805e-07, "loss": 0.5087, "step": 28596 }, { "epoch": 0.8764558048302071, "grad_norm": 1.883215288371888, "learning_rate": 3.9495099808954165e-07, "loss": 0.5619, "step": 28597 }, { "epoch": 0.8764864533529484, "grad_norm": 2.0369303582956846, "learning_rate": 3.947576854220414e-07, "loss": 0.6997, "step": 28598 }, { "epoch": 0.8765171018756895, "grad_norm": 2.080779025764892, "learning_rate": 3.945644181311709e-07, "loss": 0.614, "step": 28599 }, { "epoch": 0.8765477503984308, "grad_norm": 1.8736932153733006, "learning_rate": 3.9437119621883626e-07, "loss": 0.6716, "step": 28600 }, { "epoch": 0.876578398921172, "grad_norm": 2.0756611248571635, "learning_rate": 3.9417801968694045e-07, "loss": 0.597, "step": 28601 }, { "epoch": 0.8766090474439132, "grad_norm": 1.9602126788893912, "learning_rate": 3.93984888537387e-07, "loss": 0.5831, "step": 28602 }, { "epoch": 0.8766396959666544, "grad_norm": 1.8607465900441884, "learning_rate": 3.9379180277207885e-07, "loss": 0.5817, "step": 28603 }, { "epoch": 0.8766703444893956, "grad_norm": 1.778294574926346, "learning_rate": 3.935987623929183e-07, "loss": 0.5569, "step": 28604 }, { "epoch": 0.8767009930121368, "grad_norm": 1.6383400992585653, "learning_rate": 3.934057674018088e-07, "loss": 0.5704, "step": 28605 }, { "epoch": 0.876731641534878, "grad_norm": 1.6655790619985433, "learning_rate": 3.9321281780065055e-07, "loss": 0.4637, "step": 28606 }, { "epoch": 0.8767622900576192, "grad_norm": 1.8258757280855795, "learning_rate": 3.9301991359134373e-07, "loss": 0.5582, "step": 28607 }, { "epoch": 0.8767929385803604, "grad_norm": 1.7530781334138448, "learning_rate": 3.928270547757923e-07, "loss": 0.4821, "step": 28608 }, { "epoch": 0.8768235871031016, "grad_norm": 1.8126796032537338, "learning_rate": 3.926342413558948e-07, "loss": 0.5374, "step": 28609 }, { "epoch": 0.8768542356258429, "grad_norm": 0.839209353299235, "learning_rate": 3.9244147333354965e-07, "loss": 0.4045, "step": 28610 }, { "epoch": 0.876884884148584, "grad_norm": 0.8009438053919635, "learning_rate": 3.922487507106576e-07, "loss": 0.3961, "step": 28611 }, { "epoch": 0.8769155326713253, "grad_norm": 1.9280272066580546, "learning_rate": 3.920560734891188e-07, "loss": 0.5762, "step": 28612 }, { "epoch": 0.8769461811940664, "grad_norm": 1.772360863317973, "learning_rate": 3.9186344167082945e-07, "loss": 0.5579, "step": 28613 }, { "epoch": 0.8769768297168077, "grad_norm": 1.951019569509041, "learning_rate": 3.9167085525768864e-07, "loss": 0.5514, "step": 28614 }, { "epoch": 0.8770074782395488, "grad_norm": 1.722571332345179, "learning_rate": 3.9147831425159375e-07, "loss": 0.5678, "step": 28615 }, { "epoch": 0.8770381267622901, "grad_norm": 1.7991939084380286, "learning_rate": 3.9128581865444325e-07, "loss": 0.6054, "step": 28616 }, { "epoch": 0.8770687752850312, "grad_norm": 1.7167695601388506, "learning_rate": 3.9109336846813285e-07, "loss": 0.5697, "step": 28617 }, { "epoch": 0.8770994238077725, "grad_norm": 1.8085282970740801, "learning_rate": 3.9090096369455763e-07, "loss": 0.6332, "step": 28618 }, { "epoch": 0.8771300723305137, "grad_norm": 0.7648950714029354, "learning_rate": 3.907086043356145e-07, "loss": 0.3803, "step": 28619 }, { "epoch": 0.8771607208532549, "grad_norm": 1.7501520607672756, "learning_rate": 3.905162903932003e-07, "loss": 0.4708, "step": 28620 }, { "epoch": 0.8771913693759961, "grad_norm": 1.8235969889683532, "learning_rate": 3.903240218692067e-07, "loss": 0.5023, "step": 28621 }, { "epoch": 0.8772220178987373, "grad_norm": 2.1039777276628, "learning_rate": 3.9013179876553067e-07, "loss": 0.5946, "step": 28622 }, { "epoch": 0.8772526664214785, "grad_norm": 1.7463051485868906, "learning_rate": 3.89939621084065e-07, "loss": 0.5059, "step": 28623 }, { "epoch": 0.8772833149442197, "grad_norm": 0.7949072578176334, "learning_rate": 3.89747488826705e-07, "loss": 0.3954, "step": 28624 }, { "epoch": 0.8773139634669609, "grad_norm": 1.8859333993370182, "learning_rate": 3.895554019953424e-07, "loss": 0.5404, "step": 28625 }, { "epoch": 0.8773446119897022, "grad_norm": 1.897529234396919, "learning_rate": 3.893633605918684e-07, "loss": 0.6056, "step": 28626 }, { "epoch": 0.8773752605124433, "grad_norm": 1.9495623648020872, "learning_rate": 3.8917136461817884e-07, "loss": 0.5991, "step": 28627 }, { "epoch": 0.8774059090351845, "grad_norm": 0.8380679074054705, "learning_rate": 3.889794140761632e-07, "loss": 0.4032, "step": 28628 }, { "epoch": 0.8774365575579257, "grad_norm": 1.940466733106242, "learning_rate": 3.887875089677123e-07, "loss": 0.6857, "step": 28629 }, { "epoch": 0.8774672060806669, "grad_norm": 1.7292116285376096, "learning_rate": 3.8859564929471793e-07, "loss": 0.4962, "step": 28630 }, { "epoch": 0.8774978546034081, "grad_norm": 1.8747783532306632, "learning_rate": 3.8840383505907186e-07, "loss": 0.6546, "step": 28631 }, { "epoch": 0.8775285031261493, "grad_norm": 1.85140065201563, "learning_rate": 3.882120662626615e-07, "loss": 0.5722, "step": 28632 }, { "epoch": 0.8775591516488905, "grad_norm": 1.8962019566077848, "learning_rate": 3.8802034290737756e-07, "loss": 0.6268, "step": 28633 }, { "epoch": 0.8775898001716317, "grad_norm": 1.6211382735301616, "learning_rate": 3.8782866499510905e-07, "loss": 0.553, "step": 28634 }, { "epoch": 0.877620448694373, "grad_norm": 2.040001787999705, "learning_rate": 3.876370325277462e-07, "loss": 0.5465, "step": 28635 }, { "epoch": 0.8776510972171141, "grad_norm": 2.0771438882224555, "learning_rate": 3.874454455071752e-07, "loss": 0.5257, "step": 28636 }, { "epoch": 0.8776817457398554, "grad_norm": 1.9047006177411019, "learning_rate": 3.8725390393528293e-07, "loss": 0.5441, "step": 28637 }, { "epoch": 0.8777123942625965, "grad_norm": 2.043849201039589, "learning_rate": 3.870624078139601e-07, "loss": 0.505, "step": 28638 }, { "epoch": 0.8777430427853378, "grad_norm": 0.8478700898295707, "learning_rate": 3.8687095714509124e-07, "loss": 0.4035, "step": 28639 }, { "epoch": 0.8777736913080789, "grad_norm": 1.7354483815085093, "learning_rate": 3.866795519305622e-07, "loss": 0.5901, "step": 28640 }, { "epoch": 0.8778043398308202, "grad_norm": 1.9885937292483566, "learning_rate": 3.864881921722602e-07, "loss": 0.6422, "step": 28641 }, { "epoch": 0.8778349883535613, "grad_norm": 1.7703094564735964, "learning_rate": 3.862968778720705e-07, "loss": 0.5754, "step": 28642 }, { "epoch": 0.8778656368763026, "grad_norm": 2.0166942781549153, "learning_rate": 3.861056090318788e-07, "loss": 0.5813, "step": 28643 }, { "epoch": 0.8778962853990437, "grad_norm": 0.7747948184275141, "learning_rate": 3.859143856535685e-07, "loss": 0.3973, "step": 28644 }, { "epoch": 0.877926933921785, "grad_norm": 1.9288011156285276, "learning_rate": 3.8572320773902436e-07, "loss": 0.5783, "step": 28645 }, { "epoch": 0.8779575824445262, "grad_norm": 1.9167163336920736, "learning_rate": 3.855320752901304e-07, "loss": 0.4913, "step": 28646 }, { "epoch": 0.8779882309672674, "grad_norm": 1.6754211804500023, "learning_rate": 3.8534098830877e-07, "loss": 0.5082, "step": 28647 }, { "epoch": 0.8780188794900086, "grad_norm": 1.8905743481265345, "learning_rate": 3.8514994679682395e-07, "loss": 0.5445, "step": 28648 }, { "epoch": 0.8780495280127498, "grad_norm": 1.9981142036857087, "learning_rate": 3.849589507561774e-07, "loss": 0.5569, "step": 28649 }, { "epoch": 0.878080176535491, "grad_norm": 1.786346709990749, "learning_rate": 3.8476800018871054e-07, "loss": 0.5844, "step": 28650 }, { "epoch": 0.8781108250582322, "grad_norm": 1.635121087308965, "learning_rate": 3.8457709509630623e-07, "loss": 0.5122, "step": 28651 }, { "epoch": 0.8781414735809734, "grad_norm": 2.0776141179145076, "learning_rate": 3.843862354808442e-07, "loss": 0.6369, "step": 28652 }, { "epoch": 0.8781721221037146, "grad_norm": 1.6931006954703982, "learning_rate": 3.8419542134420505e-07, "loss": 0.5761, "step": 28653 }, { "epoch": 0.8782027706264558, "grad_norm": 1.9920919694285253, "learning_rate": 3.840046526882707e-07, "loss": 0.5645, "step": 28654 }, { "epoch": 0.8782334191491971, "grad_norm": 1.8867598209472833, "learning_rate": 3.838139295149185e-07, "loss": 0.5384, "step": 28655 }, { "epoch": 0.8782640676719382, "grad_norm": 1.90788293644906, "learning_rate": 3.8362325182602857e-07, "loss": 0.5065, "step": 28656 }, { "epoch": 0.8782947161946795, "grad_norm": 0.8457584020639773, "learning_rate": 3.834326196234811e-07, "loss": 0.3844, "step": 28657 }, { "epoch": 0.8783253647174206, "grad_norm": 0.8082008753162495, "learning_rate": 3.8324203290915296e-07, "loss": 0.4137, "step": 28658 }, { "epoch": 0.8783560132401618, "grad_norm": 1.9327191575897111, "learning_rate": 3.8305149168492094e-07, "loss": 0.5981, "step": 28659 }, { "epoch": 0.878386661762903, "grad_norm": 2.1636284620834942, "learning_rate": 3.8286099595266525e-07, "loss": 0.606, "step": 28660 }, { "epoch": 0.8784173102856442, "grad_norm": 1.9296002266692271, "learning_rate": 3.82670545714261e-07, "loss": 0.5839, "step": 28661 }, { "epoch": 0.8784479588083854, "grad_norm": 2.0137824891129084, "learning_rate": 3.824801409715856e-07, "loss": 0.5233, "step": 28662 }, { "epoch": 0.8784786073311266, "grad_norm": 0.7861777748957325, "learning_rate": 3.822897817265142e-07, "loss": 0.3758, "step": 28663 }, { "epoch": 0.8785092558538679, "grad_norm": 1.8284567839891666, "learning_rate": 3.820994679809231e-07, "loss": 0.5257, "step": 28664 }, { "epoch": 0.878539904376609, "grad_norm": 2.0499404181695393, "learning_rate": 3.819091997366886e-07, "loss": 0.5541, "step": 28665 }, { "epoch": 0.8785705528993503, "grad_norm": 1.7701758500819516, "learning_rate": 3.8171897699568304e-07, "loss": 0.5057, "step": 28666 }, { "epoch": 0.8786012014220914, "grad_norm": 2.2337439992277326, "learning_rate": 3.815287997597822e-07, "loss": 0.6079, "step": 28667 }, { "epoch": 0.8786318499448327, "grad_norm": 1.8377616480674557, "learning_rate": 3.813386680308606e-07, "loss": 0.587, "step": 28668 }, { "epoch": 0.8786624984675738, "grad_norm": 0.7856482793618613, "learning_rate": 3.811485818107902e-07, "loss": 0.3898, "step": 28669 }, { "epoch": 0.8786931469903151, "grad_norm": 0.8103713473320187, "learning_rate": 3.809585411014455e-07, "loss": 0.4006, "step": 28670 }, { "epoch": 0.8787237955130562, "grad_norm": 2.220966797884925, "learning_rate": 3.807685459046967e-07, "loss": 0.6069, "step": 28671 }, { "epoch": 0.8787544440357975, "grad_norm": 2.0403875724840908, "learning_rate": 3.805785962224179e-07, "loss": 0.6161, "step": 28672 }, { "epoch": 0.8787850925585386, "grad_norm": 1.74247791314156, "learning_rate": 3.803886920564809e-07, "loss": 0.5208, "step": 28673 }, { "epoch": 0.8788157410812799, "grad_norm": 1.9008269470759558, "learning_rate": 3.8019883340875473e-07, "loss": 0.5644, "step": 28674 }, { "epoch": 0.8788463896040211, "grad_norm": 1.9855662219089805, "learning_rate": 3.800090202811119e-07, "loss": 0.5423, "step": 28675 }, { "epoch": 0.8788770381267623, "grad_norm": 2.016081590042351, "learning_rate": 3.798192526754235e-07, "loss": 0.5774, "step": 28676 }, { "epoch": 0.8789076866495035, "grad_norm": 0.8152179330679686, "learning_rate": 3.7962953059355655e-07, "loss": 0.4004, "step": 28677 }, { "epoch": 0.8789383351722447, "grad_norm": 0.8274141786441599, "learning_rate": 3.794398540373823e-07, "loss": 0.4082, "step": 28678 }, { "epoch": 0.8789689836949859, "grad_norm": 1.8678418062935422, "learning_rate": 3.7925022300877026e-07, "loss": 0.5495, "step": 28679 }, { "epoch": 0.8789996322177271, "grad_norm": 0.787990135128531, "learning_rate": 3.7906063750958734e-07, "loss": 0.3959, "step": 28680 }, { "epoch": 0.8790302807404683, "grad_norm": 2.1205458325993405, "learning_rate": 3.7887109754170315e-07, "loss": 0.6378, "step": 28681 }, { "epoch": 0.8790609292632096, "grad_norm": 2.1871172877980003, "learning_rate": 3.786816031069829e-07, "loss": 0.5921, "step": 28682 }, { "epoch": 0.8790915777859507, "grad_norm": 0.7727992531107527, "learning_rate": 3.7849215420729615e-07, "loss": 0.3933, "step": 28683 }, { "epoch": 0.879122226308692, "grad_norm": 1.8580957446322786, "learning_rate": 3.7830275084450865e-07, "loss": 0.5687, "step": 28684 }, { "epoch": 0.8791528748314331, "grad_norm": 2.1993350821903057, "learning_rate": 3.7811339302048667e-07, "loss": 0.6198, "step": 28685 }, { "epoch": 0.8791835233541744, "grad_norm": 1.926523070763217, "learning_rate": 3.779240807370954e-07, "loss": 0.4534, "step": 28686 }, { "epoch": 0.8792141718769155, "grad_norm": 1.9049899806660453, "learning_rate": 3.777348139962017e-07, "loss": 0.5943, "step": 28687 }, { "epoch": 0.8792448203996568, "grad_norm": 1.8523038710671043, "learning_rate": 3.775455927996685e-07, "loss": 0.549, "step": 28688 }, { "epoch": 0.8792754689223979, "grad_norm": 1.8362006145416376, "learning_rate": 3.7735641714936157e-07, "loss": 0.5756, "step": 28689 }, { "epoch": 0.8793061174451391, "grad_norm": 0.8357687895272043, "learning_rate": 3.7716728704714547e-07, "loss": 0.397, "step": 28690 }, { "epoch": 0.8793367659678804, "grad_norm": 1.9320185927780136, "learning_rate": 3.7697820249488204e-07, "loss": 0.6062, "step": 28691 }, { "epoch": 0.8793674144906215, "grad_norm": 1.9016900403544386, "learning_rate": 3.7678916349443596e-07, "loss": 0.5816, "step": 28692 }, { "epoch": 0.8793980630133628, "grad_norm": 1.7727728125633548, "learning_rate": 3.766001700476685e-07, "loss": 0.4832, "step": 28693 }, { "epoch": 0.8794287115361039, "grad_norm": 0.836379787546283, "learning_rate": 3.7641122215644254e-07, "loss": 0.4233, "step": 28694 }, { "epoch": 0.8794593600588452, "grad_norm": 1.828253286808137, "learning_rate": 3.7622231982262057e-07, "loss": 0.5328, "step": 28695 }, { "epoch": 0.8794900085815863, "grad_norm": 1.9533856320382135, "learning_rate": 3.760334630480622e-07, "loss": 0.5689, "step": 28696 }, { "epoch": 0.8795206571043276, "grad_norm": 1.7271509583432687, "learning_rate": 3.7584465183462925e-07, "loss": 0.6388, "step": 28697 }, { "epoch": 0.8795513056270687, "grad_norm": 1.6831638501165624, "learning_rate": 3.7565588618418305e-07, "loss": 0.5335, "step": 28698 }, { "epoch": 0.87958195414981, "grad_norm": 1.8697398250655242, "learning_rate": 3.7546716609858146e-07, "loss": 0.4819, "step": 28699 }, { "epoch": 0.8796126026725511, "grad_norm": 2.0946422037290917, "learning_rate": 3.752784915796853e-07, "loss": 0.5713, "step": 28700 }, { "epoch": 0.8796432511952924, "grad_norm": 1.9423215841640047, "learning_rate": 3.750898626293542e-07, "loss": 0.5407, "step": 28701 }, { "epoch": 0.8796738997180336, "grad_norm": 1.980125689941204, "learning_rate": 3.749012792494455e-07, "loss": 0.5967, "step": 28702 }, { "epoch": 0.8797045482407748, "grad_norm": 1.9305597236955747, "learning_rate": 3.7471274144181836e-07, "loss": 0.5181, "step": 28703 }, { "epoch": 0.879735196763516, "grad_norm": 0.8284579845497634, "learning_rate": 3.745242492083284e-07, "loss": 0.3986, "step": 28704 }, { "epoch": 0.8797658452862572, "grad_norm": 1.6920985899321654, "learning_rate": 3.743358025508359e-07, "loss": 0.4979, "step": 28705 }, { "epoch": 0.8797964938089984, "grad_norm": 1.8138472998738, "learning_rate": 3.7414740147119653e-07, "loss": 0.5117, "step": 28706 }, { "epoch": 0.8798271423317396, "grad_norm": 1.9563525400146624, "learning_rate": 3.739590459712661e-07, "loss": 0.6968, "step": 28707 }, { "epoch": 0.8798577908544808, "grad_norm": 1.9401918228882624, "learning_rate": 3.7377073605290024e-07, "loss": 0.5585, "step": 28708 }, { "epoch": 0.879888439377222, "grad_norm": 0.7671270140037109, "learning_rate": 3.7358247171795593e-07, "loss": 0.3757, "step": 28709 }, { "epoch": 0.8799190878999632, "grad_norm": 1.8648831386187026, "learning_rate": 3.7339425296828603e-07, "loss": 0.5166, "step": 28710 }, { "epoch": 0.8799497364227045, "grad_norm": 0.8272022189306802, "learning_rate": 3.732060798057469e-07, "loss": 0.4086, "step": 28711 }, { "epoch": 0.8799803849454456, "grad_norm": 1.9835747244398148, "learning_rate": 3.730179522321925e-07, "loss": 0.6336, "step": 28712 }, { "epoch": 0.8800110334681869, "grad_norm": 1.881261610624436, "learning_rate": 3.728298702494754e-07, "loss": 0.6102, "step": 28713 }, { "epoch": 0.880041681990928, "grad_norm": 1.7924441152720367, "learning_rate": 3.726418338594506e-07, "loss": 0.6137, "step": 28714 }, { "epoch": 0.8800723305136693, "grad_norm": 1.9618572109663626, "learning_rate": 3.724538430639685e-07, "loss": 0.6133, "step": 28715 }, { "epoch": 0.8801029790364104, "grad_norm": 1.6935987896811795, "learning_rate": 3.72265897864883e-07, "loss": 0.4688, "step": 28716 }, { "epoch": 0.8801336275591517, "grad_norm": 2.2274022460144987, "learning_rate": 3.7207799826404603e-07, "loss": 0.6668, "step": 28717 }, { "epoch": 0.8801642760818928, "grad_norm": 1.969976875493802, "learning_rate": 3.7189014426330826e-07, "loss": 0.5101, "step": 28718 }, { "epoch": 0.8801949246046341, "grad_norm": 1.6073796392664388, "learning_rate": 3.717023358645211e-07, "loss": 0.5466, "step": 28719 }, { "epoch": 0.8802255731273753, "grad_norm": 2.901334750032524, "learning_rate": 3.715145730695358e-07, "loss": 0.6581, "step": 28720 }, { "epoch": 0.8802562216501164, "grad_norm": 1.8775649042123852, "learning_rate": 3.713268558802008e-07, "loss": 0.6057, "step": 28721 }, { "epoch": 0.8802868701728577, "grad_norm": 1.9238624143637164, "learning_rate": 3.711391842983675e-07, "loss": 0.5615, "step": 28722 }, { "epoch": 0.8803175186955988, "grad_norm": 2.176400808407551, "learning_rate": 3.709515583258821e-07, "loss": 0.614, "step": 28723 }, { "epoch": 0.8803481672183401, "grad_norm": 1.775779786499469, "learning_rate": 3.707639779645972e-07, "loss": 0.5571, "step": 28724 }, { "epoch": 0.8803788157410812, "grad_norm": 2.0335720706665117, "learning_rate": 3.705764432163594e-07, "loss": 0.6735, "step": 28725 }, { "epoch": 0.8804094642638225, "grad_norm": 1.9631997642102665, "learning_rate": 3.703889540830158e-07, "loss": 0.6659, "step": 28726 }, { "epoch": 0.8804401127865636, "grad_norm": 1.915842997921532, "learning_rate": 3.702015105664142e-07, "loss": 0.5834, "step": 28727 }, { "epoch": 0.8804707613093049, "grad_norm": 0.8276815564017774, "learning_rate": 3.70014112668402e-07, "loss": 0.4054, "step": 28728 }, { "epoch": 0.8805014098320461, "grad_norm": 1.866710535062723, "learning_rate": 3.698267603908251e-07, "loss": 0.5859, "step": 28729 }, { "epoch": 0.8805320583547873, "grad_norm": 1.686565157036055, "learning_rate": 3.696394537355297e-07, "loss": 0.6184, "step": 28730 }, { "epoch": 0.8805627068775285, "grad_norm": 1.9753286092151998, "learning_rate": 3.694521927043615e-07, "loss": 0.532, "step": 28731 }, { "epoch": 0.8805933554002697, "grad_norm": 0.8676250781119224, "learning_rate": 3.6926497729916633e-07, "loss": 0.4112, "step": 28732 }, { "epoch": 0.8806240039230109, "grad_norm": 1.7681684091971814, "learning_rate": 3.6907780752178877e-07, "loss": 0.5796, "step": 28733 }, { "epoch": 0.8806546524457521, "grad_norm": 1.854193857205674, "learning_rate": 3.688906833740702e-07, "loss": 0.6069, "step": 28734 }, { "epoch": 0.8806853009684933, "grad_norm": 1.8807925215372578, "learning_rate": 3.687036048578585e-07, "loss": 0.5478, "step": 28735 }, { "epoch": 0.8807159494912346, "grad_norm": 1.857412665902949, "learning_rate": 3.6851657197499503e-07, "loss": 0.5486, "step": 28736 }, { "epoch": 0.8807465980139757, "grad_norm": 1.8235242840077106, "learning_rate": 3.683295847273216e-07, "loss": 0.5621, "step": 28737 }, { "epoch": 0.880777246536717, "grad_norm": 1.873898328508797, "learning_rate": 3.6814264311668235e-07, "loss": 0.5863, "step": 28738 }, { "epoch": 0.8808078950594581, "grad_norm": 1.8119015292619471, "learning_rate": 3.6795574714491966e-07, "loss": 0.5839, "step": 28739 }, { "epoch": 0.8808385435821994, "grad_norm": 1.832670729006786, "learning_rate": 3.677688968138732e-07, "loss": 0.5537, "step": 28740 }, { "epoch": 0.8808691921049405, "grad_norm": 1.8036037922177797, "learning_rate": 3.675820921253848e-07, "loss": 0.605, "step": 28741 }, { "epoch": 0.8808998406276818, "grad_norm": 2.09180250508167, "learning_rate": 3.673953330812952e-07, "loss": 0.5706, "step": 28742 }, { "epoch": 0.8809304891504229, "grad_norm": 1.8864035593826776, "learning_rate": 3.6720861968344567e-07, "loss": 0.552, "step": 28743 }, { "epoch": 0.8809611376731642, "grad_norm": 1.9843688743780672, "learning_rate": 3.670219519336754e-07, "loss": 0.6263, "step": 28744 }, { "epoch": 0.8809917861959053, "grad_norm": 1.7957007271572696, "learning_rate": 3.668353298338212e-07, "loss": 0.4439, "step": 28745 }, { "epoch": 0.8810224347186466, "grad_norm": 1.7321538272331478, "learning_rate": 3.6664875338572546e-07, "loss": 0.5879, "step": 28746 }, { "epoch": 0.8810530832413878, "grad_norm": 0.8866726226103738, "learning_rate": 3.664622225912251e-07, "loss": 0.3799, "step": 28747 }, { "epoch": 0.881083731764129, "grad_norm": 1.6689793025235733, "learning_rate": 3.662757374521575e-07, "loss": 0.4367, "step": 28748 }, { "epoch": 0.8811143802868702, "grad_norm": 2.0026028466858596, "learning_rate": 3.660892979703601e-07, "loss": 0.6128, "step": 28749 }, { "epoch": 0.8811450288096114, "grad_norm": 2.382804308761761, "learning_rate": 3.6590290414767084e-07, "loss": 0.6187, "step": 28750 }, { "epoch": 0.8811756773323526, "grad_norm": 1.7842423248816037, "learning_rate": 3.6571655598592715e-07, "loss": 0.5277, "step": 28751 }, { "epoch": 0.8812063258550937, "grad_norm": 1.8616925126481485, "learning_rate": 3.6553025348696256e-07, "loss": 0.5391, "step": 28752 }, { "epoch": 0.881236974377835, "grad_norm": 1.967178995547843, "learning_rate": 3.6534399665261454e-07, "loss": 0.5347, "step": 28753 }, { "epoch": 0.8812676229005761, "grad_norm": 2.0353380393673466, "learning_rate": 3.6515778548471824e-07, "loss": 0.6862, "step": 28754 }, { "epoch": 0.8812982714233174, "grad_norm": 1.7115062218121333, "learning_rate": 3.6497161998510833e-07, "loss": 0.5563, "step": 28755 }, { "epoch": 0.8813289199460586, "grad_norm": 0.7984657244504502, "learning_rate": 3.6478550015561775e-07, "loss": 0.3913, "step": 28756 }, { "epoch": 0.8813595684687998, "grad_norm": 1.8938628193713065, "learning_rate": 3.6459942599808285e-07, "loss": 0.5538, "step": 28757 }, { "epoch": 0.881390216991541, "grad_norm": 2.0420679599026457, "learning_rate": 3.6441339751433546e-07, "loss": 0.6609, "step": 28758 }, { "epoch": 0.8814208655142822, "grad_norm": 0.8133192110145753, "learning_rate": 3.6422741470620913e-07, "loss": 0.3926, "step": 28759 }, { "epoch": 0.8814515140370234, "grad_norm": 2.009151228080707, "learning_rate": 3.640414775755358e-07, "loss": 0.612, "step": 28760 }, { "epoch": 0.8814821625597646, "grad_norm": 1.9214175772367494, "learning_rate": 3.638555861241477e-07, "loss": 0.5839, "step": 28761 }, { "epoch": 0.8815128110825058, "grad_norm": 2.013735503512586, "learning_rate": 3.636697403538775e-07, "loss": 0.6683, "step": 28762 }, { "epoch": 0.881543459605247, "grad_norm": 2.0654752080217755, "learning_rate": 3.634839402665552e-07, "loss": 0.657, "step": 28763 }, { "epoch": 0.8815741081279882, "grad_norm": 1.8920365167550655, "learning_rate": 3.632981858640117e-07, "loss": 0.6192, "step": 28764 }, { "epoch": 0.8816047566507295, "grad_norm": 1.7853245716555908, "learning_rate": 3.6311247714807815e-07, "loss": 0.5018, "step": 28765 }, { "epoch": 0.8816354051734706, "grad_norm": 1.8082568639729952, "learning_rate": 3.6292681412058384e-07, "loss": 0.5401, "step": 28766 }, { "epoch": 0.8816660536962119, "grad_norm": 1.7489058104067154, "learning_rate": 3.6274119678335775e-07, "loss": 0.4869, "step": 28767 }, { "epoch": 0.881696702218953, "grad_norm": 1.6757718934339796, "learning_rate": 3.625556251382284e-07, "loss": 0.5698, "step": 28768 }, { "epoch": 0.8817273507416943, "grad_norm": 1.780129618047362, "learning_rate": 3.623700991870255e-07, "loss": 0.5713, "step": 28769 }, { "epoch": 0.8817579992644354, "grad_norm": 1.6677466341643037, "learning_rate": 3.6218461893157753e-07, "loss": 0.5268, "step": 28770 }, { "epoch": 0.8817886477871767, "grad_norm": 2.190968620810849, "learning_rate": 3.619991843737097e-07, "loss": 0.6182, "step": 28771 }, { "epoch": 0.8818192963099178, "grad_norm": 1.8910636015217857, "learning_rate": 3.618137955152512e-07, "loss": 0.5437, "step": 28772 }, { "epoch": 0.8818499448326591, "grad_norm": 2.009655853401205, "learning_rate": 3.616284523580288e-07, "loss": 0.4857, "step": 28773 }, { "epoch": 0.8818805933554003, "grad_norm": 1.8902532956983387, "learning_rate": 3.6144315490386774e-07, "loss": 0.5697, "step": 28774 }, { "epoch": 0.8819112418781415, "grad_norm": 2.067741063158282, "learning_rate": 3.6125790315459263e-07, "loss": 0.5112, "step": 28775 }, { "epoch": 0.8819418904008827, "grad_norm": 2.226554097577595, "learning_rate": 3.610726971120321e-07, "loss": 0.5417, "step": 28776 }, { "epoch": 0.8819725389236239, "grad_norm": 1.798781168071757, "learning_rate": 3.608875367780079e-07, "loss": 0.5881, "step": 28777 }, { "epoch": 0.8820031874463651, "grad_norm": 0.7788541994912409, "learning_rate": 3.60702422154347e-07, "loss": 0.3932, "step": 28778 }, { "epoch": 0.8820338359691063, "grad_norm": 1.8342983211406627, "learning_rate": 3.605173532428713e-07, "loss": 0.5374, "step": 28779 }, { "epoch": 0.8820644844918475, "grad_norm": 0.7776914066212202, "learning_rate": 3.6033233004540534e-07, "loss": 0.3689, "step": 28780 }, { "epoch": 0.8820951330145888, "grad_norm": 1.727256293463583, "learning_rate": 3.601473525637728e-07, "loss": 0.4558, "step": 28781 }, { "epoch": 0.8821257815373299, "grad_norm": 1.9851573388290902, "learning_rate": 3.599624207997943e-07, "loss": 0.5936, "step": 28782 }, { "epoch": 0.882156430060071, "grad_norm": 1.6275421444551401, "learning_rate": 3.597775347552934e-07, "loss": 0.4869, "step": 28783 }, { "epoch": 0.8821870785828123, "grad_norm": 1.994222214473163, "learning_rate": 3.5959269443209267e-07, "loss": 0.6772, "step": 28784 }, { "epoch": 0.8822177271055535, "grad_norm": 1.984930805907903, "learning_rate": 3.5940789983201274e-07, "loss": 0.561, "step": 28785 }, { "epoch": 0.8822483756282947, "grad_norm": 1.9489416869155025, "learning_rate": 3.592231509568722e-07, "loss": 0.6048, "step": 28786 }, { "epoch": 0.8822790241510359, "grad_norm": 1.8471957452957724, "learning_rate": 3.5903844780849464e-07, "loss": 0.5571, "step": 28787 }, { "epoch": 0.8823096726737771, "grad_norm": 1.9744799542796496, "learning_rate": 3.588537903886985e-07, "loss": 0.536, "step": 28788 }, { "epoch": 0.8823403211965183, "grad_norm": 1.8743903840009741, "learning_rate": 3.5866917869930405e-07, "loss": 0.5832, "step": 28789 }, { "epoch": 0.8823709697192595, "grad_norm": 1.8841929792843322, "learning_rate": 3.584846127421288e-07, "loss": 0.5645, "step": 28790 }, { "epoch": 0.8824016182420007, "grad_norm": 1.8474784889677964, "learning_rate": 3.583000925189922e-07, "loss": 0.5825, "step": 28791 }, { "epoch": 0.882432266764742, "grad_norm": 1.6535382532827703, "learning_rate": 3.5811561803171304e-07, "loss": 0.5076, "step": 28792 }, { "epoch": 0.8824629152874831, "grad_norm": 1.7494997793043927, "learning_rate": 3.5793118928210803e-07, "loss": 0.6175, "step": 28793 }, { "epoch": 0.8824935638102244, "grad_norm": 1.7351823851702375, "learning_rate": 3.577468062719941e-07, "loss": 0.5056, "step": 28794 }, { "epoch": 0.8825242123329655, "grad_norm": 1.8884016962544816, "learning_rate": 3.5756246900319034e-07, "loss": 0.5803, "step": 28795 }, { "epoch": 0.8825548608557068, "grad_norm": 1.9147949171360419, "learning_rate": 3.573781774775098e-07, "loss": 0.5443, "step": 28796 }, { "epoch": 0.8825855093784479, "grad_norm": 1.7490303609308058, "learning_rate": 3.571939316967704e-07, "loss": 0.4961, "step": 28797 }, { "epoch": 0.8826161579011892, "grad_norm": 2.0063971150148636, "learning_rate": 3.5700973166278795e-07, "loss": 0.4422, "step": 28798 }, { "epoch": 0.8826468064239303, "grad_norm": 1.8363055889167468, "learning_rate": 3.5682557737737546e-07, "loss": 0.5604, "step": 28799 }, { "epoch": 0.8826774549466716, "grad_norm": 2.2661767134366273, "learning_rate": 3.566414688423492e-07, "loss": 0.603, "step": 28800 }, { "epoch": 0.8827081034694128, "grad_norm": 1.8172771417720226, "learning_rate": 3.564574060595222e-07, "loss": 0.6139, "step": 28801 }, { "epoch": 0.882738751992154, "grad_norm": 2.1046985053688476, "learning_rate": 3.562733890307085e-07, "loss": 0.6322, "step": 28802 }, { "epoch": 0.8827694005148952, "grad_norm": 1.7611099061785351, "learning_rate": 3.5608941775772175e-07, "loss": 0.5673, "step": 28803 }, { "epoch": 0.8828000490376364, "grad_norm": 1.8005234476534968, "learning_rate": 3.559054922423738e-07, "loss": 0.5233, "step": 28804 }, { "epoch": 0.8828306975603776, "grad_norm": 1.8592668848409324, "learning_rate": 3.5572161248647705e-07, "loss": 0.5564, "step": 28805 }, { "epoch": 0.8828613460831188, "grad_norm": 1.8058981345156169, "learning_rate": 3.5553777849184403e-07, "loss": 0.5305, "step": 28806 }, { "epoch": 0.88289199460586, "grad_norm": 0.8211679223280414, "learning_rate": 3.5535399026028537e-07, "loss": 0.4089, "step": 28807 }, { "epoch": 0.8829226431286012, "grad_norm": 1.980427760587801, "learning_rate": 3.55170247793612e-07, "loss": 0.6311, "step": 28808 }, { "epoch": 0.8829532916513424, "grad_norm": 1.965863230604797, "learning_rate": 3.549865510936351e-07, "loss": 0.4957, "step": 28809 }, { "epoch": 0.8829839401740837, "grad_norm": 1.852388692206327, "learning_rate": 3.548029001621639e-07, "loss": 0.5436, "step": 28810 }, { "epoch": 0.8830145886968248, "grad_norm": 1.68373977531125, "learning_rate": 3.5461929500100857e-07, "loss": 0.6034, "step": 28811 }, { "epoch": 0.8830452372195661, "grad_norm": 1.8620072593752222, "learning_rate": 3.5443573561197763e-07, "loss": 0.5407, "step": 28812 }, { "epoch": 0.8830758857423072, "grad_norm": 1.9631275936011328, "learning_rate": 3.542522219968797e-07, "loss": 0.5708, "step": 28813 }, { "epoch": 0.8831065342650484, "grad_norm": 1.9674215604491727, "learning_rate": 3.5406875415752386e-07, "loss": 0.6657, "step": 28814 }, { "epoch": 0.8831371827877896, "grad_norm": 0.8198461739639469, "learning_rate": 3.5388533209571696e-07, "loss": 0.4027, "step": 28815 }, { "epoch": 0.8831678313105308, "grad_norm": 1.7679954629778254, "learning_rate": 3.537019558132665e-07, "loss": 0.6446, "step": 28816 }, { "epoch": 0.883198479833272, "grad_norm": 0.7910478703215732, "learning_rate": 3.535186253119799e-07, "loss": 0.3878, "step": 28817 }, { "epoch": 0.8832291283560132, "grad_norm": 1.846256719689068, "learning_rate": 3.5333534059366294e-07, "loss": 0.5606, "step": 28818 }, { "epoch": 0.8832597768787545, "grad_norm": 1.6809401970156777, "learning_rate": 3.5315210166012195e-07, "loss": 0.4687, "step": 28819 }, { "epoch": 0.8832904254014956, "grad_norm": 1.9053788703378949, "learning_rate": 3.5296890851316154e-07, "loss": 0.6112, "step": 28820 }, { "epoch": 0.8833210739242369, "grad_norm": 1.83080199997713, "learning_rate": 3.5278576115458817e-07, "loss": 0.5752, "step": 28821 }, { "epoch": 0.883351722446978, "grad_norm": 1.8968304433218397, "learning_rate": 3.5260265958620586e-07, "loss": 0.5796, "step": 28822 }, { "epoch": 0.8833823709697193, "grad_norm": 1.8832574771480757, "learning_rate": 3.524196038098182e-07, "loss": 0.6575, "step": 28823 }, { "epoch": 0.8834130194924604, "grad_norm": 0.782139397514332, "learning_rate": 3.5223659382722875e-07, "loss": 0.3735, "step": 28824 }, { "epoch": 0.8834436680152017, "grad_norm": 0.7928637330362747, "learning_rate": 3.520536296402427e-07, "loss": 0.4074, "step": 28825 }, { "epoch": 0.8834743165379428, "grad_norm": 1.772679758004928, "learning_rate": 3.518707112506603e-07, "loss": 0.5388, "step": 28826 }, { "epoch": 0.8835049650606841, "grad_norm": 0.8173263080504118, "learning_rate": 3.516878386602857e-07, "loss": 0.393, "step": 28827 }, { "epoch": 0.8835356135834253, "grad_norm": 1.8597009420455732, "learning_rate": 3.5150501187092013e-07, "loss": 0.5731, "step": 28828 }, { "epoch": 0.8835662621061665, "grad_norm": 2.0687004105726645, "learning_rate": 3.51322230884365e-07, "loss": 0.6489, "step": 28829 }, { "epoch": 0.8835969106289077, "grad_norm": 1.7138204036981992, "learning_rate": 3.511394957024217e-07, "loss": 0.5719, "step": 28830 }, { "epoch": 0.8836275591516489, "grad_norm": 1.828252531449769, "learning_rate": 3.5095680632688867e-07, "loss": 0.5439, "step": 28831 }, { "epoch": 0.8836582076743901, "grad_norm": 1.7788381100468185, "learning_rate": 3.5077416275956956e-07, "loss": 0.5687, "step": 28832 }, { "epoch": 0.8836888561971313, "grad_norm": 1.882352535504446, "learning_rate": 3.5059156500226235e-07, "loss": 0.6606, "step": 28833 }, { "epoch": 0.8837195047198725, "grad_norm": 1.995701671943256, "learning_rate": 3.50409013056765e-07, "loss": 0.5905, "step": 28834 }, { "epoch": 0.8837501532426137, "grad_norm": 2.13693122972275, "learning_rate": 3.5022650692487725e-07, "loss": 0.6525, "step": 28835 }, { "epoch": 0.8837808017653549, "grad_norm": 1.9727400734218603, "learning_rate": 3.500440466083982e-07, "loss": 0.6329, "step": 28836 }, { "epoch": 0.8838114502880962, "grad_norm": 1.8771275641335072, "learning_rate": 3.498616321091242e-07, "loss": 0.5518, "step": 28837 }, { "epoch": 0.8838420988108373, "grad_norm": 0.7774440170531215, "learning_rate": 3.4967926342885317e-07, "loss": 0.385, "step": 28838 }, { "epoch": 0.8838727473335786, "grad_norm": 1.962377751595147, "learning_rate": 3.4949694056938324e-07, "loss": 0.5575, "step": 28839 }, { "epoch": 0.8839033958563197, "grad_norm": 2.1261043690467005, "learning_rate": 3.49314663532509e-07, "loss": 0.5667, "step": 28840 }, { "epoch": 0.883934044379061, "grad_norm": 0.7904967840475449, "learning_rate": 3.4913243232002846e-07, "loss": 0.3915, "step": 28841 }, { "epoch": 0.8839646929018021, "grad_norm": 2.067308413262351, "learning_rate": 3.489502469337336e-07, "loss": 0.5153, "step": 28842 }, { "epoch": 0.8839953414245434, "grad_norm": 1.9126517591964742, "learning_rate": 3.48768107375424e-07, "loss": 0.668, "step": 28843 }, { "epoch": 0.8840259899472845, "grad_norm": 1.5955394830864174, "learning_rate": 3.485860136468927e-07, "loss": 0.5652, "step": 28844 }, { "epoch": 0.8840566384700257, "grad_norm": 1.8063278358713157, "learning_rate": 3.4840396574993217e-07, "loss": 0.5163, "step": 28845 }, { "epoch": 0.884087286992767, "grad_norm": 0.7693458593811393, "learning_rate": 3.4822196368633767e-07, "loss": 0.3842, "step": 28846 }, { "epoch": 0.8841179355155081, "grad_norm": 1.9821480139421235, "learning_rate": 3.480400074579032e-07, "loss": 0.5644, "step": 28847 }, { "epoch": 0.8841485840382494, "grad_norm": 1.672274829778345, "learning_rate": 3.4785809706642027e-07, "loss": 0.5223, "step": 28848 }, { "epoch": 0.8841792325609905, "grad_norm": 0.7847595729962399, "learning_rate": 3.476762325136812e-07, "loss": 0.3993, "step": 28849 }, { "epoch": 0.8842098810837318, "grad_norm": 2.034225151133813, "learning_rate": 3.4749441380147906e-07, "loss": 0.5457, "step": 28850 }, { "epoch": 0.8842405296064729, "grad_norm": 0.8204576689309776, "learning_rate": 3.4731264093160574e-07, "loss": 0.4003, "step": 28851 }, { "epoch": 0.8842711781292142, "grad_norm": 1.8804280609348552, "learning_rate": 3.4713091390585096e-07, "loss": 0.5687, "step": 28852 }, { "epoch": 0.8843018266519553, "grad_norm": 1.9931024259135914, "learning_rate": 3.469492327260043e-07, "loss": 0.6538, "step": 28853 }, { "epoch": 0.8843324751746966, "grad_norm": 1.9682166329601585, "learning_rate": 3.4676759739385946e-07, "loss": 0.6266, "step": 28854 }, { "epoch": 0.8843631236974377, "grad_norm": 0.8267891163079384, "learning_rate": 3.465860079112032e-07, "loss": 0.4041, "step": 28855 }, { "epoch": 0.884393772220179, "grad_norm": 1.7953336036854568, "learning_rate": 3.464044642798259e-07, "loss": 0.5081, "step": 28856 }, { "epoch": 0.8844244207429202, "grad_norm": 1.9567778560834692, "learning_rate": 3.4622296650151545e-07, "loss": 0.5861, "step": 28857 }, { "epoch": 0.8844550692656614, "grad_norm": 1.8398090213649063, "learning_rate": 3.460415145780605e-07, "loss": 0.5528, "step": 28858 }, { "epoch": 0.8844857177884026, "grad_norm": 1.9457008929825945, "learning_rate": 3.4586010851125063e-07, "loss": 0.6117, "step": 28859 }, { "epoch": 0.8845163663111438, "grad_norm": 2.2291741268521874, "learning_rate": 3.4567874830287116e-07, "loss": 0.63, "step": 28860 }, { "epoch": 0.884547014833885, "grad_norm": 2.1157566526763643, "learning_rate": 3.454974339547096e-07, "loss": 0.5535, "step": 28861 }, { "epoch": 0.8845776633566262, "grad_norm": 1.7927765601942867, "learning_rate": 3.453161654685533e-07, "loss": 0.5273, "step": 28862 }, { "epoch": 0.8846083118793674, "grad_norm": 1.9320617828684248, "learning_rate": 3.451349428461881e-07, "loss": 0.6576, "step": 28863 }, { "epoch": 0.8846389604021087, "grad_norm": 1.865323521632812, "learning_rate": 3.449537660893987e-07, "loss": 0.6145, "step": 28864 }, { "epoch": 0.8846696089248498, "grad_norm": 2.3065430776986737, "learning_rate": 3.447726351999703e-07, "loss": 0.6012, "step": 28865 }, { "epoch": 0.8847002574475911, "grad_norm": 1.9973336625309321, "learning_rate": 3.4459155017968925e-07, "loss": 0.6007, "step": 28866 }, { "epoch": 0.8847309059703322, "grad_norm": 1.909393612781189, "learning_rate": 3.4441051103033807e-07, "loss": 0.5537, "step": 28867 }, { "epoch": 0.8847615544930735, "grad_norm": 1.7650707471802451, "learning_rate": 3.442295177537014e-07, "loss": 0.5647, "step": 28868 }, { "epoch": 0.8847922030158146, "grad_norm": 2.0584953006811415, "learning_rate": 3.4404857035156226e-07, "loss": 0.6598, "step": 28869 }, { "epoch": 0.8848228515385559, "grad_norm": 1.7557727576856832, "learning_rate": 3.438676688257053e-07, "loss": 0.5618, "step": 28870 }, { "epoch": 0.884853500061297, "grad_norm": 1.8551261737663953, "learning_rate": 3.4368681317791086e-07, "loss": 0.575, "step": 28871 }, { "epoch": 0.8848841485840383, "grad_norm": 2.190673437447005, "learning_rate": 3.4350600340996023e-07, "loss": 0.6167, "step": 28872 }, { "epoch": 0.8849147971067794, "grad_norm": 2.1372939959444537, "learning_rate": 3.433252395236381e-07, "loss": 0.6255, "step": 28873 }, { "epoch": 0.8849454456295207, "grad_norm": 2.4132063876566807, "learning_rate": 3.4314452152072354e-07, "loss": 0.5104, "step": 28874 }, { "epoch": 0.8849760941522619, "grad_norm": 0.7939650877787471, "learning_rate": 3.4296384940299687e-07, "loss": 0.3863, "step": 28875 }, { "epoch": 0.885006742675003, "grad_norm": 1.7978663813028786, "learning_rate": 3.427832231722389e-07, "loss": 0.4622, "step": 28876 }, { "epoch": 0.8850373911977443, "grad_norm": 0.8110800643459795, "learning_rate": 3.4260264283022926e-07, "loss": 0.3931, "step": 28877 }, { "epoch": 0.8850680397204854, "grad_norm": 2.175052734635789, "learning_rate": 3.4242210837874876e-07, "loss": 0.5285, "step": 28878 }, { "epoch": 0.8850986882432267, "grad_norm": 1.9469960441262322, "learning_rate": 3.422416198195738e-07, "loss": 0.5769, "step": 28879 }, { "epoch": 0.8851293367659678, "grad_norm": 1.773107667561283, "learning_rate": 3.420611771544835e-07, "loss": 0.5835, "step": 28880 }, { "epoch": 0.8851599852887091, "grad_norm": 1.8112891464844663, "learning_rate": 3.418807803852575e-07, "loss": 0.5168, "step": 28881 }, { "epoch": 0.8851906338114502, "grad_norm": 1.7863973956866381, "learning_rate": 3.4170042951367224e-07, "loss": 0.5863, "step": 28882 }, { "epoch": 0.8852212823341915, "grad_norm": 0.8310185698966569, "learning_rate": 3.415201245415023e-07, "loss": 0.4072, "step": 28883 }, { "epoch": 0.8852519308569327, "grad_norm": 0.8023775671561039, "learning_rate": 3.4133986547052855e-07, "loss": 0.408, "step": 28884 }, { "epoch": 0.8852825793796739, "grad_norm": 1.7234956815432487, "learning_rate": 3.4115965230252404e-07, "loss": 0.4582, "step": 28885 }, { "epoch": 0.8853132279024151, "grad_norm": 0.7965915084710973, "learning_rate": 3.4097948503926613e-07, "loss": 0.3807, "step": 28886 }, { "epoch": 0.8853438764251563, "grad_norm": 1.9625676510415193, "learning_rate": 3.407993636825291e-07, "loss": 0.5672, "step": 28887 }, { "epoch": 0.8853745249478975, "grad_norm": 2.054292024234447, "learning_rate": 3.406192882340875e-07, "loss": 0.6071, "step": 28888 }, { "epoch": 0.8854051734706387, "grad_norm": 1.8978879122807937, "learning_rate": 3.4043925869571724e-07, "loss": 0.6476, "step": 28889 }, { "epoch": 0.8854358219933799, "grad_norm": 1.6962235919246855, "learning_rate": 3.4025927506919075e-07, "loss": 0.5493, "step": 28890 }, { "epoch": 0.8854664705161212, "grad_norm": 2.027507379750177, "learning_rate": 3.4007933735628163e-07, "loss": 0.6042, "step": 28891 }, { "epoch": 0.8854971190388623, "grad_norm": 0.7869686397740563, "learning_rate": 3.398994455587634e-07, "loss": 0.3783, "step": 28892 }, { "epoch": 0.8855277675616036, "grad_norm": 1.786588942627675, "learning_rate": 3.397195996784092e-07, "loss": 0.5943, "step": 28893 }, { "epoch": 0.8855584160843447, "grad_norm": 2.0178439308050002, "learning_rate": 3.39539799716988e-07, "loss": 0.5535, "step": 28894 }, { "epoch": 0.885589064607086, "grad_norm": 1.9429443235307005, "learning_rate": 3.3936004567627523e-07, "loss": 0.491, "step": 28895 }, { "epoch": 0.8856197131298271, "grad_norm": 0.781733764586263, "learning_rate": 3.391803375580394e-07, "loss": 0.3989, "step": 28896 }, { "epoch": 0.8856503616525684, "grad_norm": 1.9036958046392785, "learning_rate": 3.3900067536405346e-07, "loss": 0.5095, "step": 28897 }, { "epoch": 0.8856810101753095, "grad_norm": 1.8960075811741655, "learning_rate": 3.3882105909608497e-07, "loss": 0.5598, "step": 28898 }, { "epoch": 0.8857116586980508, "grad_norm": 1.8747398927554595, "learning_rate": 3.386414887559059e-07, "loss": 0.5753, "step": 28899 }, { "epoch": 0.885742307220792, "grad_norm": 0.7869018956181375, "learning_rate": 3.384619643452852e-07, "loss": 0.394, "step": 28900 }, { "epoch": 0.8857729557435332, "grad_norm": 0.846780397097301, "learning_rate": 3.3828248586599113e-07, "loss": 0.3931, "step": 28901 }, { "epoch": 0.8858036042662744, "grad_norm": 1.8375768587223384, "learning_rate": 3.381030533197921e-07, "loss": 0.5829, "step": 28902 }, { "epoch": 0.8858342527890156, "grad_norm": 1.6789877550686059, "learning_rate": 3.379236667084573e-07, "loss": 0.5472, "step": 28903 }, { "epoch": 0.8858649013117568, "grad_norm": 1.8937611057691655, "learning_rate": 3.377443260337532e-07, "loss": 0.5513, "step": 28904 }, { "epoch": 0.885895549834498, "grad_norm": 1.864353241627941, "learning_rate": 3.375650312974466e-07, "loss": 0.5458, "step": 28905 }, { "epoch": 0.8859261983572392, "grad_norm": 0.7894753702202391, "learning_rate": 3.3738578250130547e-07, "loss": 0.3951, "step": 28906 }, { "epoch": 0.8859568468799803, "grad_norm": 1.9401732883897658, "learning_rate": 3.372065796470947e-07, "loss": 0.4783, "step": 28907 }, { "epoch": 0.8859874954027216, "grad_norm": 1.7914869221825485, "learning_rate": 3.370274227365811e-07, "loss": 0.5285, "step": 28908 }, { "epoch": 0.8860181439254627, "grad_norm": 0.8514817196136353, "learning_rate": 3.3684831177152876e-07, "loss": 0.4054, "step": 28909 }, { "epoch": 0.886048792448204, "grad_norm": 1.943011324296612, "learning_rate": 3.3666924675370307e-07, "loss": 0.548, "step": 28910 }, { "epoch": 0.8860794409709452, "grad_norm": 1.6601629472334165, "learning_rate": 3.3649022768486917e-07, "loss": 0.4874, "step": 28911 }, { "epoch": 0.8861100894936864, "grad_norm": 2.0867875253624706, "learning_rate": 3.363112545667896e-07, "loss": 0.5768, "step": 28912 }, { "epoch": 0.8861407380164276, "grad_norm": 1.8289127227589266, "learning_rate": 3.361323274012279e-07, "loss": 0.5296, "step": 28913 }, { "epoch": 0.8861713865391688, "grad_norm": 1.881912974710792, "learning_rate": 3.359534461899494e-07, "loss": 0.5932, "step": 28914 }, { "epoch": 0.88620203506191, "grad_norm": 2.511327577627848, "learning_rate": 3.3577461093471376e-07, "loss": 0.7267, "step": 28915 }, { "epoch": 0.8862326835846512, "grad_norm": 1.8299224541224273, "learning_rate": 3.3559582163728456e-07, "loss": 0.5824, "step": 28916 }, { "epoch": 0.8862633321073924, "grad_norm": 1.9875105439824137, "learning_rate": 3.3541707829942314e-07, "loss": 0.5115, "step": 28917 }, { "epoch": 0.8862939806301336, "grad_norm": 2.0945536108247786, "learning_rate": 3.352383809228904e-07, "loss": 0.536, "step": 28918 }, { "epoch": 0.8863246291528748, "grad_norm": 2.091546213430536, "learning_rate": 3.350597295094482e-07, "loss": 0.5445, "step": 28919 }, { "epoch": 0.8863552776756161, "grad_norm": 1.770522094459845, "learning_rate": 3.348811240608552e-07, "loss": 0.5274, "step": 28920 }, { "epoch": 0.8863859261983572, "grad_norm": 1.8453384353000615, "learning_rate": 3.347025645788726e-07, "loss": 0.5821, "step": 28921 }, { "epoch": 0.8864165747210985, "grad_norm": 1.7944564772210196, "learning_rate": 3.345240510652592e-07, "loss": 0.6545, "step": 28922 }, { "epoch": 0.8864472232438396, "grad_norm": 1.7774518904237924, "learning_rate": 3.3434558352177403e-07, "loss": 0.5611, "step": 28923 }, { "epoch": 0.8864778717665809, "grad_norm": 2.4397661572656024, "learning_rate": 3.341671619501752e-07, "loss": 0.5324, "step": 28924 }, { "epoch": 0.886508520289322, "grad_norm": 2.029662475303819, "learning_rate": 3.339887863522223e-07, "loss": 0.6394, "step": 28925 }, { "epoch": 0.8865391688120633, "grad_norm": 1.9791284753478031, "learning_rate": 3.338104567296707e-07, "loss": 0.6311, "step": 28926 }, { "epoch": 0.8865698173348044, "grad_norm": 1.8556122112597473, "learning_rate": 3.33632173084279e-07, "loss": 0.5795, "step": 28927 }, { "epoch": 0.8866004658575457, "grad_norm": 1.7899587560376602, "learning_rate": 3.334539354178029e-07, "loss": 0.5795, "step": 28928 }, { "epoch": 0.8866311143802869, "grad_norm": 1.7032223850657624, "learning_rate": 3.3327574373199946e-07, "loss": 0.463, "step": 28929 }, { "epoch": 0.8866617629030281, "grad_norm": 0.8047722578422246, "learning_rate": 3.3309759802862496e-07, "loss": 0.3936, "step": 28930 }, { "epoch": 0.8866924114257693, "grad_norm": 1.9358634163365862, "learning_rate": 3.329194983094325e-07, "loss": 0.6525, "step": 28931 }, { "epoch": 0.8867230599485105, "grad_norm": 1.9387194155016003, "learning_rate": 3.3274144457617897e-07, "loss": 0.6184, "step": 28932 }, { "epoch": 0.8867537084712517, "grad_norm": 1.7129824238132496, "learning_rate": 3.3256343683061854e-07, "loss": 0.5308, "step": 28933 }, { "epoch": 0.8867843569939929, "grad_norm": 2.0211389259699284, "learning_rate": 3.3238547507450425e-07, "loss": 0.5194, "step": 28934 }, { "epoch": 0.8868150055167341, "grad_norm": 1.929366615997866, "learning_rate": 3.3220755930959025e-07, "loss": 0.511, "step": 28935 }, { "epoch": 0.8868456540394754, "grad_norm": 0.8635747213541562, "learning_rate": 3.3202968953763015e-07, "loss": 0.432, "step": 28936 }, { "epoch": 0.8868763025622165, "grad_norm": 0.8145113125074753, "learning_rate": 3.3185186576037474e-07, "loss": 0.406, "step": 28937 }, { "epoch": 0.8869069510849577, "grad_norm": 1.6964154624893713, "learning_rate": 3.316740879795782e-07, "loss": 0.4908, "step": 28938 }, { "epoch": 0.8869375996076989, "grad_norm": 1.6828082602539274, "learning_rate": 3.3149635619699026e-07, "loss": 0.6121, "step": 28939 }, { "epoch": 0.8869682481304401, "grad_norm": 1.9328464872176194, "learning_rate": 3.3131867041436394e-07, "loss": 0.6544, "step": 28940 }, { "epoch": 0.8869988966531813, "grad_norm": 2.042680729067915, "learning_rate": 3.3114103063345006e-07, "loss": 0.5893, "step": 28941 }, { "epoch": 0.8870295451759225, "grad_norm": 1.9634249516322158, "learning_rate": 3.3096343685599717e-07, "loss": 0.5555, "step": 28942 }, { "epoch": 0.8870601936986637, "grad_norm": 1.9292713315578298, "learning_rate": 3.3078588908375565e-07, "loss": 0.5311, "step": 28943 }, { "epoch": 0.8870908422214049, "grad_norm": 2.055311608315351, "learning_rate": 3.3060838731847676e-07, "loss": 0.5996, "step": 28944 }, { "epoch": 0.8871214907441461, "grad_norm": 1.9378832775888086, "learning_rate": 3.3043093156190754e-07, "loss": 0.6266, "step": 28945 }, { "epoch": 0.8871521392668873, "grad_norm": 1.9378447417456952, "learning_rate": 3.302535218157965e-07, "loss": 0.4873, "step": 28946 }, { "epoch": 0.8871827877896286, "grad_norm": 2.046594704974327, "learning_rate": 3.300761580818934e-07, "loss": 0.6141, "step": 28947 }, { "epoch": 0.8872134363123697, "grad_norm": 1.9856485673844226, "learning_rate": 3.298988403619441e-07, "loss": 0.5827, "step": 28948 }, { "epoch": 0.887244084835111, "grad_norm": 2.0527820538927246, "learning_rate": 3.297215686576971e-07, "loss": 0.6251, "step": 28949 }, { "epoch": 0.8872747333578521, "grad_norm": 1.761532070881029, "learning_rate": 3.2954434297089775e-07, "loss": 0.5667, "step": 28950 }, { "epoch": 0.8873053818805934, "grad_norm": 0.7923468555954134, "learning_rate": 3.29367163303293e-07, "loss": 0.3953, "step": 28951 }, { "epoch": 0.8873360304033345, "grad_norm": 1.9258018995010464, "learning_rate": 3.2919002965662915e-07, "loss": 0.5356, "step": 28952 }, { "epoch": 0.8873666789260758, "grad_norm": 2.094367373066047, "learning_rate": 3.2901294203265046e-07, "loss": 0.5081, "step": 28953 }, { "epoch": 0.8873973274488169, "grad_norm": 1.94350636010397, "learning_rate": 3.288359004331021e-07, "loss": 0.4917, "step": 28954 }, { "epoch": 0.8874279759715582, "grad_norm": 1.6638563857690718, "learning_rate": 3.2865890485972995e-07, "loss": 0.4631, "step": 28955 }, { "epoch": 0.8874586244942994, "grad_norm": 2.0108908995063657, "learning_rate": 3.2848195531427594e-07, "loss": 0.5789, "step": 28956 }, { "epoch": 0.8874892730170406, "grad_norm": 0.8166272419140229, "learning_rate": 3.2830505179848425e-07, "loss": 0.4039, "step": 28957 }, { "epoch": 0.8875199215397818, "grad_norm": 2.008877045289778, "learning_rate": 3.281281943140985e-07, "loss": 0.557, "step": 28958 }, { "epoch": 0.887550570062523, "grad_norm": 2.013818900083544, "learning_rate": 3.279513828628611e-07, "loss": 0.5376, "step": 28959 }, { "epoch": 0.8875812185852642, "grad_norm": 2.2027441896565816, "learning_rate": 3.2777461744651516e-07, "loss": 0.4342, "step": 28960 }, { "epoch": 0.8876118671080054, "grad_norm": 1.7382378856645497, "learning_rate": 3.2759789806679987e-07, "loss": 0.3804, "step": 28961 }, { "epoch": 0.8876425156307466, "grad_norm": 2.0172973810891657, "learning_rate": 3.2742122472545825e-07, "loss": 0.4881, "step": 28962 }, { "epoch": 0.8876731641534878, "grad_norm": 1.8309814259854873, "learning_rate": 3.272445974242311e-07, "loss": 0.4639, "step": 28963 }, { "epoch": 0.887703812676229, "grad_norm": 2.125981181742931, "learning_rate": 3.2706801616485816e-07, "loss": 0.6077, "step": 28964 }, { "epoch": 0.8877344611989703, "grad_norm": 1.6922378419601438, "learning_rate": 3.268914809490797e-07, "loss": 0.5768, "step": 28965 }, { "epoch": 0.8877651097217114, "grad_norm": 1.9668405635706747, "learning_rate": 3.267149917786361e-07, "loss": 0.6488, "step": 28966 }, { "epoch": 0.8877957582444527, "grad_norm": 2.2172711913884875, "learning_rate": 3.2653854865526414e-07, "loss": 0.5656, "step": 28967 }, { "epoch": 0.8878264067671938, "grad_norm": 0.8306824386344192, "learning_rate": 3.263621515807047e-07, "loss": 0.4059, "step": 28968 }, { "epoch": 0.887857055289935, "grad_norm": 1.7830071241762653, "learning_rate": 3.2618580055669313e-07, "loss": 0.4889, "step": 28969 }, { "epoch": 0.8878877038126762, "grad_norm": 2.147971835047823, "learning_rate": 3.2600949558497076e-07, "loss": 0.5706, "step": 28970 }, { "epoch": 0.8879183523354174, "grad_norm": 1.8091317963017624, "learning_rate": 3.2583323666727174e-07, "loss": 0.5305, "step": 28971 }, { "epoch": 0.8879490008581586, "grad_norm": 1.770124908448919, "learning_rate": 3.256570238053336e-07, "loss": 0.5202, "step": 28972 }, { "epoch": 0.8879796493808998, "grad_norm": 1.772519720013148, "learning_rate": 3.2548085700089273e-07, "loss": 0.5971, "step": 28973 }, { "epoch": 0.888010297903641, "grad_norm": 1.827547184666738, "learning_rate": 3.2530473625568606e-07, "loss": 0.5218, "step": 28974 }, { "epoch": 0.8880409464263822, "grad_norm": 1.6985786578021274, "learning_rate": 3.251286615714466e-07, "loss": 0.5276, "step": 28975 }, { "epoch": 0.8880715949491235, "grad_norm": 1.932631515907652, "learning_rate": 3.2495263294991084e-07, "loss": 0.4807, "step": 28976 }, { "epoch": 0.8881022434718646, "grad_norm": 1.7831731810877096, "learning_rate": 3.247766503928129e-07, "loss": 0.6862, "step": 28977 }, { "epoch": 0.8881328919946059, "grad_norm": 1.9914966363737243, "learning_rate": 3.246007139018875e-07, "loss": 0.5119, "step": 28978 }, { "epoch": 0.888163540517347, "grad_norm": 1.8728180396198404, "learning_rate": 3.244248234788677e-07, "loss": 0.5554, "step": 28979 }, { "epoch": 0.8881941890400883, "grad_norm": 1.7548979336185042, "learning_rate": 3.242489791254849e-07, "loss": 0.5391, "step": 28980 }, { "epoch": 0.8882248375628294, "grad_norm": 1.8842432314026316, "learning_rate": 3.2407318084347494e-07, "loss": 0.5522, "step": 28981 }, { "epoch": 0.8882554860855707, "grad_norm": 1.7273461888424073, "learning_rate": 3.238974286345681e-07, "loss": 0.5947, "step": 28982 }, { "epoch": 0.8882861346083119, "grad_norm": 1.8567774365782619, "learning_rate": 3.2372172250049513e-07, "loss": 0.4994, "step": 28983 }, { "epoch": 0.8883167831310531, "grad_norm": 2.1664242591325142, "learning_rate": 3.2354606244298925e-07, "loss": 0.592, "step": 28984 }, { "epoch": 0.8883474316537943, "grad_norm": 2.036312011180408, "learning_rate": 3.233704484637801e-07, "loss": 0.6024, "step": 28985 }, { "epoch": 0.8883780801765355, "grad_norm": 2.37341662414432, "learning_rate": 3.23194880564599e-07, "loss": 0.6261, "step": 28986 }, { "epoch": 0.8884087286992767, "grad_norm": 1.869144205545394, "learning_rate": 3.2301935874717527e-07, "loss": 0.5828, "step": 28987 }, { "epoch": 0.8884393772220179, "grad_norm": 2.0904654542411545, "learning_rate": 3.22843883013238e-07, "loss": 0.602, "step": 28988 }, { "epoch": 0.8884700257447591, "grad_norm": 0.8221834292918085, "learning_rate": 3.2266845336451747e-07, "loss": 0.406, "step": 28989 }, { "epoch": 0.8885006742675003, "grad_norm": 1.8373531497045148, "learning_rate": 3.224930698027412e-07, "loss": 0.4296, "step": 28990 }, { "epoch": 0.8885313227902415, "grad_norm": 1.7790869350149203, "learning_rate": 3.223177323296367e-07, "loss": 0.5711, "step": 28991 }, { "epoch": 0.8885619713129828, "grad_norm": 2.042836921636191, "learning_rate": 3.2214244094693313e-07, "loss": 0.6017, "step": 28992 }, { "epoch": 0.8885926198357239, "grad_norm": 1.9260500781727263, "learning_rate": 3.2196719565635747e-07, "loss": 0.5262, "step": 28993 }, { "epoch": 0.8886232683584652, "grad_norm": 2.0898561325409175, "learning_rate": 3.21791996459635e-07, "loss": 0.6384, "step": 28994 }, { "epoch": 0.8886539168812063, "grad_norm": 1.8887583581646403, "learning_rate": 3.2161684335849317e-07, "loss": 0.5277, "step": 28995 }, { "epoch": 0.8886845654039476, "grad_norm": 1.802089003101817, "learning_rate": 3.2144173635465735e-07, "loss": 0.6024, "step": 28996 }, { "epoch": 0.8887152139266887, "grad_norm": 1.7671756943673864, "learning_rate": 3.2126667544985393e-07, "loss": 0.5258, "step": 28997 }, { "epoch": 0.88874586244943, "grad_norm": 1.8620330679648993, "learning_rate": 3.210916606458064e-07, "loss": 0.5131, "step": 28998 }, { "epoch": 0.8887765109721711, "grad_norm": 1.7945501055686455, "learning_rate": 3.2091669194424025e-07, "loss": 0.5931, "step": 28999 }, { "epoch": 0.8888071594949123, "grad_norm": 1.9171455808119584, "learning_rate": 3.207417693468795e-07, "loss": 0.5592, "step": 29000 }, { "epoch": 0.8888378080176536, "grad_norm": 2.0091207894390366, "learning_rate": 3.205668928554473e-07, "loss": 0.5969, "step": 29001 }, { "epoch": 0.8888684565403947, "grad_norm": 1.921490533473004, "learning_rate": 3.203920624716661e-07, "loss": 0.5341, "step": 29002 }, { "epoch": 0.888899105063136, "grad_norm": 1.8625515422114902, "learning_rate": 3.20217278197259e-07, "loss": 0.5358, "step": 29003 }, { "epoch": 0.8889297535858771, "grad_norm": 2.161902899053662, "learning_rate": 3.200425400339485e-07, "loss": 0.6238, "step": 29004 }, { "epoch": 0.8889604021086184, "grad_norm": 1.906457101398361, "learning_rate": 3.198678479834572e-07, "loss": 0.5454, "step": 29005 }, { "epoch": 0.8889910506313595, "grad_norm": 1.8476450823910626, "learning_rate": 3.1969320204750467e-07, "loss": 0.5123, "step": 29006 }, { "epoch": 0.8890216991541008, "grad_norm": 0.8304897094001467, "learning_rate": 3.1951860222781296e-07, "loss": 0.3916, "step": 29007 }, { "epoch": 0.8890523476768419, "grad_norm": 1.7467573206064493, "learning_rate": 3.1934404852610235e-07, "loss": 0.5088, "step": 29008 }, { "epoch": 0.8890829961995832, "grad_norm": 1.8034219281689097, "learning_rate": 3.191695409440915e-07, "loss": 0.5993, "step": 29009 }, { "epoch": 0.8891136447223243, "grad_norm": 2.089903090224437, "learning_rate": 3.1899507948350115e-07, "loss": 0.5931, "step": 29010 }, { "epoch": 0.8891442932450656, "grad_norm": 1.911871664044322, "learning_rate": 3.1882066414605063e-07, "loss": 0.616, "step": 29011 }, { "epoch": 0.8891749417678068, "grad_norm": 2.1514097287572715, "learning_rate": 3.186462949334568e-07, "loss": 0.6283, "step": 29012 }, { "epoch": 0.889205590290548, "grad_norm": 1.8428092231976299, "learning_rate": 3.1847197184743997e-07, "loss": 0.5612, "step": 29013 }, { "epoch": 0.8892362388132892, "grad_norm": 1.7428099748469208, "learning_rate": 3.182976948897154e-07, "loss": 0.6234, "step": 29014 }, { "epoch": 0.8892668873360304, "grad_norm": 0.788031371430566, "learning_rate": 3.1812346406200176e-07, "loss": 0.3847, "step": 29015 }, { "epoch": 0.8892975358587716, "grad_norm": 0.7983602238097871, "learning_rate": 3.179492793660166e-07, "loss": 0.3868, "step": 29016 }, { "epoch": 0.8893281843815128, "grad_norm": 1.9169360978470897, "learning_rate": 3.1777514080347404e-07, "loss": 0.5938, "step": 29017 }, { "epoch": 0.889358832904254, "grad_norm": 1.9821030864245914, "learning_rate": 3.176010483760911e-07, "loss": 0.5517, "step": 29018 }, { "epoch": 0.8893894814269953, "grad_norm": 1.8239884348417124, "learning_rate": 3.174270020855835e-07, "loss": 0.6049, "step": 29019 }, { "epoch": 0.8894201299497364, "grad_norm": 1.8948023963069922, "learning_rate": 3.1725300193366615e-07, "loss": 0.5663, "step": 29020 }, { "epoch": 0.8894507784724777, "grad_norm": 2.11583418751898, "learning_rate": 3.1707904792205144e-07, "loss": 0.5727, "step": 29021 }, { "epoch": 0.8894814269952188, "grad_norm": 2.0200462801505346, "learning_rate": 3.1690514005245643e-07, "loss": 0.4937, "step": 29022 }, { "epoch": 0.8895120755179601, "grad_norm": 0.804056955381687, "learning_rate": 3.16731278326593e-07, "loss": 0.4196, "step": 29023 }, { "epoch": 0.8895427240407012, "grad_norm": 1.9012160592573941, "learning_rate": 3.165574627461748e-07, "loss": 0.6114, "step": 29024 }, { "epoch": 0.8895733725634425, "grad_norm": 2.215780892412796, "learning_rate": 3.1638369331291386e-07, "loss": 0.6153, "step": 29025 }, { "epoch": 0.8896040210861836, "grad_norm": 1.8093519057098022, "learning_rate": 3.162099700285226e-07, "loss": 0.5269, "step": 29026 }, { "epoch": 0.8896346696089249, "grad_norm": 0.8148135057690322, "learning_rate": 3.160362928947136e-07, "loss": 0.3978, "step": 29027 }, { "epoch": 0.889665318131666, "grad_norm": 1.9768876263859065, "learning_rate": 3.158626619131966e-07, "loss": 0.5589, "step": 29028 }, { "epoch": 0.8896959666544073, "grad_norm": 2.0131924274979327, "learning_rate": 3.156890770856835e-07, "loss": 0.6318, "step": 29029 }, { "epoch": 0.8897266151771485, "grad_norm": 2.1017357849129197, "learning_rate": 3.1551553841388526e-07, "loss": 0.5904, "step": 29030 }, { "epoch": 0.8897572636998896, "grad_norm": 2.0284941997617576, "learning_rate": 3.153420458995099e-07, "loss": 0.5554, "step": 29031 }, { "epoch": 0.8897879122226309, "grad_norm": 1.9980857692772616, "learning_rate": 3.1516859954426826e-07, "loss": 0.5814, "step": 29032 }, { "epoch": 0.889818560745372, "grad_norm": 1.9596234964670158, "learning_rate": 3.1499519934986956e-07, "loss": 0.5922, "step": 29033 }, { "epoch": 0.8898492092681133, "grad_norm": 1.7405249199104411, "learning_rate": 3.148218453180213e-07, "loss": 0.4613, "step": 29034 }, { "epoch": 0.8898798577908544, "grad_norm": 1.920755636925017, "learning_rate": 3.1464853745043324e-07, "loss": 0.6221, "step": 29035 }, { "epoch": 0.8899105063135957, "grad_norm": 1.888297534274532, "learning_rate": 3.1447527574881064e-07, "loss": 0.5136, "step": 29036 }, { "epoch": 0.8899411548363368, "grad_norm": 1.7202633860026595, "learning_rate": 3.143020602148622e-07, "loss": 0.449, "step": 29037 }, { "epoch": 0.8899718033590781, "grad_norm": 1.9204764591776966, "learning_rate": 3.141288908502954e-07, "loss": 0.5757, "step": 29038 }, { "epoch": 0.8900024518818193, "grad_norm": 1.5465270505194502, "learning_rate": 3.139557676568145e-07, "loss": 0.5137, "step": 29039 }, { "epoch": 0.8900331004045605, "grad_norm": 1.8811209703919567, "learning_rate": 3.137826906361263e-07, "loss": 0.6456, "step": 29040 }, { "epoch": 0.8900637489273017, "grad_norm": 2.1534825086166496, "learning_rate": 3.136096597899374e-07, "loss": 0.6616, "step": 29041 }, { "epoch": 0.8900943974500429, "grad_norm": 1.7485997858353872, "learning_rate": 3.134366751199508e-07, "loss": 0.4567, "step": 29042 }, { "epoch": 0.8901250459727841, "grad_norm": 1.942530294622304, "learning_rate": 3.132637366278718e-07, "loss": 0.6234, "step": 29043 }, { "epoch": 0.8901556944955253, "grad_norm": 2.0840710337432444, "learning_rate": 3.130908443154046e-07, "loss": 0.6145, "step": 29044 }, { "epoch": 0.8901863430182665, "grad_norm": 2.003964987378896, "learning_rate": 3.129179981842523e-07, "loss": 0.6154, "step": 29045 }, { "epoch": 0.8902169915410078, "grad_norm": 1.8042121574256427, "learning_rate": 3.1274519823611847e-07, "loss": 0.5217, "step": 29046 }, { "epoch": 0.8902476400637489, "grad_norm": 1.696645716707758, "learning_rate": 3.125724444727052e-07, "loss": 0.4659, "step": 29047 }, { "epoch": 0.8902782885864902, "grad_norm": 1.7879813558286557, "learning_rate": 3.123997368957149e-07, "loss": 0.5545, "step": 29048 }, { "epoch": 0.8903089371092313, "grad_norm": 1.7967369302949938, "learning_rate": 3.122270755068502e-07, "loss": 0.5414, "step": 29049 }, { "epoch": 0.8903395856319726, "grad_norm": 1.8356361261350387, "learning_rate": 3.1205446030781016e-07, "loss": 0.527, "step": 29050 }, { "epoch": 0.8903702341547137, "grad_norm": 0.8025951028634561, "learning_rate": 3.1188189130029747e-07, "loss": 0.3852, "step": 29051 }, { "epoch": 0.890400882677455, "grad_norm": 2.113555099035457, "learning_rate": 3.1170936848601285e-07, "loss": 0.6082, "step": 29052 }, { "epoch": 0.8904315312001961, "grad_norm": 2.229597634416197, "learning_rate": 3.1153689186665446e-07, "loss": 0.6389, "step": 29053 }, { "epoch": 0.8904621797229374, "grad_norm": 1.779915903584385, "learning_rate": 3.1136446144392376e-07, "loss": 0.6339, "step": 29054 }, { "epoch": 0.8904928282456785, "grad_norm": 1.873885063833857, "learning_rate": 3.1119207721951704e-07, "loss": 0.6297, "step": 29055 }, { "epoch": 0.8905234767684198, "grad_norm": 0.778540263541164, "learning_rate": 3.1101973919513526e-07, "loss": 0.4071, "step": 29056 }, { "epoch": 0.890554125291161, "grad_norm": 1.900882466875336, "learning_rate": 3.108474473724765e-07, "loss": 0.6163, "step": 29057 }, { "epoch": 0.8905847738139022, "grad_norm": 2.312170950173978, "learning_rate": 3.1067520175323605e-07, "loss": 0.5841, "step": 29058 }, { "epoch": 0.8906154223366434, "grad_norm": 0.7882235436507042, "learning_rate": 3.105030023391137e-07, "loss": 0.4023, "step": 29059 }, { "epoch": 0.8906460708593846, "grad_norm": 1.8420308313375806, "learning_rate": 3.103308491318052e-07, "loss": 0.5237, "step": 29060 }, { "epoch": 0.8906767193821258, "grad_norm": 2.2746020298710885, "learning_rate": 3.10158742133006e-07, "loss": 0.6106, "step": 29061 }, { "epoch": 0.8907073679048669, "grad_norm": 1.7930235891997015, "learning_rate": 3.0998668134441304e-07, "loss": 0.531, "step": 29062 }, { "epoch": 0.8907380164276082, "grad_norm": 1.6758337130074699, "learning_rate": 3.098146667677215e-07, "loss": 0.5795, "step": 29063 }, { "epoch": 0.8907686649503493, "grad_norm": 2.0439412275828377, "learning_rate": 3.096426984046258e-07, "loss": 0.6527, "step": 29064 }, { "epoch": 0.8907993134730906, "grad_norm": 0.7809036132738257, "learning_rate": 3.0947077625682165e-07, "loss": 0.3947, "step": 29065 }, { "epoch": 0.8908299619958318, "grad_norm": 2.047623762514005, "learning_rate": 3.09298900326e-07, "loss": 0.5016, "step": 29066 }, { "epoch": 0.890860610518573, "grad_norm": 1.8739739279239578, "learning_rate": 3.0912707061385825e-07, "loss": 0.5731, "step": 29067 }, { "epoch": 0.8908912590413142, "grad_norm": 1.8912788588067229, "learning_rate": 3.0895528712208745e-07, "loss": 0.604, "step": 29068 }, { "epoch": 0.8909219075640554, "grad_norm": 1.8012962057587318, "learning_rate": 3.0878354985238e-07, "loss": 0.5528, "step": 29069 }, { "epoch": 0.8909525560867966, "grad_norm": 5.660167287507456, "learning_rate": 3.0861185880642854e-07, "loss": 0.535, "step": 29070 }, { "epoch": 0.8909832046095378, "grad_norm": 0.8435329984968566, "learning_rate": 3.084402139859249e-07, "loss": 0.4062, "step": 29071 }, { "epoch": 0.891013853132279, "grad_norm": 1.75176519682702, "learning_rate": 3.082686153925601e-07, "loss": 0.5145, "step": 29072 }, { "epoch": 0.8910445016550202, "grad_norm": 1.9622440558930174, "learning_rate": 3.08097063028025e-07, "loss": 0.6916, "step": 29073 }, { "epoch": 0.8910751501777614, "grad_norm": 1.9037043542293641, "learning_rate": 3.0792555689401093e-07, "loss": 0.6755, "step": 29074 }, { "epoch": 0.8911057987005027, "grad_norm": 0.8182673684337977, "learning_rate": 3.0775409699220547e-07, "loss": 0.3975, "step": 29075 }, { "epoch": 0.8911364472232438, "grad_norm": 0.7957894712751551, "learning_rate": 3.0758268332430064e-07, "loss": 0.3935, "step": 29076 }, { "epoch": 0.8911670957459851, "grad_norm": 1.9131489390268377, "learning_rate": 3.074113158919828e-07, "loss": 0.5719, "step": 29077 }, { "epoch": 0.8911977442687262, "grad_norm": 1.731359894890742, "learning_rate": 3.0723999469694344e-07, "loss": 0.5622, "step": 29078 }, { "epoch": 0.8912283927914675, "grad_norm": 1.8324879652583121, "learning_rate": 3.0706871974086893e-07, "loss": 0.6708, "step": 29079 }, { "epoch": 0.8912590413142086, "grad_norm": 1.8481956450359893, "learning_rate": 3.068974910254463e-07, "loss": 0.6312, "step": 29080 }, { "epoch": 0.8912896898369499, "grad_norm": 2.013728598186218, "learning_rate": 3.0672630855236363e-07, "loss": 0.6066, "step": 29081 }, { "epoch": 0.891320338359691, "grad_norm": 1.7653955724070913, "learning_rate": 3.065551723233079e-07, "loss": 0.6142, "step": 29082 }, { "epoch": 0.8913509868824323, "grad_norm": 2.0579864933538126, "learning_rate": 3.063840823399644e-07, "loss": 0.5995, "step": 29083 }, { "epoch": 0.8913816354051735, "grad_norm": 1.8819989620211557, "learning_rate": 3.062130386040196e-07, "loss": 0.5736, "step": 29084 }, { "epoch": 0.8914122839279147, "grad_norm": 0.760247279150852, "learning_rate": 3.060420411171583e-07, "loss": 0.3878, "step": 29085 }, { "epoch": 0.8914429324506559, "grad_norm": 1.7399464088786423, "learning_rate": 3.0587108988106684e-07, "loss": 0.5433, "step": 29086 }, { "epoch": 0.8914735809733971, "grad_norm": 1.8215361117418338, "learning_rate": 3.0570018489742836e-07, "loss": 0.5644, "step": 29087 }, { "epoch": 0.8915042294961383, "grad_norm": 1.8112935206004073, "learning_rate": 3.05529326167926e-07, "loss": 0.5636, "step": 29088 }, { "epoch": 0.8915348780188795, "grad_norm": 0.7722763693873629, "learning_rate": 3.053585136942455e-07, "loss": 0.3922, "step": 29089 }, { "epoch": 0.8915655265416207, "grad_norm": 1.8068013609766076, "learning_rate": 3.0518774747806844e-07, "loss": 0.5822, "step": 29090 }, { "epoch": 0.891596175064362, "grad_norm": 1.7437084341977913, "learning_rate": 3.0501702752107733e-07, "loss": 0.5051, "step": 29091 }, { "epoch": 0.8916268235871031, "grad_norm": 1.8893678939385854, "learning_rate": 3.0484635382495465e-07, "loss": 0.5418, "step": 29092 }, { "epoch": 0.8916574721098443, "grad_norm": 2.1250981240522115, "learning_rate": 3.0467572639138243e-07, "loss": 0.5524, "step": 29093 }, { "epoch": 0.8916881206325855, "grad_norm": 1.862285832252163, "learning_rate": 3.045051452220421e-07, "loss": 0.4989, "step": 29094 }, { "epoch": 0.8917187691553267, "grad_norm": 1.7210389435392401, "learning_rate": 3.043346103186129e-07, "loss": 0.4968, "step": 29095 }, { "epoch": 0.8917494176780679, "grad_norm": 1.8843111826785408, "learning_rate": 3.0416412168277675e-07, "loss": 0.4861, "step": 29096 }, { "epoch": 0.8917800662008091, "grad_norm": 1.8439909751909624, "learning_rate": 3.039936793162135e-07, "loss": 0.5393, "step": 29097 }, { "epoch": 0.8918107147235503, "grad_norm": 1.8607791386070653, "learning_rate": 3.038232832206023e-07, "loss": 0.5473, "step": 29098 }, { "epoch": 0.8918413632462915, "grad_norm": 1.851444998668086, "learning_rate": 3.036529333976207e-07, "loss": 0.5557, "step": 29099 }, { "epoch": 0.8918720117690327, "grad_norm": 1.940725448335634, "learning_rate": 3.034826298489485e-07, "loss": 0.5947, "step": 29100 }, { "epoch": 0.8919026602917739, "grad_norm": 1.8156469737731344, "learning_rate": 3.033123725762643e-07, "loss": 0.536, "step": 29101 }, { "epoch": 0.8919333088145152, "grad_norm": 1.7901395963721274, "learning_rate": 3.0314216158124465e-07, "loss": 0.5151, "step": 29102 }, { "epoch": 0.8919639573372563, "grad_norm": 1.8531344533473035, "learning_rate": 3.0297199686556646e-07, "loss": 0.6332, "step": 29103 }, { "epoch": 0.8919946058599976, "grad_norm": 1.8693745806484297, "learning_rate": 3.0280187843090723e-07, "loss": 0.631, "step": 29104 }, { "epoch": 0.8920252543827387, "grad_norm": 1.775778540585314, "learning_rate": 3.026318062789441e-07, "loss": 0.5561, "step": 29105 }, { "epoch": 0.89205590290548, "grad_norm": 0.7879474360736394, "learning_rate": 3.024617804113511e-07, "loss": 0.38, "step": 29106 }, { "epoch": 0.8920865514282211, "grad_norm": 1.8397481148291779, "learning_rate": 3.022918008298026e-07, "loss": 0.6542, "step": 29107 }, { "epoch": 0.8921171999509624, "grad_norm": 1.9532605321943324, "learning_rate": 3.021218675359766e-07, "loss": 0.6371, "step": 29108 }, { "epoch": 0.8921478484737035, "grad_norm": 2.12472425838271, "learning_rate": 3.0195198053154574e-07, "loss": 0.5302, "step": 29109 }, { "epoch": 0.8921784969964448, "grad_norm": 2.151523792697309, "learning_rate": 3.017821398181836e-07, "loss": 0.6679, "step": 29110 }, { "epoch": 0.892209145519186, "grad_norm": 1.7802675762365676, "learning_rate": 3.016123453975639e-07, "loss": 0.5777, "step": 29111 }, { "epoch": 0.8922397940419272, "grad_norm": 2.019278242405207, "learning_rate": 3.0144259727135974e-07, "loss": 0.6369, "step": 29112 }, { "epoch": 0.8922704425646684, "grad_norm": 0.7978264733601866, "learning_rate": 3.0127289544124473e-07, "loss": 0.3975, "step": 29113 }, { "epoch": 0.8923010910874096, "grad_norm": 0.7843679513968835, "learning_rate": 3.0110323990888924e-07, "loss": 0.4109, "step": 29114 }, { "epoch": 0.8923317396101508, "grad_norm": 2.0003811489430188, "learning_rate": 3.0093363067596635e-07, "loss": 0.5375, "step": 29115 }, { "epoch": 0.892362388132892, "grad_norm": 2.3979415505086807, "learning_rate": 3.00764067744147e-07, "loss": 0.6925, "step": 29116 }, { "epoch": 0.8923930366556332, "grad_norm": 2.046790509762005, "learning_rate": 3.005945511151015e-07, "loss": 0.5894, "step": 29117 }, { "epoch": 0.8924236851783744, "grad_norm": 2.1330396100493947, "learning_rate": 3.0042508079049905e-07, "loss": 0.5789, "step": 29118 }, { "epoch": 0.8924543337011156, "grad_norm": 1.695988361021494, "learning_rate": 3.002556567720122e-07, "loss": 0.5981, "step": 29119 }, { "epoch": 0.8924849822238569, "grad_norm": 1.8576940022463726, "learning_rate": 3.0008627906130796e-07, "loss": 0.4326, "step": 29120 }, { "epoch": 0.892515630746598, "grad_norm": 1.8072933178672075, "learning_rate": 2.999169476600572e-07, "loss": 0.5781, "step": 29121 }, { "epoch": 0.8925462792693393, "grad_norm": 1.9878373394393731, "learning_rate": 2.997476625699258e-07, "loss": 0.5775, "step": 29122 }, { "epoch": 0.8925769277920804, "grad_norm": 0.7551825418678626, "learning_rate": 2.9957842379258417e-07, "loss": 0.3779, "step": 29123 }, { "epoch": 0.8926075763148216, "grad_norm": 2.1106463596058616, "learning_rate": 2.9940923132969923e-07, "loss": 0.4977, "step": 29124 }, { "epoch": 0.8926382248375628, "grad_norm": 1.9490424406592117, "learning_rate": 2.992400851829375e-07, "loss": 0.5468, "step": 29125 }, { "epoch": 0.892668873360304, "grad_norm": 2.0190871609522563, "learning_rate": 2.990709853539653e-07, "loss": 0.5203, "step": 29126 }, { "epoch": 0.8926995218830452, "grad_norm": 1.849929607364662, "learning_rate": 2.9890193184445085e-07, "loss": 0.4791, "step": 29127 }, { "epoch": 0.8927301704057864, "grad_norm": 1.7776841157213055, "learning_rate": 2.987329246560583e-07, "loss": 0.452, "step": 29128 }, { "epoch": 0.8927608189285277, "grad_norm": 1.7629829775079375, "learning_rate": 2.985639637904514e-07, "loss": 0.5082, "step": 29129 }, { "epoch": 0.8927914674512688, "grad_norm": 2.004821386564947, "learning_rate": 2.9839504924929875e-07, "loss": 0.7131, "step": 29130 }, { "epoch": 0.8928221159740101, "grad_norm": 2.0175278610542002, "learning_rate": 2.9822618103426127e-07, "loss": 0.6452, "step": 29131 }, { "epoch": 0.8928527644967512, "grad_norm": 1.743905806220782, "learning_rate": 2.980573591470054e-07, "loss": 0.5293, "step": 29132 }, { "epoch": 0.8928834130194925, "grad_norm": 2.0840923914410587, "learning_rate": 2.97888583589192e-07, "loss": 0.606, "step": 29133 }, { "epoch": 0.8929140615422336, "grad_norm": 2.0929372379167317, "learning_rate": 2.9771985436248594e-07, "loss": 0.6084, "step": 29134 }, { "epoch": 0.8929447100649749, "grad_norm": 1.9062881504459506, "learning_rate": 2.975511714685503e-07, "loss": 0.5753, "step": 29135 }, { "epoch": 0.892975358587716, "grad_norm": 1.7931682456394555, "learning_rate": 2.9738253490904477e-07, "loss": 0.4972, "step": 29136 }, { "epoch": 0.8930060071104573, "grad_norm": 1.7849139978955413, "learning_rate": 2.9721394468563316e-07, "loss": 0.5459, "step": 29137 }, { "epoch": 0.8930366556331985, "grad_norm": 1.731882349280884, "learning_rate": 2.970454007999757e-07, "loss": 0.5928, "step": 29138 }, { "epoch": 0.8930673041559397, "grad_norm": 1.9459694425965892, "learning_rate": 2.9687690325373273e-07, "loss": 0.6124, "step": 29139 }, { "epoch": 0.8930979526786809, "grad_norm": 2.2927261224590576, "learning_rate": 2.9670845204856523e-07, "loss": 0.6055, "step": 29140 }, { "epoch": 0.8931286012014221, "grad_norm": 2.113591511800739, "learning_rate": 2.9654004718613347e-07, "loss": 0.4909, "step": 29141 }, { "epoch": 0.8931592497241633, "grad_norm": 1.7317605629901862, "learning_rate": 2.9637168866809505e-07, "loss": 0.5824, "step": 29142 }, { "epoch": 0.8931898982469045, "grad_norm": 0.7935615034272031, "learning_rate": 2.962033764961109e-07, "loss": 0.4156, "step": 29143 }, { "epoch": 0.8932205467696457, "grad_norm": 1.9429123687689625, "learning_rate": 2.960351106718373e-07, "loss": 0.592, "step": 29144 }, { "epoch": 0.893251195292387, "grad_norm": 0.7995429373810334, "learning_rate": 2.958668911969337e-07, "loss": 0.3921, "step": 29145 }, { "epoch": 0.8932818438151281, "grad_norm": 1.9974081335530944, "learning_rate": 2.9569871807305806e-07, "loss": 0.575, "step": 29146 }, { "epoch": 0.8933124923378694, "grad_norm": 1.8534141453188069, "learning_rate": 2.9553059130186526e-07, "loss": 0.4686, "step": 29147 }, { "epoch": 0.8933431408606105, "grad_norm": 2.0681246870041665, "learning_rate": 2.9536251088501387e-07, "loss": 0.6325, "step": 29148 }, { "epoch": 0.8933737893833518, "grad_norm": 0.8318179429243185, "learning_rate": 2.951944768241594e-07, "loss": 0.4049, "step": 29149 }, { "epoch": 0.8934044379060929, "grad_norm": 1.9459011675732532, "learning_rate": 2.9502648912095756e-07, "loss": 0.6117, "step": 29150 }, { "epoch": 0.8934350864288342, "grad_norm": 1.8120695544157384, "learning_rate": 2.948585477770638e-07, "loss": 0.6158, "step": 29151 }, { "epoch": 0.8934657349515753, "grad_norm": 1.6901622106541945, "learning_rate": 2.946906527941318e-07, "loss": 0.5333, "step": 29152 }, { "epoch": 0.8934963834743166, "grad_norm": 1.9849151021722908, "learning_rate": 2.945228041738174e-07, "loss": 0.6179, "step": 29153 }, { "epoch": 0.8935270319970577, "grad_norm": 1.8234653085981025, "learning_rate": 2.9435500191777377e-07, "loss": 0.5916, "step": 29154 }, { "epoch": 0.8935576805197989, "grad_norm": 2.32639326314753, "learning_rate": 2.94187246027654e-07, "loss": 0.575, "step": 29155 }, { "epoch": 0.8935883290425402, "grad_norm": 1.8152939640847385, "learning_rate": 2.9401953650511073e-07, "loss": 0.5808, "step": 29156 }, { "epoch": 0.8936189775652813, "grad_norm": 1.8973742186914775, "learning_rate": 2.9385187335179864e-07, "loss": 0.5113, "step": 29157 }, { "epoch": 0.8936496260880226, "grad_norm": 1.8170855523471412, "learning_rate": 2.936842565693665e-07, "loss": 0.5646, "step": 29158 }, { "epoch": 0.8936802746107637, "grad_norm": 1.8107254084085909, "learning_rate": 2.935166861594685e-07, "loss": 0.5518, "step": 29159 }, { "epoch": 0.893710923133505, "grad_norm": 0.7779192417365765, "learning_rate": 2.9334916212375495e-07, "loss": 0.3755, "step": 29160 }, { "epoch": 0.8937415716562461, "grad_norm": 1.818033833948246, "learning_rate": 2.9318168446387574e-07, "loss": 0.5613, "step": 29161 }, { "epoch": 0.8937722201789874, "grad_norm": 1.8466878349879683, "learning_rate": 2.9301425318148223e-07, "loss": 0.5513, "step": 29162 }, { "epoch": 0.8938028687017285, "grad_norm": 1.895656318863321, "learning_rate": 2.9284686827822316e-07, "loss": 0.5464, "step": 29163 }, { "epoch": 0.8938335172244698, "grad_norm": 0.8124790528849958, "learning_rate": 2.926795297557483e-07, "loss": 0.3996, "step": 29164 }, { "epoch": 0.893864165747211, "grad_norm": 2.0808319135081925, "learning_rate": 2.925122376157069e-07, "loss": 0.617, "step": 29165 }, { "epoch": 0.8938948142699522, "grad_norm": 2.065602852269105, "learning_rate": 2.9234499185974594e-07, "loss": 0.6226, "step": 29166 }, { "epoch": 0.8939254627926934, "grad_norm": 2.0209563888364475, "learning_rate": 2.9217779248951474e-07, "loss": 0.5325, "step": 29167 }, { "epoch": 0.8939561113154346, "grad_norm": 1.7494424696336512, "learning_rate": 2.920106395066613e-07, "loss": 0.4379, "step": 29168 }, { "epoch": 0.8939867598381758, "grad_norm": 0.7816735913288129, "learning_rate": 2.918435329128305e-07, "loss": 0.387, "step": 29169 }, { "epoch": 0.894017408360917, "grad_norm": 1.839286701142163, "learning_rate": 2.916764727096699e-07, "loss": 0.5455, "step": 29170 }, { "epoch": 0.8940480568836582, "grad_norm": 1.9623114928759546, "learning_rate": 2.915094588988265e-07, "loss": 0.6433, "step": 29171 }, { "epoch": 0.8940787054063994, "grad_norm": 1.902332919442307, "learning_rate": 2.913424914819446e-07, "loss": 0.484, "step": 29172 }, { "epoch": 0.8941093539291406, "grad_norm": 1.8395367307213528, "learning_rate": 2.911755704606706e-07, "loss": 0.531, "step": 29173 }, { "epoch": 0.8941400024518819, "grad_norm": 1.932102156047284, "learning_rate": 2.9100869583664757e-07, "loss": 0.6666, "step": 29174 }, { "epoch": 0.894170650974623, "grad_norm": 1.7579167944973295, "learning_rate": 2.9084186761152044e-07, "loss": 0.5993, "step": 29175 }, { "epoch": 0.8942012994973643, "grad_norm": 1.6977004324811318, "learning_rate": 2.906750857869345e-07, "loss": 0.4819, "step": 29176 }, { "epoch": 0.8942319480201054, "grad_norm": 1.607898155034489, "learning_rate": 2.905083503645312e-07, "loss": 0.5302, "step": 29177 }, { "epoch": 0.8942625965428467, "grad_norm": 2.065490854046644, "learning_rate": 2.9034166134595365e-07, "loss": 0.5915, "step": 29178 }, { "epoch": 0.8942932450655878, "grad_norm": 0.8337618925793387, "learning_rate": 2.901750187328456e-07, "loss": 0.3922, "step": 29179 }, { "epoch": 0.8943238935883291, "grad_norm": 1.852287106284008, "learning_rate": 2.900084225268474e-07, "loss": 0.5532, "step": 29180 }, { "epoch": 0.8943545421110702, "grad_norm": 1.99731106632739, "learning_rate": 2.8984187272960154e-07, "loss": 0.5493, "step": 29181 }, { "epoch": 0.8943851906338115, "grad_norm": 2.0384550865378626, "learning_rate": 2.896753693427495e-07, "loss": 0.5482, "step": 29182 }, { "epoch": 0.8944158391565527, "grad_norm": 1.7431788271994622, "learning_rate": 2.8950891236793065e-07, "loss": 0.6085, "step": 29183 }, { "epoch": 0.8944464876792939, "grad_norm": 1.965659846171309, "learning_rate": 2.893425018067864e-07, "loss": 0.6226, "step": 29184 }, { "epoch": 0.8944771362020351, "grad_norm": 1.7194839635842138, "learning_rate": 2.891761376609542e-07, "loss": 0.5803, "step": 29185 }, { "epoch": 0.8945077847247762, "grad_norm": 2.0722777436861, "learning_rate": 2.890098199320762e-07, "loss": 0.568, "step": 29186 }, { "epoch": 0.8945384332475175, "grad_norm": 1.7574609629894091, "learning_rate": 2.888435486217894e-07, "loss": 0.5745, "step": 29187 }, { "epoch": 0.8945690817702586, "grad_norm": 1.7025819730196563, "learning_rate": 2.8867732373173254e-07, "loss": 0.5256, "step": 29188 }, { "epoch": 0.8945997302929999, "grad_norm": 2.150190023216781, "learning_rate": 2.885111452635431e-07, "loss": 0.5359, "step": 29189 }, { "epoch": 0.894630378815741, "grad_norm": 1.8895932279435248, "learning_rate": 2.883450132188598e-07, "loss": 0.4764, "step": 29190 }, { "epoch": 0.8946610273384823, "grad_norm": 1.8295830356906857, "learning_rate": 2.881789275993174e-07, "loss": 0.5367, "step": 29191 }, { "epoch": 0.8946916758612234, "grad_norm": 2.0132890986415335, "learning_rate": 2.8801288840655415e-07, "loss": 0.5814, "step": 29192 }, { "epoch": 0.8947223243839647, "grad_norm": 1.6692377853725935, "learning_rate": 2.878468956422054e-07, "loss": 0.4817, "step": 29193 }, { "epoch": 0.8947529729067059, "grad_norm": 1.903340151074622, "learning_rate": 2.876809493079075e-07, "loss": 0.5632, "step": 29194 }, { "epoch": 0.8947836214294471, "grad_norm": 1.8148722186912343, "learning_rate": 2.8751504940529474e-07, "loss": 0.5567, "step": 29195 }, { "epoch": 0.8948142699521883, "grad_norm": 1.8339618281746743, "learning_rate": 2.873491959360014e-07, "loss": 0.5302, "step": 29196 }, { "epoch": 0.8948449184749295, "grad_norm": 2.004410379949535, "learning_rate": 2.8718338890166177e-07, "loss": 0.5647, "step": 29197 }, { "epoch": 0.8948755669976707, "grad_norm": 1.7552343209555843, "learning_rate": 2.8701762830391167e-07, "loss": 0.5813, "step": 29198 }, { "epoch": 0.8949062155204119, "grad_norm": 1.7657694078515431, "learning_rate": 2.8685191414438096e-07, "loss": 0.5854, "step": 29199 }, { "epoch": 0.8949368640431531, "grad_norm": 2.05976046332123, "learning_rate": 2.86686246424705e-07, "loss": 0.6052, "step": 29200 }, { "epoch": 0.8949675125658944, "grad_norm": 2.1476777095853268, "learning_rate": 2.865206251465158e-07, "loss": 0.5821, "step": 29201 }, { "epoch": 0.8949981610886355, "grad_norm": 1.817901783728755, "learning_rate": 2.863550503114443e-07, "loss": 0.5035, "step": 29202 }, { "epoch": 0.8950288096113768, "grad_norm": 2.0068907009382655, "learning_rate": 2.861895219211236e-07, "loss": 0.6799, "step": 29203 }, { "epoch": 0.8950594581341179, "grad_norm": 1.6164625222059577, "learning_rate": 2.860240399771813e-07, "loss": 0.452, "step": 29204 }, { "epoch": 0.8950901066568592, "grad_norm": 1.9457819500615179, "learning_rate": 2.8585860448125226e-07, "loss": 0.5259, "step": 29205 }, { "epoch": 0.8951207551796003, "grad_norm": 1.8595995052565346, "learning_rate": 2.856932154349645e-07, "loss": 0.5104, "step": 29206 }, { "epoch": 0.8951514037023416, "grad_norm": 1.9350729220612155, "learning_rate": 2.855278728399469e-07, "loss": 0.518, "step": 29207 }, { "epoch": 0.8951820522250827, "grad_norm": 1.8947473120231615, "learning_rate": 2.853625766978296e-07, "loss": 0.6532, "step": 29208 }, { "epoch": 0.895212700747824, "grad_norm": 2.168787595011027, "learning_rate": 2.8519732701024204e-07, "loss": 0.6003, "step": 29209 }, { "epoch": 0.8952433492705651, "grad_norm": 1.8387736345263717, "learning_rate": 2.850321237788101e-07, "loss": 0.5956, "step": 29210 }, { "epoch": 0.8952739977933064, "grad_norm": 1.8707079157983724, "learning_rate": 2.8486696700516345e-07, "loss": 0.5085, "step": 29211 }, { "epoch": 0.8953046463160476, "grad_norm": 2.1716392442499823, "learning_rate": 2.8470185669092934e-07, "loss": 0.4722, "step": 29212 }, { "epoch": 0.8953352948387888, "grad_norm": 1.8736021854179894, "learning_rate": 2.8453679283773463e-07, "loss": 0.5882, "step": 29213 }, { "epoch": 0.89536594336153, "grad_norm": 2.00389006194049, "learning_rate": 2.8437177544720526e-07, "loss": 0.5301, "step": 29214 }, { "epoch": 0.8953965918842712, "grad_norm": 1.673989531671404, "learning_rate": 2.8420680452096616e-07, "loss": 0.5947, "step": 29215 }, { "epoch": 0.8954272404070124, "grad_norm": 0.8003917596887395, "learning_rate": 2.8404188006064537e-07, "loss": 0.3902, "step": 29216 }, { "epoch": 0.8954578889297535, "grad_norm": 1.918525527712599, "learning_rate": 2.838770020678666e-07, "loss": 0.6257, "step": 29217 }, { "epoch": 0.8954885374524948, "grad_norm": 1.8799803837274798, "learning_rate": 2.837121705442536e-07, "loss": 0.519, "step": 29218 }, { "epoch": 0.8955191859752359, "grad_norm": 2.203779230514419, "learning_rate": 2.835473854914311e-07, "loss": 0.5882, "step": 29219 }, { "epoch": 0.8955498344979772, "grad_norm": 2.116306933600119, "learning_rate": 2.833826469110235e-07, "loss": 0.61, "step": 29220 }, { "epoch": 0.8955804830207184, "grad_norm": 1.9546660465373877, "learning_rate": 2.832179548046537e-07, "loss": 0.5491, "step": 29221 }, { "epoch": 0.8956111315434596, "grad_norm": 1.8618129396627898, "learning_rate": 2.830533091739435e-07, "loss": 0.5051, "step": 29222 }, { "epoch": 0.8956417800662008, "grad_norm": 2.1678956073648807, "learning_rate": 2.828887100205163e-07, "loss": 0.6605, "step": 29223 }, { "epoch": 0.895672428588942, "grad_norm": 0.8360643549609592, "learning_rate": 2.8272415734599435e-07, "loss": 0.4122, "step": 29224 }, { "epoch": 0.8957030771116832, "grad_norm": 1.8341346823604834, "learning_rate": 2.825596511519979e-07, "loss": 0.6539, "step": 29225 }, { "epoch": 0.8957337256344244, "grad_norm": 0.8056317579471671, "learning_rate": 2.823951914401468e-07, "loss": 0.3907, "step": 29226 }, { "epoch": 0.8957643741571656, "grad_norm": 0.8093602503205974, "learning_rate": 2.8223077821206425e-07, "loss": 0.3989, "step": 29227 }, { "epoch": 0.8957950226799068, "grad_norm": 1.8867237979490838, "learning_rate": 2.820664114693694e-07, "loss": 0.4711, "step": 29228 }, { "epoch": 0.895825671202648, "grad_norm": 2.0252698072041206, "learning_rate": 2.8190209121367996e-07, "loss": 0.5703, "step": 29229 }, { "epoch": 0.8958563197253893, "grad_norm": 1.7193856466041282, "learning_rate": 2.8173781744661676e-07, "loss": 0.5544, "step": 29230 }, { "epoch": 0.8958869682481304, "grad_norm": 1.9889802632921436, "learning_rate": 2.8157359016979855e-07, "loss": 0.5118, "step": 29231 }, { "epoch": 0.8959176167708717, "grad_norm": 1.668808547595713, "learning_rate": 2.8140940938484296e-07, "loss": 0.5449, "step": 29232 }, { "epoch": 0.8959482652936128, "grad_norm": 1.7659792524536948, "learning_rate": 2.812452750933675e-07, "loss": 0.5703, "step": 29233 }, { "epoch": 0.8959789138163541, "grad_norm": 1.8652856259433372, "learning_rate": 2.810811872969893e-07, "loss": 0.5712, "step": 29234 }, { "epoch": 0.8960095623390952, "grad_norm": 1.7698872620171506, "learning_rate": 2.809171459973264e-07, "loss": 0.5625, "step": 29235 }, { "epoch": 0.8960402108618365, "grad_norm": 0.7701898951136432, "learning_rate": 2.8075315119599487e-07, "loss": 0.399, "step": 29236 }, { "epoch": 0.8960708593845776, "grad_norm": 1.8610504476007268, "learning_rate": 2.805892028946078e-07, "loss": 0.5003, "step": 29237 }, { "epoch": 0.8961015079073189, "grad_norm": 1.7912553333740155, "learning_rate": 2.804253010947849e-07, "loss": 0.5741, "step": 29238 }, { "epoch": 0.8961321564300601, "grad_norm": 0.8219311097434789, "learning_rate": 2.8026144579813786e-07, "loss": 0.4081, "step": 29239 }, { "epoch": 0.8961628049528013, "grad_norm": 2.154717285618533, "learning_rate": 2.80097637006283e-07, "loss": 0.621, "step": 29240 }, { "epoch": 0.8961934534755425, "grad_norm": 0.798380169657634, "learning_rate": 2.799338747208336e-07, "loss": 0.4094, "step": 29241 }, { "epoch": 0.8962241019982837, "grad_norm": 1.9715887504809895, "learning_rate": 2.797701589434032e-07, "loss": 0.6094, "step": 29242 }, { "epoch": 0.8962547505210249, "grad_norm": 1.8808314280050809, "learning_rate": 2.796064896756057e-07, "loss": 0.5207, "step": 29243 }, { "epoch": 0.8962853990437661, "grad_norm": 1.861445098391788, "learning_rate": 2.7944286691905244e-07, "loss": 0.5221, "step": 29244 }, { "epoch": 0.8963160475665073, "grad_norm": 2.075949667075406, "learning_rate": 2.7927929067535664e-07, "loss": 0.5887, "step": 29245 }, { "epoch": 0.8963466960892486, "grad_norm": 0.8273002025690672, "learning_rate": 2.7911576094613035e-07, "loss": 0.382, "step": 29246 }, { "epoch": 0.8963773446119897, "grad_norm": 1.8375159514839357, "learning_rate": 2.789522777329839e-07, "loss": 0.5662, "step": 29247 }, { "epoch": 0.8964079931347309, "grad_norm": 1.9381639586596504, "learning_rate": 2.7878884103752944e-07, "loss": 0.5428, "step": 29248 }, { "epoch": 0.8964386416574721, "grad_norm": 2.0911167386797667, "learning_rate": 2.786254508613756e-07, "loss": 0.653, "step": 29249 }, { "epoch": 0.8964692901802133, "grad_norm": 1.7610778312218256, "learning_rate": 2.7846210720613276e-07, "loss": 0.5155, "step": 29250 }, { "epoch": 0.8964999387029545, "grad_norm": 1.8639204885877811, "learning_rate": 2.7829881007341184e-07, "loss": 0.4531, "step": 29251 }, { "epoch": 0.8965305872256957, "grad_norm": 2.027183662316618, "learning_rate": 2.781355594648205e-07, "loss": 0.657, "step": 29252 }, { "epoch": 0.8965612357484369, "grad_norm": 2.05777035754172, "learning_rate": 2.779723553819674e-07, "loss": 0.6193, "step": 29253 }, { "epoch": 0.8965918842711781, "grad_norm": 1.8689330573246337, "learning_rate": 2.778091978264613e-07, "loss": 0.5933, "step": 29254 }, { "epoch": 0.8966225327939193, "grad_norm": 1.9019726884111532, "learning_rate": 2.776460867999098e-07, "loss": 0.5388, "step": 29255 }, { "epoch": 0.8966531813166605, "grad_norm": 1.9551571003733557, "learning_rate": 2.774830223039182e-07, "loss": 0.571, "step": 29256 }, { "epoch": 0.8966838298394018, "grad_norm": 2.046712632145146, "learning_rate": 2.7732000434009586e-07, "loss": 0.6907, "step": 29257 }, { "epoch": 0.8967144783621429, "grad_norm": 1.9069053068752349, "learning_rate": 2.77157032910047e-07, "loss": 0.5643, "step": 29258 }, { "epoch": 0.8967451268848842, "grad_norm": 1.8828504547401932, "learning_rate": 2.769941080153793e-07, "loss": 0.5292, "step": 29259 }, { "epoch": 0.8967757754076253, "grad_norm": 0.7998760470834524, "learning_rate": 2.768312296576964e-07, "loss": 0.3956, "step": 29260 }, { "epoch": 0.8968064239303666, "grad_norm": 2.00523741690335, "learning_rate": 2.7666839783860424e-07, "loss": 0.4946, "step": 29261 }, { "epoch": 0.8968370724531077, "grad_norm": 2.1723409452615687, "learning_rate": 2.765056125597071e-07, "loss": 0.5327, "step": 29262 }, { "epoch": 0.896867720975849, "grad_norm": 0.7689508956570946, "learning_rate": 2.7634287382260816e-07, "loss": 0.3874, "step": 29263 }, { "epoch": 0.8968983694985901, "grad_norm": 1.7798843198535963, "learning_rate": 2.7618018162891116e-07, "loss": 0.5583, "step": 29264 }, { "epoch": 0.8969290180213314, "grad_norm": 1.5834024994371259, "learning_rate": 2.760175359802203e-07, "loss": 0.5471, "step": 29265 }, { "epoch": 0.8969596665440726, "grad_norm": 2.0531621845932397, "learning_rate": 2.7585493687813656e-07, "loss": 0.6031, "step": 29266 }, { "epoch": 0.8969903150668138, "grad_norm": 1.8270546921954653, "learning_rate": 2.7569238432426313e-07, "loss": 0.5521, "step": 29267 }, { "epoch": 0.897020963589555, "grad_norm": 1.8646996140411491, "learning_rate": 2.755298783202021e-07, "loss": 0.6155, "step": 29268 }, { "epoch": 0.8970516121122962, "grad_norm": 1.9487507606936878, "learning_rate": 2.7536741886755313e-07, "loss": 0.5795, "step": 29269 }, { "epoch": 0.8970822606350374, "grad_norm": 1.8966039176431155, "learning_rate": 2.75205005967919e-07, "loss": 0.4895, "step": 29270 }, { "epoch": 0.8971129091577786, "grad_norm": 1.8612731625383419, "learning_rate": 2.750426396228978e-07, "loss": 0.5648, "step": 29271 }, { "epoch": 0.8971435576805198, "grad_norm": 1.8823334470509954, "learning_rate": 2.748803198340899e-07, "loss": 0.585, "step": 29272 }, { "epoch": 0.897174206203261, "grad_norm": 1.9203068679094375, "learning_rate": 2.747180466030963e-07, "loss": 0.6031, "step": 29273 }, { "epoch": 0.8972048547260022, "grad_norm": 1.9291794146556083, "learning_rate": 2.745558199315146e-07, "loss": 0.4811, "step": 29274 }, { "epoch": 0.8972355032487435, "grad_norm": 0.792496063387553, "learning_rate": 2.7439363982094293e-07, "loss": 0.3765, "step": 29275 }, { "epoch": 0.8972661517714846, "grad_norm": 1.9669486477386566, "learning_rate": 2.7423150627298056e-07, "loss": 0.5775, "step": 29276 }, { "epoch": 0.8972968002942259, "grad_norm": 2.003381533554582, "learning_rate": 2.740694192892235e-07, "loss": 0.5233, "step": 29277 }, { "epoch": 0.897327448816967, "grad_norm": 1.9926537104690332, "learning_rate": 2.7390737887126986e-07, "loss": 0.5596, "step": 29278 }, { "epoch": 0.8973580973397082, "grad_norm": 1.9491787297288925, "learning_rate": 2.737453850207167e-07, "loss": 0.4122, "step": 29279 }, { "epoch": 0.8973887458624494, "grad_norm": 1.7982491644774736, "learning_rate": 2.7358343773915887e-07, "loss": 0.5073, "step": 29280 }, { "epoch": 0.8974193943851906, "grad_norm": 1.9756738469514707, "learning_rate": 2.7342153702819284e-07, "loss": 0.6733, "step": 29281 }, { "epoch": 0.8974500429079318, "grad_norm": 1.7489643877741574, "learning_rate": 2.732596828894141e-07, "loss": 0.4999, "step": 29282 }, { "epoch": 0.897480691430673, "grad_norm": 2.034881915033056, "learning_rate": 2.730978753244162e-07, "loss": 0.5787, "step": 29283 }, { "epoch": 0.8975113399534143, "grad_norm": 0.7634298626035657, "learning_rate": 2.7293611433479527e-07, "loss": 0.3879, "step": 29284 }, { "epoch": 0.8975419884761554, "grad_norm": 1.737450889628991, "learning_rate": 2.7277439992214385e-07, "loss": 0.5381, "step": 29285 }, { "epoch": 0.8975726369988967, "grad_norm": 1.9267205355970907, "learning_rate": 2.726127320880556e-07, "loss": 0.5849, "step": 29286 }, { "epoch": 0.8976032855216378, "grad_norm": 1.6995518293704426, "learning_rate": 2.7245111083412436e-07, "loss": 0.6391, "step": 29287 }, { "epoch": 0.8976339340443791, "grad_norm": 1.7584414666834478, "learning_rate": 2.7228953616194155e-07, "loss": 0.5935, "step": 29288 }, { "epoch": 0.8976645825671202, "grad_norm": 2.537039823902325, "learning_rate": 2.721280080730992e-07, "loss": 0.63, "step": 29289 }, { "epoch": 0.8976952310898615, "grad_norm": 2.0701506950930852, "learning_rate": 2.719665265691901e-07, "loss": 0.4835, "step": 29290 }, { "epoch": 0.8977258796126026, "grad_norm": 1.9477853396115545, "learning_rate": 2.7180509165180337e-07, "loss": 0.6417, "step": 29291 }, { "epoch": 0.8977565281353439, "grad_norm": 1.8131152526203869, "learning_rate": 2.716437033225322e-07, "loss": 0.525, "step": 29292 }, { "epoch": 0.897787176658085, "grad_norm": 1.6856296247814015, "learning_rate": 2.7148236158296427e-07, "loss": 0.448, "step": 29293 }, { "epoch": 0.8978178251808263, "grad_norm": 1.8701655018144763, "learning_rate": 2.7132106643469103e-07, "loss": 0.558, "step": 29294 }, { "epoch": 0.8978484737035675, "grad_norm": 1.9031542420568397, "learning_rate": 2.711598178793012e-07, "loss": 0.6732, "step": 29295 }, { "epoch": 0.8978791222263087, "grad_norm": 2.1492021011795033, "learning_rate": 2.709986159183836e-07, "loss": 0.6344, "step": 29296 }, { "epoch": 0.8979097707490499, "grad_norm": 1.6678756952701714, "learning_rate": 2.7083746055352635e-07, "loss": 0.5725, "step": 29297 }, { "epoch": 0.8979404192717911, "grad_norm": 1.9628412083601219, "learning_rate": 2.7067635178631814e-07, "loss": 0.574, "step": 29298 }, { "epoch": 0.8979710677945323, "grad_norm": 1.873675405093256, "learning_rate": 2.7051528961834493e-07, "loss": 0.5696, "step": 29299 }, { "epoch": 0.8980017163172735, "grad_norm": 0.8024288846762553, "learning_rate": 2.703542740511961e-07, "loss": 0.3838, "step": 29300 }, { "epoch": 0.8980323648400147, "grad_norm": 1.7355224433481549, "learning_rate": 2.7019330508645526e-07, "loss": 0.4907, "step": 29301 }, { "epoch": 0.898063013362756, "grad_norm": 1.8566307859569602, "learning_rate": 2.700323827257106e-07, "loss": 0.6571, "step": 29302 }, { "epoch": 0.8980936618854971, "grad_norm": 2.3232313117382724, "learning_rate": 2.6987150697054764e-07, "loss": 0.6111, "step": 29303 }, { "epoch": 0.8981243104082384, "grad_norm": 0.7648395926318021, "learning_rate": 2.6971067782255e-07, "loss": 0.3634, "step": 29304 }, { "epoch": 0.8981549589309795, "grad_norm": 1.9924849663467847, "learning_rate": 2.695498952833037e-07, "loss": 0.5269, "step": 29305 }, { "epoch": 0.8981856074537208, "grad_norm": 0.8123740995418868, "learning_rate": 2.693891593543929e-07, "loss": 0.3767, "step": 29306 }, { "epoch": 0.8982162559764619, "grad_norm": 1.8188088965928029, "learning_rate": 2.6922847003740036e-07, "loss": 0.5303, "step": 29307 }, { "epoch": 0.8982469044992032, "grad_norm": 1.682590903986641, "learning_rate": 2.6906782733391036e-07, "loss": 0.415, "step": 29308 }, { "epoch": 0.8982775530219443, "grad_norm": 0.8300489150216056, "learning_rate": 2.689072312455066e-07, "loss": 0.3821, "step": 29309 }, { "epoch": 0.8983082015446855, "grad_norm": 1.8602072525758628, "learning_rate": 2.687466817737694e-07, "loss": 0.5352, "step": 29310 }, { "epoch": 0.8983388500674268, "grad_norm": 2.0386544483819016, "learning_rate": 2.6858617892028203e-07, "loss": 0.5607, "step": 29311 }, { "epoch": 0.8983694985901679, "grad_norm": 1.8246708811032046, "learning_rate": 2.6842572268662436e-07, "loss": 0.491, "step": 29312 }, { "epoch": 0.8984001471129092, "grad_norm": 1.8372061541033682, "learning_rate": 2.6826531307438066e-07, "loss": 0.5483, "step": 29313 }, { "epoch": 0.8984307956356503, "grad_norm": 1.8309730176317962, "learning_rate": 2.6810495008512907e-07, "loss": 0.5778, "step": 29314 }, { "epoch": 0.8984614441583916, "grad_norm": 0.8777603287102849, "learning_rate": 2.679446337204494e-07, "loss": 0.3877, "step": 29315 }, { "epoch": 0.8984920926811327, "grad_norm": 1.968805237942405, "learning_rate": 2.6778436398192165e-07, "loss": 0.6061, "step": 29316 }, { "epoch": 0.898522741203874, "grad_norm": 1.8762072269043844, "learning_rate": 2.6762414087112663e-07, "loss": 0.6021, "step": 29317 }, { "epoch": 0.8985533897266151, "grad_norm": 1.7182700929357089, "learning_rate": 2.6746396438964095e-07, "loss": 0.5208, "step": 29318 }, { "epoch": 0.8985840382493564, "grad_norm": 1.7533900403627338, "learning_rate": 2.673038345390433e-07, "loss": 0.5517, "step": 29319 }, { "epoch": 0.8986146867720975, "grad_norm": 2.046476171668499, "learning_rate": 2.671437513209124e-07, "loss": 0.5254, "step": 29320 }, { "epoch": 0.8986453352948388, "grad_norm": 1.768850135918499, "learning_rate": 2.669837147368254e-07, "loss": 0.5731, "step": 29321 }, { "epoch": 0.89867598381758, "grad_norm": 1.9428013058419196, "learning_rate": 2.6682372478835925e-07, "loss": 0.5828, "step": 29322 }, { "epoch": 0.8987066323403212, "grad_norm": 2.4515208256745464, "learning_rate": 2.666637814770884e-07, "loss": 0.5708, "step": 29323 }, { "epoch": 0.8987372808630624, "grad_norm": 1.8378581144843997, "learning_rate": 2.6650388480459143e-07, "loss": 0.6084, "step": 29324 }, { "epoch": 0.8987679293858036, "grad_norm": 0.7878784442635957, "learning_rate": 2.663440347724433e-07, "loss": 0.3772, "step": 29325 }, { "epoch": 0.8987985779085448, "grad_norm": 1.7460663948556883, "learning_rate": 2.661842313822177e-07, "loss": 0.5443, "step": 29326 }, { "epoch": 0.898829226431286, "grad_norm": 1.8990045374810351, "learning_rate": 2.660244746354901e-07, "loss": 0.6426, "step": 29327 }, { "epoch": 0.8988598749540272, "grad_norm": 1.9711059818719308, "learning_rate": 2.6586476453383526e-07, "loss": 0.5567, "step": 29328 }, { "epoch": 0.8988905234767685, "grad_norm": 2.160673210121862, "learning_rate": 2.657051010788253e-07, "loss": 0.5737, "step": 29329 }, { "epoch": 0.8989211719995096, "grad_norm": 1.6644767954771131, "learning_rate": 2.6554548427203455e-07, "loss": 0.6, "step": 29330 }, { "epoch": 0.8989518205222509, "grad_norm": 1.8479124204798558, "learning_rate": 2.65385914115035e-07, "loss": 0.5889, "step": 29331 }, { "epoch": 0.898982469044992, "grad_norm": 0.8140317126315945, "learning_rate": 2.652263906094005e-07, "loss": 0.4019, "step": 29332 }, { "epoch": 0.8990131175677333, "grad_norm": 2.2741951711784805, "learning_rate": 2.650669137567019e-07, "loss": 0.5483, "step": 29333 }, { "epoch": 0.8990437660904744, "grad_norm": 0.7557883786851174, "learning_rate": 2.6490748355850916e-07, "loss": 0.3746, "step": 29334 }, { "epoch": 0.8990744146132157, "grad_norm": 1.974357547962374, "learning_rate": 2.6474810001639594e-07, "loss": 0.4755, "step": 29335 }, { "epoch": 0.8991050631359568, "grad_norm": 1.837583193974551, "learning_rate": 2.645887631319311e-07, "loss": 0.5602, "step": 29336 }, { "epoch": 0.8991357116586981, "grad_norm": 1.7507385888559666, "learning_rate": 2.6442947290668374e-07, "loss": 0.5224, "step": 29337 }, { "epoch": 0.8991663601814393, "grad_norm": 1.824950924997863, "learning_rate": 2.6427022934222503e-07, "loss": 0.5685, "step": 29338 }, { "epoch": 0.8991970087041805, "grad_norm": 1.9881659815192378, "learning_rate": 2.64111032440123e-07, "loss": 0.5144, "step": 29339 }, { "epoch": 0.8992276572269217, "grad_norm": 1.7488096480028716, "learning_rate": 2.6395188220194767e-07, "loss": 0.5555, "step": 29340 }, { "epoch": 0.8992583057496628, "grad_norm": 1.717946652286305, "learning_rate": 2.6379277862926546e-07, "loss": 0.5722, "step": 29341 }, { "epoch": 0.8992889542724041, "grad_norm": 1.8212245143480394, "learning_rate": 2.6363372172364453e-07, "loss": 0.5888, "step": 29342 }, { "epoch": 0.8993196027951452, "grad_norm": 2.052400527400419, "learning_rate": 2.6347471148665373e-07, "loss": 0.5815, "step": 29343 }, { "epoch": 0.8993502513178865, "grad_norm": 1.763367645974218, "learning_rate": 2.633157479198578e-07, "loss": 0.5327, "step": 29344 }, { "epoch": 0.8993808998406276, "grad_norm": 0.7888614869862418, "learning_rate": 2.631568310248234e-07, "loss": 0.3913, "step": 29345 }, { "epoch": 0.8994115483633689, "grad_norm": 1.9705873492127277, "learning_rate": 2.629979608031169e-07, "loss": 0.6135, "step": 29346 }, { "epoch": 0.89944219688611, "grad_norm": 0.7630558614162967, "learning_rate": 2.6283913725630326e-07, "loss": 0.3778, "step": 29347 }, { "epoch": 0.8994728454088513, "grad_norm": 2.3946372260914908, "learning_rate": 2.626803603859479e-07, "loss": 0.5668, "step": 29348 }, { "epoch": 0.8995034939315925, "grad_norm": 1.8172657186923074, "learning_rate": 2.625216301936151e-07, "loss": 0.5813, "step": 29349 }, { "epoch": 0.8995341424543337, "grad_norm": 0.821854306545918, "learning_rate": 2.623629466808686e-07, "loss": 0.4156, "step": 29350 }, { "epoch": 0.8995647909770749, "grad_norm": 1.8632299858019898, "learning_rate": 2.622043098492727e-07, "loss": 0.4839, "step": 29351 }, { "epoch": 0.8995954394998161, "grad_norm": 2.139910118683589, "learning_rate": 2.620457197003901e-07, "loss": 0.5439, "step": 29352 }, { "epoch": 0.8996260880225573, "grad_norm": 1.721402435809283, "learning_rate": 2.618871762357816e-07, "loss": 0.5182, "step": 29353 }, { "epoch": 0.8996567365452985, "grad_norm": 2.1044016581994742, "learning_rate": 2.6172867945701284e-07, "loss": 0.5617, "step": 29354 }, { "epoch": 0.8996873850680397, "grad_norm": 0.8188717152654078, "learning_rate": 2.615702293656436e-07, "loss": 0.3729, "step": 29355 }, { "epoch": 0.899718033590781, "grad_norm": 2.016287781275154, "learning_rate": 2.6141182596323423e-07, "loss": 0.5254, "step": 29356 }, { "epoch": 0.8997486821135221, "grad_norm": 0.7875961325613866, "learning_rate": 2.612534692513469e-07, "loss": 0.3938, "step": 29357 }, { "epoch": 0.8997793306362634, "grad_norm": 0.7997941650222773, "learning_rate": 2.6109515923154137e-07, "loss": 0.4051, "step": 29358 }, { "epoch": 0.8998099791590045, "grad_norm": 1.7831829053322286, "learning_rate": 2.6093689590537877e-07, "loss": 0.592, "step": 29359 }, { "epoch": 0.8998406276817458, "grad_norm": 1.8694840207352492, "learning_rate": 2.6077867927441656e-07, "loss": 0.5628, "step": 29360 }, { "epoch": 0.8998712762044869, "grad_norm": 1.8831997882168001, "learning_rate": 2.6062050934021476e-07, "loss": 0.6631, "step": 29361 }, { "epoch": 0.8999019247272282, "grad_norm": 1.9508618062023444, "learning_rate": 2.604623861043326e-07, "loss": 0.6335, "step": 29362 }, { "epoch": 0.8999325732499693, "grad_norm": 1.9294040405434345, "learning_rate": 2.6030430956832664e-07, "loss": 0.6309, "step": 29363 }, { "epoch": 0.8999632217727106, "grad_norm": 2.0679107499345473, "learning_rate": 2.6014627973375395e-07, "loss": 0.6035, "step": 29364 }, { "epoch": 0.8999938702954517, "grad_norm": 0.7951516752034937, "learning_rate": 2.5998829660217383e-07, "loss": 0.3958, "step": 29365 }, { "epoch": 0.900024518818193, "grad_norm": 1.9276448152771783, "learning_rate": 2.5983036017514174e-07, "loss": 0.5886, "step": 29366 }, { "epoch": 0.9000551673409342, "grad_norm": 1.8012554251208501, "learning_rate": 2.596724704542142e-07, "loss": 0.4521, "step": 29367 }, { "epoch": 0.9000858158636754, "grad_norm": 1.9825147341736093, "learning_rate": 2.59514627440946e-07, "loss": 0.5854, "step": 29368 }, { "epoch": 0.9001164643864166, "grad_norm": 1.9446766477648025, "learning_rate": 2.5935683113689324e-07, "loss": 0.5665, "step": 29369 }, { "epoch": 0.9001471129091578, "grad_norm": 1.965927979952297, "learning_rate": 2.5919908154361076e-07, "loss": 0.5916, "step": 29370 }, { "epoch": 0.900177761431899, "grad_norm": 1.6811436851939405, "learning_rate": 2.5904137866265277e-07, "loss": 0.4474, "step": 29371 }, { "epoch": 0.9002084099546401, "grad_norm": 1.9016487325609823, "learning_rate": 2.5888372249557256e-07, "loss": 0.5261, "step": 29372 }, { "epoch": 0.9002390584773814, "grad_norm": 2.0781550725894595, "learning_rate": 2.5872611304392503e-07, "loss": 0.5881, "step": 29373 }, { "epoch": 0.9002697070001225, "grad_norm": 2.034869316094638, "learning_rate": 2.585685503092611e-07, "loss": 0.5525, "step": 29374 }, { "epoch": 0.9003003555228638, "grad_norm": 1.7596261519306777, "learning_rate": 2.5841103429313506e-07, "loss": 0.677, "step": 29375 }, { "epoch": 0.900331004045605, "grad_norm": 1.8220812186389388, "learning_rate": 2.5825356499709853e-07, "loss": 0.5664, "step": 29376 }, { "epoch": 0.9003616525683462, "grad_norm": 2.153720003096516, "learning_rate": 2.580961424227024e-07, "loss": 0.5621, "step": 29377 }, { "epoch": 0.9003923010910874, "grad_norm": 1.6025251943495533, "learning_rate": 2.5793876657149886e-07, "loss": 0.4647, "step": 29378 }, { "epoch": 0.9004229496138286, "grad_norm": 1.7066621383945344, "learning_rate": 2.5778143744503714e-07, "loss": 0.5864, "step": 29379 }, { "epoch": 0.9004535981365698, "grad_norm": 0.7927054019160985, "learning_rate": 2.5762415504486827e-07, "loss": 0.4117, "step": 29380 }, { "epoch": 0.900484246659311, "grad_norm": 2.071334451455699, "learning_rate": 2.5746691937254265e-07, "loss": 0.5881, "step": 29381 }, { "epoch": 0.9005148951820522, "grad_norm": 1.7647916858528423, "learning_rate": 2.573097304296085e-07, "loss": 0.5176, "step": 29382 }, { "epoch": 0.9005455437047934, "grad_norm": 1.803438795526948, "learning_rate": 2.571525882176146e-07, "loss": 0.5356, "step": 29383 }, { "epoch": 0.9005761922275346, "grad_norm": 1.7245548044178796, "learning_rate": 2.5699549273811075e-07, "loss": 0.5674, "step": 29384 }, { "epoch": 0.9006068407502759, "grad_norm": 2.0705041420913464, "learning_rate": 2.56838443992643e-07, "loss": 0.6318, "step": 29385 }, { "epoch": 0.900637489273017, "grad_norm": 2.092674537510572, "learning_rate": 2.566814419827601e-07, "loss": 0.6483, "step": 29386 }, { "epoch": 0.9006681377957583, "grad_norm": 2.0621977434215943, "learning_rate": 2.5652448671000916e-07, "loss": 0.5428, "step": 29387 }, { "epoch": 0.9006987863184994, "grad_norm": 1.8116227308457469, "learning_rate": 2.5636757817593506e-07, "loss": 0.4958, "step": 29388 }, { "epoch": 0.9007294348412407, "grad_norm": 1.9074803282691604, "learning_rate": 2.5621071638208597e-07, "loss": 0.4925, "step": 29389 }, { "epoch": 0.9007600833639818, "grad_norm": 2.0061463512388675, "learning_rate": 2.560539013300051e-07, "loss": 0.6087, "step": 29390 }, { "epoch": 0.9007907318867231, "grad_norm": 1.9445750102424375, "learning_rate": 2.5589713302123955e-07, "loss": 0.5615, "step": 29391 }, { "epoch": 0.9008213804094642, "grad_norm": 1.9174992446623618, "learning_rate": 2.557404114573342e-07, "loss": 0.4917, "step": 29392 }, { "epoch": 0.9008520289322055, "grad_norm": 1.9314902552030213, "learning_rate": 2.555837366398312e-07, "loss": 0.6029, "step": 29393 }, { "epoch": 0.9008826774549467, "grad_norm": 1.806848155555166, "learning_rate": 2.554271085702759e-07, "loss": 0.578, "step": 29394 }, { "epoch": 0.9009133259776879, "grad_norm": 1.8918266479797128, "learning_rate": 2.5527052725021204e-07, "loss": 0.4177, "step": 29395 }, { "epoch": 0.9009439745004291, "grad_norm": 2.010895033529152, "learning_rate": 2.5511399268118076e-07, "loss": 0.5247, "step": 29396 }, { "epoch": 0.9009746230231703, "grad_norm": 1.7905001641923357, "learning_rate": 2.5495750486472625e-07, "loss": 0.5693, "step": 29397 }, { "epoch": 0.9010052715459115, "grad_norm": 1.7813964860216478, "learning_rate": 2.5480106380238846e-07, "loss": 0.6053, "step": 29398 }, { "epoch": 0.9010359200686527, "grad_norm": 1.7986376048163764, "learning_rate": 2.5464466949571e-07, "loss": 0.5774, "step": 29399 }, { "epoch": 0.9010665685913939, "grad_norm": 0.7997195234637732, "learning_rate": 2.544883219462324e-07, "loss": 0.3985, "step": 29400 }, { "epoch": 0.9010972171141352, "grad_norm": 1.8738077494682301, "learning_rate": 2.5433202115549503e-07, "loss": 0.5548, "step": 29401 }, { "epoch": 0.9011278656368763, "grad_norm": 1.8106464642648479, "learning_rate": 2.541757671250389e-07, "loss": 0.5404, "step": 29402 }, { "epoch": 0.9011585141596175, "grad_norm": 0.794015348073414, "learning_rate": 2.5401955985640323e-07, "loss": 0.3825, "step": 29403 }, { "epoch": 0.9011891626823587, "grad_norm": 1.907340292992794, "learning_rate": 2.5386339935112694e-07, "loss": 0.5408, "step": 29404 }, { "epoch": 0.9012198112050999, "grad_norm": 1.792771529545859, "learning_rate": 2.537072856107486e-07, "loss": 0.5539, "step": 29405 }, { "epoch": 0.9012504597278411, "grad_norm": 1.7477001476210643, "learning_rate": 2.535512186368072e-07, "loss": 0.5328, "step": 29406 }, { "epoch": 0.9012811082505823, "grad_norm": 1.7617717599627931, "learning_rate": 2.533951984308397e-07, "loss": 0.5602, "step": 29407 }, { "epoch": 0.9013117567733235, "grad_norm": 0.7879095708391005, "learning_rate": 2.532392249943849e-07, "loss": 0.3916, "step": 29408 }, { "epoch": 0.9013424052960647, "grad_norm": 1.8484681729398935, "learning_rate": 2.5308329832897715e-07, "loss": 0.5616, "step": 29409 }, { "epoch": 0.901373053818806, "grad_norm": 1.8061554172900018, "learning_rate": 2.5292741843615466e-07, "loss": 0.4948, "step": 29410 }, { "epoch": 0.9014037023415471, "grad_norm": 0.8399638944021208, "learning_rate": 2.527715853174534e-07, "loss": 0.3995, "step": 29411 }, { "epoch": 0.9014343508642884, "grad_norm": 1.8860845377713058, "learning_rate": 2.526157989744077e-07, "loss": 0.4736, "step": 29412 }, { "epoch": 0.9014649993870295, "grad_norm": 1.862761563260096, "learning_rate": 2.5246005940855303e-07, "loss": 0.5574, "step": 29413 }, { "epoch": 0.9014956479097708, "grad_norm": 1.977199363846384, "learning_rate": 2.523043666214248e-07, "loss": 0.5839, "step": 29414 }, { "epoch": 0.9015262964325119, "grad_norm": 1.9609410065897774, "learning_rate": 2.521487206145562e-07, "loss": 0.6975, "step": 29415 }, { "epoch": 0.9015569449552532, "grad_norm": 1.7983322882964656, "learning_rate": 2.5199312138948053e-07, "loss": 0.4803, "step": 29416 }, { "epoch": 0.9015875934779943, "grad_norm": 2.1252316046912854, "learning_rate": 2.518375689477326e-07, "loss": 0.553, "step": 29417 }, { "epoch": 0.9016182420007356, "grad_norm": 1.9745249234692124, "learning_rate": 2.516820632908429e-07, "loss": 0.6307, "step": 29418 }, { "epoch": 0.9016488905234767, "grad_norm": 2.0013549010016813, "learning_rate": 2.515266044203457e-07, "loss": 0.5272, "step": 29419 }, { "epoch": 0.901679539046218, "grad_norm": 2.1778345451434555, "learning_rate": 2.5137119233776984e-07, "loss": 0.555, "step": 29420 }, { "epoch": 0.9017101875689592, "grad_norm": 1.8781103962663663, "learning_rate": 2.5121582704465076e-07, "loss": 0.606, "step": 29421 }, { "epoch": 0.9017408360917004, "grad_norm": 1.9472075164985834, "learning_rate": 2.510605085425166e-07, "loss": 0.5341, "step": 29422 }, { "epoch": 0.9017714846144416, "grad_norm": 0.8118054184815704, "learning_rate": 2.509052368328979e-07, "loss": 0.3736, "step": 29423 }, { "epoch": 0.9018021331371828, "grad_norm": 1.8734659587719824, "learning_rate": 2.5075001191732507e-07, "loss": 0.5994, "step": 29424 }, { "epoch": 0.901832781659924, "grad_norm": 1.7172823605665353, "learning_rate": 2.5059483379732797e-07, "loss": 0.5948, "step": 29425 }, { "epoch": 0.9018634301826652, "grad_norm": 1.9974334527987159, "learning_rate": 2.5043970247443484e-07, "loss": 0.5505, "step": 29426 }, { "epoch": 0.9018940787054064, "grad_norm": 1.9740646599988825, "learning_rate": 2.5028461795017446e-07, "loss": 0.5591, "step": 29427 }, { "epoch": 0.9019247272281476, "grad_norm": 0.8160560431418603, "learning_rate": 2.5012958022607446e-07, "loss": 0.4086, "step": 29428 }, { "epoch": 0.9019553757508888, "grad_norm": 1.917393451737605, "learning_rate": 2.4997458930366425e-07, "loss": 0.5937, "step": 29429 }, { "epoch": 0.9019860242736301, "grad_norm": 1.8824578420171616, "learning_rate": 2.498196451844698e-07, "loss": 0.5255, "step": 29430 }, { "epoch": 0.9020166727963712, "grad_norm": 1.9169970527952531, "learning_rate": 2.4966474787001596e-07, "loss": 0.61, "step": 29431 }, { "epoch": 0.9020473213191125, "grad_norm": 1.8863135627219374, "learning_rate": 2.4950989736183264e-07, "loss": 0.5763, "step": 29432 }, { "epoch": 0.9020779698418536, "grad_norm": 0.7538837436968295, "learning_rate": 2.493550936614436e-07, "loss": 0.4004, "step": 29433 }, { "epoch": 0.9021086183645948, "grad_norm": 1.9092182533262825, "learning_rate": 2.4920033677037327e-07, "loss": 0.5961, "step": 29434 }, { "epoch": 0.902139266887336, "grad_norm": 2.020810364652536, "learning_rate": 2.490456266901481e-07, "loss": 0.5431, "step": 29435 }, { "epoch": 0.9021699154100772, "grad_norm": 1.6698285019548913, "learning_rate": 2.4889096342229246e-07, "loss": 0.5006, "step": 29436 }, { "epoch": 0.9022005639328184, "grad_norm": 2.1326999036269485, "learning_rate": 2.4873634696832904e-07, "loss": 0.5988, "step": 29437 }, { "epoch": 0.9022312124555596, "grad_norm": 1.8417496244321665, "learning_rate": 2.485817773297816e-07, "loss": 0.5661, "step": 29438 }, { "epoch": 0.9022618609783009, "grad_norm": 1.8968550098155506, "learning_rate": 2.484272545081745e-07, "loss": 0.5403, "step": 29439 }, { "epoch": 0.902292509501042, "grad_norm": 1.7763696019699526, "learning_rate": 2.4827277850502926e-07, "loss": 0.4974, "step": 29440 }, { "epoch": 0.9023231580237833, "grad_norm": 1.8361257338309793, "learning_rate": 2.481183493218686e-07, "loss": 0.478, "step": 29441 }, { "epoch": 0.9023538065465244, "grad_norm": 0.8042319736745163, "learning_rate": 2.4796396696021295e-07, "loss": 0.4004, "step": 29442 }, { "epoch": 0.9023844550692657, "grad_norm": 1.8751747116930264, "learning_rate": 2.478096314215844e-07, "loss": 0.5469, "step": 29443 }, { "epoch": 0.9024151035920068, "grad_norm": 1.9113253444323393, "learning_rate": 2.4765534270750404e-07, "loss": 0.5072, "step": 29444 }, { "epoch": 0.9024457521147481, "grad_norm": 2.0758465194791595, "learning_rate": 2.4750110081949054e-07, "loss": 0.6721, "step": 29445 }, { "epoch": 0.9024764006374892, "grad_norm": 1.6742589868509319, "learning_rate": 2.473469057590644e-07, "loss": 0.4391, "step": 29446 }, { "epoch": 0.9025070491602305, "grad_norm": 2.1770662777498724, "learning_rate": 2.471927575277461e-07, "loss": 0.6304, "step": 29447 }, { "epoch": 0.9025376976829717, "grad_norm": 0.8422081332340827, "learning_rate": 2.470386561270538e-07, "loss": 0.4152, "step": 29448 }, { "epoch": 0.9025683462057129, "grad_norm": 1.6746994673491928, "learning_rate": 2.468846015585058e-07, "loss": 0.5519, "step": 29449 }, { "epoch": 0.9025989947284541, "grad_norm": 2.0206674401710614, "learning_rate": 2.4673059382361806e-07, "loss": 0.5898, "step": 29450 }, { "epoch": 0.9026296432511953, "grad_norm": 1.7158748448013528, "learning_rate": 2.465766329239122e-07, "loss": 0.5373, "step": 29451 }, { "epoch": 0.9026602917739365, "grad_norm": 0.8176785385661951, "learning_rate": 2.464227188609025e-07, "loss": 0.4048, "step": 29452 }, { "epoch": 0.9026909402966777, "grad_norm": 1.9442107117819698, "learning_rate": 2.462688516361056e-07, "loss": 0.5772, "step": 29453 }, { "epoch": 0.9027215888194189, "grad_norm": 2.080158757138292, "learning_rate": 2.4611503125103744e-07, "loss": 0.4934, "step": 29454 }, { "epoch": 0.9027522373421601, "grad_norm": 1.6291977109813902, "learning_rate": 2.4596125770721456e-07, "loss": 0.4695, "step": 29455 }, { "epoch": 0.9027828858649013, "grad_norm": 2.0163140868559593, "learning_rate": 2.458075310061525e-07, "loss": 0.5429, "step": 29456 }, { "epoch": 0.9028135343876426, "grad_norm": 1.818212276675911, "learning_rate": 2.45653851149365e-07, "loss": 0.5704, "step": 29457 }, { "epoch": 0.9028441829103837, "grad_norm": 2.207387352266369, "learning_rate": 2.4550021813836587e-07, "loss": 0.5997, "step": 29458 }, { "epoch": 0.902874831433125, "grad_norm": 1.6281146939152613, "learning_rate": 2.4534663197467056e-07, "loss": 0.5735, "step": 29459 }, { "epoch": 0.9029054799558661, "grad_norm": 1.9725324374677429, "learning_rate": 2.451930926597912e-07, "loss": 0.5906, "step": 29460 }, { "epoch": 0.9029361284786074, "grad_norm": 1.6979282839001644, "learning_rate": 2.450396001952399e-07, "loss": 0.5724, "step": 29461 }, { "epoch": 0.9029667770013485, "grad_norm": 1.7249236409294697, "learning_rate": 2.4488615458253096e-07, "loss": 0.5683, "step": 29462 }, { "epoch": 0.9029974255240898, "grad_norm": 2.07276738835748, "learning_rate": 2.4473275582317545e-07, "loss": 0.4651, "step": 29463 }, { "epoch": 0.9030280740468309, "grad_norm": 1.9613465314108285, "learning_rate": 2.445794039186844e-07, "loss": 0.5882, "step": 29464 }, { "epoch": 0.9030587225695721, "grad_norm": 0.8136020698937046, "learning_rate": 2.4442609887056935e-07, "loss": 0.4105, "step": 29465 }, { "epoch": 0.9030893710923134, "grad_norm": 1.8475252406031069, "learning_rate": 2.442728406803402e-07, "loss": 0.5718, "step": 29466 }, { "epoch": 0.9031200196150545, "grad_norm": 1.9662172829950688, "learning_rate": 2.4411962934950853e-07, "loss": 0.5516, "step": 29467 }, { "epoch": 0.9031506681377958, "grad_norm": 1.6875612636722273, "learning_rate": 2.4396646487958195e-07, "loss": 0.5711, "step": 29468 }, { "epoch": 0.9031813166605369, "grad_norm": 1.201593389967192, "learning_rate": 2.438133472720711e-07, "loss": 0.3967, "step": 29469 }, { "epoch": 0.9032119651832782, "grad_norm": 1.9661142732964016, "learning_rate": 2.4366027652848513e-07, "loss": 0.5929, "step": 29470 }, { "epoch": 0.9032426137060193, "grad_norm": 1.8214341153360438, "learning_rate": 2.435072526503307e-07, "loss": 0.5576, "step": 29471 }, { "epoch": 0.9032732622287606, "grad_norm": 1.8813825548986, "learning_rate": 2.433542756391155e-07, "loss": 0.5236, "step": 29472 }, { "epoch": 0.9033039107515017, "grad_norm": 1.9661849247954888, "learning_rate": 2.432013454963489e-07, "loss": 0.5525, "step": 29473 }, { "epoch": 0.903334559274243, "grad_norm": 1.8097574387598903, "learning_rate": 2.4304846222353573e-07, "loss": 0.6318, "step": 29474 }, { "epoch": 0.9033652077969841, "grad_norm": 2.0577437631252202, "learning_rate": 2.428956258221843e-07, "loss": 0.6743, "step": 29475 }, { "epoch": 0.9033958563197254, "grad_norm": 1.9907759450873566, "learning_rate": 2.4274283629379833e-07, "loss": 0.5463, "step": 29476 }, { "epoch": 0.9034265048424666, "grad_norm": 1.9711975061861815, "learning_rate": 2.4259009363988397e-07, "loss": 0.4881, "step": 29477 }, { "epoch": 0.9034571533652078, "grad_norm": 0.7958039164707195, "learning_rate": 2.424373978619482e-07, "loss": 0.3874, "step": 29478 }, { "epoch": 0.903487801887949, "grad_norm": 1.9473212825871435, "learning_rate": 2.4228474896149266e-07, "loss": 0.5297, "step": 29479 }, { "epoch": 0.9035184504106902, "grad_norm": 1.699238830618576, "learning_rate": 2.421321469400234e-07, "loss": 0.566, "step": 29480 }, { "epoch": 0.9035490989334314, "grad_norm": 1.8722191453541133, "learning_rate": 2.419795917990436e-07, "loss": 0.612, "step": 29481 }, { "epoch": 0.9035797474561726, "grad_norm": 2.1456604959659193, "learning_rate": 2.4182708354005656e-07, "loss": 0.6225, "step": 29482 }, { "epoch": 0.9036103959789138, "grad_norm": 1.9317552542372027, "learning_rate": 2.4167462216456326e-07, "loss": 0.6313, "step": 29483 }, { "epoch": 0.903641044501655, "grad_norm": 1.7684947266335724, "learning_rate": 2.4152220767406863e-07, "loss": 0.6548, "step": 29484 }, { "epoch": 0.9036716930243962, "grad_norm": 0.7605769032644377, "learning_rate": 2.413698400700726e-07, "loss": 0.3913, "step": 29485 }, { "epoch": 0.9037023415471375, "grad_norm": 1.9735958347673617, "learning_rate": 2.4121751935407776e-07, "loss": 0.5751, "step": 29486 }, { "epoch": 0.9037329900698786, "grad_norm": 1.9778329804457462, "learning_rate": 2.4106524552758414e-07, "loss": 0.6775, "step": 29487 }, { "epoch": 0.9037636385926199, "grad_norm": 1.8412232381758866, "learning_rate": 2.409130185920916e-07, "loss": 0.5474, "step": 29488 }, { "epoch": 0.903794287115361, "grad_norm": 0.7758693577762805, "learning_rate": 2.407608385491017e-07, "loss": 0.4043, "step": 29489 }, { "epoch": 0.9038249356381023, "grad_norm": 1.832335523321538, "learning_rate": 2.4060870540011216e-07, "loss": 0.5154, "step": 29490 }, { "epoch": 0.9038555841608434, "grad_norm": 1.9435708635257127, "learning_rate": 2.404566191466229e-07, "loss": 0.5685, "step": 29491 }, { "epoch": 0.9038862326835847, "grad_norm": 1.921153707109997, "learning_rate": 2.4030457979013265e-07, "loss": 0.5513, "step": 29492 }, { "epoch": 0.9039168812063259, "grad_norm": 1.742086214875164, "learning_rate": 2.401525873321392e-07, "loss": 0.4814, "step": 29493 }, { "epoch": 0.9039475297290671, "grad_norm": 1.851663810751358, "learning_rate": 2.400006417741402e-07, "loss": 0.4825, "step": 29494 }, { "epoch": 0.9039781782518083, "grad_norm": 1.8601145580906453, "learning_rate": 2.398487431176327e-07, "loss": 0.5949, "step": 29495 }, { "epoch": 0.9040088267745494, "grad_norm": 1.6975320992932015, "learning_rate": 2.396968913641129e-07, "loss": 0.584, "step": 29496 }, { "epoch": 0.9040394752972907, "grad_norm": 1.782429001363504, "learning_rate": 2.3954508651507837e-07, "loss": 0.6105, "step": 29497 }, { "epoch": 0.9040701238200318, "grad_norm": 2.0458841275196984, "learning_rate": 2.3939332857202404e-07, "loss": 0.6518, "step": 29498 }, { "epoch": 0.9041007723427731, "grad_norm": 2.1911065947095127, "learning_rate": 2.392416175364448e-07, "loss": 0.6117, "step": 29499 }, { "epoch": 0.9041314208655142, "grad_norm": 1.952338590924274, "learning_rate": 2.390899534098368e-07, "loss": 0.574, "step": 29500 }, { "epoch": 0.9041620693882555, "grad_norm": 2.1218995005799757, "learning_rate": 2.3893833619369255e-07, "loss": 0.5729, "step": 29501 }, { "epoch": 0.9041927179109966, "grad_norm": 1.9202135973056416, "learning_rate": 2.387867658895077e-07, "loss": 0.5543, "step": 29502 }, { "epoch": 0.9042233664337379, "grad_norm": 1.8440802302016486, "learning_rate": 2.386352424987753e-07, "loss": 0.5526, "step": 29503 }, { "epoch": 0.9042540149564791, "grad_norm": 1.9993345522828774, "learning_rate": 2.3848376602298716e-07, "loss": 0.6141, "step": 29504 }, { "epoch": 0.9042846634792203, "grad_norm": 1.9828285795850877, "learning_rate": 2.3833233646363806e-07, "loss": 0.6286, "step": 29505 }, { "epoch": 0.9043153120019615, "grad_norm": 0.8282248330474796, "learning_rate": 2.3818095382221795e-07, "loss": 0.3732, "step": 29506 }, { "epoch": 0.9043459605247027, "grad_norm": 1.9482144556456567, "learning_rate": 2.3802961810021896e-07, "loss": 0.5442, "step": 29507 }, { "epoch": 0.9043766090474439, "grad_norm": 1.780710681180047, "learning_rate": 2.3787832929913324e-07, "loss": 0.5164, "step": 29508 }, { "epoch": 0.9044072575701851, "grad_norm": 1.8621492458587594, "learning_rate": 2.377270874204507e-07, "loss": 0.535, "step": 29509 }, { "epoch": 0.9044379060929263, "grad_norm": 2.0007043851554904, "learning_rate": 2.3757589246566127e-07, "loss": 0.6485, "step": 29510 }, { "epoch": 0.9044685546156676, "grad_norm": 2.0356713746252506, "learning_rate": 2.374247444362554e-07, "loss": 0.654, "step": 29511 }, { "epoch": 0.9044992031384087, "grad_norm": 0.8116422430553808, "learning_rate": 2.3727364333372194e-07, "loss": 0.3924, "step": 29512 }, { "epoch": 0.90452985166115, "grad_norm": 1.885093935017345, "learning_rate": 2.3712258915954966e-07, "loss": 0.4683, "step": 29513 }, { "epoch": 0.9045605001838911, "grad_norm": 1.8978456850830852, "learning_rate": 2.369715819152274e-07, "loss": 0.5437, "step": 29514 }, { "epoch": 0.9045911487066324, "grad_norm": 1.9840338154807478, "learning_rate": 2.3682062160224284e-07, "loss": 0.7028, "step": 29515 }, { "epoch": 0.9046217972293735, "grad_norm": 1.8777241542666374, "learning_rate": 2.366697082220837e-07, "loss": 0.4991, "step": 29516 }, { "epoch": 0.9046524457521148, "grad_norm": 1.7516969989224045, "learning_rate": 2.3651884177623596e-07, "loss": 0.5069, "step": 29517 }, { "epoch": 0.9046830942748559, "grad_norm": 1.845781739718388, "learning_rate": 2.3636802226618737e-07, "loss": 0.5056, "step": 29518 }, { "epoch": 0.9047137427975972, "grad_norm": 1.7367623123851994, "learning_rate": 2.3621724969342342e-07, "loss": 0.5589, "step": 29519 }, { "epoch": 0.9047443913203383, "grad_norm": 1.8355157037925152, "learning_rate": 2.3606652405942954e-07, "loss": 0.5237, "step": 29520 }, { "epoch": 0.9047750398430796, "grad_norm": 0.8022998045358689, "learning_rate": 2.3591584536569123e-07, "loss": 0.3994, "step": 29521 }, { "epoch": 0.9048056883658208, "grad_norm": 0.8156995692598492, "learning_rate": 2.3576521361369342e-07, "loss": 0.41, "step": 29522 }, { "epoch": 0.904836336888562, "grad_norm": 1.9368861028354314, "learning_rate": 2.3561462880491935e-07, "loss": 0.4907, "step": 29523 }, { "epoch": 0.9048669854113032, "grad_norm": 0.8009440194434957, "learning_rate": 2.3546409094085342e-07, "loss": 0.4171, "step": 29524 }, { "epoch": 0.9048976339340444, "grad_norm": 0.7814456713758997, "learning_rate": 2.3531360002297944e-07, "loss": 0.3847, "step": 29525 }, { "epoch": 0.9049282824567856, "grad_norm": 2.0360996091905874, "learning_rate": 2.3516315605277895e-07, "loss": 0.5676, "step": 29526 }, { "epoch": 0.9049589309795267, "grad_norm": 1.6740919486700439, "learning_rate": 2.3501275903173582e-07, "loss": 0.4981, "step": 29527 }, { "epoch": 0.904989579502268, "grad_norm": 1.9735673923583195, "learning_rate": 2.3486240896132996e-07, "loss": 0.5826, "step": 29528 }, { "epoch": 0.9050202280250091, "grad_norm": 0.7861530512545438, "learning_rate": 2.3471210584304514e-07, "loss": 0.3956, "step": 29529 }, { "epoch": 0.9050508765477504, "grad_norm": 1.7682131529199474, "learning_rate": 2.3456184967836138e-07, "loss": 0.4547, "step": 29530 }, { "epoch": 0.9050815250704916, "grad_norm": 1.847498407751274, "learning_rate": 2.3441164046875797e-07, "loss": 0.5748, "step": 29531 }, { "epoch": 0.9051121735932328, "grad_norm": 1.925393771731317, "learning_rate": 2.34261478215716e-07, "loss": 0.5609, "step": 29532 }, { "epoch": 0.905142822115974, "grad_norm": 0.8842771463438754, "learning_rate": 2.341113629207159e-07, "loss": 0.3964, "step": 29533 }, { "epoch": 0.9051734706387152, "grad_norm": 1.895421357959315, "learning_rate": 2.339612945852354e-07, "loss": 0.5703, "step": 29534 }, { "epoch": 0.9052041191614564, "grad_norm": 1.7977972475430273, "learning_rate": 2.3381127321075338e-07, "loss": 0.594, "step": 29535 }, { "epoch": 0.9052347676841976, "grad_norm": 2.1695012073260562, "learning_rate": 2.3366129879874965e-07, "loss": 0.5893, "step": 29536 }, { "epoch": 0.9052654162069388, "grad_norm": 1.8898681185164912, "learning_rate": 2.3351137135069922e-07, "loss": 0.5794, "step": 29537 }, { "epoch": 0.90529606472968, "grad_norm": 1.8192502510805875, "learning_rate": 2.3336149086808203e-07, "loss": 0.5623, "step": 29538 }, { "epoch": 0.9053267132524212, "grad_norm": 1.9793067806750653, "learning_rate": 2.3321165735237294e-07, "loss": 0.5441, "step": 29539 }, { "epoch": 0.9053573617751625, "grad_norm": 1.8244210863240913, "learning_rate": 2.330618708050486e-07, "loss": 0.6479, "step": 29540 }, { "epoch": 0.9053880102979036, "grad_norm": 1.7158800466254744, "learning_rate": 2.329121312275867e-07, "loss": 0.4929, "step": 29541 }, { "epoch": 0.9054186588206449, "grad_norm": 1.991738060286121, "learning_rate": 2.3276243862145998e-07, "loss": 0.6331, "step": 29542 }, { "epoch": 0.905449307343386, "grad_norm": 0.8285402323810862, "learning_rate": 2.32612792988145e-07, "loss": 0.3807, "step": 29543 }, { "epoch": 0.9054799558661273, "grad_norm": 1.9076014505768668, "learning_rate": 2.324631943291167e-07, "loss": 0.5672, "step": 29544 }, { "epoch": 0.9055106043888684, "grad_norm": 0.836543033107424, "learning_rate": 2.3231364264584721e-07, "loss": 0.4222, "step": 29545 }, { "epoch": 0.9055412529116097, "grad_norm": 1.7288434590992463, "learning_rate": 2.3216413793981207e-07, "loss": 0.4667, "step": 29546 }, { "epoch": 0.9055719014343508, "grad_norm": 1.7292097116545353, "learning_rate": 2.3201468021248285e-07, "loss": 0.6092, "step": 29547 }, { "epoch": 0.9056025499570921, "grad_norm": 1.7789265115110935, "learning_rate": 2.3186526946533395e-07, "loss": 0.6232, "step": 29548 }, { "epoch": 0.9056331984798333, "grad_norm": 0.7825880743657075, "learning_rate": 2.3171590569983636e-07, "loss": 0.3932, "step": 29549 }, { "epoch": 0.9056638470025745, "grad_norm": 2.1290324152245432, "learning_rate": 2.315665889174612e-07, "loss": 0.492, "step": 29550 }, { "epoch": 0.9056944955253157, "grad_norm": 1.7373629979082401, "learning_rate": 2.3141731911968057e-07, "loss": 0.6239, "step": 29551 }, { "epoch": 0.9057251440480569, "grad_norm": 1.7976614127190562, "learning_rate": 2.312680963079661e-07, "loss": 0.5938, "step": 29552 }, { "epoch": 0.9057557925707981, "grad_norm": 0.7828700745842545, "learning_rate": 2.311189204837866e-07, "loss": 0.3859, "step": 29553 }, { "epoch": 0.9057864410935393, "grad_norm": 1.9619329457332744, "learning_rate": 2.30969791648612e-07, "loss": 0.463, "step": 29554 }, { "epoch": 0.9058170896162805, "grad_norm": 2.090321698504476, "learning_rate": 2.308207098039128e-07, "loss": 0.5283, "step": 29555 }, { "epoch": 0.9058477381390218, "grad_norm": 2.0413650610409255, "learning_rate": 2.3067167495115783e-07, "loss": 0.5041, "step": 29556 }, { "epoch": 0.9058783866617629, "grad_norm": 1.9291783872362211, "learning_rate": 2.3052268709181536e-07, "loss": 0.5238, "step": 29557 }, { "epoch": 0.905909035184504, "grad_norm": 1.9784804038661836, "learning_rate": 2.3037374622735143e-07, "loss": 0.6061, "step": 29558 }, { "epoch": 0.9059396837072453, "grad_norm": 2.5340407495836548, "learning_rate": 2.3022485235923708e-07, "loss": 0.5834, "step": 29559 }, { "epoch": 0.9059703322299865, "grad_norm": 0.7979483952763403, "learning_rate": 2.3007600548893727e-07, "loss": 0.3942, "step": 29560 }, { "epoch": 0.9060009807527277, "grad_norm": 1.8697796850851613, "learning_rate": 2.299272056179186e-07, "loss": 0.5323, "step": 29561 }, { "epoch": 0.9060316292754689, "grad_norm": 1.855315593746743, "learning_rate": 2.2977845274764764e-07, "loss": 0.5949, "step": 29562 }, { "epoch": 0.9060622777982101, "grad_norm": 1.7548875676594622, "learning_rate": 2.296297468795905e-07, "loss": 0.522, "step": 29563 }, { "epoch": 0.9060929263209513, "grad_norm": 2.103972639802503, "learning_rate": 2.2948108801521207e-07, "loss": 0.6001, "step": 29564 }, { "epoch": 0.9061235748436925, "grad_norm": 1.6686320565298305, "learning_rate": 2.293324761559762e-07, "loss": 0.5281, "step": 29565 }, { "epoch": 0.9061542233664337, "grad_norm": 1.8712687609668126, "learning_rate": 2.2918391130334838e-07, "loss": 0.5802, "step": 29566 }, { "epoch": 0.906184871889175, "grad_norm": 0.7991883872580141, "learning_rate": 2.29035393458793e-07, "loss": 0.386, "step": 29567 }, { "epoch": 0.9062155204119161, "grad_norm": 0.7697306878030268, "learning_rate": 2.2888692262377276e-07, "loss": 0.3799, "step": 29568 }, { "epoch": 0.9062461689346574, "grad_norm": 2.0838491681246945, "learning_rate": 2.2873849879974874e-07, "loss": 0.6465, "step": 29569 }, { "epoch": 0.9062768174573985, "grad_norm": 1.7021274863797589, "learning_rate": 2.285901219881864e-07, "loss": 0.6003, "step": 29570 }, { "epoch": 0.9063074659801398, "grad_norm": 1.9065421986822464, "learning_rate": 2.284417921905463e-07, "loss": 0.5958, "step": 29571 }, { "epoch": 0.9063381145028809, "grad_norm": 2.202675403679364, "learning_rate": 2.2829350940828943e-07, "loss": 0.5143, "step": 29572 }, { "epoch": 0.9063687630256222, "grad_norm": 2.060823939313047, "learning_rate": 2.2814527364287796e-07, "loss": 0.651, "step": 29573 }, { "epoch": 0.9063994115483633, "grad_norm": 1.8429939041116405, "learning_rate": 2.2799708489577187e-07, "loss": 0.4929, "step": 29574 }, { "epoch": 0.9064300600711046, "grad_norm": 1.9581968316850797, "learning_rate": 2.2784894316843165e-07, "loss": 0.5228, "step": 29575 }, { "epoch": 0.9064607085938458, "grad_norm": 1.8525762677618143, "learning_rate": 2.2770084846231666e-07, "loss": 0.5271, "step": 29576 }, { "epoch": 0.906491357116587, "grad_norm": 1.6915501542812297, "learning_rate": 2.275528007788863e-07, "loss": 0.4368, "step": 29577 }, { "epoch": 0.9065220056393282, "grad_norm": 2.042675236116029, "learning_rate": 2.2740480011959942e-07, "loss": 0.5907, "step": 29578 }, { "epoch": 0.9065526541620694, "grad_norm": 1.8603608821217748, "learning_rate": 2.2725684648591427e-07, "loss": 0.5651, "step": 29579 }, { "epoch": 0.9065833026848106, "grad_norm": 1.8686138978540143, "learning_rate": 2.271089398792875e-07, "loss": 0.6317, "step": 29580 }, { "epoch": 0.9066139512075518, "grad_norm": 0.7857433063011086, "learning_rate": 2.2696108030117902e-07, "loss": 0.379, "step": 29581 }, { "epoch": 0.906644599730293, "grad_norm": 1.7857403586860505, "learning_rate": 2.2681326775304323e-07, "loss": 0.5314, "step": 29582 }, { "epoch": 0.9066752482530342, "grad_norm": 2.0898090456271565, "learning_rate": 2.2666550223633844e-07, "loss": 0.582, "step": 29583 }, { "epoch": 0.9067058967757754, "grad_norm": 1.664774928922017, "learning_rate": 2.2651778375251897e-07, "loss": 0.5491, "step": 29584 }, { "epoch": 0.9067365452985167, "grad_norm": 2.1269440344464425, "learning_rate": 2.263701123030415e-07, "loss": 0.6582, "step": 29585 }, { "epoch": 0.9067671938212578, "grad_norm": 1.9362287090029398, "learning_rate": 2.2622248788936098e-07, "loss": 0.547, "step": 29586 }, { "epoch": 0.9067978423439991, "grad_norm": 2.5289401863364818, "learning_rate": 2.2607491051293119e-07, "loss": 0.5223, "step": 29587 }, { "epoch": 0.9068284908667402, "grad_norm": 1.8114882017612313, "learning_rate": 2.2592738017520655e-07, "loss": 0.4911, "step": 29588 }, { "epoch": 0.9068591393894814, "grad_norm": 2.0953288587330894, "learning_rate": 2.25779896877642e-07, "loss": 0.5193, "step": 29589 }, { "epoch": 0.9068897879122226, "grad_norm": 0.7914356177794579, "learning_rate": 2.256324606216892e-07, "loss": 0.3814, "step": 29590 }, { "epoch": 0.9069204364349638, "grad_norm": 0.7745092793965092, "learning_rate": 2.2548507140880081e-07, "loss": 0.4081, "step": 29591 }, { "epoch": 0.906951084957705, "grad_norm": 2.1039462546733763, "learning_rate": 2.253377292404296e-07, "loss": 0.5496, "step": 29592 }, { "epoch": 0.9069817334804462, "grad_norm": 2.059893222780893, "learning_rate": 2.2519043411802777e-07, "loss": 0.5177, "step": 29593 }, { "epoch": 0.9070123820031875, "grad_norm": 1.937094801450269, "learning_rate": 2.2504318604304687e-07, "loss": 0.6126, "step": 29594 }, { "epoch": 0.9070430305259286, "grad_norm": 1.9688867912968178, "learning_rate": 2.2489598501693632e-07, "loss": 0.5881, "step": 29595 }, { "epoch": 0.9070736790486699, "grad_norm": 1.8389831092881004, "learning_rate": 2.2474883104114719e-07, "loss": 0.6696, "step": 29596 }, { "epoch": 0.907104327571411, "grad_norm": 2.161116497579345, "learning_rate": 2.2460172411713054e-07, "loss": 0.5097, "step": 29597 }, { "epoch": 0.9071349760941523, "grad_norm": 1.707736254560601, "learning_rate": 2.244546642463352e-07, "loss": 0.5046, "step": 29598 }, { "epoch": 0.9071656246168934, "grad_norm": 1.9175773773731073, "learning_rate": 2.2430765143020783e-07, "loss": 0.5801, "step": 29599 }, { "epoch": 0.9071962731396347, "grad_norm": 1.6738489542752775, "learning_rate": 2.241606856702011e-07, "loss": 0.5022, "step": 29600 }, { "epoch": 0.9072269216623758, "grad_norm": 2.1045868223694275, "learning_rate": 2.2401376696776e-07, "loss": 0.6283, "step": 29601 }, { "epoch": 0.9072575701851171, "grad_norm": 0.7195310877894786, "learning_rate": 2.2386689532433447e-07, "loss": 0.3706, "step": 29602 }, { "epoch": 0.9072882187078583, "grad_norm": 1.68175939725784, "learning_rate": 2.237200707413695e-07, "loss": 0.5457, "step": 29603 }, { "epoch": 0.9073188672305995, "grad_norm": 1.8086070440157727, "learning_rate": 2.2357329322031273e-07, "loss": 0.6444, "step": 29604 }, { "epoch": 0.9073495157533407, "grad_norm": 1.7688714607844067, "learning_rate": 2.2342656276261087e-07, "loss": 0.5213, "step": 29605 }, { "epoch": 0.9073801642760819, "grad_norm": 1.935450489174442, "learning_rate": 2.2327987936970885e-07, "loss": 0.6073, "step": 29606 }, { "epoch": 0.9074108127988231, "grad_norm": 0.8408036281978026, "learning_rate": 2.2313324304305217e-07, "loss": 0.414, "step": 29607 }, { "epoch": 0.9074414613215643, "grad_norm": 1.8714145607477723, "learning_rate": 2.2298665378408635e-07, "loss": 0.6845, "step": 29608 }, { "epoch": 0.9074721098443055, "grad_norm": 1.9308552183411534, "learning_rate": 2.2284011159425466e-07, "loss": 0.5279, "step": 29609 }, { "epoch": 0.9075027583670467, "grad_norm": 1.9576026131700652, "learning_rate": 2.226936164750021e-07, "loss": 0.6093, "step": 29610 }, { "epoch": 0.9075334068897879, "grad_norm": 1.7817601446257287, "learning_rate": 2.225471684277719e-07, "loss": 0.6017, "step": 29611 }, { "epoch": 0.9075640554125292, "grad_norm": 2.001604589988426, "learning_rate": 2.224007674540063e-07, "loss": 0.5683, "step": 29612 }, { "epoch": 0.9075947039352703, "grad_norm": 1.80483811130117, "learning_rate": 2.222544135551491e-07, "loss": 0.6274, "step": 29613 }, { "epoch": 0.9076253524580116, "grad_norm": 1.8953372135938893, "learning_rate": 2.2210810673264084e-07, "loss": 0.5529, "step": 29614 }, { "epoch": 0.9076560009807527, "grad_norm": 0.8267386909765997, "learning_rate": 2.2196184698792368e-07, "loss": 0.421, "step": 29615 }, { "epoch": 0.907686649503494, "grad_norm": 1.8853415693470015, "learning_rate": 2.218156343224398e-07, "loss": 0.5755, "step": 29616 }, { "epoch": 0.9077172980262351, "grad_norm": 1.796898392647778, "learning_rate": 2.216694687376286e-07, "loss": 0.5619, "step": 29617 }, { "epoch": 0.9077479465489764, "grad_norm": 1.9442153795262116, "learning_rate": 2.215233502349301e-07, "loss": 0.5806, "step": 29618 }, { "epoch": 0.9077785950717175, "grad_norm": 2.183827042801568, "learning_rate": 2.2137727881578586e-07, "loss": 0.5392, "step": 29619 }, { "epoch": 0.9078092435944587, "grad_norm": 2.24748608607461, "learning_rate": 2.2123125448163307e-07, "loss": 0.5338, "step": 29620 }, { "epoch": 0.9078398921172, "grad_norm": 2.0105210783358034, "learning_rate": 2.2108527723391172e-07, "loss": 0.5193, "step": 29621 }, { "epoch": 0.9078705406399411, "grad_norm": 0.8373541377779637, "learning_rate": 2.2093934707406007e-07, "loss": 0.4067, "step": 29622 }, { "epoch": 0.9079011891626824, "grad_norm": 2.0784903037764155, "learning_rate": 2.2079346400351532e-07, "loss": 0.5036, "step": 29623 }, { "epoch": 0.9079318376854235, "grad_norm": 2.1399687466526864, "learning_rate": 2.2064762802371632e-07, "loss": 0.5719, "step": 29624 }, { "epoch": 0.9079624862081648, "grad_norm": 0.8073074029206349, "learning_rate": 2.2050183913609802e-07, "loss": 0.3942, "step": 29625 }, { "epoch": 0.9079931347309059, "grad_norm": 1.8324539551840697, "learning_rate": 2.2035609734209818e-07, "loss": 0.5818, "step": 29626 }, { "epoch": 0.9080237832536472, "grad_norm": 0.8425577581086234, "learning_rate": 2.202104026431534e-07, "loss": 0.3866, "step": 29627 }, { "epoch": 0.9080544317763883, "grad_norm": 1.8331272026253307, "learning_rate": 2.2006475504069757e-07, "loss": 0.5024, "step": 29628 }, { "epoch": 0.9080850802991296, "grad_norm": 1.9609231392616753, "learning_rate": 2.199191545361673e-07, "loss": 0.6037, "step": 29629 }, { "epoch": 0.9081157288218707, "grad_norm": 1.7329494531428475, "learning_rate": 2.1977360113099643e-07, "loss": 0.537, "step": 29630 }, { "epoch": 0.908146377344612, "grad_norm": 1.722763106033029, "learning_rate": 2.196280948266194e-07, "loss": 0.516, "step": 29631 }, { "epoch": 0.9081770258673532, "grad_norm": 1.8697838079131532, "learning_rate": 2.194826356244695e-07, "loss": 0.6357, "step": 29632 }, { "epoch": 0.9082076743900944, "grad_norm": 1.9507382403412927, "learning_rate": 2.1933722352598109e-07, "loss": 0.5881, "step": 29633 }, { "epoch": 0.9082383229128356, "grad_norm": 2.035914273603673, "learning_rate": 2.191918585325853e-07, "loss": 0.6646, "step": 29634 }, { "epoch": 0.9082689714355768, "grad_norm": 1.8409436616979438, "learning_rate": 2.190465406457165e-07, "loss": 0.5314, "step": 29635 }, { "epoch": 0.908299619958318, "grad_norm": 1.9184295482774945, "learning_rate": 2.1890126986680416e-07, "loss": 0.5459, "step": 29636 }, { "epoch": 0.9083302684810592, "grad_norm": 1.7997753330685684, "learning_rate": 2.1875604619728153e-07, "loss": 0.5694, "step": 29637 }, { "epoch": 0.9083609170038004, "grad_norm": 1.8890517095773711, "learning_rate": 2.1861086963857914e-07, "loss": 0.52, "step": 29638 }, { "epoch": 0.9083915655265417, "grad_norm": 0.814540954454496, "learning_rate": 2.1846574019212695e-07, "loss": 0.3967, "step": 29639 }, { "epoch": 0.9084222140492828, "grad_norm": 1.728849869108706, "learning_rate": 2.1832065785935496e-07, "loss": 0.5438, "step": 29640 }, { "epoch": 0.9084528625720241, "grad_norm": 1.9271338608179733, "learning_rate": 2.1817562264169312e-07, "loss": 0.6212, "step": 29641 }, { "epoch": 0.9084835110947652, "grad_norm": 1.9225278169585442, "learning_rate": 2.1803063454057028e-07, "loss": 0.603, "step": 29642 }, { "epoch": 0.9085141596175065, "grad_norm": 1.823296872209789, "learning_rate": 2.1788569355741583e-07, "loss": 0.5309, "step": 29643 }, { "epoch": 0.9085448081402476, "grad_norm": 1.8042322181624533, "learning_rate": 2.1774079969365646e-07, "loss": 0.5511, "step": 29644 }, { "epoch": 0.9085754566629889, "grad_norm": 1.7523405677111625, "learning_rate": 2.1759595295072044e-07, "loss": 0.5145, "step": 29645 }, { "epoch": 0.90860610518573, "grad_norm": 1.9050009178203684, "learning_rate": 2.1745115333003607e-07, "loss": 0.5351, "step": 29646 }, { "epoch": 0.9086367537084713, "grad_norm": 1.821171658277009, "learning_rate": 2.1730640083302834e-07, "loss": 0.6127, "step": 29647 }, { "epoch": 0.9086674022312125, "grad_norm": 1.95888198987617, "learning_rate": 2.1716169546112442e-07, "loss": 0.5055, "step": 29648 }, { "epoch": 0.9086980507539537, "grad_norm": 1.9850594268722888, "learning_rate": 2.170170372157504e-07, "loss": 0.4724, "step": 29649 }, { "epoch": 0.9087286992766949, "grad_norm": 0.7652970464377942, "learning_rate": 2.168724260983307e-07, "loss": 0.3847, "step": 29650 }, { "epoch": 0.908759347799436, "grad_norm": 1.6493200989157122, "learning_rate": 2.1672786211029085e-07, "loss": 0.4847, "step": 29651 }, { "epoch": 0.9087899963221773, "grad_norm": 2.1127710408785987, "learning_rate": 2.1658334525305634e-07, "loss": 0.6182, "step": 29652 }, { "epoch": 0.9088206448449184, "grad_norm": 2.0445491548755412, "learning_rate": 2.1643887552804888e-07, "loss": 0.5575, "step": 29653 }, { "epoch": 0.9088512933676597, "grad_norm": 1.7041232790082985, "learning_rate": 2.1629445293669394e-07, "loss": 0.614, "step": 29654 }, { "epoch": 0.9088819418904008, "grad_norm": 1.8759923667326004, "learning_rate": 2.1615007748041205e-07, "loss": 0.5378, "step": 29655 }, { "epoch": 0.9089125904131421, "grad_norm": 1.9520313517526122, "learning_rate": 2.1600574916062934e-07, "loss": 0.612, "step": 29656 }, { "epoch": 0.9089432389358832, "grad_norm": 0.7757120275847026, "learning_rate": 2.1586146797876574e-07, "loss": 0.4015, "step": 29657 }, { "epoch": 0.9089738874586245, "grad_norm": 1.9639827376041505, "learning_rate": 2.1571723393624232e-07, "loss": 0.5639, "step": 29658 }, { "epoch": 0.9090045359813657, "grad_norm": 1.8465652485117319, "learning_rate": 2.1557304703448134e-07, "loss": 0.4925, "step": 29659 }, { "epoch": 0.9090351845041069, "grad_norm": 1.954659775118195, "learning_rate": 2.1542890727490385e-07, "loss": 0.6949, "step": 29660 }, { "epoch": 0.9090658330268481, "grad_norm": 2.073610571273492, "learning_rate": 2.1528481465892869e-07, "loss": 0.4773, "step": 29661 }, { "epoch": 0.9090964815495893, "grad_norm": 0.8074427949224627, "learning_rate": 2.1514076918797698e-07, "loss": 0.3812, "step": 29662 }, { "epoch": 0.9091271300723305, "grad_norm": 1.7461496985854423, "learning_rate": 2.149967708634676e-07, "loss": 0.5745, "step": 29663 }, { "epoch": 0.9091577785950717, "grad_norm": 2.152882874530331, "learning_rate": 2.1485281968681937e-07, "loss": 0.6494, "step": 29664 }, { "epoch": 0.9091884271178129, "grad_norm": 1.7520978925038773, "learning_rate": 2.1470891565945062e-07, "loss": 0.5498, "step": 29665 }, { "epoch": 0.9092190756405542, "grad_norm": 1.717569665576465, "learning_rate": 2.1456505878277855e-07, "loss": 0.5506, "step": 29666 }, { "epoch": 0.9092497241632953, "grad_norm": 1.854739544906703, "learning_rate": 2.1442124905822204e-07, "loss": 0.6096, "step": 29667 }, { "epoch": 0.9092803726860366, "grad_norm": 2.140648880127492, "learning_rate": 2.142774864871977e-07, "loss": 0.6138, "step": 29668 }, { "epoch": 0.9093110212087777, "grad_norm": 1.8964807857842167, "learning_rate": 2.141337710711211e-07, "loss": 0.5337, "step": 29669 }, { "epoch": 0.909341669731519, "grad_norm": 1.91861456603135, "learning_rate": 2.1399010281140941e-07, "loss": 0.5613, "step": 29670 }, { "epoch": 0.9093723182542601, "grad_norm": 1.835585437865799, "learning_rate": 2.138464817094782e-07, "loss": 0.5563, "step": 29671 }, { "epoch": 0.9094029667770014, "grad_norm": 1.6269265445294676, "learning_rate": 2.137029077667413e-07, "loss": 0.5603, "step": 29672 }, { "epoch": 0.9094336152997425, "grad_norm": 1.8297227115954018, "learning_rate": 2.1355938098461427e-07, "loss": 0.5399, "step": 29673 }, { "epoch": 0.9094642638224838, "grad_norm": 1.8015988420763898, "learning_rate": 2.1341590136451152e-07, "loss": 0.5898, "step": 29674 }, { "epoch": 0.909494912345225, "grad_norm": 1.9553149613196914, "learning_rate": 2.1327246890784693e-07, "loss": 0.5497, "step": 29675 }, { "epoch": 0.9095255608679662, "grad_norm": 1.5843601083964227, "learning_rate": 2.131290836160338e-07, "loss": 0.5212, "step": 29676 }, { "epoch": 0.9095562093907074, "grad_norm": 1.821082248261238, "learning_rate": 2.129857454904838e-07, "loss": 0.6106, "step": 29677 }, { "epoch": 0.9095868579134486, "grad_norm": 1.9258347852912694, "learning_rate": 2.1284245453261021e-07, "loss": 0.6451, "step": 29678 }, { "epoch": 0.9096175064361898, "grad_norm": 1.9060985753601098, "learning_rate": 2.1269921074382528e-07, "loss": 0.6199, "step": 29679 }, { "epoch": 0.909648154958931, "grad_norm": 1.9004702293785922, "learning_rate": 2.1255601412553895e-07, "loss": 0.6021, "step": 29680 }, { "epoch": 0.9096788034816722, "grad_norm": 1.9108984180100026, "learning_rate": 2.1241286467916345e-07, "loss": 0.5512, "step": 29681 }, { "epoch": 0.9097094520044133, "grad_norm": 1.9352901015230835, "learning_rate": 2.1226976240610875e-07, "loss": 0.496, "step": 29682 }, { "epoch": 0.9097401005271546, "grad_norm": 0.7709087533943932, "learning_rate": 2.1212670730778594e-07, "loss": 0.3942, "step": 29683 }, { "epoch": 0.9097707490498957, "grad_norm": 1.8629948759601602, "learning_rate": 2.1198369938560338e-07, "loss": 0.6267, "step": 29684 }, { "epoch": 0.909801397572637, "grad_norm": 1.8321467171726735, "learning_rate": 2.1184073864096987e-07, "loss": 0.6195, "step": 29685 }, { "epoch": 0.9098320460953782, "grad_norm": 1.9710058864612074, "learning_rate": 2.1169782507529545e-07, "loss": 0.6332, "step": 29686 }, { "epoch": 0.9098626946181194, "grad_norm": 1.8736425965056456, "learning_rate": 2.1155495868998787e-07, "loss": 0.5384, "step": 29687 }, { "epoch": 0.9098933431408606, "grad_norm": 2.0233838059477147, "learning_rate": 2.114121394864538e-07, "loss": 0.5131, "step": 29688 }, { "epoch": 0.9099239916636018, "grad_norm": 1.7872455507944498, "learning_rate": 2.1126936746610094e-07, "loss": 0.579, "step": 29689 }, { "epoch": 0.909954640186343, "grad_norm": 1.9457506572004433, "learning_rate": 2.1112664263033654e-07, "loss": 0.6062, "step": 29690 }, { "epoch": 0.9099852887090842, "grad_norm": 2.1522468992483375, "learning_rate": 2.1098396498056616e-07, "loss": 0.5401, "step": 29691 }, { "epoch": 0.9100159372318254, "grad_norm": 1.9644561009884518, "learning_rate": 2.1084133451819644e-07, "loss": 0.7272, "step": 29692 }, { "epoch": 0.9100465857545667, "grad_norm": 1.8798294145366168, "learning_rate": 2.1069875124463235e-07, "loss": 0.5482, "step": 29693 }, { "epoch": 0.9100772342773078, "grad_norm": 1.9707608401977217, "learning_rate": 2.1055621516127945e-07, "loss": 0.5441, "step": 29694 }, { "epoch": 0.9101078828000491, "grad_norm": 1.8669633609985514, "learning_rate": 2.1041372626954103e-07, "loss": 0.632, "step": 29695 }, { "epoch": 0.9101385313227902, "grad_norm": 1.8859338308166613, "learning_rate": 2.1027128457082102e-07, "loss": 0.5328, "step": 29696 }, { "epoch": 0.9101691798455315, "grad_norm": 2.2425193589906014, "learning_rate": 2.1012889006652492e-07, "loss": 0.5459, "step": 29697 }, { "epoch": 0.9101998283682726, "grad_norm": 2.150882028442587, "learning_rate": 2.0998654275805385e-07, "loss": 0.5793, "step": 29698 }, { "epoch": 0.9102304768910139, "grad_norm": 1.7164534860932017, "learning_rate": 2.0984424264681057e-07, "loss": 0.5182, "step": 29699 }, { "epoch": 0.910261125413755, "grad_norm": 1.7592682773496704, "learning_rate": 2.0970198973419786e-07, "loss": 0.5541, "step": 29700 }, { "epoch": 0.9102917739364963, "grad_norm": 1.7077437671873053, "learning_rate": 2.095597840216168e-07, "loss": 0.5359, "step": 29701 }, { "epoch": 0.9103224224592374, "grad_norm": 2.016109004608056, "learning_rate": 2.0941762551046906e-07, "loss": 0.5209, "step": 29702 }, { "epoch": 0.9103530709819787, "grad_norm": 1.8703776006672888, "learning_rate": 2.092755142021552e-07, "loss": 0.6526, "step": 29703 }, { "epoch": 0.9103837195047199, "grad_norm": 1.8702423697312673, "learning_rate": 2.0913345009807518e-07, "loss": 0.5521, "step": 29704 }, { "epoch": 0.9104143680274611, "grad_norm": 1.8378991481396267, "learning_rate": 2.089914331996301e-07, "loss": 0.6246, "step": 29705 }, { "epoch": 0.9104450165502023, "grad_norm": 1.887384961869677, "learning_rate": 2.088494635082178e-07, "loss": 0.4739, "step": 29706 }, { "epoch": 0.9104756650729435, "grad_norm": 1.9722775579415346, "learning_rate": 2.087075410252365e-07, "loss": 0.5489, "step": 29707 }, { "epoch": 0.9105063135956847, "grad_norm": 0.8257560805056063, "learning_rate": 2.0856566575208682e-07, "loss": 0.3896, "step": 29708 }, { "epoch": 0.9105369621184259, "grad_norm": 1.901393908561219, "learning_rate": 2.084238376901654e-07, "loss": 0.5538, "step": 29709 }, { "epoch": 0.9105676106411671, "grad_norm": 2.0088486115684403, "learning_rate": 2.0828205684087e-07, "loss": 0.5392, "step": 29710 }, { "epoch": 0.9105982591639084, "grad_norm": 1.8283329997410498, "learning_rate": 2.081403232055973e-07, "loss": 0.553, "step": 29711 }, { "epoch": 0.9106289076866495, "grad_norm": 1.7869486894229902, "learning_rate": 2.0799863678574396e-07, "loss": 0.5535, "step": 29712 }, { "epoch": 0.9106595562093907, "grad_norm": 1.913556650720194, "learning_rate": 2.078569975827066e-07, "loss": 0.5625, "step": 29713 }, { "epoch": 0.9106902047321319, "grad_norm": 1.8893015592553424, "learning_rate": 2.0771540559787973e-07, "loss": 0.5632, "step": 29714 }, { "epoch": 0.9107208532548731, "grad_norm": 2.0315097029440152, "learning_rate": 2.0757386083265885e-07, "loss": 0.5911, "step": 29715 }, { "epoch": 0.9107515017776143, "grad_norm": 0.8339545238753595, "learning_rate": 2.0743236328844007e-07, "loss": 0.3852, "step": 29716 }, { "epoch": 0.9107821503003555, "grad_norm": 1.8303772684310624, "learning_rate": 2.0729091296661618e-07, "loss": 0.4981, "step": 29717 }, { "epoch": 0.9108127988230967, "grad_norm": 1.8362179779940764, "learning_rate": 2.0714950986857995e-07, "loss": 0.5812, "step": 29718 }, { "epoch": 0.9108434473458379, "grad_norm": 0.7755825501458815, "learning_rate": 2.0700815399572749e-07, "loss": 0.4092, "step": 29719 }, { "epoch": 0.9108740958685791, "grad_norm": 2.1070783916218288, "learning_rate": 2.0686684534944878e-07, "loss": 0.6467, "step": 29720 }, { "epoch": 0.9109047443913203, "grad_norm": 1.838067416709029, "learning_rate": 2.0672558393113884e-07, "loss": 0.5205, "step": 29721 }, { "epoch": 0.9109353929140616, "grad_norm": 2.0469300343781947, "learning_rate": 2.0658436974218653e-07, "loss": 0.5729, "step": 29722 }, { "epoch": 0.9109660414368027, "grad_norm": 2.113165877479592, "learning_rate": 2.0644320278398578e-07, "loss": 0.5435, "step": 29723 }, { "epoch": 0.910996689959544, "grad_norm": 1.6641150807938199, "learning_rate": 2.0630208305792655e-07, "loss": 0.4968, "step": 29724 }, { "epoch": 0.9110273384822851, "grad_norm": 1.8298893166430195, "learning_rate": 2.061610105653994e-07, "loss": 0.562, "step": 29725 }, { "epoch": 0.9110579870050264, "grad_norm": 2.0735315338819436, "learning_rate": 2.0601998530779376e-07, "loss": 0.5839, "step": 29726 }, { "epoch": 0.9110886355277675, "grad_norm": 1.8111725938537926, "learning_rate": 2.0587900728650078e-07, "loss": 0.433, "step": 29727 }, { "epoch": 0.9111192840505088, "grad_norm": 0.909149235110463, "learning_rate": 2.0573807650290823e-07, "loss": 0.4036, "step": 29728 }, { "epoch": 0.9111499325732499, "grad_norm": 1.7666109257664193, "learning_rate": 2.0559719295840552e-07, "loss": 0.6515, "step": 29729 }, { "epoch": 0.9111805810959912, "grad_norm": 1.915062137593043, "learning_rate": 2.0545635665437936e-07, "loss": 0.5819, "step": 29730 }, { "epoch": 0.9112112296187324, "grad_norm": 2.246781140357417, "learning_rate": 2.053155675922186e-07, "loss": 0.5771, "step": 29731 }, { "epoch": 0.9112418781414736, "grad_norm": 1.7735667750372373, "learning_rate": 2.0517482577331105e-07, "loss": 0.6173, "step": 29732 }, { "epoch": 0.9112725266642148, "grad_norm": 1.8712015266219422, "learning_rate": 2.0503413119904224e-07, "loss": 0.5587, "step": 29733 }, { "epoch": 0.911303175186956, "grad_norm": 1.8532138037624193, "learning_rate": 2.0489348387079888e-07, "loss": 0.5153, "step": 29734 }, { "epoch": 0.9113338237096972, "grad_norm": 0.8058880490308359, "learning_rate": 2.047528837899676e-07, "loss": 0.4083, "step": 29735 }, { "epoch": 0.9113644722324384, "grad_norm": 2.210130321913654, "learning_rate": 2.046123309579323e-07, "loss": 0.5781, "step": 29736 }, { "epoch": 0.9113951207551796, "grad_norm": 0.7648680779274829, "learning_rate": 2.0447182537607856e-07, "loss": 0.3763, "step": 29737 }, { "epoch": 0.9114257692779208, "grad_norm": 1.8857461246558889, "learning_rate": 2.0433136704579194e-07, "loss": 0.5054, "step": 29738 }, { "epoch": 0.911456417800662, "grad_norm": 2.013026235585857, "learning_rate": 2.0419095596845462e-07, "loss": 0.5758, "step": 29739 }, { "epoch": 0.9114870663234033, "grad_norm": 2.025255327174587, "learning_rate": 2.0405059214545108e-07, "loss": 0.5514, "step": 29740 }, { "epoch": 0.9115177148461444, "grad_norm": 1.8230637449481857, "learning_rate": 2.0391027557816412e-07, "loss": 0.5764, "step": 29741 }, { "epoch": 0.9115483633688857, "grad_norm": 1.7930975316784756, "learning_rate": 2.0377000626797595e-07, "loss": 0.5635, "step": 29742 }, { "epoch": 0.9115790118916268, "grad_norm": 1.9554193450446438, "learning_rate": 2.036297842162699e-07, "loss": 0.5991, "step": 29743 }, { "epoch": 0.911609660414368, "grad_norm": 1.9511231531259083, "learning_rate": 2.0348960942442596e-07, "loss": 0.5362, "step": 29744 }, { "epoch": 0.9116403089371092, "grad_norm": 1.868082197170135, "learning_rate": 2.033494818938264e-07, "loss": 0.5652, "step": 29745 }, { "epoch": 0.9116709574598504, "grad_norm": 2.10442787743176, "learning_rate": 2.0320940162585234e-07, "loss": 0.5132, "step": 29746 }, { "epoch": 0.9117016059825916, "grad_norm": 2.127582307013866, "learning_rate": 2.030693686218821e-07, "loss": 0.544, "step": 29747 }, { "epoch": 0.9117322545053328, "grad_norm": 2.181648540493091, "learning_rate": 2.0292938288329733e-07, "loss": 0.6084, "step": 29748 }, { "epoch": 0.9117629030280741, "grad_norm": 1.909606872689317, "learning_rate": 2.0278944441147751e-07, "loss": 0.5347, "step": 29749 }, { "epoch": 0.9117935515508152, "grad_norm": 1.8043339138179333, "learning_rate": 2.0264955320779934e-07, "loss": 0.5375, "step": 29750 }, { "epoch": 0.9118242000735565, "grad_norm": 1.9604874503223229, "learning_rate": 2.0250970927364387e-07, "loss": 0.5073, "step": 29751 }, { "epoch": 0.9118548485962976, "grad_norm": 1.9423337326012, "learning_rate": 2.0236991261038674e-07, "loss": 0.5174, "step": 29752 }, { "epoch": 0.9118854971190389, "grad_norm": 1.7701606685805502, "learning_rate": 2.022301632194068e-07, "loss": 0.49, "step": 29753 }, { "epoch": 0.91191614564178, "grad_norm": 0.8033370325221514, "learning_rate": 2.0209046110208074e-07, "loss": 0.408, "step": 29754 }, { "epoch": 0.9119467941645213, "grad_norm": 1.9739664594780144, "learning_rate": 2.019508062597847e-07, "loss": 0.4982, "step": 29755 }, { "epoch": 0.9119774426872624, "grad_norm": 1.9743219078196463, "learning_rate": 2.0181119869389477e-07, "loss": 0.5627, "step": 29756 }, { "epoch": 0.9120080912100037, "grad_norm": 1.8918012788538072, "learning_rate": 2.0167163840578762e-07, "loss": 0.5366, "step": 29757 }, { "epoch": 0.9120387397327449, "grad_norm": 1.9099765021513888, "learning_rate": 2.0153212539683664e-07, "loss": 0.5845, "step": 29758 }, { "epoch": 0.9120693882554861, "grad_norm": 1.8131091011586693, "learning_rate": 2.0139265966841738e-07, "loss": 0.6197, "step": 29759 }, { "epoch": 0.9121000367782273, "grad_norm": 1.729021626176158, "learning_rate": 2.0125324122190483e-07, "loss": 0.5338, "step": 29760 }, { "epoch": 0.9121306853009685, "grad_norm": 1.9546492406502696, "learning_rate": 2.0111387005867123e-07, "loss": 0.541, "step": 29761 }, { "epoch": 0.9121613338237097, "grad_norm": 2.186004672035276, "learning_rate": 2.0097454618009104e-07, "loss": 0.5545, "step": 29762 }, { "epoch": 0.9121919823464509, "grad_norm": 1.8384899395649175, "learning_rate": 2.008352695875354e-07, "loss": 0.6322, "step": 29763 }, { "epoch": 0.9122226308691921, "grad_norm": 1.8212084991127482, "learning_rate": 2.0069604028237932e-07, "loss": 0.5451, "step": 29764 }, { "epoch": 0.9122532793919333, "grad_norm": 1.7645413605420928, "learning_rate": 2.005568582659928e-07, "loss": 0.5656, "step": 29765 }, { "epoch": 0.9122839279146745, "grad_norm": 2.0227947968119993, "learning_rate": 2.0041772353974699e-07, "loss": 0.5436, "step": 29766 }, { "epoch": 0.9123145764374158, "grad_norm": 0.8325429958812915, "learning_rate": 2.0027863610501297e-07, "loss": 0.392, "step": 29767 }, { "epoch": 0.9123452249601569, "grad_norm": 1.915560374256313, "learning_rate": 2.0013959596316247e-07, "loss": 0.5198, "step": 29768 }, { "epoch": 0.9123758734828982, "grad_norm": 1.9493501624178233, "learning_rate": 2.0000060311556434e-07, "loss": 0.5302, "step": 29769 }, { "epoch": 0.9124065220056393, "grad_norm": 0.7792174596407336, "learning_rate": 1.998616575635881e-07, "loss": 0.3878, "step": 29770 }, { "epoch": 0.9124371705283806, "grad_norm": 2.0547526861170997, "learning_rate": 1.9972275930860374e-07, "loss": 0.5409, "step": 29771 }, { "epoch": 0.9124678190511217, "grad_norm": 1.6240109494525612, "learning_rate": 1.9958390835197849e-07, "loss": 0.5515, "step": 29772 }, { "epoch": 0.912498467573863, "grad_norm": 1.9001729828973304, "learning_rate": 1.994451046950824e-07, "loss": 0.525, "step": 29773 }, { "epoch": 0.9125291160966041, "grad_norm": 1.7424331371716175, "learning_rate": 1.9930634833928097e-07, "loss": 0.5048, "step": 29774 }, { "epoch": 0.9125597646193453, "grad_norm": 2.1320727175239806, "learning_rate": 1.9916763928594206e-07, "loss": 0.6796, "step": 29775 }, { "epoch": 0.9125904131420866, "grad_norm": 1.8834746087663297, "learning_rate": 1.99028977536434e-07, "loss": 0.6557, "step": 29776 }, { "epoch": 0.9126210616648277, "grad_norm": 1.7820165488137425, "learning_rate": 1.9889036309212073e-07, "loss": 0.4734, "step": 29777 }, { "epoch": 0.912651710187569, "grad_norm": 1.7977143580567192, "learning_rate": 1.9875179595436944e-07, "loss": 0.5309, "step": 29778 }, { "epoch": 0.9126823587103101, "grad_norm": 1.8487919931039938, "learning_rate": 1.9861327612454519e-07, "loss": 0.5372, "step": 29779 }, { "epoch": 0.9127130072330514, "grad_norm": 0.812941334588587, "learning_rate": 1.9847480360401296e-07, "loss": 0.3808, "step": 29780 }, { "epoch": 0.9127436557557925, "grad_norm": 0.8218441269788579, "learning_rate": 1.9833637839413722e-07, "loss": 0.4126, "step": 29781 }, { "epoch": 0.9127743042785338, "grad_norm": 2.0719323143990387, "learning_rate": 1.981980004962808e-07, "loss": 0.6144, "step": 29782 }, { "epoch": 0.9128049528012749, "grad_norm": 2.0590232813196305, "learning_rate": 1.9805966991180869e-07, "loss": 0.5381, "step": 29783 }, { "epoch": 0.9128356013240162, "grad_norm": 1.8240641679882563, "learning_rate": 1.979213866420837e-07, "loss": 0.4679, "step": 29784 }, { "epoch": 0.9128662498467573, "grad_norm": 1.723262818076001, "learning_rate": 1.9778315068846754e-07, "loss": 0.5735, "step": 29785 }, { "epoch": 0.9128968983694986, "grad_norm": 1.9869367100195723, "learning_rate": 1.9764496205232243e-07, "loss": 0.6786, "step": 29786 }, { "epoch": 0.9129275468922398, "grad_norm": 0.8088707707998462, "learning_rate": 1.975068207350106e-07, "loss": 0.3946, "step": 29787 }, { "epoch": 0.912958195414981, "grad_norm": 1.9283352241503933, "learning_rate": 1.9736872673789266e-07, "loss": 0.4868, "step": 29788 }, { "epoch": 0.9129888439377222, "grad_norm": 1.83302899690976, "learning_rate": 1.9723068006232916e-07, "loss": 0.6837, "step": 29789 }, { "epoch": 0.9130194924604634, "grad_norm": 1.7970675721686629, "learning_rate": 1.9709268070968069e-07, "loss": 0.5632, "step": 29790 }, { "epoch": 0.9130501409832046, "grad_norm": 0.7954142264585664, "learning_rate": 1.9695472868130783e-07, "loss": 0.3836, "step": 29791 }, { "epoch": 0.9130807895059458, "grad_norm": 1.9359522282975246, "learning_rate": 1.9681682397856838e-07, "loss": 0.6127, "step": 29792 }, { "epoch": 0.913111438028687, "grad_norm": 2.2590878801107834, "learning_rate": 1.9667896660282127e-07, "loss": 0.5834, "step": 29793 }, { "epoch": 0.9131420865514283, "grad_norm": 2.351317183283431, "learning_rate": 1.9654115655542594e-07, "loss": 0.5481, "step": 29794 }, { "epoch": 0.9131727350741694, "grad_norm": 1.8136005574468739, "learning_rate": 1.9640339383773966e-07, "loss": 0.5881, "step": 29795 }, { "epoch": 0.9132033835969107, "grad_norm": 1.9042667226020809, "learning_rate": 1.962656784511191e-07, "loss": 0.5428, "step": 29796 }, { "epoch": 0.9132340321196518, "grad_norm": 2.088770671860707, "learning_rate": 1.9612801039692208e-07, "loss": 0.5983, "step": 29797 }, { "epoch": 0.9132646806423931, "grad_norm": 1.8793711831875297, "learning_rate": 1.959903896765053e-07, "loss": 0.596, "step": 29798 }, { "epoch": 0.9132953291651342, "grad_norm": 1.9697076609240003, "learning_rate": 1.9585281629122377e-07, "loss": 0.6658, "step": 29799 }, { "epoch": 0.9133259776878755, "grad_norm": 2.035563548197452, "learning_rate": 1.957152902424342e-07, "loss": 0.6685, "step": 29800 }, { "epoch": 0.9133566262106166, "grad_norm": 2.0321567617238276, "learning_rate": 1.9557781153149047e-07, "loss": 0.6296, "step": 29801 }, { "epoch": 0.9133872747333579, "grad_norm": 1.6577251639746828, "learning_rate": 1.9544038015974876e-07, "loss": 0.547, "step": 29802 }, { "epoch": 0.913417923256099, "grad_norm": 1.9972269495068276, "learning_rate": 1.953029961285624e-07, "loss": 0.5047, "step": 29803 }, { "epoch": 0.9134485717788403, "grad_norm": 2.1006773836782613, "learning_rate": 1.9516565943928311e-07, "loss": 0.5763, "step": 29804 }, { "epoch": 0.9134792203015815, "grad_norm": 0.8308279808088622, "learning_rate": 1.9502837009326758e-07, "loss": 0.4001, "step": 29805 }, { "epoch": 0.9135098688243226, "grad_norm": 0.7929519512724081, "learning_rate": 1.9489112809186695e-07, "loss": 0.3913, "step": 29806 }, { "epoch": 0.9135405173470639, "grad_norm": 2.102046152418683, "learning_rate": 1.947539334364329e-07, "loss": 0.5259, "step": 29807 }, { "epoch": 0.913571165869805, "grad_norm": 0.7826532360218452, "learning_rate": 1.946167861283177e-07, "loss": 0.399, "step": 29808 }, { "epoch": 0.9136018143925463, "grad_norm": 0.8377151583169092, "learning_rate": 1.9447968616887302e-07, "loss": 0.4065, "step": 29809 }, { "epoch": 0.9136324629152874, "grad_norm": 1.9881223925058247, "learning_rate": 1.9434263355945004e-07, "loss": 0.5627, "step": 29810 }, { "epoch": 0.9136631114380287, "grad_norm": 2.1628398145495242, "learning_rate": 1.9420562830139766e-07, "loss": 0.5606, "step": 29811 }, { "epoch": 0.9136937599607698, "grad_norm": 2.103519598432719, "learning_rate": 1.9406867039606759e-07, "loss": 0.6246, "step": 29812 }, { "epoch": 0.9137244084835111, "grad_norm": 1.82608147729515, "learning_rate": 1.939317598448087e-07, "loss": 0.5385, "step": 29813 }, { "epoch": 0.9137550570062523, "grad_norm": 1.7680381776354255, "learning_rate": 1.9379489664897e-07, "loss": 0.4757, "step": 29814 }, { "epoch": 0.9137857055289935, "grad_norm": 1.6851213601562043, "learning_rate": 1.9365808080989868e-07, "loss": 0.5394, "step": 29815 }, { "epoch": 0.9138163540517347, "grad_norm": 1.8670631199755037, "learning_rate": 1.9352131232894477e-07, "loss": 0.5622, "step": 29816 }, { "epoch": 0.9138470025744759, "grad_norm": 0.8751007802666039, "learning_rate": 1.9338459120745555e-07, "loss": 0.427, "step": 29817 }, { "epoch": 0.9138776510972171, "grad_norm": 1.755041932577439, "learning_rate": 1.9324791744677772e-07, "loss": 0.4708, "step": 29818 }, { "epoch": 0.9139082996199583, "grad_norm": 2.001544816412576, "learning_rate": 1.9311129104825744e-07, "loss": 0.633, "step": 29819 }, { "epoch": 0.9139389481426995, "grad_norm": 1.7204736663974673, "learning_rate": 1.9297471201324136e-07, "loss": 0.5115, "step": 29820 }, { "epoch": 0.9139695966654408, "grad_norm": 1.8092536726844792, "learning_rate": 1.9283818034307623e-07, "loss": 0.5641, "step": 29821 }, { "epoch": 0.9140002451881819, "grad_norm": 0.7843256799944103, "learning_rate": 1.9270169603910593e-07, "loss": 0.4001, "step": 29822 }, { "epoch": 0.9140308937109232, "grad_norm": 1.9165975855449944, "learning_rate": 1.9256525910267555e-07, "loss": 0.4965, "step": 29823 }, { "epoch": 0.9140615422336643, "grad_norm": 2.066624618962266, "learning_rate": 1.9242886953513062e-07, "loss": 0.555, "step": 29824 }, { "epoch": 0.9140921907564056, "grad_norm": 0.8260440683147674, "learning_rate": 1.9229252733781402e-07, "loss": 0.3847, "step": 29825 }, { "epoch": 0.9141228392791467, "grad_norm": 2.317418273391752, "learning_rate": 1.9215623251206849e-07, "loss": 0.5948, "step": 29826 }, { "epoch": 0.914153487801888, "grad_norm": 0.7601698782303822, "learning_rate": 1.92019985059238e-07, "loss": 0.3934, "step": 29827 }, { "epoch": 0.9141841363246291, "grad_norm": 1.7701121672153355, "learning_rate": 1.9188378498066485e-07, "loss": 0.5211, "step": 29828 }, { "epoch": 0.9142147848473704, "grad_norm": 1.620564714308119, "learning_rate": 1.9174763227769122e-07, "loss": 0.5372, "step": 29829 }, { "epoch": 0.9142454333701115, "grad_norm": 1.8376358072555965, "learning_rate": 1.9161152695165775e-07, "loss": 0.5301, "step": 29830 }, { "epoch": 0.9142760818928528, "grad_norm": 1.834457572055738, "learning_rate": 1.9147546900390667e-07, "loss": 0.6397, "step": 29831 }, { "epoch": 0.914306730415594, "grad_norm": 2.002561637182817, "learning_rate": 1.9133945843577805e-07, "loss": 0.5887, "step": 29832 }, { "epoch": 0.9143373789383352, "grad_norm": 1.873545338203715, "learning_rate": 1.9120349524861247e-07, "loss": 0.5404, "step": 29833 }, { "epoch": 0.9143680274610764, "grad_norm": 1.9313336282749736, "learning_rate": 1.9106757944374831e-07, "loss": 0.5583, "step": 29834 }, { "epoch": 0.9143986759838176, "grad_norm": 1.7981164045321143, "learning_rate": 1.9093171102252672e-07, "loss": 0.5724, "step": 29835 }, { "epoch": 0.9144293245065588, "grad_norm": 2.2944023907103865, "learning_rate": 1.907958899862844e-07, "loss": 0.5136, "step": 29836 }, { "epoch": 0.9144599730292999, "grad_norm": 1.9038416729546161, "learning_rate": 1.9066011633636196e-07, "loss": 0.5592, "step": 29837 }, { "epoch": 0.9144906215520412, "grad_norm": 1.9802487108386877, "learning_rate": 1.9052439007409495e-07, "loss": 0.5413, "step": 29838 }, { "epoch": 0.9145212700747823, "grad_norm": 2.2843399975553784, "learning_rate": 1.9038871120082125e-07, "loss": 0.5018, "step": 29839 }, { "epoch": 0.9145519185975236, "grad_norm": 2.102622555683787, "learning_rate": 1.9025307971787921e-07, "loss": 0.5591, "step": 29840 }, { "epoch": 0.9145825671202648, "grad_norm": 1.9682382605658804, "learning_rate": 1.9011749562660388e-07, "loss": 0.6196, "step": 29841 }, { "epoch": 0.914613215643006, "grad_norm": 0.8131069313309174, "learning_rate": 1.8998195892833137e-07, "loss": 0.4002, "step": 29842 }, { "epoch": 0.9146438641657472, "grad_norm": 0.7782865208099427, "learning_rate": 1.898464696243979e-07, "loss": 0.3971, "step": 29843 }, { "epoch": 0.9146745126884884, "grad_norm": 1.787427554632268, "learning_rate": 1.8971102771613736e-07, "loss": 0.4623, "step": 29844 }, { "epoch": 0.9147051612112296, "grad_norm": 1.824743145200961, "learning_rate": 1.8957563320488427e-07, "loss": 0.5031, "step": 29845 }, { "epoch": 0.9147358097339708, "grad_norm": 2.0084720262422397, "learning_rate": 1.8944028609197419e-07, "loss": 0.5417, "step": 29846 }, { "epoch": 0.914766458256712, "grad_norm": 0.7728143895472296, "learning_rate": 1.893049863787394e-07, "loss": 0.4008, "step": 29847 }, { "epoch": 0.9147971067794533, "grad_norm": 0.7958647392681752, "learning_rate": 1.8916973406651385e-07, "loss": 0.3721, "step": 29848 }, { "epoch": 0.9148277553021944, "grad_norm": 1.9010737069301877, "learning_rate": 1.8903452915662924e-07, "loss": 0.5354, "step": 29849 }, { "epoch": 0.9148584038249357, "grad_norm": 1.8352376364610106, "learning_rate": 1.888993716504184e-07, "loss": 0.5719, "step": 29850 }, { "epoch": 0.9148890523476768, "grad_norm": 1.7654695012098736, "learning_rate": 1.8876426154921357e-07, "loss": 0.4857, "step": 29851 }, { "epoch": 0.9149197008704181, "grad_norm": 0.8133822629484254, "learning_rate": 1.8862919885434537e-07, "loss": 0.4073, "step": 29852 }, { "epoch": 0.9149503493931592, "grad_norm": 1.9488651936680543, "learning_rate": 1.8849418356714388e-07, "loss": 0.6207, "step": 29853 }, { "epoch": 0.9149809979159005, "grad_norm": 0.7907619829001993, "learning_rate": 1.8835921568894133e-07, "loss": 0.3826, "step": 29854 }, { "epoch": 0.9150116464386416, "grad_norm": 1.8069057454026776, "learning_rate": 1.882242952210661e-07, "loss": 0.5074, "step": 29855 }, { "epoch": 0.9150422949613829, "grad_norm": 0.7556608075004828, "learning_rate": 1.880894221648477e-07, "loss": 0.3813, "step": 29856 }, { "epoch": 0.915072943484124, "grad_norm": 1.858192694137059, "learning_rate": 1.8795459652161618e-07, "loss": 0.6511, "step": 29857 }, { "epoch": 0.9151035920068653, "grad_norm": 2.0355058893089866, "learning_rate": 1.878198182926988e-07, "loss": 0.5255, "step": 29858 }, { "epoch": 0.9151342405296065, "grad_norm": 1.7955447216761946, "learning_rate": 1.8768508747942393e-07, "loss": 0.6339, "step": 29859 }, { "epoch": 0.9151648890523477, "grad_norm": 1.850423834490208, "learning_rate": 1.8755040408311941e-07, "loss": 0.5086, "step": 29860 }, { "epoch": 0.9151955375750889, "grad_norm": 0.7665965391927742, "learning_rate": 1.874157681051114e-07, "loss": 0.3917, "step": 29861 }, { "epoch": 0.9152261860978301, "grad_norm": 2.07787818488601, "learning_rate": 1.872811795467283e-07, "loss": 0.592, "step": 29862 }, { "epoch": 0.9152568346205713, "grad_norm": 2.0819718146861437, "learning_rate": 1.8714663840929403e-07, "loss": 0.6191, "step": 29863 }, { "epoch": 0.9152874831433125, "grad_norm": 1.8248911732984692, "learning_rate": 1.8701214469413588e-07, "loss": 0.5991, "step": 29864 }, { "epoch": 0.9153181316660537, "grad_norm": 1.9288650997973105, "learning_rate": 1.8687769840257886e-07, "loss": 0.6116, "step": 29865 }, { "epoch": 0.915348780188795, "grad_norm": 2.007894687365859, "learning_rate": 1.8674329953594693e-07, "loss": 0.6744, "step": 29866 }, { "epoch": 0.9153794287115361, "grad_norm": 1.888686699688477, "learning_rate": 1.8660894809556464e-07, "loss": 0.474, "step": 29867 }, { "epoch": 0.9154100772342773, "grad_norm": 1.8232478375231507, "learning_rate": 1.864746440827564e-07, "loss": 0.6213, "step": 29868 }, { "epoch": 0.9154407257570185, "grad_norm": 1.651192664806235, "learning_rate": 1.8634038749884453e-07, "loss": 0.5705, "step": 29869 }, { "epoch": 0.9154713742797597, "grad_norm": 1.8955125029414923, "learning_rate": 1.8620617834515299e-07, "loss": 0.5601, "step": 29870 }, { "epoch": 0.9155020228025009, "grad_norm": 1.9138241582202362, "learning_rate": 1.8607201662300346e-07, "loss": 0.541, "step": 29871 }, { "epoch": 0.9155326713252421, "grad_norm": 0.849926327426312, "learning_rate": 1.8593790233371766e-07, "loss": 0.376, "step": 29872 }, { "epoch": 0.9155633198479833, "grad_norm": 1.8951644740577365, "learning_rate": 1.8580383547861792e-07, "loss": 0.541, "step": 29873 }, { "epoch": 0.9155939683707245, "grad_norm": 1.9242527376944571, "learning_rate": 1.856698160590248e-07, "loss": 0.5529, "step": 29874 }, { "epoch": 0.9156246168934657, "grad_norm": 1.9089923785835765, "learning_rate": 1.8553584407625834e-07, "loss": 0.5474, "step": 29875 }, { "epoch": 0.9156552654162069, "grad_norm": 1.8088991732278865, "learning_rate": 1.8540191953163978e-07, "loss": 0.6027, "step": 29876 }, { "epoch": 0.9156859139389482, "grad_norm": 2.2285335699964572, "learning_rate": 1.852680424264869e-07, "loss": 0.6314, "step": 29877 }, { "epoch": 0.9157165624616893, "grad_norm": 1.924940047709909, "learning_rate": 1.8513421276212086e-07, "loss": 0.5485, "step": 29878 }, { "epoch": 0.9157472109844306, "grad_norm": 0.7908868024844797, "learning_rate": 1.8500043053985894e-07, "loss": 0.3861, "step": 29879 }, { "epoch": 0.9157778595071717, "grad_norm": 1.9863357099960703, "learning_rate": 1.8486669576101957e-07, "loss": 0.6354, "step": 29880 }, { "epoch": 0.915808508029913, "grad_norm": 1.906898444353477, "learning_rate": 1.847330084269211e-07, "loss": 0.5801, "step": 29881 }, { "epoch": 0.9158391565526541, "grad_norm": 2.1082116295351296, "learning_rate": 1.8459936853888028e-07, "loss": 0.6394, "step": 29882 }, { "epoch": 0.9158698050753954, "grad_norm": 2.236746010260709, "learning_rate": 1.8446577609821325e-07, "loss": 0.5585, "step": 29883 }, { "epoch": 0.9159004535981365, "grad_norm": 1.7726134875365898, "learning_rate": 1.843322311062379e-07, "loss": 0.5309, "step": 29884 }, { "epoch": 0.9159311021208778, "grad_norm": 2.0222924823151427, "learning_rate": 1.8419873356426866e-07, "loss": 0.6339, "step": 29885 }, { "epoch": 0.915961750643619, "grad_norm": 1.6976302086934025, "learning_rate": 1.8406528347362172e-07, "loss": 0.4488, "step": 29886 }, { "epoch": 0.9159923991663602, "grad_norm": 0.8448031019743651, "learning_rate": 1.839318808356122e-07, "loss": 0.3857, "step": 29887 }, { "epoch": 0.9160230476891014, "grad_norm": 1.8923785641575195, "learning_rate": 1.8379852565155343e-07, "loss": 0.598, "step": 29888 }, { "epoch": 0.9160536962118426, "grad_norm": 1.899553582131931, "learning_rate": 1.836652179227605e-07, "loss": 0.5924, "step": 29889 }, { "epoch": 0.9160843447345838, "grad_norm": 2.108186321372636, "learning_rate": 1.8353195765054566e-07, "loss": 0.4471, "step": 29890 }, { "epoch": 0.916114993257325, "grad_norm": 1.823651875884009, "learning_rate": 1.8339874483622344e-07, "loss": 0.6146, "step": 29891 }, { "epoch": 0.9161456417800662, "grad_norm": 1.9840593231250891, "learning_rate": 1.8326557948110611e-07, "loss": 0.5067, "step": 29892 }, { "epoch": 0.9161762903028075, "grad_norm": 1.9338520720822872, "learning_rate": 1.831324615865049e-07, "loss": 0.5455, "step": 29893 }, { "epoch": 0.9162069388255486, "grad_norm": 1.830993745345836, "learning_rate": 1.82999391153732e-07, "loss": 0.6246, "step": 29894 }, { "epoch": 0.9162375873482899, "grad_norm": 1.7195738110086303, "learning_rate": 1.828663681840992e-07, "loss": 0.4778, "step": 29895 }, { "epoch": 0.916268235871031, "grad_norm": 1.9176348927173066, "learning_rate": 1.8273339267891598e-07, "loss": 0.5474, "step": 29896 }, { "epoch": 0.9162988843937723, "grad_norm": 2.0003291183148333, "learning_rate": 1.8260046463949298e-07, "loss": 0.583, "step": 29897 }, { "epoch": 0.9163295329165134, "grad_norm": 0.8013937719027161, "learning_rate": 1.8246758406714082e-07, "loss": 0.4073, "step": 29898 }, { "epoch": 0.9163601814392546, "grad_norm": 0.8107480959432858, "learning_rate": 1.8233475096316788e-07, "loss": 0.4125, "step": 29899 }, { "epoch": 0.9163908299619958, "grad_norm": 1.9889088520102665, "learning_rate": 1.822019653288837e-07, "loss": 0.646, "step": 29900 }, { "epoch": 0.916421478484737, "grad_norm": 1.7370180482678679, "learning_rate": 1.8206922716559493e-07, "loss": 0.5184, "step": 29901 }, { "epoch": 0.9164521270074782, "grad_norm": 1.90587649566498, "learning_rate": 1.819365364746123e-07, "loss": 0.5314, "step": 29902 }, { "epoch": 0.9164827755302194, "grad_norm": 1.8348284480387997, "learning_rate": 1.8180389325724135e-07, "loss": 0.5348, "step": 29903 }, { "epoch": 0.9165134240529607, "grad_norm": 0.807386427507119, "learning_rate": 1.8167129751478886e-07, "loss": 0.3991, "step": 29904 }, { "epoch": 0.9165440725757018, "grad_norm": 2.107661199648564, "learning_rate": 1.8153874924856207e-07, "loss": 0.6146, "step": 29905 }, { "epoch": 0.9165747210984431, "grad_norm": 1.9299936138053255, "learning_rate": 1.814062484598672e-07, "loss": 0.6056, "step": 29906 }, { "epoch": 0.9166053696211842, "grad_norm": 2.1095606508557667, "learning_rate": 1.812737951500093e-07, "loss": 0.4446, "step": 29907 }, { "epoch": 0.9166360181439255, "grad_norm": 1.9708110128186203, "learning_rate": 1.8114138932029347e-07, "loss": 0.6052, "step": 29908 }, { "epoch": 0.9166666666666666, "grad_norm": 0.8300224173695621, "learning_rate": 1.8100903097202415e-07, "loss": 0.4005, "step": 29909 }, { "epoch": 0.9166973151894079, "grad_norm": 1.8884469988384611, "learning_rate": 1.8087672010650704e-07, "loss": 0.5981, "step": 29910 }, { "epoch": 0.916727963712149, "grad_norm": 1.7940027490770662, "learning_rate": 1.8074445672504382e-07, "loss": 0.5059, "step": 29911 }, { "epoch": 0.9167586122348903, "grad_norm": 1.8056559156681649, "learning_rate": 1.8061224082893791e-07, "loss": 0.5462, "step": 29912 }, { "epoch": 0.9167892607576315, "grad_norm": 1.7772004115917632, "learning_rate": 1.8048007241949384e-07, "loss": 0.5906, "step": 29913 }, { "epoch": 0.9168199092803727, "grad_norm": 1.8295383446854867, "learning_rate": 1.8034795149801276e-07, "loss": 0.5076, "step": 29914 }, { "epoch": 0.9168505578031139, "grad_norm": 0.8267780262616873, "learning_rate": 1.802158780657959e-07, "loss": 0.3966, "step": 29915 }, { "epoch": 0.9168812063258551, "grad_norm": 0.7751601835117532, "learning_rate": 1.800838521241449e-07, "loss": 0.3884, "step": 29916 }, { "epoch": 0.9169118548485963, "grad_norm": 1.9398736662491098, "learning_rate": 1.7995187367436106e-07, "loss": 0.5389, "step": 29917 }, { "epoch": 0.9169425033713375, "grad_norm": 2.0729146313603475, "learning_rate": 1.798199427177455e-07, "loss": 0.5934, "step": 29918 }, { "epoch": 0.9169731518940787, "grad_norm": 1.844961182876262, "learning_rate": 1.7968805925559663e-07, "loss": 0.504, "step": 29919 }, { "epoch": 0.91700380041682, "grad_norm": 1.8933853842971278, "learning_rate": 1.7955622328921451e-07, "loss": 0.6435, "step": 29920 }, { "epoch": 0.9170344489395611, "grad_norm": 2.0372937841157013, "learning_rate": 1.794244348198987e-07, "loss": 0.4687, "step": 29921 }, { "epoch": 0.9170650974623024, "grad_norm": 2.217030217755701, "learning_rate": 1.7929269384894755e-07, "loss": 0.6346, "step": 29922 }, { "epoch": 0.9170957459850435, "grad_norm": 1.9848104381679677, "learning_rate": 1.7916100037765837e-07, "loss": 0.5637, "step": 29923 }, { "epoch": 0.9171263945077848, "grad_norm": 1.9326413561024731, "learning_rate": 1.7902935440732962e-07, "loss": 0.5398, "step": 29924 }, { "epoch": 0.9171570430305259, "grad_norm": 2.092131969534347, "learning_rate": 1.7889775593925795e-07, "loss": 0.6468, "step": 29925 }, { "epoch": 0.9171876915532672, "grad_norm": 2.1842064275426134, "learning_rate": 1.7876620497474018e-07, "loss": 0.5483, "step": 29926 }, { "epoch": 0.9172183400760083, "grad_norm": 2.048325304120231, "learning_rate": 1.7863470151507246e-07, "loss": 0.6276, "step": 29927 }, { "epoch": 0.9172489885987496, "grad_norm": 1.8491008169295664, "learning_rate": 1.7850324556155096e-07, "loss": 0.5962, "step": 29928 }, { "epoch": 0.9172796371214907, "grad_norm": 1.71399178748151, "learning_rate": 1.7837183711547078e-07, "loss": 0.4805, "step": 29929 }, { "epoch": 0.9173102856442319, "grad_norm": 1.8128748598794622, "learning_rate": 1.7824047617812644e-07, "loss": 0.5275, "step": 29930 }, { "epoch": 0.9173409341669732, "grad_norm": 2.0879555280411033, "learning_rate": 1.7810916275081136e-07, "loss": 0.6215, "step": 29931 }, { "epoch": 0.9173715826897143, "grad_norm": 1.8377251849188252, "learning_rate": 1.779778968348217e-07, "loss": 0.4862, "step": 29932 }, { "epoch": 0.9174022312124556, "grad_norm": 0.8655734865652366, "learning_rate": 1.7784667843144977e-07, "loss": 0.4097, "step": 29933 }, { "epoch": 0.9174328797351967, "grad_norm": 1.9358243153256087, "learning_rate": 1.777155075419873e-07, "loss": 0.5935, "step": 29934 }, { "epoch": 0.917463528257938, "grad_norm": 1.6950051289924655, "learning_rate": 1.7758438416772827e-07, "loss": 0.5932, "step": 29935 }, { "epoch": 0.9174941767806791, "grad_norm": 1.9913793312890131, "learning_rate": 1.7745330830996387e-07, "loss": 0.683, "step": 29936 }, { "epoch": 0.9175248253034204, "grad_norm": 0.7749855411992175, "learning_rate": 1.773222799699864e-07, "loss": 0.4053, "step": 29937 }, { "epoch": 0.9175554738261615, "grad_norm": 1.846442631808184, "learning_rate": 1.7719129914908594e-07, "loss": 0.546, "step": 29938 }, { "epoch": 0.9175861223489028, "grad_norm": 1.8215289907328782, "learning_rate": 1.770603658485537e-07, "loss": 0.4972, "step": 29939 }, { "epoch": 0.917616770871644, "grad_norm": 1.8333935948157287, "learning_rate": 1.7692948006968024e-07, "loss": 0.5806, "step": 29940 }, { "epoch": 0.9176474193943852, "grad_norm": 1.7937221426365295, "learning_rate": 1.767986418137546e-07, "loss": 0.5165, "step": 29941 }, { "epoch": 0.9176780679171264, "grad_norm": 2.3140110462526646, "learning_rate": 1.7666785108206462e-07, "loss": 0.5789, "step": 29942 }, { "epoch": 0.9177087164398676, "grad_norm": 1.9758785222561035, "learning_rate": 1.7653710787590206e-07, "loss": 0.5709, "step": 29943 }, { "epoch": 0.9177393649626088, "grad_norm": 1.9769737533079852, "learning_rate": 1.7640641219655252e-07, "loss": 0.5616, "step": 29944 }, { "epoch": 0.91777001348535, "grad_norm": 1.7740059621650013, "learning_rate": 1.7627576404530554e-07, "loss": 0.5565, "step": 29945 }, { "epoch": 0.9178006620080912, "grad_norm": 0.7893420944573207, "learning_rate": 1.7614516342344678e-07, "loss": 0.3893, "step": 29946 }, { "epoch": 0.9178313105308324, "grad_norm": 1.8774525592759355, "learning_rate": 1.7601461033226407e-07, "loss": 0.5285, "step": 29947 }, { "epoch": 0.9178619590535736, "grad_norm": 1.8007394754912385, "learning_rate": 1.7588410477304475e-07, "loss": 0.5143, "step": 29948 }, { "epoch": 0.9178926075763149, "grad_norm": 1.8775920229868, "learning_rate": 1.7575364674707275e-07, "loss": 0.6116, "step": 29949 }, { "epoch": 0.917923256099056, "grad_norm": 1.8894885115640385, "learning_rate": 1.7562323625563427e-07, "loss": 0.5855, "step": 29950 }, { "epoch": 0.9179539046217973, "grad_norm": 1.5737939202098676, "learning_rate": 1.7549287330001498e-07, "loss": 0.5055, "step": 29951 }, { "epoch": 0.9179845531445384, "grad_norm": 1.818232268891024, "learning_rate": 1.753625578814988e-07, "loss": 0.5918, "step": 29952 }, { "epoch": 0.9180152016672797, "grad_norm": 1.9404723050635013, "learning_rate": 1.7523229000136866e-07, "loss": 0.6725, "step": 29953 }, { "epoch": 0.9180458501900208, "grad_norm": 1.7233724940010537, "learning_rate": 1.751020696609107e-07, "loss": 0.4583, "step": 29954 }, { "epoch": 0.9180764987127621, "grad_norm": 1.863811890994834, "learning_rate": 1.749718968614056e-07, "loss": 0.5948, "step": 29955 }, { "epoch": 0.9181071472355032, "grad_norm": 2.2111717874956662, "learning_rate": 1.7484177160413785e-07, "loss": 0.6044, "step": 29956 }, { "epoch": 0.9181377957582445, "grad_norm": 1.990187132663756, "learning_rate": 1.7471169389038812e-07, "loss": 0.4932, "step": 29957 }, { "epoch": 0.9181684442809857, "grad_norm": 2.0807868686614617, "learning_rate": 1.7458166372143815e-07, "loss": 0.5695, "step": 29958 }, { "epoch": 0.9181990928037269, "grad_norm": 1.9496194832721905, "learning_rate": 1.744516810985708e-07, "loss": 0.4997, "step": 29959 }, { "epoch": 0.9182297413264681, "grad_norm": 1.8827040561658797, "learning_rate": 1.7432174602306507e-07, "loss": 0.6205, "step": 29960 }, { "epoch": 0.9182603898492092, "grad_norm": 2.043493990415983, "learning_rate": 1.7419185849620158e-07, "loss": 0.4697, "step": 29961 }, { "epoch": 0.9182910383719505, "grad_norm": 1.8430209553145536, "learning_rate": 1.7406201851926097e-07, "loss": 0.5791, "step": 29962 }, { "epoch": 0.9183216868946916, "grad_norm": 1.8439695568247747, "learning_rate": 1.7393222609352167e-07, "loss": 0.6119, "step": 29963 }, { "epoch": 0.9183523354174329, "grad_norm": 2.0391861881525966, "learning_rate": 1.7380248122026322e-07, "loss": 0.5131, "step": 29964 }, { "epoch": 0.918382983940174, "grad_norm": 0.7514299420856326, "learning_rate": 1.7367278390076404e-07, "loss": 0.4011, "step": 29965 }, { "epoch": 0.9184136324629153, "grad_norm": 1.9902693566785263, "learning_rate": 1.735431341363014e-07, "loss": 0.524, "step": 29966 }, { "epoch": 0.9184442809856564, "grad_norm": 1.8278229364048026, "learning_rate": 1.7341353192815325e-07, "loss": 0.6025, "step": 29967 }, { "epoch": 0.9184749295083977, "grad_norm": 1.7602220349404867, "learning_rate": 1.7328397727759628e-07, "loss": 0.5727, "step": 29968 }, { "epoch": 0.9185055780311389, "grad_norm": 1.878385036113042, "learning_rate": 1.7315447018590724e-07, "loss": 0.5141, "step": 29969 }, { "epoch": 0.9185362265538801, "grad_norm": 1.6681184692713042, "learning_rate": 1.7302501065436295e-07, "loss": 0.5333, "step": 29970 }, { "epoch": 0.9185668750766213, "grad_norm": 2.0029761099041115, "learning_rate": 1.7289559868423733e-07, "loss": 0.5987, "step": 29971 }, { "epoch": 0.9185975235993625, "grad_norm": 2.0291957021479203, "learning_rate": 1.727662342768066e-07, "loss": 0.6379, "step": 29972 }, { "epoch": 0.9186281721221037, "grad_norm": 1.8144333895990579, "learning_rate": 1.7263691743334587e-07, "loss": 0.5901, "step": 29973 }, { "epoch": 0.9186588206448449, "grad_norm": 2.0147866759982356, "learning_rate": 1.7250764815512854e-07, "loss": 0.5411, "step": 29974 }, { "epoch": 0.9186894691675861, "grad_norm": 1.907783103987325, "learning_rate": 1.7237842644342862e-07, "loss": 0.5112, "step": 29975 }, { "epoch": 0.9187201176903274, "grad_norm": 0.7845244213070537, "learning_rate": 1.7224925229951838e-07, "loss": 0.3726, "step": 29976 }, { "epoch": 0.9187507662130685, "grad_norm": 1.9951646365500937, "learning_rate": 1.721201257246724e-07, "loss": 0.5935, "step": 29977 }, { "epoch": 0.9187814147358098, "grad_norm": 1.8123838686003342, "learning_rate": 1.7199104672016187e-07, "loss": 0.6007, "step": 29978 }, { "epoch": 0.9188120632585509, "grad_norm": 1.9338891218348713, "learning_rate": 1.7186201528725855e-07, "loss": 0.6056, "step": 29979 }, { "epoch": 0.9188427117812922, "grad_norm": 1.7977638271421321, "learning_rate": 1.7173303142723418e-07, "loss": 0.5525, "step": 29980 }, { "epoch": 0.9188733603040333, "grad_norm": 1.9817832150045476, "learning_rate": 1.7160409514136e-07, "loss": 0.5536, "step": 29981 }, { "epoch": 0.9189040088267746, "grad_norm": 2.2972773564162394, "learning_rate": 1.7147520643090554e-07, "loss": 0.5752, "step": 29982 }, { "epoch": 0.9189346573495157, "grad_norm": 0.8472819587060303, "learning_rate": 1.7134636529714144e-07, "loss": 0.3866, "step": 29983 }, { "epoch": 0.918965305872257, "grad_norm": 1.8355002083022338, "learning_rate": 1.712175717413378e-07, "loss": 0.6101, "step": 29984 }, { "epoch": 0.9189959543949981, "grad_norm": 0.8364051142968102, "learning_rate": 1.7108882576476194e-07, "loss": 0.416, "step": 29985 }, { "epoch": 0.9190266029177394, "grad_norm": 1.9282604783846453, "learning_rate": 1.709601273686845e-07, "loss": 0.4454, "step": 29986 }, { "epoch": 0.9190572514404806, "grad_norm": 1.7838835195682203, "learning_rate": 1.7083147655437172e-07, "loss": 0.6121, "step": 29987 }, { "epoch": 0.9190878999632218, "grad_norm": 2.0617376999236785, "learning_rate": 1.70702873323092e-07, "loss": 0.5478, "step": 29988 }, { "epoch": 0.919118548485963, "grad_norm": 1.8803356601796306, "learning_rate": 1.7057431767611264e-07, "loss": 0.5794, "step": 29989 }, { "epoch": 0.9191491970087042, "grad_norm": 1.750155631247757, "learning_rate": 1.7044580961469992e-07, "loss": 0.5001, "step": 29990 }, { "epoch": 0.9191798455314454, "grad_norm": 1.9742923969065593, "learning_rate": 1.7031734914012056e-07, "loss": 0.6258, "step": 29991 }, { "epoch": 0.9192104940541865, "grad_norm": 2.985873264447969, "learning_rate": 1.701889362536402e-07, "loss": 0.6129, "step": 29992 }, { "epoch": 0.9192411425769278, "grad_norm": 1.9193595020080418, "learning_rate": 1.7006057095652395e-07, "loss": 0.5121, "step": 29993 }, { "epoch": 0.919271791099669, "grad_norm": 2.0662767623499882, "learning_rate": 1.6993225325003638e-07, "loss": 0.6058, "step": 29994 }, { "epoch": 0.9193024396224102, "grad_norm": 1.9695479952479418, "learning_rate": 1.6980398313544255e-07, "loss": 0.5593, "step": 29995 }, { "epoch": 0.9193330881451514, "grad_norm": 1.9154218238028915, "learning_rate": 1.6967576061400592e-07, "loss": 0.5517, "step": 29996 }, { "epoch": 0.9193637366678926, "grad_norm": 1.9705838665661684, "learning_rate": 1.6954758568698992e-07, "loss": 0.5479, "step": 29997 }, { "epoch": 0.9193943851906338, "grad_norm": 1.7880833254528465, "learning_rate": 1.6941945835565686e-07, "loss": 0.5893, "step": 29998 }, { "epoch": 0.919425033713375, "grad_norm": 1.9401330922787172, "learning_rate": 1.692913786212702e-07, "loss": 0.561, "step": 29999 }, { "epoch": 0.9194556822361162, "grad_norm": 2.156114348416436, "learning_rate": 1.6916334648509225e-07, "loss": 0.5146, "step": 30000 }, { "epoch": 0.9194863307588574, "grad_norm": 1.8593846583369962, "learning_rate": 1.690353619483831e-07, "loss": 0.575, "step": 30001 }, { "epoch": 0.9195169792815986, "grad_norm": 2.0996933031119602, "learning_rate": 1.6890742501240453e-07, "loss": 0.5913, "step": 30002 }, { "epoch": 0.9195476278043399, "grad_norm": 1.694657846574413, "learning_rate": 1.6877953567841777e-07, "loss": 0.6016, "step": 30003 }, { "epoch": 0.919578276327081, "grad_norm": 1.8258604952613309, "learning_rate": 1.6865169394768176e-07, "loss": 0.5323, "step": 30004 }, { "epoch": 0.9196089248498223, "grad_norm": 2.079178727595316, "learning_rate": 1.6852389982145722e-07, "loss": 0.5442, "step": 30005 }, { "epoch": 0.9196395733725634, "grad_norm": 1.9575972966063369, "learning_rate": 1.6839615330100313e-07, "loss": 0.5509, "step": 30006 }, { "epoch": 0.9196702218953047, "grad_norm": 1.9779586541460523, "learning_rate": 1.6826845438757733e-07, "loss": 0.5084, "step": 30007 }, { "epoch": 0.9197008704180458, "grad_norm": 1.7556303597396277, "learning_rate": 1.6814080308243885e-07, "loss": 0.4872, "step": 30008 }, { "epoch": 0.9197315189407871, "grad_norm": 1.9571966150492626, "learning_rate": 1.6801319938684502e-07, "loss": 0.5561, "step": 30009 }, { "epoch": 0.9197621674635282, "grad_norm": 2.1326907043990815, "learning_rate": 1.678856433020537e-07, "loss": 0.5448, "step": 30010 }, { "epoch": 0.9197928159862695, "grad_norm": 1.7314794802256452, "learning_rate": 1.6775813482932225e-07, "loss": 0.4127, "step": 30011 }, { "epoch": 0.9198234645090106, "grad_norm": 0.7651345764067198, "learning_rate": 1.6763067396990517e-07, "loss": 0.3699, "step": 30012 }, { "epoch": 0.9198541130317519, "grad_norm": 2.010087459226924, "learning_rate": 1.6750326072505984e-07, "loss": 0.5307, "step": 30013 }, { "epoch": 0.9198847615544931, "grad_norm": 1.941459578289682, "learning_rate": 1.673758950960419e-07, "loss": 0.6269, "step": 30014 }, { "epoch": 0.9199154100772343, "grad_norm": 2.0445004459297236, "learning_rate": 1.672485770841048e-07, "loss": 0.6449, "step": 30015 }, { "epoch": 0.9199460585999755, "grad_norm": 1.8442165576867562, "learning_rate": 1.6712130669050476e-07, "loss": 0.4744, "step": 30016 }, { "epoch": 0.9199767071227167, "grad_norm": 1.8770841254594863, "learning_rate": 1.6699408391649407e-07, "loss": 0.5731, "step": 30017 }, { "epoch": 0.9200073556454579, "grad_norm": 2.161507999885705, "learning_rate": 1.6686690876332845e-07, "loss": 0.6679, "step": 30018 }, { "epoch": 0.9200380041681991, "grad_norm": 1.979714829442546, "learning_rate": 1.6673978123225963e-07, "loss": 0.6157, "step": 30019 }, { "epoch": 0.9200686526909403, "grad_norm": 1.969544874008329, "learning_rate": 1.6661270132454e-07, "loss": 0.6503, "step": 30020 }, { "epoch": 0.9200993012136816, "grad_norm": 2.315181038944726, "learning_rate": 1.6648566904142183e-07, "loss": 0.5086, "step": 30021 }, { "epoch": 0.9201299497364227, "grad_norm": 1.862732801075558, "learning_rate": 1.6635868438415748e-07, "loss": 0.5873, "step": 30022 }, { "epoch": 0.9201605982591639, "grad_norm": 1.9592712163190356, "learning_rate": 1.662317473539976e-07, "loss": 0.5564, "step": 30023 }, { "epoch": 0.9201912467819051, "grad_norm": 1.846617112904417, "learning_rate": 1.6610485795219288e-07, "loss": 0.5228, "step": 30024 }, { "epoch": 0.9202218953046463, "grad_norm": 1.929330686062303, "learning_rate": 1.6597801617999454e-07, "loss": 0.4819, "step": 30025 }, { "epoch": 0.9202525438273875, "grad_norm": 1.6309367861396058, "learning_rate": 1.6585122203865046e-07, "loss": 0.5455, "step": 30026 }, { "epoch": 0.9202831923501287, "grad_norm": 2.0626355228025615, "learning_rate": 1.657244755294124e-07, "loss": 0.5766, "step": 30027 }, { "epoch": 0.9203138408728699, "grad_norm": 1.6474711071406505, "learning_rate": 1.655977766535266e-07, "loss": 0.5161, "step": 30028 }, { "epoch": 0.9203444893956111, "grad_norm": 1.8122156387563908, "learning_rate": 1.654711254122443e-07, "loss": 0.5868, "step": 30029 }, { "epoch": 0.9203751379183523, "grad_norm": 1.9105757294218313, "learning_rate": 1.6534452180681115e-07, "loss": 0.4359, "step": 30030 }, { "epoch": 0.9204057864410935, "grad_norm": 0.7849252684243418, "learning_rate": 1.652179658384756e-07, "loss": 0.4101, "step": 30031 }, { "epoch": 0.9204364349638348, "grad_norm": 2.148934657393557, "learning_rate": 1.6509145750848444e-07, "loss": 0.6533, "step": 30032 }, { "epoch": 0.9204670834865759, "grad_norm": 2.1238638612309324, "learning_rate": 1.649649968180844e-07, "loss": 0.5641, "step": 30033 }, { "epoch": 0.9204977320093172, "grad_norm": 1.9636049030160343, "learning_rate": 1.6483858376852123e-07, "loss": 0.6043, "step": 30034 }, { "epoch": 0.9205283805320583, "grad_norm": 1.711662720907363, "learning_rate": 1.6471221836104e-07, "loss": 0.5187, "step": 30035 }, { "epoch": 0.9205590290547996, "grad_norm": 1.7044924691327157, "learning_rate": 1.6458590059688696e-07, "loss": 0.5082, "step": 30036 }, { "epoch": 0.9205896775775407, "grad_norm": 1.6683873638788085, "learning_rate": 1.6445963047730663e-07, "loss": 0.4625, "step": 30037 }, { "epoch": 0.920620326100282, "grad_norm": 1.8557013714118882, "learning_rate": 1.6433340800354302e-07, "loss": 0.5393, "step": 30038 }, { "epoch": 0.9206509746230231, "grad_norm": 1.9692370880580405, "learning_rate": 1.6420723317683796e-07, "loss": 0.6118, "step": 30039 }, { "epoch": 0.9206816231457644, "grad_norm": 2.0267312123107577, "learning_rate": 1.6408110599843763e-07, "loss": 0.5806, "step": 30040 }, { "epoch": 0.9207122716685056, "grad_norm": 2.234350277733038, "learning_rate": 1.6395502646958385e-07, "loss": 0.5704, "step": 30041 }, { "epoch": 0.9207429201912468, "grad_norm": 2.0806959725003895, "learning_rate": 1.638289945915178e-07, "loss": 0.5637, "step": 30042 }, { "epoch": 0.920773568713988, "grad_norm": 1.9711605523613058, "learning_rate": 1.6370301036548186e-07, "loss": 0.5822, "step": 30043 }, { "epoch": 0.9208042172367292, "grad_norm": 2.129342451558588, "learning_rate": 1.6357707379271782e-07, "loss": 0.53, "step": 30044 }, { "epoch": 0.9208348657594704, "grad_norm": 1.8634334841924873, "learning_rate": 1.6345118487446687e-07, "loss": 0.5385, "step": 30045 }, { "epoch": 0.9208655142822116, "grad_norm": 1.7419973348208744, "learning_rate": 1.6332534361196806e-07, "loss": 0.6126, "step": 30046 }, { "epoch": 0.9208961628049528, "grad_norm": 1.704001457584428, "learning_rate": 1.6319955000646258e-07, "loss": 0.4746, "step": 30047 }, { "epoch": 0.920926811327694, "grad_norm": 0.7816022029876883, "learning_rate": 1.630738040591895e-07, "loss": 0.3982, "step": 30048 }, { "epoch": 0.9209574598504352, "grad_norm": 2.0381573809104077, "learning_rate": 1.6294810577138832e-07, "loss": 0.5772, "step": 30049 }, { "epoch": 0.9209881083731765, "grad_norm": 2.004991158067527, "learning_rate": 1.6282245514429583e-07, "loss": 0.5569, "step": 30050 }, { "epoch": 0.9210187568959176, "grad_norm": 0.7973406183430272, "learning_rate": 1.6269685217915222e-07, "loss": 0.4062, "step": 30051 }, { "epoch": 0.9210494054186589, "grad_norm": 1.8659150356846101, "learning_rate": 1.625712968771942e-07, "loss": 0.4984, "step": 30052 }, { "epoch": 0.9210800539414, "grad_norm": 1.7765940909920486, "learning_rate": 1.624457892396586e-07, "loss": 0.5173, "step": 30053 }, { "epoch": 0.9211107024641412, "grad_norm": 1.724575730563262, "learning_rate": 1.6232032926778218e-07, "loss": 0.5281, "step": 30054 }, { "epoch": 0.9211413509868824, "grad_norm": 1.7899828009267333, "learning_rate": 1.6219491696280122e-07, "loss": 0.4934, "step": 30055 }, { "epoch": 0.9211719995096236, "grad_norm": 1.8779048260828888, "learning_rate": 1.6206955232595245e-07, "loss": 0.523, "step": 30056 }, { "epoch": 0.9212026480323648, "grad_norm": 0.8790113191026394, "learning_rate": 1.6194423535846936e-07, "loss": 0.3993, "step": 30057 }, { "epoch": 0.921233296555106, "grad_norm": 2.0053704017049285, "learning_rate": 1.6181896606158764e-07, "loss": 0.4938, "step": 30058 }, { "epoch": 0.9212639450778473, "grad_norm": 0.8227523844057286, "learning_rate": 1.616937444365424e-07, "loss": 0.3925, "step": 30059 }, { "epoch": 0.9212945936005884, "grad_norm": 1.7683950236346433, "learning_rate": 1.6156857048456654e-07, "loss": 0.6005, "step": 30060 }, { "epoch": 0.9213252421233297, "grad_norm": 1.9728951289658556, "learning_rate": 1.614434442068924e-07, "loss": 0.5877, "step": 30061 }, { "epoch": 0.9213558906460708, "grad_norm": 1.8701459477693347, "learning_rate": 1.6131836560475457e-07, "loss": 0.4984, "step": 30062 }, { "epoch": 0.9213865391688121, "grad_norm": 2.1013865404307106, "learning_rate": 1.611933346793848e-07, "loss": 0.6011, "step": 30063 }, { "epoch": 0.9214171876915532, "grad_norm": 1.6824615916629904, "learning_rate": 1.6106835143201605e-07, "loss": 0.5507, "step": 30064 }, { "epoch": 0.9214478362142945, "grad_norm": 1.697617023240348, "learning_rate": 1.6094341586387785e-07, "loss": 0.5692, "step": 30065 }, { "epoch": 0.9214784847370356, "grad_norm": 0.803677674046571, "learning_rate": 1.6081852797620257e-07, "loss": 0.3841, "step": 30066 }, { "epoch": 0.9215091332597769, "grad_norm": 1.8997716783471328, "learning_rate": 1.6069368777022088e-07, "loss": 0.5525, "step": 30067 }, { "epoch": 0.921539781782518, "grad_norm": 2.0394819021631307, "learning_rate": 1.6056889524716234e-07, "loss": 0.577, "step": 30068 }, { "epoch": 0.9215704303052593, "grad_norm": 1.786986005406287, "learning_rate": 1.60444150408256e-07, "loss": 0.5549, "step": 30069 }, { "epoch": 0.9216010788280005, "grad_norm": 1.7970970760003244, "learning_rate": 1.6031945325473253e-07, "loss": 0.5747, "step": 30070 }, { "epoch": 0.9216317273507417, "grad_norm": 1.7202712563450582, "learning_rate": 1.6019480378781927e-07, "loss": 0.5286, "step": 30071 }, { "epoch": 0.9216623758734829, "grad_norm": 1.8769157938048997, "learning_rate": 1.600702020087458e-07, "loss": 0.579, "step": 30072 }, { "epoch": 0.9216930243962241, "grad_norm": 1.8876107000271272, "learning_rate": 1.5994564791873835e-07, "loss": 0.6286, "step": 30073 }, { "epoch": 0.9217236729189653, "grad_norm": 1.7829164403432887, "learning_rate": 1.5982114151902428e-07, "loss": 0.5656, "step": 30074 }, { "epoch": 0.9217543214417065, "grad_norm": 1.9120772750447348, "learning_rate": 1.5969668281083207e-07, "loss": 0.5472, "step": 30075 }, { "epoch": 0.9217849699644477, "grad_norm": 1.7921603192966507, "learning_rate": 1.595722717953857e-07, "loss": 0.6409, "step": 30076 }, { "epoch": 0.921815618487189, "grad_norm": 1.7487789463988437, "learning_rate": 1.5944790847391255e-07, "loss": 0.5291, "step": 30077 }, { "epoch": 0.9218462670099301, "grad_norm": 1.8855362513730693, "learning_rate": 1.5932359284763832e-07, "loss": 0.5728, "step": 30078 }, { "epoch": 0.9218769155326714, "grad_norm": 1.6678820152434681, "learning_rate": 1.59199324917787e-07, "loss": 0.4966, "step": 30079 }, { "epoch": 0.9219075640554125, "grad_norm": 1.9318158907920318, "learning_rate": 1.5907510468558264e-07, "loss": 0.5851, "step": 30080 }, { "epoch": 0.9219382125781538, "grad_norm": 1.9225751394894546, "learning_rate": 1.5895093215225089e-07, "loss": 0.5585, "step": 30081 }, { "epoch": 0.9219688611008949, "grad_norm": 1.849049511489584, "learning_rate": 1.5882680731901356e-07, "loss": 0.5223, "step": 30082 }, { "epoch": 0.9219995096236362, "grad_norm": 2.1919309498010775, "learning_rate": 1.587027301870947e-07, "loss": 0.6075, "step": 30083 }, { "epoch": 0.9220301581463773, "grad_norm": 0.823093088490152, "learning_rate": 1.5857870075771608e-07, "loss": 0.3891, "step": 30084 }, { "epoch": 0.9220608066691185, "grad_norm": 0.8055653224025559, "learning_rate": 1.5845471903210063e-07, "loss": 0.3859, "step": 30085 }, { "epoch": 0.9220914551918598, "grad_norm": 0.8141301082443593, "learning_rate": 1.583307850114696e-07, "loss": 0.3948, "step": 30086 }, { "epoch": 0.9221221037146009, "grad_norm": 4.2793180443249375, "learning_rate": 1.5820689869704364e-07, "loss": 0.5639, "step": 30087 }, { "epoch": 0.9221527522373422, "grad_norm": 1.9918385751086995, "learning_rate": 1.5808306009004458e-07, "loss": 0.5376, "step": 30088 }, { "epoch": 0.9221834007600833, "grad_norm": 1.8812482625853963, "learning_rate": 1.5795926919169201e-07, "loss": 0.6199, "step": 30089 }, { "epoch": 0.9222140492828246, "grad_norm": 1.854982902743777, "learning_rate": 1.5783552600320495e-07, "loss": 0.6074, "step": 30090 }, { "epoch": 0.9222446978055657, "grad_norm": 1.9318227015661167, "learning_rate": 1.5771183052580353e-07, "loss": 0.6342, "step": 30091 }, { "epoch": 0.922275346328307, "grad_norm": 1.9033360811447548, "learning_rate": 1.575881827607073e-07, "loss": 0.5885, "step": 30092 }, { "epoch": 0.9223059948510481, "grad_norm": 1.7919160340755327, "learning_rate": 1.5746458270913258e-07, "loss": 0.5448, "step": 30093 }, { "epoch": 0.9223366433737894, "grad_norm": 2.1177785862873173, "learning_rate": 1.5734103037229942e-07, "loss": 0.6564, "step": 30094 }, { "epoch": 0.9223672918965306, "grad_norm": 1.8827045492074608, "learning_rate": 1.5721752575142357e-07, "loss": 0.5818, "step": 30095 }, { "epoch": 0.9223979404192718, "grad_norm": 2.0551106371247476, "learning_rate": 1.5709406884772182e-07, "loss": 0.5168, "step": 30096 }, { "epoch": 0.922428588942013, "grad_norm": 1.8905468721905765, "learning_rate": 1.5697065966241266e-07, "loss": 0.5165, "step": 30097 }, { "epoch": 0.9224592374647542, "grad_norm": 2.0117169821277274, "learning_rate": 1.5684729819671008e-07, "loss": 0.5275, "step": 30098 }, { "epoch": 0.9224898859874954, "grad_norm": 1.6093235328049154, "learning_rate": 1.5672398445182978e-07, "loss": 0.4846, "step": 30099 }, { "epoch": 0.9225205345102366, "grad_norm": 0.8107951988203376, "learning_rate": 1.5660071842898806e-07, "loss": 0.397, "step": 30100 }, { "epoch": 0.9225511830329778, "grad_norm": 2.029522805756595, "learning_rate": 1.5647750012939833e-07, "loss": 0.5279, "step": 30101 }, { "epoch": 0.922581831555719, "grad_norm": 1.5877579511616549, "learning_rate": 1.5635432955427464e-07, "loss": 0.458, "step": 30102 }, { "epoch": 0.9226124800784602, "grad_norm": 1.8972530179758766, "learning_rate": 1.5623120670483215e-07, "loss": 0.6016, "step": 30103 }, { "epoch": 0.9226431286012015, "grad_norm": 2.044835222057019, "learning_rate": 1.5610813158228156e-07, "loss": 0.6241, "step": 30104 }, { "epoch": 0.9226737771239426, "grad_norm": 0.8086664561428432, "learning_rate": 1.5598510418783796e-07, "loss": 0.393, "step": 30105 }, { "epoch": 0.9227044256466839, "grad_norm": 1.9463871333850724, "learning_rate": 1.558621245227121e-07, "loss": 0.5674, "step": 30106 }, { "epoch": 0.922735074169425, "grad_norm": 2.0939090888464373, "learning_rate": 1.557391925881163e-07, "loss": 0.5982, "step": 30107 }, { "epoch": 0.9227657226921663, "grad_norm": 1.8175392753348563, "learning_rate": 1.5561630838526186e-07, "loss": 0.6246, "step": 30108 }, { "epoch": 0.9227963712149074, "grad_norm": 2.104317487353734, "learning_rate": 1.554934719153589e-07, "loss": 0.7009, "step": 30109 }, { "epoch": 0.9228270197376487, "grad_norm": 2.0034278381306705, "learning_rate": 1.553706831796181e-07, "loss": 0.6504, "step": 30110 }, { "epoch": 0.9228576682603898, "grad_norm": 1.8023647728406444, "learning_rate": 1.5524794217925077e-07, "loss": 0.5317, "step": 30111 }, { "epoch": 0.9228883167831311, "grad_norm": 0.8139807872267302, "learning_rate": 1.5512524891546366e-07, "loss": 0.3972, "step": 30112 }, { "epoch": 0.9229189653058723, "grad_norm": 1.7840419614609537, "learning_rate": 1.550026033894675e-07, "loss": 0.6119, "step": 30113 }, { "epoch": 0.9229496138286135, "grad_norm": 1.7966565715202565, "learning_rate": 1.548800056024713e-07, "loss": 0.5003, "step": 30114 }, { "epoch": 0.9229802623513547, "grad_norm": 1.906185639590162, "learning_rate": 1.5475745555568077e-07, "loss": 0.4824, "step": 30115 }, { "epoch": 0.9230109108740958, "grad_norm": 1.9067665503503692, "learning_rate": 1.5463495325030554e-07, "loss": 0.587, "step": 30116 }, { "epoch": 0.9230415593968371, "grad_norm": 2.079167665646108, "learning_rate": 1.5451249868755126e-07, "loss": 0.6266, "step": 30117 }, { "epoch": 0.9230722079195782, "grad_norm": 1.6625177265386037, "learning_rate": 1.543900918686253e-07, "loss": 0.5607, "step": 30118 }, { "epoch": 0.9231028564423195, "grad_norm": 2.1581239878301606, "learning_rate": 1.5426773279473395e-07, "loss": 0.5146, "step": 30119 }, { "epoch": 0.9231335049650606, "grad_norm": 2.0926545754316255, "learning_rate": 1.5414542146708234e-07, "loss": 0.5674, "step": 30120 }, { "epoch": 0.9231641534878019, "grad_norm": 1.7560449110990752, "learning_rate": 1.5402315788687506e-07, "loss": 0.5913, "step": 30121 }, { "epoch": 0.923194802010543, "grad_norm": 2.028102952158552, "learning_rate": 1.5390094205531893e-07, "loss": 0.5499, "step": 30122 }, { "epoch": 0.9232254505332843, "grad_norm": 1.9496569897310654, "learning_rate": 1.5377877397361518e-07, "loss": 0.5964, "step": 30123 }, { "epoch": 0.9232560990560255, "grad_norm": 2.076696473832834, "learning_rate": 1.536566536429701e-07, "loss": 0.5628, "step": 30124 }, { "epoch": 0.9232867475787667, "grad_norm": 1.8609044413074927, "learning_rate": 1.5353458106458551e-07, "loss": 0.5123, "step": 30125 }, { "epoch": 0.9233173961015079, "grad_norm": 2.168948358435581, "learning_rate": 1.5341255623966488e-07, "loss": 0.5777, "step": 30126 }, { "epoch": 0.9233480446242491, "grad_norm": 1.9525324442159229, "learning_rate": 1.5329057916941114e-07, "loss": 0.5419, "step": 30127 }, { "epoch": 0.9233786931469903, "grad_norm": 1.7231696837608799, "learning_rate": 1.5316864985502445e-07, "loss": 0.592, "step": 30128 }, { "epoch": 0.9234093416697315, "grad_norm": 2.1890650957182154, "learning_rate": 1.5304676829770716e-07, "loss": 0.6128, "step": 30129 }, { "epoch": 0.9234399901924727, "grad_norm": 2.072484186567036, "learning_rate": 1.5292493449866053e-07, "loss": 0.6195, "step": 30130 }, { "epoch": 0.923470638715214, "grad_norm": 0.7816452377874767, "learning_rate": 1.5280314845908474e-07, "loss": 0.3946, "step": 30131 }, { "epoch": 0.9235012872379551, "grad_norm": 1.816546993625056, "learning_rate": 1.5268141018017933e-07, "loss": 0.5642, "step": 30132 }, { "epoch": 0.9235319357606964, "grad_norm": 2.0529956622166146, "learning_rate": 1.5255971966314508e-07, "loss": 0.5904, "step": 30133 }, { "epoch": 0.9235625842834375, "grad_norm": 0.828286370442792, "learning_rate": 1.5243807690917932e-07, "loss": 0.4092, "step": 30134 }, { "epoch": 0.9235932328061788, "grad_norm": 1.7250871554940077, "learning_rate": 1.5231648191948224e-07, "loss": 0.5071, "step": 30135 }, { "epoch": 0.9236238813289199, "grad_norm": 1.8501128547664256, "learning_rate": 1.5219493469525004e-07, "loss": 0.6195, "step": 30136 }, { "epoch": 0.9236545298516612, "grad_norm": 0.7466819577064897, "learning_rate": 1.5207343523768237e-07, "loss": 0.4014, "step": 30137 }, { "epoch": 0.9236851783744023, "grad_norm": 1.889683029164006, "learning_rate": 1.5195198354797547e-07, "loss": 0.6052, "step": 30138 }, { "epoch": 0.9237158268971436, "grad_norm": 0.7960577879430237, "learning_rate": 1.5183057962732617e-07, "loss": 0.3923, "step": 30139 }, { "epoch": 0.9237464754198847, "grad_norm": 1.9288619084155239, "learning_rate": 1.5170922347693017e-07, "loss": 0.4997, "step": 30140 }, { "epoch": 0.923777123942626, "grad_norm": 1.875906870334187, "learning_rate": 1.5158791509798432e-07, "loss": 0.5556, "step": 30141 }, { "epoch": 0.9238077724653672, "grad_norm": 1.9077038312599983, "learning_rate": 1.5146665449168262e-07, "loss": 0.5479, "step": 30142 }, { "epoch": 0.9238384209881084, "grad_norm": 2.0613679815857098, "learning_rate": 1.5134544165922083e-07, "loss": 0.4802, "step": 30143 }, { "epoch": 0.9238690695108496, "grad_norm": 1.969083046941238, "learning_rate": 1.5122427660179295e-07, "loss": 0.6367, "step": 30144 }, { "epoch": 0.9238997180335908, "grad_norm": 0.7728356749465064, "learning_rate": 1.5110315932059304e-07, "loss": 0.3792, "step": 30145 }, { "epoch": 0.923930366556332, "grad_norm": 0.7589528953416846, "learning_rate": 1.5098208981681462e-07, "loss": 0.3686, "step": 30146 }, { "epoch": 0.9239610150790731, "grad_norm": 0.7502341403060475, "learning_rate": 1.5086106809164947e-07, "loss": 0.3637, "step": 30147 }, { "epoch": 0.9239916636018144, "grad_norm": 1.9448035248794522, "learning_rate": 1.5074009414629165e-07, "loss": 0.6318, "step": 30148 }, { "epoch": 0.9240223121245555, "grad_norm": 0.809916829097725, "learning_rate": 1.5061916798193242e-07, "loss": 0.395, "step": 30149 }, { "epoch": 0.9240529606472968, "grad_norm": 2.005212361313888, "learning_rate": 1.5049828959976308e-07, "loss": 0.5315, "step": 30150 }, { "epoch": 0.924083609170038, "grad_norm": 1.9303711667823589, "learning_rate": 1.5037745900097435e-07, "loss": 0.5821, "step": 30151 }, { "epoch": 0.9241142576927792, "grad_norm": 1.8570418202554857, "learning_rate": 1.5025667618675855e-07, "loss": 0.5941, "step": 30152 }, { "epoch": 0.9241449062155204, "grad_norm": 1.6577252771992483, "learning_rate": 1.5013594115830367e-07, "loss": 0.4957, "step": 30153 }, { "epoch": 0.9241755547382616, "grad_norm": 1.9522900175307811, "learning_rate": 1.5001525391679982e-07, "loss": 0.5467, "step": 30154 }, { "epoch": 0.9242062032610028, "grad_norm": 2.0203510675224607, "learning_rate": 1.4989461446343723e-07, "loss": 0.5697, "step": 30155 }, { "epoch": 0.924236851783744, "grad_norm": 1.9865433125300378, "learning_rate": 1.4977402279940434e-07, "loss": 0.6144, "step": 30156 }, { "epoch": 0.9242675003064852, "grad_norm": 0.813427335235647, "learning_rate": 1.496534789258891e-07, "loss": 0.4178, "step": 30157 }, { "epoch": 0.9242981488292265, "grad_norm": 2.112096466115649, "learning_rate": 1.495329828440778e-07, "loss": 0.604, "step": 30158 }, { "epoch": 0.9243287973519676, "grad_norm": 0.7494659582510629, "learning_rate": 1.4941253455516002e-07, "loss": 0.4061, "step": 30159 }, { "epoch": 0.9243594458747089, "grad_norm": 1.9930114708706106, "learning_rate": 1.4929213406032205e-07, "loss": 0.5752, "step": 30160 }, { "epoch": 0.92439009439745, "grad_norm": 1.9187386147556982, "learning_rate": 1.4917178136074906e-07, "loss": 0.514, "step": 30161 }, { "epoch": 0.9244207429201913, "grad_norm": 1.9759344721057268, "learning_rate": 1.4905147645762785e-07, "loss": 0.5339, "step": 30162 }, { "epoch": 0.9244513914429324, "grad_norm": 1.8790990815168644, "learning_rate": 1.489312193521436e-07, "loss": 0.5899, "step": 30163 }, { "epoch": 0.9244820399656737, "grad_norm": 1.9669894552494334, "learning_rate": 1.488110100454815e-07, "loss": 0.6025, "step": 30164 }, { "epoch": 0.9245126884884148, "grad_norm": 1.8766491594052568, "learning_rate": 1.4869084853882497e-07, "loss": 0.5155, "step": 30165 }, { "epoch": 0.9245433370111561, "grad_norm": 1.853859217775322, "learning_rate": 1.4857073483335927e-07, "loss": 0.5803, "step": 30166 }, { "epoch": 0.9245739855338972, "grad_norm": 0.798773937536044, "learning_rate": 1.4845066893026783e-07, "loss": 0.403, "step": 30167 }, { "epoch": 0.9246046340566385, "grad_norm": 1.718464066865541, "learning_rate": 1.4833065083073305e-07, "loss": 0.5356, "step": 30168 }, { "epoch": 0.9246352825793797, "grad_norm": 2.189376851230975, "learning_rate": 1.4821068053593734e-07, "loss": 0.6016, "step": 30169 }, { "epoch": 0.9246659311021209, "grad_norm": 2.1692409815650513, "learning_rate": 1.4809075804706363e-07, "loss": 0.5355, "step": 30170 }, { "epoch": 0.9246965796248621, "grad_norm": 2.1327098999680745, "learning_rate": 1.4797088336529264e-07, "loss": 0.5964, "step": 30171 }, { "epoch": 0.9247272281476033, "grad_norm": 1.6881643053418685, "learning_rate": 1.478510564918062e-07, "loss": 0.5684, "step": 30172 }, { "epoch": 0.9247578766703445, "grad_norm": 1.7800173528736154, "learning_rate": 1.4773127742778503e-07, "loss": 0.5591, "step": 30173 }, { "epoch": 0.9247885251930857, "grad_norm": 1.7460982254033355, "learning_rate": 1.476115461744082e-07, "loss": 0.5697, "step": 30174 }, { "epoch": 0.9248191737158269, "grad_norm": 2.2093966036636874, "learning_rate": 1.4749186273285755e-07, "loss": 0.6592, "step": 30175 }, { "epoch": 0.9248498222385682, "grad_norm": 0.7440157444590293, "learning_rate": 1.4737222710431098e-07, "loss": 0.3953, "step": 30176 }, { "epoch": 0.9248804707613093, "grad_norm": 1.9662428123348334, "learning_rate": 1.472526392899465e-07, "loss": 0.5057, "step": 30177 }, { "epoch": 0.9249111192840505, "grad_norm": 2.064102536748657, "learning_rate": 1.471330992909442e-07, "loss": 0.6365, "step": 30178 }, { "epoch": 0.9249417678067917, "grad_norm": 0.8353309828048852, "learning_rate": 1.4701360710848102e-07, "loss": 0.4085, "step": 30179 }, { "epoch": 0.9249724163295329, "grad_norm": 1.8647089526934308, "learning_rate": 1.4689416274373426e-07, "loss": 0.576, "step": 30180 }, { "epoch": 0.9250030648522741, "grad_norm": 1.8088253216750603, "learning_rate": 1.4677476619788078e-07, "loss": 0.4856, "step": 30181 }, { "epoch": 0.9250337133750153, "grad_norm": 1.954385962302232, "learning_rate": 1.4665541747209743e-07, "loss": 0.6225, "step": 30182 }, { "epoch": 0.9250643618977565, "grad_norm": 2.04842766517975, "learning_rate": 1.465361165675605e-07, "loss": 0.558, "step": 30183 }, { "epoch": 0.9250950104204977, "grad_norm": 1.8526110598074648, "learning_rate": 1.464168634854446e-07, "loss": 0.5009, "step": 30184 }, { "epoch": 0.925125658943239, "grad_norm": 1.8899733716511031, "learning_rate": 1.4629765822692487e-07, "loss": 0.5767, "step": 30185 }, { "epoch": 0.9251563074659801, "grad_norm": 1.9499083526958216, "learning_rate": 1.4617850079317707e-07, "loss": 0.6195, "step": 30186 }, { "epoch": 0.9251869559887214, "grad_norm": 0.8085355715905059, "learning_rate": 1.4605939118537415e-07, "loss": 0.399, "step": 30187 }, { "epoch": 0.9252176045114625, "grad_norm": 1.931720083292043, "learning_rate": 1.4594032940468905e-07, "loss": 0.6155, "step": 30188 }, { "epoch": 0.9252482530342038, "grad_norm": 1.7458851074935169, "learning_rate": 1.4582131545229693e-07, "loss": 0.5632, "step": 30189 }, { "epoch": 0.9252789015569449, "grad_norm": 2.111131942845891, "learning_rate": 1.4570234932936912e-07, "loss": 0.5993, "step": 30190 }, { "epoch": 0.9253095500796862, "grad_norm": 1.8305973669911129, "learning_rate": 1.455834310370785e-07, "loss": 0.5309, "step": 30191 }, { "epoch": 0.9253401986024273, "grad_norm": 2.0707809253801504, "learning_rate": 1.4546456057659532e-07, "loss": 0.6729, "step": 30192 }, { "epoch": 0.9253708471251686, "grad_norm": 1.9974764961210545, "learning_rate": 1.4534573794909246e-07, "loss": 0.5771, "step": 30193 }, { "epoch": 0.9254014956479097, "grad_norm": 0.7987826708638458, "learning_rate": 1.452269631557407e-07, "loss": 0.388, "step": 30194 }, { "epoch": 0.925432144170651, "grad_norm": 2.1174615645774653, "learning_rate": 1.451082361977091e-07, "loss": 0.5074, "step": 30195 }, { "epoch": 0.9254627926933922, "grad_norm": 1.9555763944709974, "learning_rate": 1.4498955707616836e-07, "loss": 0.541, "step": 30196 }, { "epoch": 0.9254934412161334, "grad_norm": 2.054120319697463, "learning_rate": 1.4487092579228812e-07, "loss": 0.5059, "step": 30197 }, { "epoch": 0.9255240897388746, "grad_norm": 1.8904587900028589, "learning_rate": 1.4475234234723633e-07, "loss": 0.6307, "step": 30198 }, { "epoch": 0.9255547382616158, "grad_norm": 2.061805539864277, "learning_rate": 1.4463380674218208e-07, "loss": 0.5855, "step": 30199 }, { "epoch": 0.925585386784357, "grad_norm": 1.8398171156481524, "learning_rate": 1.4451531897829384e-07, "loss": 0.5912, "step": 30200 }, { "epoch": 0.9256160353070982, "grad_norm": 2.119812453388559, "learning_rate": 1.443968790567374e-07, "loss": 0.6049, "step": 30201 }, { "epoch": 0.9256466838298394, "grad_norm": 1.744901977307004, "learning_rate": 1.4427848697868175e-07, "loss": 0.5163, "step": 30202 }, { "epoch": 0.9256773323525807, "grad_norm": 0.789061339919895, "learning_rate": 1.4416014274529211e-07, "loss": 0.3918, "step": 30203 }, { "epoch": 0.9257079808753218, "grad_norm": 0.7268667094357432, "learning_rate": 1.4404184635773532e-07, "loss": 0.3708, "step": 30204 }, { "epoch": 0.9257386293980631, "grad_norm": 2.095796494024011, "learning_rate": 1.4392359781717658e-07, "loss": 0.4956, "step": 30205 }, { "epoch": 0.9257692779208042, "grad_norm": 1.9281138351237261, "learning_rate": 1.4380539712478102e-07, "loss": 0.572, "step": 30206 }, { "epoch": 0.9257999264435455, "grad_norm": 2.0649347539805634, "learning_rate": 1.4368724428171333e-07, "loss": 0.5977, "step": 30207 }, { "epoch": 0.9258305749662866, "grad_norm": 2.110845772877831, "learning_rate": 1.4356913928913807e-07, "loss": 0.5197, "step": 30208 }, { "epoch": 0.9258612234890278, "grad_norm": 1.692934573086164, "learning_rate": 1.4345108214821823e-07, "loss": 0.496, "step": 30209 }, { "epoch": 0.925891872011769, "grad_norm": 2.0222273502128676, "learning_rate": 1.4333307286011789e-07, "loss": 0.5368, "step": 30210 }, { "epoch": 0.9259225205345102, "grad_norm": 1.9917520237013344, "learning_rate": 1.4321511142599943e-07, "loss": 0.6138, "step": 30211 }, { "epoch": 0.9259531690572514, "grad_norm": 1.9374240160473657, "learning_rate": 1.430971978470247e-07, "loss": 0.5456, "step": 30212 }, { "epoch": 0.9259838175799926, "grad_norm": 1.710775827805957, "learning_rate": 1.4297933212435665e-07, "loss": 0.5637, "step": 30213 }, { "epoch": 0.9260144661027339, "grad_norm": 0.7707150950558563, "learning_rate": 1.428615142591555e-07, "loss": 0.399, "step": 30214 }, { "epoch": 0.926045114625475, "grad_norm": 1.953173275205177, "learning_rate": 1.4274374425258196e-07, "loss": 0.596, "step": 30215 }, { "epoch": 0.9260757631482163, "grad_norm": 1.7389013103779591, "learning_rate": 1.4262602210579846e-07, "loss": 0.6047, "step": 30216 }, { "epoch": 0.9261064116709574, "grad_norm": 2.047848911132523, "learning_rate": 1.4250834781996237e-07, "loss": 0.5523, "step": 30217 }, { "epoch": 0.9261370601936987, "grad_norm": 2.035735179716976, "learning_rate": 1.4239072139623499e-07, "loss": 0.628, "step": 30218 }, { "epoch": 0.9261677087164398, "grad_norm": 1.7670225927652026, "learning_rate": 1.4227314283577488e-07, "loss": 0.4617, "step": 30219 }, { "epoch": 0.9261983572391811, "grad_norm": 0.7952813644688762, "learning_rate": 1.4215561213973994e-07, "loss": 0.3856, "step": 30220 }, { "epoch": 0.9262290057619222, "grad_norm": 1.9222972030619656, "learning_rate": 1.4203812930928927e-07, "loss": 0.5588, "step": 30221 }, { "epoch": 0.9262596542846635, "grad_norm": 1.7278453319142173, "learning_rate": 1.419206943455792e-07, "loss": 0.5762, "step": 30222 }, { "epoch": 0.9262903028074047, "grad_norm": 1.905456911126969, "learning_rate": 1.4180330724976764e-07, "loss": 0.5383, "step": 30223 }, { "epoch": 0.9263209513301459, "grad_norm": 1.9015137239136093, "learning_rate": 1.4168596802301148e-07, "loss": 0.5036, "step": 30224 }, { "epoch": 0.9263515998528871, "grad_norm": 7.516035816074913, "learning_rate": 1.415686766664659e-07, "loss": 0.57, "step": 30225 }, { "epoch": 0.9263822483756283, "grad_norm": 2.089072428001156, "learning_rate": 1.4145143318128773e-07, "loss": 0.6117, "step": 30226 }, { "epoch": 0.9264128968983695, "grad_norm": 1.6317089687209803, "learning_rate": 1.4133423756863164e-07, "loss": 0.5275, "step": 30227 }, { "epoch": 0.9264435454211107, "grad_norm": 1.919935232866215, "learning_rate": 1.4121708982965278e-07, "loss": 0.5822, "step": 30228 }, { "epoch": 0.9264741939438519, "grad_norm": 1.8622054372590795, "learning_rate": 1.410999899655041e-07, "loss": 0.5397, "step": 30229 }, { "epoch": 0.9265048424665931, "grad_norm": 2.0198648795054064, "learning_rate": 1.4098293797734142e-07, "loss": 0.6299, "step": 30230 }, { "epoch": 0.9265354909893343, "grad_norm": 2.1207213833074836, "learning_rate": 1.4086593386631653e-07, "loss": 0.633, "step": 30231 }, { "epoch": 0.9265661395120756, "grad_norm": 1.8800846315742699, "learning_rate": 1.4074897763358354e-07, "loss": 0.6175, "step": 30232 }, { "epoch": 0.9265967880348167, "grad_norm": 1.7479275333988102, "learning_rate": 1.406320692802937e-07, "loss": 0.5028, "step": 30233 }, { "epoch": 0.926627436557558, "grad_norm": 0.8574100047520695, "learning_rate": 1.4051520880759896e-07, "loss": 0.4203, "step": 30234 }, { "epoch": 0.9266580850802991, "grad_norm": 0.7609132584809805, "learning_rate": 1.403983962166522e-07, "loss": 0.4015, "step": 30235 }, { "epoch": 0.9266887336030404, "grad_norm": 1.6397918107373475, "learning_rate": 1.4028163150860252e-07, "loss": 0.5114, "step": 30236 }, { "epoch": 0.9267193821257815, "grad_norm": 1.985334234367259, "learning_rate": 1.401649146846018e-07, "loss": 0.5858, "step": 30237 }, { "epoch": 0.9267500306485228, "grad_norm": 1.9699014998873885, "learning_rate": 1.4004824574579967e-07, "loss": 0.6131, "step": 30238 }, { "epoch": 0.9267806791712639, "grad_norm": 1.827700453023318, "learning_rate": 1.399316246933452e-07, "loss": 0.534, "step": 30239 }, { "epoch": 0.9268113276940051, "grad_norm": 1.7192532858071636, "learning_rate": 1.3981505152838803e-07, "loss": 0.5659, "step": 30240 }, { "epoch": 0.9268419762167464, "grad_norm": 1.7847621713731492, "learning_rate": 1.3969852625207726e-07, "loss": 0.5851, "step": 30241 }, { "epoch": 0.9268726247394875, "grad_norm": 2.132941706448631, "learning_rate": 1.395820488655597e-07, "loss": 0.5663, "step": 30242 }, { "epoch": 0.9269032732622288, "grad_norm": 2.097746000958309, "learning_rate": 1.3946561936998448e-07, "loss": 0.5447, "step": 30243 }, { "epoch": 0.9269339217849699, "grad_norm": 1.8972839375654087, "learning_rate": 1.3934923776649734e-07, "loss": 0.6076, "step": 30244 }, { "epoch": 0.9269645703077112, "grad_norm": 1.884653723348513, "learning_rate": 1.3923290405624678e-07, "loss": 0.5886, "step": 30245 }, { "epoch": 0.9269952188304523, "grad_norm": 1.8964101094484496, "learning_rate": 1.3911661824037803e-07, "loss": 0.5935, "step": 30246 }, { "epoch": 0.9270258673531936, "grad_norm": 1.8891159852524557, "learning_rate": 1.3900038032003627e-07, "loss": 0.5879, "step": 30247 }, { "epoch": 0.9270565158759347, "grad_norm": 1.9827183335070564, "learning_rate": 1.388841902963678e-07, "loss": 0.5239, "step": 30248 }, { "epoch": 0.927087164398676, "grad_norm": 1.8541385387979088, "learning_rate": 1.3876804817051727e-07, "loss": 0.5853, "step": 30249 }, { "epoch": 0.9271178129214172, "grad_norm": 2.0240153001451007, "learning_rate": 1.3865195394362875e-07, "loss": 0.5888, "step": 30250 }, { "epoch": 0.9271484614441584, "grad_norm": 2.0502118030769516, "learning_rate": 1.3853590761684631e-07, "loss": 0.53, "step": 30251 }, { "epoch": 0.9271791099668996, "grad_norm": 1.870710701497551, "learning_rate": 1.3841990919131354e-07, "loss": 0.5475, "step": 30252 }, { "epoch": 0.9272097584896408, "grad_norm": 1.8456837825347687, "learning_rate": 1.3830395866817336e-07, "loss": 0.6424, "step": 30253 }, { "epoch": 0.927240407012382, "grad_norm": 2.0638114886597276, "learning_rate": 1.3818805604856877e-07, "loss": 0.5742, "step": 30254 }, { "epoch": 0.9272710555351232, "grad_norm": 1.8064871809673229, "learning_rate": 1.3807220133363996e-07, "loss": 0.5001, "step": 30255 }, { "epoch": 0.9273017040578644, "grad_norm": 1.844406140112449, "learning_rate": 1.3795639452453047e-07, "loss": 0.4863, "step": 30256 }, { "epoch": 0.9273323525806056, "grad_norm": 1.8172951500478813, "learning_rate": 1.3784063562238104e-07, "loss": 0.5361, "step": 30257 }, { "epoch": 0.9273630011033468, "grad_norm": 1.7593064849074052, "learning_rate": 1.377249246283313e-07, "loss": 0.5437, "step": 30258 }, { "epoch": 0.9273936496260881, "grad_norm": 2.158646540755094, "learning_rate": 1.376092615435215e-07, "loss": 0.5552, "step": 30259 }, { "epoch": 0.9274242981488292, "grad_norm": 1.6991700888371106, "learning_rate": 1.374936463690929e-07, "loss": 0.5769, "step": 30260 }, { "epoch": 0.9274549466715705, "grad_norm": 1.9287205170217883, "learning_rate": 1.373780791061824e-07, "loss": 0.5348, "step": 30261 }, { "epoch": 0.9274855951943116, "grad_norm": 1.99548948790754, "learning_rate": 1.3726255975593018e-07, "loss": 0.5921, "step": 30262 }, { "epoch": 0.9275162437170529, "grad_norm": 0.7737556343960671, "learning_rate": 1.371470883194742e-07, "loss": 0.3876, "step": 30263 }, { "epoch": 0.927546892239794, "grad_norm": 1.981429480573051, "learning_rate": 1.370316647979525e-07, "loss": 0.5548, "step": 30264 }, { "epoch": 0.9275775407625353, "grad_norm": 1.8652047927735753, "learning_rate": 1.3691628919250245e-07, "loss": 0.5377, "step": 30265 }, { "epoch": 0.9276081892852764, "grad_norm": 2.1738010446464204, "learning_rate": 1.3680096150425925e-07, "loss": 0.5782, "step": 30266 }, { "epoch": 0.9276388378080177, "grad_norm": 1.803454509162057, "learning_rate": 1.366856817343609e-07, "loss": 0.5648, "step": 30267 }, { "epoch": 0.9276694863307589, "grad_norm": 1.9730597647183632, "learning_rate": 1.3657044988394376e-07, "loss": 0.596, "step": 30268 }, { "epoch": 0.9277001348535001, "grad_norm": 1.7692823724090838, "learning_rate": 1.3645526595414126e-07, "loss": 0.5857, "step": 30269 }, { "epoch": 0.9277307833762413, "grad_norm": 1.9976671132314021, "learning_rate": 1.363401299460898e-07, "loss": 0.6018, "step": 30270 }, { "epoch": 0.9277614318989824, "grad_norm": 2.125456849906902, "learning_rate": 1.362250418609229e-07, "loss": 0.5563, "step": 30271 }, { "epoch": 0.9277920804217237, "grad_norm": 0.7685281706654071, "learning_rate": 1.361100016997763e-07, "loss": 0.3746, "step": 30272 }, { "epoch": 0.9278227289444648, "grad_norm": 2.1073970704169436, "learning_rate": 1.3599500946378185e-07, "loss": 0.6062, "step": 30273 }, { "epoch": 0.9278533774672061, "grad_norm": 1.7991381100919097, "learning_rate": 1.3588006515407203e-07, "loss": 0.5131, "step": 30274 }, { "epoch": 0.9278840259899472, "grad_norm": 2.0947461856566005, "learning_rate": 1.3576516877178204e-07, "loss": 0.5679, "step": 30275 }, { "epoch": 0.9279146745126885, "grad_norm": 2.1796274018600865, "learning_rate": 1.3565032031804205e-07, "loss": 0.5412, "step": 30276 }, { "epoch": 0.9279453230354296, "grad_norm": 1.8850612351526441, "learning_rate": 1.3553551979398339e-07, "loss": 0.5868, "step": 30277 }, { "epoch": 0.9279759715581709, "grad_norm": 2.2466644852578996, "learning_rate": 1.3542076720073792e-07, "loss": 0.6016, "step": 30278 }, { "epoch": 0.9280066200809121, "grad_norm": 1.9586065758324935, "learning_rate": 1.3530606253943645e-07, "loss": 0.5506, "step": 30279 }, { "epoch": 0.9280372686036533, "grad_norm": 2.041862041404441, "learning_rate": 1.3519140581120914e-07, "loss": 0.5988, "step": 30280 }, { "epoch": 0.9280679171263945, "grad_norm": 1.7144928330548517, "learning_rate": 1.3507679701718568e-07, "loss": 0.4985, "step": 30281 }, { "epoch": 0.9280985656491357, "grad_norm": 1.9168304000834508, "learning_rate": 1.3496223615849513e-07, "loss": 0.438, "step": 30282 }, { "epoch": 0.9281292141718769, "grad_norm": 1.6785248199754992, "learning_rate": 1.348477232362666e-07, "loss": 0.4653, "step": 30283 }, { "epoch": 0.9281598626946181, "grad_norm": 1.96992975359013, "learning_rate": 1.3473325825162864e-07, "loss": 0.555, "step": 30284 }, { "epoch": 0.9281905112173593, "grad_norm": 0.7596629493858744, "learning_rate": 1.3461884120570756e-07, "loss": 0.3729, "step": 30285 }, { "epoch": 0.9282211597401006, "grad_norm": 2.0926485672701807, "learning_rate": 1.3450447209963303e-07, "loss": 0.6235, "step": 30286 }, { "epoch": 0.9282518082628417, "grad_norm": 2.017563701909049, "learning_rate": 1.3439015093453078e-07, "loss": 0.5825, "step": 30287 }, { "epoch": 0.928282456785583, "grad_norm": 1.9146592070821977, "learning_rate": 1.342758777115266e-07, "loss": 0.4776, "step": 30288 }, { "epoch": 0.9283131053083241, "grad_norm": 1.7139619831766688, "learning_rate": 1.341616524317474e-07, "loss": 0.5812, "step": 30289 }, { "epoch": 0.9283437538310654, "grad_norm": 1.8205189315931603, "learning_rate": 1.3404747509631833e-07, "loss": 0.5289, "step": 30290 }, { "epoch": 0.9283744023538065, "grad_norm": 1.88364162861242, "learning_rate": 1.3393334570636517e-07, "loss": 0.5297, "step": 30291 }, { "epoch": 0.9284050508765478, "grad_norm": 2.01916903677195, "learning_rate": 1.3381926426301095e-07, "loss": 0.6011, "step": 30292 }, { "epoch": 0.9284356993992889, "grad_norm": 1.9109176965435817, "learning_rate": 1.3370523076738083e-07, "loss": 0.5542, "step": 30293 }, { "epoch": 0.9284663479220302, "grad_norm": 2.0759179056822896, "learning_rate": 1.335912452205984e-07, "loss": 0.6207, "step": 30294 }, { "epoch": 0.9284969964447713, "grad_norm": 1.7075359092577462, "learning_rate": 1.3347730762378664e-07, "loss": 0.5741, "step": 30295 }, { "epoch": 0.9285276449675126, "grad_norm": 2.06954045854017, "learning_rate": 1.333634179780674e-07, "loss": 0.5406, "step": 30296 }, { "epoch": 0.9285582934902538, "grad_norm": 0.7525169690112795, "learning_rate": 1.3324957628456424e-07, "loss": 0.383, "step": 30297 }, { "epoch": 0.928588942012995, "grad_norm": 1.7660900777942217, "learning_rate": 1.3313578254439796e-07, "loss": 0.5496, "step": 30298 }, { "epoch": 0.9286195905357362, "grad_norm": 1.9973543558894133, "learning_rate": 1.330220367586904e-07, "loss": 0.5577, "step": 30299 }, { "epoch": 0.9286502390584774, "grad_norm": 1.937670048176482, "learning_rate": 1.329083389285618e-07, "loss": 0.5816, "step": 30300 }, { "epoch": 0.9286808875812186, "grad_norm": 2.108573569673201, "learning_rate": 1.3279468905513237e-07, "loss": 0.5985, "step": 30301 }, { "epoch": 0.9287115361039597, "grad_norm": 1.9309690198751375, "learning_rate": 1.326810871395229e-07, "loss": 0.579, "step": 30302 }, { "epoch": 0.928742184626701, "grad_norm": 1.9708199324562663, "learning_rate": 1.325675331828513e-07, "loss": 0.6363, "step": 30303 }, { "epoch": 0.9287728331494421, "grad_norm": 1.7998165428884934, "learning_rate": 1.3245402718623678e-07, "loss": 0.517, "step": 30304 }, { "epoch": 0.9288034816721834, "grad_norm": 0.8132432123264824, "learning_rate": 1.323405691507995e-07, "loss": 0.4101, "step": 30305 }, { "epoch": 0.9288341301949246, "grad_norm": 1.8587952292258143, "learning_rate": 1.3222715907765471e-07, "loss": 0.5225, "step": 30306 }, { "epoch": 0.9288647787176658, "grad_norm": 1.7817167939600589, "learning_rate": 1.321137969679226e-07, "loss": 0.5077, "step": 30307 }, { "epoch": 0.928895427240407, "grad_norm": 1.9409385002880803, "learning_rate": 1.3200048282271727e-07, "loss": 0.6007, "step": 30308 }, { "epoch": 0.9289260757631482, "grad_norm": 1.746778715105681, "learning_rate": 1.318872166431573e-07, "loss": 0.5884, "step": 30309 }, { "epoch": 0.9289567242858894, "grad_norm": 2.217168067641722, "learning_rate": 1.3177399843035898e-07, "loss": 0.5461, "step": 30310 }, { "epoch": 0.9289873728086306, "grad_norm": 1.805618423791435, "learning_rate": 1.316608281854359e-07, "loss": 0.5401, "step": 30311 }, { "epoch": 0.9290180213313718, "grad_norm": 0.7603653629725331, "learning_rate": 1.3154770590950438e-07, "loss": 0.3885, "step": 30312 }, { "epoch": 0.929048669854113, "grad_norm": 1.8202932296359118, "learning_rate": 1.3143463160367964e-07, "loss": 0.4853, "step": 30313 }, { "epoch": 0.9290793183768542, "grad_norm": 0.8188486802721628, "learning_rate": 1.3132160526907467e-07, "loss": 0.3959, "step": 30314 }, { "epoch": 0.9291099668995955, "grad_norm": 0.8762924744445023, "learning_rate": 1.3120862690680358e-07, "loss": 0.4037, "step": 30315 }, { "epoch": 0.9291406154223366, "grad_norm": 1.7973031839419003, "learning_rate": 1.3109569651797992e-07, "loss": 0.5601, "step": 30316 }, { "epoch": 0.9291712639450779, "grad_norm": 1.9725139760497072, "learning_rate": 1.3098281410371616e-07, "loss": 0.5186, "step": 30317 }, { "epoch": 0.929201912467819, "grad_norm": 1.8292274233130392, "learning_rate": 1.3086997966512472e-07, "loss": 0.6376, "step": 30318 }, { "epoch": 0.9292325609905603, "grad_norm": 1.7282537232964204, "learning_rate": 1.3075719320331636e-07, "loss": 0.549, "step": 30319 }, { "epoch": 0.9292632095133014, "grad_norm": 1.9697086714163485, "learning_rate": 1.3064445471940358e-07, "loss": 0.4925, "step": 30320 }, { "epoch": 0.9292938580360427, "grad_norm": 2.014056854200755, "learning_rate": 1.305317642144971e-07, "loss": 0.5631, "step": 30321 }, { "epoch": 0.9293245065587838, "grad_norm": 1.6651052869124021, "learning_rate": 1.3041912168970715e-07, "loss": 0.4902, "step": 30322 }, { "epoch": 0.9293551550815251, "grad_norm": 1.940008516163006, "learning_rate": 1.303065271461429e-07, "loss": 0.4768, "step": 30323 }, { "epoch": 0.9293858036042663, "grad_norm": 2.29177549180772, "learning_rate": 1.3019398058491507e-07, "loss": 0.563, "step": 30324 }, { "epoch": 0.9294164521270075, "grad_norm": 2.352689476981958, "learning_rate": 1.3008148200713166e-07, "loss": 0.5944, "step": 30325 }, { "epoch": 0.9294471006497487, "grad_norm": 1.5806774003797164, "learning_rate": 1.2996903141390127e-07, "loss": 0.5224, "step": 30326 }, { "epoch": 0.9294777491724899, "grad_norm": 1.7306643093017688, "learning_rate": 1.2985662880633243e-07, "loss": 0.5324, "step": 30327 }, { "epoch": 0.9295083976952311, "grad_norm": 1.9274391932317068, "learning_rate": 1.2974427418553205e-07, "loss": 0.5257, "step": 30328 }, { "epoch": 0.9295390462179723, "grad_norm": 1.778849041692575, "learning_rate": 1.296319675526081e-07, "loss": 0.6328, "step": 30329 }, { "epoch": 0.9295696947407135, "grad_norm": 0.7971298690615559, "learning_rate": 1.2951970890866527e-07, "loss": 0.3686, "step": 30330 }, { "epoch": 0.9296003432634548, "grad_norm": 1.8803297214697077, "learning_rate": 1.294074982548116e-07, "loss": 0.4911, "step": 30331 }, { "epoch": 0.9296309917861959, "grad_norm": 1.838789156057047, "learning_rate": 1.2929533559215223e-07, "loss": 0.5233, "step": 30332 }, { "epoch": 0.929661640308937, "grad_norm": 1.8600318124030248, "learning_rate": 1.2918322092179192e-07, "loss": 0.5994, "step": 30333 }, { "epoch": 0.9296922888316783, "grad_norm": 1.8094204978819133, "learning_rate": 1.290711542448353e-07, "loss": 0.4603, "step": 30334 }, { "epoch": 0.9297229373544195, "grad_norm": 2.0305582506496367, "learning_rate": 1.289591355623876e-07, "loss": 0.5222, "step": 30335 }, { "epoch": 0.9297535858771607, "grad_norm": 1.735317767591369, "learning_rate": 1.2884716487555128e-07, "loss": 0.5609, "step": 30336 }, { "epoch": 0.9297842343999019, "grad_norm": 1.903383455446117, "learning_rate": 1.2873524218542988e-07, "loss": 0.5928, "step": 30337 }, { "epoch": 0.9298148829226431, "grad_norm": 1.7441644203309326, "learning_rate": 1.2862336749312753e-07, "loss": 0.6008, "step": 30338 }, { "epoch": 0.9298455314453843, "grad_norm": 1.864636570685893, "learning_rate": 1.2851154079974448e-07, "loss": 0.6545, "step": 30339 }, { "epoch": 0.9298761799681255, "grad_norm": 0.7888717497059411, "learning_rate": 1.2839976210638482e-07, "loss": 0.3874, "step": 30340 }, { "epoch": 0.9299068284908667, "grad_norm": 1.7032984978063992, "learning_rate": 1.282880314141477e-07, "loss": 0.5025, "step": 30341 }, { "epoch": 0.929937477013608, "grad_norm": 2.3669894718266855, "learning_rate": 1.2817634872413555e-07, "loss": 0.6379, "step": 30342 }, { "epoch": 0.9299681255363491, "grad_norm": 0.7921816192285469, "learning_rate": 1.2806471403744857e-07, "loss": 0.3998, "step": 30343 }, { "epoch": 0.9299987740590904, "grad_norm": 2.2883156043180803, "learning_rate": 1.2795312735518595e-07, "loss": 0.6101, "step": 30344 }, { "epoch": 0.9300294225818315, "grad_norm": 1.8701791952368778, "learning_rate": 1.2784158867844788e-07, "loss": 0.4578, "step": 30345 }, { "epoch": 0.9300600711045728, "grad_norm": 2.009275669451542, "learning_rate": 1.2773009800833346e-07, "loss": 0.5795, "step": 30346 }, { "epoch": 0.9300907196273139, "grad_norm": 1.6919749558342725, "learning_rate": 1.2761865534594077e-07, "loss": 0.4688, "step": 30347 }, { "epoch": 0.9301213681500552, "grad_norm": 1.8502767267640452, "learning_rate": 1.2750726069236774e-07, "loss": 0.5235, "step": 30348 }, { "epoch": 0.9301520166727963, "grad_norm": 0.8554420125514794, "learning_rate": 1.27395914048713e-07, "loss": 0.4008, "step": 30349 }, { "epoch": 0.9301826651955376, "grad_norm": 0.8097881074189925, "learning_rate": 1.2728461541607285e-07, "loss": 0.4063, "step": 30350 }, { "epoch": 0.9302133137182788, "grad_norm": 1.9066976135070317, "learning_rate": 1.2717336479554422e-07, "loss": 0.5463, "step": 30351 }, { "epoch": 0.93024396224102, "grad_norm": 0.7946698991559457, "learning_rate": 1.2706216218822343e-07, "loss": 0.3941, "step": 30352 }, { "epoch": 0.9302746107637612, "grad_norm": 1.885693627118648, "learning_rate": 1.269510075952052e-07, "loss": 0.5677, "step": 30353 }, { "epoch": 0.9303052592865024, "grad_norm": 0.7702294222247479, "learning_rate": 1.2683990101758636e-07, "loss": 0.3801, "step": 30354 }, { "epoch": 0.9303359078092436, "grad_norm": 2.1218944780761553, "learning_rate": 1.2672884245646e-07, "loss": 0.5635, "step": 30355 }, { "epoch": 0.9303665563319848, "grad_norm": 2.055326487135701, "learning_rate": 1.266178319129213e-07, "loss": 0.6795, "step": 30356 }, { "epoch": 0.930397204854726, "grad_norm": 2.0145400173394794, "learning_rate": 1.26506869388065e-07, "loss": 0.5519, "step": 30357 }, { "epoch": 0.9304278533774673, "grad_norm": 1.1080094655503636, "learning_rate": 1.263959548829824e-07, "loss": 0.4101, "step": 30358 }, { "epoch": 0.9304585019002084, "grad_norm": 1.8062553908752002, "learning_rate": 1.2628508839876818e-07, "loss": 0.5497, "step": 30359 }, { "epoch": 0.9304891504229497, "grad_norm": 2.3618217539218276, "learning_rate": 1.2617426993651372e-07, "loss": 0.5737, "step": 30360 }, { "epoch": 0.9305197989456908, "grad_norm": 2.1072074172371917, "learning_rate": 1.260634994973109e-07, "loss": 0.5458, "step": 30361 }, { "epoch": 0.9305504474684321, "grad_norm": 1.963972650026863, "learning_rate": 1.259527770822516e-07, "loss": 0.5268, "step": 30362 }, { "epoch": 0.9305810959911732, "grad_norm": 2.133390256540111, "learning_rate": 1.258421026924267e-07, "loss": 0.4506, "step": 30363 }, { "epoch": 0.9306117445139144, "grad_norm": 1.773781271688466, "learning_rate": 1.2573147632892635e-07, "loss": 0.5178, "step": 30364 }, { "epoch": 0.9306423930366556, "grad_norm": 0.7808316506242561, "learning_rate": 1.2562089799284195e-07, "loss": 0.3963, "step": 30365 }, { "epoch": 0.9306730415593968, "grad_norm": 1.8120647837545865, "learning_rate": 1.2551036768526093e-07, "loss": 0.5278, "step": 30366 }, { "epoch": 0.930703690082138, "grad_norm": 2.0039174132167985, "learning_rate": 1.2539988540727354e-07, "loss": 0.5651, "step": 30367 }, { "epoch": 0.9307343386048792, "grad_norm": 1.806709796871289, "learning_rate": 1.2528945115996892e-07, "loss": 0.5357, "step": 30368 }, { "epoch": 0.9307649871276205, "grad_norm": 2.036605817410067, "learning_rate": 1.2517906494443399e-07, "loss": 0.5291, "step": 30369 }, { "epoch": 0.9307956356503616, "grad_norm": 1.9059528269652735, "learning_rate": 1.2506872676175786e-07, "loss": 0.4704, "step": 30370 }, { "epoch": 0.9308262841731029, "grad_norm": 1.7795213589235017, "learning_rate": 1.2495843661302577e-07, "loss": 0.5858, "step": 30371 }, { "epoch": 0.930856932695844, "grad_norm": 2.0141727278059425, "learning_rate": 1.248481944993263e-07, "loss": 0.5786, "step": 30372 }, { "epoch": 0.9308875812185853, "grad_norm": 0.7713854756426611, "learning_rate": 1.2473800042174465e-07, "loss": 0.3918, "step": 30373 }, { "epoch": 0.9309182297413264, "grad_norm": 1.7749595802328872, "learning_rate": 1.246278543813667e-07, "loss": 0.4911, "step": 30374 }, { "epoch": 0.9309488782640677, "grad_norm": 1.8055870493557233, "learning_rate": 1.245177563792782e-07, "loss": 0.6139, "step": 30375 }, { "epoch": 0.9309795267868088, "grad_norm": 1.9241578128506638, "learning_rate": 1.2440770641656385e-07, "loss": 0.554, "step": 30376 }, { "epoch": 0.9310101753095501, "grad_norm": 0.7869500064025791, "learning_rate": 1.2429770449430723e-07, "loss": 0.3947, "step": 30377 }, { "epoch": 0.9310408238322913, "grad_norm": 1.8626436067072227, "learning_rate": 1.2418775061359302e-07, "loss": 0.5571, "step": 30378 }, { "epoch": 0.9310714723550325, "grad_norm": 2.1387551996341827, "learning_rate": 1.2407784477550477e-07, "loss": 0.5515, "step": 30379 }, { "epoch": 0.9311021208777737, "grad_norm": 1.8961987612276587, "learning_rate": 1.23967986981125e-07, "loss": 0.5422, "step": 30380 }, { "epoch": 0.9311327694005149, "grad_norm": 1.840442851886647, "learning_rate": 1.238581772315367e-07, "loss": 0.5969, "step": 30381 }, { "epoch": 0.9311634179232561, "grad_norm": 1.9270931518111545, "learning_rate": 1.2374841552782014e-07, "loss": 0.5459, "step": 30382 }, { "epoch": 0.9311940664459973, "grad_norm": 0.7725641192014657, "learning_rate": 1.2363870187105886e-07, "loss": 0.3687, "step": 30383 }, { "epoch": 0.9312247149687385, "grad_norm": 0.7861092697117893, "learning_rate": 1.235290362623337e-07, "loss": 0.4033, "step": 30384 }, { "epoch": 0.9312553634914797, "grad_norm": 2.0325804028718544, "learning_rate": 1.234194187027238e-07, "loss": 0.6183, "step": 30385 }, { "epoch": 0.9312860120142209, "grad_norm": 0.8260853676368017, "learning_rate": 1.233098491933099e-07, "loss": 0.3944, "step": 30386 }, { "epoch": 0.9313166605369622, "grad_norm": 1.7556416980093001, "learning_rate": 1.2320032773517233e-07, "loss": 0.4932, "step": 30387 }, { "epoch": 0.9313473090597033, "grad_norm": 0.8034493301113103, "learning_rate": 1.2309085432938962e-07, "loss": 0.3789, "step": 30388 }, { "epoch": 0.9313779575824446, "grad_norm": 1.8131834097182373, "learning_rate": 1.2298142897704035e-07, "loss": 0.4995, "step": 30389 }, { "epoch": 0.9314086061051857, "grad_norm": 1.8999732567136707, "learning_rate": 1.2287205167920313e-07, "loss": 0.4966, "step": 30390 }, { "epoch": 0.931439254627927, "grad_norm": 0.8544499602128677, "learning_rate": 1.2276272243695542e-07, "loss": 0.4256, "step": 30391 }, { "epoch": 0.9314699031506681, "grad_norm": 2.030452108902028, "learning_rate": 1.2265344125137467e-07, "loss": 0.5855, "step": 30392 }, { "epoch": 0.9315005516734094, "grad_norm": 1.658330472472909, "learning_rate": 1.2254420812353674e-07, "loss": 0.5246, "step": 30393 }, { "epoch": 0.9315312001961505, "grad_norm": 0.8244028975135712, "learning_rate": 1.224350230545196e-07, "loss": 0.3928, "step": 30394 }, { "epoch": 0.9315618487188917, "grad_norm": 1.9407904369483944, "learning_rate": 1.2232588604539796e-07, "loss": 0.5453, "step": 30395 }, { "epoch": 0.931592497241633, "grad_norm": 1.9674487534544298, "learning_rate": 1.222167970972471e-07, "loss": 0.6041, "step": 30396 }, { "epoch": 0.9316231457643741, "grad_norm": 0.7755284253392458, "learning_rate": 1.2210775621114224e-07, "loss": 0.3751, "step": 30397 }, { "epoch": 0.9316537942871154, "grad_norm": 2.004701457257427, "learning_rate": 1.2199876338815753e-07, "loss": 0.4881, "step": 30398 }, { "epoch": 0.9316844428098565, "grad_norm": 1.9478214120555393, "learning_rate": 1.2188981862936765e-07, "loss": 0.5307, "step": 30399 }, { "epoch": 0.9317150913325978, "grad_norm": 1.7603160771577961, "learning_rate": 1.2178092193584457e-07, "loss": 0.6011, "step": 30400 }, { "epoch": 0.9317457398553389, "grad_norm": 1.9408482601794996, "learning_rate": 1.2167207330866294e-07, "loss": 0.6058, "step": 30401 }, { "epoch": 0.9317763883780802, "grad_norm": 0.7801108780469107, "learning_rate": 1.2156327274889467e-07, "loss": 0.3769, "step": 30402 }, { "epoch": 0.9318070369008213, "grad_norm": 2.0310058910111115, "learning_rate": 1.2145452025761173e-07, "loss": 0.5781, "step": 30403 }, { "epoch": 0.9318376854235626, "grad_norm": 1.894704543268682, "learning_rate": 1.213458158358849e-07, "loss": 0.5057, "step": 30404 }, { "epoch": 0.9318683339463038, "grad_norm": 1.9581418461856281, "learning_rate": 1.2123715948478608e-07, "loss": 0.528, "step": 30405 }, { "epoch": 0.931898982469045, "grad_norm": 1.7545730375509372, "learning_rate": 1.2112855120538613e-07, "loss": 0.5068, "step": 30406 }, { "epoch": 0.9319296309917862, "grad_norm": 1.9509897160441698, "learning_rate": 1.2101999099875529e-07, "loss": 0.538, "step": 30407 }, { "epoch": 0.9319602795145274, "grad_norm": 1.9867845341709518, "learning_rate": 1.209114788659621e-07, "loss": 0.5805, "step": 30408 }, { "epoch": 0.9319909280372686, "grad_norm": 2.053658414261732, "learning_rate": 1.2080301480807687e-07, "loss": 0.5975, "step": 30409 }, { "epoch": 0.9320215765600098, "grad_norm": 0.8130059424366901, "learning_rate": 1.2069459882616818e-07, "loss": 0.3829, "step": 30410 }, { "epoch": 0.932052225082751, "grad_norm": 1.9623457828923727, "learning_rate": 1.2058623092130406e-07, "loss": 0.5877, "step": 30411 }, { "epoch": 0.9320828736054922, "grad_norm": 2.0435556604148393, "learning_rate": 1.2047791109455087e-07, "loss": 0.5447, "step": 30412 }, { "epoch": 0.9321135221282334, "grad_norm": 1.924039178650051, "learning_rate": 1.2036963934697887e-07, "loss": 0.5372, "step": 30413 }, { "epoch": 0.9321441706509747, "grad_norm": 1.9354599265710761, "learning_rate": 1.2026141567965332e-07, "loss": 0.5797, "step": 30414 }, { "epoch": 0.9321748191737158, "grad_norm": 1.9911155983310158, "learning_rate": 1.201532400936395e-07, "loss": 0.6276, "step": 30415 }, { "epoch": 0.9322054676964571, "grad_norm": 2.0294742354006017, "learning_rate": 1.2004511259000485e-07, "loss": 0.5451, "step": 30416 }, { "epoch": 0.9322361162191982, "grad_norm": 2.6004022328154, "learning_rate": 1.1993703316981408e-07, "loss": 0.6545, "step": 30417 }, { "epoch": 0.9322667647419395, "grad_norm": 1.9045503662533372, "learning_rate": 1.1982900183413304e-07, "loss": 0.5295, "step": 30418 }, { "epoch": 0.9322974132646806, "grad_norm": 1.6634543687514454, "learning_rate": 1.1972101858402475e-07, "loss": 0.5302, "step": 30419 }, { "epoch": 0.9323280617874219, "grad_norm": 1.965424279978089, "learning_rate": 1.1961308342055444e-07, "loss": 0.6634, "step": 30420 }, { "epoch": 0.932358710310163, "grad_norm": 1.8251510990912796, "learning_rate": 1.195051963447852e-07, "loss": 0.6131, "step": 30421 }, { "epoch": 0.9323893588329043, "grad_norm": 1.7280209164343692, "learning_rate": 1.1939735735778e-07, "loss": 0.5295, "step": 30422 }, { "epoch": 0.9324200073556455, "grad_norm": 1.983268412732296, "learning_rate": 1.192895664606003e-07, "loss": 0.5082, "step": 30423 }, { "epoch": 0.9324506558783867, "grad_norm": 1.828564196802525, "learning_rate": 1.1918182365431075e-07, "loss": 0.5778, "step": 30424 }, { "epoch": 0.9324813044011279, "grad_norm": 0.8476933504289318, "learning_rate": 1.190741289399705e-07, "loss": 0.4105, "step": 30425 }, { "epoch": 0.932511952923869, "grad_norm": 1.8912771092054503, "learning_rate": 1.1896648231864205e-07, "loss": 0.5584, "step": 30426 }, { "epoch": 0.9325426014466103, "grad_norm": 0.8026325811025863, "learning_rate": 1.1885888379138565e-07, "loss": 0.3927, "step": 30427 }, { "epoch": 0.9325732499693514, "grad_norm": 1.9701776620773388, "learning_rate": 1.1875133335926159e-07, "loss": 0.5597, "step": 30428 }, { "epoch": 0.9326038984920927, "grad_norm": 2.2421682902620472, "learning_rate": 1.186438310233301e-07, "loss": 0.4702, "step": 30429 }, { "epoch": 0.9326345470148338, "grad_norm": 2.0718324544481614, "learning_rate": 1.1853637678464925e-07, "loss": 0.5365, "step": 30430 }, { "epoch": 0.9326651955375751, "grad_norm": 0.8106780858682594, "learning_rate": 1.1842897064427816e-07, "loss": 0.375, "step": 30431 }, { "epoch": 0.9326958440603162, "grad_norm": 1.9916027678653492, "learning_rate": 1.1832161260327602e-07, "loss": 0.5422, "step": 30432 }, { "epoch": 0.9327264925830575, "grad_norm": 1.8223853365430738, "learning_rate": 1.1821430266269973e-07, "loss": 0.6164, "step": 30433 }, { "epoch": 0.9327571411057987, "grad_norm": 0.792150705958518, "learning_rate": 1.1810704082360735e-07, "loss": 0.3873, "step": 30434 }, { "epoch": 0.9327877896285399, "grad_norm": 1.9176068546477976, "learning_rate": 1.1799982708705526e-07, "loss": 0.5172, "step": 30435 }, { "epoch": 0.9328184381512811, "grad_norm": 1.6743624888984374, "learning_rate": 1.1789266145409984e-07, "loss": 0.525, "step": 30436 }, { "epoch": 0.9328490866740223, "grad_norm": 1.7939122462758905, "learning_rate": 1.1778554392579745e-07, "loss": 0.5212, "step": 30437 }, { "epoch": 0.9328797351967635, "grad_norm": 1.8558245770887571, "learning_rate": 1.176784745032028e-07, "loss": 0.6154, "step": 30438 }, { "epoch": 0.9329103837195047, "grad_norm": 1.8147955584391204, "learning_rate": 1.1757145318737172e-07, "loss": 0.5325, "step": 30439 }, { "epoch": 0.9329410322422459, "grad_norm": 1.7960735393124785, "learning_rate": 1.174644799793584e-07, "loss": 0.5675, "step": 30440 }, { "epoch": 0.9329716807649872, "grad_norm": 0.7838336073208652, "learning_rate": 1.1735755488021638e-07, "loss": 0.3916, "step": 30441 }, { "epoch": 0.9330023292877283, "grad_norm": 1.9834552752255916, "learning_rate": 1.1725067789099986e-07, "loss": 0.596, "step": 30442 }, { "epoch": 0.9330329778104696, "grad_norm": 2.0199762180019785, "learning_rate": 1.1714384901276188e-07, "loss": 0.5696, "step": 30443 }, { "epoch": 0.9330636263332107, "grad_norm": 1.7446180991772455, "learning_rate": 1.1703706824655492e-07, "loss": 0.4852, "step": 30444 }, { "epoch": 0.933094274855952, "grad_norm": 2.2515594734864064, "learning_rate": 1.1693033559343037e-07, "loss": 0.6169, "step": 30445 }, { "epoch": 0.9331249233786931, "grad_norm": 1.7815121922183055, "learning_rate": 1.1682365105444182e-07, "loss": 0.5209, "step": 30446 }, { "epoch": 0.9331555719014344, "grad_norm": 2.0601871743811295, "learning_rate": 1.1671701463063845e-07, "loss": 0.5851, "step": 30447 }, { "epoch": 0.9331862204241755, "grad_norm": 2.151754594954003, "learning_rate": 1.1661042632307218e-07, "loss": 0.5882, "step": 30448 }, { "epoch": 0.9332168689469168, "grad_norm": 0.8055100764575162, "learning_rate": 1.1650388613279218e-07, "loss": 0.386, "step": 30449 }, { "epoch": 0.933247517469658, "grad_norm": 1.8914335298777267, "learning_rate": 1.1639739406084927e-07, "loss": 0.5935, "step": 30450 }, { "epoch": 0.9332781659923992, "grad_norm": 1.8045129256784886, "learning_rate": 1.1629095010829262e-07, "loss": 0.5259, "step": 30451 }, { "epoch": 0.9333088145151404, "grad_norm": 1.5888409235577556, "learning_rate": 1.1618455427617026e-07, "loss": 0.4865, "step": 30452 }, { "epoch": 0.9333394630378816, "grad_norm": 1.7564968193307984, "learning_rate": 1.1607820656553138e-07, "loss": 0.5041, "step": 30453 }, { "epoch": 0.9333701115606228, "grad_norm": 1.921660624112481, "learning_rate": 1.1597190697742344e-07, "loss": 0.5296, "step": 30454 }, { "epoch": 0.933400760083364, "grad_norm": 2.08823929116713, "learning_rate": 1.1586565551289397e-07, "loss": 0.6034, "step": 30455 }, { "epoch": 0.9334314086061052, "grad_norm": 0.8243634847006549, "learning_rate": 1.1575945217299045e-07, "loss": 0.3933, "step": 30456 }, { "epoch": 0.9334620571288463, "grad_norm": 2.153475520132104, "learning_rate": 1.156532969587576e-07, "loss": 0.6143, "step": 30457 }, { "epoch": 0.9334927056515876, "grad_norm": 1.9329113255993065, "learning_rate": 1.1554718987124292e-07, "loss": 0.6142, "step": 30458 }, { "epoch": 0.9335233541743287, "grad_norm": 0.7916393853716633, "learning_rate": 1.1544113091149222e-07, "loss": 0.3925, "step": 30459 }, { "epoch": 0.93355400269707, "grad_norm": 1.897302417989542, "learning_rate": 1.1533512008054915e-07, "loss": 0.5994, "step": 30460 }, { "epoch": 0.9335846512198112, "grad_norm": 0.7645305503514448, "learning_rate": 1.1522915737945895e-07, "loss": 0.3986, "step": 30461 }, { "epoch": 0.9336152997425524, "grad_norm": 1.9491138286116685, "learning_rate": 1.1512324280926635e-07, "loss": 0.5474, "step": 30462 }, { "epoch": 0.9336459482652936, "grad_norm": 1.7884979548406539, "learning_rate": 1.1501737637101329e-07, "loss": 0.5827, "step": 30463 }, { "epoch": 0.9336765967880348, "grad_norm": 1.9881905307794887, "learning_rate": 1.149115580657445e-07, "loss": 0.5793, "step": 30464 }, { "epoch": 0.933707245310776, "grad_norm": 0.8676383701852162, "learning_rate": 1.1480578789450192e-07, "loss": 0.401, "step": 30465 }, { "epoch": 0.9337378938335172, "grad_norm": 1.8568185247125235, "learning_rate": 1.147000658583275e-07, "loss": 0.5606, "step": 30466 }, { "epoch": 0.9337685423562584, "grad_norm": 1.9846198650889928, "learning_rate": 1.1459439195826427e-07, "loss": 0.6106, "step": 30467 }, { "epoch": 0.9337991908789997, "grad_norm": 1.6862404840320644, "learning_rate": 1.1448876619535143e-07, "loss": 0.5388, "step": 30468 }, { "epoch": 0.9338298394017408, "grad_norm": 1.9229047479687615, "learning_rate": 1.1438318857063091e-07, "loss": 0.5683, "step": 30469 }, { "epoch": 0.9338604879244821, "grad_norm": 1.8728124028945186, "learning_rate": 1.1427765908514355e-07, "loss": 0.5776, "step": 30470 }, { "epoch": 0.9338911364472232, "grad_norm": 1.6736387879023615, "learning_rate": 1.1417217773992795e-07, "loss": 0.6039, "step": 30471 }, { "epoch": 0.9339217849699645, "grad_norm": 2.1401789124659936, "learning_rate": 1.1406674453602385e-07, "loss": 0.672, "step": 30472 }, { "epoch": 0.9339524334927056, "grad_norm": 0.8171638097031042, "learning_rate": 1.1396135947447096e-07, "loss": 0.4044, "step": 30473 }, { "epoch": 0.9339830820154469, "grad_norm": 1.7261476031356362, "learning_rate": 1.1385602255630624e-07, "loss": 0.5453, "step": 30474 }, { "epoch": 0.934013730538188, "grad_norm": 1.7088493302962906, "learning_rate": 1.137507337825683e-07, "loss": 0.5717, "step": 30475 }, { "epoch": 0.9340443790609293, "grad_norm": 1.9214173541115007, "learning_rate": 1.1364549315429519e-07, "loss": 0.5705, "step": 30476 }, { "epoch": 0.9340750275836704, "grad_norm": 1.7442241966149028, "learning_rate": 1.1354030067252275e-07, "loss": 0.6075, "step": 30477 }, { "epoch": 0.9341056761064117, "grad_norm": 1.9948620191223134, "learning_rate": 1.134351563382885e-07, "loss": 0.5253, "step": 30478 }, { "epoch": 0.9341363246291529, "grad_norm": 1.928566099909711, "learning_rate": 1.1333006015262659e-07, "loss": 0.5842, "step": 30479 }, { "epoch": 0.9341669731518941, "grad_norm": 1.715259781501888, "learning_rate": 1.1322501211657566e-07, "loss": 0.5058, "step": 30480 }, { "epoch": 0.9341976216746353, "grad_norm": 1.7313839365774617, "learning_rate": 1.131200122311682e-07, "loss": 0.4581, "step": 30481 }, { "epoch": 0.9342282701973765, "grad_norm": 1.7840706912502502, "learning_rate": 1.1301506049743949e-07, "loss": 0.6102, "step": 30482 }, { "epoch": 0.9342589187201177, "grad_norm": 0.8239430824499689, "learning_rate": 1.129101569164237e-07, "loss": 0.4044, "step": 30483 }, { "epoch": 0.9342895672428589, "grad_norm": 1.6702638291395286, "learning_rate": 1.1280530148915503e-07, "loss": 0.5709, "step": 30484 }, { "epoch": 0.9343202157656001, "grad_norm": 1.916496144719198, "learning_rate": 1.1270049421666596e-07, "loss": 0.5829, "step": 30485 }, { "epoch": 0.9343508642883414, "grad_norm": 1.809920303387526, "learning_rate": 1.12595735099989e-07, "loss": 0.5721, "step": 30486 }, { "epoch": 0.9343815128110825, "grad_norm": 2.4332434369866163, "learning_rate": 1.1249102414015722e-07, "loss": 0.6565, "step": 30487 }, { "epoch": 0.9344121613338237, "grad_norm": 0.7815360060665272, "learning_rate": 1.1238636133820202e-07, "loss": 0.3878, "step": 30488 }, { "epoch": 0.9344428098565649, "grad_norm": 2.120890495820727, "learning_rate": 1.122817466951548e-07, "loss": 0.5308, "step": 30489 }, { "epoch": 0.9344734583793061, "grad_norm": 1.8621088315699386, "learning_rate": 1.1217718021204471e-07, "loss": 0.6553, "step": 30490 }, { "epoch": 0.9345041069020473, "grad_norm": 0.7676511578914174, "learning_rate": 1.1207266188990485e-07, "loss": 0.395, "step": 30491 }, { "epoch": 0.9345347554247885, "grad_norm": 1.7080666339349193, "learning_rate": 1.1196819172976325e-07, "loss": 0.5426, "step": 30492 }, { "epoch": 0.9345654039475297, "grad_norm": 1.7691167426034413, "learning_rate": 1.1186376973264967e-07, "loss": 0.5768, "step": 30493 }, { "epoch": 0.9345960524702709, "grad_norm": 1.9506004855009214, "learning_rate": 1.1175939589959328e-07, "loss": 0.5516, "step": 30494 }, { "epoch": 0.9346267009930121, "grad_norm": 1.91025860843002, "learning_rate": 1.1165507023162214e-07, "loss": 0.5858, "step": 30495 }, { "epoch": 0.9346573495157533, "grad_norm": 1.861166779865468, "learning_rate": 1.1155079272976432e-07, "loss": 0.5411, "step": 30496 }, { "epoch": 0.9346879980384946, "grad_norm": 0.7907238022546068, "learning_rate": 1.1144656339504678e-07, "loss": 0.4144, "step": 30497 }, { "epoch": 0.9347186465612357, "grad_norm": 1.7278415521686903, "learning_rate": 1.1134238222849758e-07, "loss": 0.4977, "step": 30498 }, { "epoch": 0.934749295083977, "grad_norm": 1.7841302751115702, "learning_rate": 1.1123824923114257e-07, "loss": 0.4826, "step": 30499 }, { "epoch": 0.9347799436067181, "grad_norm": 2.097282628894226, "learning_rate": 1.1113416440400815e-07, "loss": 0.5794, "step": 30500 }, { "epoch": 0.9348105921294594, "grad_norm": 2.025998696395405, "learning_rate": 1.1103012774811961e-07, "loss": 0.5831, "step": 30501 }, { "epoch": 0.9348412406522005, "grad_norm": 1.8085276569632203, "learning_rate": 1.1092613926450169e-07, "loss": 0.5072, "step": 30502 }, { "epoch": 0.9348718891749418, "grad_norm": 2.002773128538355, "learning_rate": 1.1082219895418022e-07, "loss": 0.6127, "step": 30503 }, { "epoch": 0.934902537697683, "grad_norm": 1.927231298687507, "learning_rate": 1.1071830681817775e-07, "loss": 0.5202, "step": 30504 }, { "epoch": 0.9349331862204242, "grad_norm": 0.8023890467203006, "learning_rate": 1.1061446285751897e-07, "loss": 0.3996, "step": 30505 }, { "epoch": 0.9349638347431654, "grad_norm": 0.8251473014551244, "learning_rate": 1.1051066707322699e-07, "loss": 0.4043, "step": 30506 }, { "epoch": 0.9349944832659066, "grad_norm": 1.9665079770509555, "learning_rate": 1.1040691946632487e-07, "loss": 0.5933, "step": 30507 }, { "epoch": 0.9350251317886478, "grad_norm": 2.0096097608708487, "learning_rate": 1.1030322003783456e-07, "loss": 0.564, "step": 30508 }, { "epoch": 0.935055780311389, "grad_norm": 0.8023744946280961, "learning_rate": 1.1019956878877635e-07, "loss": 0.4, "step": 30509 }, { "epoch": 0.9350864288341302, "grad_norm": 0.8299355783780531, "learning_rate": 1.1009596572017445e-07, "loss": 0.4128, "step": 30510 }, { "epoch": 0.9351170773568714, "grad_norm": 1.774452247474805, "learning_rate": 1.0999241083304746e-07, "loss": 0.5695, "step": 30511 }, { "epoch": 0.9351477258796126, "grad_norm": 1.9062286684503253, "learning_rate": 1.0988890412841624e-07, "loss": 0.5669, "step": 30512 }, { "epoch": 0.9351783744023539, "grad_norm": 1.8924636200867986, "learning_rate": 1.0978544560730109e-07, "loss": 0.51, "step": 30513 }, { "epoch": 0.935209022925095, "grad_norm": 1.984456538639923, "learning_rate": 1.0968203527072119e-07, "loss": 0.6329, "step": 30514 }, { "epoch": 0.9352396714478363, "grad_norm": 1.7754809171708947, "learning_rate": 1.0957867311969516e-07, "loss": 0.6169, "step": 30515 }, { "epoch": 0.9352703199705774, "grad_norm": 1.9092263510348326, "learning_rate": 1.0947535915524166e-07, "loss": 0.5288, "step": 30516 }, { "epoch": 0.9353009684933187, "grad_norm": 2.0655840064364024, "learning_rate": 1.0937209337837872e-07, "loss": 0.6135, "step": 30517 }, { "epoch": 0.9353316170160598, "grad_norm": 2.00877940844232, "learning_rate": 1.0926887579012446e-07, "loss": 0.5689, "step": 30518 }, { "epoch": 0.935362265538801, "grad_norm": 1.8422150395283556, "learning_rate": 1.0916570639149526e-07, "loss": 0.6019, "step": 30519 }, { "epoch": 0.9353929140615422, "grad_norm": 0.8188807102770522, "learning_rate": 1.0906258518350698e-07, "loss": 0.3989, "step": 30520 }, { "epoch": 0.9354235625842834, "grad_norm": 1.7910059573694739, "learning_rate": 1.0895951216717715e-07, "loss": 0.5024, "step": 30521 }, { "epoch": 0.9354542111070246, "grad_norm": 1.907596709551668, "learning_rate": 1.0885648734352105e-07, "loss": 0.5583, "step": 30522 }, { "epoch": 0.9354848596297658, "grad_norm": 1.963945820477966, "learning_rate": 1.0875351071355234e-07, "loss": 0.599, "step": 30523 }, { "epoch": 0.9355155081525071, "grad_norm": 1.8660593191991914, "learning_rate": 1.086505822782874e-07, "loss": 0.5233, "step": 30524 }, { "epoch": 0.9355461566752482, "grad_norm": 1.8660103987344807, "learning_rate": 1.0854770203873932e-07, "loss": 0.5896, "step": 30525 }, { "epoch": 0.9355768051979895, "grad_norm": 2.0702313816568196, "learning_rate": 1.0844486999592285e-07, "loss": 0.657, "step": 30526 }, { "epoch": 0.9356074537207306, "grad_norm": 1.882848725314382, "learning_rate": 1.0834208615084996e-07, "loss": 0.5718, "step": 30527 }, { "epoch": 0.9356381022434719, "grad_norm": 1.7867950177397067, "learning_rate": 1.0823935050453427e-07, "loss": 0.5202, "step": 30528 }, { "epoch": 0.935668750766213, "grad_norm": 1.8713470004483628, "learning_rate": 1.081366630579883e-07, "loss": 0.6394, "step": 30529 }, { "epoch": 0.9356993992889543, "grad_norm": 2.1037861939754925, "learning_rate": 1.080340238122235e-07, "loss": 0.6959, "step": 30530 }, { "epoch": 0.9357300478116954, "grad_norm": 2.1485413159908413, "learning_rate": 1.0793143276825068e-07, "loss": 0.5675, "step": 30531 }, { "epoch": 0.9357606963344367, "grad_norm": 1.7140117242183706, "learning_rate": 1.0782888992708129e-07, "loss": 0.5958, "step": 30532 }, { "epoch": 0.9357913448571779, "grad_norm": 0.753430231137135, "learning_rate": 1.0772639528972562e-07, "loss": 0.3969, "step": 30533 }, { "epoch": 0.9358219933799191, "grad_norm": 0.7750324798177829, "learning_rate": 1.076239488571934e-07, "loss": 0.3839, "step": 30534 }, { "epoch": 0.9358526419026603, "grad_norm": 2.076242701760184, "learning_rate": 1.0752155063049441e-07, "loss": 0.5561, "step": 30535 }, { "epoch": 0.9358832904254015, "grad_norm": 1.6076214176442176, "learning_rate": 1.0741920061063671e-07, "loss": 0.5194, "step": 30536 }, { "epoch": 0.9359139389481427, "grad_norm": 2.3155563945113844, "learning_rate": 1.0731689879863061e-07, "loss": 0.5242, "step": 30537 }, { "epoch": 0.9359445874708839, "grad_norm": 0.7895870526622443, "learning_rate": 1.0721464519548198e-07, "loss": 0.38, "step": 30538 }, { "epoch": 0.9359752359936251, "grad_norm": 1.7180601332337302, "learning_rate": 1.0711243980219943e-07, "loss": 0.539, "step": 30539 }, { "epoch": 0.9360058845163663, "grad_norm": 1.92840083603284, "learning_rate": 1.0701028261979052e-07, "loss": 0.6091, "step": 30540 }, { "epoch": 0.9360365330391075, "grad_norm": 2.1236150581699134, "learning_rate": 1.0690817364926109e-07, "loss": 0.6128, "step": 30541 }, { "epoch": 0.9360671815618488, "grad_norm": 2.0510172862341616, "learning_rate": 1.0680611289161647e-07, "loss": 0.5381, "step": 30542 }, { "epoch": 0.9360978300845899, "grad_norm": 1.924865841829892, "learning_rate": 1.0670410034786416e-07, "loss": 0.5834, "step": 30543 }, { "epoch": 0.9361284786073312, "grad_norm": 2.1039866723439102, "learning_rate": 1.0660213601900782e-07, "loss": 0.5998, "step": 30544 }, { "epoch": 0.9361591271300723, "grad_norm": 1.8768932353473011, "learning_rate": 1.065002199060533e-07, "loss": 0.5902, "step": 30545 }, { "epoch": 0.9361897756528136, "grad_norm": 1.7811917447115695, "learning_rate": 1.0639835201000315e-07, "loss": 0.5669, "step": 30546 }, { "epoch": 0.9362204241755547, "grad_norm": 1.824428575976078, "learning_rate": 1.0629653233186265e-07, "loss": 0.55, "step": 30547 }, { "epoch": 0.936251072698296, "grad_norm": 1.9584300484798602, "learning_rate": 1.0619476087263491e-07, "loss": 0.5487, "step": 30548 }, { "epoch": 0.9362817212210371, "grad_norm": 1.7680411679240695, "learning_rate": 1.060930376333219e-07, "loss": 0.545, "step": 30549 }, { "epoch": 0.9363123697437783, "grad_norm": 1.8667340051082892, "learning_rate": 1.0599136261492615e-07, "loss": 0.5978, "step": 30550 }, { "epoch": 0.9363430182665196, "grad_norm": 2.107423193270652, "learning_rate": 1.0588973581845019e-07, "loss": 0.5274, "step": 30551 }, { "epoch": 0.9363736667892607, "grad_norm": 0.7939237417298767, "learning_rate": 1.0578815724489489e-07, "loss": 0.3857, "step": 30552 }, { "epoch": 0.936404315312002, "grad_norm": 1.910849249875178, "learning_rate": 1.056866268952611e-07, "loss": 0.6158, "step": 30553 }, { "epoch": 0.9364349638347431, "grad_norm": 2.0740262969994303, "learning_rate": 1.055851447705486e-07, "loss": 0.555, "step": 30554 }, { "epoch": 0.9364656123574844, "grad_norm": 1.92704754348519, "learning_rate": 1.0548371087175824e-07, "loss": 0.6005, "step": 30555 }, { "epoch": 0.9364962608802255, "grad_norm": 1.6686538002819695, "learning_rate": 1.0538232519988978e-07, "loss": 0.5173, "step": 30556 }, { "epoch": 0.9365269094029668, "grad_norm": 1.8930405877899517, "learning_rate": 1.0528098775594131e-07, "loss": 0.5355, "step": 30557 }, { "epoch": 0.9365575579257079, "grad_norm": 1.883179713467252, "learning_rate": 1.0517969854091092e-07, "loss": 0.5867, "step": 30558 }, { "epoch": 0.9365882064484492, "grad_norm": 2.0286365049315083, "learning_rate": 1.0507845755579838e-07, "loss": 0.6139, "step": 30559 }, { "epoch": 0.9366188549711904, "grad_norm": 1.852637981635783, "learning_rate": 1.0497726480159954e-07, "loss": 0.5256, "step": 30560 }, { "epoch": 0.9366495034939316, "grad_norm": 0.7902089144076336, "learning_rate": 1.0487612027931249e-07, "loss": 0.379, "step": 30561 }, { "epoch": 0.9366801520166728, "grad_norm": 2.0194029812072776, "learning_rate": 1.0477502398993367e-07, "loss": 0.5839, "step": 30562 }, { "epoch": 0.936710800539414, "grad_norm": 1.82117124807287, "learning_rate": 1.0467397593445838e-07, "loss": 0.5269, "step": 30563 }, { "epoch": 0.9367414490621552, "grad_norm": 0.8123426450307518, "learning_rate": 1.0457297611388362e-07, "loss": 0.3793, "step": 30564 }, { "epoch": 0.9367720975848964, "grad_norm": 1.8413345232698133, "learning_rate": 1.0447202452920357e-07, "loss": 0.5486, "step": 30565 }, { "epoch": 0.9368027461076376, "grad_norm": 2.3241240481028376, "learning_rate": 1.0437112118141302e-07, "loss": 0.4973, "step": 30566 }, { "epoch": 0.9368333946303788, "grad_norm": 1.938470283323702, "learning_rate": 1.042702660715067e-07, "loss": 0.5185, "step": 30567 }, { "epoch": 0.93686404315312, "grad_norm": 1.7818044847556291, "learning_rate": 1.0416945920047771e-07, "loss": 0.499, "step": 30568 }, { "epoch": 0.9368946916758613, "grad_norm": 1.7873118153343954, "learning_rate": 1.0406870056932028e-07, "loss": 0.5651, "step": 30569 }, { "epoch": 0.9369253401986024, "grad_norm": 1.9366720575270175, "learning_rate": 1.0396799017902637e-07, "loss": 0.566, "step": 30570 }, { "epoch": 0.9369559887213437, "grad_norm": 0.8709506809062358, "learning_rate": 1.0386732803058852e-07, "loss": 0.4089, "step": 30571 }, { "epoch": 0.9369866372440848, "grad_norm": 1.9316292554607568, "learning_rate": 1.0376671412499817e-07, "loss": 0.6397, "step": 30572 }, { "epoch": 0.9370172857668261, "grad_norm": 1.9080995127963492, "learning_rate": 1.0366614846324786e-07, "loss": 0.5914, "step": 30573 }, { "epoch": 0.9370479342895672, "grad_norm": 0.8420920203689021, "learning_rate": 1.0356563104632733e-07, "loss": 0.4189, "step": 30574 }, { "epoch": 0.9370785828123085, "grad_norm": 0.7820772460735179, "learning_rate": 1.0346516187522804e-07, "loss": 0.3883, "step": 30575 }, { "epoch": 0.9371092313350496, "grad_norm": 1.6676047682018855, "learning_rate": 1.0336474095093862e-07, "loss": 0.5523, "step": 30576 }, { "epoch": 0.9371398798577909, "grad_norm": 1.9528756764549389, "learning_rate": 1.0326436827444941e-07, "loss": 0.5844, "step": 30577 }, { "epoch": 0.937170528380532, "grad_norm": 2.0257658389041087, "learning_rate": 1.0316404384674961e-07, "loss": 0.5328, "step": 30578 }, { "epoch": 0.9372011769032733, "grad_norm": 1.7791797371947633, "learning_rate": 1.030637676688273e-07, "loss": 0.5879, "step": 30579 }, { "epoch": 0.9372318254260145, "grad_norm": 1.9489620897029263, "learning_rate": 1.0296353974167006e-07, "loss": 0.587, "step": 30580 }, { "epoch": 0.9372624739487556, "grad_norm": 0.8128570483215016, "learning_rate": 1.0286336006626707e-07, "loss": 0.4004, "step": 30581 }, { "epoch": 0.9372931224714969, "grad_norm": 0.7938253594664986, "learning_rate": 1.0276322864360366e-07, "loss": 0.3968, "step": 30582 }, { "epoch": 0.937323770994238, "grad_norm": 1.9570600472206803, "learning_rate": 1.0266314547466683e-07, "loss": 0.5908, "step": 30583 }, { "epoch": 0.9373544195169793, "grad_norm": 2.297376168267138, "learning_rate": 1.025631105604441e-07, "loss": 0.4538, "step": 30584 }, { "epoch": 0.9373850680397204, "grad_norm": 2.0220414188529094, "learning_rate": 1.0246312390191915e-07, "loss": 0.5282, "step": 30585 }, { "epoch": 0.9374157165624617, "grad_norm": 1.9536291672297075, "learning_rate": 1.0236318550007896e-07, "loss": 0.5461, "step": 30586 }, { "epoch": 0.9374463650852028, "grad_norm": 1.457569321845854, "learning_rate": 1.0226329535590607e-07, "loss": 0.3629, "step": 30587 }, { "epoch": 0.9374770136079441, "grad_norm": 1.7800263444841402, "learning_rate": 1.0216345347038748e-07, "loss": 0.5572, "step": 30588 }, { "epoch": 0.9375076621306853, "grad_norm": 1.9180964105197251, "learning_rate": 1.0206365984450516e-07, "loss": 0.6069, "step": 30589 }, { "epoch": 0.9375383106534265, "grad_norm": 1.891807212152802, "learning_rate": 1.0196391447924282e-07, "loss": 0.5929, "step": 30590 }, { "epoch": 0.9375689591761677, "grad_norm": 2.105648100434335, "learning_rate": 1.0186421737558294e-07, "loss": 0.771, "step": 30591 }, { "epoch": 0.9375996076989089, "grad_norm": 0.7760342769839029, "learning_rate": 1.0176456853450812e-07, "loss": 0.4041, "step": 30592 }, { "epoch": 0.9376302562216501, "grad_norm": 0.7936099481259686, "learning_rate": 1.0166496795700031e-07, "loss": 0.4263, "step": 30593 }, { "epoch": 0.9376609047443913, "grad_norm": 1.8757882685638074, "learning_rate": 1.0156541564404099e-07, "loss": 0.5381, "step": 30594 }, { "epoch": 0.9376915532671325, "grad_norm": 2.0674174296901953, "learning_rate": 1.0146591159661157e-07, "loss": 0.7136, "step": 30595 }, { "epoch": 0.9377222017898738, "grad_norm": 2.0295210815290727, "learning_rate": 1.0136645581569071e-07, "loss": 0.613, "step": 30596 }, { "epoch": 0.9377528503126149, "grad_norm": 1.6498582928300598, "learning_rate": 1.0126704830226042e-07, "loss": 0.5229, "step": 30597 }, { "epoch": 0.9377834988353562, "grad_norm": 1.785297855364811, "learning_rate": 1.011676890572988e-07, "loss": 0.5758, "step": 30598 }, { "epoch": 0.9378141473580973, "grad_norm": 0.8024078027532198, "learning_rate": 1.0106837808178561e-07, "loss": 0.4024, "step": 30599 }, { "epoch": 0.9378447958808386, "grad_norm": 1.778972934328205, "learning_rate": 1.0096911537669951e-07, "loss": 0.5006, "step": 30600 }, { "epoch": 0.9378754444035797, "grad_norm": 1.8922069416885303, "learning_rate": 1.0086990094301751e-07, "loss": 0.5269, "step": 30601 }, { "epoch": 0.937906092926321, "grad_norm": 2.209310963060305, "learning_rate": 1.0077073478171773e-07, "loss": 0.6126, "step": 30602 }, { "epoch": 0.9379367414490621, "grad_norm": 1.7852310291674895, "learning_rate": 1.0067161689377825e-07, "loss": 0.5475, "step": 30603 }, { "epoch": 0.9379673899718034, "grad_norm": 0.8981225539427515, "learning_rate": 1.005725472801744e-07, "loss": 0.3996, "step": 30604 }, { "epoch": 0.9379980384945446, "grad_norm": 1.7546301694036175, "learning_rate": 1.004735259418832e-07, "loss": 0.5225, "step": 30605 }, { "epoch": 0.9380286870172858, "grad_norm": 1.8810153207980207, "learning_rate": 1.0037455287987885e-07, "loss": 0.6153, "step": 30606 }, { "epoch": 0.938059335540027, "grad_norm": 1.7474668619576783, "learning_rate": 1.0027562809513836e-07, "loss": 0.5477, "step": 30607 }, { "epoch": 0.9380899840627682, "grad_norm": 1.8913743638796416, "learning_rate": 1.0017675158863649e-07, "loss": 0.5475, "step": 30608 }, { "epoch": 0.9381206325855094, "grad_norm": 1.871672040564825, "learning_rate": 1.0007792336134581e-07, "loss": 0.5725, "step": 30609 }, { "epoch": 0.9381512811082506, "grad_norm": 2.153683475404787, "learning_rate": 9.997914341424109e-08, "loss": 0.632, "step": 30610 }, { "epoch": 0.9381819296309918, "grad_norm": 1.76329568769421, "learning_rate": 9.9880411748296e-08, "loss": 0.5396, "step": 30611 }, { "epoch": 0.938212578153733, "grad_norm": 1.8379108559944595, "learning_rate": 9.978172836448252e-08, "loss": 0.5728, "step": 30612 }, { "epoch": 0.9382432266764742, "grad_norm": 1.782825314482853, "learning_rate": 9.968309326377379e-08, "loss": 0.6024, "step": 30613 }, { "epoch": 0.9382738751992153, "grad_norm": 1.9290808563525779, "learning_rate": 9.958450644714068e-08, "loss": 0.6035, "step": 30614 }, { "epoch": 0.9383045237219566, "grad_norm": 1.7296323434404823, "learning_rate": 9.948596791555632e-08, "loss": 0.5193, "step": 30615 }, { "epoch": 0.9383351722446978, "grad_norm": 1.8377432370336395, "learning_rate": 9.938747766999046e-08, "loss": 0.6803, "step": 30616 }, { "epoch": 0.938365820767439, "grad_norm": 1.884912500851155, "learning_rate": 9.928903571141235e-08, "loss": 0.4512, "step": 30617 }, { "epoch": 0.9383964692901802, "grad_norm": 1.809667842412141, "learning_rate": 9.919064204079454e-08, "loss": 0.5157, "step": 30618 }, { "epoch": 0.9384271178129214, "grad_norm": 1.9444375960759686, "learning_rate": 9.909229665910514e-08, "loss": 0.5213, "step": 30619 }, { "epoch": 0.9384577663356626, "grad_norm": 1.7810499348080462, "learning_rate": 9.899399956731281e-08, "loss": 0.5274, "step": 30620 }, { "epoch": 0.9384884148584038, "grad_norm": 1.844418978567794, "learning_rate": 9.88957507663868e-08, "loss": 0.5862, "step": 30621 }, { "epoch": 0.938519063381145, "grad_norm": 2.127548441149026, "learning_rate": 9.879755025729521e-08, "loss": 0.6728, "step": 30622 }, { "epoch": 0.9385497119038863, "grad_norm": 1.724528708183913, "learning_rate": 9.869939804100504e-08, "loss": 0.5082, "step": 30623 }, { "epoch": 0.9385803604266274, "grad_norm": 1.7469318507792855, "learning_rate": 9.860129411848385e-08, "loss": 0.4667, "step": 30624 }, { "epoch": 0.9386110089493687, "grad_norm": 1.8346566483372817, "learning_rate": 9.850323849069809e-08, "loss": 0.6063, "step": 30625 }, { "epoch": 0.9386416574721098, "grad_norm": 1.9054391159917858, "learning_rate": 9.840523115861423e-08, "loss": 0.5287, "step": 30626 }, { "epoch": 0.9386723059948511, "grad_norm": 1.9840570792051686, "learning_rate": 9.830727212319813e-08, "loss": 0.5472, "step": 30627 }, { "epoch": 0.9387029545175922, "grad_norm": 1.758124785369086, "learning_rate": 9.820936138541348e-08, "loss": 0.5118, "step": 30628 }, { "epoch": 0.9387336030403335, "grad_norm": 1.8603354087117743, "learning_rate": 9.811149894622729e-08, "loss": 0.6456, "step": 30629 }, { "epoch": 0.9387642515630746, "grad_norm": 1.7457450128406384, "learning_rate": 9.801368480660267e-08, "loss": 0.5606, "step": 30630 }, { "epoch": 0.9387949000858159, "grad_norm": 0.7858236239448939, "learning_rate": 9.791591896750275e-08, "loss": 0.3988, "step": 30631 }, { "epoch": 0.938825548608557, "grad_norm": 0.7848838393183531, "learning_rate": 9.781820142989173e-08, "loss": 0.3971, "step": 30632 }, { "epoch": 0.9388561971312983, "grad_norm": 0.7869370431418792, "learning_rate": 9.772053219473221e-08, "loss": 0.3964, "step": 30633 }, { "epoch": 0.9388868456540395, "grad_norm": 2.612061549856875, "learning_rate": 9.762291126298673e-08, "loss": 0.5824, "step": 30634 }, { "epoch": 0.9389174941767807, "grad_norm": 1.8374649440458475, "learning_rate": 9.752533863561676e-08, "loss": 0.5043, "step": 30635 }, { "epoch": 0.9389481426995219, "grad_norm": 1.7436376289261113, "learning_rate": 9.742781431358428e-08, "loss": 0.578, "step": 30636 }, { "epoch": 0.9389787912222631, "grad_norm": 1.9756068257240436, "learning_rate": 9.733033829785021e-08, "loss": 0.5218, "step": 30637 }, { "epoch": 0.9390094397450043, "grad_norm": 1.8066122405809593, "learning_rate": 9.723291058937434e-08, "loss": 0.4795, "step": 30638 }, { "epoch": 0.9390400882677455, "grad_norm": 0.8029551891570746, "learning_rate": 9.713553118911645e-08, "loss": 0.3987, "step": 30639 }, { "epoch": 0.9390707367904867, "grad_norm": 2.3495048598152457, "learning_rate": 9.7038200098038e-08, "loss": 0.5532, "step": 30640 }, { "epoch": 0.939101385313228, "grad_norm": 1.7422064019539156, "learning_rate": 9.694091731709542e-08, "loss": 0.5799, "step": 30641 }, { "epoch": 0.9391320338359691, "grad_norm": 2.2600708979916924, "learning_rate": 9.68436828472491e-08, "loss": 0.6347, "step": 30642 }, { "epoch": 0.9391626823587104, "grad_norm": 1.9857760222087837, "learning_rate": 9.674649668945657e-08, "loss": 0.5319, "step": 30643 }, { "epoch": 0.9391933308814515, "grad_norm": 1.9026790273417622, "learning_rate": 9.664935884467486e-08, "loss": 0.5417, "step": 30644 }, { "epoch": 0.9392239794041927, "grad_norm": 1.9439204212563153, "learning_rate": 9.655226931386208e-08, "loss": 0.5578, "step": 30645 }, { "epoch": 0.9392546279269339, "grad_norm": 1.8052891440712682, "learning_rate": 9.645522809797414e-08, "loss": 0.602, "step": 30646 }, { "epoch": 0.9392852764496751, "grad_norm": 1.8929643474098166, "learning_rate": 9.63582351979675e-08, "loss": 0.5936, "step": 30647 }, { "epoch": 0.9393159249724163, "grad_norm": 1.9656496533756054, "learning_rate": 9.626129061479806e-08, "loss": 0.5774, "step": 30648 }, { "epoch": 0.9393465734951575, "grad_norm": 1.9443302557770474, "learning_rate": 9.61643943494206e-08, "loss": 0.6075, "step": 30649 }, { "epoch": 0.9393772220178987, "grad_norm": 1.9090535557818418, "learning_rate": 9.606754640278993e-08, "loss": 0.4916, "step": 30650 }, { "epoch": 0.9394078705406399, "grad_norm": 1.958270846139174, "learning_rate": 9.597074677586027e-08, "loss": 0.5497, "step": 30651 }, { "epoch": 0.9394385190633812, "grad_norm": 2.1703502904270495, "learning_rate": 9.587399546958586e-08, "loss": 0.6079, "step": 30652 }, { "epoch": 0.9394691675861223, "grad_norm": 1.9620098932631247, "learning_rate": 9.577729248491985e-08, "loss": 0.5613, "step": 30653 }, { "epoch": 0.9394998161088636, "grad_norm": 0.8276033244589128, "learning_rate": 9.568063782281478e-08, "loss": 0.4121, "step": 30654 }, { "epoch": 0.9395304646316047, "grad_norm": 2.071190513676695, "learning_rate": 9.558403148422324e-08, "loss": 0.562, "step": 30655 }, { "epoch": 0.939561113154346, "grad_norm": 1.9821065800653286, "learning_rate": 9.548747347009779e-08, "loss": 0.6546, "step": 30656 }, { "epoch": 0.9395917616770871, "grad_norm": 2.0452838174816512, "learning_rate": 9.539096378138879e-08, "loss": 0.5971, "step": 30657 }, { "epoch": 0.9396224101998284, "grad_norm": 1.9540579317973081, "learning_rate": 9.529450241904659e-08, "loss": 0.5966, "step": 30658 }, { "epoch": 0.9396530587225695, "grad_norm": 0.8336380610679281, "learning_rate": 9.51980893840232e-08, "loss": 0.3925, "step": 30659 }, { "epoch": 0.9396837072453108, "grad_norm": 2.20416912922291, "learning_rate": 9.510172467726786e-08, "loss": 0.5406, "step": 30660 }, { "epoch": 0.939714355768052, "grad_norm": 2.0947012584986147, "learning_rate": 9.500540829973093e-08, "loss": 0.5242, "step": 30661 }, { "epoch": 0.9397450042907932, "grad_norm": 2.06595474844286, "learning_rate": 9.490914025235998e-08, "loss": 0.6449, "step": 30662 }, { "epoch": 0.9397756528135344, "grad_norm": 0.8427488086514479, "learning_rate": 9.481292053610424e-08, "loss": 0.407, "step": 30663 }, { "epoch": 0.9398063013362756, "grad_norm": 0.7670647124941585, "learning_rate": 9.471674915191242e-08, "loss": 0.3879, "step": 30664 }, { "epoch": 0.9398369498590168, "grad_norm": 0.8227094628594452, "learning_rate": 9.462062610073098e-08, "loss": 0.4004, "step": 30665 }, { "epoch": 0.939867598381758, "grad_norm": 2.030127643268019, "learning_rate": 9.452455138350746e-08, "loss": 0.5763, "step": 30666 }, { "epoch": 0.9398982469044992, "grad_norm": 0.781460732424952, "learning_rate": 9.442852500118949e-08, "loss": 0.3758, "step": 30667 }, { "epoch": 0.9399288954272405, "grad_norm": 2.042524255323359, "learning_rate": 9.433254695472183e-08, "loss": 0.594, "step": 30668 }, { "epoch": 0.9399595439499816, "grad_norm": 1.8819243582439757, "learning_rate": 9.42366172450504e-08, "loss": 0.6505, "step": 30669 }, { "epoch": 0.9399901924727229, "grad_norm": 2.036585725258693, "learning_rate": 9.414073587312167e-08, "loss": 0.6391, "step": 30670 }, { "epoch": 0.940020840995464, "grad_norm": 0.8225749166626458, "learning_rate": 9.404490283987877e-08, "loss": 0.4043, "step": 30671 }, { "epoch": 0.9400514895182053, "grad_norm": 0.8310314286423981, "learning_rate": 9.394911814626706e-08, "loss": 0.3929, "step": 30672 }, { "epoch": 0.9400821380409464, "grad_norm": 1.8975442055621308, "learning_rate": 9.385338179322967e-08, "loss": 0.6185, "step": 30673 }, { "epoch": 0.9401127865636877, "grad_norm": 1.962887299774007, "learning_rate": 9.375769378171029e-08, "loss": 0.5302, "step": 30674 }, { "epoch": 0.9401434350864288, "grad_norm": 2.227243444421822, "learning_rate": 9.366205411265205e-08, "loss": 0.6537, "step": 30675 }, { "epoch": 0.94017408360917, "grad_norm": 1.8138353928998852, "learning_rate": 9.356646278699643e-08, "loss": 0.5496, "step": 30676 }, { "epoch": 0.9402047321319112, "grad_norm": 0.8140861817132444, "learning_rate": 9.347091980568601e-08, "loss": 0.3808, "step": 30677 }, { "epoch": 0.9402353806546524, "grad_norm": 2.077276653462024, "learning_rate": 9.337542516966224e-08, "loss": 0.6116, "step": 30678 }, { "epoch": 0.9402660291773937, "grad_norm": 0.8058716206162084, "learning_rate": 9.32799788798655e-08, "loss": 0.3908, "step": 30679 }, { "epoch": 0.9402966777001348, "grad_norm": 2.029480163460199, "learning_rate": 9.318458093723614e-08, "loss": 0.4638, "step": 30680 }, { "epoch": 0.9403273262228761, "grad_norm": 2.001749770100484, "learning_rate": 9.308923134271563e-08, "loss": 0.5844, "step": 30681 }, { "epoch": 0.9403579747456172, "grad_norm": 1.8331080904565253, "learning_rate": 9.299393009724156e-08, "loss": 0.6256, "step": 30682 }, { "epoch": 0.9403886232683585, "grad_norm": 1.8256412135063766, "learning_rate": 9.289867720175427e-08, "loss": 0.5688, "step": 30683 }, { "epoch": 0.9404192717910996, "grad_norm": 1.8980194811692148, "learning_rate": 9.280347265719192e-08, "loss": 0.5669, "step": 30684 }, { "epoch": 0.9404499203138409, "grad_norm": 1.775160081863164, "learning_rate": 9.270831646449207e-08, "loss": 0.4881, "step": 30685 }, { "epoch": 0.940480568836582, "grad_norm": 2.2242716417608333, "learning_rate": 9.261320862459288e-08, "loss": 0.5256, "step": 30686 }, { "epoch": 0.9405112173593233, "grad_norm": 1.918085261989034, "learning_rate": 9.251814913843138e-08, "loss": 0.5364, "step": 30687 }, { "epoch": 0.9405418658820645, "grad_norm": 2.0365589033282454, "learning_rate": 9.242313800694402e-08, "loss": 0.5673, "step": 30688 }, { "epoch": 0.9405725144048057, "grad_norm": 2.0027066559185442, "learning_rate": 9.232817523106785e-08, "loss": 0.6333, "step": 30689 }, { "epoch": 0.9406031629275469, "grad_norm": 2.1144846423024815, "learning_rate": 9.223326081173712e-08, "loss": 0.6037, "step": 30690 }, { "epoch": 0.9406338114502881, "grad_norm": 2.186095397796437, "learning_rate": 9.213839474988829e-08, "loss": 0.7227, "step": 30691 }, { "epoch": 0.9406644599730293, "grad_norm": 1.888013700739318, "learning_rate": 9.204357704645562e-08, "loss": 0.5747, "step": 30692 }, { "epoch": 0.9406951084957705, "grad_norm": 1.9219942722958958, "learning_rate": 9.19488077023728e-08, "loss": 0.5447, "step": 30693 }, { "epoch": 0.9407257570185117, "grad_norm": 0.8138727535390005, "learning_rate": 9.185408671857465e-08, "loss": 0.3926, "step": 30694 }, { "epoch": 0.940756405541253, "grad_norm": 1.777749682759295, "learning_rate": 9.175941409599376e-08, "loss": 0.5163, "step": 30695 }, { "epoch": 0.9407870540639941, "grad_norm": 1.7736200759839142, "learning_rate": 9.16647898355627e-08, "loss": 0.4644, "step": 30696 }, { "epoch": 0.9408177025867354, "grad_norm": 1.8803944096766008, "learning_rate": 9.15702139382152e-08, "loss": 0.5291, "step": 30697 }, { "epoch": 0.9408483511094765, "grad_norm": 1.986441843874043, "learning_rate": 9.147568640488158e-08, "loss": 0.5684, "step": 30698 }, { "epoch": 0.9408789996322178, "grad_norm": 2.0362388217878338, "learning_rate": 9.13812072364939e-08, "loss": 0.619, "step": 30699 }, { "epoch": 0.9409096481549589, "grad_norm": 1.7570409063091086, "learning_rate": 9.128677643398365e-08, "loss": 0.5242, "step": 30700 }, { "epoch": 0.9409402966777002, "grad_norm": 1.8583093700886455, "learning_rate": 9.119239399828062e-08, "loss": 0.5626, "step": 30701 }, { "epoch": 0.9409709452004413, "grad_norm": 1.9475511821387326, "learning_rate": 9.109805993031517e-08, "loss": 0.5221, "step": 30702 }, { "epoch": 0.9410015937231826, "grad_norm": 2.2011140959663877, "learning_rate": 9.100377423101603e-08, "loss": 0.5879, "step": 30703 }, { "epoch": 0.9410322422459237, "grad_norm": 1.789742136288403, "learning_rate": 9.090953690131243e-08, "loss": 0.4915, "step": 30704 }, { "epoch": 0.941062890768665, "grad_norm": 2.0162389447727005, "learning_rate": 9.08153479421342e-08, "loss": 0.6379, "step": 30705 }, { "epoch": 0.9410935392914062, "grad_norm": 1.7897544668463754, "learning_rate": 9.072120735440726e-08, "loss": 0.4898, "step": 30706 }, { "epoch": 0.9411241878141473, "grad_norm": 1.8562802375152774, "learning_rate": 9.062711513906087e-08, "loss": 0.5761, "step": 30707 }, { "epoch": 0.9411548363368886, "grad_norm": 1.7799505973303142, "learning_rate": 9.053307129702204e-08, "loss": 0.484, "step": 30708 }, { "epoch": 0.9411854848596297, "grad_norm": 0.8196392229733329, "learning_rate": 9.043907582921673e-08, "loss": 0.3773, "step": 30709 }, { "epoch": 0.941216133382371, "grad_norm": 1.9158249774959528, "learning_rate": 9.034512873657086e-08, "loss": 0.5748, "step": 30710 }, { "epoch": 0.9412467819051121, "grad_norm": 1.7965449512233664, "learning_rate": 9.025123002001146e-08, "loss": 0.45, "step": 30711 }, { "epoch": 0.9412774304278534, "grad_norm": 1.8113202434546467, "learning_rate": 9.015737968046223e-08, "loss": 0.5434, "step": 30712 }, { "epoch": 0.9413080789505945, "grad_norm": 0.7978888493819439, "learning_rate": 9.00635777188491e-08, "loss": 0.3908, "step": 30713 }, { "epoch": 0.9413387274733358, "grad_norm": 1.90360581723147, "learning_rate": 8.996982413609523e-08, "loss": 0.5689, "step": 30714 }, { "epoch": 0.941369375996077, "grad_norm": 1.8585578720129208, "learning_rate": 8.987611893312542e-08, "loss": 0.4805, "step": 30715 }, { "epoch": 0.9414000245188182, "grad_norm": 1.968484401310985, "learning_rate": 8.978246211086228e-08, "loss": 0.5932, "step": 30716 }, { "epoch": 0.9414306730415594, "grad_norm": 1.949694843421069, "learning_rate": 8.968885367022895e-08, "loss": 0.6108, "step": 30717 }, { "epoch": 0.9414613215643006, "grad_norm": 1.6840115768981356, "learning_rate": 8.959529361214748e-08, "loss": 0.5239, "step": 30718 }, { "epoch": 0.9414919700870418, "grad_norm": 1.8465295313758863, "learning_rate": 8.950178193754045e-08, "loss": 0.6381, "step": 30719 }, { "epoch": 0.941522618609783, "grad_norm": 1.806290973308188, "learning_rate": 8.94083186473288e-08, "loss": 0.6053, "step": 30720 }, { "epoch": 0.9415532671325242, "grad_norm": 0.7866005928656588, "learning_rate": 8.93149037424329e-08, "loss": 0.4021, "step": 30721 }, { "epoch": 0.9415839156552654, "grad_norm": 1.8208889384324585, "learning_rate": 8.922153722377425e-08, "loss": 0.6128, "step": 30722 }, { "epoch": 0.9416145641780066, "grad_norm": 1.8725607611092752, "learning_rate": 8.912821909227154e-08, "loss": 0.5877, "step": 30723 }, { "epoch": 0.9416452127007479, "grad_norm": 1.8001824890909521, "learning_rate": 8.903494934884572e-08, "loss": 0.5263, "step": 30724 }, { "epoch": 0.941675861223489, "grad_norm": 1.7736836768246436, "learning_rate": 8.894172799441436e-08, "loss": 0.5848, "step": 30725 }, { "epoch": 0.9417065097462303, "grad_norm": 1.7304559778596555, "learning_rate": 8.884855502989732e-08, "loss": 0.4503, "step": 30726 }, { "epoch": 0.9417371582689714, "grad_norm": 2.092843781812343, "learning_rate": 8.875543045621216e-08, "loss": 0.5724, "step": 30727 }, { "epoch": 0.9417678067917127, "grad_norm": 0.7815158190837229, "learning_rate": 8.866235427427594e-08, "loss": 0.3825, "step": 30728 }, { "epoch": 0.9417984553144538, "grad_norm": 2.0982716170657794, "learning_rate": 8.856932648500571e-08, "loss": 0.5884, "step": 30729 }, { "epoch": 0.9418291038371951, "grad_norm": 1.9365747490730316, "learning_rate": 8.847634708931963e-08, "loss": 0.638, "step": 30730 }, { "epoch": 0.9418597523599362, "grad_norm": 0.7793467723707076, "learning_rate": 8.838341608813194e-08, "loss": 0.3815, "step": 30731 }, { "epoch": 0.9418904008826775, "grad_norm": 1.7451944457039381, "learning_rate": 8.829053348235917e-08, "loss": 0.4851, "step": 30732 }, { "epoch": 0.9419210494054187, "grad_norm": 1.9798191208023137, "learning_rate": 8.819769927291666e-08, "loss": 0.5195, "step": 30733 }, { "epoch": 0.9419516979281599, "grad_norm": 2.0098955948502777, "learning_rate": 8.810491346071926e-08, "loss": 0.5612, "step": 30734 }, { "epoch": 0.9419823464509011, "grad_norm": 2.002100303313886, "learning_rate": 8.801217604668121e-08, "loss": 0.5973, "step": 30735 }, { "epoch": 0.9420129949736423, "grad_norm": 2.1188913901061417, "learning_rate": 8.79194870317146e-08, "loss": 0.5649, "step": 30736 }, { "epoch": 0.9420436434963835, "grad_norm": 0.7613150177289116, "learning_rate": 8.782684641673533e-08, "loss": 0.3982, "step": 30737 }, { "epoch": 0.9420742920191246, "grad_norm": 1.694139638039624, "learning_rate": 8.773425420265491e-08, "loss": 0.4834, "step": 30738 }, { "epoch": 0.9421049405418659, "grad_norm": 1.9841335501653097, "learning_rate": 8.764171039038538e-08, "loss": 0.5894, "step": 30739 }, { "epoch": 0.942135589064607, "grad_norm": 1.8815103973267513, "learning_rate": 8.754921498083879e-08, "loss": 0.5318, "step": 30740 }, { "epoch": 0.9421662375873483, "grad_norm": 1.7816698619563525, "learning_rate": 8.745676797492664e-08, "loss": 0.6067, "step": 30741 }, { "epoch": 0.9421968861100894, "grad_norm": 1.7584085416668709, "learning_rate": 8.73643693735604e-08, "loss": 0.6712, "step": 30742 }, { "epoch": 0.9422275346328307, "grad_norm": 1.8841709941099993, "learning_rate": 8.727201917764938e-08, "loss": 0.583, "step": 30743 }, { "epoch": 0.9422581831555719, "grad_norm": 1.9551959589352272, "learning_rate": 8.717971738810448e-08, "loss": 0.5955, "step": 30744 }, { "epoch": 0.9422888316783131, "grad_norm": 1.628711820476706, "learning_rate": 8.7087464005835e-08, "loss": 0.5813, "step": 30745 }, { "epoch": 0.9423194802010543, "grad_norm": 0.8267265194129803, "learning_rate": 8.699525903175022e-08, "loss": 0.4112, "step": 30746 }, { "epoch": 0.9423501287237955, "grad_norm": 0.8594362257133937, "learning_rate": 8.690310246675715e-08, "loss": 0.4315, "step": 30747 }, { "epoch": 0.9423807772465367, "grad_norm": 2.091632698680766, "learning_rate": 8.68109943117651e-08, "loss": 0.6343, "step": 30748 }, { "epoch": 0.9424114257692779, "grad_norm": 1.9938757732711834, "learning_rate": 8.671893456768166e-08, "loss": 0.6009, "step": 30749 }, { "epoch": 0.9424420742920191, "grad_norm": 1.8295454840196055, "learning_rate": 8.662692323541388e-08, "loss": 0.6028, "step": 30750 }, { "epoch": 0.9424727228147604, "grad_norm": 1.9872237007372302, "learning_rate": 8.653496031586772e-08, "loss": 0.6037, "step": 30751 }, { "epoch": 0.9425033713375015, "grad_norm": 2.408638359113397, "learning_rate": 8.644304580994967e-08, "loss": 0.5446, "step": 30752 }, { "epoch": 0.9425340198602428, "grad_norm": 1.6968853801820776, "learning_rate": 8.635117971856622e-08, "loss": 0.4104, "step": 30753 }, { "epoch": 0.9425646683829839, "grad_norm": 0.7768050244670885, "learning_rate": 8.625936204262165e-08, "loss": 0.3817, "step": 30754 }, { "epoch": 0.9425953169057252, "grad_norm": 1.9650008514375246, "learning_rate": 8.616759278301967e-08, "loss": 0.588, "step": 30755 }, { "epoch": 0.9426259654284663, "grad_norm": 1.971070081698132, "learning_rate": 8.60758719406668e-08, "loss": 0.5761, "step": 30756 }, { "epoch": 0.9426566139512076, "grad_norm": 1.9855110659069954, "learning_rate": 8.598419951646564e-08, "loss": 0.6366, "step": 30757 }, { "epoch": 0.9426872624739487, "grad_norm": 1.8704174424529054, "learning_rate": 8.58925755113188e-08, "loss": 0.5235, "step": 30758 }, { "epoch": 0.94271791099669, "grad_norm": 2.073024849765034, "learning_rate": 8.580099992613001e-08, "loss": 0.5355, "step": 30759 }, { "epoch": 0.9427485595194312, "grad_norm": 1.7753379950659292, "learning_rate": 8.570947276180131e-08, "loss": 0.5682, "step": 30760 }, { "epoch": 0.9427792080421724, "grad_norm": 1.875087597578183, "learning_rate": 8.561799401923477e-08, "loss": 0.4821, "step": 30761 }, { "epoch": 0.9428098565649136, "grad_norm": 1.7168649721755884, "learning_rate": 8.552656369933132e-08, "loss": 0.572, "step": 30762 }, { "epoch": 0.9428405050876548, "grad_norm": 1.827145391768168, "learning_rate": 8.543518180299192e-08, "loss": 0.4725, "step": 30763 }, { "epoch": 0.942871153610396, "grad_norm": 1.7566892619849301, "learning_rate": 8.534384833111809e-08, "loss": 0.4834, "step": 30764 }, { "epoch": 0.9429018021331372, "grad_norm": 1.7634627313822113, "learning_rate": 8.525256328460796e-08, "loss": 0.6165, "step": 30765 }, { "epoch": 0.9429324506558784, "grad_norm": 1.8057095417386722, "learning_rate": 8.51613266643614e-08, "loss": 0.5359, "step": 30766 }, { "epoch": 0.9429630991786196, "grad_norm": 1.8324055753084325, "learning_rate": 8.507013847127876e-08, "loss": 0.5821, "step": 30767 }, { "epoch": 0.9429937477013608, "grad_norm": 1.863722214291174, "learning_rate": 8.497899870625715e-08, "loss": 0.599, "step": 30768 }, { "epoch": 0.943024396224102, "grad_norm": 2.2554254020187487, "learning_rate": 8.488790737019525e-08, "loss": 0.6207, "step": 30769 }, { "epoch": 0.9430550447468432, "grad_norm": 0.7715938721857639, "learning_rate": 8.47968644639896e-08, "loss": 0.4018, "step": 30770 }, { "epoch": 0.9430856932695844, "grad_norm": 1.6605104432595683, "learning_rate": 8.470586998853891e-08, "loss": 0.4885, "step": 30771 }, { "epoch": 0.9431163417923256, "grad_norm": 1.9262221173325569, "learning_rate": 8.461492394473859e-08, "loss": 0.5678, "step": 30772 }, { "epoch": 0.9431469903150668, "grad_norm": 1.9240488261766464, "learning_rate": 8.452402633348511e-08, "loss": 0.5087, "step": 30773 }, { "epoch": 0.943177638837808, "grad_norm": 2.05330220000334, "learning_rate": 8.44331771556739e-08, "loss": 0.635, "step": 30774 }, { "epoch": 0.9432082873605492, "grad_norm": 0.815572279076092, "learning_rate": 8.434237641220088e-08, "loss": 0.42, "step": 30775 }, { "epoch": 0.9432389358832904, "grad_norm": 1.998645412013749, "learning_rate": 8.425162410395981e-08, "loss": 0.6922, "step": 30776 }, { "epoch": 0.9432695844060316, "grad_norm": 0.7970622570525705, "learning_rate": 8.416092023184441e-08, "loss": 0.384, "step": 30777 }, { "epoch": 0.9433002329287729, "grad_norm": 0.8004424272080015, "learning_rate": 8.407026479675063e-08, "loss": 0.384, "step": 30778 }, { "epoch": 0.943330881451514, "grad_norm": 1.7550736678138215, "learning_rate": 8.39796577995694e-08, "loss": 0.495, "step": 30779 }, { "epoch": 0.9433615299742553, "grad_norm": 1.7821506010812906, "learning_rate": 8.388909924119503e-08, "loss": 0.6161, "step": 30780 }, { "epoch": 0.9433921784969964, "grad_norm": 1.9599838688786975, "learning_rate": 8.379858912251848e-08, "loss": 0.6158, "step": 30781 }, { "epoch": 0.9434228270197377, "grad_norm": 0.7392142884502415, "learning_rate": 8.37081274444329e-08, "loss": 0.3561, "step": 30782 }, { "epoch": 0.9434534755424788, "grad_norm": 1.6861714458433728, "learning_rate": 8.361771420782871e-08, "loss": 0.4958, "step": 30783 }, { "epoch": 0.9434841240652201, "grad_norm": 1.8174202388615803, "learning_rate": 8.352734941359741e-08, "loss": 0.467, "step": 30784 }, { "epoch": 0.9435147725879612, "grad_norm": 1.7412818932012533, "learning_rate": 8.343703306262829e-08, "loss": 0.5107, "step": 30785 }, { "epoch": 0.9435454211107025, "grad_norm": 1.9668698499628803, "learning_rate": 8.334676515581286e-08, "loss": 0.4977, "step": 30786 }, { "epoch": 0.9435760696334436, "grad_norm": 2.044019583700591, "learning_rate": 8.325654569403985e-08, "loss": 0.6201, "step": 30787 }, { "epoch": 0.9436067181561849, "grad_norm": 1.5719994005710314, "learning_rate": 8.316637467819744e-08, "loss": 0.5343, "step": 30788 }, { "epoch": 0.9436373666789261, "grad_norm": 1.9218838298090044, "learning_rate": 8.307625210917548e-08, "loss": 0.6276, "step": 30789 }, { "epoch": 0.9436680152016673, "grad_norm": 1.9814913524595452, "learning_rate": 8.298617798786047e-08, "loss": 0.6252, "step": 30790 }, { "epoch": 0.9436986637244085, "grad_norm": 2.5149796611474144, "learning_rate": 8.289615231514115e-08, "loss": 0.5292, "step": 30791 }, { "epoch": 0.9437293122471497, "grad_norm": 0.824652403603273, "learning_rate": 8.280617509190403e-08, "loss": 0.4092, "step": 30792 }, { "epoch": 0.9437599607698909, "grad_norm": 2.0424388672714477, "learning_rate": 8.271624631903564e-08, "loss": 0.6023, "step": 30793 }, { "epoch": 0.9437906092926321, "grad_norm": 1.7226600848798084, "learning_rate": 8.262636599742301e-08, "loss": 0.5068, "step": 30794 }, { "epoch": 0.9438212578153733, "grad_norm": 1.9826069624497746, "learning_rate": 8.253653412794994e-08, "loss": 0.5893, "step": 30795 }, { "epoch": 0.9438519063381146, "grad_norm": 2.1017130420668515, "learning_rate": 8.244675071150287e-08, "loss": 0.6416, "step": 30796 }, { "epoch": 0.9438825548608557, "grad_norm": 1.998726253581635, "learning_rate": 8.23570157489667e-08, "loss": 0.5529, "step": 30797 }, { "epoch": 0.943913203383597, "grad_norm": 1.842832084036919, "learning_rate": 8.22673292412246e-08, "loss": 0.5339, "step": 30798 }, { "epoch": 0.9439438519063381, "grad_norm": 1.8228760420288226, "learning_rate": 8.217769118916085e-08, "loss": 0.5064, "step": 30799 }, { "epoch": 0.9439745004290793, "grad_norm": 2.0793161504146433, "learning_rate": 8.208810159365865e-08, "loss": 0.5988, "step": 30800 }, { "epoch": 0.9440051489518205, "grad_norm": 2.0954201474839325, "learning_rate": 8.199856045560062e-08, "loss": 0.6383, "step": 30801 }, { "epoch": 0.9440357974745617, "grad_norm": 2.002774922603127, "learning_rate": 8.190906777586938e-08, "loss": 0.507, "step": 30802 }, { "epoch": 0.9440664459973029, "grad_norm": 1.6487595163882016, "learning_rate": 8.18196235553459e-08, "loss": 0.4289, "step": 30803 }, { "epoch": 0.9440970945200441, "grad_norm": 1.9300279939012512, "learning_rate": 8.173022779491224e-08, "loss": 0.6348, "step": 30804 }, { "epoch": 0.9441277430427854, "grad_norm": 1.9610089585237165, "learning_rate": 8.164088049544938e-08, "loss": 0.5967, "step": 30805 }, { "epoch": 0.9441583915655265, "grad_norm": 0.7948180144489922, "learning_rate": 8.155158165783661e-08, "loss": 0.404, "step": 30806 }, { "epoch": 0.9441890400882678, "grad_norm": 1.8753559056493632, "learning_rate": 8.146233128295489e-08, "loss": 0.6002, "step": 30807 }, { "epoch": 0.9442196886110089, "grad_norm": 1.9251770901463738, "learning_rate": 8.137312937168407e-08, "loss": 0.5668, "step": 30808 }, { "epoch": 0.9442503371337502, "grad_norm": 1.9696372955316814, "learning_rate": 8.128397592490123e-08, "loss": 0.6296, "step": 30809 }, { "epoch": 0.9442809856564913, "grad_norm": 1.791628944884019, "learning_rate": 8.119487094348677e-08, "loss": 0.6144, "step": 30810 }, { "epoch": 0.9443116341792326, "grad_norm": 1.7965476171657218, "learning_rate": 8.110581442831666e-08, "loss": 0.5417, "step": 30811 }, { "epoch": 0.9443422827019737, "grad_norm": 1.9556793913989357, "learning_rate": 8.10168063802702e-08, "loss": 0.5427, "step": 30812 }, { "epoch": 0.944372931224715, "grad_norm": 2.0292135454407045, "learning_rate": 8.092784680022391e-08, "loss": 0.6486, "step": 30813 }, { "epoch": 0.9444035797474561, "grad_norm": 1.8425146268627919, "learning_rate": 8.083893568905376e-08, "loss": 0.5422, "step": 30814 }, { "epoch": 0.9444342282701974, "grad_norm": 1.8960540611044658, "learning_rate": 8.075007304763626e-08, "loss": 0.5331, "step": 30815 }, { "epoch": 0.9444648767929386, "grad_norm": 0.8240392099656495, "learning_rate": 8.066125887684739e-08, "loss": 0.4003, "step": 30816 }, { "epoch": 0.9444955253156798, "grad_norm": 1.8684856212806866, "learning_rate": 8.057249317756089e-08, "loss": 0.5338, "step": 30817 }, { "epoch": 0.944526173838421, "grad_norm": 1.9458426758952498, "learning_rate": 8.048377595065271e-08, "loss": 0.6441, "step": 30818 }, { "epoch": 0.9445568223611622, "grad_norm": 1.8248774922345912, "learning_rate": 8.039510719699717e-08, "loss": 0.5194, "step": 30819 }, { "epoch": 0.9445874708839034, "grad_norm": 2.0028923301562958, "learning_rate": 8.03064869174669e-08, "loss": 0.5804, "step": 30820 }, { "epoch": 0.9446181194066446, "grad_norm": 1.7251705334258538, "learning_rate": 8.021791511293564e-08, "loss": 0.5665, "step": 30821 }, { "epoch": 0.9446487679293858, "grad_norm": 2.2277666995713115, "learning_rate": 8.012939178427547e-08, "loss": 0.5319, "step": 30822 }, { "epoch": 0.944679416452127, "grad_norm": 0.8180288162018265, "learning_rate": 8.004091693236016e-08, "loss": 0.3939, "step": 30823 }, { "epoch": 0.9447100649748682, "grad_norm": 1.99254953114895, "learning_rate": 7.995249055806009e-08, "loss": 0.5704, "step": 30824 }, { "epoch": 0.9447407134976095, "grad_norm": 1.9562156207204784, "learning_rate": 7.986411266224681e-08, "loss": 0.533, "step": 30825 }, { "epoch": 0.9447713620203506, "grad_norm": 1.9761398516571298, "learning_rate": 7.977578324579127e-08, "loss": 0.5402, "step": 30826 }, { "epoch": 0.9448020105430919, "grad_norm": 0.8021618331709844, "learning_rate": 7.968750230956445e-08, "loss": 0.3915, "step": 30827 }, { "epoch": 0.944832659065833, "grad_norm": 1.8071565614054725, "learning_rate": 7.959926985443511e-08, "loss": 0.548, "step": 30828 }, { "epoch": 0.9448633075885743, "grad_norm": 1.8127152906420407, "learning_rate": 7.95110858812731e-08, "loss": 0.5984, "step": 30829 }, { "epoch": 0.9448939561113154, "grad_norm": 1.773869321018094, "learning_rate": 7.942295039094771e-08, "loss": 0.4696, "step": 30830 }, { "epoch": 0.9449246046340566, "grad_norm": 0.9778885200933358, "learning_rate": 7.933486338432661e-08, "loss": 0.3969, "step": 30831 }, { "epoch": 0.9449552531567978, "grad_norm": 1.9671591698973536, "learning_rate": 7.924682486227797e-08, "loss": 0.5635, "step": 30832 }, { "epoch": 0.944985901679539, "grad_norm": 2.005978830606242, "learning_rate": 7.915883482566943e-08, "loss": 0.5857, "step": 30833 }, { "epoch": 0.9450165502022803, "grad_norm": 1.869661851669318, "learning_rate": 7.90708932753681e-08, "loss": 0.5877, "step": 30834 }, { "epoch": 0.9450471987250214, "grad_norm": 2.011317367604866, "learning_rate": 7.898300021224048e-08, "loss": 0.5999, "step": 30835 }, { "epoch": 0.9450778472477627, "grad_norm": 1.798276131309933, "learning_rate": 7.889515563715256e-08, "loss": 0.5278, "step": 30836 }, { "epoch": 0.9451084957705038, "grad_norm": 1.7523887402957607, "learning_rate": 7.880735955096918e-08, "loss": 0.548, "step": 30837 }, { "epoch": 0.9451391442932451, "grad_norm": 2.1832860761882453, "learning_rate": 7.871961195455635e-08, "loss": 0.5621, "step": 30838 }, { "epoch": 0.9451697928159862, "grad_norm": 1.9517632154011808, "learning_rate": 7.863191284877836e-08, "loss": 0.564, "step": 30839 }, { "epoch": 0.9452004413387275, "grad_norm": 1.8611410851602854, "learning_rate": 7.854426223449951e-08, "loss": 0.4537, "step": 30840 }, { "epoch": 0.9452310898614686, "grad_norm": 1.703246882170462, "learning_rate": 7.845666011258247e-08, "loss": 0.5206, "step": 30841 }, { "epoch": 0.9452617383842099, "grad_norm": 2.1633662548376376, "learning_rate": 7.836910648389206e-08, "loss": 0.5807, "step": 30842 }, { "epoch": 0.945292386906951, "grad_norm": 1.7028050591150836, "learning_rate": 7.828160134929041e-08, "loss": 0.508, "step": 30843 }, { "epoch": 0.9453230354296923, "grad_norm": 0.802692261814572, "learning_rate": 7.819414470963848e-08, "loss": 0.401, "step": 30844 }, { "epoch": 0.9453536839524335, "grad_norm": 1.819694000763012, "learning_rate": 7.810673656579947e-08, "loss": 0.5897, "step": 30845 }, { "epoch": 0.9453843324751747, "grad_norm": 1.61654079660305, "learning_rate": 7.801937691863381e-08, "loss": 0.4706, "step": 30846 }, { "epoch": 0.9454149809979159, "grad_norm": 2.132931691724455, "learning_rate": 7.793206576900247e-08, "loss": 0.6449, "step": 30847 }, { "epoch": 0.9454456295206571, "grad_norm": 1.7247931974701045, "learning_rate": 7.784480311776588e-08, "loss": 0.5032, "step": 30848 }, { "epoch": 0.9454762780433983, "grad_norm": 1.8094041166245547, "learning_rate": 7.775758896578445e-08, "loss": 0.5988, "step": 30849 }, { "epoch": 0.9455069265661395, "grad_norm": 2.001295469650313, "learning_rate": 7.767042331391638e-08, "loss": 0.5637, "step": 30850 }, { "epoch": 0.9455375750888807, "grad_norm": 1.8903515848767187, "learning_rate": 7.758330616302156e-08, "loss": 0.5929, "step": 30851 }, { "epoch": 0.945568223611622, "grad_norm": 1.9806717597714945, "learning_rate": 7.749623751395707e-08, "loss": 0.4918, "step": 30852 }, { "epoch": 0.9455988721343631, "grad_norm": 2.449874528321828, "learning_rate": 7.740921736758222e-08, "loss": 0.5763, "step": 30853 }, { "epoch": 0.9456295206571044, "grad_norm": 1.898199163436087, "learning_rate": 7.732224572475355e-08, "loss": 0.5782, "step": 30854 }, { "epoch": 0.9456601691798455, "grad_norm": 2.1190686524561766, "learning_rate": 7.72353225863287e-08, "loss": 0.5446, "step": 30855 }, { "epoch": 0.9456908177025868, "grad_norm": 1.7314309379943642, "learning_rate": 7.714844795316312e-08, "loss": 0.5439, "step": 30856 }, { "epoch": 0.9457214662253279, "grad_norm": 2.1377385042231847, "learning_rate": 7.706162182611387e-08, "loss": 0.6245, "step": 30857 }, { "epoch": 0.9457521147480692, "grad_norm": 1.9205258615927319, "learning_rate": 7.697484420603584e-08, "loss": 0.5718, "step": 30858 }, { "epoch": 0.9457827632708103, "grad_norm": 1.942000348891351, "learning_rate": 7.688811509378447e-08, "loss": 0.605, "step": 30859 }, { "epoch": 0.9458134117935516, "grad_norm": 1.8756442188346918, "learning_rate": 7.680143449021404e-08, "loss": 0.5064, "step": 30860 }, { "epoch": 0.9458440603162928, "grad_norm": 2.0893059827228475, "learning_rate": 7.67148023961789e-08, "loss": 0.6355, "step": 30861 }, { "epoch": 0.9458747088390339, "grad_norm": 0.8549054688848124, "learning_rate": 7.662821881253279e-08, "loss": 0.4082, "step": 30862 }, { "epoch": 0.9459053573617752, "grad_norm": 1.9023922194427927, "learning_rate": 7.654168374012782e-08, "loss": 0.4823, "step": 30863 }, { "epoch": 0.9459360058845163, "grad_norm": 0.8020266068423344, "learning_rate": 7.645519717981775e-08, "loss": 0.4021, "step": 30864 }, { "epoch": 0.9459666544072576, "grad_norm": 2.104561788502694, "learning_rate": 7.636875913245467e-08, "loss": 0.4987, "step": 30865 }, { "epoch": 0.9459973029299987, "grad_norm": 1.9363294493359229, "learning_rate": 7.628236959888902e-08, "loss": 0.4902, "step": 30866 }, { "epoch": 0.94602795145274, "grad_norm": 1.9483104430250937, "learning_rate": 7.619602857997344e-08, "loss": 0.5002, "step": 30867 }, { "epoch": 0.9460585999754811, "grad_norm": 1.9472387534993332, "learning_rate": 7.610973607655836e-08, "loss": 0.592, "step": 30868 }, { "epoch": 0.9460892484982224, "grad_norm": 2.1202954223426853, "learning_rate": 7.602349208949422e-08, "loss": 0.5654, "step": 30869 }, { "epoch": 0.9461198970209636, "grad_norm": 2.092687189843874, "learning_rate": 7.593729661962979e-08, "loss": 0.6368, "step": 30870 }, { "epoch": 0.9461505455437048, "grad_norm": 2.2408438986613795, "learning_rate": 7.585114966781493e-08, "loss": 0.5061, "step": 30871 }, { "epoch": 0.946181194066446, "grad_norm": 1.912631861499024, "learning_rate": 7.576505123489952e-08, "loss": 0.5367, "step": 30872 }, { "epoch": 0.9462118425891872, "grad_norm": 1.7664442219276146, "learning_rate": 7.567900132173067e-08, "loss": 0.615, "step": 30873 }, { "epoch": 0.9462424911119284, "grad_norm": 0.7998272718691155, "learning_rate": 7.559299992915602e-08, "loss": 0.4057, "step": 30874 }, { "epoch": 0.9462731396346696, "grad_norm": 1.918557105485891, "learning_rate": 7.550704705802381e-08, "loss": 0.5848, "step": 30875 }, { "epoch": 0.9463037881574108, "grad_norm": 0.7975651894406763, "learning_rate": 7.542114270918111e-08, "loss": 0.3846, "step": 30876 }, { "epoch": 0.946334436680152, "grad_norm": 2.0892091053791875, "learning_rate": 7.533528688347336e-08, "loss": 0.6, "step": 30877 }, { "epoch": 0.9463650852028932, "grad_norm": 1.7695611015459554, "learning_rate": 7.524947958174655e-08, "loss": 0.4603, "step": 30878 }, { "epoch": 0.9463957337256345, "grad_norm": 1.9625061567613178, "learning_rate": 7.516372080484724e-08, "loss": 0.5231, "step": 30879 }, { "epoch": 0.9464263822483756, "grad_norm": 2.029628469145157, "learning_rate": 7.50780105536203e-08, "loss": 0.5206, "step": 30880 }, { "epoch": 0.9464570307711169, "grad_norm": 2.115794533439602, "learning_rate": 7.499234882890949e-08, "loss": 0.5534, "step": 30881 }, { "epoch": 0.946487679293858, "grad_norm": 2.035686627795528, "learning_rate": 7.49067356315586e-08, "loss": 0.5544, "step": 30882 }, { "epoch": 0.9465183278165993, "grad_norm": 2.200659318516957, "learning_rate": 7.482117096241248e-08, "loss": 0.4961, "step": 30883 }, { "epoch": 0.9465489763393404, "grad_norm": 0.795541496610307, "learning_rate": 7.473565482231382e-08, "loss": 0.3967, "step": 30884 }, { "epoch": 0.9465796248620817, "grad_norm": 0.7844098140885174, "learning_rate": 7.465018721210416e-08, "loss": 0.3782, "step": 30885 }, { "epoch": 0.9466102733848228, "grad_norm": 1.9514999110468432, "learning_rate": 7.45647681326267e-08, "loss": 0.5537, "step": 30886 }, { "epoch": 0.9466409219075641, "grad_norm": 1.949747859210283, "learning_rate": 7.447939758472245e-08, "loss": 0.5755, "step": 30887 }, { "epoch": 0.9466715704303053, "grad_norm": 0.8338501146864048, "learning_rate": 7.43940755692335e-08, "loss": 0.4278, "step": 30888 }, { "epoch": 0.9467022189530465, "grad_norm": 2.0453881758003827, "learning_rate": 7.430880208699975e-08, "loss": 0.5433, "step": 30889 }, { "epoch": 0.9467328674757877, "grad_norm": 1.8327238123232852, "learning_rate": 7.422357713886163e-08, "loss": 0.5173, "step": 30890 }, { "epoch": 0.9467635159985289, "grad_norm": 1.908808249294877, "learning_rate": 7.413840072565959e-08, "loss": 0.5224, "step": 30891 }, { "epoch": 0.9467941645212701, "grad_norm": 2.023593128690564, "learning_rate": 7.405327284823128e-08, "loss": 0.5612, "step": 30892 }, { "epoch": 0.9468248130440112, "grad_norm": 1.6990567015523828, "learning_rate": 7.396819350741657e-08, "loss": 0.5758, "step": 30893 }, { "epoch": 0.9468554615667525, "grad_norm": 1.8958958180863643, "learning_rate": 7.388316270405427e-08, "loss": 0.5524, "step": 30894 }, { "epoch": 0.9468861100894936, "grad_norm": 1.9351201124627895, "learning_rate": 7.37981804389809e-08, "loss": 0.5539, "step": 30895 }, { "epoch": 0.9469167586122349, "grad_norm": 1.7969712397051942, "learning_rate": 7.37132467130347e-08, "loss": 0.4817, "step": 30896 }, { "epoch": 0.946947407134976, "grad_norm": 1.9028657527503734, "learning_rate": 7.362836152705221e-08, "loss": 0.5545, "step": 30897 }, { "epoch": 0.9469780556577173, "grad_norm": 1.9537539147507301, "learning_rate": 7.354352488187e-08, "loss": 0.6457, "step": 30898 }, { "epoch": 0.9470087041804585, "grad_norm": 1.8052042655382121, "learning_rate": 7.34587367783246e-08, "loss": 0.5448, "step": 30899 }, { "epoch": 0.9470393527031997, "grad_norm": 2.1045126931671216, "learning_rate": 7.33739972172498e-08, "loss": 0.6162, "step": 30900 }, { "epoch": 0.9470700012259409, "grad_norm": 0.8186609017697417, "learning_rate": 7.32893061994816e-08, "loss": 0.4202, "step": 30901 }, { "epoch": 0.9471006497486821, "grad_norm": 2.1110578648577043, "learning_rate": 7.320466372585544e-08, "loss": 0.5476, "step": 30902 }, { "epoch": 0.9471312982714233, "grad_norm": 1.7739712113872788, "learning_rate": 7.312006979720344e-08, "loss": 0.501, "step": 30903 }, { "epoch": 0.9471619467941645, "grad_norm": 1.9628043419843804, "learning_rate": 7.30355244143599e-08, "loss": 0.5782, "step": 30904 }, { "epoch": 0.9471925953169057, "grad_norm": 1.8793095856322024, "learning_rate": 7.295102757815864e-08, "loss": 0.4839, "step": 30905 }, { "epoch": 0.947223243839647, "grad_norm": 2.1435340067528763, "learning_rate": 7.286657928943064e-08, "loss": 0.5369, "step": 30906 }, { "epoch": 0.9472538923623881, "grad_norm": 1.810430197743001, "learning_rate": 7.278217954900968e-08, "loss": 0.4933, "step": 30907 }, { "epoch": 0.9472845408851294, "grad_norm": 1.8667223501835069, "learning_rate": 7.269782835772621e-08, "loss": 0.655, "step": 30908 }, { "epoch": 0.9473151894078705, "grad_norm": 1.9225278234353838, "learning_rate": 7.261352571641179e-08, "loss": 0.5441, "step": 30909 }, { "epoch": 0.9473458379306118, "grad_norm": 1.8338780617088204, "learning_rate": 7.252927162589684e-08, "loss": 0.5702, "step": 30910 }, { "epoch": 0.9473764864533529, "grad_norm": 1.7464253255294468, "learning_rate": 7.244506608701186e-08, "loss": 0.5755, "step": 30911 }, { "epoch": 0.9474071349760942, "grad_norm": 1.913654299449949, "learning_rate": 7.236090910058668e-08, "loss": 0.5375, "step": 30912 }, { "epoch": 0.9474377834988353, "grad_norm": 1.8164081538219352, "learning_rate": 7.227680066745013e-08, "loss": 0.5463, "step": 30913 }, { "epoch": 0.9474684320215766, "grad_norm": 1.7692368793441795, "learning_rate": 7.219274078843097e-08, "loss": 0.469, "step": 30914 }, { "epoch": 0.9474990805443178, "grad_norm": 1.8463994012160099, "learning_rate": 7.210872946435743e-08, "loss": 0.5989, "step": 30915 }, { "epoch": 0.947529729067059, "grad_norm": 2.081932412043895, "learning_rate": 7.202476669605774e-08, "loss": 0.5678, "step": 30916 }, { "epoch": 0.9475603775898002, "grad_norm": 0.7807313399654812, "learning_rate": 7.194085248435844e-08, "loss": 0.3726, "step": 30917 }, { "epoch": 0.9475910261125414, "grad_norm": 1.9534814834200707, "learning_rate": 7.18569868300878e-08, "loss": 0.5716, "step": 30918 }, { "epoch": 0.9476216746352826, "grad_norm": 1.8259929284685588, "learning_rate": 7.177316973407011e-08, "loss": 0.5196, "step": 30919 }, { "epoch": 0.9476523231580238, "grad_norm": 2.058498664584029, "learning_rate": 7.168940119713252e-08, "loss": 0.541, "step": 30920 }, { "epoch": 0.947682971680765, "grad_norm": 1.9381655706878858, "learning_rate": 7.160568122010103e-08, "loss": 0.5338, "step": 30921 }, { "epoch": 0.9477136202035062, "grad_norm": 1.973056311953984, "learning_rate": 7.152200980379887e-08, "loss": 0.5296, "step": 30922 }, { "epoch": 0.9477442687262474, "grad_norm": 2.000839327832495, "learning_rate": 7.143838694905148e-08, "loss": 0.5568, "step": 30923 }, { "epoch": 0.9477749172489885, "grad_norm": 0.7845122345572013, "learning_rate": 7.13548126566832e-08, "loss": 0.3851, "step": 30924 }, { "epoch": 0.9478055657717298, "grad_norm": 2.0222659270640793, "learning_rate": 7.127128692751617e-08, "loss": 0.5517, "step": 30925 }, { "epoch": 0.947836214294471, "grad_norm": 1.9046933100695382, "learning_rate": 7.118780976237471e-08, "loss": 0.5652, "step": 30926 }, { "epoch": 0.9478668628172122, "grad_norm": 1.713415068938289, "learning_rate": 7.110438116208096e-08, "loss": 0.5217, "step": 30927 }, { "epoch": 0.9478975113399534, "grad_norm": 0.7864791235168408, "learning_rate": 7.102100112745702e-08, "loss": 0.4105, "step": 30928 }, { "epoch": 0.9479281598626946, "grad_norm": 1.8239389730425977, "learning_rate": 7.093766965932392e-08, "loss": 0.5138, "step": 30929 }, { "epoch": 0.9479588083854358, "grad_norm": 1.8932539615600172, "learning_rate": 7.08543867585032e-08, "loss": 0.5495, "step": 30930 }, { "epoch": 0.947989456908177, "grad_norm": 1.7234908065934391, "learning_rate": 7.077115242581534e-08, "loss": 0.6329, "step": 30931 }, { "epoch": 0.9480201054309182, "grad_norm": 0.7540940178570785, "learning_rate": 7.068796666208078e-08, "loss": 0.3976, "step": 30932 }, { "epoch": 0.9480507539536595, "grad_norm": 1.6873359386112925, "learning_rate": 7.060482946811831e-08, "loss": 0.5074, "step": 30933 }, { "epoch": 0.9480814024764006, "grad_norm": 1.7403046376111138, "learning_rate": 7.052174084474784e-08, "loss": 0.5719, "step": 30934 }, { "epoch": 0.9481120509991419, "grad_norm": 2.0460753621078003, "learning_rate": 7.043870079278869e-08, "loss": 0.4413, "step": 30935 }, { "epoch": 0.948142699521883, "grad_norm": 1.9525797078904414, "learning_rate": 7.035570931305746e-08, "loss": 0.6214, "step": 30936 }, { "epoch": 0.9481733480446243, "grad_norm": 2.194833389933503, "learning_rate": 7.027276640637293e-08, "loss": 0.5751, "step": 30937 }, { "epoch": 0.9482039965673654, "grad_norm": 1.807725351612749, "learning_rate": 7.018987207355276e-08, "loss": 0.5504, "step": 30938 }, { "epoch": 0.9482346450901067, "grad_norm": 1.899799224883368, "learning_rate": 7.010702631541245e-08, "loss": 0.5599, "step": 30939 }, { "epoch": 0.9482652936128478, "grad_norm": 1.8857053942932709, "learning_rate": 7.002422913276907e-08, "loss": 0.5829, "step": 30940 }, { "epoch": 0.9482959421355891, "grad_norm": 2.169263282948279, "learning_rate": 6.994148052643868e-08, "loss": 0.5609, "step": 30941 }, { "epoch": 0.9483265906583302, "grad_norm": 1.90374712713827, "learning_rate": 6.98587804972356e-08, "loss": 0.6053, "step": 30942 }, { "epoch": 0.9483572391810715, "grad_norm": 0.7756941045997442, "learning_rate": 6.977612904597586e-08, "loss": 0.3882, "step": 30943 }, { "epoch": 0.9483878877038127, "grad_norm": 1.9798268754964674, "learning_rate": 6.969352617347325e-08, "loss": 0.6209, "step": 30944 }, { "epoch": 0.9484185362265539, "grad_norm": 1.795736120277724, "learning_rate": 6.961097188054211e-08, "loss": 0.538, "step": 30945 }, { "epoch": 0.9484491847492951, "grad_norm": 2.081140873779094, "learning_rate": 6.952846616799569e-08, "loss": 0.658, "step": 30946 }, { "epoch": 0.9484798332720363, "grad_norm": 1.9175732212172247, "learning_rate": 6.944600903664612e-08, "loss": 0.586, "step": 30947 }, { "epoch": 0.9485104817947775, "grad_norm": 1.9080041228013325, "learning_rate": 6.936360048730718e-08, "loss": 0.5328, "step": 30948 }, { "epoch": 0.9485411303175187, "grad_norm": 0.8079766582255864, "learning_rate": 6.928124052078933e-08, "loss": 0.396, "step": 30949 }, { "epoch": 0.9485717788402599, "grad_norm": 2.0843425687532497, "learning_rate": 6.919892913790582e-08, "loss": 0.5335, "step": 30950 }, { "epoch": 0.9486024273630012, "grad_norm": 1.8435035984495656, "learning_rate": 6.911666633946712e-08, "loss": 0.5158, "step": 30951 }, { "epoch": 0.9486330758857423, "grad_norm": 1.7399627944119658, "learning_rate": 6.903445212628257e-08, "loss": 0.5821, "step": 30952 }, { "epoch": 0.9486637244084836, "grad_norm": 2.271402905568892, "learning_rate": 6.895228649916374e-08, "loss": 0.4477, "step": 30953 }, { "epoch": 0.9486943729312247, "grad_norm": 1.8455645320389253, "learning_rate": 6.887016945892e-08, "loss": 0.6207, "step": 30954 }, { "epoch": 0.9487250214539659, "grad_norm": 2.133341446383731, "learning_rate": 6.878810100635958e-08, "loss": 0.5697, "step": 30955 }, { "epoch": 0.9487556699767071, "grad_norm": 1.8595570260570555, "learning_rate": 6.870608114229183e-08, "loss": 0.5306, "step": 30956 }, { "epoch": 0.9487863184994483, "grad_norm": 2.2305397356077106, "learning_rate": 6.8624109867525e-08, "loss": 0.5896, "step": 30957 }, { "epoch": 0.9488169670221895, "grad_norm": 2.083471876513204, "learning_rate": 6.854218718286676e-08, "loss": 0.5852, "step": 30958 }, { "epoch": 0.9488476155449307, "grad_norm": 2.161757106052577, "learning_rate": 6.846031308912371e-08, "loss": 0.5445, "step": 30959 }, { "epoch": 0.948878264067672, "grad_norm": 1.8471246173942282, "learning_rate": 6.837848758710241e-08, "loss": 0.5471, "step": 30960 }, { "epoch": 0.9489089125904131, "grad_norm": 1.8305356322877953, "learning_rate": 6.829671067761112e-08, "loss": 0.5972, "step": 30961 }, { "epoch": 0.9489395611131544, "grad_norm": 1.7260542462563702, "learning_rate": 6.821498236145363e-08, "loss": 0.4882, "step": 30962 }, { "epoch": 0.9489702096358955, "grad_norm": 0.7910997959005236, "learning_rate": 6.81333026394354e-08, "loss": 0.3978, "step": 30963 }, { "epoch": 0.9490008581586368, "grad_norm": 1.830249857125107, "learning_rate": 6.805167151236137e-08, "loss": 0.5393, "step": 30964 }, { "epoch": 0.9490315066813779, "grad_norm": 0.8003328872114168, "learning_rate": 6.797008898103697e-08, "loss": 0.3977, "step": 30965 }, { "epoch": 0.9490621552041192, "grad_norm": 1.8825450054191388, "learning_rate": 6.788855504626435e-08, "loss": 0.501, "step": 30966 }, { "epoch": 0.9490928037268603, "grad_norm": 2.217163769265625, "learning_rate": 6.780706970884788e-08, "loss": 0.5751, "step": 30967 }, { "epoch": 0.9491234522496016, "grad_norm": 1.7178583763258921, "learning_rate": 6.772563296959079e-08, "loss": 0.5185, "step": 30968 }, { "epoch": 0.9491541007723427, "grad_norm": 1.8307502402391194, "learning_rate": 6.764424482929465e-08, "loss": 0.6054, "step": 30969 }, { "epoch": 0.949184749295084, "grad_norm": 1.9632160129701366, "learning_rate": 6.75629052887622e-08, "loss": 0.5561, "step": 30970 }, { "epoch": 0.9492153978178252, "grad_norm": 1.932199502874756, "learning_rate": 6.748161434879386e-08, "loss": 0.5892, "step": 30971 }, { "epoch": 0.9492460463405664, "grad_norm": 0.8446718609640425, "learning_rate": 6.740037201019179e-08, "loss": 0.407, "step": 30972 }, { "epoch": 0.9492766948633076, "grad_norm": 1.995512408006633, "learning_rate": 6.731917827375589e-08, "loss": 0.5611, "step": 30973 }, { "epoch": 0.9493073433860488, "grad_norm": 1.8943066615268749, "learning_rate": 6.723803314028554e-08, "loss": 0.5823, "step": 30974 }, { "epoch": 0.94933799190879, "grad_norm": 1.8314671316827118, "learning_rate": 6.71569366105812e-08, "loss": 0.6238, "step": 30975 }, { "epoch": 0.9493686404315312, "grad_norm": 1.96539563577945, "learning_rate": 6.707588868544168e-08, "loss": 0.5716, "step": 30976 }, { "epoch": 0.9493992889542724, "grad_norm": 2.0368358203910644, "learning_rate": 6.699488936566634e-08, "loss": 0.6115, "step": 30977 }, { "epoch": 0.9494299374770137, "grad_norm": 1.9182855582734288, "learning_rate": 6.691393865205176e-08, "loss": 0.6033, "step": 30978 }, { "epoch": 0.9494605859997548, "grad_norm": 1.9747721068842137, "learning_rate": 6.683303654539619e-08, "loss": 0.5834, "step": 30979 }, { "epoch": 0.9494912345224961, "grad_norm": 2.0449132363171048, "learning_rate": 6.675218304649733e-08, "loss": 0.5256, "step": 30980 }, { "epoch": 0.9495218830452372, "grad_norm": 1.7772363688634416, "learning_rate": 6.667137815615176e-08, "loss": 0.5108, "step": 30981 }, { "epoch": 0.9495525315679785, "grad_norm": 1.7716774812246265, "learning_rate": 6.659062187515498e-08, "loss": 0.6101, "step": 30982 }, { "epoch": 0.9495831800907196, "grad_norm": 2.286655431635491, "learning_rate": 6.650991420430241e-08, "loss": 0.6775, "step": 30983 }, { "epoch": 0.9496138286134609, "grad_norm": 1.9133913370253293, "learning_rate": 6.642925514439125e-08, "loss": 0.5754, "step": 30984 }, { "epoch": 0.949644477136202, "grad_norm": 1.7205987478948466, "learning_rate": 6.634864469621361e-08, "loss": 0.5068, "step": 30985 }, { "epoch": 0.9496751256589432, "grad_norm": 2.0095703376451466, "learning_rate": 6.626808286056607e-08, "loss": 0.5826, "step": 30986 }, { "epoch": 0.9497057741816844, "grad_norm": 1.912573108539849, "learning_rate": 6.61875696382408e-08, "loss": 0.6541, "step": 30987 }, { "epoch": 0.9497364227044256, "grad_norm": 1.691961981139282, "learning_rate": 6.610710503003214e-08, "loss": 0.5933, "step": 30988 }, { "epoch": 0.9497670712271669, "grad_norm": 1.7836960485076938, "learning_rate": 6.602668903673226e-08, "loss": 0.4789, "step": 30989 }, { "epoch": 0.949797719749908, "grad_norm": 0.8421656071778608, "learning_rate": 6.59463216591344e-08, "loss": 0.384, "step": 30990 }, { "epoch": 0.9498283682726493, "grad_norm": 1.816348830287691, "learning_rate": 6.586600289802958e-08, "loss": 0.495, "step": 30991 }, { "epoch": 0.9498590167953904, "grad_norm": 1.7653982642977681, "learning_rate": 6.578573275420941e-08, "loss": 0.5205, "step": 30992 }, { "epoch": 0.9498896653181317, "grad_norm": 1.8622976601196832, "learning_rate": 6.570551122846491e-08, "loss": 0.4979, "step": 30993 }, { "epoch": 0.9499203138408728, "grad_norm": 1.747613933883945, "learning_rate": 6.562533832158657e-08, "loss": 0.4633, "step": 30994 }, { "epoch": 0.9499509623636141, "grad_norm": 1.8703827935678294, "learning_rate": 6.554521403436376e-08, "loss": 0.6107, "step": 30995 }, { "epoch": 0.9499816108863552, "grad_norm": 1.9972685889635684, "learning_rate": 6.54651383675875e-08, "loss": 0.6011, "step": 30996 }, { "epoch": 0.9500122594090965, "grad_norm": 1.960110998492348, "learning_rate": 6.538511132204495e-08, "loss": 0.4632, "step": 30997 }, { "epoch": 0.9500429079318377, "grad_norm": 1.7674775134321294, "learning_rate": 6.530513289852603e-08, "loss": 0.5624, "step": 30998 }, { "epoch": 0.9500735564545789, "grad_norm": 2.071699323365764, "learning_rate": 6.522520309781788e-08, "loss": 0.6153, "step": 30999 }, { "epoch": 0.9501042049773201, "grad_norm": 1.832615523657835, "learning_rate": 6.514532192070876e-08, "loss": 0.5555, "step": 31000 }, { "epoch": 0.9501348535000613, "grad_norm": 0.802354482821107, "learning_rate": 6.506548936798474e-08, "loss": 0.3963, "step": 31001 }, { "epoch": 0.9501655020228025, "grad_norm": 1.8405848757399657, "learning_rate": 6.498570544043348e-08, "loss": 0.6113, "step": 31002 }, { "epoch": 0.9501961505455437, "grad_norm": 1.8835500770640923, "learning_rate": 6.490597013884103e-08, "loss": 0.6182, "step": 31003 }, { "epoch": 0.9502267990682849, "grad_norm": 1.9299958584539838, "learning_rate": 6.482628346399289e-08, "loss": 0.5897, "step": 31004 }, { "epoch": 0.9502574475910261, "grad_norm": 1.9433412287572038, "learning_rate": 6.474664541667341e-08, "loss": 0.5851, "step": 31005 }, { "epoch": 0.9502880961137673, "grad_norm": 1.8288298172233342, "learning_rate": 6.466705599766809e-08, "loss": 0.6184, "step": 31006 }, { "epoch": 0.9503187446365086, "grad_norm": 1.9470633715217385, "learning_rate": 6.45875152077613e-08, "loss": 0.6116, "step": 31007 }, { "epoch": 0.9503493931592497, "grad_norm": 2.1242578420801723, "learning_rate": 6.450802304773629e-08, "loss": 0.6038, "step": 31008 }, { "epoch": 0.950380041681991, "grad_norm": 1.835844175364146, "learning_rate": 6.442857951837689e-08, "loss": 0.5406, "step": 31009 }, { "epoch": 0.9504106902047321, "grad_norm": 1.9308708320961614, "learning_rate": 6.434918462046525e-08, "loss": 0.6428, "step": 31010 }, { "epoch": 0.9504413387274734, "grad_norm": 0.8320531466493298, "learning_rate": 6.426983835478462e-08, "loss": 0.4036, "step": 31011 }, { "epoch": 0.9504719872502145, "grad_norm": 1.7218169715464302, "learning_rate": 6.419054072211494e-08, "loss": 0.5694, "step": 31012 }, { "epoch": 0.9505026357729558, "grad_norm": 2.0701249718474273, "learning_rate": 6.411129172323949e-08, "loss": 0.6513, "step": 31013 }, { "epoch": 0.950533284295697, "grad_norm": 2.0037402225703005, "learning_rate": 6.403209135893818e-08, "loss": 0.4572, "step": 31014 }, { "epoch": 0.9505639328184382, "grad_norm": 1.8786632811414756, "learning_rate": 6.39529396299915e-08, "loss": 0.5835, "step": 31015 }, { "epoch": 0.9505945813411794, "grad_norm": 0.7747344251682287, "learning_rate": 6.387383653717938e-08, "loss": 0.3848, "step": 31016 }, { "epoch": 0.9506252298639205, "grad_norm": 1.9596668787246232, "learning_rate": 6.379478208128176e-08, "loss": 0.5876, "step": 31017 }, { "epoch": 0.9506558783866618, "grad_norm": 1.7352617511793205, "learning_rate": 6.37157762630769e-08, "loss": 0.5904, "step": 31018 }, { "epoch": 0.9506865269094029, "grad_norm": 1.9504051482479245, "learning_rate": 6.363681908334307e-08, "loss": 0.575, "step": 31019 }, { "epoch": 0.9507171754321442, "grad_norm": 2.0689224778241457, "learning_rate": 6.355791054285908e-08, "loss": 0.7093, "step": 31020 }, { "epoch": 0.9507478239548853, "grad_norm": 1.8299219504960893, "learning_rate": 6.347905064240211e-08, "loss": 0.4924, "step": 31021 }, { "epoch": 0.9507784724776266, "grad_norm": 1.868014880782125, "learning_rate": 6.340023938274931e-08, "loss": 0.4062, "step": 31022 }, { "epoch": 0.9508091210003677, "grad_norm": 2.0076874136692746, "learning_rate": 6.332147676467671e-08, "loss": 0.5619, "step": 31023 }, { "epoch": 0.950839769523109, "grad_norm": 2.007038905246689, "learning_rate": 6.324276278896091e-08, "loss": 0.6555, "step": 31024 }, { "epoch": 0.9508704180458502, "grad_norm": 1.7560521496670727, "learning_rate": 6.316409745637686e-08, "loss": 0.5072, "step": 31025 }, { "epoch": 0.9509010665685914, "grad_norm": 0.8050330118665351, "learning_rate": 6.30854807677006e-08, "loss": 0.3877, "step": 31026 }, { "epoch": 0.9509317150913326, "grad_norm": 1.8094012513575801, "learning_rate": 6.300691272370596e-08, "loss": 0.5221, "step": 31027 }, { "epoch": 0.9509623636140738, "grad_norm": 1.8761387263781946, "learning_rate": 6.292839332516731e-08, "loss": 0.5663, "step": 31028 }, { "epoch": 0.950993012136815, "grad_norm": 1.8339260464808116, "learning_rate": 6.284992257285904e-08, "loss": 0.6023, "step": 31029 }, { "epoch": 0.9510236606595562, "grad_norm": 2.0153179685599634, "learning_rate": 6.277150046755276e-08, "loss": 0.5613, "step": 31030 }, { "epoch": 0.9510543091822974, "grad_norm": 1.8279836006186863, "learning_rate": 6.269312701002284e-08, "loss": 0.6227, "step": 31031 }, { "epoch": 0.9510849577050386, "grad_norm": 2.008754418004958, "learning_rate": 6.261480220104088e-08, "loss": 0.5243, "step": 31032 }, { "epoch": 0.9511156062277798, "grad_norm": 1.9206489658038177, "learning_rate": 6.253652604137794e-08, "loss": 0.5542, "step": 31033 }, { "epoch": 0.9511462547505211, "grad_norm": 1.9763262766370053, "learning_rate": 6.245829853180618e-08, "loss": 0.6372, "step": 31034 }, { "epoch": 0.9511769032732622, "grad_norm": 2.058583401506986, "learning_rate": 6.23801196730961e-08, "loss": 0.5356, "step": 31035 }, { "epoch": 0.9512075517960035, "grad_norm": 1.852099565445736, "learning_rate": 6.230198946601818e-08, "loss": 0.5698, "step": 31036 }, { "epoch": 0.9512382003187446, "grad_norm": 1.8479511263721071, "learning_rate": 6.222390791134236e-08, "loss": 0.566, "step": 31037 }, { "epoch": 0.9512688488414859, "grad_norm": 0.8400452890405624, "learning_rate": 6.214587500983748e-08, "loss": 0.4006, "step": 31038 }, { "epoch": 0.951299497364227, "grad_norm": 2.0262002870689257, "learning_rate": 6.206789076227238e-08, "loss": 0.5795, "step": 31039 }, { "epoch": 0.9513301458869683, "grad_norm": 2.1133823789338475, "learning_rate": 6.198995516941642e-08, "loss": 0.5189, "step": 31040 }, { "epoch": 0.9513607944097094, "grad_norm": 1.9936648454900678, "learning_rate": 6.191206823203622e-08, "loss": 0.5532, "step": 31041 }, { "epoch": 0.9513914429324507, "grad_norm": 2.087665539403235, "learning_rate": 6.183422995090005e-08, "loss": 0.517, "step": 31042 }, { "epoch": 0.9514220914551919, "grad_norm": 1.902723614277879, "learning_rate": 6.175644032677508e-08, "loss": 0.5695, "step": 31043 }, { "epoch": 0.9514527399779331, "grad_norm": 1.9063919504154494, "learning_rate": 6.167869936042737e-08, "loss": 0.5435, "step": 31044 }, { "epoch": 0.9514833885006743, "grad_norm": 1.7402627452890154, "learning_rate": 6.160100705262295e-08, "loss": 0.586, "step": 31045 }, { "epoch": 0.9515140370234155, "grad_norm": 2.067613167009254, "learning_rate": 6.152336340412679e-08, "loss": 0.5768, "step": 31046 }, { "epoch": 0.9515446855461567, "grad_norm": 0.781311312447566, "learning_rate": 6.144576841570494e-08, "loss": 0.383, "step": 31047 }, { "epoch": 0.9515753340688978, "grad_norm": 1.826525788939158, "learning_rate": 6.136822208812121e-08, "loss": 0.524, "step": 31048 }, { "epoch": 0.9516059825916391, "grad_norm": 2.0481566262012842, "learning_rate": 6.129072442214057e-08, "loss": 0.6792, "step": 31049 }, { "epoch": 0.9516366311143802, "grad_norm": 1.8390826793522845, "learning_rate": 6.121327541852517e-08, "loss": 0.5788, "step": 31050 }, { "epoch": 0.9516672796371215, "grad_norm": 1.8940768673581785, "learning_rate": 6.113587507803997e-08, "loss": 0.5088, "step": 31051 }, { "epoch": 0.9516979281598626, "grad_norm": 2.005456402174599, "learning_rate": 6.105852340144602e-08, "loss": 0.5993, "step": 31052 }, { "epoch": 0.9517285766826039, "grad_norm": 2.0325946095112664, "learning_rate": 6.098122038950605e-08, "loss": 0.5115, "step": 31053 }, { "epoch": 0.9517592252053451, "grad_norm": 2.0060673212206956, "learning_rate": 6.090396604298276e-08, "loss": 0.5338, "step": 31054 }, { "epoch": 0.9517898737280863, "grad_norm": 1.9228095648519468, "learning_rate": 6.082676036263558e-08, "loss": 0.5379, "step": 31055 }, { "epoch": 0.9518205222508275, "grad_norm": 1.8670215378640018, "learning_rate": 6.074960334922609e-08, "loss": 0.5503, "step": 31056 }, { "epoch": 0.9518511707735687, "grad_norm": 1.8504635812718875, "learning_rate": 6.06724950035148e-08, "loss": 0.48, "step": 31057 }, { "epoch": 0.9518818192963099, "grad_norm": 1.9712118115218251, "learning_rate": 6.059543532626111e-08, "loss": 0.5447, "step": 31058 }, { "epoch": 0.9519124678190511, "grad_norm": 1.7833290845162533, "learning_rate": 6.051842431822442e-08, "loss": 0.5187, "step": 31059 }, { "epoch": 0.9519431163417923, "grad_norm": 1.9205089901049668, "learning_rate": 6.044146198016299e-08, "loss": 0.5707, "step": 31060 }, { "epoch": 0.9519737648645336, "grad_norm": 1.7861346172075763, "learning_rate": 6.036454831283623e-08, "loss": 0.5363, "step": 31061 }, { "epoch": 0.9520044133872747, "grad_norm": 0.7846767547662303, "learning_rate": 6.02876833170013e-08, "loss": 0.3997, "step": 31062 }, { "epoch": 0.952035061910016, "grad_norm": 1.9006203954691379, "learning_rate": 6.021086699341594e-08, "loss": 0.534, "step": 31063 }, { "epoch": 0.9520657104327571, "grad_norm": 2.2088006447274906, "learning_rate": 6.01340993428362e-08, "loss": 0.6445, "step": 31064 }, { "epoch": 0.9520963589554984, "grad_norm": 2.0260558665701787, "learning_rate": 6.005738036601982e-08, "loss": 0.4961, "step": 31065 }, { "epoch": 0.9521270074782395, "grad_norm": 2.074656902266161, "learning_rate": 5.998071006372175e-08, "loss": 0.6014, "step": 31066 }, { "epoch": 0.9521576560009808, "grad_norm": 1.8119962248881556, "learning_rate": 5.990408843669803e-08, "loss": 0.6162, "step": 31067 }, { "epoch": 0.9521883045237219, "grad_norm": 1.9460433207418049, "learning_rate": 5.982751548570253e-08, "loss": 0.6043, "step": 31068 }, { "epoch": 0.9522189530464632, "grad_norm": 0.797124795091327, "learning_rate": 5.975099121149074e-08, "loss": 0.3884, "step": 31069 }, { "epoch": 0.9522496015692044, "grad_norm": 1.692668660345055, "learning_rate": 5.967451561481708e-08, "loss": 0.5097, "step": 31070 }, { "epoch": 0.9522802500919456, "grad_norm": 1.9625769135145283, "learning_rate": 5.959808869643369e-08, "loss": 0.5659, "step": 31071 }, { "epoch": 0.9523108986146868, "grad_norm": 2.197697746181491, "learning_rate": 5.952171045709443e-08, "loss": 0.5457, "step": 31072 }, { "epoch": 0.952341547137428, "grad_norm": 0.7965830724362266, "learning_rate": 5.944538089755258e-08, "loss": 0.3938, "step": 31073 }, { "epoch": 0.9523721956601692, "grad_norm": 1.844336323519031, "learning_rate": 5.936910001855867e-08, "loss": 0.6779, "step": 31074 }, { "epoch": 0.9524028441829104, "grad_norm": 1.9051926626011686, "learning_rate": 5.929286782086541e-08, "loss": 0.6533, "step": 31075 }, { "epoch": 0.9524334927056516, "grad_norm": 1.9675252540873345, "learning_rate": 5.921668430522387e-08, "loss": 0.5146, "step": 31076 }, { "epoch": 0.9524641412283928, "grad_norm": 1.8008685792023778, "learning_rate": 5.914054947238457e-08, "loss": 0.5672, "step": 31077 }, { "epoch": 0.952494789751134, "grad_norm": 0.8196438923458761, "learning_rate": 5.906446332309745e-08, "loss": 0.3845, "step": 31078 }, { "epoch": 0.9525254382738751, "grad_norm": 0.7848585809042018, "learning_rate": 5.898842585811193e-08, "loss": 0.4039, "step": 31079 }, { "epoch": 0.9525560867966164, "grad_norm": 1.907456166382401, "learning_rate": 5.8912437078177953e-08, "loss": 0.5645, "step": 31080 }, { "epoch": 0.9525867353193576, "grad_norm": 1.857613192702058, "learning_rate": 5.883649698404437e-08, "loss": 0.6156, "step": 31081 }, { "epoch": 0.9526173838420988, "grad_norm": 2.0291254805430263, "learning_rate": 5.876060557645835e-08, "loss": 0.606, "step": 31082 }, { "epoch": 0.95264803236484, "grad_norm": 2.0335478575819605, "learning_rate": 5.8684762856168756e-08, "loss": 0.5492, "step": 31083 }, { "epoch": 0.9526786808875812, "grad_norm": 2.169274481255191, "learning_rate": 5.8608968823922754e-08, "loss": 0.5384, "step": 31084 }, { "epoch": 0.9527093294103224, "grad_norm": 1.8288805319527868, "learning_rate": 5.8533223480466417e-08, "loss": 0.602, "step": 31085 }, { "epoch": 0.9527399779330636, "grad_norm": 2.059098085448526, "learning_rate": 5.845752682654693e-08, "loss": 0.5563, "step": 31086 }, { "epoch": 0.9527706264558048, "grad_norm": 1.954932115656238, "learning_rate": 5.838187886290925e-08, "loss": 0.5967, "step": 31087 }, { "epoch": 0.952801274978546, "grad_norm": 1.875466146158423, "learning_rate": 5.8306279590299444e-08, "loss": 0.5248, "step": 31088 }, { "epoch": 0.9528319235012872, "grad_norm": 1.7371303706835346, "learning_rate": 5.823072900946303e-08, "loss": 0.5814, "step": 31089 }, { "epoch": 0.9528625720240285, "grad_norm": 2.306863401285187, "learning_rate": 5.815522712114274e-08, "loss": 0.6961, "step": 31090 }, { "epoch": 0.9528932205467696, "grad_norm": 2.0248128066905924, "learning_rate": 5.8079773926083546e-08, "loss": 0.609, "step": 31091 }, { "epoch": 0.9529238690695109, "grad_norm": 1.7910235337263625, "learning_rate": 5.80043694250293e-08, "loss": 0.5233, "step": 31092 }, { "epoch": 0.952954517592252, "grad_norm": 1.9611787440426234, "learning_rate": 5.792901361872216e-08, "loss": 0.4334, "step": 31093 }, { "epoch": 0.9529851661149933, "grad_norm": 2.074080582795116, "learning_rate": 5.7853706507904337e-08, "loss": 0.5817, "step": 31094 }, { "epoch": 0.9530158146377344, "grad_norm": 3.1595968395183167, "learning_rate": 5.7778448093319115e-08, "loss": 0.5071, "step": 31095 }, { "epoch": 0.9530464631604757, "grad_norm": 2.586010802845303, "learning_rate": 5.770323837570757e-08, "loss": 0.5847, "step": 31096 }, { "epoch": 0.9530771116832168, "grad_norm": 1.993772705859699, "learning_rate": 5.762807735581022e-08, "loss": 0.5403, "step": 31097 }, { "epoch": 0.9531077602059581, "grad_norm": 1.8952997982920012, "learning_rate": 5.755296503436758e-08, "loss": 0.5854, "step": 31098 }, { "epoch": 0.9531384087286993, "grad_norm": 1.869001861914134, "learning_rate": 5.747790141212073e-08, "loss": 0.4147, "step": 31099 }, { "epoch": 0.9531690572514405, "grad_norm": 1.9184625044500627, "learning_rate": 5.7402886489809075e-08, "loss": 0.5306, "step": 31100 }, { "epoch": 0.9531997057741817, "grad_norm": 1.7919086946565392, "learning_rate": 5.7327920268170356e-08, "loss": 0.556, "step": 31101 }, { "epoch": 0.9532303542969229, "grad_norm": 1.7446791192566908, "learning_rate": 5.725300274794454e-08, "loss": 0.5174, "step": 31102 }, { "epoch": 0.9532610028196641, "grad_norm": 1.9895491465998538, "learning_rate": 5.717813392986993e-08, "loss": 0.5224, "step": 31103 }, { "epoch": 0.9532916513424053, "grad_norm": 2.2210647154682612, "learning_rate": 5.71033138146837e-08, "loss": 0.5798, "step": 31104 }, { "epoch": 0.9533222998651465, "grad_norm": 0.7968411758965319, "learning_rate": 5.702854240312361e-08, "loss": 0.4125, "step": 31105 }, { "epoch": 0.9533529483878878, "grad_norm": 2.090872550356026, "learning_rate": 5.6953819695925175e-08, "loss": 0.5497, "step": 31106 }, { "epoch": 0.9533835969106289, "grad_norm": 1.9758055570971245, "learning_rate": 5.6879145693826133e-08, "loss": 0.5633, "step": 31107 }, { "epoch": 0.9534142454333702, "grad_norm": 2.1024542287455583, "learning_rate": 5.6804520397561456e-08, "loss": 0.5805, "step": 31108 }, { "epoch": 0.9534448939561113, "grad_norm": 2.036131988160263, "learning_rate": 5.672994380786667e-08, "loss": 0.6148, "step": 31109 }, { "epoch": 0.9534755424788525, "grad_norm": 1.7578371579045826, "learning_rate": 5.665541592547619e-08, "loss": 0.5406, "step": 31110 }, { "epoch": 0.9535061910015937, "grad_norm": 1.85938482859866, "learning_rate": 5.6580936751125526e-08, "loss": 0.5264, "step": 31111 }, { "epoch": 0.9535368395243349, "grad_norm": 2.238173830910778, "learning_rate": 5.650650628554688e-08, "loss": 0.5406, "step": 31112 }, { "epoch": 0.9535674880470761, "grad_norm": 1.824168058503052, "learning_rate": 5.643212452947466e-08, "loss": 0.5605, "step": 31113 }, { "epoch": 0.9535981365698173, "grad_norm": 1.7607520215265737, "learning_rate": 5.635779148364162e-08, "loss": 0.549, "step": 31114 }, { "epoch": 0.9536287850925586, "grad_norm": 1.8301763351362053, "learning_rate": 5.6283507148780505e-08, "loss": 0.5386, "step": 31115 }, { "epoch": 0.9536594336152997, "grad_norm": 0.8823648224475552, "learning_rate": 5.6209271525622385e-08, "loss": 0.386, "step": 31116 }, { "epoch": 0.953690082138041, "grad_norm": 1.9338156687424983, "learning_rate": 5.613508461489947e-08, "loss": 0.6354, "step": 31117 }, { "epoch": 0.9537207306607821, "grad_norm": 1.930339678849446, "learning_rate": 5.6060946417342276e-08, "loss": 0.5641, "step": 31118 }, { "epoch": 0.9537513791835234, "grad_norm": 1.9303872257894779, "learning_rate": 5.598685693368189e-08, "loss": 0.5819, "step": 31119 }, { "epoch": 0.9537820277062645, "grad_norm": 1.996183718324916, "learning_rate": 5.591281616464772e-08, "loss": 0.5197, "step": 31120 }, { "epoch": 0.9538126762290058, "grad_norm": 1.8219130343138845, "learning_rate": 5.5838824110969745e-08, "loss": 0.601, "step": 31121 }, { "epoch": 0.9538433247517469, "grad_norm": 2.1550266949234103, "learning_rate": 5.5764880773376826e-08, "loss": 0.5333, "step": 31122 }, { "epoch": 0.9538739732744882, "grad_norm": 1.756523129563703, "learning_rate": 5.5690986152597824e-08, "loss": 0.5715, "step": 31123 }, { "epoch": 0.9539046217972293, "grad_norm": 0.7869542990034883, "learning_rate": 5.5617140249359934e-08, "loss": 0.4165, "step": 31124 }, { "epoch": 0.9539352703199706, "grad_norm": 2.0291384466525564, "learning_rate": 5.554334306439202e-08, "loss": 0.6312, "step": 31125 }, { "epoch": 0.9539659188427118, "grad_norm": 2.028368018096644, "learning_rate": 5.5469594598420164e-08, "loss": 0.482, "step": 31126 }, { "epoch": 0.953996567365453, "grad_norm": 1.8902693213128143, "learning_rate": 5.5395894852172116e-08, "loss": 0.5519, "step": 31127 }, { "epoch": 0.9540272158881942, "grad_norm": 1.8672432636863898, "learning_rate": 5.532224382637286e-08, "loss": 0.6085, "step": 31128 }, { "epoch": 0.9540578644109354, "grad_norm": 1.7656914898424498, "learning_rate": 5.5248641521749024e-08, "loss": 0.5816, "step": 31129 }, { "epoch": 0.9540885129336766, "grad_norm": 1.8323443598523719, "learning_rate": 5.5175087939025596e-08, "loss": 0.602, "step": 31130 }, { "epoch": 0.9541191614564178, "grad_norm": 0.7888386915907795, "learning_rate": 5.510158307892699e-08, "loss": 0.3906, "step": 31131 }, { "epoch": 0.954149809979159, "grad_norm": 1.9135759409151498, "learning_rate": 5.5028126942177626e-08, "loss": 0.589, "step": 31132 }, { "epoch": 0.9541804585019003, "grad_norm": 2.1369157321176155, "learning_rate": 5.4954719529501376e-08, "loss": 0.4514, "step": 31133 }, { "epoch": 0.9542111070246414, "grad_norm": 2.0112440690830726, "learning_rate": 5.488136084162155e-08, "loss": 0.5852, "step": 31134 }, { "epoch": 0.9542417555473827, "grad_norm": 1.741679598543246, "learning_rate": 5.480805087926089e-08, "loss": 0.6029, "step": 31135 }, { "epoch": 0.9542724040701238, "grad_norm": 2.078365595412863, "learning_rate": 5.473478964314216e-08, "loss": 0.6343, "step": 31136 }, { "epoch": 0.9543030525928651, "grad_norm": 2.0178327458164547, "learning_rate": 5.4661577133986455e-08, "loss": 0.4433, "step": 31137 }, { "epoch": 0.9543337011156062, "grad_norm": 1.7550600978854487, "learning_rate": 5.458841335251597e-08, "loss": 0.5868, "step": 31138 }, { "epoch": 0.9543643496383475, "grad_norm": 1.9755268517908857, "learning_rate": 5.4515298299450126e-08, "loss": 0.6165, "step": 31139 }, { "epoch": 0.9543949981610886, "grad_norm": 2.065988198996507, "learning_rate": 5.444223197551168e-08, "loss": 0.7032, "step": 31140 }, { "epoch": 0.9544256466838298, "grad_norm": 2.2909391910978387, "learning_rate": 5.43692143814184e-08, "loss": 0.6394, "step": 31141 }, { "epoch": 0.954456295206571, "grad_norm": 1.5605945640567418, "learning_rate": 5.429624551789136e-08, "loss": 0.4382, "step": 31142 }, { "epoch": 0.9544869437293122, "grad_norm": 1.7646054496587025, "learning_rate": 5.422332538564834e-08, "loss": 0.5624, "step": 31143 }, { "epoch": 0.9545175922520535, "grad_norm": 2.0833544761436777, "learning_rate": 5.4150453985408194e-08, "loss": 0.6636, "step": 31144 }, { "epoch": 0.9545482407747946, "grad_norm": 1.8563616001334484, "learning_rate": 5.407763131788979e-08, "loss": 0.6071, "step": 31145 }, { "epoch": 0.9545788892975359, "grad_norm": 2.049121962196826, "learning_rate": 5.400485738380923e-08, "loss": 0.6287, "step": 31146 }, { "epoch": 0.954609537820277, "grad_norm": 1.8329161990944156, "learning_rate": 5.393213218388482e-08, "loss": 0.5529, "step": 31147 }, { "epoch": 0.9546401863430183, "grad_norm": 2.039588215249282, "learning_rate": 5.3859455718832667e-08, "loss": 0.4983, "step": 31148 }, { "epoch": 0.9546708348657594, "grad_norm": 0.7864410900202188, "learning_rate": 5.3786827989368296e-08, "loss": 0.3918, "step": 31149 }, { "epoch": 0.9547014833885007, "grad_norm": 1.9037858956113625, "learning_rate": 5.3714248996207804e-08, "loss": 0.4975, "step": 31150 }, { "epoch": 0.9547321319112418, "grad_norm": 1.995018671333513, "learning_rate": 5.364171874006674e-08, "loss": 0.528, "step": 31151 }, { "epoch": 0.9547627804339831, "grad_norm": 1.9500049773958852, "learning_rate": 5.3569237221659523e-08, "loss": 0.6036, "step": 31152 }, { "epoch": 0.9547934289567243, "grad_norm": 0.8455106889877346, "learning_rate": 5.3496804441700024e-08, "loss": 0.396, "step": 31153 }, { "epoch": 0.9548240774794655, "grad_norm": 1.7270561671356162, "learning_rate": 5.342442040090212e-08, "loss": 0.6222, "step": 31154 }, { "epoch": 0.9548547260022067, "grad_norm": 1.7475516095387695, "learning_rate": 5.335208509997858e-08, "loss": 0.5974, "step": 31155 }, { "epoch": 0.9548853745249479, "grad_norm": 1.9652541049267507, "learning_rate": 5.327979853964327e-08, "loss": 0.5509, "step": 31156 }, { "epoch": 0.9549160230476891, "grad_norm": 1.952734610135969, "learning_rate": 5.320756072060784e-08, "loss": 0.4975, "step": 31157 }, { "epoch": 0.9549466715704303, "grad_norm": 1.7676611551255348, "learning_rate": 5.31353716435834e-08, "loss": 0.5133, "step": 31158 }, { "epoch": 0.9549773200931715, "grad_norm": 1.9475678692489007, "learning_rate": 5.3063231309282706e-08, "loss": 0.5774, "step": 31159 }, { "epoch": 0.9550079686159128, "grad_norm": 1.5879306341931745, "learning_rate": 5.299113971841463e-08, "loss": 0.6703, "step": 31160 }, { "epoch": 0.9550386171386539, "grad_norm": 1.8337489225819221, "learning_rate": 5.291909687169139e-08, "loss": 0.4857, "step": 31161 }, { "epoch": 0.9550692656613952, "grad_norm": 2.093396595334691, "learning_rate": 5.2847102769821854e-08, "loss": 0.561, "step": 31162 }, { "epoch": 0.9550999141841363, "grad_norm": 1.7419453594852001, "learning_rate": 5.2775157413515464e-08, "loss": 0.5998, "step": 31163 }, { "epoch": 0.9551305627068776, "grad_norm": 2.0423229727461316, "learning_rate": 5.2703260803481645e-08, "loss": 0.5963, "step": 31164 }, { "epoch": 0.9551612112296187, "grad_norm": 1.9855000360704937, "learning_rate": 5.263141294042817e-08, "loss": 0.5818, "step": 31165 }, { "epoch": 0.95519185975236, "grad_norm": 1.9449960597596117, "learning_rate": 5.2559613825062806e-08, "loss": 0.605, "step": 31166 }, { "epoch": 0.9552225082751011, "grad_norm": 2.0115500608586667, "learning_rate": 5.2487863458093867e-08, "loss": 0.5611, "step": 31167 }, { "epoch": 0.9552531567978424, "grad_norm": 1.7469019806808161, "learning_rate": 5.2416161840228016e-08, "loss": 0.5429, "step": 31168 }, { "epoch": 0.9552838053205835, "grad_norm": 1.716740329641963, "learning_rate": 5.234450897217136e-08, "loss": 0.5289, "step": 31169 }, { "epoch": 0.9553144538433248, "grad_norm": 0.7962620450843126, "learning_rate": 5.227290485462999e-08, "loss": 0.3923, "step": 31170 }, { "epoch": 0.955345102366066, "grad_norm": 1.7894871927752893, "learning_rate": 5.2201349488310015e-08, "loss": 0.4912, "step": 31171 }, { "epoch": 0.9553757508888071, "grad_norm": 1.7981717888734978, "learning_rate": 5.212984287391587e-08, "loss": 0.464, "step": 31172 }, { "epoch": 0.9554063994115484, "grad_norm": 1.742494383723993, "learning_rate": 5.205838501215254e-08, "loss": 0.5444, "step": 31173 }, { "epoch": 0.9554370479342895, "grad_norm": 2.021068948398119, "learning_rate": 5.1986975903723926e-08, "loss": 0.604, "step": 31174 }, { "epoch": 0.9554676964570308, "grad_norm": 1.8549419065678394, "learning_rate": 5.191561554933333e-08, "loss": 0.6041, "step": 31175 }, { "epoch": 0.9554983449797719, "grad_norm": 2.0266006345222385, "learning_rate": 5.184430394968465e-08, "loss": 0.6134, "step": 31176 }, { "epoch": 0.9555289935025132, "grad_norm": 1.9020546725611756, "learning_rate": 5.177304110547954e-08, "loss": 0.5302, "step": 31177 }, { "epoch": 0.9555596420252543, "grad_norm": 2.0138163605139, "learning_rate": 5.170182701742133e-08, "loss": 0.6187, "step": 31178 }, { "epoch": 0.9555902905479956, "grad_norm": 2.0421660266840274, "learning_rate": 5.163066168621056e-08, "loss": 0.5531, "step": 31179 }, { "epoch": 0.9556209390707368, "grad_norm": 1.896567721858918, "learning_rate": 5.1559545112548904e-08, "loss": 0.5287, "step": 31180 }, { "epoch": 0.955651587593478, "grad_norm": 1.8537738397404249, "learning_rate": 5.1488477297137465e-08, "loss": 0.5986, "step": 31181 }, { "epoch": 0.9556822361162192, "grad_norm": 2.205230090971057, "learning_rate": 5.141745824067623e-08, "loss": 0.6126, "step": 31182 }, { "epoch": 0.9557128846389604, "grad_norm": 2.0902733432590206, "learning_rate": 5.1346487943865206e-08, "loss": 0.5087, "step": 31183 }, { "epoch": 0.9557435331617016, "grad_norm": 1.8110539269389565, "learning_rate": 5.127556640740272e-08, "loss": 0.5073, "step": 31184 }, { "epoch": 0.9557741816844428, "grad_norm": 1.8031996713098748, "learning_rate": 5.1204693631988764e-08, "loss": 0.5565, "step": 31185 }, { "epoch": 0.955804830207184, "grad_norm": 1.851773840707076, "learning_rate": 5.113386961832112e-08, "loss": 0.5266, "step": 31186 }, { "epoch": 0.9558354787299252, "grad_norm": 0.8036130694790324, "learning_rate": 5.106309436709756e-08, "loss": 0.4109, "step": 31187 }, { "epoch": 0.9558661272526664, "grad_norm": 1.662013341334166, "learning_rate": 5.099236787901529e-08, "loss": 0.3846, "step": 31188 }, { "epoch": 0.9558967757754077, "grad_norm": 0.7521450014307924, "learning_rate": 5.092169015477211e-08, "loss": 0.3805, "step": 31189 }, { "epoch": 0.9559274242981488, "grad_norm": 2.0904236365224262, "learning_rate": 5.0851061195063e-08, "loss": 0.5048, "step": 31190 }, { "epoch": 0.9559580728208901, "grad_norm": 1.768824547033017, "learning_rate": 5.0780481000585194e-08, "loss": 0.5792, "step": 31191 }, { "epoch": 0.9559887213436312, "grad_norm": 1.8817968805834568, "learning_rate": 5.070994957203368e-08, "loss": 0.584, "step": 31192 }, { "epoch": 0.9560193698663725, "grad_norm": 1.6328352998997389, "learning_rate": 5.0639466910102905e-08, "loss": 0.4565, "step": 31193 }, { "epoch": 0.9560500183891136, "grad_norm": 1.8021649085419007, "learning_rate": 5.0569033015488436e-08, "loss": 0.4784, "step": 31194 }, { "epoch": 0.9560806669118549, "grad_norm": 0.7984631909676558, "learning_rate": 5.0498647888883036e-08, "loss": 0.4053, "step": 31195 }, { "epoch": 0.956111315434596, "grad_norm": 1.8456883134146882, "learning_rate": 5.0428311530981155e-08, "loss": 0.5395, "step": 31196 }, { "epoch": 0.9561419639573373, "grad_norm": 2.0307675815884467, "learning_rate": 5.0358023942476134e-08, "loss": 0.5162, "step": 31197 }, { "epoch": 0.9561726124800785, "grad_norm": 1.7931825149562055, "learning_rate": 5.0287785124059074e-08, "loss": 0.6973, "step": 31198 }, { "epoch": 0.9562032610028197, "grad_norm": 1.9928890674352386, "learning_rate": 5.021759507642277e-08, "loss": 0.5353, "step": 31199 }, { "epoch": 0.9562339095255609, "grad_norm": 1.8468163003979245, "learning_rate": 5.014745380025998e-08, "loss": 0.5, "step": 31200 }, { "epoch": 0.9562645580483021, "grad_norm": 2.0009599835039436, "learning_rate": 5.007736129625962e-08, "loss": 0.6395, "step": 31201 }, { "epoch": 0.9562952065710433, "grad_norm": 1.8657781269513714, "learning_rate": 5.00073175651139e-08, "loss": 0.5611, "step": 31202 }, { "epoch": 0.9563258550937844, "grad_norm": 0.8225018931550059, "learning_rate": 4.993732260751283e-08, "loss": 0.4115, "step": 31203 }, { "epoch": 0.9563565036165257, "grad_norm": 2.0146875842803085, "learning_rate": 4.986737642414585e-08, "loss": 0.5, "step": 31204 }, { "epoch": 0.9563871521392668, "grad_norm": 1.974603955532535, "learning_rate": 4.979747901570242e-08, "loss": 0.5732, "step": 31205 }, { "epoch": 0.9564178006620081, "grad_norm": 1.9039566972072937, "learning_rate": 4.9727630382870315e-08, "loss": 0.5973, "step": 31206 }, { "epoch": 0.9564484491847492, "grad_norm": 0.7873337024195984, "learning_rate": 4.9657830526338993e-08, "loss": 0.3991, "step": 31207 }, { "epoch": 0.9564790977074905, "grad_norm": 1.7216571679328907, "learning_rate": 4.958807944679567e-08, "loss": 0.5189, "step": 31208 }, { "epoch": 0.9565097462302317, "grad_norm": 1.9139705771960733, "learning_rate": 4.9518377144927024e-08, "loss": 0.5631, "step": 31209 }, { "epoch": 0.9565403947529729, "grad_norm": 2.0346726273896425, "learning_rate": 4.9448723621420834e-08, "loss": 0.5602, "step": 31210 }, { "epoch": 0.9565710432757141, "grad_norm": 1.9218210922262442, "learning_rate": 4.9379118876963227e-08, "loss": 0.5821, "step": 31211 }, { "epoch": 0.9566016917984553, "grad_norm": 2.0610892445287776, "learning_rate": 4.9309562912239207e-08, "loss": 0.5694, "step": 31212 }, { "epoch": 0.9566323403211965, "grad_norm": 0.7806337093462767, "learning_rate": 4.924005572793544e-08, "loss": 0.394, "step": 31213 }, { "epoch": 0.9566629888439377, "grad_norm": 2.0982098070446606, "learning_rate": 4.917059732473528e-08, "loss": 0.5895, "step": 31214 }, { "epoch": 0.9566936373666789, "grad_norm": 1.8018123749846684, "learning_rate": 4.9101187703324835e-08, "loss": 0.5672, "step": 31215 }, { "epoch": 0.9567242858894202, "grad_norm": 1.7160104069353994, "learning_rate": 4.90318268643869e-08, "loss": 0.5481, "step": 31216 }, { "epoch": 0.9567549344121613, "grad_norm": 1.9849213134970884, "learning_rate": 4.896251480860481e-08, "loss": 0.5773, "step": 31217 }, { "epoch": 0.9567855829349026, "grad_norm": 1.7930529266312334, "learning_rate": 4.889325153666247e-08, "loss": 0.5473, "step": 31218 }, { "epoch": 0.9568162314576437, "grad_norm": 1.8423418885820817, "learning_rate": 4.882403704924099e-08, "loss": 0.5426, "step": 31219 }, { "epoch": 0.956846879980385, "grad_norm": 1.6682719283395955, "learning_rate": 4.8754871347023725e-08, "loss": 0.5217, "step": 31220 }, { "epoch": 0.9568775285031261, "grad_norm": 1.9172721035780904, "learning_rate": 4.868575443069068e-08, "loss": 0.5111, "step": 31221 }, { "epoch": 0.9569081770258674, "grad_norm": 1.6612877784180577, "learning_rate": 4.8616686300924644e-08, "loss": 0.4589, "step": 31222 }, { "epoch": 0.9569388255486085, "grad_norm": 1.739604383138107, "learning_rate": 4.854766695840507e-08, "loss": 0.4854, "step": 31223 }, { "epoch": 0.9569694740713498, "grad_norm": 1.7994347902102243, "learning_rate": 4.847869640381142e-08, "loss": 0.5958, "step": 31224 }, { "epoch": 0.957000122594091, "grad_norm": 1.9498158040281375, "learning_rate": 4.840977463782481e-08, "loss": 0.6043, "step": 31225 }, { "epoch": 0.9570307711168322, "grad_norm": 1.8342794277805243, "learning_rate": 4.8340901661123596e-08, "loss": 0.5962, "step": 31226 }, { "epoch": 0.9570614196395734, "grad_norm": 1.9613206241185124, "learning_rate": 4.827207747438667e-08, "loss": 0.6194, "step": 31227 }, { "epoch": 0.9570920681623146, "grad_norm": 1.9526814673553599, "learning_rate": 4.820330207829127e-08, "loss": 0.5095, "step": 31228 }, { "epoch": 0.9571227166850558, "grad_norm": 0.8196547735706278, "learning_rate": 4.81345754735163e-08, "loss": 0.4081, "step": 31229 }, { "epoch": 0.957153365207797, "grad_norm": 1.9020701369078117, "learning_rate": 4.806589766073788e-08, "loss": 0.499, "step": 31230 }, { "epoch": 0.9571840137305382, "grad_norm": 1.7906083388956522, "learning_rate": 4.7997268640633255e-08, "loss": 0.6161, "step": 31231 }, { "epoch": 0.9572146622532794, "grad_norm": 1.8071572573712134, "learning_rate": 4.792868841387854e-08, "loss": 0.5058, "step": 31232 }, { "epoch": 0.9572453107760206, "grad_norm": 0.8087363480172078, "learning_rate": 4.786015698114988e-08, "loss": 0.4042, "step": 31233 }, { "epoch": 0.9572759592987617, "grad_norm": 1.8647708195894022, "learning_rate": 4.779167434312171e-08, "loss": 0.4688, "step": 31234 }, { "epoch": 0.957306607821503, "grad_norm": 2.0477646818026467, "learning_rate": 4.7723240500469616e-08, "loss": 0.6723, "step": 31235 }, { "epoch": 0.9573372563442442, "grad_norm": 0.7987026532201917, "learning_rate": 4.7654855453866944e-08, "loss": 0.3927, "step": 31236 }, { "epoch": 0.9573679048669854, "grad_norm": 2.0139496837293596, "learning_rate": 4.758651920398871e-08, "loss": 0.6185, "step": 31237 }, { "epoch": 0.9573985533897266, "grad_norm": 2.2566219111913446, "learning_rate": 4.7518231751507715e-08, "loss": 0.6582, "step": 31238 }, { "epoch": 0.9574292019124678, "grad_norm": 2.0243937076742022, "learning_rate": 4.744999309709619e-08, "loss": 0.616, "step": 31239 }, { "epoch": 0.957459850435209, "grad_norm": 1.7280854678336641, "learning_rate": 4.738180324142749e-08, "loss": 0.4209, "step": 31240 }, { "epoch": 0.9574904989579502, "grad_norm": 2.183943923912822, "learning_rate": 4.7313662185172745e-08, "loss": 0.6168, "step": 31241 }, { "epoch": 0.9575211474806914, "grad_norm": 1.6868372332759005, "learning_rate": 4.7245569929003644e-08, "loss": 0.4532, "step": 31242 }, { "epoch": 0.9575517960034327, "grad_norm": 1.98288775690004, "learning_rate": 4.717752647359131e-08, "loss": 0.5986, "step": 31243 }, { "epoch": 0.9575824445261738, "grad_norm": 2.015273983141244, "learning_rate": 4.710953181960576e-08, "loss": 0.4651, "step": 31244 }, { "epoch": 0.9576130930489151, "grad_norm": 0.7722838862717484, "learning_rate": 4.704158596771813e-08, "loss": 0.3939, "step": 31245 }, { "epoch": 0.9576437415716562, "grad_norm": 1.883678084306338, "learning_rate": 4.6973688918596214e-08, "loss": 0.4779, "step": 31246 }, { "epoch": 0.9576743900943975, "grad_norm": 2.3397817656115043, "learning_rate": 4.6905840672910044e-08, "loss": 0.5745, "step": 31247 }, { "epoch": 0.9577050386171386, "grad_norm": 2.0591991279478057, "learning_rate": 4.6838041231327956e-08, "loss": 0.6021, "step": 31248 }, { "epoch": 0.9577356871398799, "grad_norm": 0.8355709335979418, "learning_rate": 4.677029059451776e-08, "loss": 0.4084, "step": 31249 }, { "epoch": 0.957766335662621, "grad_norm": 1.9261013595526468, "learning_rate": 4.670258876314781e-08, "loss": 0.5974, "step": 31250 }, { "epoch": 0.9577969841853623, "grad_norm": 2.2503430330186167, "learning_rate": 4.663493573788369e-08, "loss": 0.6312, "step": 31251 }, { "epoch": 0.9578276327081034, "grad_norm": 1.9780896831778347, "learning_rate": 4.6567331519393747e-08, "loss": 0.5416, "step": 31252 }, { "epoch": 0.9578582812308447, "grad_norm": 1.661702601810785, "learning_rate": 4.6499776108343e-08, "loss": 0.4631, "step": 31253 }, { "epoch": 0.9578889297535859, "grad_norm": 2.1142703977675885, "learning_rate": 4.643226950539703e-08, "loss": 0.5723, "step": 31254 }, { "epoch": 0.9579195782763271, "grad_norm": 2.13315509136335, "learning_rate": 4.6364811711221426e-08, "loss": 0.6074, "step": 31255 }, { "epoch": 0.9579502267990683, "grad_norm": 1.9164947879639473, "learning_rate": 4.6297402726481197e-08, "loss": 0.5993, "step": 31256 }, { "epoch": 0.9579808753218095, "grad_norm": 1.988814002898255, "learning_rate": 4.623004255183971e-08, "loss": 0.5468, "step": 31257 }, { "epoch": 0.9580115238445507, "grad_norm": 1.9512710440502834, "learning_rate": 4.616273118796144e-08, "loss": 0.5861, "step": 31258 }, { "epoch": 0.9580421723672919, "grad_norm": 1.501131347621706, "learning_rate": 4.609546863550918e-08, "loss": 0.4696, "step": 31259 }, { "epoch": 0.9580728208900331, "grad_norm": 1.8852997171287702, "learning_rate": 4.602825489514573e-08, "loss": 0.5238, "step": 31260 }, { "epoch": 0.9581034694127744, "grad_norm": 1.9314346346682545, "learning_rate": 4.5961089967533346e-08, "loss": 0.5625, "step": 31261 }, { "epoch": 0.9581341179355155, "grad_norm": 1.8675671060666583, "learning_rate": 4.589397385333427e-08, "loss": 0.5321, "step": 31262 }, { "epoch": 0.9581647664582568, "grad_norm": 1.9727471297119397, "learning_rate": 4.582690655320854e-08, "loss": 0.6101, "step": 31263 }, { "epoch": 0.9581954149809979, "grad_norm": 0.8059993481207911, "learning_rate": 4.575988806781895e-08, "loss": 0.4127, "step": 31264 }, { "epoch": 0.9582260635037391, "grad_norm": 1.8940162343295217, "learning_rate": 4.569291839782386e-08, "loss": 0.4186, "step": 31265 }, { "epoch": 0.9582567120264803, "grad_norm": 2.438626499838043, "learning_rate": 4.562599754388441e-08, "loss": 0.5173, "step": 31266 }, { "epoch": 0.9582873605492215, "grad_norm": 1.9151186749795213, "learning_rate": 4.5559125506660084e-08, "loss": 0.5815, "step": 31267 }, { "epoch": 0.9583180090719627, "grad_norm": 1.835251098419266, "learning_rate": 4.5492302286808676e-08, "loss": 0.6763, "step": 31268 }, { "epoch": 0.9583486575947039, "grad_norm": 1.7696354074445333, "learning_rate": 4.542552788498966e-08, "loss": 0.5779, "step": 31269 }, { "epoch": 0.9583793061174452, "grad_norm": 1.7628099419920633, "learning_rate": 4.535880230186085e-08, "loss": 0.5102, "step": 31270 }, { "epoch": 0.9584099546401863, "grad_norm": 1.8392857005838212, "learning_rate": 4.5292125538078933e-08, "loss": 0.6319, "step": 31271 }, { "epoch": 0.9584406031629276, "grad_norm": 1.9127444681052868, "learning_rate": 4.522549759430173e-08, "loss": 0.5369, "step": 31272 }, { "epoch": 0.9584712516856687, "grad_norm": 1.9588176733428695, "learning_rate": 4.5158918471185365e-08, "loss": 0.5315, "step": 31273 }, { "epoch": 0.95850190020841, "grad_norm": 0.7595521078746742, "learning_rate": 4.5092388169385436e-08, "loss": 0.3879, "step": 31274 }, { "epoch": 0.9585325487311511, "grad_norm": 1.8738704862223994, "learning_rate": 4.502590668955864e-08, "loss": 0.5121, "step": 31275 }, { "epoch": 0.9585631972538924, "grad_norm": 1.7064790976911204, "learning_rate": 4.495947403235889e-08, "loss": 0.5248, "step": 31276 }, { "epoch": 0.9585938457766335, "grad_norm": 1.9911674787758697, "learning_rate": 4.489309019844124e-08, "loss": 0.5387, "step": 31277 }, { "epoch": 0.9586244942993748, "grad_norm": 2.124649892054935, "learning_rate": 4.482675518846069e-08, "loss": 0.6481, "step": 31278 }, { "epoch": 0.958655142822116, "grad_norm": 0.7986421894665096, "learning_rate": 4.4760469003068965e-08, "loss": 0.3967, "step": 31279 }, { "epoch": 0.9586857913448572, "grad_norm": 0.8376987083310761, "learning_rate": 4.469423164292053e-08, "loss": 0.4052, "step": 31280 }, { "epoch": 0.9587164398675984, "grad_norm": 0.8218228993675248, "learning_rate": 4.462804310866764e-08, "loss": 0.4043, "step": 31281 }, { "epoch": 0.9587470883903396, "grad_norm": 2.0524383140907845, "learning_rate": 4.456190340096256e-08, "loss": 0.589, "step": 31282 }, { "epoch": 0.9587777369130808, "grad_norm": 2.0052035565185715, "learning_rate": 4.449581252045698e-08, "loss": 0.6163, "step": 31283 }, { "epoch": 0.958808385435822, "grad_norm": 1.9097938999793798, "learning_rate": 4.442977046780206e-08, "loss": 0.5682, "step": 31284 }, { "epoch": 0.9588390339585632, "grad_norm": 1.8402601298399466, "learning_rate": 4.4363777243648377e-08, "loss": 0.5415, "step": 31285 }, { "epoch": 0.9588696824813044, "grad_norm": 0.7691515531797322, "learning_rate": 4.4297832848647084e-08, "loss": 0.3799, "step": 31286 }, { "epoch": 0.9589003310040456, "grad_norm": 1.843994181145781, "learning_rate": 4.4231937283446544e-08, "loss": 0.5445, "step": 31287 }, { "epoch": 0.9589309795267869, "grad_norm": 2.3350317712832522, "learning_rate": 4.416609054869681e-08, "loss": 0.5415, "step": 31288 }, { "epoch": 0.958961628049528, "grad_norm": 1.8280975212209682, "learning_rate": 4.410029264504678e-08, "loss": 0.5216, "step": 31289 }, { "epoch": 0.9589922765722693, "grad_norm": 2.1717239503846635, "learning_rate": 4.4034543573144295e-08, "loss": 0.4831, "step": 31290 }, { "epoch": 0.9590229250950104, "grad_norm": 1.9688551743614326, "learning_rate": 4.396884333363771e-08, "loss": 0.5709, "step": 31291 }, { "epoch": 0.9590535736177517, "grad_norm": 2.042437738788527, "learning_rate": 4.3903191927173736e-08, "loss": 0.6054, "step": 31292 }, { "epoch": 0.9590842221404928, "grad_norm": 1.7871023971833968, "learning_rate": 4.383758935440019e-08, "loss": 0.6065, "step": 31293 }, { "epoch": 0.9591148706632341, "grad_norm": 2.111490095371684, "learning_rate": 4.377203561596266e-08, "loss": 0.5533, "step": 31294 }, { "epoch": 0.9591455191859752, "grad_norm": 1.8854034970401155, "learning_rate": 4.3706530712507854e-08, "loss": 0.5915, "step": 31295 }, { "epoch": 0.9591761677087164, "grad_norm": 1.8571068425782609, "learning_rate": 4.364107464468026e-08, "loss": 0.5371, "step": 31296 }, { "epoch": 0.9592068162314576, "grad_norm": 2.4271333720975936, "learning_rate": 4.3575667413125466e-08, "loss": 0.6262, "step": 31297 }, { "epoch": 0.9592374647541988, "grad_norm": 1.8261266102139624, "learning_rate": 4.351030901848741e-08, "loss": 0.5489, "step": 31298 }, { "epoch": 0.9592681132769401, "grad_norm": 1.9105915947780459, "learning_rate": 4.344499946141056e-08, "loss": 0.5478, "step": 31299 }, { "epoch": 0.9592987617996812, "grad_norm": 1.875067974203376, "learning_rate": 4.337973874253887e-08, "loss": 0.5535, "step": 31300 }, { "epoch": 0.9593294103224225, "grad_norm": 0.8090554477455706, "learning_rate": 4.331452686251458e-08, "loss": 0.4086, "step": 31301 }, { "epoch": 0.9593600588451636, "grad_norm": 1.7902329608286685, "learning_rate": 4.324936382198053e-08, "loss": 0.4651, "step": 31302 }, { "epoch": 0.9593907073679049, "grad_norm": 2.124513569875996, "learning_rate": 4.318424962157786e-08, "loss": 0.499, "step": 31303 }, { "epoch": 0.959421355890646, "grad_norm": 0.8142861086782649, "learning_rate": 4.3119184261949945e-08, "loss": 0.3919, "step": 31304 }, { "epoch": 0.9594520044133873, "grad_norm": 2.243625649007804, "learning_rate": 4.3054167743737385e-08, "loss": 0.573, "step": 31305 }, { "epoch": 0.9594826529361284, "grad_norm": 1.9046112401696393, "learning_rate": 4.298920006757967e-08, "loss": 0.6235, "step": 31306 }, { "epoch": 0.9595133014588697, "grad_norm": 1.7734998051958895, "learning_rate": 4.2924281234117407e-08, "loss": 0.577, "step": 31307 }, { "epoch": 0.9595439499816109, "grad_norm": 1.781485772506605, "learning_rate": 4.285941124399118e-08, "loss": 0.5306, "step": 31308 }, { "epoch": 0.9595745985043521, "grad_norm": 2.0089523487517424, "learning_rate": 4.2794590097839375e-08, "loss": 0.6523, "step": 31309 }, { "epoch": 0.9596052470270933, "grad_norm": 0.8143141650426291, "learning_rate": 4.272981779630036e-08, "loss": 0.3784, "step": 31310 }, { "epoch": 0.9596358955498345, "grad_norm": 1.8996184838967074, "learning_rate": 4.266509434001309e-08, "loss": 0.5478, "step": 31311 }, { "epoch": 0.9596665440725757, "grad_norm": 0.7985292201151827, "learning_rate": 4.260041972961537e-08, "loss": 0.3916, "step": 31312 }, { "epoch": 0.9596971925953169, "grad_norm": 2.00385458370692, "learning_rate": 4.253579396574392e-08, "loss": 0.6068, "step": 31313 }, { "epoch": 0.9597278411180581, "grad_norm": 2.000867174155858, "learning_rate": 4.247121704903545e-08, "loss": 0.683, "step": 31314 }, { "epoch": 0.9597584896407994, "grad_norm": 1.8928658163125243, "learning_rate": 4.2406688980126675e-08, "loss": 0.6123, "step": 31315 }, { "epoch": 0.9597891381635405, "grad_norm": 1.942430771209755, "learning_rate": 4.234220975965375e-08, "loss": 0.5977, "step": 31316 }, { "epoch": 0.9598197866862818, "grad_norm": 1.7655410856889122, "learning_rate": 4.227777938825117e-08, "loss": 0.4815, "step": 31317 }, { "epoch": 0.9598504352090229, "grad_norm": 1.8062388466333505, "learning_rate": 4.221339786655343e-08, "loss": 0.5348, "step": 31318 }, { "epoch": 0.9598810837317642, "grad_norm": 2.3478010700552625, "learning_rate": 4.214906519519668e-08, "loss": 0.5611, "step": 31319 }, { "epoch": 0.9599117322545053, "grad_norm": 1.8506109357990748, "learning_rate": 4.20847813748132e-08, "loss": 0.5837, "step": 31320 }, { "epoch": 0.9599423807772466, "grad_norm": 1.8065367367941958, "learning_rate": 4.2020546406036364e-08, "loss": 0.6185, "step": 31321 }, { "epoch": 0.9599730292999877, "grad_norm": 2.1917629617534224, "learning_rate": 4.195636028950012e-08, "loss": 0.6561, "step": 31322 }, { "epoch": 0.960003677822729, "grad_norm": 2.1760070803535743, "learning_rate": 4.189222302583673e-08, "loss": 0.5447, "step": 31323 }, { "epoch": 0.9600343263454701, "grad_norm": 1.9858524949678285, "learning_rate": 4.182813461567792e-08, "loss": 0.495, "step": 31324 }, { "epoch": 0.9600649748682114, "grad_norm": 1.9934928791457274, "learning_rate": 4.176409505965484e-08, "loss": 0.6507, "step": 31325 }, { "epoch": 0.9600956233909526, "grad_norm": 0.8061047399956712, "learning_rate": 4.1700104358398106e-08, "loss": 0.3928, "step": 31326 }, { "epoch": 0.9601262719136937, "grad_norm": 1.8799559691656131, "learning_rate": 4.163616251253999e-08, "loss": 0.4388, "step": 31327 }, { "epoch": 0.960156920436435, "grad_norm": 1.9718060036113665, "learning_rate": 4.1572269522708875e-08, "loss": 0.4938, "step": 31328 }, { "epoch": 0.9601875689591761, "grad_norm": 1.988289741962226, "learning_rate": 4.150842538953481e-08, "loss": 0.498, "step": 31329 }, { "epoch": 0.9602182174819174, "grad_norm": 1.9225928611376337, "learning_rate": 4.144463011364675e-08, "loss": 0.5706, "step": 31330 }, { "epoch": 0.9602488660046585, "grad_norm": 1.8947058575008988, "learning_rate": 4.138088369567361e-08, "loss": 0.5559, "step": 31331 }, { "epoch": 0.9602795145273998, "grad_norm": 1.9499719892755727, "learning_rate": 4.1317186136243805e-08, "loss": 0.6391, "step": 31332 }, { "epoch": 0.9603101630501409, "grad_norm": 1.738587933860036, "learning_rate": 4.125353743598348e-08, "loss": 0.4697, "step": 31333 }, { "epoch": 0.9603408115728822, "grad_norm": 1.8711510611671118, "learning_rate": 4.118993759552159e-08, "loss": 0.4815, "step": 31334 }, { "epoch": 0.9603714600956234, "grad_norm": 1.7543672362562173, "learning_rate": 4.112638661548429e-08, "loss": 0.5619, "step": 31335 }, { "epoch": 0.9604021086183646, "grad_norm": 0.8003468442344579, "learning_rate": 4.1062884496496645e-08, "loss": 0.4046, "step": 31336 }, { "epoch": 0.9604327571411058, "grad_norm": 2.100471753046198, "learning_rate": 4.099943123918593e-08, "loss": 0.5064, "step": 31337 }, { "epoch": 0.960463405663847, "grad_norm": 2.321064635416969, "learning_rate": 4.093602684417608e-08, "loss": 0.4514, "step": 31338 }, { "epoch": 0.9604940541865882, "grad_norm": 1.945649776932131, "learning_rate": 4.087267131209271e-08, "loss": 0.5032, "step": 31339 }, { "epoch": 0.9605247027093294, "grad_norm": 2.000892849548619, "learning_rate": 4.080936464355978e-08, "loss": 0.5922, "step": 31340 }, { "epoch": 0.9605553512320706, "grad_norm": 2.2452838776406154, "learning_rate": 4.074610683920066e-08, "loss": 0.626, "step": 31341 }, { "epoch": 0.9605859997548118, "grad_norm": 1.8100125992608167, "learning_rate": 4.068289789963931e-08, "loss": 0.4871, "step": 31342 }, { "epoch": 0.960616648277553, "grad_norm": 1.8235481870119161, "learning_rate": 4.061973782549855e-08, "loss": 0.4886, "step": 31343 }, { "epoch": 0.9606472968002943, "grad_norm": 0.7694432459618766, "learning_rate": 4.0556626617399566e-08, "loss": 0.3784, "step": 31344 }, { "epoch": 0.9606779453230354, "grad_norm": 2.3335928104971724, "learning_rate": 4.0493564275965735e-08, "loss": 0.4752, "step": 31345 }, { "epoch": 0.9607085938457767, "grad_norm": 1.9227126031114978, "learning_rate": 4.043055080181824e-08, "loss": 0.636, "step": 31346 }, { "epoch": 0.9607392423685178, "grad_norm": 2.0171399586846133, "learning_rate": 4.036758619557657e-08, "loss": 0.6292, "step": 31347 }, { "epoch": 0.9607698908912591, "grad_norm": 1.8065203377065562, "learning_rate": 4.030467045786246e-08, "loss": 0.5421, "step": 31348 }, { "epoch": 0.9608005394140002, "grad_norm": 1.8419026653847066, "learning_rate": 4.024180358929486e-08, "loss": 0.4728, "step": 31349 }, { "epoch": 0.9608311879367415, "grad_norm": 0.7893837211290605, "learning_rate": 4.017898559049438e-08, "loss": 0.3817, "step": 31350 }, { "epoch": 0.9608618364594826, "grad_norm": 1.7108990299364002, "learning_rate": 4.011621646207942e-08, "loss": 0.4471, "step": 31351 }, { "epoch": 0.9608924849822239, "grad_norm": 1.7332606153693737, "learning_rate": 4.005349620466836e-08, "loss": 0.4834, "step": 31352 }, { "epoch": 0.960923133504965, "grad_norm": 1.8497134318765922, "learning_rate": 3.999082481887906e-08, "loss": 0.5365, "step": 31353 }, { "epoch": 0.9609537820277063, "grad_norm": 2.0308797456813776, "learning_rate": 3.992820230532934e-08, "loss": 0.5667, "step": 31354 }, { "epoch": 0.9609844305504475, "grad_norm": 1.9680375673029942, "learning_rate": 3.9865628664635945e-08, "loss": 0.5873, "step": 31355 }, { "epoch": 0.9610150790731887, "grad_norm": 2.05297054996984, "learning_rate": 3.980310389741615e-08, "loss": 0.5211, "step": 31356 }, { "epoch": 0.9610457275959299, "grad_norm": 1.8804333168844207, "learning_rate": 3.974062800428502e-08, "loss": 0.5435, "step": 31357 }, { "epoch": 0.961076376118671, "grad_norm": 1.7571645194728986, "learning_rate": 3.9678200985858726e-08, "loss": 0.4818, "step": 31358 }, { "epoch": 0.9611070246414123, "grad_norm": 0.7513353805283806, "learning_rate": 3.961582284275234e-08, "loss": 0.4111, "step": 31359 }, { "epoch": 0.9611376731641534, "grad_norm": 1.933987012292599, "learning_rate": 3.9553493575579804e-08, "loss": 0.6062, "step": 31360 }, { "epoch": 0.9611683216868947, "grad_norm": 2.2988674342884825, "learning_rate": 3.949121318495674e-08, "loss": 0.6434, "step": 31361 }, { "epoch": 0.9611989702096359, "grad_norm": 2.075040895730147, "learning_rate": 3.9428981671495446e-08, "loss": 0.5807, "step": 31362 }, { "epoch": 0.9612296187323771, "grad_norm": 2.1189770960405383, "learning_rate": 3.936679903580986e-08, "loss": 0.5595, "step": 31363 }, { "epoch": 0.9612602672551183, "grad_norm": 1.719705376925939, "learning_rate": 3.9304665278512846e-08, "loss": 0.5005, "step": 31364 }, { "epoch": 0.9612909157778595, "grad_norm": 1.8044127304997921, "learning_rate": 3.924258040021556e-08, "loss": 0.4966, "step": 31365 }, { "epoch": 0.9613215643006007, "grad_norm": 1.6624869439568088, "learning_rate": 3.9180544401530296e-08, "loss": 0.4774, "step": 31366 }, { "epoch": 0.9613522128233419, "grad_norm": 1.7327501667045218, "learning_rate": 3.911855728306879e-08, "loss": 0.5291, "step": 31367 }, { "epoch": 0.9613828613460831, "grad_norm": 1.7845504953503635, "learning_rate": 3.905661904544167e-08, "loss": 0.5701, "step": 31368 }, { "epoch": 0.9614135098688243, "grad_norm": 0.7915888684570761, "learning_rate": 3.899472968925844e-08, "loss": 0.3944, "step": 31369 }, { "epoch": 0.9614441583915655, "grad_norm": 2.0456043050056065, "learning_rate": 3.893288921512972e-08, "loss": 0.57, "step": 31370 }, { "epoch": 0.9614748069143068, "grad_norm": 2.135312733619914, "learning_rate": 3.8871097623664475e-08, "loss": 0.5066, "step": 31371 }, { "epoch": 0.9615054554370479, "grad_norm": 1.9540228766124634, "learning_rate": 3.880935491547222e-08, "loss": 0.5257, "step": 31372 }, { "epoch": 0.9615361039597892, "grad_norm": 2.1467835640940196, "learning_rate": 3.874766109115968e-08, "loss": 0.5861, "step": 31373 }, { "epoch": 0.9615667524825303, "grad_norm": 1.8513885613856322, "learning_rate": 3.8686016151336384e-08, "loss": 0.5566, "step": 31374 }, { "epoch": 0.9615974010052716, "grad_norm": 1.8253164623605238, "learning_rate": 3.8624420096609604e-08, "loss": 0.6485, "step": 31375 }, { "epoch": 0.9616280495280127, "grad_norm": 1.8079707747994378, "learning_rate": 3.856287292758554e-08, "loss": 0.569, "step": 31376 }, { "epoch": 0.961658698050754, "grad_norm": 1.9603243958376573, "learning_rate": 3.8501374644870914e-08, "loss": 0.5251, "step": 31377 }, { "epoch": 0.9616893465734951, "grad_norm": 1.7361274764273489, "learning_rate": 3.8439925249071366e-08, "loss": 0.572, "step": 31378 }, { "epoch": 0.9617199950962364, "grad_norm": 0.8136981348035252, "learning_rate": 3.837852474079307e-08, "loss": 0.394, "step": 31379 }, { "epoch": 0.9617506436189776, "grad_norm": 2.0054595626454033, "learning_rate": 3.831717312064054e-08, "loss": 0.5342, "step": 31380 }, { "epoch": 0.9617812921417188, "grad_norm": 1.9822359016977573, "learning_rate": 3.8255870389218297e-08, "loss": 0.5602, "step": 31381 }, { "epoch": 0.96181194066446, "grad_norm": 0.7985270209598556, "learning_rate": 3.8194616547130856e-08, "loss": 0.3879, "step": 31382 }, { "epoch": 0.9618425891872012, "grad_norm": 1.8381390157496567, "learning_rate": 3.813341159498107e-08, "loss": 0.6462, "step": 31383 }, { "epoch": 0.9618732377099424, "grad_norm": 2.04092190471796, "learning_rate": 3.807225553337235e-08, "loss": 0.5674, "step": 31384 }, { "epoch": 0.9619038862326836, "grad_norm": 1.936105160282342, "learning_rate": 3.801114836290754e-08, "loss": 0.5758, "step": 31385 }, { "epoch": 0.9619345347554248, "grad_norm": 1.9342319043493799, "learning_rate": 3.795009008418837e-08, "loss": 0.5974, "step": 31386 }, { "epoch": 0.961965183278166, "grad_norm": 2.001503357467748, "learning_rate": 3.788908069781605e-08, "loss": 0.52, "step": 31387 }, { "epoch": 0.9619958318009072, "grad_norm": 2.0790202623469134, "learning_rate": 3.782812020439286e-08, "loss": 0.674, "step": 31388 }, { "epoch": 0.9620264803236483, "grad_norm": 1.975149096819531, "learning_rate": 3.7767208604518325e-08, "loss": 0.5191, "step": 31389 }, { "epoch": 0.9620571288463896, "grad_norm": 2.165529403701501, "learning_rate": 3.770634589879363e-08, "loss": 0.6017, "step": 31390 }, { "epoch": 0.9620877773691308, "grad_norm": 2.021856870221355, "learning_rate": 3.764553208781774e-08, "loss": 0.5744, "step": 31391 }, { "epoch": 0.962118425891872, "grad_norm": 1.992241132110484, "learning_rate": 3.7584767172190175e-08, "loss": 0.52, "step": 31392 }, { "epoch": 0.9621490744146132, "grad_norm": 1.9004699543989214, "learning_rate": 3.7524051152509346e-08, "loss": 0.5357, "step": 31393 }, { "epoch": 0.9621797229373544, "grad_norm": 0.8321379139152727, "learning_rate": 3.746338402937366e-08, "loss": 0.4031, "step": 31394 }, { "epoch": 0.9622103714600956, "grad_norm": 1.835714526366607, "learning_rate": 3.740276580338098e-08, "loss": 0.6085, "step": 31395 }, { "epoch": 0.9622410199828368, "grad_norm": 2.003353399594089, "learning_rate": 3.7342196475129156e-08, "loss": 0.6529, "step": 31396 }, { "epoch": 0.962271668505578, "grad_norm": 0.7826198340830863, "learning_rate": 3.728167604521382e-08, "loss": 0.3957, "step": 31397 }, { "epoch": 0.9623023170283193, "grad_norm": 1.735353975411378, "learning_rate": 3.722120451423228e-08, "loss": 0.521, "step": 31398 }, { "epoch": 0.9623329655510604, "grad_norm": 0.8033151442230045, "learning_rate": 3.7160781882780164e-08, "loss": 0.3888, "step": 31399 }, { "epoch": 0.9623636140738017, "grad_norm": 1.890714950788884, "learning_rate": 3.710040815145199e-08, "loss": 0.5939, "step": 31400 }, { "epoch": 0.9623942625965428, "grad_norm": 0.7831916695759189, "learning_rate": 3.704008332084341e-08, "loss": 0.4122, "step": 31401 }, { "epoch": 0.9624249111192841, "grad_norm": 2.024422372761532, "learning_rate": 3.6979807391549495e-08, "loss": 0.5611, "step": 31402 }, { "epoch": 0.9624555596420252, "grad_norm": 1.8138707171116777, "learning_rate": 3.6919580364162547e-08, "loss": 0.589, "step": 31403 }, { "epoch": 0.9624862081647665, "grad_norm": 0.7510568060037691, "learning_rate": 3.685940223927709e-08, "loss": 0.402, "step": 31404 }, { "epoch": 0.9625168566875076, "grad_norm": 2.21716087353525, "learning_rate": 3.6799273017485985e-08, "loss": 0.5189, "step": 31405 }, { "epoch": 0.9625475052102489, "grad_norm": 0.7833276185145915, "learning_rate": 3.673919269938153e-08, "loss": 0.3772, "step": 31406 }, { "epoch": 0.96257815373299, "grad_norm": 0.7691089633083004, "learning_rate": 3.667916128555549e-08, "loss": 0.3959, "step": 31407 }, { "epoch": 0.9626088022557313, "grad_norm": 2.0186008917900256, "learning_rate": 3.661917877659959e-08, "loss": 0.5259, "step": 31408 }, { "epoch": 0.9626394507784725, "grad_norm": 1.952247957210946, "learning_rate": 3.655924517310505e-08, "loss": 0.6227, "step": 31409 }, { "epoch": 0.9626700993012137, "grad_norm": 0.9022158369493533, "learning_rate": 3.649936047566249e-08, "loss": 0.4103, "step": 31410 }, { "epoch": 0.9627007478239549, "grad_norm": 2.0960228315069616, "learning_rate": 3.6439524684860894e-08, "loss": 0.589, "step": 31411 }, { "epoch": 0.9627313963466961, "grad_norm": 1.8155498609011573, "learning_rate": 3.6379737801290896e-08, "loss": 0.5569, "step": 31412 }, { "epoch": 0.9627620448694373, "grad_norm": 1.850578917064874, "learning_rate": 3.631999982554202e-08, "loss": 0.5523, "step": 31413 }, { "epoch": 0.9627926933921785, "grad_norm": 1.8254579221389662, "learning_rate": 3.626031075820158e-08, "loss": 0.4851, "step": 31414 }, { "epoch": 0.9628233419149197, "grad_norm": 2.103653317462176, "learning_rate": 3.6200670599858544e-08, "loss": 0.4957, "step": 31415 }, { "epoch": 0.962853990437661, "grad_norm": 1.8215367944229144, "learning_rate": 3.614107935110023e-08, "loss": 0.5518, "step": 31416 }, { "epoch": 0.9628846389604021, "grad_norm": 1.9980605380843959, "learning_rate": 3.608153701251393e-08, "loss": 0.5618, "step": 31417 }, { "epoch": 0.9629152874831434, "grad_norm": 2.1860303911093872, "learning_rate": 3.6022043584686416e-08, "loss": 0.596, "step": 31418 }, { "epoch": 0.9629459360058845, "grad_norm": 1.9777807445055087, "learning_rate": 3.596259906820387e-08, "loss": 0.6373, "step": 31419 }, { "epoch": 0.9629765845286257, "grad_norm": 1.9165307021181235, "learning_rate": 3.590320346365139e-08, "loss": 0.6071, "step": 31420 }, { "epoch": 0.9630072330513669, "grad_norm": 0.8099800267237576, "learning_rate": 3.584385677161517e-08, "loss": 0.4185, "step": 31421 }, { "epoch": 0.9630378815741081, "grad_norm": 1.9047544241728034, "learning_rate": 3.5784558992679744e-08, "loss": 0.5726, "step": 31422 }, { "epoch": 0.9630685300968493, "grad_norm": 2.248549772413251, "learning_rate": 3.572531012742908e-08, "loss": 0.6708, "step": 31423 }, { "epoch": 0.9630991786195905, "grad_norm": 0.7776625391962969, "learning_rate": 3.5666110176447166e-08, "loss": 0.3968, "step": 31424 }, { "epoch": 0.9631298271423318, "grad_norm": 0.7674493036269067, "learning_rate": 3.5606959140316863e-08, "loss": 0.3833, "step": 31425 }, { "epoch": 0.9631604756650729, "grad_norm": 0.7755819061495585, "learning_rate": 3.5547857019621603e-08, "loss": 0.395, "step": 31426 }, { "epoch": 0.9631911241878142, "grad_norm": 1.652849448300206, "learning_rate": 3.548880381494369e-08, "loss": 0.4881, "step": 31427 }, { "epoch": 0.9632217727105553, "grad_norm": 1.9906979510416816, "learning_rate": 3.542979952686432e-08, "loss": 0.5531, "step": 31428 }, { "epoch": 0.9632524212332966, "grad_norm": 1.9000034859761354, "learning_rate": 3.537084415596636e-08, "loss": 0.5855, "step": 31429 }, { "epoch": 0.9632830697560377, "grad_norm": 2.0946698386212885, "learning_rate": 3.531193770282826e-08, "loss": 0.6489, "step": 31430 }, { "epoch": 0.963313718278779, "grad_norm": 0.7564458680191282, "learning_rate": 3.5253080168033414e-08, "loss": 0.3798, "step": 31431 }, { "epoch": 0.9633443668015201, "grad_norm": 1.8153450673964353, "learning_rate": 3.519427155215971e-08, "loss": 0.545, "step": 31432 }, { "epoch": 0.9633750153242614, "grad_norm": 1.8996949575411035, "learning_rate": 3.513551185578667e-08, "loss": 0.6358, "step": 31433 }, { "epoch": 0.9634056638470025, "grad_norm": 2.191179024721583, "learning_rate": 3.5076801079493847e-08, "loss": 0.6203, "step": 31434 }, { "epoch": 0.9634363123697438, "grad_norm": 1.9558003626177223, "learning_rate": 3.501813922386022e-08, "loss": 0.5448, "step": 31435 }, { "epoch": 0.963466960892485, "grad_norm": 1.7895066462813691, "learning_rate": 3.4959526289463085e-08, "loss": 0.5529, "step": 31436 }, { "epoch": 0.9634976094152262, "grad_norm": 2.288730705587839, "learning_rate": 3.490096227687978e-08, "loss": 0.5507, "step": 31437 }, { "epoch": 0.9635282579379674, "grad_norm": 1.8577096458535458, "learning_rate": 3.484244718668761e-08, "loss": 0.4834, "step": 31438 }, { "epoch": 0.9635589064607086, "grad_norm": 1.9081739514597538, "learning_rate": 3.478398101946334e-08, "loss": 0.5743, "step": 31439 }, { "epoch": 0.9635895549834498, "grad_norm": 2.3242888217624476, "learning_rate": 3.4725563775783175e-08, "loss": 0.692, "step": 31440 }, { "epoch": 0.963620203506191, "grad_norm": 2.032487282275462, "learning_rate": 3.466719545622166e-08, "loss": 0.5609, "step": 31441 }, { "epoch": 0.9636508520289322, "grad_norm": 1.811200747478175, "learning_rate": 3.460887606135554e-08, "loss": 0.5526, "step": 31442 }, { "epoch": 0.9636815005516735, "grad_norm": 2.3337069687739667, "learning_rate": 3.455060559175882e-08, "loss": 0.6151, "step": 31443 }, { "epoch": 0.9637121490744146, "grad_norm": 1.9189873651716352, "learning_rate": 3.449238404800492e-08, "loss": 0.6229, "step": 31444 }, { "epoch": 0.9637427975971559, "grad_norm": 2.0822818259379052, "learning_rate": 3.443421143066783e-08, "loss": 0.5869, "step": 31445 }, { "epoch": 0.963773446119897, "grad_norm": 2.1018895405664026, "learning_rate": 3.437608774032153e-08, "loss": 0.5487, "step": 31446 }, { "epoch": 0.9638040946426383, "grad_norm": 1.9101729908505163, "learning_rate": 3.431801297753778e-08, "loss": 0.4893, "step": 31447 }, { "epoch": 0.9638347431653794, "grad_norm": 1.801931892415407, "learning_rate": 3.4259987142888916e-08, "loss": 0.5397, "step": 31448 }, { "epoch": 0.9638653916881207, "grad_norm": 1.6830435267278268, "learning_rate": 3.420201023694725e-08, "loss": 0.4871, "step": 31449 }, { "epoch": 0.9638960402108618, "grad_norm": 2.1012958416488563, "learning_rate": 3.4144082260283984e-08, "loss": 0.6854, "step": 31450 }, { "epoch": 0.963926688733603, "grad_norm": 1.8811639296980769, "learning_rate": 3.4086203213469224e-08, "loss": 0.5554, "step": 31451 }, { "epoch": 0.9639573372563442, "grad_norm": 1.7727879323775038, "learning_rate": 3.402837309707363e-08, "loss": 0.5913, "step": 31452 }, { "epoch": 0.9639879857790854, "grad_norm": 1.9616452624981282, "learning_rate": 3.39705919116673e-08, "loss": 0.6139, "step": 31453 }, { "epoch": 0.9640186343018267, "grad_norm": 0.8020516865176345, "learning_rate": 3.391285965781976e-08, "loss": 0.379, "step": 31454 }, { "epoch": 0.9640492828245678, "grad_norm": 1.9432139612013453, "learning_rate": 3.385517633609892e-08, "loss": 0.5702, "step": 31455 }, { "epoch": 0.9640799313473091, "grad_norm": 1.793000416061456, "learning_rate": 3.3797541947073744e-08, "loss": 0.4684, "step": 31456 }, { "epoch": 0.9641105798700502, "grad_norm": 1.7302366745880435, "learning_rate": 3.3739956491311564e-08, "loss": 0.527, "step": 31457 }, { "epoch": 0.9641412283927915, "grad_norm": 2.195405306877065, "learning_rate": 3.368241996938137e-08, "loss": 0.593, "step": 31458 }, { "epoch": 0.9641718769155326, "grad_norm": 1.963633648728703, "learning_rate": 3.362493238184827e-08, "loss": 0.5088, "step": 31459 }, { "epoch": 0.9642025254382739, "grad_norm": 0.7511786265746598, "learning_rate": 3.356749372927903e-08, "loss": 0.3749, "step": 31460 }, { "epoch": 0.964233173961015, "grad_norm": 1.6624959269529287, "learning_rate": 3.3510104012240976e-08, "loss": 0.5062, "step": 31461 }, { "epoch": 0.9642638224837563, "grad_norm": 1.8991247675099978, "learning_rate": 3.345276323129809e-08, "loss": 0.5524, "step": 31462 }, { "epoch": 0.9642944710064975, "grad_norm": 1.883329035236024, "learning_rate": 3.3395471387015485e-08, "loss": 0.5576, "step": 31463 }, { "epoch": 0.9643251195292387, "grad_norm": 1.8533434403528388, "learning_rate": 3.333822847995882e-08, "loss": 0.6314, "step": 31464 }, { "epoch": 0.9643557680519799, "grad_norm": 2.0146740301687234, "learning_rate": 3.3281034510690977e-08, "loss": 0.5609, "step": 31465 }, { "epoch": 0.9643864165747211, "grad_norm": 1.6536162082268724, "learning_rate": 3.322388947977595e-08, "loss": 0.4922, "step": 31466 }, { "epoch": 0.9644170650974623, "grad_norm": 1.9549016515219781, "learning_rate": 3.316679338777662e-08, "loss": 0.6063, "step": 31467 }, { "epoch": 0.9644477136202035, "grad_norm": 2.0433809811011328, "learning_rate": 3.3109746235255316e-08, "loss": 0.5326, "step": 31468 }, { "epoch": 0.9644783621429447, "grad_norm": 1.8689639905442827, "learning_rate": 3.305274802277547e-08, "loss": 0.4561, "step": 31469 }, { "epoch": 0.964509010665686, "grad_norm": 1.840285936889023, "learning_rate": 3.2995798750897204e-08, "loss": 0.5553, "step": 31470 }, { "epoch": 0.9645396591884271, "grad_norm": 1.7233999644454332, "learning_rate": 3.2938898420182276e-08, "loss": 0.5522, "step": 31471 }, { "epoch": 0.9645703077111684, "grad_norm": 1.8571551987946575, "learning_rate": 3.2882047031191354e-08, "loss": 0.6392, "step": 31472 }, { "epoch": 0.9646009562339095, "grad_norm": 1.912111613840711, "learning_rate": 3.282524458448455e-08, "loss": 0.6033, "step": 31473 }, { "epoch": 0.9646316047566508, "grad_norm": 2.3782972051589675, "learning_rate": 3.2768491080620854e-08, "loss": 0.6618, "step": 31474 }, { "epoch": 0.9646622532793919, "grad_norm": 1.8595228902115581, "learning_rate": 3.2711786520160934e-08, "loss": 0.4713, "step": 31475 }, { "epoch": 0.9646929018021332, "grad_norm": 1.9903870736246856, "learning_rate": 3.265513090366213e-08, "loss": 0.6249, "step": 31476 }, { "epoch": 0.9647235503248743, "grad_norm": 1.8136506524618192, "learning_rate": 3.259852423168397e-08, "loss": 0.6443, "step": 31477 }, { "epoch": 0.9647541988476156, "grad_norm": 2.0352278345350414, "learning_rate": 3.25419665047827e-08, "loss": 0.6252, "step": 31478 }, { "epoch": 0.9647848473703567, "grad_norm": 1.944744638898108, "learning_rate": 3.248545772351674e-08, "loss": 0.6973, "step": 31479 }, { "epoch": 0.964815495893098, "grad_norm": 1.8557763947829817, "learning_rate": 3.2428997888442894e-08, "loss": 0.5881, "step": 31480 }, { "epoch": 0.9648461444158392, "grad_norm": 0.8025826487879908, "learning_rate": 3.237258700011736e-08, "loss": 0.3931, "step": 31481 }, { "epoch": 0.9648767929385803, "grad_norm": 2.0833383721960983, "learning_rate": 3.231622505909471e-08, "loss": 0.6536, "step": 31482 }, { "epoch": 0.9649074414613216, "grad_norm": 1.8993911244464958, "learning_rate": 3.225991206593226e-08, "loss": 0.493, "step": 31483 }, { "epoch": 0.9649380899840627, "grad_norm": 1.8705753133149183, "learning_rate": 3.2203648021183475e-08, "loss": 0.5247, "step": 31484 }, { "epoch": 0.964968738506804, "grad_norm": 2.1688802689208715, "learning_rate": 3.2147432925403455e-08, "loss": 0.5638, "step": 31485 }, { "epoch": 0.9649993870295451, "grad_norm": 2.0512445025570982, "learning_rate": 3.2091266779145643e-08, "loss": 0.5967, "step": 31486 }, { "epoch": 0.9650300355522864, "grad_norm": 2.0191953910872082, "learning_rate": 3.20351495829635e-08, "loss": 0.6348, "step": 31487 }, { "epoch": 0.9650606840750275, "grad_norm": 1.7919074832627273, "learning_rate": 3.1979081337410453e-08, "loss": 0.5352, "step": 31488 }, { "epoch": 0.9650913325977688, "grad_norm": 1.786741061577768, "learning_rate": 3.1923062043038856e-08, "loss": 0.5569, "step": 31489 }, { "epoch": 0.96512198112051, "grad_norm": 1.8587067864187647, "learning_rate": 3.186709170039992e-08, "loss": 0.5685, "step": 31490 }, { "epoch": 0.9651526296432512, "grad_norm": 0.8419494042591912, "learning_rate": 3.181117031004599e-08, "loss": 0.4008, "step": 31491 }, { "epoch": 0.9651832781659924, "grad_norm": 2.05710878214068, "learning_rate": 3.175529787252773e-08, "loss": 0.4057, "step": 31492 }, { "epoch": 0.9652139266887336, "grad_norm": 2.087259204636892, "learning_rate": 3.1699474388395824e-08, "loss": 0.5518, "step": 31493 }, { "epoch": 0.9652445752114748, "grad_norm": 1.8704585413734467, "learning_rate": 3.1643699858199815e-08, "loss": 0.5172, "step": 31494 }, { "epoch": 0.965275223734216, "grad_norm": 2.210152829977733, "learning_rate": 3.158797428248983e-08, "loss": 0.5757, "step": 31495 }, { "epoch": 0.9653058722569572, "grad_norm": 2.2296640095567803, "learning_rate": 3.153229766181487e-08, "loss": 0.5385, "step": 31496 }, { "epoch": 0.9653365207796984, "grad_norm": 1.746863381355645, "learning_rate": 3.1476669996723384e-08, "loss": 0.5387, "step": 31497 }, { "epoch": 0.9653671693024396, "grad_norm": 1.8888872556623622, "learning_rate": 3.1421091287763826e-08, "loss": 0.5286, "step": 31498 }, { "epoch": 0.9653978178251809, "grad_norm": 2.2515643142840815, "learning_rate": 3.136556153548298e-08, "loss": 0.6445, "step": 31499 }, { "epoch": 0.965428466347922, "grad_norm": 2.226626635908187, "learning_rate": 3.131008074042874e-08, "loss": 0.7042, "step": 31500 }, { "epoch": 0.9654591148706633, "grad_norm": 1.7230995443892625, "learning_rate": 3.1254648903147336e-08, "loss": 0.5395, "step": 31501 }, { "epoch": 0.9654897633934044, "grad_norm": 1.8071512030693104, "learning_rate": 3.119926602418555e-08, "loss": 0.4945, "step": 31502 }, { "epoch": 0.9655204119161457, "grad_norm": 1.8499991345063, "learning_rate": 3.1143932104088506e-08, "loss": 0.6439, "step": 31503 }, { "epoch": 0.9655510604388868, "grad_norm": 1.771986545769202, "learning_rate": 3.1088647143401875e-08, "loss": 0.5124, "step": 31504 }, { "epoch": 0.9655817089616281, "grad_norm": 0.7873203592981427, "learning_rate": 3.1033411142670225e-08, "loss": 0.4021, "step": 31505 }, { "epoch": 0.9656123574843692, "grad_norm": 1.8981068610346783, "learning_rate": 3.097822410243756e-08, "loss": 0.4992, "step": 31506 }, { "epoch": 0.9656430060071105, "grad_norm": 0.8311488735732468, "learning_rate": 3.09230860232479e-08, "loss": 0.4108, "step": 31507 }, { "epoch": 0.9656736545298517, "grad_norm": 0.8131236082247367, "learning_rate": 3.086799690564413e-08, "loss": 0.3954, "step": 31508 }, { "epoch": 0.9657043030525929, "grad_norm": 2.2049567276788014, "learning_rate": 3.081295675016971e-08, "loss": 0.5567, "step": 31509 }, { "epoch": 0.9657349515753341, "grad_norm": 1.8323119957602467, "learning_rate": 3.075796555736643e-08, "loss": 0.5465, "step": 31510 }, { "epoch": 0.9657656000980753, "grad_norm": 0.7988874350354227, "learning_rate": 3.0703023327776635e-08, "loss": 0.4236, "step": 31511 }, { "epoch": 0.9657962486208165, "grad_norm": 2.2248711491409243, "learning_rate": 3.0648130061941004e-08, "loss": 0.524, "step": 31512 }, { "epoch": 0.9658268971435576, "grad_norm": 2.019258513819308, "learning_rate": 3.0593285760401325e-08, "loss": 0.5927, "step": 31513 }, { "epoch": 0.9658575456662989, "grad_norm": 0.7644366190025282, "learning_rate": 3.0538490423697166e-08, "loss": 0.3923, "step": 31514 }, { "epoch": 0.96588819418904, "grad_norm": 0.809611134896956, "learning_rate": 3.048374405236865e-08, "loss": 0.4222, "step": 31515 }, { "epoch": 0.9659188427117813, "grad_norm": 1.9460656281031075, "learning_rate": 3.042904664695534e-08, "loss": 0.5497, "step": 31516 }, { "epoch": 0.9659494912345225, "grad_norm": 1.7321151796738758, "learning_rate": 3.037439820799626e-08, "loss": 0.5047, "step": 31517 }, { "epoch": 0.9659801397572637, "grad_norm": 1.6890862766954806, "learning_rate": 3.031979873602986e-08, "loss": 0.6549, "step": 31518 }, { "epoch": 0.9660107882800049, "grad_norm": 1.6898333484141275, "learning_rate": 3.026524823159405e-08, "loss": 0.5171, "step": 31519 }, { "epoch": 0.9660414368027461, "grad_norm": 1.9195848369368587, "learning_rate": 3.021074669522617e-08, "loss": 0.5802, "step": 31520 }, { "epoch": 0.9660720853254873, "grad_norm": 2.1072586489430853, "learning_rate": 3.015629412746357e-08, "loss": 0.6036, "step": 31521 }, { "epoch": 0.9661027338482285, "grad_norm": 1.932350400558947, "learning_rate": 3.010189052884249e-08, "loss": 0.6393, "step": 31522 }, { "epoch": 0.9661333823709697, "grad_norm": 0.8075014291574936, "learning_rate": 3.0047535899898616e-08, "loss": 0.3839, "step": 31523 }, { "epoch": 0.966164030893711, "grad_norm": 1.5783472266829515, "learning_rate": 2.999323024116873e-08, "loss": 0.4415, "step": 31524 }, { "epoch": 0.9661946794164521, "grad_norm": 0.8307456000120635, "learning_rate": 2.993897355318687e-08, "loss": 0.4243, "step": 31525 }, { "epoch": 0.9662253279391934, "grad_norm": 1.8621649029413514, "learning_rate": 2.988476583648814e-08, "loss": 0.4708, "step": 31526 }, { "epoch": 0.9662559764619345, "grad_norm": 2.0206422263819883, "learning_rate": 2.983060709160601e-08, "loss": 0.5837, "step": 31527 }, { "epoch": 0.9662866249846758, "grad_norm": 0.8262147597058059, "learning_rate": 2.9776497319074503e-08, "loss": 0.3881, "step": 31528 }, { "epoch": 0.9663172735074169, "grad_norm": 1.8630029987451344, "learning_rate": 2.9722436519427632e-08, "loss": 0.559, "step": 31529 }, { "epoch": 0.9663479220301582, "grad_norm": 1.885153536611853, "learning_rate": 2.966842469319664e-08, "loss": 0.6325, "step": 31530 }, { "epoch": 0.9663785705528993, "grad_norm": 2.0252969344362683, "learning_rate": 2.9614461840914432e-08, "loss": 0.5642, "step": 31531 }, { "epoch": 0.9664092190756406, "grad_norm": 1.9974182153851014, "learning_rate": 2.9560547963112808e-08, "loss": 0.6199, "step": 31532 }, { "epoch": 0.9664398675983817, "grad_norm": 1.7844961115310733, "learning_rate": 2.9506683060323005e-08, "loss": 0.5003, "step": 31533 }, { "epoch": 0.966470516121123, "grad_norm": 2.333120996702714, "learning_rate": 2.945286713307516e-08, "loss": 0.6293, "step": 31534 }, { "epoch": 0.9665011646438642, "grad_norm": 1.8313614552900286, "learning_rate": 2.9399100181900507e-08, "loss": 0.6094, "step": 31535 }, { "epoch": 0.9665318131666054, "grad_norm": 1.653855321707302, "learning_rate": 2.9345382207328078e-08, "loss": 0.4515, "step": 31536 }, { "epoch": 0.9665624616893466, "grad_norm": 1.991531513402732, "learning_rate": 2.9291713209887442e-08, "loss": 0.5547, "step": 31537 }, { "epoch": 0.9665931102120878, "grad_norm": 1.7426987151336524, "learning_rate": 2.9238093190107066e-08, "loss": 0.4717, "step": 31538 }, { "epoch": 0.966623758734829, "grad_norm": 1.8871132478779173, "learning_rate": 2.918452214851597e-08, "loss": 0.5521, "step": 31539 }, { "epoch": 0.9666544072575702, "grad_norm": 0.8392348646909842, "learning_rate": 2.9131000085641514e-08, "loss": 0.4188, "step": 31540 }, { "epoch": 0.9666850557803114, "grad_norm": 2.0565494094375047, "learning_rate": 2.9077527002011054e-08, "loss": 0.6424, "step": 31541 }, { "epoch": 0.9667157043030526, "grad_norm": 1.7712717260646573, "learning_rate": 2.902410289815194e-08, "loss": 0.5688, "step": 31542 }, { "epoch": 0.9667463528257938, "grad_norm": 1.6266064820825097, "learning_rate": 2.897072777458987e-08, "loss": 0.5016, "step": 31543 }, { "epoch": 0.966777001348535, "grad_norm": 0.8094435363072211, "learning_rate": 2.891740163185108e-08, "loss": 0.399, "step": 31544 }, { "epoch": 0.9668076498712762, "grad_norm": 0.7987073205218556, "learning_rate": 2.8864124470461276e-08, "loss": 0.4024, "step": 31545 }, { "epoch": 0.9668382983940174, "grad_norm": 0.8135651587143096, "learning_rate": 2.8810896290945023e-08, "loss": 0.4091, "step": 31546 }, { "epoch": 0.9668689469167586, "grad_norm": 0.8030438085715187, "learning_rate": 2.875771709382691e-08, "loss": 0.3805, "step": 31547 }, { "epoch": 0.9668995954394998, "grad_norm": 0.8721169579245636, "learning_rate": 2.870458687963096e-08, "loss": 0.39, "step": 31548 }, { "epoch": 0.966930243962241, "grad_norm": 1.8260584573140666, "learning_rate": 2.865150564888064e-08, "loss": 0.5887, "step": 31549 }, { "epoch": 0.9669608924849822, "grad_norm": 1.9531791169278143, "learning_rate": 2.859847340209887e-08, "loss": 0.4634, "step": 31550 }, { "epoch": 0.9669915410077234, "grad_norm": 1.9889724526184527, "learning_rate": 2.8545490139809117e-08, "loss": 0.616, "step": 31551 }, { "epoch": 0.9670221895304646, "grad_norm": 0.7734974911137978, "learning_rate": 2.8492555862531522e-08, "loss": 0.383, "step": 31552 }, { "epoch": 0.9670528380532059, "grad_norm": 1.9427266347098229, "learning_rate": 2.843967057078956e-08, "loss": 0.5821, "step": 31553 }, { "epoch": 0.967083486575947, "grad_norm": 2.0666172013389583, "learning_rate": 2.8386834265103357e-08, "loss": 0.5804, "step": 31554 }, { "epoch": 0.9671141350986883, "grad_norm": 0.7936356420837691, "learning_rate": 2.8334046945993622e-08, "loss": 0.3851, "step": 31555 }, { "epoch": 0.9671447836214294, "grad_norm": 2.1267796570215616, "learning_rate": 2.8281308613980483e-08, "loss": 0.6006, "step": 31556 }, { "epoch": 0.9671754321441707, "grad_norm": 0.7907657036774621, "learning_rate": 2.822861926958409e-08, "loss": 0.391, "step": 31557 }, { "epoch": 0.9672060806669118, "grad_norm": 1.7669498671253923, "learning_rate": 2.817597891332291e-08, "loss": 0.5092, "step": 31558 }, { "epoch": 0.9672367291896531, "grad_norm": 0.8124743347486859, "learning_rate": 2.8123387545715975e-08, "loss": 0.4137, "step": 31559 }, { "epoch": 0.9672673777123942, "grad_norm": 1.6792816467315586, "learning_rate": 2.8070845167281202e-08, "loss": 0.5405, "step": 31560 }, { "epoch": 0.9672980262351355, "grad_norm": 1.7214963053365138, "learning_rate": 2.8018351778537068e-08, "loss": 0.5658, "step": 31561 }, { "epoch": 0.9673286747578766, "grad_norm": 2.0404048021591805, "learning_rate": 2.7965907379999822e-08, "loss": 0.6175, "step": 31562 }, { "epoch": 0.9673593232806179, "grad_norm": 1.5210486125090268, "learning_rate": 2.7913511972186834e-08, "loss": 0.4547, "step": 31563 }, { "epoch": 0.9673899718033591, "grad_norm": 1.8149173491241795, "learning_rate": 2.78611655556138e-08, "loss": 0.5697, "step": 31564 }, { "epoch": 0.9674206203261003, "grad_norm": 1.9236009015268865, "learning_rate": 2.7808868130796974e-08, "loss": 0.587, "step": 31565 }, { "epoch": 0.9674512688488415, "grad_norm": 1.8116045115297836, "learning_rate": 2.7756619698252053e-08, "loss": 0.6425, "step": 31566 }, { "epoch": 0.9674819173715827, "grad_norm": 0.8179684873016287, "learning_rate": 2.7704420258492515e-08, "loss": 0.4154, "step": 31567 }, { "epoch": 0.9675125658943239, "grad_norm": 1.8707799628171031, "learning_rate": 2.765226981203406e-08, "loss": 0.4619, "step": 31568 }, { "epoch": 0.9675432144170651, "grad_norm": 2.128134206845601, "learning_rate": 2.7600168359390166e-08, "loss": 0.5838, "step": 31569 }, { "epoch": 0.9675738629398063, "grad_norm": 1.9034384309839298, "learning_rate": 2.7548115901074314e-08, "loss": 0.6018, "step": 31570 }, { "epoch": 0.9676045114625476, "grad_norm": 1.9743833559999289, "learning_rate": 2.749611243759831e-08, "loss": 0.5449, "step": 31571 }, { "epoch": 0.9676351599852887, "grad_norm": 1.860885440589818, "learning_rate": 2.7444157969475637e-08, "loss": 0.5967, "step": 31572 }, { "epoch": 0.96766580850803, "grad_norm": 2.0401800738120826, "learning_rate": 2.7392252497218662e-08, "loss": 0.6248, "step": 31573 }, { "epoch": 0.9676964570307711, "grad_norm": 0.7721756719446466, "learning_rate": 2.7340396021336977e-08, "loss": 0.3777, "step": 31574 }, { "epoch": 0.9677271055535123, "grad_norm": 2.0788969797371526, "learning_rate": 2.7288588542343508e-08, "loss": 0.5388, "step": 31575 }, { "epoch": 0.9677577540762535, "grad_norm": 1.9285214031131392, "learning_rate": 2.7236830060747287e-08, "loss": 0.5883, "step": 31576 }, { "epoch": 0.9677884025989947, "grad_norm": 0.8284953580565892, "learning_rate": 2.7185120577059575e-08, "loss": 0.4236, "step": 31577 }, { "epoch": 0.9678190511217359, "grad_norm": 1.681926057469386, "learning_rate": 2.713346009178941e-08, "loss": 0.5524, "step": 31578 }, { "epoch": 0.9678496996444771, "grad_norm": 1.8521961149955235, "learning_rate": 2.7081848605444716e-08, "loss": 0.4688, "step": 31579 }, { "epoch": 0.9678803481672184, "grad_norm": 0.8368240775432254, "learning_rate": 2.7030286118535643e-08, "loss": 0.3657, "step": 31580 }, { "epoch": 0.9679109966899595, "grad_norm": 2.010674970793177, "learning_rate": 2.6978772631569007e-08, "loss": 0.5923, "step": 31581 }, { "epoch": 0.9679416452127008, "grad_norm": 1.9196006807777954, "learning_rate": 2.692730814505329e-08, "loss": 0.5877, "step": 31582 }, { "epoch": 0.9679722937354419, "grad_norm": 2.177545720301933, "learning_rate": 2.687589265949475e-08, "loss": 0.5874, "step": 31583 }, { "epoch": 0.9680029422581832, "grad_norm": 1.833086639477431, "learning_rate": 2.682452617540077e-08, "loss": 0.5536, "step": 31584 }, { "epoch": 0.9680335907809243, "grad_norm": 1.8041853226954854, "learning_rate": 2.6773208693277595e-08, "loss": 0.4958, "step": 31585 }, { "epoch": 0.9680642393036656, "grad_norm": 2.136535195561452, "learning_rate": 2.672194021362984e-08, "loss": 0.6308, "step": 31586 }, { "epoch": 0.9680948878264067, "grad_norm": 2.0201420407193957, "learning_rate": 2.667072073696375e-08, "loss": 0.6036, "step": 31587 }, { "epoch": 0.968125536349148, "grad_norm": 1.9495336060479784, "learning_rate": 2.661955026378338e-08, "loss": 0.5908, "step": 31588 }, { "epoch": 0.9681561848718891, "grad_norm": 2.0573350943642454, "learning_rate": 2.6568428794593314e-08, "loss": 0.6136, "step": 31589 }, { "epoch": 0.9681868333946304, "grad_norm": 1.7870775651935704, "learning_rate": 2.6517356329896492e-08, "loss": 0.5441, "step": 31590 }, { "epoch": 0.9682174819173716, "grad_norm": 2.1764059653391437, "learning_rate": 2.6466332870196953e-08, "loss": 0.5716, "step": 31591 }, { "epoch": 0.9682481304401128, "grad_norm": 1.8836563021164112, "learning_rate": 2.641535841599707e-08, "loss": 0.6241, "step": 31592 }, { "epoch": 0.968278778962854, "grad_norm": 2.168269810271015, "learning_rate": 2.6364432967799225e-08, "loss": 0.6342, "step": 31593 }, { "epoch": 0.9683094274855952, "grad_norm": 1.7209610432009081, "learning_rate": 2.6313556526105234e-08, "loss": 0.5974, "step": 31594 }, { "epoch": 0.9683400760083364, "grad_norm": 1.9401466440249302, "learning_rate": 2.626272909141636e-08, "loss": 0.5811, "step": 31595 }, { "epoch": 0.9683707245310776, "grad_norm": 2.146911104157268, "learning_rate": 2.6211950664233322e-08, "loss": 0.5926, "step": 31596 }, { "epoch": 0.9684013730538188, "grad_norm": 1.8544011831448521, "learning_rate": 2.6161221245056266e-08, "loss": 0.5665, "step": 31597 }, { "epoch": 0.96843202157656, "grad_norm": 0.7952085673164468, "learning_rate": 2.6110540834385912e-08, "loss": 0.4088, "step": 31598 }, { "epoch": 0.9684626700993012, "grad_norm": 0.7774524931783084, "learning_rate": 2.605990943272074e-08, "loss": 0.3848, "step": 31599 }, { "epoch": 0.9684933186220425, "grad_norm": 0.7950984614467173, "learning_rate": 2.600932704056036e-08, "loss": 0.3869, "step": 31600 }, { "epoch": 0.9685239671447836, "grad_norm": 1.7432035296464168, "learning_rate": 2.5958793658401593e-08, "loss": 0.5232, "step": 31601 }, { "epoch": 0.9685546156675249, "grad_norm": 1.9130226069527698, "learning_rate": 2.5908309286744593e-08, "loss": 0.5639, "step": 31602 }, { "epoch": 0.968585264190266, "grad_norm": 2.045994289937715, "learning_rate": 2.5857873926085075e-08, "loss": 0.5727, "step": 31603 }, { "epoch": 0.9686159127130073, "grad_norm": 1.7753027882791397, "learning_rate": 2.5807487576920977e-08, "loss": 0.529, "step": 31604 }, { "epoch": 0.9686465612357484, "grad_norm": 1.8507580777005297, "learning_rate": 2.575715023974801e-08, "loss": 0.5042, "step": 31605 }, { "epoch": 0.9686772097584896, "grad_norm": 1.857346060947962, "learning_rate": 2.5706861915062996e-08, "loss": 0.5389, "step": 31606 }, { "epoch": 0.9687078582812308, "grad_norm": 0.8060492107758652, "learning_rate": 2.5656622603361104e-08, "loss": 0.3961, "step": 31607 }, { "epoch": 0.968738506803972, "grad_norm": 2.4581690419325426, "learning_rate": 2.5606432305136376e-08, "loss": 0.5643, "step": 31608 }, { "epoch": 0.9687691553267133, "grad_norm": 1.7237774715453524, "learning_rate": 2.5556291020885084e-08, "loss": 0.5701, "step": 31609 }, { "epoch": 0.9687998038494544, "grad_norm": 0.7585619091492547, "learning_rate": 2.5506198751100163e-08, "loss": 0.3811, "step": 31610 }, { "epoch": 0.9688304523721957, "grad_norm": 1.7846880920330814, "learning_rate": 2.5456155496275114e-08, "loss": 0.5246, "step": 31611 }, { "epoch": 0.9688611008949368, "grad_norm": 1.9667818983397316, "learning_rate": 2.5406161256903982e-08, "loss": 0.547, "step": 31612 }, { "epoch": 0.9688917494176781, "grad_norm": 2.140182203850835, "learning_rate": 2.5356216033478598e-08, "loss": 0.5637, "step": 31613 }, { "epoch": 0.9689223979404192, "grad_norm": 1.9446237340186572, "learning_rate": 2.5306319826490788e-08, "loss": 0.5682, "step": 31614 }, { "epoch": 0.9689530464631605, "grad_norm": 0.7961103698176147, "learning_rate": 2.5256472636432938e-08, "loss": 0.4051, "step": 31615 }, { "epoch": 0.9689836949859016, "grad_norm": 2.1382915113686045, "learning_rate": 2.5206674463795766e-08, "loss": 0.6486, "step": 31616 }, { "epoch": 0.9690143435086429, "grad_norm": 1.9737141860612677, "learning_rate": 2.5156925309070544e-08, "loss": 0.4684, "step": 31617 }, { "epoch": 0.9690449920313841, "grad_norm": 1.813522577989904, "learning_rate": 2.510722517274633e-08, "loss": 0.6172, "step": 31618 }, { "epoch": 0.9690756405541253, "grad_norm": 2.078618026637253, "learning_rate": 2.5057574055313837e-08, "loss": 0.4721, "step": 31619 }, { "epoch": 0.9691062890768665, "grad_norm": 2.3734786858625596, "learning_rate": 2.500797195726212e-08, "loss": 0.6571, "step": 31620 }, { "epoch": 0.9691369375996077, "grad_norm": 1.8868400678756356, "learning_rate": 2.4958418879079683e-08, "loss": 0.5635, "step": 31621 }, { "epoch": 0.9691675861223489, "grad_norm": 2.041456042880567, "learning_rate": 2.4908914821255016e-08, "loss": 0.6543, "step": 31622 }, { "epoch": 0.9691982346450901, "grad_norm": 2.0257488825910133, "learning_rate": 2.4859459784275507e-08, "loss": 0.5565, "step": 31623 }, { "epoch": 0.9692288831678313, "grad_norm": 1.9673505179103024, "learning_rate": 2.481005376862855e-08, "loss": 0.558, "step": 31624 }, { "epoch": 0.9692595316905726, "grad_norm": 1.9528104547821317, "learning_rate": 2.4760696774800973e-08, "loss": 0.5521, "step": 31625 }, { "epoch": 0.9692901802133137, "grad_norm": 1.8059067267622644, "learning_rate": 2.4711388803279613e-08, "loss": 0.5354, "step": 31626 }, { "epoch": 0.969320828736055, "grad_norm": 1.899405594437495, "learning_rate": 2.466212985454963e-08, "loss": 0.4698, "step": 31627 }, { "epoch": 0.9693514772587961, "grad_norm": 0.8239650137708713, "learning_rate": 2.4612919929096756e-08, "loss": 0.3816, "step": 31628 }, { "epoch": 0.9693821257815374, "grad_norm": 2.012226812203538, "learning_rate": 2.4563759027406155e-08, "loss": 0.6307, "step": 31629 }, { "epoch": 0.9694127743042785, "grad_norm": 0.8067682354016605, "learning_rate": 2.4514647149961878e-08, "loss": 0.3905, "step": 31630 }, { "epoch": 0.9694434228270198, "grad_norm": 1.941320602695435, "learning_rate": 2.4465584297247434e-08, "loss": 0.6443, "step": 31631 }, { "epoch": 0.9694740713497609, "grad_norm": 1.879302653030023, "learning_rate": 2.4416570469746326e-08, "loss": 0.5631, "step": 31632 }, { "epoch": 0.9695047198725022, "grad_norm": 2.067333068556759, "learning_rate": 2.4367605667942607e-08, "loss": 0.521, "step": 31633 }, { "epoch": 0.9695353683952433, "grad_norm": 1.8825654544028845, "learning_rate": 2.431868989231756e-08, "loss": 0.5409, "step": 31634 }, { "epoch": 0.9695660169179846, "grad_norm": 2.014735684058113, "learning_rate": 2.4269823143353578e-08, "loss": 0.6611, "step": 31635 }, { "epoch": 0.9695966654407258, "grad_norm": 1.9607213825235612, "learning_rate": 2.422100542153194e-08, "loss": 0.661, "step": 31636 }, { "epoch": 0.9696273139634669, "grad_norm": 1.7204906138446312, "learning_rate": 2.4172236727333933e-08, "loss": 0.5698, "step": 31637 }, { "epoch": 0.9696579624862082, "grad_norm": 1.88744793789759, "learning_rate": 2.412351706123972e-08, "loss": 0.5271, "step": 31638 }, { "epoch": 0.9696886110089493, "grad_norm": 1.7055463717378576, "learning_rate": 2.4074846423730035e-08, "loss": 0.5721, "step": 31639 }, { "epoch": 0.9697192595316906, "grad_norm": 1.7352670595475561, "learning_rate": 2.4026224815283937e-08, "loss": 0.4741, "step": 31640 }, { "epoch": 0.9697499080544317, "grad_norm": 1.946082943573191, "learning_rate": 2.3977652236380488e-08, "loss": 0.6292, "step": 31641 }, { "epoch": 0.969780556577173, "grad_norm": 2.124939488767779, "learning_rate": 2.3929128687498748e-08, "loss": 0.5298, "step": 31642 }, { "epoch": 0.9698112050999141, "grad_norm": 1.9089508863642957, "learning_rate": 2.3880654169116113e-08, "loss": 0.6607, "step": 31643 }, { "epoch": 0.9698418536226554, "grad_norm": 1.9087751144010536, "learning_rate": 2.3832228681710533e-08, "loss": 0.5251, "step": 31644 }, { "epoch": 0.9698725021453966, "grad_norm": 2.0413881929604467, "learning_rate": 2.378385222575996e-08, "loss": 0.4984, "step": 31645 }, { "epoch": 0.9699031506681378, "grad_norm": 1.8496453294741515, "learning_rate": 2.3735524801739017e-08, "loss": 0.6167, "step": 31646 }, { "epoch": 0.969933799190879, "grad_norm": 1.8547367080603558, "learning_rate": 2.3687246410126207e-08, "loss": 0.5741, "step": 31647 }, { "epoch": 0.9699644477136202, "grad_norm": 2.1301375144675636, "learning_rate": 2.3639017051396152e-08, "loss": 0.6155, "step": 31648 }, { "epoch": 0.9699950962363614, "grad_norm": 1.6593401284646967, "learning_rate": 2.3590836726024024e-08, "loss": 0.4479, "step": 31649 }, { "epoch": 0.9700257447591026, "grad_norm": 1.8275144145911169, "learning_rate": 2.3542705434484448e-08, "loss": 0.5392, "step": 31650 }, { "epoch": 0.9700563932818438, "grad_norm": 1.862592525541383, "learning_rate": 2.3494623177252042e-08, "loss": 0.5408, "step": 31651 }, { "epoch": 0.970087041804585, "grad_norm": 2.247981300084337, "learning_rate": 2.3446589954799757e-08, "loss": 0.5707, "step": 31652 }, { "epoch": 0.9701176903273262, "grad_norm": 1.8675764025341877, "learning_rate": 2.3398605767602224e-08, "loss": 0.598, "step": 31653 }, { "epoch": 0.9701483388500675, "grad_norm": 1.864413533279644, "learning_rate": 2.3350670616131275e-08, "loss": 0.5989, "step": 31654 }, { "epoch": 0.9701789873728086, "grad_norm": 1.8828620310420088, "learning_rate": 2.3302784500859877e-08, "loss": 0.5535, "step": 31655 }, { "epoch": 0.9702096358955499, "grad_norm": 1.7992617370335449, "learning_rate": 2.325494742225931e-08, "loss": 0.5739, "step": 31656 }, { "epoch": 0.970240284418291, "grad_norm": 1.7995026012378927, "learning_rate": 2.320715938080087e-08, "loss": 0.6145, "step": 31657 }, { "epoch": 0.9702709329410323, "grad_norm": 2.0066979568176038, "learning_rate": 2.3159420376955844e-08, "loss": 0.552, "step": 31658 }, { "epoch": 0.9703015814637734, "grad_norm": 2.1036663751456692, "learning_rate": 2.311173041119441e-08, "loss": 0.6001, "step": 31659 }, { "epoch": 0.9703322299865147, "grad_norm": 1.924206883866052, "learning_rate": 2.3064089483986195e-08, "loss": 0.5295, "step": 31660 }, { "epoch": 0.9703628785092558, "grad_norm": 1.9093049545936391, "learning_rate": 2.301649759580138e-08, "loss": 0.6102, "step": 31661 }, { "epoch": 0.9703935270319971, "grad_norm": 0.7563579869699992, "learning_rate": 2.2968954747108474e-08, "loss": 0.3901, "step": 31662 }, { "epoch": 0.9704241755547383, "grad_norm": 2.1021731888142643, "learning_rate": 2.292146093837544e-08, "loss": 0.5406, "step": 31663 }, { "epoch": 0.9704548240774795, "grad_norm": 0.8157244292000065, "learning_rate": 2.287401617007079e-08, "loss": 0.39, "step": 31664 }, { "epoch": 0.9704854726002207, "grad_norm": 2.1435704339886987, "learning_rate": 2.2826620442661927e-08, "loss": 0.6515, "step": 31665 }, { "epoch": 0.9705161211229619, "grad_norm": 0.8415595893255705, "learning_rate": 2.277927375661626e-08, "loss": 0.4067, "step": 31666 }, { "epoch": 0.9705467696457031, "grad_norm": 2.009353300296861, "learning_rate": 2.2731976112399522e-08, "loss": 0.5085, "step": 31667 }, { "epoch": 0.9705774181684442, "grad_norm": 1.956765048453026, "learning_rate": 2.268472751047801e-08, "loss": 0.5031, "step": 31668 }, { "epoch": 0.9706080666911855, "grad_norm": 1.9004518044047338, "learning_rate": 2.2637527951317462e-08, "loss": 0.6494, "step": 31669 }, { "epoch": 0.9706387152139266, "grad_norm": 1.8718155274215154, "learning_rate": 2.2590377435383058e-08, "loss": 0.4551, "step": 31670 }, { "epoch": 0.9706693637366679, "grad_norm": 1.9647110919476636, "learning_rate": 2.2543275963138877e-08, "loss": 0.5332, "step": 31671 }, { "epoch": 0.970700012259409, "grad_norm": 1.794410064971025, "learning_rate": 2.2496223535049544e-08, "loss": 0.572, "step": 31672 }, { "epoch": 0.9707306607821503, "grad_norm": 1.806197453758538, "learning_rate": 2.244922015157802e-08, "loss": 0.5229, "step": 31673 }, { "epoch": 0.9707613093048915, "grad_norm": 0.8233410423819092, "learning_rate": 2.2402265813188938e-08, "loss": 0.3981, "step": 31674 }, { "epoch": 0.9707919578276327, "grad_norm": 1.8795598418787671, "learning_rate": 2.235536052034304e-08, "loss": 0.5741, "step": 31675 }, { "epoch": 0.9708226063503739, "grad_norm": 1.8893215757157407, "learning_rate": 2.2308504273503285e-08, "loss": 0.4962, "step": 31676 }, { "epoch": 0.9708532548731151, "grad_norm": 2.102784819531181, "learning_rate": 2.2261697073132084e-08, "loss": 0.5653, "step": 31677 }, { "epoch": 0.9708839033958563, "grad_norm": 2.114911185831545, "learning_rate": 2.221493891968962e-08, "loss": 0.6348, "step": 31678 }, { "epoch": 0.9709145519185975, "grad_norm": 1.8673771697172117, "learning_rate": 2.2168229813637198e-08, "loss": 0.5806, "step": 31679 }, { "epoch": 0.9709452004413387, "grad_norm": 1.7069869423920643, "learning_rate": 2.2121569755434446e-08, "loss": 0.5265, "step": 31680 }, { "epoch": 0.97097584896408, "grad_norm": 1.859994102248004, "learning_rate": 2.2074958745541553e-08, "loss": 0.5809, "step": 31681 }, { "epoch": 0.9710064974868211, "grad_norm": 1.992877461092978, "learning_rate": 2.202839678441815e-08, "loss": 0.5642, "step": 31682 }, { "epoch": 0.9710371460095624, "grad_norm": 1.8930743345396819, "learning_rate": 2.1981883872522204e-08, "loss": 0.6162, "step": 31683 }, { "epoch": 0.9710677945323035, "grad_norm": 0.8012953527842986, "learning_rate": 2.1935420010312235e-08, "loss": 0.4162, "step": 31684 }, { "epoch": 0.9710984430550448, "grad_norm": 1.83304256105555, "learning_rate": 2.188900519824677e-08, "loss": 0.5822, "step": 31685 }, { "epoch": 0.9711290915777859, "grad_norm": 1.773847865814823, "learning_rate": 2.1842639436782664e-08, "loss": 0.5567, "step": 31686 }, { "epoch": 0.9711597401005272, "grad_norm": 1.9543389258816495, "learning_rate": 2.1796322726376772e-08, "loss": 0.5326, "step": 31687 }, { "epoch": 0.9711903886232683, "grad_norm": 1.930669985047454, "learning_rate": 2.17500550674854e-08, "loss": 0.6054, "step": 31688 }, { "epoch": 0.9712210371460096, "grad_norm": 2.077277109463137, "learning_rate": 2.1703836460564286e-08, "loss": 0.5302, "step": 31689 }, { "epoch": 0.9712516856687508, "grad_norm": 1.9646379998745216, "learning_rate": 2.1657666906069185e-08, "loss": 0.6431, "step": 31690 }, { "epoch": 0.971282334191492, "grad_norm": 1.7570750959405563, "learning_rate": 2.161154640445473e-08, "loss": 0.5562, "step": 31691 }, { "epoch": 0.9713129827142332, "grad_norm": 1.9138223433207835, "learning_rate": 2.1565474956175002e-08, "loss": 0.5984, "step": 31692 }, { "epoch": 0.9713436312369744, "grad_norm": 2.0152320639284205, "learning_rate": 2.1519452561685194e-08, "loss": 0.5813, "step": 31693 }, { "epoch": 0.9713742797597156, "grad_norm": 1.9778368039304324, "learning_rate": 2.147347922143772e-08, "loss": 0.5888, "step": 31694 }, { "epoch": 0.9714049282824568, "grad_norm": 0.7745660225731427, "learning_rate": 2.1427554935886106e-08, "loss": 0.4223, "step": 31695 }, { "epoch": 0.971435576805198, "grad_norm": 2.1313712868152686, "learning_rate": 2.1381679705482217e-08, "loss": 0.4876, "step": 31696 }, { "epoch": 0.9714662253279392, "grad_norm": 1.759046324424533, "learning_rate": 2.133585353067902e-08, "loss": 0.6256, "step": 31697 }, { "epoch": 0.9714968738506804, "grad_norm": 2.1823790504017255, "learning_rate": 2.129007641192671e-08, "loss": 0.604, "step": 31698 }, { "epoch": 0.9715275223734215, "grad_norm": 1.8685044328209695, "learning_rate": 2.12443483496777e-08, "loss": 0.5795, "step": 31699 }, { "epoch": 0.9715581708961628, "grad_norm": 1.9378738013823782, "learning_rate": 2.1198669344382196e-08, "loss": 0.5776, "step": 31700 }, { "epoch": 0.971588819418904, "grad_norm": 1.8131385133098554, "learning_rate": 2.1153039396489274e-08, "loss": 0.5353, "step": 31701 }, { "epoch": 0.9716194679416452, "grad_norm": 1.7955889281519273, "learning_rate": 2.110745850645024e-08, "loss": 0.5609, "step": 31702 }, { "epoch": 0.9716501164643864, "grad_norm": 1.6682649507972669, "learning_rate": 2.1061926674712518e-08, "loss": 0.4489, "step": 31703 }, { "epoch": 0.9716807649871276, "grad_norm": 1.8812673694389672, "learning_rate": 2.10164439017263e-08, "loss": 0.5655, "step": 31704 }, { "epoch": 0.9717114135098688, "grad_norm": 1.8750859760512695, "learning_rate": 2.0971010187938456e-08, "loss": 0.6502, "step": 31705 }, { "epoch": 0.97174206203261, "grad_norm": 1.886794259180863, "learning_rate": 2.092562553379751e-08, "loss": 0.5187, "step": 31706 }, { "epoch": 0.9717727105553512, "grad_norm": 0.8254338680209592, "learning_rate": 2.088028993975033e-08, "loss": 0.4247, "step": 31707 }, { "epoch": 0.9718033590780925, "grad_norm": 1.883009862812064, "learning_rate": 2.0835003406243227e-08, "loss": 0.5501, "step": 31708 }, { "epoch": 0.9718340076008336, "grad_norm": 1.8346855279340497, "learning_rate": 2.078976593372306e-08, "loss": 0.5633, "step": 31709 }, { "epoch": 0.9718646561235749, "grad_norm": 1.861701461056088, "learning_rate": 2.074457752263559e-08, "loss": 0.5902, "step": 31710 }, { "epoch": 0.971895304646316, "grad_norm": 1.8987942830190705, "learning_rate": 2.06994381734249e-08, "loss": 0.513, "step": 31711 }, { "epoch": 0.9719259531690573, "grad_norm": 2.131301755434329, "learning_rate": 2.065434788653786e-08, "loss": 0.5126, "step": 31712 }, { "epoch": 0.9719566016917984, "grad_norm": 1.9778591535418255, "learning_rate": 2.0609306662416896e-08, "loss": 0.5541, "step": 31713 }, { "epoch": 0.9719872502145397, "grad_norm": 2.122538070025594, "learning_rate": 2.0564314501506088e-08, "loss": 0.5538, "step": 31714 }, { "epoch": 0.9720178987372808, "grad_norm": 1.8711682544373478, "learning_rate": 2.051937140425009e-08, "loss": 0.5384, "step": 31715 }, { "epoch": 0.9720485472600221, "grad_norm": 1.8438206765202634, "learning_rate": 2.0474477371090208e-08, "loss": 0.5287, "step": 31716 }, { "epoch": 0.9720791957827633, "grad_norm": 1.813943177732691, "learning_rate": 2.042963240246887e-08, "loss": 0.4996, "step": 31717 }, { "epoch": 0.9721098443055045, "grad_norm": 0.8577991228454601, "learning_rate": 2.0384836498829608e-08, "loss": 0.4252, "step": 31718 }, { "epoch": 0.9721404928282457, "grad_norm": 1.8206755338984966, "learning_rate": 2.0340089660611518e-08, "loss": 0.5397, "step": 31719 }, { "epoch": 0.9721711413509869, "grad_norm": 2.114471621727208, "learning_rate": 2.0295391888257577e-08, "loss": 0.5648, "step": 31720 }, { "epoch": 0.9722017898737281, "grad_norm": 1.880044973212771, "learning_rate": 2.0250743182206877e-08, "loss": 0.6168, "step": 31721 }, { "epoch": 0.9722324383964693, "grad_norm": 2.1708830790809843, "learning_rate": 2.0206143542899625e-08, "loss": 0.5932, "step": 31722 }, { "epoch": 0.9722630869192105, "grad_norm": 1.8447174952425214, "learning_rate": 2.016159297077547e-08, "loss": 0.6131, "step": 31723 }, { "epoch": 0.9722937354419517, "grad_norm": 2.085821798610382, "learning_rate": 2.0117091466272943e-08, "loss": 0.5577, "step": 31724 }, { "epoch": 0.9723243839646929, "grad_norm": 2.0055023348976633, "learning_rate": 2.0072639029831142e-08, "loss": 0.5919, "step": 31725 }, { "epoch": 0.9723550324874342, "grad_norm": 1.734509614026842, "learning_rate": 2.002823566188805e-08, "loss": 0.5273, "step": 31726 }, { "epoch": 0.9723856810101753, "grad_norm": 1.7729707934366776, "learning_rate": 1.9983881362880542e-08, "loss": 0.5461, "step": 31727 }, { "epoch": 0.9724163295329166, "grad_norm": 2.336259662547532, "learning_rate": 1.993957613324604e-08, "loss": 0.5409, "step": 31728 }, { "epoch": 0.9724469780556577, "grad_norm": 0.8156128196305433, "learning_rate": 1.9895319973421423e-08, "loss": 0.3879, "step": 31729 }, { "epoch": 0.9724776265783989, "grad_norm": 1.9857183065014978, "learning_rate": 1.985111288384245e-08, "loss": 0.5216, "step": 31730 }, { "epoch": 0.9725082751011401, "grad_norm": 2.0653140176406795, "learning_rate": 1.9806954864944328e-08, "loss": 0.5534, "step": 31731 }, { "epoch": 0.9725389236238813, "grad_norm": 1.9989358474339634, "learning_rate": 1.976284591716282e-08, "loss": 0.5394, "step": 31732 }, { "epoch": 0.9725695721466225, "grad_norm": 0.8229407761117813, "learning_rate": 1.9718786040932024e-08, "loss": 0.385, "step": 31733 }, { "epoch": 0.9726002206693637, "grad_norm": 0.8629277136071317, "learning_rate": 1.9674775236686595e-08, "loss": 0.398, "step": 31734 }, { "epoch": 0.972630869192105, "grad_norm": 1.9596135714795988, "learning_rate": 1.9630813504859515e-08, "loss": 0.562, "step": 31735 }, { "epoch": 0.9726615177148461, "grad_norm": 1.9012758586017626, "learning_rate": 1.9586900845884327e-08, "loss": 0.5608, "step": 31736 }, { "epoch": 0.9726921662375874, "grad_norm": 2.012064780113119, "learning_rate": 1.954303726019402e-08, "loss": 0.5452, "step": 31737 }, { "epoch": 0.9727228147603285, "grad_norm": 1.9294832815700318, "learning_rate": 1.9499222748219916e-08, "loss": 0.5147, "step": 31738 }, { "epoch": 0.9727534632830698, "grad_norm": 2.0565172155363802, "learning_rate": 1.9455457310394444e-08, "loss": 0.6891, "step": 31739 }, { "epoch": 0.9727841118058109, "grad_norm": 1.8531471024343475, "learning_rate": 1.941174094714948e-08, "loss": 0.5486, "step": 31740 }, { "epoch": 0.9728147603285522, "grad_norm": 1.8162914400325947, "learning_rate": 1.936807365891413e-08, "loss": 0.4807, "step": 31741 }, { "epoch": 0.9728454088512933, "grad_norm": 1.8421662862706534, "learning_rate": 1.9324455446119718e-08, "loss": 0.4853, "step": 31742 }, { "epoch": 0.9728760573740346, "grad_norm": 2.3277750436014872, "learning_rate": 1.928088630919589e-08, "loss": 0.6013, "step": 31743 }, { "epoch": 0.9729067058967757, "grad_norm": 1.9124974815162183, "learning_rate": 1.9237366248571754e-08, "loss": 0.5009, "step": 31744 }, { "epoch": 0.972937354419517, "grad_norm": 0.9087435136442549, "learning_rate": 1.919389526467641e-08, "loss": 0.413, "step": 31745 }, { "epoch": 0.9729680029422582, "grad_norm": 1.8794295787980588, "learning_rate": 1.9150473357937847e-08, "loss": 0.4967, "step": 31746 }, { "epoch": 0.9729986514649994, "grad_norm": 1.7646167314241479, "learning_rate": 1.9107100528784063e-08, "loss": 0.4741, "step": 31747 }, { "epoch": 0.9730292999877406, "grad_norm": 2.0031868811527476, "learning_rate": 1.9063776777642485e-08, "loss": 0.4735, "step": 31748 }, { "epoch": 0.9730599485104818, "grad_norm": 0.840516979562198, "learning_rate": 1.9020502104939996e-08, "loss": 0.4001, "step": 31749 }, { "epoch": 0.973090597033223, "grad_norm": 1.8618730502961784, "learning_rate": 1.8977276511102927e-08, "loss": 0.6095, "step": 31750 }, { "epoch": 0.9731212455559642, "grad_norm": 0.7912796016758243, "learning_rate": 1.8934099996557044e-08, "loss": 0.3996, "step": 31751 }, { "epoch": 0.9731518940787054, "grad_norm": 1.9261470030455556, "learning_rate": 1.8890972561728115e-08, "loss": 0.625, "step": 31752 }, { "epoch": 0.9731825426014467, "grad_norm": 2.29967521633271, "learning_rate": 1.8847894207040806e-08, "loss": 0.6105, "step": 31753 }, { "epoch": 0.9732131911241878, "grad_norm": 2.009422833147141, "learning_rate": 1.8804864932919774e-08, "loss": 0.6359, "step": 31754 }, { "epoch": 0.9732438396469291, "grad_norm": 1.9494433399901892, "learning_rate": 1.8761884739788573e-08, "loss": 0.6363, "step": 31755 }, { "epoch": 0.9732744881696702, "grad_norm": 2.0846405108605617, "learning_rate": 1.8718953628071303e-08, "loss": 0.602, "step": 31756 }, { "epoch": 0.9733051366924115, "grad_norm": 1.8826535696512403, "learning_rate": 1.867607159819096e-08, "loss": 0.5392, "step": 31757 }, { "epoch": 0.9733357852151526, "grad_norm": 2.0353766120304315, "learning_rate": 1.863323865056943e-08, "loss": 0.5258, "step": 31758 }, { "epoch": 0.9733664337378939, "grad_norm": 1.7770506679099247, "learning_rate": 1.8590454785629152e-08, "loss": 0.5677, "step": 31759 }, { "epoch": 0.973397082260635, "grad_norm": 1.8230249052310143, "learning_rate": 1.8547720003792013e-08, "loss": 0.5576, "step": 31760 }, { "epoch": 0.9734277307833762, "grad_norm": 0.8303730231679375, "learning_rate": 1.850503430547823e-08, "loss": 0.3831, "step": 31761 }, { "epoch": 0.9734583793061174, "grad_norm": 2.1031554000366546, "learning_rate": 1.8462397691109135e-08, "loss": 0.5709, "step": 31762 }, { "epoch": 0.9734890278288586, "grad_norm": 1.8264785526734444, "learning_rate": 1.8419810161104946e-08, "loss": 0.5095, "step": 31763 }, { "epoch": 0.9735196763515999, "grad_norm": 1.8099336037944367, "learning_rate": 1.837727171588477e-08, "loss": 0.4803, "step": 31764 }, { "epoch": 0.973550324874341, "grad_norm": 2.1131556471562436, "learning_rate": 1.833478235586772e-08, "loss": 0.6185, "step": 31765 }, { "epoch": 0.9735809733970823, "grad_norm": 0.7854385290125679, "learning_rate": 1.8292342081472346e-08, "loss": 0.3843, "step": 31766 }, { "epoch": 0.9736116219198234, "grad_norm": 1.6901166898078996, "learning_rate": 1.8249950893117762e-08, "loss": 0.5356, "step": 31767 }, { "epoch": 0.9736422704425647, "grad_norm": 1.725879803389061, "learning_rate": 1.8207608791220855e-08, "loss": 0.4863, "step": 31768 }, { "epoch": 0.9736729189653058, "grad_norm": 1.7964150795381162, "learning_rate": 1.8165315776199065e-08, "loss": 0.6052, "step": 31769 }, { "epoch": 0.9737035674880471, "grad_norm": 1.989179043641693, "learning_rate": 1.8123071848469286e-08, "loss": 0.6485, "step": 31770 }, { "epoch": 0.9737342160107882, "grad_norm": 0.7859053513614543, "learning_rate": 1.8080877008447296e-08, "loss": 0.4073, "step": 31771 }, { "epoch": 0.9737648645335295, "grad_norm": 1.8978838739524884, "learning_rate": 1.8038731256549426e-08, "loss": 0.5794, "step": 31772 }, { "epoch": 0.9737955130562707, "grad_norm": 1.9908427872997505, "learning_rate": 1.7996634593189787e-08, "loss": 0.597, "step": 31773 }, { "epoch": 0.9738261615790119, "grad_norm": 1.5967866584919441, "learning_rate": 1.795458701878472e-08, "loss": 0.4952, "step": 31774 }, { "epoch": 0.9738568101017531, "grad_norm": 1.9643371243200334, "learning_rate": 1.7912588533747777e-08, "loss": 0.6437, "step": 31775 }, { "epoch": 0.9738874586244943, "grad_norm": 1.9126675166128417, "learning_rate": 1.787063913849252e-08, "loss": 0.5316, "step": 31776 }, { "epoch": 0.9739181071472355, "grad_norm": 1.9699166702838387, "learning_rate": 1.7828738833433055e-08, "loss": 0.6154, "step": 31777 }, { "epoch": 0.9739487556699767, "grad_norm": 1.9422766872167272, "learning_rate": 1.7786887618981287e-08, "loss": 0.5166, "step": 31778 }, { "epoch": 0.9739794041927179, "grad_norm": 1.8524906869774203, "learning_rate": 1.774508549555021e-08, "loss": 0.6324, "step": 31779 }, { "epoch": 0.9740100527154592, "grad_norm": 1.9705866800271252, "learning_rate": 1.7703332463551714e-08, "loss": 0.544, "step": 31780 }, { "epoch": 0.9740407012382003, "grad_norm": 1.919469413939203, "learning_rate": 1.7661628523397146e-08, "loss": 0.6036, "step": 31781 }, { "epoch": 0.9740713497609416, "grad_norm": 2.0112404659581986, "learning_rate": 1.7619973675496728e-08, "loss": 0.6189, "step": 31782 }, { "epoch": 0.9741019982836827, "grad_norm": 1.914928741094061, "learning_rate": 1.7578367920262352e-08, "loss": 0.5851, "step": 31783 }, { "epoch": 0.974132646806424, "grad_norm": 2.0480402571180076, "learning_rate": 1.7536811258102582e-08, "loss": 0.66, "step": 31784 }, { "epoch": 0.9741632953291651, "grad_norm": 1.7753304945172232, "learning_rate": 1.7495303689427644e-08, "loss": 0.5894, "step": 31785 }, { "epoch": 0.9741939438519064, "grad_norm": 2.197130568063493, "learning_rate": 1.7453845214646102e-08, "loss": 0.6239, "step": 31786 }, { "epoch": 0.9742245923746475, "grad_norm": 1.852973375201662, "learning_rate": 1.741243583416652e-08, "loss": 0.6141, "step": 31787 }, { "epoch": 0.9742552408973888, "grad_norm": 2.117679987920416, "learning_rate": 1.7371075548397454e-08, "loss": 0.5323, "step": 31788 }, { "epoch": 0.97428588942013, "grad_norm": 1.8092498399203631, "learning_rate": 1.7329764357746358e-08, "loss": 0.6021, "step": 31789 }, { "epoch": 0.9743165379428712, "grad_norm": 1.82477653109671, "learning_rate": 1.7288502262619022e-08, "loss": 0.597, "step": 31790 }, { "epoch": 0.9743471864656124, "grad_norm": 2.0647067792794154, "learning_rate": 1.724728926342345e-08, "loss": 0.4964, "step": 31791 }, { "epoch": 0.9743778349883535, "grad_norm": 2.1253228310320376, "learning_rate": 1.7206125360565427e-08, "loss": 0.5912, "step": 31792 }, { "epoch": 0.9744084835110948, "grad_norm": 2.1208712756439936, "learning_rate": 1.716501055445019e-08, "loss": 0.608, "step": 31793 }, { "epoch": 0.9744391320338359, "grad_norm": 2.1255329598103785, "learning_rate": 1.7123944845482963e-08, "loss": 0.5501, "step": 31794 }, { "epoch": 0.9744697805565772, "grad_norm": 0.7969459487385565, "learning_rate": 1.708292823406843e-08, "loss": 0.4001, "step": 31795 }, { "epoch": 0.9745004290793183, "grad_norm": 2.2695723358750906, "learning_rate": 1.7041960720610708e-08, "loss": 0.4782, "step": 31796 }, { "epoch": 0.9745310776020596, "grad_norm": 1.7966942943838466, "learning_rate": 1.700104230551336e-08, "loss": 0.4872, "step": 31797 }, { "epoch": 0.9745617261248007, "grad_norm": 1.7706771841212576, "learning_rate": 1.6960172989179958e-08, "loss": 0.4957, "step": 31798 }, { "epoch": 0.974592374647542, "grad_norm": 1.9538933170741817, "learning_rate": 1.6919352772012954e-08, "loss": 0.6488, "step": 31799 }, { "epoch": 0.9746230231702832, "grad_norm": 2.082111695367197, "learning_rate": 1.687858165441425e-08, "loss": 0.5399, "step": 31800 }, { "epoch": 0.9746536716930244, "grad_norm": 1.7049984028793572, "learning_rate": 1.6837859636786303e-08, "loss": 0.5632, "step": 31801 }, { "epoch": 0.9746843202157656, "grad_norm": 1.8516884406125789, "learning_rate": 1.6797186719529347e-08, "loss": 0.5673, "step": 31802 }, { "epoch": 0.9747149687385068, "grad_norm": 1.8809970896855077, "learning_rate": 1.6756562903045282e-08, "loss": 0.5824, "step": 31803 }, { "epoch": 0.974745617261248, "grad_norm": 1.8563824960230093, "learning_rate": 1.671598818773379e-08, "loss": 0.5857, "step": 31804 }, { "epoch": 0.9747762657839892, "grad_norm": 1.7418916517143348, "learning_rate": 1.667546257399455e-08, "loss": 0.4899, "step": 31805 }, { "epoch": 0.9748069143067304, "grad_norm": 1.9950178961961116, "learning_rate": 1.663498606222669e-08, "loss": 0.5932, "step": 31806 }, { "epoch": 0.9748375628294716, "grad_norm": 0.7902598228087562, "learning_rate": 1.6594558652829884e-08, "loss": 0.3976, "step": 31807 }, { "epoch": 0.9748682113522128, "grad_norm": 1.9626182511875137, "learning_rate": 1.6554180346201597e-08, "loss": 0.5979, "step": 31808 }, { "epoch": 0.9748988598749541, "grad_norm": 2.010177646507929, "learning_rate": 1.6513851142739845e-08, "loss": 0.6174, "step": 31809 }, { "epoch": 0.9749295083976952, "grad_norm": 2.154801809410911, "learning_rate": 1.647357104284264e-08, "loss": 0.5871, "step": 31810 }, { "epoch": 0.9749601569204365, "grad_norm": 1.7801149047409082, "learning_rate": 1.6433340046906334e-08, "loss": 0.5465, "step": 31811 }, { "epoch": 0.9749908054431776, "grad_norm": 0.8311325480361897, "learning_rate": 1.639315815532727e-08, "loss": 0.4187, "step": 31812 }, { "epoch": 0.9750214539659189, "grad_norm": 1.9389854502456616, "learning_rate": 1.635302536850181e-08, "loss": 0.6028, "step": 31813 }, { "epoch": 0.97505210248866, "grad_norm": 1.8734629376714331, "learning_rate": 1.6312941686824622e-08, "loss": 0.5159, "step": 31814 }, { "epoch": 0.9750827510114013, "grad_norm": 1.912781759015803, "learning_rate": 1.6272907110691516e-08, "loss": 0.6172, "step": 31815 }, { "epoch": 0.9751133995341424, "grad_norm": 2.028138023773924, "learning_rate": 1.6232921640497167e-08, "loss": 0.6268, "step": 31816 }, { "epoch": 0.9751440480568837, "grad_norm": 2.3696234188935157, "learning_rate": 1.6192985276634042e-08, "loss": 0.624, "step": 31817 }, { "epoch": 0.9751746965796249, "grad_norm": 0.838105902149964, "learning_rate": 1.6153098019496826e-08, "loss": 0.3973, "step": 31818 }, { "epoch": 0.9752053451023661, "grad_norm": 1.7749581638307665, "learning_rate": 1.6113259869478536e-08, "loss": 0.565, "step": 31819 }, { "epoch": 0.9752359936251073, "grad_norm": 1.609906522951461, "learning_rate": 1.607347082697164e-08, "loss": 0.476, "step": 31820 }, { "epoch": 0.9752666421478485, "grad_norm": 1.9032678378793468, "learning_rate": 1.6033730892367484e-08, "loss": 0.5567, "step": 31821 }, { "epoch": 0.9752972906705897, "grad_norm": 2.003318210925103, "learning_rate": 1.5994040066058535e-08, "loss": 0.5803, "step": 31822 }, { "epoch": 0.9753279391933308, "grad_norm": 1.9975021116174776, "learning_rate": 1.595439834843504e-08, "loss": 0.6017, "step": 31823 }, { "epoch": 0.9753585877160721, "grad_norm": 1.97041137512288, "learning_rate": 1.5914805739888906e-08, "loss": 0.5392, "step": 31824 }, { "epoch": 0.9753892362388132, "grad_norm": 2.0978638565400174, "learning_rate": 1.587526224080871e-08, "loss": 0.7043, "step": 31825 }, { "epoch": 0.9754198847615545, "grad_norm": 1.8705695655743395, "learning_rate": 1.583576785158525e-08, "loss": 0.5878, "step": 31826 }, { "epoch": 0.9754505332842957, "grad_norm": 1.9142064445332958, "learning_rate": 1.5796322572607105e-08, "loss": 0.5947, "step": 31827 }, { "epoch": 0.9754811818070369, "grad_norm": 1.6983964068440414, "learning_rate": 1.5756926404262852e-08, "loss": 0.6266, "step": 31828 }, { "epoch": 0.9755118303297781, "grad_norm": 2.067200329868559, "learning_rate": 1.571757934694107e-08, "loss": 0.5618, "step": 31829 }, { "epoch": 0.9755424788525193, "grad_norm": 2.1634114680880727, "learning_rate": 1.5678281401029228e-08, "loss": 0.6126, "step": 31830 }, { "epoch": 0.9755731273752605, "grad_norm": 1.8517520555526037, "learning_rate": 1.5639032566914793e-08, "loss": 0.5703, "step": 31831 }, { "epoch": 0.9756037758980017, "grad_norm": 1.8318532863695012, "learning_rate": 1.5599832844983564e-08, "loss": 0.5083, "step": 31832 }, { "epoch": 0.9756344244207429, "grad_norm": 1.8934496229498505, "learning_rate": 1.5560682235623014e-08, "loss": 0.5504, "step": 31833 }, { "epoch": 0.9756650729434841, "grad_norm": 1.916141753634737, "learning_rate": 1.552158073921839e-08, "loss": 0.5537, "step": 31834 }, { "epoch": 0.9756957214662253, "grad_norm": 1.7233299457571478, "learning_rate": 1.5482528356154937e-08, "loss": 0.4902, "step": 31835 }, { "epoch": 0.9757263699889666, "grad_norm": 2.0104127512312298, "learning_rate": 1.5443525086817347e-08, "loss": 0.5917, "step": 31836 }, { "epoch": 0.9757570185117077, "grad_norm": 2.251026920127436, "learning_rate": 1.5404570931590314e-08, "loss": 0.5757, "step": 31837 }, { "epoch": 0.975787667034449, "grad_norm": 1.8587067898764922, "learning_rate": 1.536566589085742e-08, "loss": 0.612, "step": 31838 }, { "epoch": 0.9758183155571901, "grad_norm": 1.9998656539877573, "learning_rate": 1.532680996500169e-08, "loss": 0.5688, "step": 31839 }, { "epoch": 0.9758489640799314, "grad_norm": 1.8478500945962695, "learning_rate": 1.5288003154406707e-08, "loss": 0.521, "step": 31840 }, { "epoch": 0.9758796126026725, "grad_norm": 1.694313376057823, "learning_rate": 1.5249245459453833e-08, "loss": 0.479, "step": 31841 }, { "epoch": 0.9759102611254138, "grad_norm": 1.8712384224623908, "learning_rate": 1.5210536880526093e-08, "loss": 0.5997, "step": 31842 }, { "epoch": 0.9759409096481549, "grad_norm": 1.8197325291760589, "learning_rate": 1.5171877418003744e-08, "loss": 0.5653, "step": 31843 }, { "epoch": 0.9759715581708962, "grad_norm": 2.0863208097149593, "learning_rate": 1.51332670722687e-08, "loss": 0.5595, "step": 31844 }, { "epoch": 0.9760022066936374, "grad_norm": 2.0582748639172075, "learning_rate": 1.509470584370121e-08, "loss": 0.597, "step": 31845 }, { "epoch": 0.9760328552163786, "grad_norm": 0.7990896212853191, "learning_rate": 1.505619373268097e-08, "loss": 0.399, "step": 31846 }, { "epoch": 0.9760635037391198, "grad_norm": 2.0995105467955333, "learning_rate": 1.501773073958712e-08, "loss": 0.601, "step": 31847 }, { "epoch": 0.976094152261861, "grad_norm": 1.9127494339555062, "learning_rate": 1.4979316864799364e-08, "loss": 0.6561, "step": 31848 }, { "epoch": 0.9761248007846022, "grad_norm": 1.7784168432882994, "learning_rate": 1.4940952108695727e-08, "loss": 0.5922, "step": 31849 }, { "epoch": 0.9761554493073434, "grad_norm": 1.9065979811442446, "learning_rate": 1.490263647165424e-08, "loss": 0.5479, "step": 31850 }, { "epoch": 0.9761860978300846, "grad_norm": 2.1653766792002114, "learning_rate": 1.4864369954052938e-08, "loss": 0.6557, "step": 31851 }, { "epoch": 0.9762167463528258, "grad_norm": 1.9210858370923831, "learning_rate": 1.4826152556268181e-08, "loss": 0.517, "step": 31852 }, { "epoch": 0.976247394875567, "grad_norm": 1.8624679141069673, "learning_rate": 1.4787984278676892e-08, "loss": 0.5873, "step": 31853 }, { "epoch": 0.9762780433983081, "grad_norm": 2.020034126109676, "learning_rate": 1.4749865121655438e-08, "loss": 0.602, "step": 31854 }, { "epoch": 0.9763086919210494, "grad_norm": 0.8085930878566514, "learning_rate": 1.4711795085578517e-08, "loss": 0.4012, "step": 31855 }, { "epoch": 0.9763393404437906, "grad_norm": 1.814412781039921, "learning_rate": 1.4673774170822496e-08, "loss": 0.5603, "step": 31856 }, { "epoch": 0.9763699889665318, "grad_norm": 1.6702355404075098, "learning_rate": 1.4635802377760966e-08, "loss": 0.4844, "step": 31857 }, { "epoch": 0.976400637489273, "grad_norm": 1.8037637568186058, "learning_rate": 1.4597879706768625e-08, "loss": 0.554, "step": 31858 }, { "epoch": 0.9764312860120142, "grad_norm": 1.7086485959007878, "learning_rate": 1.456000615821851e-08, "loss": 0.5494, "step": 31859 }, { "epoch": 0.9764619345347554, "grad_norm": 1.7663277448335903, "learning_rate": 1.4522181732484209e-08, "loss": 0.5901, "step": 31860 }, { "epoch": 0.9764925830574966, "grad_norm": 0.8457075212267262, "learning_rate": 1.4484406429938758e-08, "loss": 0.401, "step": 31861 }, { "epoch": 0.9765232315802378, "grad_norm": 1.9145559001210897, "learning_rate": 1.4446680250954082e-08, "loss": 0.5242, "step": 31862 }, { "epoch": 0.976553880102979, "grad_norm": 1.7183494781512125, "learning_rate": 1.4409003195902105e-08, "loss": 0.5356, "step": 31863 }, { "epoch": 0.9765845286257202, "grad_norm": 1.6258729962619216, "learning_rate": 1.4371375265153643e-08, "loss": 0.544, "step": 31864 }, { "epoch": 0.9766151771484615, "grad_norm": 2.0178698547722176, "learning_rate": 1.4333796459079508e-08, "loss": 0.5535, "step": 31865 }, { "epoch": 0.9766458256712026, "grad_norm": 2.2976689657299376, "learning_rate": 1.429626677804996e-08, "loss": 0.5799, "step": 31866 }, { "epoch": 0.9766764741939439, "grad_norm": 1.9487985683397668, "learning_rate": 1.4258786222435261e-08, "loss": 0.6262, "step": 31867 }, { "epoch": 0.976707122716685, "grad_norm": 0.7841234544463233, "learning_rate": 1.4221354792604004e-08, "loss": 0.3933, "step": 31868 }, { "epoch": 0.9767377712394263, "grad_norm": 1.788490161897443, "learning_rate": 1.418397248892589e-08, "loss": 0.512, "step": 31869 }, { "epoch": 0.9767684197621674, "grad_norm": 2.1320077949842418, "learning_rate": 1.4146639311768406e-08, "loss": 0.5952, "step": 31870 }, { "epoch": 0.9767990682849087, "grad_norm": 1.8171341412807838, "learning_rate": 1.4109355261500146e-08, "loss": 0.6138, "step": 31871 }, { "epoch": 0.9768297168076499, "grad_norm": 0.7732698906120108, "learning_rate": 1.4072120338488038e-08, "loss": 0.3881, "step": 31872 }, { "epoch": 0.9768603653303911, "grad_norm": 1.9404617177513157, "learning_rate": 1.4034934543098454e-08, "loss": 0.5457, "step": 31873 }, { "epoch": 0.9768910138531323, "grad_norm": 1.7259473186561074, "learning_rate": 1.3997797875698882e-08, "loss": 0.4828, "step": 31874 }, { "epoch": 0.9769216623758735, "grad_norm": 2.394299163902907, "learning_rate": 1.3960710336654582e-08, "loss": 0.6116, "step": 31875 }, { "epoch": 0.9769523108986147, "grad_norm": 0.786583638461665, "learning_rate": 1.3923671926331373e-08, "loss": 0.3882, "step": 31876 }, { "epoch": 0.9769829594213559, "grad_norm": 1.833583111418267, "learning_rate": 1.3886682645093407e-08, "loss": 0.5342, "step": 31877 }, { "epoch": 0.9770136079440971, "grad_norm": 0.7873920805289292, "learning_rate": 1.3849742493306506e-08, "loss": 0.3875, "step": 31878 }, { "epoch": 0.9770442564668383, "grad_norm": 1.7799435754329973, "learning_rate": 1.3812851471333156e-08, "loss": 0.5534, "step": 31879 }, { "epoch": 0.9770749049895795, "grad_norm": 2.0132499871521192, "learning_rate": 1.3776009579538063e-08, "loss": 0.5075, "step": 31880 }, { "epoch": 0.9771055535123208, "grad_norm": 1.8228952648720056, "learning_rate": 1.3739216818283163e-08, "loss": 0.5627, "step": 31881 }, { "epoch": 0.9771362020350619, "grad_norm": 0.7837897654698595, "learning_rate": 1.3702473187932053e-08, "loss": 0.3946, "step": 31882 }, { "epoch": 0.9771668505578032, "grad_norm": 0.7825882507590101, "learning_rate": 1.366577868884611e-08, "loss": 0.3963, "step": 31883 }, { "epoch": 0.9771974990805443, "grad_norm": 2.295853931681736, "learning_rate": 1.3629133321387266e-08, "loss": 0.5652, "step": 31884 }, { "epoch": 0.9772281476032855, "grad_norm": 1.781082614413763, "learning_rate": 1.3592537085915792e-08, "loss": 0.5783, "step": 31885 }, { "epoch": 0.9772587961260267, "grad_norm": 1.7099890211703175, "learning_rate": 1.3555989982793615e-08, "loss": 0.559, "step": 31886 }, { "epoch": 0.9772894446487679, "grad_norm": 1.7056210469667308, "learning_rate": 1.3519492012379898e-08, "loss": 0.6017, "step": 31887 }, { "epoch": 0.9773200931715091, "grad_norm": 1.705201338048168, "learning_rate": 1.3483043175033794e-08, "loss": 0.505, "step": 31888 }, { "epoch": 0.9773507416942503, "grad_norm": 1.8376586239348065, "learning_rate": 1.3446643471116127e-08, "loss": 0.5719, "step": 31889 }, { "epoch": 0.9773813902169916, "grad_norm": 1.8002811393432405, "learning_rate": 1.3410292900983835e-08, "loss": 0.5419, "step": 31890 }, { "epoch": 0.9774120387397327, "grad_norm": 1.8647755520758869, "learning_rate": 1.3373991464996072e-08, "loss": 0.5662, "step": 31891 }, { "epoch": 0.977442687262474, "grad_norm": 1.826893093878163, "learning_rate": 1.3337739163510333e-08, "loss": 0.5107, "step": 31892 }, { "epoch": 0.9774733357852151, "grad_norm": 1.831795888350997, "learning_rate": 1.3301535996883552e-08, "loss": 0.5815, "step": 31893 }, { "epoch": 0.9775039843079564, "grad_norm": 0.7473496201782, "learning_rate": 1.3265381965472668e-08, "loss": 0.3727, "step": 31894 }, { "epoch": 0.9775346328306975, "grad_norm": 0.7843474255340667, "learning_rate": 1.3229277069634062e-08, "loss": 0.4032, "step": 31895 }, { "epoch": 0.9775652813534388, "grad_norm": 0.8403566765196125, "learning_rate": 1.3193221309723004e-08, "loss": 0.4106, "step": 31896 }, { "epoch": 0.9775959298761799, "grad_norm": 0.8439363826978342, "learning_rate": 1.3157214686095321e-08, "loss": 0.3996, "step": 31897 }, { "epoch": 0.9776265783989212, "grad_norm": 1.8731311659605938, "learning_rate": 1.312125719910573e-08, "loss": 0.5926, "step": 31898 }, { "epoch": 0.9776572269216623, "grad_norm": 1.811618178139205, "learning_rate": 1.3085348849107837e-08, "loss": 0.5218, "step": 31899 }, { "epoch": 0.9776878754444036, "grad_norm": 1.7354293420212186, "learning_rate": 1.3049489636456358e-08, "loss": 0.5772, "step": 31900 }, { "epoch": 0.9777185239671448, "grad_norm": 1.9774091652710486, "learning_rate": 1.3013679561503789e-08, "loss": 0.6365, "step": 31901 }, { "epoch": 0.977749172489886, "grad_norm": 1.8815545852725586, "learning_rate": 1.2977918624603736e-08, "loss": 0.4771, "step": 31902 }, { "epoch": 0.9777798210126272, "grad_norm": 1.9981337797034788, "learning_rate": 1.2942206826108139e-08, "loss": 0.5778, "step": 31903 }, { "epoch": 0.9778104695353684, "grad_norm": 1.9189680304059709, "learning_rate": 1.290654416636894e-08, "loss": 0.5842, "step": 31904 }, { "epoch": 0.9778411180581096, "grad_norm": 2.063224174254957, "learning_rate": 1.2870930645738078e-08, "loss": 0.6074, "step": 31905 }, { "epoch": 0.9778717665808508, "grad_norm": 1.835256070908092, "learning_rate": 1.2835366264565275e-08, "loss": 0.479, "step": 31906 }, { "epoch": 0.977902415103592, "grad_norm": 1.6436492903983173, "learning_rate": 1.2799851023201914e-08, "loss": 0.5627, "step": 31907 }, { "epoch": 0.9779330636263333, "grad_norm": 1.8986326520928547, "learning_rate": 1.2764384921997718e-08, "loss": 0.5614, "step": 31908 }, { "epoch": 0.9779637121490744, "grad_norm": 2.0604016088432333, "learning_rate": 1.2728967961301853e-08, "loss": 0.5938, "step": 31909 }, { "epoch": 0.9779943606718157, "grad_norm": 2.0270544282239618, "learning_rate": 1.269360014146348e-08, "loss": 0.5611, "step": 31910 }, { "epoch": 0.9780250091945568, "grad_norm": 1.842541233764921, "learning_rate": 1.2658281462831212e-08, "loss": 0.5334, "step": 31911 }, { "epoch": 0.9780556577172981, "grad_norm": 1.7947948163725607, "learning_rate": 1.2623011925753104e-08, "loss": 0.6424, "step": 31912 }, { "epoch": 0.9780863062400392, "grad_norm": 2.150762475995836, "learning_rate": 1.2587791530576653e-08, "loss": 0.5807, "step": 31913 }, { "epoch": 0.9781169547627805, "grad_norm": 1.8723556692939702, "learning_rate": 1.2552620277648253e-08, "loss": 0.6432, "step": 31914 }, { "epoch": 0.9781476032855216, "grad_norm": 1.8893388679551355, "learning_rate": 1.2517498167315401e-08, "loss": 0.5566, "step": 31915 }, { "epoch": 0.9781782518082628, "grad_norm": 1.7601524692220067, "learning_rate": 1.2482425199923931e-08, "loss": 0.5699, "step": 31916 }, { "epoch": 0.978208900331004, "grad_norm": 1.8354211610169624, "learning_rate": 1.2447401375818569e-08, "loss": 0.5427, "step": 31917 }, { "epoch": 0.9782395488537452, "grad_norm": 1.842420814097618, "learning_rate": 1.2412426695345702e-08, "loss": 0.5229, "step": 31918 }, { "epoch": 0.9782701973764865, "grad_norm": 1.862829934088107, "learning_rate": 1.2377501158848947e-08, "loss": 0.5597, "step": 31919 }, { "epoch": 0.9783008458992276, "grad_norm": 1.9737795551710935, "learning_rate": 1.2342624766673028e-08, "loss": 0.5167, "step": 31920 }, { "epoch": 0.9783314944219689, "grad_norm": 1.8469035295481453, "learning_rate": 1.2307797519161558e-08, "loss": 0.5574, "step": 31921 }, { "epoch": 0.97836214294471, "grad_norm": 1.8179868060474267, "learning_rate": 1.227301941665704e-08, "loss": 0.5463, "step": 31922 }, { "epoch": 0.9783927914674513, "grad_norm": 2.1472050359721844, "learning_rate": 1.2238290459502533e-08, "loss": 0.5882, "step": 31923 }, { "epoch": 0.9784234399901924, "grad_norm": 2.0861477668995656, "learning_rate": 1.2203610648041098e-08, "loss": 0.6867, "step": 31924 }, { "epoch": 0.9784540885129337, "grad_norm": 0.7550695487127996, "learning_rate": 1.2168979982613016e-08, "loss": 0.3683, "step": 31925 }, { "epoch": 0.9784847370356748, "grad_norm": 2.135681019911945, "learning_rate": 1.2134398463560238e-08, "loss": 0.5402, "step": 31926 }, { "epoch": 0.9785153855584161, "grad_norm": 1.9316476634752442, "learning_rate": 1.20998660912236e-08, "loss": 0.5133, "step": 31927 }, { "epoch": 0.9785460340811573, "grad_norm": 2.1568871407044217, "learning_rate": 1.2065382865942832e-08, "loss": 0.6472, "step": 31928 }, { "epoch": 0.9785766826038985, "grad_norm": 0.7982383536294246, "learning_rate": 1.2030948788058772e-08, "loss": 0.4029, "step": 31929 }, { "epoch": 0.9786073311266397, "grad_norm": 1.873059959584648, "learning_rate": 1.1996563857909482e-08, "loss": 0.5353, "step": 31930 }, { "epoch": 0.9786379796493809, "grad_norm": 1.8270161167884624, "learning_rate": 1.1962228075834137e-08, "loss": 0.5264, "step": 31931 }, { "epoch": 0.9786686281721221, "grad_norm": 2.0035210308675104, "learning_rate": 1.1927941442171908e-08, "loss": 0.5882, "step": 31932 }, { "epoch": 0.9786992766948633, "grad_norm": 0.8530117139590689, "learning_rate": 1.1893703957259194e-08, "loss": 0.4095, "step": 31933 }, { "epoch": 0.9787299252176045, "grad_norm": 2.2252060719468445, "learning_rate": 1.1859515621434615e-08, "loss": 0.6557, "step": 31934 }, { "epoch": 0.9787605737403458, "grad_norm": 1.8030863735010636, "learning_rate": 1.1825376435034008e-08, "loss": 0.4797, "step": 31935 }, { "epoch": 0.9787912222630869, "grad_norm": 2.3406413075036063, "learning_rate": 1.179128639839433e-08, "loss": 0.5654, "step": 31936 }, { "epoch": 0.9788218707858282, "grad_norm": 1.8456915282808701, "learning_rate": 1.1757245511851423e-08, "loss": 0.5558, "step": 31937 }, { "epoch": 0.9788525193085693, "grad_norm": 1.8208960806401542, "learning_rate": 1.1723253775741129e-08, "loss": 0.5115, "step": 31938 }, { "epoch": 0.9788831678313106, "grad_norm": 0.764049829879985, "learning_rate": 1.1689311190397624e-08, "loss": 0.3873, "step": 31939 }, { "epoch": 0.9789138163540517, "grad_norm": 1.8396727722930533, "learning_rate": 1.165541775615564e-08, "loss": 0.5551, "step": 31940 }, { "epoch": 0.978944464876793, "grad_norm": 2.2472271528017242, "learning_rate": 1.1621573473348801e-08, "loss": 0.6678, "step": 31941 }, { "epoch": 0.9789751133995341, "grad_norm": 0.8299616352376835, "learning_rate": 1.1587778342311284e-08, "loss": 0.4162, "step": 31942 }, { "epoch": 0.9790057619222754, "grad_norm": 1.7128234487610028, "learning_rate": 1.1554032363376156e-08, "loss": 0.5784, "step": 31943 }, { "epoch": 0.9790364104450165, "grad_norm": 1.6973560300227395, "learning_rate": 1.1520335536874816e-08, "loss": 0.4651, "step": 31944 }, { "epoch": 0.9790670589677578, "grad_norm": 1.725814974818113, "learning_rate": 1.1486687863139778e-08, "loss": 0.5859, "step": 31945 }, { "epoch": 0.979097707490499, "grad_norm": 1.8345641800476444, "learning_rate": 1.1453089342503555e-08, "loss": 0.5591, "step": 31946 }, { "epoch": 0.9791283560132401, "grad_norm": 1.6995622213731028, "learning_rate": 1.1419539975295878e-08, "loss": 0.5377, "step": 31947 }, { "epoch": 0.9791590045359814, "grad_norm": 1.8847205944202197, "learning_rate": 1.1386039761848155e-08, "loss": 0.4936, "step": 31948 }, { "epoch": 0.9791896530587225, "grad_norm": 1.8774533163047418, "learning_rate": 1.135258870249012e-08, "loss": 0.5741, "step": 31949 }, { "epoch": 0.9792203015814638, "grad_norm": 2.0481770560766415, "learning_rate": 1.1319186797550952e-08, "loss": 0.63, "step": 31950 }, { "epoch": 0.9792509501042049, "grad_norm": 2.1366090652104814, "learning_rate": 1.1285834047360943e-08, "loss": 0.6605, "step": 31951 }, { "epoch": 0.9792815986269462, "grad_norm": 1.8362893825631765, "learning_rate": 1.1252530452247612e-08, "loss": 0.511, "step": 31952 }, { "epoch": 0.9793122471496873, "grad_norm": 1.677369193164723, "learning_rate": 1.1219276012539581e-08, "loss": 0.5251, "step": 31953 }, { "epoch": 0.9793428956724286, "grad_norm": 2.0183036624823054, "learning_rate": 1.1186070728564369e-08, "loss": 0.6543, "step": 31954 }, { "epoch": 0.9793735441951698, "grad_norm": 2.068724147358849, "learning_rate": 1.1152914600649489e-08, "loss": 0.4995, "step": 31955 }, { "epoch": 0.979404192717911, "grad_norm": 1.9109513153725721, "learning_rate": 1.1119807629121348e-08, "loss": 0.6399, "step": 31956 }, { "epoch": 0.9794348412406522, "grad_norm": 2.1124404875536835, "learning_rate": 1.1086749814306352e-08, "loss": 0.5507, "step": 31957 }, { "epoch": 0.9794654897633934, "grad_norm": 2.0223195735238817, "learning_rate": 1.1053741156529795e-08, "loss": 0.5844, "step": 31958 }, { "epoch": 0.9794961382861346, "grad_norm": 1.9259743971039909, "learning_rate": 1.1020781656116975e-08, "loss": 0.5772, "step": 31959 }, { "epoch": 0.9795267868088758, "grad_norm": 0.7774717593851073, "learning_rate": 1.0987871313393183e-08, "loss": 0.396, "step": 31960 }, { "epoch": 0.979557435331617, "grad_norm": 1.8505126790320385, "learning_rate": 1.0955010128682608e-08, "loss": 0.5615, "step": 31961 }, { "epoch": 0.9795880838543582, "grad_norm": 1.8841107910905441, "learning_rate": 1.0922198102308878e-08, "loss": 0.5454, "step": 31962 }, { "epoch": 0.9796187323770994, "grad_norm": 1.8546057665406201, "learning_rate": 1.0889435234594514e-08, "loss": 0.6504, "step": 31963 }, { "epoch": 0.9796493808998407, "grad_norm": 1.9571624594655888, "learning_rate": 1.0856721525863701e-08, "loss": 0.6011, "step": 31964 }, { "epoch": 0.9796800294225818, "grad_norm": 0.7914785260951971, "learning_rate": 1.0824056976437846e-08, "loss": 0.3923, "step": 31965 }, { "epoch": 0.9797106779453231, "grad_norm": 1.8337540058320454, "learning_rate": 1.0791441586639472e-08, "loss": 0.523, "step": 31966 }, { "epoch": 0.9797413264680642, "grad_norm": 1.9246109982727395, "learning_rate": 1.0758875356789434e-08, "loss": 0.6586, "step": 31967 }, { "epoch": 0.9797719749908055, "grad_norm": 1.8395795032472855, "learning_rate": 1.0726358287208583e-08, "loss": 0.4691, "step": 31968 }, { "epoch": 0.9798026235135466, "grad_norm": 1.8916481664249096, "learning_rate": 1.069389037821722e-08, "loss": 0.5879, "step": 31969 }, { "epoch": 0.9798332720362879, "grad_norm": 1.9984094009467344, "learning_rate": 1.0661471630135644e-08, "loss": 0.5965, "step": 31970 }, { "epoch": 0.979863920559029, "grad_norm": 1.9373659739658333, "learning_rate": 1.0629102043283602e-08, "loss": 0.6142, "step": 31971 }, { "epoch": 0.9798945690817703, "grad_norm": 1.9278199302222796, "learning_rate": 1.0596781617979168e-08, "loss": 0.5897, "step": 31972 }, { "epoch": 0.9799252176045115, "grad_norm": 0.8088340161358509, "learning_rate": 1.0564510354541535e-08, "loss": 0.3933, "step": 31973 }, { "epoch": 0.9799558661272527, "grad_norm": 1.8898264480293452, "learning_rate": 1.0532288253288225e-08, "loss": 0.5878, "step": 31974 }, { "epoch": 0.9799865146499939, "grad_norm": 0.80194322097358, "learning_rate": 1.0500115314536763e-08, "loss": 0.3865, "step": 31975 }, { "epoch": 0.9800171631727351, "grad_norm": 1.8868163882563185, "learning_rate": 1.0467991538604672e-08, "loss": 0.5057, "step": 31976 }, { "epoch": 0.9800478116954763, "grad_norm": 2.1957109911678425, "learning_rate": 1.043591692580781e-08, "loss": 0.621, "step": 31977 }, { "epoch": 0.9800784602182174, "grad_norm": 2.0409043830423794, "learning_rate": 1.040389147646259e-08, "loss": 0.5783, "step": 31978 }, { "epoch": 0.9801091087409587, "grad_norm": 1.7657718486447838, "learning_rate": 1.0371915190884319e-08, "loss": 0.5053, "step": 31979 }, { "epoch": 0.9801397572636998, "grad_norm": 1.9217984803933938, "learning_rate": 1.0339988069388295e-08, "loss": 0.6132, "step": 31980 }, { "epoch": 0.9801704057864411, "grad_norm": 2.032217031377303, "learning_rate": 1.0308110112289271e-08, "loss": 0.5874, "step": 31981 }, { "epoch": 0.9802010543091823, "grad_norm": 1.6346896007866465, "learning_rate": 1.0276281319900883e-08, "loss": 0.5065, "step": 31982 }, { "epoch": 0.9802317028319235, "grad_norm": 1.8483017756172266, "learning_rate": 1.0244501692536768e-08, "loss": 0.5809, "step": 31983 }, { "epoch": 0.9802623513546647, "grad_norm": 1.9229872184680892, "learning_rate": 1.0212771230510565e-08, "loss": 0.5531, "step": 31984 }, { "epoch": 0.9802929998774059, "grad_norm": 0.8236928416824567, "learning_rate": 1.0181089934134247e-08, "loss": 0.3848, "step": 31985 }, { "epoch": 0.9803236484001471, "grad_norm": 2.019003898043624, "learning_rate": 1.0149457803720897e-08, "loss": 0.5663, "step": 31986 }, { "epoch": 0.9803542969228883, "grad_norm": 1.8459651346374513, "learning_rate": 1.0117874839581376e-08, "loss": 0.6318, "step": 31987 }, { "epoch": 0.9803849454456295, "grad_norm": 1.9551996703521368, "learning_rate": 1.0086341042027104e-08, "loss": 0.5698, "step": 31988 }, { "epoch": 0.9804155939683707, "grad_norm": 0.8124863435432201, "learning_rate": 1.0054856411368941e-08, "loss": 0.3921, "step": 31989 }, { "epoch": 0.9804462424911119, "grad_norm": 2.0590925586661943, "learning_rate": 1.0023420947917195e-08, "loss": 0.5513, "step": 31990 }, { "epoch": 0.9804768910138532, "grad_norm": 2.2291040719064745, "learning_rate": 9.992034651981064e-09, "loss": 0.613, "step": 31991 }, { "epoch": 0.9805075395365943, "grad_norm": 2.1074526656071826, "learning_rate": 9.9606975238703e-09, "loss": 0.6021, "step": 31992 }, { "epoch": 0.9805381880593356, "grad_norm": 0.7807808607320236, "learning_rate": 9.929409563893544e-09, "loss": 0.3729, "step": 31993 }, { "epoch": 0.9805688365820767, "grad_norm": 1.8474246657196873, "learning_rate": 9.898170772358883e-09, "loss": 0.5924, "step": 31994 }, { "epoch": 0.980599485104818, "grad_norm": 2.0260794011705, "learning_rate": 9.866981149574405e-09, "loss": 0.5365, "step": 31995 }, { "epoch": 0.9806301336275591, "grad_norm": 1.8009490017319887, "learning_rate": 9.835840695847643e-09, "loss": 0.5835, "step": 31996 }, { "epoch": 0.9806607821503004, "grad_norm": 1.8323801591591018, "learning_rate": 9.804749411485014e-09, "loss": 0.593, "step": 31997 }, { "epoch": 0.9806914306730415, "grad_norm": 1.7678693994760308, "learning_rate": 9.773707296792944e-09, "loss": 0.5832, "step": 31998 }, { "epoch": 0.9807220791957828, "grad_norm": 1.98199456137436, "learning_rate": 9.742714352077298e-09, "loss": 0.6594, "step": 31999 }, { "epoch": 0.980752727718524, "grad_norm": 1.9614080535684848, "learning_rate": 9.711770577643387e-09, "loss": 0.5488, "step": 32000 }, { "epoch": 0.9807833762412652, "grad_norm": 1.9184793159808053, "learning_rate": 9.680875973795966e-09, "loss": 0.5864, "step": 32001 }, { "epoch": 0.9808140247640064, "grad_norm": 2.003624381670176, "learning_rate": 9.650030540840349e-09, "loss": 0.6331, "step": 32002 }, { "epoch": 0.9808446732867476, "grad_norm": 0.8064583925934871, "learning_rate": 9.619234279079625e-09, "loss": 0.3839, "step": 32003 }, { "epoch": 0.9808753218094888, "grad_norm": 2.1519519889551515, "learning_rate": 9.588487188816886e-09, "loss": 0.5805, "step": 32004 }, { "epoch": 0.98090597033223, "grad_norm": 1.909206219710678, "learning_rate": 9.557789270356333e-09, "loss": 0.5342, "step": 32005 }, { "epoch": 0.9809366188549712, "grad_norm": 1.957865054821352, "learning_rate": 9.52714052399939e-09, "loss": 0.5988, "step": 32006 }, { "epoch": 0.9809672673777124, "grad_norm": 1.8889723672885048, "learning_rate": 9.496540950048594e-09, "loss": 0.527, "step": 32007 }, { "epoch": 0.9809979159004536, "grad_norm": 0.8131956178953065, "learning_rate": 9.46599054880537e-09, "loss": 0.4055, "step": 32008 }, { "epoch": 0.9810285644231947, "grad_norm": 0.8180858500670286, "learning_rate": 9.435489320570035e-09, "loss": 0.3812, "step": 32009 }, { "epoch": 0.981059212945936, "grad_norm": 0.7534990955695939, "learning_rate": 9.405037265644568e-09, "loss": 0.3791, "step": 32010 }, { "epoch": 0.9810898614686772, "grad_norm": 1.8051291569174583, "learning_rate": 9.37463438432762e-09, "loss": 0.5239, "step": 32011 }, { "epoch": 0.9811205099914184, "grad_norm": 2.0512921265416186, "learning_rate": 9.344280676918949e-09, "loss": 0.562, "step": 32012 }, { "epoch": 0.9811511585141596, "grad_norm": 1.9142620282584628, "learning_rate": 9.313976143718873e-09, "loss": 0.6085, "step": 32013 }, { "epoch": 0.9811818070369008, "grad_norm": 1.815811489239524, "learning_rate": 9.283720785024376e-09, "loss": 0.5033, "step": 32014 }, { "epoch": 0.981212455559642, "grad_norm": 1.797385905216103, "learning_rate": 9.25351460113466e-09, "loss": 0.5342, "step": 32015 }, { "epoch": 0.9812431040823832, "grad_norm": 0.808864421194006, "learning_rate": 9.223357592347272e-09, "loss": 0.3962, "step": 32016 }, { "epoch": 0.9812737526051244, "grad_norm": 2.1440032755217473, "learning_rate": 9.193249758958633e-09, "loss": 0.5608, "step": 32017 }, { "epoch": 0.9813044011278657, "grad_norm": 1.7083799424177553, "learning_rate": 9.163191101265734e-09, "loss": 0.5838, "step": 32018 }, { "epoch": 0.9813350496506068, "grad_norm": 2.0735264125069994, "learning_rate": 9.133181619565002e-09, "loss": 0.5729, "step": 32019 }, { "epoch": 0.9813656981733481, "grad_norm": 1.9068144146968775, "learning_rate": 9.10322131415231e-09, "loss": 0.5258, "step": 32020 }, { "epoch": 0.9813963466960892, "grad_norm": 1.8931667077873044, "learning_rate": 9.073310185322425e-09, "loss": 0.6312, "step": 32021 }, { "epoch": 0.9814269952188305, "grad_norm": 1.7584145178854658, "learning_rate": 9.043448233370111e-09, "loss": 0.6059, "step": 32022 }, { "epoch": 0.9814576437415716, "grad_norm": 0.7759676616239347, "learning_rate": 9.013635458589575e-09, "loss": 0.3847, "step": 32023 }, { "epoch": 0.9814882922643129, "grad_norm": 1.9517171840609335, "learning_rate": 8.983871861275029e-09, "loss": 0.5291, "step": 32024 }, { "epoch": 0.981518940787054, "grad_norm": 2.108356889826978, "learning_rate": 8.954157441719014e-09, "loss": 0.5984, "step": 32025 }, { "epoch": 0.9815495893097953, "grad_norm": 2.236236991659414, "learning_rate": 8.92449220021463e-09, "loss": 0.5699, "step": 32026 }, { "epoch": 0.9815802378325365, "grad_norm": 0.7624748645407461, "learning_rate": 8.89487613705442e-09, "loss": 0.3671, "step": 32027 }, { "epoch": 0.9816108863552777, "grad_norm": 1.7450841648746453, "learning_rate": 8.865309252530374e-09, "loss": 0.5423, "step": 32028 }, { "epoch": 0.9816415348780189, "grad_norm": 1.659716261829808, "learning_rate": 8.83579154693337e-09, "loss": 0.5094, "step": 32029 }, { "epoch": 0.9816721834007601, "grad_norm": 1.9179031460171334, "learning_rate": 8.806323020553731e-09, "loss": 0.5093, "step": 32030 }, { "epoch": 0.9817028319235013, "grad_norm": 2.0111016894024334, "learning_rate": 8.776903673683446e-09, "loss": 0.6167, "step": 32031 }, { "epoch": 0.9817334804462425, "grad_norm": 2.064461791649948, "learning_rate": 8.747533506610618e-09, "loss": 0.5856, "step": 32032 }, { "epoch": 0.9817641289689837, "grad_norm": 0.7953891307708895, "learning_rate": 8.718212519625569e-09, "loss": 0.4048, "step": 32033 }, { "epoch": 0.981794777491725, "grad_norm": 1.8456653137846164, "learning_rate": 8.688940713016958e-09, "loss": 0.513, "step": 32034 }, { "epoch": 0.9818254260144661, "grad_norm": 2.0646082224301483, "learning_rate": 8.659718087073998e-09, "loss": 0.5967, "step": 32035 }, { "epoch": 0.9818560745372074, "grad_norm": 2.038990487222901, "learning_rate": 8.630544642083128e-09, "loss": 0.5304, "step": 32036 }, { "epoch": 0.9818867230599485, "grad_norm": 0.83940234227609, "learning_rate": 8.601420378333003e-09, "loss": 0.4033, "step": 32037 }, { "epoch": 0.9819173715826898, "grad_norm": 1.9727868864227052, "learning_rate": 8.572345296109508e-09, "loss": 0.6516, "step": 32038 }, { "epoch": 0.9819480201054309, "grad_norm": 1.8482120977624652, "learning_rate": 8.543319395700744e-09, "loss": 0.473, "step": 32039 }, { "epoch": 0.9819786686281721, "grad_norm": 1.836410263845654, "learning_rate": 8.514342677391486e-09, "loss": 0.5742, "step": 32040 }, { "epoch": 0.9820093171509133, "grad_norm": 2.086075700287257, "learning_rate": 8.485415141467057e-09, "loss": 0.6396, "step": 32041 }, { "epoch": 0.9820399656736545, "grad_norm": 2.002854052635818, "learning_rate": 8.456536788213343e-09, "loss": 0.6121, "step": 32042 }, { "epoch": 0.9820706141963957, "grad_norm": 2.2766670224312744, "learning_rate": 8.42770761791456e-09, "loss": 0.6854, "step": 32043 }, { "epoch": 0.9821012627191369, "grad_norm": 0.8173810355297702, "learning_rate": 8.398927630854925e-09, "loss": 0.4132, "step": 32044 }, { "epoch": 0.9821319112418782, "grad_norm": 1.75004972227259, "learning_rate": 8.370196827317545e-09, "loss": 0.5147, "step": 32045 }, { "epoch": 0.9821625597646193, "grad_norm": 1.872961931847651, "learning_rate": 8.341515207585526e-09, "loss": 0.6013, "step": 32046 }, { "epoch": 0.9821932082873606, "grad_norm": 1.8258353612499236, "learning_rate": 8.312882771941976e-09, "loss": 0.5483, "step": 32047 }, { "epoch": 0.9822238568101017, "grad_norm": 1.8962229953185814, "learning_rate": 8.284299520668892e-09, "loss": 0.5557, "step": 32048 }, { "epoch": 0.982254505332843, "grad_norm": 0.7845979902786705, "learning_rate": 8.255765454047716e-09, "loss": 0.3904, "step": 32049 }, { "epoch": 0.9822851538555841, "grad_norm": 1.6893446929841256, "learning_rate": 8.227280572359331e-09, "loss": 0.5668, "step": 32050 }, { "epoch": 0.9823158023783254, "grad_norm": 2.011081175108313, "learning_rate": 8.198844875885182e-09, "loss": 0.6, "step": 32051 }, { "epoch": 0.9823464509010665, "grad_norm": 1.769806930675884, "learning_rate": 8.170458364905043e-09, "loss": 0.5637, "step": 32052 }, { "epoch": 0.9823770994238078, "grad_norm": 1.8461914426390449, "learning_rate": 8.142121039698136e-09, "loss": 0.5499, "step": 32053 }, { "epoch": 0.982407747946549, "grad_norm": 2.1186789941540694, "learning_rate": 8.113832900544239e-09, "loss": 0.5195, "step": 32054 }, { "epoch": 0.9824383964692902, "grad_norm": 1.8851794612737631, "learning_rate": 8.085593947722569e-09, "loss": 0.531, "step": 32055 }, { "epoch": 0.9824690449920314, "grad_norm": 1.8264619505200508, "learning_rate": 8.057404181510131e-09, "loss": 0.5619, "step": 32056 }, { "epoch": 0.9824996935147726, "grad_norm": 2.0008131157654496, "learning_rate": 8.029263602185588e-09, "loss": 0.6514, "step": 32057 }, { "epoch": 0.9825303420375138, "grad_norm": 1.8104441757622263, "learning_rate": 8.001172210025942e-09, "loss": 0.4945, "step": 32058 }, { "epoch": 0.982560990560255, "grad_norm": 1.8411826111272667, "learning_rate": 7.973130005308193e-09, "loss": 0.6361, "step": 32059 }, { "epoch": 0.9825916390829962, "grad_norm": 0.8227007418191568, "learning_rate": 7.945136988308232e-09, "loss": 0.4115, "step": 32060 }, { "epoch": 0.9826222876057374, "grad_norm": 1.872275505103735, "learning_rate": 7.91719315930195e-09, "loss": 0.5826, "step": 32061 }, { "epoch": 0.9826529361284786, "grad_norm": 0.7502101885103806, "learning_rate": 7.889298518565236e-09, "loss": 0.3735, "step": 32062 }, { "epoch": 0.9826835846512199, "grad_norm": 1.8885030631177957, "learning_rate": 7.861453066372316e-09, "loss": 0.5635, "step": 32063 }, { "epoch": 0.982714233173961, "grad_norm": 1.963163099892888, "learning_rate": 7.833656802997968e-09, "loss": 0.567, "step": 32064 }, { "epoch": 0.9827448816967023, "grad_norm": 2.176711093288529, "learning_rate": 7.805909728715866e-09, "loss": 0.587, "step": 32065 }, { "epoch": 0.9827755302194434, "grad_norm": 1.9286608924434867, "learning_rate": 7.778211843799122e-09, "loss": 0.6022, "step": 32066 }, { "epoch": 0.9828061787421847, "grad_norm": 1.8475393175055588, "learning_rate": 7.750563148521406e-09, "loss": 0.5389, "step": 32067 }, { "epoch": 0.9828368272649258, "grad_norm": 1.7450833558515164, "learning_rate": 7.722963643154169e-09, "loss": 0.6028, "step": 32068 }, { "epoch": 0.9828674757876671, "grad_norm": 1.81546661933752, "learning_rate": 7.695413327970525e-09, "loss": 0.5996, "step": 32069 }, { "epoch": 0.9828981243104082, "grad_norm": 1.7848550767781883, "learning_rate": 7.667912203240812e-09, "loss": 0.505, "step": 32070 }, { "epoch": 0.9829287728331494, "grad_norm": 1.608312575381778, "learning_rate": 7.640460269237038e-09, "loss": 0.5159, "step": 32071 }, { "epoch": 0.9829594213558907, "grad_norm": 2.260847731308854, "learning_rate": 7.613057526228428e-09, "loss": 0.5175, "step": 32072 }, { "epoch": 0.9829900698786318, "grad_norm": 1.9460906587573807, "learning_rate": 7.585703974486435e-09, "loss": 0.528, "step": 32073 }, { "epoch": 0.9830207184013731, "grad_norm": 0.8059224621895098, "learning_rate": 7.558399614279732e-09, "loss": 0.4049, "step": 32074 }, { "epoch": 0.9830513669241142, "grad_norm": 1.9188064958298787, "learning_rate": 7.531144445876993e-09, "loss": 0.5401, "step": 32075 }, { "epoch": 0.9830820154468555, "grad_norm": 0.7704400428127524, "learning_rate": 7.503938469547444e-09, "loss": 0.382, "step": 32076 }, { "epoch": 0.9831126639695966, "grad_norm": 1.6786535080310283, "learning_rate": 7.47678168555921e-09, "loss": 0.5536, "step": 32077 }, { "epoch": 0.9831433124923379, "grad_norm": 1.900212206573953, "learning_rate": 7.449674094179848e-09, "loss": 0.6077, "step": 32078 }, { "epoch": 0.983173961015079, "grad_norm": 1.8441255479639869, "learning_rate": 7.422615695675817e-09, "loss": 0.5578, "step": 32079 }, { "epoch": 0.9832046095378203, "grad_norm": 1.8946336959134495, "learning_rate": 7.395606490314122e-09, "loss": 0.625, "step": 32080 }, { "epoch": 0.9832352580605614, "grad_norm": 2.187548534919168, "learning_rate": 7.3686464783612185e-09, "loss": 0.6327, "step": 32081 }, { "epoch": 0.9832659065833027, "grad_norm": 1.9142993594413114, "learning_rate": 7.34173566008245e-09, "loss": 0.5092, "step": 32082 }, { "epoch": 0.9832965551060439, "grad_norm": 1.9376258412856266, "learning_rate": 7.3148740357426025e-09, "loss": 0.5949, "step": 32083 }, { "epoch": 0.9833272036287851, "grad_norm": 2.227633777829901, "learning_rate": 7.288061605607022e-09, "loss": 0.6297, "step": 32084 }, { "epoch": 0.9833578521515263, "grad_norm": 0.7915539918779755, "learning_rate": 7.261298369939939e-09, "loss": 0.4092, "step": 32085 }, { "epoch": 0.9833885006742675, "grad_norm": 2.3157230721266933, "learning_rate": 7.234584329003924e-09, "loss": 0.7563, "step": 32086 }, { "epoch": 0.9834191491970087, "grad_norm": 2.09016037983261, "learning_rate": 7.207919483063763e-09, "loss": 0.5462, "step": 32087 }, { "epoch": 0.9834497977197499, "grad_norm": 2.0126592238691483, "learning_rate": 7.181303832380915e-09, "loss": 0.6309, "step": 32088 }, { "epoch": 0.9834804462424911, "grad_norm": 1.853168712974103, "learning_rate": 7.154737377218501e-09, "loss": 0.5679, "step": 32089 }, { "epoch": 0.9835110947652324, "grad_norm": 0.7540866073230365, "learning_rate": 7.12822011783798e-09, "loss": 0.3785, "step": 32090 }, { "epoch": 0.9835417432879735, "grad_norm": 1.7951255811127218, "learning_rate": 7.101752054500255e-09, "loss": 0.5491, "step": 32091 }, { "epoch": 0.9835723918107148, "grad_norm": 1.878776439541849, "learning_rate": 7.075333187466782e-09, "loss": 0.5508, "step": 32092 }, { "epoch": 0.9836030403334559, "grad_norm": 2.3282919791148395, "learning_rate": 7.048963516997354e-09, "loss": 0.5277, "step": 32093 }, { "epoch": 0.9836336888561972, "grad_norm": 0.7642656812535754, "learning_rate": 7.022643043351762e-09, "loss": 0.403, "step": 32094 }, { "epoch": 0.9836643373789383, "grad_norm": 2.1034629572765544, "learning_rate": 6.9963717667898e-09, "loss": 0.5264, "step": 32095 }, { "epoch": 0.9836949859016796, "grad_norm": 0.8102528323412399, "learning_rate": 6.970149687570149e-09, "loss": 0.3839, "step": 32096 }, { "epoch": 0.9837256344244207, "grad_norm": 2.049003035760739, "learning_rate": 6.943976805950936e-09, "loss": 0.6175, "step": 32097 }, { "epoch": 0.983756282947162, "grad_norm": 1.9148311258387554, "learning_rate": 6.917853122190843e-09, "loss": 0.5547, "step": 32098 }, { "epoch": 0.9837869314699031, "grad_norm": 1.7783633575719633, "learning_rate": 6.891778636546331e-09, "loss": 0.5661, "step": 32099 }, { "epoch": 0.9838175799926444, "grad_norm": 1.917440205511402, "learning_rate": 6.865753349274418e-09, "loss": 0.6295, "step": 32100 }, { "epoch": 0.9838482285153856, "grad_norm": 2.1239229945077205, "learning_rate": 6.8397772606315635e-09, "loss": 0.5803, "step": 32101 }, { "epoch": 0.9838788770381267, "grad_norm": 1.9828564465780258, "learning_rate": 6.813850370874786e-09, "loss": 0.6254, "step": 32102 }, { "epoch": 0.983909525560868, "grad_norm": 1.9164695195599697, "learning_rate": 6.78797268025777e-09, "loss": 0.4881, "step": 32103 }, { "epoch": 0.9839401740836091, "grad_norm": 0.8288615014375638, "learning_rate": 6.762144189036978e-09, "loss": 0.3843, "step": 32104 }, { "epoch": 0.9839708226063504, "grad_norm": 1.9576406027012268, "learning_rate": 6.7363648974666514e-09, "loss": 0.5717, "step": 32105 }, { "epoch": 0.9840014711290915, "grad_norm": 1.9675983762993101, "learning_rate": 6.710634805799921e-09, "loss": 0.5548, "step": 32106 }, { "epoch": 0.9840321196518328, "grad_norm": 1.9821035713678428, "learning_rate": 6.684953914291026e-09, "loss": 0.5721, "step": 32107 }, { "epoch": 0.9840627681745739, "grad_norm": 2.037490932562613, "learning_rate": 6.659322223193098e-09, "loss": 0.683, "step": 32108 }, { "epoch": 0.9840934166973152, "grad_norm": 1.8009626955086522, "learning_rate": 6.63373973275816e-09, "loss": 0.5198, "step": 32109 }, { "epoch": 0.9841240652200564, "grad_norm": 2.0576698754037817, "learning_rate": 6.608206443238785e-09, "loss": 0.6509, "step": 32110 }, { "epoch": 0.9841547137427976, "grad_norm": 1.7626166457613506, "learning_rate": 6.58272235488644e-09, "loss": 0.5426, "step": 32111 }, { "epoch": 0.9841853622655388, "grad_norm": 2.017501333228605, "learning_rate": 6.557287467952034e-09, "loss": 0.5069, "step": 32112 }, { "epoch": 0.98421601078828, "grad_norm": 1.8207007909368162, "learning_rate": 6.531901782686478e-09, "loss": 0.5569, "step": 32113 }, { "epoch": 0.9842466593110212, "grad_norm": 1.9310399962267562, "learning_rate": 6.5065652993395736e-09, "loss": 0.6457, "step": 32114 }, { "epoch": 0.9842773078337624, "grad_norm": 1.8913628859625735, "learning_rate": 6.481278018161119e-09, "loss": 0.5564, "step": 32115 }, { "epoch": 0.9843079563565036, "grad_norm": 1.9059668257479678, "learning_rate": 6.45603993940036e-09, "loss": 0.6091, "step": 32116 }, { "epoch": 0.9843386048792448, "grad_norm": 1.8394968915530427, "learning_rate": 6.430851063305432e-09, "loss": 0.5864, "step": 32117 }, { "epoch": 0.984369253401986, "grad_norm": 1.765680239184946, "learning_rate": 6.40571139012558e-09, "loss": 0.5003, "step": 32118 }, { "epoch": 0.9843999019247273, "grad_norm": 1.874694871162933, "learning_rate": 6.380620920107827e-09, "loss": 0.5733, "step": 32119 }, { "epoch": 0.9844305504474684, "grad_norm": 2.0646488986050153, "learning_rate": 6.3555796534992e-09, "loss": 0.6085, "step": 32120 }, { "epoch": 0.9844611989702097, "grad_norm": 2.178370899113514, "learning_rate": 6.330587590546722e-09, "loss": 0.5966, "step": 32121 }, { "epoch": 0.9844918474929508, "grad_norm": 2.1521574512247774, "learning_rate": 6.305644731496863e-09, "loss": 0.5764, "step": 32122 }, { "epoch": 0.9845224960156921, "grad_norm": 2.0071922786927248, "learning_rate": 6.280751076594982e-09, "loss": 0.5773, "step": 32123 }, { "epoch": 0.9845531445384332, "grad_norm": 2.016206331089607, "learning_rate": 6.255906626086994e-09, "loss": 0.5245, "step": 32124 }, { "epoch": 0.9845837930611745, "grad_norm": 1.8835427417200508, "learning_rate": 6.231111380217147e-09, "loss": 0.6246, "step": 32125 }, { "epoch": 0.9846144415839156, "grad_norm": 0.7637162871147954, "learning_rate": 6.206365339229692e-09, "loss": 0.3885, "step": 32126 }, { "epoch": 0.9846450901066569, "grad_norm": 1.9619666079586087, "learning_rate": 6.181668503368321e-09, "loss": 0.6558, "step": 32127 }, { "epoch": 0.9846757386293981, "grad_norm": 1.9181336744683208, "learning_rate": 6.157020872877284e-09, "loss": 0.6169, "step": 32128 }, { "epoch": 0.9847063871521393, "grad_norm": 2.031251411936697, "learning_rate": 6.13242244799861e-09, "loss": 0.603, "step": 32129 }, { "epoch": 0.9847370356748805, "grad_norm": 2.055838831414465, "learning_rate": 6.107873228974881e-09, "loss": 0.5197, "step": 32130 }, { "epoch": 0.9847676841976217, "grad_norm": 1.8676322748424878, "learning_rate": 6.083373216048127e-09, "loss": 0.5321, "step": 32131 }, { "epoch": 0.9847983327203629, "grad_norm": 1.8869173392774625, "learning_rate": 6.058922409459267e-09, "loss": 0.5943, "step": 32132 }, { "epoch": 0.984828981243104, "grad_norm": 1.8616059360517128, "learning_rate": 6.034520809449773e-09, "loss": 0.6827, "step": 32133 }, { "epoch": 0.9848596297658453, "grad_norm": 2.3401479587616763, "learning_rate": 6.01016841626001e-09, "loss": 0.6086, "step": 32134 }, { "epoch": 0.9848902782885864, "grad_norm": 1.7526917834913114, "learning_rate": 5.985865230129784e-09, "loss": 0.5569, "step": 32135 }, { "epoch": 0.9849209268113277, "grad_norm": 0.7863599701782031, "learning_rate": 5.961611251298904e-09, "loss": 0.3944, "step": 32136 }, { "epoch": 0.9849515753340689, "grad_norm": 1.597700851730749, "learning_rate": 5.9374064800060695e-09, "loss": 0.5221, "step": 32137 }, { "epoch": 0.9849822238568101, "grad_norm": 1.9416851198001104, "learning_rate": 5.9132509164888664e-09, "loss": 0.5368, "step": 32138 }, { "epoch": 0.9850128723795513, "grad_norm": 1.7338769730102026, "learning_rate": 5.889144560987103e-09, "loss": 0.554, "step": 32139 }, { "epoch": 0.9850435209022925, "grad_norm": 2.000654787603007, "learning_rate": 5.8650874137372586e-09, "loss": 0.5657, "step": 32140 }, { "epoch": 0.9850741694250337, "grad_norm": 1.8206627709968017, "learning_rate": 5.841079474976363e-09, "loss": 0.5789, "step": 32141 }, { "epoch": 0.9851048179477749, "grad_norm": 1.9423160418477605, "learning_rate": 5.817120744940896e-09, "loss": 0.564, "step": 32142 }, { "epoch": 0.9851354664705161, "grad_norm": 1.751477953167837, "learning_rate": 5.793211223867334e-09, "loss": 0.569, "step": 32143 }, { "epoch": 0.9851661149932573, "grad_norm": 1.6884098950256825, "learning_rate": 5.7693509119910455e-09, "loss": 0.6115, "step": 32144 }, { "epoch": 0.9851967635159985, "grad_norm": 1.7879168427286751, "learning_rate": 5.745539809547396e-09, "loss": 0.5534, "step": 32145 }, { "epoch": 0.9852274120387398, "grad_norm": 1.92737118961795, "learning_rate": 5.721777916770643e-09, "loss": 0.5658, "step": 32146 }, { "epoch": 0.9852580605614809, "grad_norm": 2.152723209755073, "learning_rate": 5.698065233895045e-09, "loss": 0.5892, "step": 32147 }, { "epoch": 0.9852887090842222, "grad_norm": 1.8250826780639031, "learning_rate": 5.674401761154302e-09, "loss": 0.462, "step": 32148 }, { "epoch": 0.9853193576069633, "grad_norm": 0.7683808374996174, "learning_rate": 5.650787498781563e-09, "loss": 0.3938, "step": 32149 }, { "epoch": 0.9853500061297046, "grad_norm": 0.7897190747097925, "learning_rate": 5.627222447009417e-09, "loss": 0.409, "step": 32150 }, { "epoch": 0.9853806546524457, "grad_norm": 1.814815050031637, "learning_rate": 5.603706606069903e-09, "loss": 0.542, "step": 32151 }, { "epoch": 0.985411303175187, "grad_norm": 1.8318769292853074, "learning_rate": 5.580239976195057e-09, "loss": 0.5909, "step": 32152 }, { "epoch": 0.9854419516979281, "grad_norm": 2.135524915053273, "learning_rate": 5.556822557615804e-09, "loss": 0.5119, "step": 32153 }, { "epoch": 0.9854726002206694, "grad_norm": 1.9526865604786987, "learning_rate": 5.5334543505636275e-09, "loss": 0.5729, "step": 32154 }, { "epoch": 0.9855032487434106, "grad_norm": 0.8601601827691663, "learning_rate": 5.5101353552677876e-09, "loss": 0.3756, "step": 32155 }, { "epoch": 0.9855338972661518, "grad_norm": 1.9516135008344637, "learning_rate": 5.486865571958655e-09, "loss": 0.6055, "step": 32156 }, { "epoch": 0.985564545788893, "grad_norm": 1.843114779832284, "learning_rate": 5.463645000864937e-09, "loss": 0.6545, "step": 32157 }, { "epoch": 0.9855951943116342, "grad_norm": 1.9256268350171313, "learning_rate": 5.440473642216449e-09, "loss": 0.543, "step": 32158 }, { "epoch": 0.9856258428343754, "grad_norm": 0.823210495446886, "learning_rate": 5.417351496240786e-09, "loss": 0.3929, "step": 32159 }, { "epoch": 0.9856564913571166, "grad_norm": 2.0821418785032537, "learning_rate": 5.3942785631655444e-09, "loss": 0.6489, "step": 32160 }, { "epoch": 0.9856871398798578, "grad_norm": 0.8114654272372399, "learning_rate": 5.371254843218321e-09, "loss": 0.4128, "step": 32161 }, { "epoch": 0.985717788402599, "grad_norm": 1.870478169197147, "learning_rate": 5.34828033662671e-09, "loss": 0.5324, "step": 32162 }, { "epoch": 0.9857484369253402, "grad_norm": 1.9983496152994686, "learning_rate": 5.325355043615532e-09, "loss": 0.6408, "step": 32163 }, { "epoch": 0.9857790854480813, "grad_norm": 2.02811786070875, "learning_rate": 5.302478964412383e-09, "loss": 0.6626, "step": 32164 }, { "epoch": 0.9858097339708226, "grad_norm": 0.8425484653265515, "learning_rate": 5.279652099241528e-09, "loss": 0.4039, "step": 32165 }, { "epoch": 0.9858403824935638, "grad_norm": 1.7402283314858409, "learning_rate": 5.256874448328342e-09, "loss": 0.5148, "step": 32166 }, { "epoch": 0.985871031016305, "grad_norm": 2.1824515427200377, "learning_rate": 5.23414601189709e-09, "loss": 0.5608, "step": 32167 }, { "epoch": 0.9859016795390462, "grad_norm": 1.8802516674925316, "learning_rate": 5.211466790171482e-09, "loss": 0.5672, "step": 32168 }, { "epoch": 0.9859323280617874, "grad_norm": 1.974438411456843, "learning_rate": 5.188836783375228e-09, "loss": 0.4974, "step": 32169 }, { "epoch": 0.9859629765845286, "grad_norm": 1.8956857602609283, "learning_rate": 5.166255991731484e-09, "loss": 0.5133, "step": 32170 }, { "epoch": 0.9859936251072698, "grad_norm": 1.860621978447314, "learning_rate": 5.143724415462847e-09, "loss": 0.507, "step": 32171 }, { "epoch": 0.986024273630011, "grad_norm": 1.7526104171923664, "learning_rate": 5.1212420547908095e-09, "loss": 0.5225, "step": 32172 }, { "epoch": 0.9860549221527523, "grad_norm": 2.157527357259956, "learning_rate": 5.098808909937414e-09, "loss": 0.5916, "step": 32173 }, { "epoch": 0.9860855706754934, "grad_norm": 1.9719745325863633, "learning_rate": 5.07642498112304e-09, "loss": 0.5671, "step": 32174 }, { "epoch": 0.9861162191982347, "grad_norm": 1.8587645937059005, "learning_rate": 5.054090268569178e-09, "loss": 0.5891, "step": 32175 }, { "epoch": 0.9861468677209758, "grad_norm": 0.8284946626324323, "learning_rate": 5.031804772495097e-09, "loss": 0.3931, "step": 32176 }, { "epoch": 0.9861775162437171, "grad_norm": 1.8605764848143003, "learning_rate": 5.00956849312062e-09, "loss": 0.6033, "step": 32177 }, { "epoch": 0.9862081647664582, "grad_norm": 0.7819590517040335, "learning_rate": 4.987381430665017e-09, "loss": 0.411, "step": 32178 }, { "epoch": 0.9862388132891995, "grad_norm": 1.8504504902410337, "learning_rate": 4.965243585346447e-09, "loss": 0.5194, "step": 32179 }, { "epoch": 0.9862694618119406, "grad_norm": 1.8975039924903372, "learning_rate": 4.943154957384177e-09, "loss": 0.5677, "step": 32180 }, { "epoch": 0.9863001103346819, "grad_norm": 1.9092605578129695, "learning_rate": 4.921115546994148e-09, "loss": 0.5733, "step": 32181 }, { "epoch": 0.986330758857423, "grad_norm": 0.7797848914976064, "learning_rate": 4.899125354395074e-09, "loss": 0.3769, "step": 32182 }, { "epoch": 0.9863614073801643, "grad_norm": 2.0503409407295727, "learning_rate": 4.877184379802335e-09, "loss": 0.6076, "step": 32183 }, { "epoch": 0.9863920559029055, "grad_norm": 1.6344436633345008, "learning_rate": 4.855292623432983e-09, "loss": 0.4878, "step": 32184 }, { "epoch": 0.9864227044256467, "grad_norm": 1.788843673450308, "learning_rate": 4.8334500855029555e-09, "loss": 0.5557, "step": 32185 }, { "epoch": 0.9864533529483879, "grad_norm": 1.932931227417717, "learning_rate": 4.811656766226524e-09, "loss": 0.5843, "step": 32186 }, { "epoch": 0.9864840014711291, "grad_norm": 1.8632941887458132, "learning_rate": 4.7899126658190745e-09, "loss": 0.602, "step": 32187 }, { "epoch": 0.9865146499938703, "grad_norm": 1.9471303721456579, "learning_rate": 4.7682177844948775e-09, "loss": 0.6263, "step": 32188 }, { "epoch": 0.9865452985166115, "grad_norm": 2.1477313127025974, "learning_rate": 4.746572122467097e-09, "loss": 0.5013, "step": 32189 }, { "epoch": 0.9865759470393527, "grad_norm": 1.9570904364160888, "learning_rate": 4.72497567994945e-09, "loss": 0.5457, "step": 32190 }, { "epoch": 0.986606595562094, "grad_norm": 2.2215150925256593, "learning_rate": 4.703428457155102e-09, "loss": 0.5239, "step": 32191 }, { "epoch": 0.9866372440848351, "grad_norm": 1.850322505438101, "learning_rate": 4.681930454295547e-09, "loss": 0.579, "step": 32192 }, { "epoch": 0.9866678926075764, "grad_norm": 1.8449529303632448, "learning_rate": 4.660481671583394e-09, "loss": 0.5897, "step": 32193 }, { "epoch": 0.9866985411303175, "grad_norm": 2.0695083336910955, "learning_rate": 4.639082109229587e-09, "loss": 0.6, "step": 32194 }, { "epoch": 0.9867291896530587, "grad_norm": 2.0245268423535983, "learning_rate": 4.617731767445066e-09, "loss": 0.5491, "step": 32195 }, { "epoch": 0.9867598381757999, "grad_norm": 1.8629116248158342, "learning_rate": 4.596430646439664e-09, "loss": 0.5178, "step": 32196 }, { "epoch": 0.9867904866985411, "grad_norm": 2.0246353052794097, "learning_rate": 4.575178746424324e-09, "loss": 0.5724, "step": 32197 }, { "epoch": 0.9868211352212823, "grad_norm": 1.822492455233751, "learning_rate": 4.553976067607768e-09, "loss": 0.5321, "step": 32198 }, { "epoch": 0.9868517837440235, "grad_norm": 1.8283371594830122, "learning_rate": 4.532822610198717e-09, "loss": 0.5569, "step": 32199 }, { "epoch": 0.9868824322667648, "grad_norm": 1.86182680842588, "learning_rate": 4.511718374406448e-09, "loss": 0.5186, "step": 32200 }, { "epoch": 0.9869130807895059, "grad_norm": 1.795125767724452, "learning_rate": 4.490663360438019e-09, "loss": 0.502, "step": 32201 }, { "epoch": 0.9869437293122472, "grad_norm": 1.8729907667626347, "learning_rate": 4.4696575685010406e-09, "loss": 0.582, "step": 32202 }, { "epoch": 0.9869743778349883, "grad_norm": 1.9859984896924678, "learning_rate": 4.448700998803124e-09, "loss": 0.5097, "step": 32203 }, { "epoch": 0.9870050263577296, "grad_norm": 2.128585990631028, "learning_rate": 4.427793651550216e-09, "loss": 0.6388, "step": 32204 }, { "epoch": 0.9870356748804707, "grad_norm": 1.805038700210401, "learning_rate": 4.406935526948264e-09, "loss": 0.5265, "step": 32205 }, { "epoch": 0.987066323403212, "grad_norm": 2.110387490300948, "learning_rate": 4.386126625202658e-09, "loss": 0.6389, "step": 32206 }, { "epoch": 0.9870969719259531, "grad_norm": 0.7730748247702898, "learning_rate": 4.365366946519345e-09, "loss": 0.3726, "step": 32207 }, { "epoch": 0.9871276204486944, "grad_norm": 2.048451452045281, "learning_rate": 4.34465649110205e-09, "loss": 0.6265, "step": 32208 }, { "epoch": 0.9871582689714355, "grad_norm": 2.1757032719535885, "learning_rate": 4.323995259155056e-09, "loss": 0.6229, "step": 32209 }, { "epoch": 0.9871889174941768, "grad_norm": 0.7941359928635768, "learning_rate": 4.3033832508815325e-09, "loss": 0.4191, "step": 32210 }, { "epoch": 0.987219566016918, "grad_norm": 0.7694174650853886, "learning_rate": 4.28282046648576e-09, "loss": 0.4072, "step": 32211 }, { "epoch": 0.9872502145396592, "grad_norm": 1.7265407166560198, "learning_rate": 4.262306906168689e-09, "loss": 0.5766, "step": 32212 }, { "epoch": 0.9872808630624004, "grad_norm": 2.1932803561001672, "learning_rate": 4.241842570134047e-09, "loss": 0.6062, "step": 32213 }, { "epoch": 0.9873115115851416, "grad_norm": 1.8117841679690938, "learning_rate": 4.221427458582228e-09, "loss": 0.6357, "step": 32214 }, { "epoch": 0.9873421601078828, "grad_norm": 1.8173269500741098, "learning_rate": 4.201061571715292e-09, "loss": 0.585, "step": 32215 }, { "epoch": 0.987372808630624, "grad_norm": 1.9037973565945123, "learning_rate": 4.180744909733636e-09, "loss": 0.633, "step": 32216 }, { "epoch": 0.9874034571533652, "grad_norm": 1.9612720374069939, "learning_rate": 4.160477472837099e-09, "loss": 0.6893, "step": 32217 }, { "epoch": 0.9874341056761065, "grad_norm": 1.936639053211424, "learning_rate": 4.140259261225521e-09, "loss": 0.6218, "step": 32218 }, { "epoch": 0.9874647541988476, "grad_norm": 0.8475129935707264, "learning_rate": 4.120090275098187e-09, "loss": 0.4137, "step": 32219 }, { "epoch": 0.9874954027215889, "grad_norm": 2.1199289667982555, "learning_rate": 4.099970514653828e-09, "loss": 0.5593, "step": 32220 }, { "epoch": 0.98752605124433, "grad_norm": 1.8428306815754096, "learning_rate": 4.079899980091173e-09, "loss": 0.5747, "step": 32221 }, { "epoch": 0.9875566997670713, "grad_norm": 1.7984612543243763, "learning_rate": 4.059878671607287e-09, "loss": 0.6101, "step": 32222 }, { "epoch": 0.9875873482898124, "grad_norm": 0.763883657444421, "learning_rate": 4.039906589399234e-09, "loss": 0.4004, "step": 32223 }, { "epoch": 0.9876179968125537, "grad_norm": 2.1004638525357064, "learning_rate": 4.019983733664634e-09, "loss": 0.6418, "step": 32224 }, { "epoch": 0.9876486453352948, "grad_norm": 2.0379004514972783, "learning_rate": 4.000110104599442e-09, "loss": 0.4931, "step": 32225 }, { "epoch": 0.987679293858036, "grad_norm": 0.783448930171779, "learning_rate": 3.980285702399611e-09, "loss": 0.398, "step": 32226 }, { "epoch": 0.9877099423807773, "grad_norm": 1.990666501569811, "learning_rate": 3.960510527259986e-09, "loss": 0.546, "step": 32227 }, { "epoch": 0.9877405909035184, "grad_norm": 1.778667597230556, "learning_rate": 3.9407845793759665e-09, "loss": 0.5701, "step": 32228 }, { "epoch": 0.9877712394262597, "grad_norm": 1.931221792540909, "learning_rate": 3.921107858941287e-09, "loss": 0.5922, "step": 32229 }, { "epoch": 0.9878018879490008, "grad_norm": 1.979304449793369, "learning_rate": 3.90148036615079e-09, "loss": 0.6101, "step": 32230 }, { "epoch": 0.9878325364717421, "grad_norm": 2.068155465243968, "learning_rate": 3.881902101197099e-09, "loss": 0.5447, "step": 32231 }, { "epoch": 0.9878631849944832, "grad_norm": 1.8467232784039986, "learning_rate": 3.862373064273395e-09, "loss": 0.5331, "step": 32232 }, { "epoch": 0.9878938335172245, "grad_norm": 2.280171031500444, "learning_rate": 3.842893255571745e-09, "loss": 0.5653, "step": 32233 }, { "epoch": 0.9879244820399656, "grad_norm": 1.8540085945340654, "learning_rate": 3.823462675284772e-09, "loss": 0.5827, "step": 32234 }, { "epoch": 0.9879551305627069, "grad_norm": 2.135988259963244, "learning_rate": 3.804081323603437e-09, "loss": 0.5343, "step": 32235 }, { "epoch": 0.987985779085448, "grad_norm": 1.803590683291635, "learning_rate": 3.784749200718696e-09, "loss": 0.5851, "step": 32236 }, { "epoch": 0.9880164276081893, "grad_norm": 0.8003353686763306, "learning_rate": 3.765466306820953e-09, "loss": 0.3845, "step": 32237 }, { "epoch": 0.9880470761309305, "grad_norm": 1.6982522956613724, "learning_rate": 3.746232642100611e-09, "loss": 0.5453, "step": 32238 }, { "epoch": 0.9880777246536717, "grad_norm": 1.9145183490635151, "learning_rate": 3.727048206746964e-09, "loss": 0.5286, "step": 32239 }, { "epoch": 0.9881083731764129, "grad_norm": 0.7651950476585828, "learning_rate": 3.7079130009493035e-09, "loss": 0.3825, "step": 32240 }, { "epoch": 0.9881390216991541, "grad_norm": 1.95574396627305, "learning_rate": 3.6888270248958136e-09, "loss": 0.547, "step": 32241 }, { "epoch": 0.9881696702218953, "grad_norm": 1.750787758012265, "learning_rate": 3.6697902787746763e-09, "loss": 0.6026, "step": 32242 }, { "epoch": 0.9882003187446365, "grad_norm": 1.841225239114983, "learning_rate": 3.6508027627735198e-09, "loss": 0.574, "step": 32243 }, { "epoch": 0.9882309672673777, "grad_norm": 0.763162597280135, "learning_rate": 3.6318644770788613e-09, "loss": 0.3976, "step": 32244 }, { "epoch": 0.988261615790119, "grad_norm": 1.9142805769689704, "learning_rate": 3.6129754218783286e-09, "loss": 0.5253, "step": 32245 }, { "epoch": 0.9882922643128601, "grad_norm": 1.8516854655785686, "learning_rate": 3.5941355973573288e-09, "loss": 0.5351, "step": 32246 }, { "epoch": 0.9883229128356014, "grad_norm": 2.0159087513926206, "learning_rate": 3.5753450037018244e-09, "loss": 0.5908, "step": 32247 }, { "epoch": 0.9883535613583425, "grad_norm": 1.8120194463923358, "learning_rate": 3.556603641097223e-09, "loss": 0.5788, "step": 32248 }, { "epoch": 0.9883842098810838, "grad_norm": 1.8048947764614445, "learning_rate": 3.5379115097272655e-09, "loss": 0.5309, "step": 32249 }, { "epoch": 0.9884148584038249, "grad_norm": 1.9394321370093994, "learning_rate": 3.5192686097768045e-09, "loss": 0.4979, "step": 32250 }, { "epoch": 0.9884455069265662, "grad_norm": 0.8637550157744022, "learning_rate": 3.5006749414295825e-09, "loss": 0.3843, "step": 32251 }, { "epoch": 0.9884761554493073, "grad_norm": 2.3426402918538076, "learning_rate": 3.482130504868231e-09, "loss": 0.6138, "step": 32252 }, { "epoch": 0.9885068039720486, "grad_norm": 1.9899286388729285, "learning_rate": 3.463635300275936e-09, "loss": 0.5137, "step": 32253 }, { "epoch": 0.9885374524947897, "grad_norm": 1.774475995704833, "learning_rate": 3.445189327834775e-09, "loss": 0.551, "step": 32254 }, { "epoch": 0.988568101017531, "grad_norm": 0.7697518250217043, "learning_rate": 3.4267925877268238e-09, "loss": 0.4009, "step": 32255 }, { "epoch": 0.9885987495402722, "grad_norm": 2.3643913254875857, "learning_rate": 3.4084450801330493e-09, "loss": 0.6988, "step": 32256 }, { "epoch": 0.9886293980630133, "grad_norm": 1.9143391676352595, "learning_rate": 3.3901468052344177e-09, "loss": 0.5332, "step": 32257 }, { "epoch": 0.9886600465857546, "grad_norm": 1.8770483870358863, "learning_rate": 3.3718977632113404e-09, "loss": 0.6108, "step": 32258 }, { "epoch": 0.9886906951084957, "grad_norm": 1.890287187003058, "learning_rate": 3.353697954243118e-09, "loss": 0.5766, "step": 32259 }, { "epoch": 0.988721343631237, "grad_norm": 1.9541312728695222, "learning_rate": 3.335547378509052e-09, "loss": 0.6227, "step": 32260 }, { "epoch": 0.9887519921539781, "grad_norm": 1.9635470913781583, "learning_rate": 3.3174460361884432e-09, "loss": 0.5995, "step": 32261 }, { "epoch": 0.9887826406767194, "grad_norm": 1.9824097759397545, "learning_rate": 3.2993939274594823e-09, "loss": 0.5479, "step": 32262 }, { "epoch": 0.9888132891994605, "grad_norm": 1.6946325778826274, "learning_rate": 3.28139105250036e-09, "loss": 0.5798, "step": 32263 }, { "epoch": 0.9888439377222018, "grad_norm": 1.735656938267684, "learning_rate": 3.2634374114881574e-09, "loss": 0.5829, "step": 32264 }, { "epoch": 0.988874586244943, "grad_norm": 2.2283621347997222, "learning_rate": 3.2455330045993994e-09, "loss": 0.5784, "step": 32265 }, { "epoch": 0.9889052347676842, "grad_norm": 1.9441022850304974, "learning_rate": 3.2276778320111666e-09, "loss": 0.5276, "step": 32266 }, { "epoch": 0.9889358832904254, "grad_norm": 1.7023372294551942, "learning_rate": 3.209871893898875e-09, "loss": 0.5316, "step": 32267 }, { "epoch": 0.9889665318131666, "grad_norm": 0.7771233260374095, "learning_rate": 3.192115190438494e-09, "loss": 0.3965, "step": 32268 }, { "epoch": 0.9889971803359078, "grad_norm": 1.850064781517279, "learning_rate": 3.174407721804329e-09, "loss": 0.6092, "step": 32269 }, { "epoch": 0.989027828858649, "grad_norm": 1.9024669561725371, "learning_rate": 3.15674948817124e-09, "loss": 0.5878, "step": 32270 }, { "epoch": 0.9890584773813902, "grad_norm": 1.9714840647295342, "learning_rate": 3.1391404897135323e-09, "loss": 0.5802, "step": 32271 }, { "epoch": 0.9890891259041314, "grad_norm": 1.8054647553765486, "learning_rate": 3.121580726604401e-09, "loss": 0.4758, "step": 32272 }, { "epoch": 0.9891197744268726, "grad_norm": 1.7596961999436902, "learning_rate": 3.1040701990164844e-09, "loss": 0.4517, "step": 32273 }, { "epoch": 0.9891504229496139, "grad_norm": 1.8328337935918229, "learning_rate": 3.086608907122979e-09, "loss": 0.6212, "step": 32274 }, { "epoch": 0.989181071472355, "grad_norm": 1.9732698676740656, "learning_rate": 3.069196851095413e-09, "loss": 0.5799, "step": 32275 }, { "epoch": 0.9892117199950963, "grad_norm": 1.76601797804861, "learning_rate": 3.051834031105316e-09, "loss": 0.5354, "step": 32276 }, { "epoch": 0.9892423685178374, "grad_norm": 1.9544380476330125, "learning_rate": 3.0345204473247735e-09, "loss": 0.5322, "step": 32277 }, { "epoch": 0.9892730170405787, "grad_norm": 1.8089045690079744, "learning_rate": 3.0172560999230937e-09, "loss": 0.4589, "step": 32278 }, { "epoch": 0.9893036655633198, "grad_norm": 1.9130724069265286, "learning_rate": 3.000040989071251e-09, "loss": 0.4429, "step": 32279 }, { "epoch": 0.9893343140860611, "grad_norm": 0.7910041515565472, "learning_rate": 2.9828751149379997e-09, "loss": 0.4162, "step": 32280 }, { "epoch": 0.9893649626088022, "grad_norm": 2.018214970642845, "learning_rate": 2.9657584776932035e-09, "loss": 0.575, "step": 32281 }, { "epoch": 0.9893956111315435, "grad_norm": 1.9808614313547368, "learning_rate": 2.9486910775056165e-09, "loss": 0.6024, "step": 32282 }, { "epoch": 0.9894262596542847, "grad_norm": 1.7242514193103176, "learning_rate": 2.9316729145428825e-09, "loss": 0.4989, "step": 32283 }, { "epoch": 0.9894569081770259, "grad_norm": 1.8037851597943726, "learning_rate": 2.9147039889731997e-09, "loss": 0.5533, "step": 32284 }, { "epoch": 0.9894875566997671, "grad_norm": 0.8117418259629621, "learning_rate": 2.8977843009631025e-09, "loss": 0.4018, "step": 32285 }, { "epoch": 0.9895182052225083, "grad_norm": 1.7872813320001866, "learning_rate": 2.8809138506802338e-09, "loss": 0.5254, "step": 32286 }, { "epoch": 0.9895488537452495, "grad_norm": 1.8771045727603848, "learning_rate": 2.864092638290017e-09, "loss": 0.5976, "step": 32287 }, { "epoch": 0.9895795022679906, "grad_norm": 1.9046268586798567, "learning_rate": 2.8473206639584307e-09, "loss": 0.5828, "step": 32288 }, { "epoch": 0.9896101507907319, "grad_norm": 1.8584435656437983, "learning_rate": 2.8305979278508977e-09, "loss": 0.5862, "step": 32289 }, { "epoch": 0.989640799313473, "grad_norm": 2.1937332462833394, "learning_rate": 2.8139244301317316e-09, "loss": 0.6171, "step": 32290 }, { "epoch": 0.9896714478362143, "grad_norm": 1.7031239252346566, "learning_rate": 2.7973001709658e-09, "loss": 0.5366, "step": 32291 }, { "epoch": 0.9897020963589555, "grad_norm": 0.8070005581951195, "learning_rate": 2.7807251505168608e-09, "loss": 0.3724, "step": 32292 }, { "epoch": 0.9897327448816967, "grad_norm": 2.1978367450161964, "learning_rate": 2.7641993689475623e-09, "loss": 0.5574, "step": 32293 }, { "epoch": 0.9897633934044379, "grad_norm": 1.9507128715783832, "learning_rate": 2.7477228264216614e-09, "loss": 0.5572, "step": 32294 }, { "epoch": 0.9897940419271791, "grad_norm": 1.7654928441353481, "learning_rate": 2.7312955231006966e-09, "loss": 0.5002, "step": 32295 }, { "epoch": 0.9898246904499203, "grad_norm": 1.726081073698921, "learning_rate": 2.7149174591467597e-09, "loss": 0.6059, "step": 32296 }, { "epoch": 0.9898553389726615, "grad_norm": 1.7044705953724408, "learning_rate": 2.6985886347219438e-09, "loss": 0.5695, "step": 32297 }, { "epoch": 0.9898859874954027, "grad_norm": 0.8130417859817631, "learning_rate": 2.6823090499861204e-09, "loss": 0.4015, "step": 32298 }, { "epoch": 0.989916636018144, "grad_norm": 0.7874767086928876, "learning_rate": 2.6660787051002724e-09, "loss": 0.4038, "step": 32299 }, { "epoch": 0.9899472845408851, "grad_norm": 2.13959510227846, "learning_rate": 2.6498976002237166e-09, "loss": 0.6184, "step": 32300 }, { "epoch": 0.9899779330636264, "grad_norm": 1.9832329048290565, "learning_rate": 2.63376573551688e-09, "loss": 0.5479, "step": 32301 }, { "epoch": 0.9900085815863675, "grad_norm": 2.128928472824126, "learning_rate": 2.6176831111379697e-09, "loss": 0.685, "step": 32302 }, { "epoch": 0.9900392301091088, "grad_norm": 1.9930631442622004, "learning_rate": 2.6016497272457473e-09, "loss": 0.5667, "step": 32303 }, { "epoch": 0.9900698786318499, "grad_norm": 0.7633079144739536, "learning_rate": 2.5856655839984203e-09, "loss": 0.3795, "step": 32304 }, { "epoch": 0.9901005271545912, "grad_norm": 1.8668627130864, "learning_rate": 2.5697306815530842e-09, "loss": 0.5989, "step": 32305 }, { "epoch": 0.9901311756773323, "grad_norm": 1.9534994715188156, "learning_rate": 2.553845020066281e-09, "loss": 0.5863, "step": 32306 }, { "epoch": 0.9901618242000736, "grad_norm": 1.8537962149663938, "learning_rate": 2.538008599695663e-09, "loss": 0.4624, "step": 32307 }, { "epoch": 0.9901924727228147, "grad_norm": 1.727414049319096, "learning_rate": 2.522221420596105e-09, "loss": 0.4794, "step": 32308 }, { "epoch": 0.990223121245556, "grad_norm": 1.9652356865871272, "learning_rate": 2.5064834829241492e-09, "loss": 0.5866, "step": 32309 }, { "epoch": 0.9902537697682972, "grad_norm": 2.0028786029464203, "learning_rate": 2.4907947868346717e-09, "loss": 0.5822, "step": 32310 }, { "epoch": 0.9902844182910384, "grad_norm": 1.6968563410431623, "learning_rate": 2.475155332481438e-09, "loss": 0.5387, "step": 32311 }, { "epoch": 0.9903150668137796, "grad_norm": 2.1893432590352884, "learning_rate": 2.45956512001988e-09, "loss": 0.5458, "step": 32312 }, { "epoch": 0.9903457153365208, "grad_norm": 2.0217247287745264, "learning_rate": 2.4440241496026527e-09, "loss": 0.5504, "step": 32313 }, { "epoch": 0.990376363859262, "grad_norm": 0.7973183652562535, "learning_rate": 2.4285324213829675e-09, "loss": 0.3891, "step": 32314 }, { "epoch": 0.9904070123820032, "grad_norm": 1.850354696338655, "learning_rate": 2.4130899355140346e-09, "loss": 0.5924, "step": 32315 }, { "epoch": 0.9904376609047444, "grad_norm": 0.7787276853462833, "learning_rate": 2.3976966921468448e-09, "loss": 0.3809, "step": 32316 }, { "epoch": 0.9904683094274856, "grad_norm": 1.787871277020972, "learning_rate": 2.3823526914346086e-09, "loss": 0.5376, "step": 32317 }, { "epoch": 0.9904989579502268, "grad_norm": 1.9132058288633633, "learning_rate": 2.367057933527206e-09, "loss": 0.5692, "step": 32318 }, { "epoch": 0.990529606472968, "grad_norm": 1.8389651399992923, "learning_rate": 2.3518124185761827e-09, "loss": 0.5811, "step": 32319 }, { "epoch": 0.9905602549957092, "grad_norm": 1.9254825704629697, "learning_rate": 2.3366161467314187e-09, "loss": 0.5742, "step": 32320 }, { "epoch": 0.9905909035184504, "grad_norm": 2.0796120994600225, "learning_rate": 2.321469118142794e-09, "loss": 0.4711, "step": 32321 }, { "epoch": 0.9906215520411916, "grad_norm": 1.8646197543004688, "learning_rate": 2.3063713329590787e-09, "loss": 0.5447, "step": 32322 }, { "epoch": 0.9906522005639328, "grad_norm": 1.8754400405178673, "learning_rate": 2.291322791330153e-09, "loss": 0.6077, "step": 32323 }, { "epoch": 0.990682849086674, "grad_norm": 1.9937562276109695, "learning_rate": 2.2763234934025656e-09, "loss": 0.5491, "step": 32324 }, { "epoch": 0.9907134976094152, "grad_norm": 1.8037395744907188, "learning_rate": 2.2613734393256427e-09, "loss": 0.5944, "step": 32325 }, { "epoch": 0.9907441461321564, "grad_norm": 1.8545556493032525, "learning_rate": 2.2464726292459326e-09, "loss": 0.5279, "step": 32326 }, { "epoch": 0.9907747946548976, "grad_norm": 1.8260676969163159, "learning_rate": 2.2316210633105406e-09, "loss": 0.4928, "step": 32327 }, { "epoch": 0.9908054431776389, "grad_norm": 1.9113340096538314, "learning_rate": 2.2168187416660158e-09, "loss": 0.6794, "step": 32328 }, { "epoch": 0.99083609170038, "grad_norm": 1.90818675465824, "learning_rate": 2.2020656644577976e-09, "loss": 0.6112, "step": 32329 }, { "epoch": 0.9908667402231213, "grad_norm": 1.8687595654921185, "learning_rate": 2.1873618318307698e-09, "loss": 0.4662, "step": 32330 }, { "epoch": 0.9908973887458624, "grad_norm": 2.033680253019711, "learning_rate": 2.172707243930927e-09, "loss": 0.5469, "step": 32331 }, { "epoch": 0.9909280372686037, "grad_norm": 1.811499829042566, "learning_rate": 2.1581019009020434e-09, "loss": 0.5924, "step": 32332 }, { "epoch": 0.9909586857913448, "grad_norm": 0.8128461397105116, "learning_rate": 2.143545802888447e-09, "loss": 0.3887, "step": 32333 }, { "epoch": 0.9909893343140861, "grad_norm": 1.7244266731144327, "learning_rate": 2.1290389500328023e-09, "loss": 0.5328, "step": 32334 }, { "epoch": 0.9910199828368272, "grad_norm": 1.913145213483092, "learning_rate": 2.114581342478883e-09, "loss": 0.5862, "step": 32335 }, { "epoch": 0.9910506313595685, "grad_norm": 1.9183772887577866, "learning_rate": 2.1001729803682424e-09, "loss": 0.5684, "step": 32336 }, { "epoch": 0.9910812798823097, "grad_norm": 2.0997620875951992, "learning_rate": 2.0858138638440995e-09, "loss": 0.5168, "step": 32337 }, { "epoch": 0.9911119284050509, "grad_norm": 1.6523076352979513, "learning_rate": 2.071503993046342e-09, "loss": 0.5491, "step": 32338 }, { "epoch": 0.9911425769277921, "grad_norm": 1.877765093625312, "learning_rate": 2.057243368117634e-09, "loss": 0.5445, "step": 32339 }, { "epoch": 0.9911732254505333, "grad_norm": 1.8201494293176619, "learning_rate": 2.043031989197308e-09, "loss": 0.5152, "step": 32340 }, { "epoch": 0.9912038739732745, "grad_norm": 1.862486968069916, "learning_rate": 2.028869856425808e-09, "loss": 0.5878, "step": 32341 }, { "epoch": 0.9912345224960157, "grad_norm": 2.0392747811564784, "learning_rate": 2.0147569699424664e-09, "loss": 0.668, "step": 32342 }, { "epoch": 0.9912651710187569, "grad_norm": 1.7778946458565557, "learning_rate": 2.000693329886616e-09, "loss": 0.5908, "step": 32343 }, { "epoch": 0.9912958195414981, "grad_norm": 1.8670087528483288, "learning_rate": 1.98667893639648e-09, "loss": 0.5299, "step": 32344 }, { "epoch": 0.9913264680642393, "grad_norm": 1.7950451741069617, "learning_rate": 1.972713789610836e-09, "loss": 0.5171, "step": 32345 }, { "epoch": 0.9913571165869806, "grad_norm": 2.055907930974508, "learning_rate": 1.958797889666797e-09, "loss": 0.5661, "step": 32346 }, { "epoch": 0.9913877651097217, "grad_norm": 1.938267099128094, "learning_rate": 1.944931236701475e-09, "loss": 0.6156, "step": 32347 }, { "epoch": 0.991418413632463, "grad_norm": 2.026606896224071, "learning_rate": 1.9311138308514276e-09, "loss": 0.5882, "step": 32348 }, { "epoch": 0.9914490621552041, "grad_norm": 0.8666064585734491, "learning_rate": 1.9173456722526574e-09, "loss": 0.401, "step": 32349 }, { "epoch": 0.9914797106779453, "grad_norm": 1.8627516768464893, "learning_rate": 1.9036267610417215e-09, "loss": 0.6323, "step": 32350 }, { "epoch": 0.9915103592006865, "grad_norm": 1.919088534080887, "learning_rate": 1.889957097352957e-09, "loss": 0.4786, "step": 32351 }, { "epoch": 0.9915410077234277, "grad_norm": 2.130395710596046, "learning_rate": 1.876336681321256e-09, "loss": 0.6126, "step": 32352 }, { "epoch": 0.9915716562461689, "grad_norm": 0.8023121195240088, "learning_rate": 1.8627655130804e-09, "loss": 0.3948, "step": 32353 }, { "epoch": 0.9916023047689101, "grad_norm": 1.6545009545868972, "learning_rate": 1.849243592765282e-09, "loss": 0.5146, "step": 32354 }, { "epoch": 0.9916329532916514, "grad_norm": 2.023190288680263, "learning_rate": 1.8357709205080177e-09, "loss": 0.58, "step": 32355 }, { "epoch": 0.9916636018143925, "grad_norm": 1.744054856676193, "learning_rate": 1.8223474964418343e-09, "loss": 0.4798, "step": 32356 }, { "epoch": 0.9916942503371338, "grad_norm": 1.9828304142184896, "learning_rate": 1.808973320698293e-09, "loss": 0.5821, "step": 32357 }, { "epoch": 0.9917248988598749, "grad_norm": 1.7316210617088583, "learning_rate": 1.7956483934106205e-09, "loss": 0.5913, "step": 32358 }, { "epoch": 0.9917555473826162, "grad_norm": 2.1680754216286946, "learning_rate": 1.7823727147087132e-09, "loss": 0.5198, "step": 32359 }, { "epoch": 0.9917861959053573, "grad_norm": 1.907460114892345, "learning_rate": 1.7691462847241325e-09, "loss": 0.5602, "step": 32360 }, { "epoch": 0.9918168444280986, "grad_norm": 1.8599520084832315, "learning_rate": 1.7559691035873295e-09, "loss": 0.5241, "step": 32361 }, { "epoch": 0.9918474929508397, "grad_norm": 2.3679243428744914, "learning_rate": 1.742841171427645e-09, "loss": 0.6275, "step": 32362 }, { "epoch": 0.991878141473581, "grad_norm": 2.036859270697984, "learning_rate": 1.7297624883744203e-09, "loss": 0.591, "step": 32363 }, { "epoch": 0.9919087899963221, "grad_norm": 0.7664919000358935, "learning_rate": 1.716733054556441e-09, "loss": 0.4006, "step": 32364 }, { "epoch": 0.9919394385190634, "grad_norm": 1.6236200433945298, "learning_rate": 1.703752870103048e-09, "loss": 0.5065, "step": 32365 }, { "epoch": 0.9919700870418046, "grad_norm": 0.7888277478791679, "learning_rate": 1.6908219351408072e-09, "loss": 0.3795, "step": 32366 }, { "epoch": 0.9920007355645458, "grad_norm": 2.0248111419979327, "learning_rate": 1.6779402497979491e-09, "loss": 0.5415, "step": 32367 }, { "epoch": 0.992031384087287, "grad_norm": 1.8815750259545174, "learning_rate": 1.6651078142015942e-09, "loss": 0.5651, "step": 32368 }, { "epoch": 0.9920620326100282, "grad_norm": 2.116542277151326, "learning_rate": 1.6523246284777528e-09, "loss": 0.516, "step": 32369 }, { "epoch": 0.9920926811327694, "grad_norm": 1.9905515251945736, "learning_rate": 1.63959069275188e-09, "loss": 0.5725, "step": 32370 }, { "epoch": 0.9921233296555106, "grad_norm": 1.899027653971234, "learning_rate": 1.6269060071505416e-09, "loss": 0.5493, "step": 32371 }, { "epoch": 0.9921539781782518, "grad_norm": 0.7917135873031516, "learning_rate": 1.6142705717980823e-09, "loss": 0.3962, "step": 32372 }, { "epoch": 0.992184626700993, "grad_norm": 2.072392712619704, "learning_rate": 1.601684386818847e-09, "loss": 0.618, "step": 32373 }, { "epoch": 0.9922152752237342, "grad_norm": 1.9154467161097268, "learning_rate": 1.5891474523371809e-09, "loss": 0.4986, "step": 32374 }, { "epoch": 0.9922459237464755, "grad_norm": 1.6864190860787498, "learning_rate": 1.5766597684768737e-09, "loss": 0.547, "step": 32375 }, { "epoch": 0.9922765722692166, "grad_norm": 2.0402753577535857, "learning_rate": 1.564221335360605e-09, "loss": 0.6099, "step": 32376 }, { "epoch": 0.9923072207919579, "grad_norm": 0.8103302728392725, "learning_rate": 1.5518321531104996e-09, "loss": 0.4048, "step": 32377 }, { "epoch": 0.992337869314699, "grad_norm": 1.7062447093467132, "learning_rate": 1.539492221849237e-09, "loss": 0.5603, "step": 32378 }, { "epoch": 0.9923685178374403, "grad_norm": 2.098390138173315, "learning_rate": 1.5272015416983866e-09, "loss": 0.5323, "step": 32379 }, { "epoch": 0.9923991663601814, "grad_norm": 1.9090116481382617, "learning_rate": 1.5149601127789627e-09, "loss": 0.5261, "step": 32380 }, { "epoch": 0.9924298148829226, "grad_norm": 2.385791742427983, "learning_rate": 1.5027679352119795e-09, "loss": 0.6526, "step": 32381 }, { "epoch": 0.9924604634056639, "grad_norm": 1.8117169198658563, "learning_rate": 1.490625009116231e-09, "loss": 0.5456, "step": 32382 }, { "epoch": 0.992491111928405, "grad_norm": 1.9412008348660312, "learning_rate": 1.4785313346132868e-09, "loss": 0.58, "step": 32383 }, { "epoch": 0.9925217604511463, "grad_norm": 1.888139657799969, "learning_rate": 1.466486911820275e-09, "loss": 0.5917, "step": 32384 }, { "epoch": 0.9925524089738874, "grad_norm": 2.012723001531079, "learning_rate": 1.4544917408576553e-09, "loss": 0.4963, "step": 32385 }, { "epoch": 0.9925830574966287, "grad_norm": 2.024330568442075, "learning_rate": 1.442545821842556e-09, "loss": 0.6014, "step": 32386 }, { "epoch": 0.9926137060193698, "grad_norm": 0.806843011558152, "learning_rate": 1.4306491548932156e-09, "loss": 0.4004, "step": 32387 }, { "epoch": 0.9926443545421111, "grad_norm": 1.8630564129436604, "learning_rate": 1.4188017401262077e-09, "loss": 0.5249, "step": 32388 }, { "epoch": 0.9926750030648522, "grad_norm": 1.9565992092254143, "learning_rate": 1.4070035776592162e-09, "loss": 0.6726, "step": 32389 }, { "epoch": 0.9927056515875935, "grad_norm": 1.8553358552473593, "learning_rate": 1.395254667607704e-09, "loss": 0.5638, "step": 32390 }, { "epoch": 0.9927363001103346, "grad_norm": 1.7618600725059559, "learning_rate": 1.3835550100876892e-09, "loss": 0.5474, "step": 32391 }, { "epoch": 0.9927669486330759, "grad_norm": 1.920962930205549, "learning_rate": 1.3719046052140805e-09, "loss": 0.5224, "step": 32392 }, { "epoch": 0.9927975971558171, "grad_norm": 0.8348806576198882, "learning_rate": 1.3603034531023407e-09, "loss": 0.3902, "step": 32393 }, { "epoch": 0.9928282456785583, "grad_norm": 2.033635904748177, "learning_rate": 1.3487515538668229e-09, "loss": 0.6075, "step": 32394 }, { "epoch": 0.9928588942012995, "grad_norm": 1.9444144166023891, "learning_rate": 1.3372489076207695e-09, "loss": 0.6033, "step": 32395 }, { "epoch": 0.9928895427240407, "grad_norm": 1.9199559680265048, "learning_rate": 1.3257955144774238e-09, "loss": 0.5451, "step": 32396 }, { "epoch": 0.9929201912467819, "grad_norm": 1.8028044986563536, "learning_rate": 1.3143913745505831e-09, "loss": 0.5516, "step": 32397 }, { "epoch": 0.9929508397695231, "grad_norm": 0.8373252779523769, "learning_rate": 1.3030364879518253e-09, "loss": 0.408, "step": 32398 }, { "epoch": 0.9929814882922643, "grad_norm": 1.9119705845750166, "learning_rate": 1.2917308547932828e-09, "loss": 0.6282, "step": 32399 }, { "epoch": 0.9930121368150056, "grad_norm": 2.258582144786885, "learning_rate": 1.2804744751859777e-09, "loss": 0.7292, "step": 32400 }, { "epoch": 0.9930427853377467, "grad_norm": 1.772617230266316, "learning_rate": 1.2692673492414875e-09, "loss": 0.5165, "step": 32401 }, { "epoch": 0.993073433860488, "grad_norm": 1.915660482305068, "learning_rate": 1.2581094770697243e-09, "loss": 0.6035, "step": 32402 }, { "epoch": 0.9931040823832291, "grad_norm": 1.8270633995876655, "learning_rate": 1.2470008587806004e-09, "loss": 0.5394, "step": 32403 }, { "epoch": 0.9931347309059704, "grad_norm": 2.0288162021626173, "learning_rate": 1.2359414944840276e-09, "loss": 0.5365, "step": 32404 }, { "epoch": 0.9931653794287115, "grad_norm": 1.7090416818381298, "learning_rate": 1.2249313842882527e-09, "loss": 0.5815, "step": 32405 }, { "epoch": 0.9931960279514528, "grad_norm": 2.1115839431066425, "learning_rate": 1.2139705283026326e-09, "loss": 0.6343, "step": 32406 }, { "epoch": 0.9932266764741939, "grad_norm": 0.8098782063634047, "learning_rate": 1.203058926634859e-09, "loss": 0.3847, "step": 32407 }, { "epoch": 0.9932573249969352, "grad_norm": 1.6489259175296398, "learning_rate": 1.1921965793920687e-09, "loss": 0.5351, "step": 32408 }, { "epoch": 0.9932879735196763, "grad_norm": 2.2652814825909138, "learning_rate": 1.1813834866819529e-09, "loss": 0.5618, "step": 32409 }, { "epoch": 0.9933186220424176, "grad_norm": 3.6247582502436786, "learning_rate": 1.170619648609983e-09, "loss": 0.6888, "step": 32410 }, { "epoch": 0.9933492705651588, "grad_norm": 0.7868903995385159, "learning_rate": 1.1599050652832955e-09, "loss": 0.4, "step": 32411 }, { "epoch": 0.9933799190878999, "grad_norm": 1.975984324667343, "learning_rate": 1.1492397368073615e-09, "loss": 0.5234, "step": 32412 }, { "epoch": 0.9934105676106412, "grad_norm": 1.818983758917286, "learning_rate": 1.1386236632865421e-09, "loss": 0.5599, "step": 32413 }, { "epoch": 0.9934412161333823, "grad_norm": 1.6643780720420174, "learning_rate": 1.1280568448263084e-09, "loss": 0.5291, "step": 32414 }, { "epoch": 0.9934718646561236, "grad_norm": 1.8592699114268707, "learning_rate": 1.1175392815299112e-09, "loss": 0.5726, "step": 32415 }, { "epoch": 0.9935025131788647, "grad_norm": 1.7114869031922368, "learning_rate": 1.1070709735017115e-09, "loss": 0.5693, "step": 32416 }, { "epoch": 0.993533161701606, "grad_norm": 1.7654414224557602, "learning_rate": 1.0966519208444048e-09, "loss": 0.6574, "step": 32417 }, { "epoch": 0.9935638102243471, "grad_norm": 2.3153774050921716, "learning_rate": 1.0862821236606868e-09, "loss": 0.4556, "step": 32418 }, { "epoch": 0.9935944587470884, "grad_norm": 1.8404114285179909, "learning_rate": 1.0759615820532532e-09, "loss": 0.5697, "step": 32419 }, { "epoch": 0.9936251072698296, "grad_norm": 0.8726328553403891, "learning_rate": 1.065690296123134e-09, "loss": 0.3884, "step": 32420 }, { "epoch": 0.9936557557925708, "grad_norm": 0.7689584619449756, "learning_rate": 1.0554682659719152e-09, "loss": 0.3761, "step": 32421 }, { "epoch": 0.993686404315312, "grad_norm": 2.2655048705340657, "learning_rate": 1.0452954917000713e-09, "loss": 0.5262, "step": 32422 }, { "epoch": 0.9937170528380532, "grad_norm": 2.3430263558618045, "learning_rate": 1.035171973408078e-09, "loss": 0.5871, "step": 32423 }, { "epoch": 0.9937477013607944, "grad_norm": 1.9650198482891699, "learning_rate": 1.0250977111952998e-09, "loss": 0.5563, "step": 32424 }, { "epoch": 0.9937783498835356, "grad_norm": 1.893484003502866, "learning_rate": 1.0150727051616572e-09, "loss": 0.6624, "step": 32425 }, { "epoch": 0.9938089984062768, "grad_norm": 1.9188913495664206, "learning_rate": 1.0050969554054047e-09, "loss": 0.4929, "step": 32426 }, { "epoch": 0.993839646929018, "grad_norm": 2.066759382559697, "learning_rate": 9.95170462024797e-10, "loss": 0.5971, "step": 32427 }, { "epoch": 0.9938702954517592, "grad_norm": 1.9117894211724775, "learning_rate": 9.852932251180891e-10, "loss": 0.5493, "step": 32428 }, { "epoch": 0.9939009439745005, "grad_norm": 0.7416758395124246, "learning_rate": 9.754652447818702e-10, "loss": 0.377, "step": 32429 }, { "epoch": 0.9939315924972416, "grad_norm": 1.7839703024412514, "learning_rate": 9.6568652111384e-10, "loss": 0.4911, "step": 32430 }, { "epoch": 0.9939622410199829, "grad_norm": 2.0092306375442823, "learning_rate": 9.559570542100327e-10, "loss": 0.5881, "step": 32431 }, { "epoch": 0.993992889542724, "grad_norm": 2.167592057448729, "learning_rate": 9.462768441659276e-10, "loss": 0.5464, "step": 32432 }, { "epoch": 0.9940235380654653, "grad_norm": 1.9207917836334187, "learning_rate": 9.366458910775588e-10, "loss": 0.6098, "step": 32433 }, { "epoch": 0.9940541865882064, "grad_norm": 1.9848592380857635, "learning_rate": 9.270641950392956e-10, "loss": 0.6307, "step": 32434 }, { "epoch": 0.9940848351109477, "grad_norm": 2.3392064424511236, "learning_rate": 9.175317561460617e-10, "loss": 0.5478, "step": 32435 }, { "epoch": 0.9941154836336888, "grad_norm": 1.790689152362571, "learning_rate": 9.08048574491116e-10, "loss": 0.4661, "step": 32436 }, { "epoch": 0.9941461321564301, "grad_norm": 0.7771549005719156, "learning_rate": 8.986146501682724e-10, "loss": 0.3969, "step": 32437 }, { "epoch": 0.9941767806791713, "grad_norm": 1.610307290942385, "learning_rate": 8.892299832707896e-10, "loss": 0.4597, "step": 32438 }, { "epoch": 0.9942074292019125, "grad_norm": 2.0102363195185404, "learning_rate": 8.798945738902609e-10, "loss": 0.586, "step": 32439 }, { "epoch": 0.9942380777246537, "grad_norm": 0.822760531753198, "learning_rate": 8.7060842211939e-10, "loss": 0.3952, "step": 32440 }, { "epoch": 0.9942687262473949, "grad_norm": 2.098571436738282, "learning_rate": 8.613715280497703e-10, "loss": 0.5849, "step": 32441 }, { "epoch": 0.9942993747701361, "grad_norm": 1.7748485844351118, "learning_rate": 8.52183891771885e-10, "loss": 0.5338, "step": 32442 }, { "epoch": 0.9943300232928772, "grad_norm": 1.7140724350549013, "learning_rate": 8.430455133767723e-10, "loss": 0.4983, "step": 32443 }, { "epoch": 0.9943606718156185, "grad_norm": 2.2050476489290105, "learning_rate": 8.339563929538052e-10, "loss": 0.5762, "step": 32444 }, { "epoch": 0.9943913203383596, "grad_norm": 1.8782828791279074, "learning_rate": 8.249165305929119e-10, "loss": 0.5343, "step": 32445 }, { "epoch": 0.9944219688611009, "grad_norm": 1.9092194782102274, "learning_rate": 8.159259263834651e-10, "loss": 0.6454, "step": 32446 }, { "epoch": 0.994452617383842, "grad_norm": 1.7646827164452838, "learning_rate": 8.069845804142828e-10, "loss": 0.5803, "step": 32447 }, { "epoch": 0.9944832659065833, "grad_norm": 1.7564931305162836, "learning_rate": 7.980924927725175e-10, "loss": 0.5815, "step": 32448 }, { "epoch": 0.9945139144293245, "grad_norm": 2.014022441834454, "learning_rate": 7.892496635458769e-10, "loss": 0.556, "step": 32449 }, { "epoch": 0.9945445629520657, "grad_norm": 2.038988728431421, "learning_rate": 7.804560928226234e-10, "loss": 0.4929, "step": 32450 }, { "epoch": 0.9945752114748069, "grad_norm": 1.7898077919081776, "learning_rate": 7.717117806876895e-10, "loss": 0.5566, "step": 32451 }, { "epoch": 0.9946058599975481, "grad_norm": 2.1786971999917415, "learning_rate": 7.630167272287825e-10, "loss": 0.5917, "step": 32452 }, { "epoch": 0.9946365085202893, "grad_norm": 1.8240011931062887, "learning_rate": 7.543709325313897e-10, "loss": 0.4684, "step": 32453 }, { "epoch": 0.9946671570430305, "grad_norm": 1.9719427424998235, "learning_rate": 7.457743966793329e-10, "loss": 0.594, "step": 32454 }, { "epoch": 0.9946978055657717, "grad_norm": 1.8186113195140907, "learning_rate": 7.372271197592096e-10, "loss": 0.5353, "step": 32455 }, { "epoch": 0.994728454088513, "grad_norm": 1.814493557143603, "learning_rate": 7.287291018537312e-10, "loss": 0.5609, "step": 32456 }, { "epoch": 0.9947591026112541, "grad_norm": 1.9313140932140858, "learning_rate": 7.202803430472749e-10, "loss": 0.5796, "step": 32457 }, { "epoch": 0.9947897511339954, "grad_norm": 1.8598414563501968, "learning_rate": 7.118808434231073e-10, "loss": 0.5976, "step": 32458 }, { "epoch": 0.9948203996567365, "grad_norm": 2.220743846919311, "learning_rate": 7.0353060306394e-10, "loss": 0.5674, "step": 32459 }, { "epoch": 0.9948510481794778, "grad_norm": 1.7039612364803436, "learning_rate": 6.952296220519294e-10, "loss": 0.5375, "step": 32460 }, { "epoch": 0.9948816967022189, "grad_norm": 0.7760243408079823, "learning_rate": 6.869779004692323e-10, "loss": 0.3869, "step": 32461 }, { "epoch": 0.9949123452249602, "grad_norm": 1.9704454207655802, "learning_rate": 6.787754383963396e-10, "loss": 0.5739, "step": 32462 }, { "epoch": 0.9949429937477013, "grad_norm": 1.9306476801308432, "learning_rate": 6.706222359148529e-10, "loss": 0.5325, "step": 32463 }, { "epoch": 0.9949736422704426, "grad_norm": 2.122972407202374, "learning_rate": 6.62518293104708e-10, "loss": 0.6418, "step": 32464 }, { "epoch": 0.9950042907931838, "grad_norm": 1.7792109215416412, "learning_rate": 6.544636100463963e-10, "loss": 0.6902, "step": 32465 }, { "epoch": 0.995034939315925, "grad_norm": 1.8453834391839739, "learning_rate": 6.464581868181885e-10, "loss": 0.4497, "step": 32466 }, { "epoch": 0.9950655878386662, "grad_norm": 1.6808676391088242, "learning_rate": 6.385020235000206e-10, "loss": 0.4829, "step": 32467 }, { "epoch": 0.9950962363614074, "grad_norm": 0.7593812641611267, "learning_rate": 6.305951201696081e-10, "loss": 0.3934, "step": 32468 }, { "epoch": 0.9951268848841486, "grad_norm": 2.0131238438843266, "learning_rate": 6.227374769052219e-10, "loss": 0.5211, "step": 32469 }, { "epoch": 0.9951575334068898, "grad_norm": 1.7145272096717052, "learning_rate": 6.149290937840224e-10, "loss": 0.5491, "step": 32470 }, { "epoch": 0.995188181929631, "grad_norm": 1.9189581445620798, "learning_rate": 6.071699708831702e-10, "loss": 0.5377, "step": 32471 }, { "epoch": 0.9952188304523722, "grad_norm": 1.8108460090133052, "learning_rate": 5.994601082787155e-10, "loss": 0.4805, "step": 32472 }, { "epoch": 0.9952494789751134, "grad_norm": 1.8326639905729902, "learning_rate": 5.917995060472636e-10, "loss": 0.5401, "step": 32473 }, { "epoch": 0.9952801274978545, "grad_norm": 1.9592937449826902, "learning_rate": 5.841881642637548e-10, "loss": 0.5887, "step": 32474 }, { "epoch": 0.9953107760205958, "grad_norm": 1.978130212320474, "learning_rate": 5.766260830036841e-10, "loss": 0.5688, "step": 32475 }, { "epoch": 0.995341424543337, "grad_norm": 1.7957016773589156, "learning_rate": 5.691132623414364e-10, "loss": 0.5762, "step": 32476 }, { "epoch": 0.9953720730660782, "grad_norm": 2.1085157316239322, "learning_rate": 5.616497023502865e-10, "loss": 0.6096, "step": 32477 }, { "epoch": 0.9954027215888194, "grad_norm": 2.1592769750084657, "learning_rate": 5.542354031046193e-10, "loss": 0.6149, "step": 32478 }, { "epoch": 0.9954333701115606, "grad_norm": 1.9735830595489254, "learning_rate": 5.468703646771545e-10, "loss": 0.5348, "step": 32479 }, { "epoch": 0.9954640186343018, "grad_norm": 1.9278102871205498, "learning_rate": 5.395545871406116e-10, "loss": 0.5877, "step": 32480 }, { "epoch": 0.995494667157043, "grad_norm": 1.9919007660814803, "learning_rate": 5.322880705671552e-10, "loss": 0.4802, "step": 32481 }, { "epoch": 0.9955253156797842, "grad_norm": 1.8385078234402312, "learning_rate": 5.250708150283946e-10, "loss": 0.5497, "step": 32482 }, { "epoch": 0.9955559642025255, "grad_norm": 1.7691639924249711, "learning_rate": 5.179028205948289e-10, "loss": 0.5863, "step": 32483 }, { "epoch": 0.9955866127252666, "grad_norm": 2.058815353807029, "learning_rate": 5.107840873375125e-10, "loss": 0.5985, "step": 32484 }, { "epoch": 0.9956172612480079, "grad_norm": 1.8605298832595476, "learning_rate": 5.037146153269446e-10, "loss": 0.5437, "step": 32485 }, { "epoch": 0.995647909770749, "grad_norm": 1.5653971700185851, "learning_rate": 4.96694404632514e-10, "loss": 0.6142, "step": 32486 }, { "epoch": 0.9956785582934903, "grad_norm": 1.769936605877186, "learning_rate": 4.897234553230546e-10, "loss": 0.5638, "step": 32487 }, { "epoch": 0.9957092068162314, "grad_norm": 1.9098415971114955, "learning_rate": 4.828017674674002e-10, "loss": 0.5844, "step": 32488 }, { "epoch": 0.9957398553389727, "grad_norm": 1.781580527272616, "learning_rate": 4.759293411343846e-10, "loss": 0.6099, "step": 32489 }, { "epoch": 0.9957705038617138, "grad_norm": 1.5311510508435056, "learning_rate": 4.691061763906213e-10, "loss": 0.4768, "step": 32490 }, { "epoch": 0.9958011523844551, "grad_norm": 1.8905640722246262, "learning_rate": 4.623322733043889e-10, "loss": 0.6182, "step": 32491 }, { "epoch": 0.9958318009071963, "grad_norm": 0.7750294866979908, "learning_rate": 4.556076319417457e-10, "loss": 0.3829, "step": 32492 }, { "epoch": 0.9958624494299375, "grad_norm": 1.61438288267145, "learning_rate": 4.489322523693052e-10, "loss": 0.4033, "step": 32493 }, { "epoch": 0.9958930979526787, "grad_norm": 1.9568754858337418, "learning_rate": 4.4230613465257033e-10, "loss": 0.5986, "step": 32494 }, { "epoch": 0.9959237464754199, "grad_norm": 1.827548871274276, "learning_rate": 4.3572927885704443e-10, "loss": 0.4755, "step": 32495 }, { "epoch": 0.9959543949981611, "grad_norm": 1.8344200423820187, "learning_rate": 4.2920168504767547e-10, "loss": 0.5403, "step": 32496 }, { "epoch": 0.9959850435209023, "grad_norm": 0.7916115336737735, "learning_rate": 4.2272335328830127e-10, "loss": 0.3929, "step": 32497 }, { "epoch": 0.9960156920436435, "grad_norm": 1.877916550661781, "learning_rate": 4.162942836433148e-10, "loss": 0.618, "step": 32498 }, { "epoch": 0.9960463405663847, "grad_norm": 1.761994399882847, "learning_rate": 4.0991447617599876e-10, "loss": 0.6368, "step": 32499 }, { "epoch": 0.9960769890891259, "grad_norm": 2.1864805726522922, "learning_rate": 4.035839309485257e-10, "loss": 0.6271, "step": 32500 }, { "epoch": 0.9961076376118672, "grad_norm": 1.9242109304767032, "learning_rate": 3.973026480236231e-10, "loss": 0.5647, "step": 32501 }, { "epoch": 0.9961382861346083, "grad_norm": 1.7474248594530843, "learning_rate": 3.9107062746346346e-10, "loss": 0.5437, "step": 32502 }, { "epoch": 0.9961689346573496, "grad_norm": 1.871250686035161, "learning_rate": 3.848878693296643e-10, "loss": 0.5241, "step": 32503 }, { "epoch": 0.9961995831800907, "grad_norm": 2.169042261404359, "learning_rate": 3.787543736821775e-10, "loss": 0.5817, "step": 32504 }, { "epoch": 0.9962302317028319, "grad_norm": 1.9941710181970609, "learning_rate": 3.726701405826205e-10, "loss": 0.6221, "step": 32505 }, { "epoch": 0.9962608802255731, "grad_norm": 1.811009300052714, "learning_rate": 3.666351700898352e-10, "loss": 0.5443, "step": 32506 }, { "epoch": 0.9962915287483143, "grad_norm": 2.119216235217823, "learning_rate": 3.606494622643286e-10, "loss": 0.5768, "step": 32507 }, { "epoch": 0.9963221772710555, "grad_norm": 1.7283965203299783, "learning_rate": 3.5471301716383246e-10, "loss": 0.6466, "step": 32508 }, { "epoch": 0.9963528257937967, "grad_norm": 1.7611573809048569, "learning_rate": 3.4882583484829867e-10, "loss": 0.5724, "step": 32509 }, { "epoch": 0.996383474316538, "grad_norm": 1.9555929088864943, "learning_rate": 3.4298791537434874e-10, "loss": 0.5888, "step": 32510 }, { "epoch": 0.9964141228392791, "grad_norm": 1.8487075093390417, "learning_rate": 3.3719925880082437e-10, "loss": 0.5845, "step": 32511 }, { "epoch": 0.9964447713620204, "grad_norm": 0.7935717631851266, "learning_rate": 3.314598651837919e-10, "loss": 0.3859, "step": 32512 }, { "epoch": 0.9964754198847615, "grad_norm": 1.6845447985862105, "learning_rate": 3.257697345798727e-10, "loss": 0.5527, "step": 32513 }, { "epoch": 0.9965060684075028, "grad_norm": 1.8503024190808444, "learning_rate": 3.2012886704568814e-10, "loss": 0.5589, "step": 32514 }, { "epoch": 0.9965367169302439, "grad_norm": 1.7298240914289544, "learning_rate": 3.1453726263619424e-10, "loss": 0.5296, "step": 32515 }, { "epoch": 0.9965673654529852, "grad_norm": 1.8607150950820026, "learning_rate": 3.089949214069021e-10, "loss": 0.4958, "step": 32516 }, { "epoch": 0.9965980139757263, "grad_norm": 1.830685069489406, "learning_rate": 3.035018434127679e-10, "loss": 0.5308, "step": 32517 }, { "epoch": 0.9966286624984676, "grad_norm": 2.0377088239609846, "learning_rate": 2.9805802870708224e-10, "loss": 0.5488, "step": 32518 }, { "epoch": 0.9966593110212087, "grad_norm": 1.835817432568892, "learning_rate": 2.926634773436909e-10, "loss": 0.499, "step": 32519 }, { "epoch": 0.99668995954395, "grad_norm": 0.7847537024371078, "learning_rate": 2.8731818937588473e-10, "loss": 0.404, "step": 32520 }, { "epoch": 0.9967206080666912, "grad_norm": 2.05898278423979, "learning_rate": 2.820221648569543e-10, "loss": 0.5744, "step": 32521 }, { "epoch": 0.9967512565894324, "grad_norm": 1.7522798599677787, "learning_rate": 2.7677540383796996e-10, "loss": 0.5533, "step": 32522 }, { "epoch": 0.9967819051121736, "grad_norm": 1.7881632125964166, "learning_rate": 2.7157790637111213e-10, "loss": 0.5987, "step": 32523 }, { "epoch": 0.9968125536349148, "grad_norm": 1.9771471248759125, "learning_rate": 2.664296725080062e-10, "loss": 0.4941, "step": 32524 }, { "epoch": 0.996843202157656, "grad_norm": 0.7931894726426594, "learning_rate": 2.613307022986122e-10, "loss": 0.3998, "step": 32525 }, { "epoch": 0.9968738506803972, "grad_norm": 2.0505974150982156, "learning_rate": 2.5628099579344535e-10, "loss": 0.6116, "step": 32526 }, { "epoch": 0.9969044992031384, "grad_norm": 1.973881046920517, "learning_rate": 2.5128055304302067e-10, "loss": 0.5981, "step": 32527 }, { "epoch": 0.9969351477258797, "grad_norm": 2.0094748913123035, "learning_rate": 2.4632937409563297e-10, "loss": 0.5649, "step": 32528 }, { "epoch": 0.9969657962486208, "grad_norm": 1.921706027655259, "learning_rate": 2.4142745900013196e-10, "loss": 0.6011, "step": 32529 }, { "epoch": 0.9969964447713621, "grad_norm": 1.9632121558040196, "learning_rate": 2.365748078053676e-10, "loss": 0.5639, "step": 32530 }, { "epoch": 0.9970270932941032, "grad_norm": 2.0045312991275113, "learning_rate": 2.3177142055907931e-10, "loss": 0.5895, "step": 32531 }, { "epoch": 0.9970577418168445, "grad_norm": 1.791420884671684, "learning_rate": 2.2701729730789657e-10, "loss": 0.5343, "step": 32532 }, { "epoch": 0.9970883903395856, "grad_norm": 1.944889675372813, "learning_rate": 2.2231243809955895e-10, "loss": 0.5422, "step": 32533 }, { "epoch": 0.9971190388623269, "grad_norm": 1.9385606876561798, "learning_rate": 2.1765684298014068e-10, "loss": 0.6482, "step": 32534 }, { "epoch": 0.997149687385068, "grad_norm": 1.894128516592279, "learning_rate": 2.130505119951609e-10, "loss": 0.555, "step": 32535 }, { "epoch": 0.9971803359078092, "grad_norm": 0.8130320957908672, "learning_rate": 2.084934451901388e-10, "loss": 0.3847, "step": 32536 }, { "epoch": 0.9972109844305505, "grad_norm": 1.8026502288715838, "learning_rate": 2.039856426100384e-10, "loss": 0.627, "step": 32537 }, { "epoch": 0.9972416329532916, "grad_norm": 1.7484254267240986, "learning_rate": 1.995271042998237e-10, "loss": 0.549, "step": 32538 }, { "epoch": 0.9972722814760329, "grad_norm": 1.656117963607354, "learning_rate": 1.951178303022383e-10, "loss": 0.5425, "step": 32539 }, { "epoch": 0.997302929998774, "grad_norm": 1.8658317105577433, "learning_rate": 1.9075782066169114e-10, "loss": 0.6168, "step": 32540 }, { "epoch": 0.9973335785215153, "grad_norm": 0.7879327651598306, "learning_rate": 1.8644707542092578e-10, "loss": 0.3874, "step": 32541 }, { "epoch": 0.9973642270442564, "grad_norm": 0.8048062834726323, "learning_rate": 1.8218559462268582e-10, "loss": 0.366, "step": 32542 }, { "epoch": 0.9973948755669977, "grad_norm": 1.8921423909914452, "learning_rate": 1.779733783080495e-10, "loss": 0.6573, "step": 32543 }, { "epoch": 0.9974255240897388, "grad_norm": 1.838703786011133, "learning_rate": 1.7381042651920531e-10, "loss": 0.504, "step": 32544 }, { "epoch": 0.9974561726124801, "grad_norm": 1.9592946219895004, "learning_rate": 1.696967392972315e-10, "loss": 0.4459, "step": 32545 }, { "epoch": 0.9974868211352212, "grad_norm": 1.8376329240944225, "learning_rate": 1.6563231668265124e-10, "loss": 0.5002, "step": 32546 }, { "epoch": 0.9975174696579625, "grad_norm": 2.1468842526543255, "learning_rate": 1.616171587154325e-10, "loss": 0.5217, "step": 32547 }, { "epoch": 0.9975481181807037, "grad_norm": 2.2149445582291745, "learning_rate": 1.576512654344331e-10, "loss": 0.5119, "step": 32548 }, { "epoch": 0.9975787667034449, "grad_norm": 1.9000848484734936, "learning_rate": 1.5373463687962108e-10, "loss": 0.55, "step": 32549 }, { "epoch": 0.9976094152261861, "grad_norm": 0.7824076341561896, "learning_rate": 1.4986727308985427e-10, "loss": 0.3898, "step": 32550 }, { "epoch": 0.9976400637489273, "grad_norm": 1.7463400386697834, "learning_rate": 1.4604917410232511e-10, "loss": 0.4644, "step": 32551 }, { "epoch": 0.9976707122716685, "grad_norm": 1.768453109424755, "learning_rate": 1.4228033995478118e-10, "loss": 0.5186, "step": 32552 }, { "epoch": 0.9977013607944097, "grad_norm": 1.6402206515594036, "learning_rate": 1.385607706849701e-10, "loss": 0.4516, "step": 32553 }, { "epoch": 0.9977320093171509, "grad_norm": 1.982591144042332, "learning_rate": 1.3489046632897406e-10, "loss": 0.6246, "step": 32554 }, { "epoch": 0.9977626578398922, "grad_norm": 1.7623203286076565, "learning_rate": 1.3126942692343046e-10, "loss": 0.5801, "step": 32555 }, { "epoch": 0.9977933063626333, "grad_norm": 1.8865741218455918, "learning_rate": 1.2769765250331135e-10, "loss": 0.6302, "step": 32556 }, { "epoch": 0.9978239548853746, "grad_norm": 1.906566246983705, "learning_rate": 1.241751431046989e-10, "loss": 0.6325, "step": 32557 }, { "epoch": 0.9978546034081157, "grad_norm": 2.1113498782520974, "learning_rate": 1.20701898761455e-10, "loss": 0.6021, "step": 32558 }, { "epoch": 0.997885251930857, "grad_norm": 1.7638103477918858, "learning_rate": 1.172779195085516e-10, "loss": 0.51, "step": 32559 }, { "epoch": 0.9979159004535981, "grad_norm": 1.6672211490610211, "learning_rate": 1.1390320537929545e-10, "loss": 0.5472, "step": 32560 }, { "epoch": 0.9979465489763394, "grad_norm": 0.8144430283336112, "learning_rate": 1.105777564069932e-10, "loss": 0.3924, "step": 32561 }, { "epoch": 0.9979771974990805, "grad_norm": 1.7973461360595844, "learning_rate": 1.0730157262495156e-10, "loss": 0.53, "step": 32562 }, { "epoch": 0.9980078460218218, "grad_norm": 1.835725515312279, "learning_rate": 1.0407465406425677e-10, "loss": 0.5777, "step": 32563 }, { "epoch": 0.998038494544563, "grad_norm": 1.9797432259638723, "learning_rate": 1.0089700075766041e-10, "loss": 0.6083, "step": 32564 }, { "epoch": 0.9980691430673042, "grad_norm": 0.8112860236056678, "learning_rate": 9.776861273624871e-11, "loss": 0.4111, "step": 32565 }, { "epoch": 0.9980997915900454, "grad_norm": 1.8006988770795123, "learning_rate": 9.468949003055283e-11, "loss": 0.5295, "step": 32566 }, { "epoch": 0.9981304401127865, "grad_norm": 1.8721891760530947, "learning_rate": 9.165963267110389e-11, "loss": 0.5331, "step": 32567 }, { "epoch": 0.9981610886355278, "grad_norm": 1.8160120478779402, "learning_rate": 8.867904068843303e-11, "loss": 0.5272, "step": 32568 }, { "epoch": 0.9981917371582689, "grad_norm": 2.0404915924022657, "learning_rate": 8.574771411085093e-11, "loss": 0.6189, "step": 32569 }, { "epoch": 0.9982223856810102, "grad_norm": 1.6987777733419924, "learning_rate": 8.286565296777848e-11, "loss": 0.5493, "step": 32570 }, { "epoch": 0.9982530342037513, "grad_norm": 1.6562554348964913, "learning_rate": 8.00328572869713e-11, "loss": 0.4337, "step": 32571 }, { "epoch": 0.9982836827264926, "grad_norm": 1.75103252181554, "learning_rate": 7.724932709785027e-11, "loss": 0.4954, "step": 32572 }, { "epoch": 0.9983143312492337, "grad_norm": 0.8274116469928839, "learning_rate": 7.451506242595053e-11, "loss": 0.4067, "step": 32573 }, { "epoch": 0.998344979771975, "grad_norm": 0.7773869842667587, "learning_rate": 7.183006329958276e-11, "loss": 0.3968, "step": 32574 }, { "epoch": 0.9983756282947162, "grad_norm": 2.1933417995604523, "learning_rate": 6.919432974483719e-11, "loss": 0.5705, "step": 32575 }, { "epoch": 0.9984062768174574, "grad_norm": 0.7630368916738295, "learning_rate": 6.660786178780409e-11, "loss": 0.3691, "step": 32576 }, { "epoch": 0.9984369253401986, "grad_norm": 0.8356315473139404, "learning_rate": 6.407065945346347e-11, "loss": 0.3842, "step": 32577 }, { "epoch": 0.9984675738629398, "grad_norm": 1.9957267900237006, "learning_rate": 6.158272276679533e-11, "loss": 0.54, "step": 32578 }, { "epoch": 0.998498222385681, "grad_norm": 1.791237934602006, "learning_rate": 5.914405175333482e-11, "loss": 0.5352, "step": 32579 }, { "epoch": 0.9985288709084222, "grad_norm": 0.7697810364065011, "learning_rate": 5.67546464358415e-11, "loss": 0.3979, "step": 32580 }, { "epoch": 0.9985595194311634, "grad_norm": 1.9601439638049711, "learning_rate": 5.441450683874028e-11, "loss": 0.5475, "step": 32581 }, { "epoch": 0.9985901679539047, "grad_norm": 1.8697973056479091, "learning_rate": 5.212363298479073e-11, "loss": 0.5868, "step": 32582 }, { "epoch": 0.9986208164766458, "grad_norm": 1.7554768179446358, "learning_rate": 4.9882024896752427e-11, "loss": 0.5988, "step": 32583 }, { "epoch": 0.9986514649993871, "grad_norm": 1.9256891700194616, "learning_rate": 4.768968259627471e-11, "loss": 0.4906, "step": 32584 }, { "epoch": 0.9986821135221282, "grad_norm": 0.8069172173034996, "learning_rate": 4.554660610500694e-11, "loss": 0.3872, "step": 32585 }, { "epoch": 0.9987127620448695, "grad_norm": 1.800279804183096, "learning_rate": 4.3452795444598464e-11, "loss": 0.5431, "step": 32586 }, { "epoch": 0.9987434105676106, "grad_norm": 0.7804391390519072, "learning_rate": 4.1408250635033284e-11, "loss": 0.3996, "step": 32587 }, { "epoch": 0.9987740590903519, "grad_norm": 0.7670135913754451, "learning_rate": 3.9412971696850545e-11, "loss": 0.3799, "step": 32588 }, { "epoch": 0.998804707613093, "grad_norm": 1.8596046987760353, "learning_rate": 3.7466958649479136e-11, "loss": 0.6164, "step": 32589 }, { "epoch": 0.9988353561358343, "grad_norm": 1.94191581735636, "learning_rate": 3.5570211512903076e-11, "loss": 0.6295, "step": 32590 }, { "epoch": 0.9988660046585754, "grad_norm": 0.8307885557989587, "learning_rate": 3.372273030433082e-11, "loss": 0.3964, "step": 32591 }, { "epoch": 0.9988966531813167, "grad_norm": 2.0351339089356077, "learning_rate": 3.1924515043191275e-11, "loss": 0.6365, "step": 32592 }, { "epoch": 0.9989273017040579, "grad_norm": 1.8829294156151837, "learning_rate": 3.0175565746692894e-11, "loss": 0.5292, "step": 32593 }, { "epoch": 0.9989579502267991, "grad_norm": 1.9659973085386557, "learning_rate": 2.8475882432599245e-11, "loss": 0.5277, "step": 32594 }, { "epoch": 0.9989885987495403, "grad_norm": 1.8319911538487135, "learning_rate": 2.6825465117008564e-11, "loss": 0.5268, "step": 32595 }, { "epoch": 0.9990192472722815, "grad_norm": 1.975662110981465, "learning_rate": 2.5224313816019086e-11, "loss": 0.5788, "step": 32596 }, { "epoch": 0.9990498957950227, "grad_norm": 1.8158912757278973, "learning_rate": 2.3672428546284155e-11, "loss": 0.5873, "step": 32597 }, { "epoch": 0.9990805443177638, "grad_norm": 2.051283735207216, "learning_rate": 2.216980932223667e-11, "loss": 0.5992, "step": 32598 }, { "epoch": 0.9991111928405051, "grad_norm": 2.0106777543267613, "learning_rate": 2.071645615886464e-11, "loss": 0.5308, "step": 32599 }, { "epoch": 0.9991418413632462, "grad_norm": 1.8694554943372983, "learning_rate": 1.931236907115608e-11, "loss": 0.5219, "step": 32600 }, { "epoch": 0.9991724898859875, "grad_norm": 1.725994360407251, "learning_rate": 1.7957548072433662e-11, "loss": 0.5258, "step": 32601 }, { "epoch": 0.9992031384087287, "grad_norm": 1.883498350566362, "learning_rate": 1.6651993176020065e-11, "loss": 0.5139, "step": 32602 }, { "epoch": 0.9992337869314699, "grad_norm": 1.7182459064135425, "learning_rate": 1.5395704394682852e-11, "loss": 0.5656, "step": 32603 }, { "epoch": 0.9992644354542111, "grad_norm": 2.2676420074470998, "learning_rate": 1.418868174063448e-11, "loss": 0.5744, "step": 32604 }, { "epoch": 0.9992950839769523, "grad_norm": 1.7812301206215584, "learning_rate": 1.303092522664251e-11, "loss": 0.4687, "step": 32605 }, { "epoch": 0.9993257324996935, "grad_norm": 2.03024946813698, "learning_rate": 1.1922434863254063e-11, "loss": 0.6514, "step": 32606 }, { "epoch": 0.9993563810224347, "grad_norm": 1.813081014164946, "learning_rate": 1.0863210662126478e-11, "loss": 0.5773, "step": 32607 }, { "epoch": 0.9993870295451759, "grad_norm": 1.840268499913335, "learning_rate": 9.853252632696652e-12, "loss": 0.5232, "step": 32608 }, { "epoch": 0.9994176780679171, "grad_norm": 0.8219903402021789, "learning_rate": 8.892560786066817e-12, "loss": 0.4078, "step": 32609 }, { "epoch": 0.9994483265906583, "grad_norm": 2.1520604953272238, "learning_rate": 7.981135130563644e-12, "loss": 0.6343, "step": 32610 }, { "epoch": 0.9994789751133996, "grad_norm": 1.9505836505811718, "learning_rate": 7.118975676179141e-12, "loss": 0.5376, "step": 32611 }, { "epoch": 0.9995096236361407, "grad_norm": 1.7622214207566387, "learning_rate": 6.306082430684868e-12, "loss": 0.6196, "step": 32612 }, { "epoch": 0.999540272158882, "grad_norm": 1.6711520449176345, "learning_rate": 5.542455402407499e-12, "loss": 0.5057, "step": 32613 }, { "epoch": 0.9995709206816231, "grad_norm": 0.7759173613169017, "learning_rate": 4.8280945991185935e-12, "loss": 0.3878, "step": 32614 }, { "epoch": 0.9996015692043644, "grad_norm": 0.8062338101491675, "learning_rate": 4.16300002692438e-12, "loss": 0.3921, "step": 32615 }, { "epoch": 0.9996322177271055, "grad_norm": 2.1926745469453572, "learning_rate": 3.5471716935964183e-12, "loss": 0.59, "step": 32616 }, { "epoch": 0.9996628662498468, "grad_norm": 1.7537341652114582, "learning_rate": 2.980609604130713e-12, "loss": 0.5844, "step": 32617 }, { "epoch": 0.9996935147725879, "grad_norm": 1.8750733788346663, "learning_rate": 2.463313765188602e-12, "loss": 0.4933, "step": 32618 }, { "epoch": 0.9997241632953292, "grad_norm": 1.8970030599458279, "learning_rate": 1.9952841806558656e-12, "loss": 0.6204, "step": 32619 }, { "epoch": 0.9997548118180704, "grad_norm": 1.7975500782236078, "learning_rate": 1.5765208560836187e-12, "loss": 0.5605, "step": 32620 }, { "epoch": 0.9997854603408116, "grad_norm": 1.883584583281274, "learning_rate": 1.2070237953576425e-12, "loss": 0.6066, "step": 32621 }, { "epoch": 0.9998161088635528, "grad_norm": 2.172016664944165, "learning_rate": 8.867930018086057e-13, "loss": 0.571, "step": 32622 }, { "epoch": 0.999846757386294, "grad_norm": 1.8493748024547958, "learning_rate": 6.15828479322289e-13, "loss": 0.4624, "step": 32623 }, { "epoch": 0.9998774059090352, "grad_norm": 1.7905111932006559, "learning_rate": 3.9413022956402703e-13, "loss": 0.4968, "step": 32624 }, { "epoch": 0.9999080544317764, "grad_norm": 1.5782453500734084, "learning_rate": 2.2169825530937716e-13, "loss": 0.4832, "step": 32625 }, { "epoch": 0.9999387029545176, "grad_norm": 1.8615298675419594, "learning_rate": 9.853255822367403e-14, "loss": 0.5593, "step": 32626 }, { "epoch": 0.9999693514772588, "grad_norm": 1.0884508281833924, "learning_rate": 2.4633139417140626e-14, "loss": 0.424, "step": 32627 }, { "epoch": 1.0, "grad_norm": 1.977194309437267, "learning_rate": 0.0, "loss": 0.5395, "step": 32628 }, { "epoch": 1.0, "step": 32628, "total_flos": 1.3205010417123328e+16, "train_loss": 0.6281804487061687, "train_runtime": 381666.8813, "train_samples_per_second": 10.942, "train_steps_per_second": 0.085 } ], "logging_steps": 1.0, "max_steps": 32628, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3205010417123328e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }