{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9968971081047537, "eval_steps": 504, "global_step": 8056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004964627032394191, "grad_norm": 0.2896804752195731, "learning_rate": 5.000000000000001e-07, "loss": 0.6569, "step": 1 }, { "epoch": 0.0004964627032394191, "eval_loss": 0.6948701739311218, "eval_runtime": 258.3281, "eval_samples_per_second": 117.498, "eval_steps_per_second": 14.691, "step": 1 }, { "epoch": 0.0009929254064788382, "grad_norm": 0.3050018791891007, "learning_rate": 1.0000000000000002e-06, "loss": 0.6721, "step": 2 }, { "epoch": 0.0014893881097182574, "grad_norm": 0.29882980651965635, "learning_rate": 1.5e-06, "loss": 0.681, "step": 3 }, { "epoch": 0.0019858508129576764, "grad_norm": 0.2769455424447693, "learning_rate": 2.0000000000000003e-06, "loss": 0.7166, "step": 4 }, { "epoch": 0.002482313516197096, "grad_norm": 0.28865369141827507, "learning_rate": 2.5e-06, "loss": 0.7363, "step": 5 }, { "epoch": 0.002978776219436515, "grad_norm": 0.2722576701771127, "learning_rate": 3e-06, "loss": 0.7034, "step": 6 }, { "epoch": 0.0034752389226759338, "grad_norm": 0.27760553070368027, "learning_rate": 3.5e-06, "loss": 0.7189, "step": 7 }, { "epoch": 0.003971701625915353, "grad_norm": 0.2560906764365523, "learning_rate": 4.000000000000001e-06, "loss": 0.6724, "step": 8 }, { "epoch": 0.004468164329154772, "grad_norm": 0.24966702280304454, "learning_rate": 4.5e-06, "loss": 0.7125, "step": 9 }, { "epoch": 0.004964627032394192, "grad_norm": 0.20972188158050864, "learning_rate": 5e-06, "loss": 0.6863, "step": 10 }, { "epoch": 0.00546108973563361, "grad_norm": 0.2008074817888515, "learning_rate": 5.500000000000001e-06, "loss": 0.6582, "step": 11 }, { "epoch": 0.00595755243887303, "grad_norm": 0.22832944172310443, "learning_rate": 6e-06, "loss": 0.642, "step": 12 }, { "epoch": 0.006454015142112449, "grad_norm": 0.2459156254861726, "learning_rate": 6.5000000000000004e-06, "loss": 0.671, "step": 13 }, { "epoch": 0.0069504778453518675, "grad_norm": 0.31687300308761013, "learning_rate": 7e-06, "loss": 0.6689, "step": 14 }, { "epoch": 0.007446940548591287, "grad_norm": 0.20119950815160606, "learning_rate": 7.500000000000001e-06, "loss": 0.6279, "step": 15 }, { "epoch": 0.007943403251830706, "grad_norm": 0.21772431498741993, "learning_rate": 8.000000000000001e-06, "loss": 0.6732, "step": 16 }, { "epoch": 0.008439865955070125, "grad_norm": 0.17454628482212584, "learning_rate": 8.5e-06, "loss": 0.614, "step": 17 }, { "epoch": 0.008936328658309544, "grad_norm": 0.1674880235536819, "learning_rate": 9e-06, "loss": 0.6335, "step": 18 }, { "epoch": 0.009432791361548964, "grad_norm": 0.18488918178360303, "learning_rate": 9.5e-06, "loss": 0.6027, "step": 19 }, { "epoch": 0.009929254064788383, "grad_norm": 0.24570525549058408, "learning_rate": 1e-05, "loss": 0.621, "step": 20 }, { "epoch": 0.010425716768027803, "grad_norm": 0.15022107044829835, "learning_rate": 9.999999617915087e-06, "loss": 0.6039, "step": 21 }, { "epoch": 0.01092217947126722, "grad_norm": 0.16517798078299958, "learning_rate": 9.999998471660397e-06, "loss": 0.6237, "step": 22 }, { "epoch": 0.01141864217450664, "grad_norm": 0.19491836617556169, "learning_rate": 9.999996561236111e-06, "loss": 0.6763, "step": 23 }, { "epoch": 0.01191510487774606, "grad_norm": 0.15511366832327092, "learning_rate": 9.99999388664252e-06, "loss": 0.6079, "step": 24 }, { "epoch": 0.012411567580985479, "grad_norm": 
0.14847965517533848, "learning_rate": 9.999990447880033e-06, "loss": 0.6019, "step": 25 }, { "epoch": 0.012908030284224898, "grad_norm": 0.1517877987340116, "learning_rate": 9.999986244949173e-06, "loss": 0.5898, "step": 26 }, { "epoch": 0.013404492987464317, "grad_norm": 0.1467811827869313, "learning_rate": 9.999981277850585e-06, "loss": 0.6418, "step": 27 }, { "epoch": 0.013900955690703735, "grad_norm": 0.14177217518216353, "learning_rate": 9.999975546585027e-06, "loss": 0.653, "step": 28 }, { "epoch": 0.014397418393943155, "grad_norm": 0.14013715426768192, "learning_rate": 9.999969051153376e-06, "loss": 0.6336, "step": 29 }, { "epoch": 0.014893881097182574, "grad_norm": 0.13838564452582985, "learning_rate": 9.999961791556623e-06, "loss": 0.6115, "step": 30 }, { "epoch": 0.015390343800421993, "grad_norm": 0.14196459752762547, "learning_rate": 9.999953767795879e-06, "loss": 0.6651, "step": 31 }, { "epoch": 0.01588680650366141, "grad_norm": 0.13929013135958662, "learning_rate": 9.99994497987237e-06, "loss": 0.6196, "step": 32 }, { "epoch": 0.016383269206900832, "grad_norm": 0.1377890217750465, "learning_rate": 9.999935427787437e-06, "loss": 0.6128, "step": 33 }, { "epoch": 0.01687973191014025, "grad_norm": 0.13403481454155775, "learning_rate": 9.999925111542544e-06, "loss": 0.6287, "step": 34 }, { "epoch": 0.01737619461337967, "grad_norm": 0.13706676440981166, "learning_rate": 9.999914031139264e-06, "loss": 0.5834, "step": 35 }, { "epoch": 0.01787265731661909, "grad_norm": 0.13655131236247117, "learning_rate": 9.99990218657929e-06, "loss": 0.5886, "step": 36 }, { "epoch": 0.018369120019858506, "grad_norm": 0.13692053883385027, "learning_rate": 9.999889577864439e-06, "loss": 0.6128, "step": 37 }, { "epoch": 0.018865582723097928, "grad_norm": 0.1284073363888045, "learning_rate": 9.99987620499663e-06, "loss": 0.635, "step": 38 }, { "epoch": 0.019362045426337345, "grad_norm": 0.13517475428298725, "learning_rate": 9.999862067977911e-06, "loss": 0.6698, "step": 39 }, { "epoch": 0.019858508129576766, "grad_norm": 0.1355982640698863, "learning_rate": 9.999847166810441e-06, "loss": 0.6236, "step": 40 }, { "epoch": 0.020354970832816184, "grad_norm": 0.12984313043454054, "learning_rate": 9.999831501496497e-06, "loss": 0.5833, "step": 41 }, { "epoch": 0.020851433536055605, "grad_norm": 0.1383145715551888, "learning_rate": 9.999815072038476e-06, "loss": 0.5906, "step": 42 }, { "epoch": 0.021347896239295023, "grad_norm": 0.1288047804494723, "learning_rate": 9.999797878438886e-06, "loss": 0.6161, "step": 43 }, { "epoch": 0.02184435894253444, "grad_norm": 0.13478081351814658, "learning_rate": 9.999779920700358e-06, "loss": 0.5896, "step": 44 }, { "epoch": 0.022340821645773862, "grad_norm": 0.13613809579000827, "learning_rate": 9.999761198825633e-06, "loss": 0.6314, "step": 45 }, { "epoch": 0.02283728434901328, "grad_norm": 0.13321658898143937, "learning_rate": 9.999741712817574e-06, "loss": 0.6312, "step": 46 }, { "epoch": 0.0233337470522527, "grad_norm": 0.13603038062117492, "learning_rate": 9.999721462679158e-06, "loss": 0.5961, "step": 47 }, { "epoch": 0.02383020975549212, "grad_norm": 0.13675851643279544, "learning_rate": 9.999700448413483e-06, "loss": 0.6154, "step": 48 }, { "epoch": 0.02432667245873154, "grad_norm": 0.13729938609740347, "learning_rate": 9.999678670023756e-06, "loss": 0.6242, "step": 49 }, { "epoch": 0.024823135161970957, "grad_norm": 0.13441177177690306, "learning_rate": 9.99965612751331e-06, "loss": 0.6007, "step": 50 }, { "epoch": 0.025319597865210375, "grad_norm": 
0.13547565356560712, "learning_rate": 9.999632820885588e-06, "loss": 0.6411, "step": 51 }, { "epoch": 0.025816060568449796, "grad_norm": 0.1324008772576428, "learning_rate": 9.999608750144152e-06, "loss": 0.6177, "step": 52 }, { "epoch": 0.026312523271689214, "grad_norm": 0.13979089573869022, "learning_rate": 9.999583915292681e-06, "loss": 0.5949, "step": 53 }, { "epoch": 0.026808985974928635, "grad_norm": 0.13859893177899527, "learning_rate": 9.999558316334971e-06, "loss": 0.6541, "step": 54 }, { "epoch": 0.027305448678168052, "grad_norm": 0.13635221865975883, "learning_rate": 9.999531953274934e-06, "loss": 0.6091, "step": 55 }, { "epoch": 0.02780191138140747, "grad_norm": 0.1282420474503579, "learning_rate": 9.9995048261166e-06, "loss": 0.5882, "step": 56 }, { "epoch": 0.02829837408464689, "grad_norm": 0.13401369032020452, "learning_rate": 9.999476934864113e-06, "loss": 0.5938, "step": 57 }, { "epoch": 0.02879483678788631, "grad_norm": 0.13628126334123536, "learning_rate": 9.999448279521737e-06, "loss": 0.5985, "step": 58 }, { "epoch": 0.02929129949112573, "grad_norm": 0.1341361184774213, "learning_rate": 9.999418860093852e-06, "loss": 0.5719, "step": 59 }, { "epoch": 0.029787762194365148, "grad_norm": 0.13786027891551675, "learning_rate": 9.999388676584956e-06, "loss": 0.6298, "step": 60 }, { "epoch": 0.03028422489760457, "grad_norm": 0.1286588798863049, "learning_rate": 9.999357728999657e-06, "loss": 0.5584, "step": 61 }, { "epoch": 0.030780687600843987, "grad_norm": 0.13054259557276518, "learning_rate": 9.999326017342688e-06, "loss": 0.6112, "step": 62 }, { "epoch": 0.03127715030408341, "grad_norm": 0.12726900077110587, "learning_rate": 9.999293541618898e-06, "loss": 0.6069, "step": 63 }, { "epoch": 0.03177361300732282, "grad_norm": 0.1284338678113, "learning_rate": 9.999260301833245e-06, "loss": 0.5741, "step": 64 }, { "epoch": 0.03227007571056224, "grad_norm": 0.13254345170506723, "learning_rate": 9.999226297990812e-06, "loss": 0.6091, "step": 65 }, { "epoch": 0.032766538413801664, "grad_norm": 0.12810965726821946, "learning_rate": 9.999191530096798e-06, "loss": 0.591, "step": 66 }, { "epoch": 0.033263001117041086, "grad_norm": 0.13053684029880475, "learning_rate": 9.999155998156511e-06, "loss": 0.6185, "step": 67 }, { "epoch": 0.0337594638202805, "grad_norm": 0.1249000403363421, "learning_rate": 9.999119702175388e-06, "loss": 0.6039, "step": 68 }, { "epoch": 0.03425592652351992, "grad_norm": 0.13159419524811636, "learning_rate": 9.999082642158972e-06, "loss": 0.6349, "step": 69 }, { "epoch": 0.03475238922675934, "grad_norm": 0.12994532055439553, "learning_rate": 9.999044818112929e-06, "loss": 0.5775, "step": 70 }, { "epoch": 0.035248851929998756, "grad_norm": 0.12935935865475864, "learning_rate": 9.999006230043039e-06, "loss": 0.5856, "step": 71 }, { "epoch": 0.03574531463323818, "grad_norm": 0.12636961689532966, "learning_rate": 9.9989668779552e-06, "loss": 0.5693, "step": 72 }, { "epoch": 0.0362417773364776, "grad_norm": 0.1291945677437944, "learning_rate": 9.998926761855425e-06, "loss": 0.5856, "step": 73 }, { "epoch": 0.03673824003971701, "grad_norm": 0.12423558807838328, "learning_rate": 9.998885881749847e-06, "loss": 0.6412, "step": 74 }, { "epoch": 0.037234702742956434, "grad_norm": 0.11838472293339844, "learning_rate": 9.998844237644714e-06, "loss": 0.5923, "step": 75 }, { "epoch": 0.037731165446195855, "grad_norm": 0.12070342951020165, "learning_rate": 9.998801829546387e-06, "loss": 0.6009, "step": 76 }, { "epoch": 0.038227628149435276, "grad_norm": 0.12133413872988458, 
"learning_rate": 9.998758657461353e-06, "loss": 0.5928, "step": 77 }, { "epoch": 0.03872409085267469, "grad_norm": 0.11847302132399616, "learning_rate": 9.998714721396206e-06, "loss": 0.5873, "step": 78 }, { "epoch": 0.03922055355591411, "grad_norm": 0.11586463974187308, "learning_rate": 9.998670021357662e-06, "loss": 0.599, "step": 79 }, { "epoch": 0.03971701625915353, "grad_norm": 0.11611971202617413, "learning_rate": 9.998624557352552e-06, "loss": 0.5925, "step": 80 }, { "epoch": 0.04021347896239295, "grad_norm": 0.11063709969348022, "learning_rate": 9.998578329387826e-06, "loss": 0.5651, "step": 81 }, { "epoch": 0.04070994166563237, "grad_norm": 0.11228145005616408, "learning_rate": 9.998531337470548e-06, "loss": 0.5596, "step": 82 }, { "epoch": 0.04120640436887179, "grad_norm": 0.10686949876281465, "learning_rate": 9.9984835816079e-06, "loss": 0.5968, "step": 83 }, { "epoch": 0.04170286707211121, "grad_norm": 0.10654597088862137, "learning_rate": 9.998435061807184e-06, "loss": 0.5967, "step": 84 }, { "epoch": 0.042199329775350625, "grad_norm": 0.10033719891548469, "learning_rate": 9.99838577807581e-06, "loss": 0.5501, "step": 85 }, { "epoch": 0.042695792478590046, "grad_norm": 0.10549280001704904, "learning_rate": 9.998335730421313e-06, "loss": 0.5786, "step": 86 }, { "epoch": 0.04319225518182947, "grad_norm": 0.102951502668565, "learning_rate": 9.998284918851343e-06, "loss": 0.5747, "step": 87 }, { "epoch": 0.04368871788506888, "grad_norm": 0.09770170164060958, "learning_rate": 9.998233343373664e-06, "loss": 0.5826, "step": 88 }, { "epoch": 0.0441851805883083, "grad_norm": 0.0958977534181121, "learning_rate": 9.998181003996159e-06, "loss": 0.6037, "step": 89 }, { "epoch": 0.044681643291547724, "grad_norm": 0.0920383420000729, "learning_rate": 9.998127900726825e-06, "loss": 0.5708, "step": 90 }, { "epoch": 0.045178105994787145, "grad_norm": 0.093747232922122, "learning_rate": 9.998074033573783e-06, "loss": 0.6067, "step": 91 }, { "epoch": 0.04567456869802656, "grad_norm": 0.08851388967347672, "learning_rate": 9.998019402545264e-06, "loss": 0.5706, "step": 92 }, { "epoch": 0.04617103140126598, "grad_norm": 0.0867258581115883, "learning_rate": 9.997964007649614e-06, "loss": 0.5673, "step": 93 }, { "epoch": 0.0466674941045054, "grad_norm": 0.08589006292805439, "learning_rate": 9.997907848895304e-06, "loss": 0.5839, "step": 94 }, { "epoch": 0.047163956807744815, "grad_norm": 0.08567652602207897, "learning_rate": 9.997850926290912e-06, "loss": 0.5604, "step": 95 }, { "epoch": 0.04766041951098424, "grad_norm": 0.09168476487857387, "learning_rate": 9.997793239845141e-06, "loss": 0.6102, "step": 96 }, { "epoch": 0.04815688221422366, "grad_norm": 0.0862684302577442, "learning_rate": 9.997734789566809e-06, "loss": 0.6065, "step": 97 }, { "epoch": 0.04865334491746308, "grad_norm": 0.08721353894012582, "learning_rate": 9.997675575464844e-06, "loss": 0.6129, "step": 98 }, { "epoch": 0.04914980762070249, "grad_norm": 0.08863015163099411, "learning_rate": 9.997615597548302e-06, "loss": 0.5802, "step": 99 }, { "epoch": 0.049646270323941914, "grad_norm": 0.08225243222916953, "learning_rate": 9.997554855826343e-06, "loss": 0.599, "step": 100 }, { "epoch": 0.050142733027181335, "grad_norm": 0.08453041407170588, "learning_rate": 9.997493350308258e-06, "loss": 0.6146, "step": 101 }, { "epoch": 0.05063919573042075, "grad_norm": 0.07970489213260537, "learning_rate": 9.99743108100344e-06, "loss": 0.574, "step": 102 }, { "epoch": 0.05113565843366017, "grad_norm": 0.07863283495549847, "learning_rate": 
9.99736804792141e-06, "loss": 0.5682, "step": 103 }, { "epoch": 0.05163212113689959, "grad_norm": 0.07752696100053419, "learning_rate": 9.997304251071802e-06, "loss": 0.5569, "step": 104 }, { "epoch": 0.052128583840139006, "grad_norm": 0.13358841508181127, "learning_rate": 9.997239690464362e-06, "loss": 0.6168, "step": 105 }, { "epoch": 0.05262504654337843, "grad_norm": 0.07348128316840971, "learning_rate": 9.997174366108962e-06, "loss": 0.5819, "step": 106 }, { "epoch": 0.05312150924661785, "grad_norm": 0.07799188911676157, "learning_rate": 9.997108278015583e-06, "loss": 0.5903, "step": 107 }, { "epoch": 0.05361797194985727, "grad_norm": 0.08319318876985128, "learning_rate": 9.997041426194327e-06, "loss": 0.6054, "step": 108 }, { "epoch": 0.054114434653096684, "grad_norm": 0.07577969048637133, "learning_rate": 9.996973810655409e-06, "loss": 0.588, "step": 109 }, { "epoch": 0.054610897356336105, "grad_norm": 0.07929330066652941, "learning_rate": 9.996905431409165e-06, "loss": 0.577, "step": 110 }, { "epoch": 0.055107360059575526, "grad_norm": 0.07669462435166145, "learning_rate": 9.996836288466046e-06, "loss": 0.5774, "step": 111 }, { "epoch": 0.05560382276281494, "grad_norm": 0.07442700190295497, "learning_rate": 9.996766381836617e-06, "loss": 0.5979, "step": 112 }, { "epoch": 0.05610028546605436, "grad_norm": 0.07330916527882274, "learning_rate": 9.996695711531565e-06, "loss": 0.5559, "step": 113 }, { "epoch": 0.05659674816929378, "grad_norm": 0.07665817971888089, "learning_rate": 9.99662427756169e-06, "loss": 0.5979, "step": 114 }, { "epoch": 0.057093210872533204, "grad_norm": 0.08003254184842483, "learning_rate": 9.996552079937907e-06, "loss": 0.599, "step": 115 }, { "epoch": 0.05758967357577262, "grad_norm": 0.07510789705619458, "learning_rate": 9.996479118671255e-06, "loss": 0.6122, "step": 116 }, { "epoch": 0.05808613627901204, "grad_norm": 0.07624743707532947, "learning_rate": 9.99640539377288e-06, "loss": 0.6061, "step": 117 }, { "epoch": 0.05858259898225146, "grad_norm": 0.08231933957861946, "learning_rate": 9.99633090525405e-06, "loss": 0.6115, "step": 118 }, { "epoch": 0.059079061685490875, "grad_norm": 0.07413718802908581, "learning_rate": 9.996255653126155e-06, "loss": 0.5688, "step": 119 }, { "epoch": 0.059575524388730296, "grad_norm": 0.0715246453925429, "learning_rate": 9.996179637400689e-06, "loss": 0.6037, "step": 120 }, { "epoch": 0.06007198709196972, "grad_norm": 0.07150754449533633, "learning_rate": 9.996102858089276e-06, "loss": 0.5793, "step": 121 }, { "epoch": 0.06056844979520914, "grad_norm": 0.07498273940894117, "learning_rate": 9.996025315203645e-06, "loss": 0.5924, "step": 122 }, { "epoch": 0.06106491249844855, "grad_norm": 0.07613201051621966, "learning_rate": 9.995947008755651e-06, "loss": 0.599, "step": 123 }, { "epoch": 0.06156137520168797, "grad_norm": 0.07403145307648665, "learning_rate": 9.99586793875726e-06, "loss": 0.5827, "step": 124 }, { "epoch": 0.062057837904927395, "grad_norm": 0.07365891943371078, "learning_rate": 9.99578810522056e-06, "loss": 0.5593, "step": 125 }, { "epoch": 0.06255430060816682, "grad_norm": 0.07495245460403768, "learning_rate": 9.995707508157746e-06, "loss": 0.5925, "step": 126 }, { "epoch": 0.06305076331140623, "grad_norm": 0.07153054219977284, "learning_rate": 9.995626147581141e-06, "loss": 0.5627, "step": 127 }, { "epoch": 0.06354722601464564, "grad_norm": 0.07620684738653265, "learning_rate": 9.995544023503179e-06, "loss": 0.6354, "step": 128 }, { "epoch": 0.06404368871788507, "grad_norm": 0.0745933597128834, 
"learning_rate": 9.995461135936409e-06, "loss": 0.565, "step": 129 }, { "epoch": 0.06454015142112449, "grad_norm": 0.07494493059038407, "learning_rate": 9.9953774848935e-06, "loss": 0.5732, "step": 130 }, { "epoch": 0.0650366141243639, "grad_norm": 0.07408197272957125, "learning_rate": 9.995293070387237e-06, "loss": 0.6197, "step": 131 }, { "epoch": 0.06553307682760333, "grad_norm": 0.07097144151203463, "learning_rate": 9.995207892430525e-06, "loss": 0.5706, "step": 132 }, { "epoch": 0.06602953953084274, "grad_norm": 0.07411471261841912, "learning_rate": 9.995121951036375e-06, "loss": 0.6672, "step": 133 }, { "epoch": 0.06652600223408217, "grad_norm": 0.08002634782883043, "learning_rate": 9.995035246217928e-06, "loss": 0.5513, "step": 134 }, { "epoch": 0.06702246493732159, "grad_norm": 0.07632470968229874, "learning_rate": 9.99494777798843e-06, "loss": 0.6046, "step": 135 }, { "epoch": 0.067518927640561, "grad_norm": 0.07572691173078472, "learning_rate": 9.994859546361255e-06, "loss": 0.5843, "step": 136 }, { "epoch": 0.06801539034380043, "grad_norm": 0.06904451720592344, "learning_rate": 9.994770551349884e-06, "loss": 0.5461, "step": 137 }, { "epoch": 0.06851185304703984, "grad_norm": 0.07299490802645708, "learning_rate": 9.99468079296792e-06, "loss": 0.56, "step": 138 }, { "epoch": 0.06900831575027926, "grad_norm": 0.07518480559637351, "learning_rate": 9.994590271229077e-06, "loss": 0.5714, "step": 139 }, { "epoch": 0.06950477845351868, "grad_norm": 0.07587964520731476, "learning_rate": 9.994498986147196e-06, "loss": 0.5743, "step": 140 }, { "epoch": 0.0700012411567581, "grad_norm": 0.07140668444553665, "learning_rate": 9.994406937736225e-06, "loss": 0.5956, "step": 141 }, { "epoch": 0.07049770385999751, "grad_norm": 0.08342316385982693, "learning_rate": 9.994314126010234e-06, "loss": 0.6292, "step": 142 }, { "epoch": 0.07099416656323694, "grad_norm": 0.07516288618827946, "learning_rate": 9.994220550983404e-06, "loss": 0.5675, "step": 143 }, { "epoch": 0.07149062926647635, "grad_norm": 0.07794453135148774, "learning_rate": 9.994126212670042e-06, "loss": 0.6, "step": 144 }, { "epoch": 0.07198709196971577, "grad_norm": 0.07508762812017271, "learning_rate": 9.99403111108456e-06, "loss": 0.6324, "step": 145 }, { "epoch": 0.0724835546729552, "grad_norm": 0.07307616676603801, "learning_rate": 9.9939352462415e-06, "loss": 0.5638, "step": 146 }, { "epoch": 0.07298001737619461, "grad_norm": 0.07276886958258562, "learning_rate": 9.993838618155505e-06, "loss": 0.5773, "step": 147 }, { "epoch": 0.07347648007943403, "grad_norm": 0.07557752398442871, "learning_rate": 9.99374122684135e-06, "loss": 0.5808, "step": 148 }, { "epoch": 0.07397294278267345, "grad_norm": 0.0733881295710281, "learning_rate": 9.993643072313916e-06, "loss": 0.5679, "step": 149 }, { "epoch": 0.07446940548591287, "grad_norm": 0.07381382684727514, "learning_rate": 9.993544154588206e-06, "loss": 0.5883, "step": 150 }, { "epoch": 0.0749658681891523, "grad_norm": 0.07520498828814545, "learning_rate": 9.993444473679337e-06, "loss": 0.6129, "step": 151 }, { "epoch": 0.07546233089239171, "grad_norm": 0.07066703681374047, "learning_rate": 9.993344029602543e-06, "loss": 0.5465, "step": 152 }, { "epoch": 0.07595879359563112, "grad_norm": 0.06991119947069825, "learning_rate": 9.993242822373178e-06, "loss": 0.5595, "step": 153 }, { "epoch": 0.07645525629887055, "grad_norm": 0.07303529433121117, "learning_rate": 9.993140852006708e-06, "loss": 0.5904, "step": 154 }, { "epoch": 0.07695171900210997, "grad_norm": 0.08000746313978774, 
"learning_rate": 9.993038118518716e-06, "loss": 0.6066, "step": 155 }, { "epoch": 0.07744818170534938, "grad_norm": 0.07301721545465954, "learning_rate": 9.992934621924906e-06, "loss": 0.5661, "step": 156 }, { "epoch": 0.07794464440858881, "grad_norm": 0.07220658851768877, "learning_rate": 9.992830362241094e-06, "loss": 0.5603, "step": 157 }, { "epoch": 0.07844110711182822, "grad_norm": 0.07414895354653442, "learning_rate": 9.992725339483218e-06, "loss": 0.5871, "step": 158 }, { "epoch": 0.07893756981506764, "grad_norm": 0.0741359241647344, "learning_rate": 9.992619553667321e-06, "loss": 0.5853, "step": 159 }, { "epoch": 0.07943403251830707, "grad_norm": 0.07264805114336846, "learning_rate": 9.99251300480958e-06, "loss": 0.5678, "step": 160 }, { "epoch": 0.07993049522154648, "grad_norm": 0.07229450444583423, "learning_rate": 9.992405692926273e-06, "loss": 0.5884, "step": 161 }, { "epoch": 0.0804269579247859, "grad_norm": 0.0740811801883871, "learning_rate": 9.992297618033803e-06, "loss": 0.5824, "step": 162 }, { "epoch": 0.08092342062802532, "grad_norm": 0.07435862309763852, "learning_rate": 9.99218878014869e-06, "loss": 0.5959, "step": 163 }, { "epoch": 0.08141988333126474, "grad_norm": 0.0758562192646611, "learning_rate": 9.992079179287564e-06, "loss": 0.6195, "step": 164 }, { "epoch": 0.08191634603450416, "grad_norm": 0.07278506836733034, "learning_rate": 9.991968815467176e-06, "loss": 0.5523, "step": 165 }, { "epoch": 0.08241280873774358, "grad_norm": 0.07266838334484792, "learning_rate": 9.991857688704396e-06, "loss": 0.6246, "step": 166 }, { "epoch": 0.08290927144098299, "grad_norm": 0.07365765610190685, "learning_rate": 9.991745799016206e-06, "loss": 0.5916, "step": 167 }, { "epoch": 0.08340573414422242, "grad_norm": 0.07200755697270038, "learning_rate": 9.991633146419707e-06, "loss": 0.5803, "step": 168 }, { "epoch": 0.08390219684746184, "grad_norm": 0.07210206192758828, "learning_rate": 9.991519730932118e-06, "loss": 0.574, "step": 169 }, { "epoch": 0.08439865955070125, "grad_norm": 0.07267719726276502, "learning_rate": 9.99140555257077e-06, "loss": 0.5798, "step": 170 }, { "epoch": 0.08489512225394068, "grad_norm": 0.07155471898330006, "learning_rate": 9.991290611353116e-06, "loss": 0.5677, "step": 171 }, { "epoch": 0.08539158495718009, "grad_norm": 0.07381712566994895, "learning_rate": 9.991174907296718e-06, "loss": 0.6053, "step": 172 }, { "epoch": 0.0858880476604195, "grad_norm": 0.07574128504119582, "learning_rate": 9.991058440419265e-06, "loss": 0.562, "step": 173 }, { "epoch": 0.08638451036365893, "grad_norm": 0.07475073669944175, "learning_rate": 9.990941210738553e-06, "loss": 0.5858, "step": 174 }, { "epoch": 0.08688097306689835, "grad_norm": 0.07344393479012397, "learning_rate": 9.990823218272503e-06, "loss": 0.5688, "step": 175 }, { "epoch": 0.08737743577013776, "grad_norm": 0.07812657615336012, "learning_rate": 9.990704463039144e-06, "loss": 0.6131, "step": 176 }, { "epoch": 0.08787389847337719, "grad_norm": 0.0750686775572329, "learning_rate": 9.99058494505663e-06, "loss": 0.5756, "step": 177 }, { "epoch": 0.0883703611766166, "grad_norm": 0.07104870179442725, "learning_rate": 9.990464664343223e-06, "loss": 0.5716, "step": 178 }, { "epoch": 0.08886682387985602, "grad_norm": 0.07287131127888745, "learning_rate": 9.990343620917308e-06, "loss": 0.5997, "step": 179 }, { "epoch": 0.08936328658309545, "grad_norm": 0.07489206888108628, "learning_rate": 9.990221814797386e-06, "loss": 0.5612, "step": 180 }, { "epoch": 0.08985974928633486, "grad_norm": 0.07593251655355389, 
"learning_rate": 9.990099246002071e-06, "loss": 0.5759, "step": 181 }, { "epoch": 0.09035621198957429, "grad_norm": 0.07450332229553874, "learning_rate": 9.989975914550097e-06, "loss": 0.5707, "step": 182 }, { "epoch": 0.0908526746928137, "grad_norm": 0.0711423216721135, "learning_rate": 9.989851820460312e-06, "loss": 0.5571, "step": 183 }, { "epoch": 0.09134913739605312, "grad_norm": 0.07565282940352779, "learning_rate": 9.989726963751683e-06, "loss": 0.5767, "step": 184 }, { "epoch": 0.09184560009929255, "grad_norm": 0.07365920974609773, "learning_rate": 9.989601344443291e-06, "loss": 0.5579, "step": 185 }, { "epoch": 0.09234206280253196, "grad_norm": 0.07434713572581113, "learning_rate": 9.989474962554335e-06, "loss": 0.6034, "step": 186 }, { "epoch": 0.09283852550577137, "grad_norm": 0.07382111167342237, "learning_rate": 9.989347818104134e-06, "loss": 0.5744, "step": 187 }, { "epoch": 0.0933349882090108, "grad_norm": 0.07136125426616181, "learning_rate": 9.989219911112114e-06, "loss": 0.5631, "step": 188 }, { "epoch": 0.09383145091225022, "grad_norm": 0.0733337379350001, "learning_rate": 9.989091241597828e-06, "loss": 0.5793, "step": 189 }, { "epoch": 0.09432791361548963, "grad_norm": 0.07566215084062144, "learning_rate": 9.988961809580939e-06, "loss": 0.5552, "step": 190 }, { "epoch": 0.09482437631872906, "grad_norm": 0.07440377413674429, "learning_rate": 9.988831615081231e-06, "loss": 0.6103, "step": 191 }, { "epoch": 0.09532083902196847, "grad_norm": 0.07548618401783257, "learning_rate": 9.9887006581186e-06, "loss": 0.5744, "step": 192 }, { "epoch": 0.09581730172520789, "grad_norm": 0.0704767952117985, "learning_rate": 9.98856893871306e-06, "loss": 0.569, "step": 193 }, { "epoch": 0.09631376442844732, "grad_norm": 0.0725464372041384, "learning_rate": 9.988436456884743e-06, "loss": 0.5742, "step": 194 }, { "epoch": 0.09681022713168673, "grad_norm": 0.0710557842749959, "learning_rate": 9.988303212653898e-06, "loss": 0.5572, "step": 195 }, { "epoch": 0.09730668983492616, "grad_norm": 0.0735237151063019, "learning_rate": 9.988169206040889e-06, "loss": 0.5654, "step": 196 }, { "epoch": 0.09780315253816557, "grad_norm": 0.07411964385544856, "learning_rate": 9.988034437066195e-06, "loss": 0.5732, "step": 197 }, { "epoch": 0.09829961524140499, "grad_norm": 0.07267276911713769, "learning_rate": 9.987898905750416e-06, "loss": 0.5752, "step": 198 }, { "epoch": 0.09879607794464441, "grad_norm": 0.07442839143860121, "learning_rate": 9.987762612114262e-06, "loss": 0.5654, "step": 199 }, { "epoch": 0.09929254064788383, "grad_norm": 0.07200329045156166, "learning_rate": 9.987625556178566e-06, "loss": 0.5513, "step": 200 }, { "epoch": 0.09978900335112324, "grad_norm": 0.07040542641505096, "learning_rate": 9.987487737964274e-06, "loss": 0.5314, "step": 201 }, { "epoch": 0.10028546605436267, "grad_norm": 0.07287637792348034, "learning_rate": 9.98734915749245e-06, "loss": 0.5962, "step": 202 }, { "epoch": 0.10078192875760209, "grad_norm": 0.07244379147061296, "learning_rate": 9.987209814784273e-06, "loss": 0.5623, "step": 203 }, { "epoch": 0.1012783914608415, "grad_norm": 0.07236469504659584, "learning_rate": 9.98706970986104e-06, "loss": 0.5538, "step": 204 }, { "epoch": 0.10177485416408093, "grad_norm": 0.07466623347714184, "learning_rate": 9.986928842744163e-06, "loss": 0.5645, "step": 205 }, { "epoch": 0.10227131686732034, "grad_norm": 0.07052940531611344, "learning_rate": 9.986787213455174e-06, "loss": 0.5309, "step": 206 }, { "epoch": 0.10276777957055976, "grad_norm": 0.07516276980019747, 
"learning_rate": 9.986644822015715e-06, "loss": 0.5487, "step": 207 }, { "epoch": 0.10326424227379918, "grad_norm": 0.07671293855982048, "learning_rate": 9.986501668447547e-06, "loss": 0.5879, "step": 208 }, { "epoch": 0.1037607049770386, "grad_norm": 0.07208784254703426, "learning_rate": 9.986357752772555e-06, "loss": 0.5536, "step": 209 }, { "epoch": 0.10425716768027801, "grad_norm": 0.07815563722985788, "learning_rate": 9.98621307501273e-06, "loss": 0.5642, "step": 210 }, { "epoch": 0.10475363038351744, "grad_norm": 0.07531957358708724, "learning_rate": 9.986067635190184e-06, "loss": 0.5764, "step": 211 }, { "epoch": 0.10525009308675685, "grad_norm": 0.07516730483203957, "learning_rate": 9.985921433327144e-06, "loss": 0.5893, "step": 212 }, { "epoch": 0.10574655578999628, "grad_norm": 0.07452516834494967, "learning_rate": 9.985774469445957e-06, "loss": 0.5878, "step": 213 }, { "epoch": 0.1062430184932357, "grad_norm": 0.0767192562209654, "learning_rate": 9.985626743569083e-06, "loss": 0.5586, "step": 214 }, { "epoch": 0.10673948119647511, "grad_norm": 0.07419626208845845, "learning_rate": 9.9854782557191e-06, "loss": 0.5828, "step": 215 }, { "epoch": 0.10723594389971454, "grad_norm": 0.07684498206577063, "learning_rate": 9.985329005918702e-06, "loss": 0.5651, "step": 216 }, { "epoch": 0.10773240660295395, "grad_norm": 0.07714407534776108, "learning_rate": 9.9851789941907e-06, "loss": 0.6076, "step": 217 }, { "epoch": 0.10822886930619337, "grad_norm": 0.07198389859968202, "learning_rate": 9.98502822055802e-06, "loss": 0.5612, "step": 218 }, { "epoch": 0.1087253320094328, "grad_norm": 0.07571784717245837, "learning_rate": 9.984876685043703e-06, "loss": 0.6116, "step": 219 }, { "epoch": 0.10922179471267221, "grad_norm": 0.07953051944371987, "learning_rate": 9.984724387670912e-06, "loss": 0.5886, "step": 220 }, { "epoch": 0.10971825741591162, "grad_norm": 0.07770772328549033, "learning_rate": 9.984571328462924e-06, "loss": 0.5531, "step": 221 }, { "epoch": 0.11021472011915105, "grad_norm": 0.07536582074411971, "learning_rate": 9.984417507443128e-06, "loss": 0.595, "step": 222 }, { "epoch": 0.11071118282239047, "grad_norm": 0.07405994836904185, "learning_rate": 9.984262924635036e-06, "loss": 0.5833, "step": 223 }, { "epoch": 0.11120764552562988, "grad_norm": 0.08017377083919287, "learning_rate": 9.984107580062273e-06, "loss": 0.6021, "step": 224 }, { "epoch": 0.11170410822886931, "grad_norm": 0.08072092192849259, "learning_rate": 9.983951473748579e-06, "loss": 0.5736, "step": 225 }, { "epoch": 0.11220057093210872, "grad_norm": 0.07534405176458785, "learning_rate": 9.983794605717815e-06, "loss": 0.5621, "step": 226 }, { "epoch": 0.11269703363534815, "grad_norm": 0.07547844096854822, "learning_rate": 9.983636975993953e-06, "loss": 0.6161, "step": 227 }, { "epoch": 0.11319349633858757, "grad_norm": 0.07934085975212889, "learning_rate": 9.983478584601088e-06, "loss": 0.5867, "step": 228 }, { "epoch": 0.11368995904182698, "grad_norm": 0.07912470758493029, "learning_rate": 9.983319431563424e-06, "loss": 0.5841, "step": 229 }, { "epoch": 0.11418642174506641, "grad_norm": 0.07275368539544204, "learning_rate": 9.983159516905287e-06, "loss": 0.5827, "step": 230 }, { "epoch": 0.11468288444830582, "grad_norm": 0.07569968972355616, "learning_rate": 9.982998840651117e-06, "loss": 0.5851, "step": 231 }, { "epoch": 0.11517934715154524, "grad_norm": 0.07962842640771996, "learning_rate": 9.98283740282547e-06, "loss": 0.596, "step": 232 }, { "epoch": 0.11567580985478466, "grad_norm": 0.07804031431161682, 
"learning_rate": 9.982675203453018e-06, "loss": 0.571, "step": 233 }, { "epoch": 0.11617227255802408, "grad_norm": 0.07700514481764709, "learning_rate": 9.982512242558555e-06, "loss": 0.565, "step": 234 }, { "epoch": 0.11666873526126349, "grad_norm": 0.0708208906544335, "learning_rate": 9.982348520166982e-06, "loss": 0.5604, "step": 235 }, { "epoch": 0.11716519796450292, "grad_norm": 0.0754541608709195, "learning_rate": 9.982184036303326e-06, "loss": 0.5987, "step": 236 }, { "epoch": 0.11766166066774233, "grad_norm": 0.07554717965041857, "learning_rate": 9.982018790992722e-06, "loss": 0.5691, "step": 237 }, { "epoch": 0.11815812337098175, "grad_norm": 0.07752289757631088, "learning_rate": 9.981852784260427e-06, "loss": 0.5637, "step": 238 }, { "epoch": 0.11865458607422118, "grad_norm": 0.0762749443511498, "learning_rate": 9.98168601613181e-06, "loss": 0.6146, "step": 239 }, { "epoch": 0.11915104877746059, "grad_norm": 0.07530073323828042, "learning_rate": 9.981518486632363e-06, "loss": 0.5591, "step": 240 }, { "epoch": 0.1196475114807, "grad_norm": 0.07507111474292993, "learning_rate": 9.981350195787685e-06, "loss": 0.551, "step": 241 }, { "epoch": 0.12014397418393943, "grad_norm": 0.07371412964257293, "learning_rate": 9.981181143623501e-06, "loss": 0.5903, "step": 242 }, { "epoch": 0.12064043688717885, "grad_norm": 0.0704662212442052, "learning_rate": 9.981011330165648e-06, "loss": 0.5489, "step": 243 }, { "epoch": 0.12113689959041828, "grad_norm": 0.07236692881961186, "learning_rate": 9.980840755440075e-06, "loss": 0.5612, "step": 244 }, { "epoch": 0.12163336229365769, "grad_norm": 0.07532610257826663, "learning_rate": 9.980669419472856e-06, "loss": 0.555, "step": 245 }, { "epoch": 0.1221298249968971, "grad_norm": 0.07443526564494042, "learning_rate": 9.980497322290174e-06, "loss": 0.5804, "step": 246 }, { "epoch": 0.12262628770013653, "grad_norm": 0.07415250825090586, "learning_rate": 9.980324463918333e-06, "loss": 0.5916, "step": 247 }, { "epoch": 0.12312275040337595, "grad_norm": 0.07255581056843356, "learning_rate": 9.980150844383753e-06, "loss": 0.5838, "step": 248 }, { "epoch": 0.12361921310661536, "grad_norm": 0.07386636777765433, "learning_rate": 9.979976463712966e-06, "loss": 0.5536, "step": 249 }, { "epoch": 0.12411567580985479, "grad_norm": 0.07611005306904636, "learning_rate": 9.979801321932624e-06, "loss": 0.6237, "step": 250 }, { "epoch": 0.1246121385130942, "grad_norm": 0.07533106950641502, "learning_rate": 9.979625419069495e-06, "loss": 0.5828, "step": 251 }, { "epoch": 0.12510860121633363, "grad_norm": 0.07288766763589251, "learning_rate": 9.979448755150461e-06, "loss": 0.5625, "step": 252 }, { "epoch": 0.12560506391957305, "grad_norm": 0.07783186390956214, "learning_rate": 9.979271330202527e-06, "loss": 0.5782, "step": 253 }, { "epoch": 0.12610152662281246, "grad_norm": 0.0739141983854619, "learning_rate": 9.979093144252804e-06, "loss": 0.6089, "step": 254 }, { "epoch": 0.12659798932605187, "grad_norm": 0.0776536814927388, "learning_rate": 9.978914197328531e-06, "loss": 0.593, "step": 255 }, { "epoch": 0.1270944520292913, "grad_norm": 0.08012066470866044, "learning_rate": 9.978734489457051e-06, "loss": 0.5998, "step": 256 }, { "epoch": 0.12759091473253073, "grad_norm": 0.07669771884662993, "learning_rate": 9.978554020665834e-06, "loss": 0.534, "step": 257 }, { "epoch": 0.12808737743577014, "grad_norm": 0.07662332363179943, "learning_rate": 9.978372790982457e-06, "loss": 0.5994, "step": 258 }, { "epoch": 0.12858384013900956, "grad_norm": 0.07043282149907523, 
"learning_rate": 9.978190800434624e-06, "loss": 0.5717, "step": 259 }, { "epoch": 0.12908030284224897, "grad_norm": 0.07420387749370701, "learning_rate": 9.978008049050145e-06, "loss": 0.5903, "step": 260 }, { "epoch": 0.1295767655454884, "grad_norm": 0.07327527104462504, "learning_rate": 9.977824536856953e-06, "loss": 0.562, "step": 261 }, { "epoch": 0.1300732282487278, "grad_norm": 0.07017862418856051, "learning_rate": 9.977640263883095e-06, "loss": 0.5443, "step": 262 }, { "epoch": 0.13056969095196724, "grad_norm": 0.07109669443509863, "learning_rate": 9.97745523015673e-06, "loss": 0.5225, "step": 263 }, { "epoch": 0.13106615365520666, "grad_norm": 0.07932382983195517, "learning_rate": 9.977269435706142e-06, "loss": 0.5852, "step": 264 }, { "epoch": 0.13156261635844607, "grad_norm": 0.07553526767911642, "learning_rate": 9.977082880559725e-06, "loss": 0.5724, "step": 265 }, { "epoch": 0.13205907906168549, "grad_norm": 0.07454386060375846, "learning_rate": 9.976895564745993e-06, "loss": 0.5578, "step": 266 }, { "epoch": 0.1325555417649249, "grad_norm": 0.07445473105285273, "learning_rate": 9.976707488293569e-06, "loss": 0.5321, "step": 267 }, { "epoch": 0.13305200446816434, "grad_norm": 0.07898175514251754, "learning_rate": 9.976518651231203e-06, "loss": 0.622, "step": 268 }, { "epoch": 0.13354846717140376, "grad_norm": 0.07381948302565586, "learning_rate": 9.976329053587754e-06, "loss": 0.5391, "step": 269 }, { "epoch": 0.13404492987464317, "grad_norm": 0.0806159559251752, "learning_rate": 9.976138695392196e-06, "loss": 0.5758, "step": 270 }, { "epoch": 0.13454139257788258, "grad_norm": 0.08268280266256835, "learning_rate": 9.975947576673628e-06, "loss": 0.5687, "step": 271 }, { "epoch": 0.135037855281122, "grad_norm": 0.07927875514189735, "learning_rate": 9.975755697461254e-06, "loss": 0.5535, "step": 272 }, { "epoch": 0.1355343179843614, "grad_norm": 0.07325803338115366, "learning_rate": 9.975563057784402e-06, "loss": 0.5325, "step": 273 }, { "epoch": 0.13603078068760086, "grad_norm": 0.07448375330798707, "learning_rate": 9.975369657672514e-06, "loss": 0.5782, "step": 274 }, { "epoch": 0.13652724339084027, "grad_norm": 0.07525898100338235, "learning_rate": 9.975175497155149e-06, "loss": 0.5459, "step": 275 }, { "epoch": 0.13702370609407968, "grad_norm": 0.0757930579472259, "learning_rate": 9.97498057626198e-06, "loss": 0.5652, "step": 276 }, { "epoch": 0.1375201687973191, "grad_norm": 0.07625175178868698, "learning_rate": 9.974784895022796e-06, "loss": 0.5101, "step": 277 }, { "epoch": 0.1380166315005585, "grad_norm": 0.07588767928782164, "learning_rate": 9.974588453467506e-06, "loss": 0.5882, "step": 278 }, { "epoch": 0.13851309420379793, "grad_norm": 0.0733443443040492, "learning_rate": 9.974391251626132e-06, "loss": 0.5476, "step": 279 }, { "epoch": 0.13900955690703737, "grad_norm": 0.07328131307535096, "learning_rate": 9.974193289528814e-06, "loss": 0.5836, "step": 280 }, { "epoch": 0.13950601961027678, "grad_norm": 0.07874953449684817, "learning_rate": 9.973994567205806e-06, "loss": 0.6189, "step": 281 }, { "epoch": 0.1400024823135162, "grad_norm": 0.07891877733000807, "learning_rate": 9.973795084687481e-06, "loss": 0.5618, "step": 282 }, { "epoch": 0.1404989450167556, "grad_norm": 0.0785897983413089, "learning_rate": 9.973594842004327e-06, "loss": 0.584, "step": 283 }, { "epoch": 0.14099540771999503, "grad_norm": 0.07245957200919304, "learning_rate": 9.973393839186946e-06, "loss": 0.549, "step": 284 }, { "epoch": 0.14149187042323447, "grad_norm": 0.07728054437478935, 
"learning_rate": 9.973192076266058e-06, "loss": 0.5761, "step": 285 }, { "epoch": 0.14198833312647388, "grad_norm": 0.09235613660316606, "learning_rate": 9.972989553272501e-06, "loss": 0.6147, "step": 286 }, { "epoch": 0.1424847958297133, "grad_norm": 0.07380406310732682, "learning_rate": 9.972786270237228e-06, "loss": 0.6105, "step": 287 }, { "epoch": 0.1429812585329527, "grad_norm": 0.07278111762088124, "learning_rate": 9.972582227191305e-06, "loss": 0.5478, "step": 288 }, { "epoch": 0.14347772123619212, "grad_norm": 0.07524458713971298, "learning_rate": 9.972377424165918e-06, "loss": 0.5486, "step": 289 }, { "epoch": 0.14397418393943154, "grad_norm": 0.08081462769097447, "learning_rate": 9.972171861192368e-06, "loss": 0.5772, "step": 290 }, { "epoch": 0.14447064664267098, "grad_norm": 0.07598914546101065, "learning_rate": 9.97196553830207e-06, "loss": 0.5576, "step": 291 }, { "epoch": 0.1449671093459104, "grad_norm": 0.07361633688826458, "learning_rate": 9.971758455526562e-06, "loss": 0.6215, "step": 292 }, { "epoch": 0.1454635720491498, "grad_norm": 0.07371424684661225, "learning_rate": 9.971550612897487e-06, "loss": 0.5612, "step": 293 }, { "epoch": 0.14596003475238922, "grad_norm": 0.07537311603859617, "learning_rate": 9.971342010446615e-06, "loss": 0.5596, "step": 294 }, { "epoch": 0.14645649745562864, "grad_norm": 0.07370938816688641, "learning_rate": 9.971132648205826e-06, "loss": 0.5895, "step": 295 }, { "epoch": 0.14695296015886805, "grad_norm": 0.07938936326373351, "learning_rate": 9.97092252620712e-06, "loss": 0.5979, "step": 296 }, { "epoch": 0.1474494228621075, "grad_norm": 0.07655933756147211, "learning_rate": 9.970711644482605e-06, "loss": 0.571, "step": 297 }, { "epoch": 0.1479458855653469, "grad_norm": 0.06978512801359449, "learning_rate": 9.970500003064517e-06, "loss": 0.5677, "step": 298 }, { "epoch": 0.14844234826858632, "grad_norm": 0.07479095468689204, "learning_rate": 9.970287601985197e-06, "loss": 0.5436, "step": 299 }, { "epoch": 0.14893881097182574, "grad_norm": 0.07551809721412871, "learning_rate": 9.970074441277111e-06, "loss": 0.5156, "step": 300 }, { "epoch": 0.14943527367506515, "grad_norm": 0.0726621639022755, "learning_rate": 9.969860520972835e-06, "loss": 0.5155, "step": 301 }, { "epoch": 0.1499317363783046, "grad_norm": 0.07510700099410206, "learning_rate": 9.969645841105065e-06, "loss": 0.5828, "step": 302 }, { "epoch": 0.150428199081544, "grad_norm": 0.07620065582710349, "learning_rate": 9.96943040170661e-06, "loss": 0.5825, "step": 303 }, { "epoch": 0.15092466178478342, "grad_norm": 0.07454115542645492, "learning_rate": 9.969214202810397e-06, "loss": 0.566, "step": 304 }, { "epoch": 0.15142112448802283, "grad_norm": 0.07571175610952988, "learning_rate": 9.968997244449467e-06, "loss": 0.5883, "step": 305 }, { "epoch": 0.15191758719126225, "grad_norm": 0.07918443748502917, "learning_rate": 9.968779526656981e-06, "loss": 0.5837, "step": 306 }, { "epoch": 0.15241404989450166, "grad_norm": 0.08231002500440109, "learning_rate": 9.968561049466214e-06, "loss": 0.5906, "step": 307 }, { "epoch": 0.1529105125977411, "grad_norm": 0.08172637702489811, "learning_rate": 9.968341812910553e-06, "loss": 0.5776, "step": 308 }, { "epoch": 0.15340697530098052, "grad_norm": 0.07541026588900428, "learning_rate": 9.96812181702351e-06, "loss": 0.5984, "step": 309 }, { "epoch": 0.15390343800421993, "grad_norm": 0.07833737504960553, "learning_rate": 9.967901061838703e-06, "loss": 0.5902, "step": 310 }, { "epoch": 0.15439990070745935, "grad_norm": 0.08772300134995734, 
"learning_rate": 9.967679547389874e-06, "loss": 0.5492, "step": 311 }, { "epoch": 0.15489636341069876, "grad_norm": 0.0802801470852147, "learning_rate": 9.967457273710877e-06, "loss": 0.6016, "step": 312 }, { "epoch": 0.1553928261139382, "grad_norm": 0.07234712570516748, "learning_rate": 9.967234240835682e-06, "loss": 0.5536, "step": 313 }, { "epoch": 0.15588928881717762, "grad_norm": 0.07228246183847936, "learning_rate": 9.967010448798376e-06, "loss": 0.5698, "step": 314 }, { "epoch": 0.15638575152041703, "grad_norm": 0.0791197093555623, "learning_rate": 9.966785897633164e-06, "loss": 0.5761, "step": 315 }, { "epoch": 0.15688221422365645, "grad_norm": 0.0809617690187681, "learning_rate": 9.966560587374363e-06, "loss": 0.5784, "step": 316 }, { "epoch": 0.15737867692689586, "grad_norm": 0.07455628784150602, "learning_rate": 9.96633451805641e-06, "loss": 0.5549, "step": 317 }, { "epoch": 0.15787513963013527, "grad_norm": 0.07556973516566123, "learning_rate": 9.966107689713855e-06, "loss": 0.5946, "step": 318 }, { "epoch": 0.15837160233337472, "grad_norm": 0.07815659673820999, "learning_rate": 9.965880102381364e-06, "loss": 0.5446, "step": 319 }, { "epoch": 0.15886806503661413, "grad_norm": 0.07204342389468431, "learning_rate": 9.965651756093724e-06, "loss": 0.5454, "step": 320 }, { "epoch": 0.15936452773985355, "grad_norm": 0.07453809229436384, "learning_rate": 9.965422650885829e-06, "loss": 0.5483, "step": 321 }, { "epoch": 0.15986099044309296, "grad_norm": 0.07787163871393117, "learning_rate": 9.965192786792696e-06, "loss": 0.5361, "step": 322 }, { "epoch": 0.16035745314633237, "grad_norm": 0.07623670256160507, "learning_rate": 9.964962163849457e-06, "loss": 0.6004, "step": 323 }, { "epoch": 0.1608539158495718, "grad_norm": 0.073269081360789, "learning_rate": 9.964730782091358e-06, "loss": 0.5865, "step": 324 }, { "epoch": 0.16135037855281123, "grad_norm": 0.08034710038842101, "learning_rate": 9.964498641553764e-06, "loss": 0.6188, "step": 325 }, { "epoch": 0.16184684125605064, "grad_norm": 0.07878506103100462, "learning_rate": 9.96426574227215e-06, "loss": 0.5174, "step": 326 }, { "epoch": 0.16234330395929006, "grad_norm": 0.0811582536605504, "learning_rate": 9.964032084282115e-06, "loss": 0.5869, "step": 327 }, { "epoch": 0.16283976666252947, "grad_norm": 0.07615396770698715, "learning_rate": 9.963797667619368e-06, "loss": 0.5489, "step": 328 }, { "epoch": 0.1633362293657689, "grad_norm": 0.07375822341527581, "learning_rate": 9.963562492319733e-06, "loss": 0.5423, "step": 329 }, { "epoch": 0.16383269206900833, "grad_norm": 0.07342225085820822, "learning_rate": 9.96332655841916e-06, "loss": 0.5443, "step": 330 }, { "epoch": 0.16432915477224774, "grad_norm": 0.07522646471314104, "learning_rate": 9.963089865953701e-06, "loss": 0.5947, "step": 331 }, { "epoch": 0.16482561747548716, "grad_norm": 0.07093754634633222, "learning_rate": 9.962852414959534e-06, "loss": 0.5636, "step": 332 }, { "epoch": 0.16532208017872657, "grad_norm": 0.07554089167522272, "learning_rate": 9.962614205472948e-06, "loss": 0.5728, "step": 333 }, { "epoch": 0.16581854288196599, "grad_norm": 0.07642283336009119, "learning_rate": 9.96237523753035e-06, "loss": 0.5617, "step": 334 }, { "epoch": 0.1663150055852054, "grad_norm": 0.0754751987943475, "learning_rate": 9.962135511168263e-06, "loss": 0.5669, "step": 335 }, { "epoch": 0.16681146828844484, "grad_norm": 0.07530280584445144, "learning_rate": 9.961895026423325e-06, "loss": 0.5727, "step": 336 }, { "epoch": 0.16730793099168426, "grad_norm": 0.07544117416085358, 
"learning_rate": 9.96165378333229e-06, "loss": 0.5597, "step": 337 }, { "epoch": 0.16780439369492367, "grad_norm": 0.07451626557128686, "learning_rate": 9.961411781932029e-06, "loss": 0.5533, "step": 338 }, { "epoch": 0.16830085639816308, "grad_norm": 0.07645683165516677, "learning_rate": 9.961169022259527e-06, "loss": 0.5832, "step": 339 }, { "epoch": 0.1687973191014025, "grad_norm": 0.074515428017266, "learning_rate": 9.960925504351885e-06, "loss": 0.5517, "step": 340 }, { "epoch": 0.1692937818046419, "grad_norm": 0.07453622025099385, "learning_rate": 9.960681228246323e-06, "loss": 0.5404, "step": 341 }, { "epoch": 0.16979024450788135, "grad_norm": 0.07490735728929213, "learning_rate": 9.960436193980175e-06, "loss": 0.5322, "step": 342 }, { "epoch": 0.17028670721112077, "grad_norm": 0.07533458746856145, "learning_rate": 9.960190401590886e-06, "loss": 0.556, "step": 343 }, { "epoch": 0.17078316991436018, "grad_norm": 0.07518188762712508, "learning_rate": 9.959943851116027e-06, "loss": 0.5683, "step": 344 }, { "epoch": 0.1712796326175996, "grad_norm": 0.07031283405558475, "learning_rate": 9.959696542593278e-06, "loss": 0.5393, "step": 345 }, { "epoch": 0.171776095320839, "grad_norm": 0.0748003316183382, "learning_rate": 9.959448476060434e-06, "loss": 0.5515, "step": 346 }, { "epoch": 0.17227255802407845, "grad_norm": 0.07589049005675153, "learning_rate": 9.959199651555409e-06, "loss": 0.587, "step": 347 }, { "epoch": 0.17276902072731787, "grad_norm": 0.0807346094812496, "learning_rate": 9.95895006911623e-06, "loss": 0.6032, "step": 348 }, { "epoch": 0.17326548343055728, "grad_norm": 0.07472065836941111, "learning_rate": 9.958699728781046e-06, "loss": 0.5518, "step": 349 }, { "epoch": 0.1737619461337967, "grad_norm": 0.08115486432870875, "learning_rate": 9.958448630588115e-06, "loss": 0.62, "step": 350 }, { "epoch": 0.1742584088370361, "grad_norm": 0.07873831391601213, "learning_rate": 9.958196774575814e-06, "loss": 0.5792, "step": 351 }, { "epoch": 0.17475487154027552, "grad_norm": 0.08112665550357574, "learning_rate": 9.957944160782634e-06, "loss": 0.5438, "step": 352 }, { "epoch": 0.17525133424351497, "grad_norm": 0.07331800307555766, "learning_rate": 9.957690789247183e-06, "loss": 0.5474, "step": 353 }, { "epoch": 0.17574779694675438, "grad_norm": 0.07563212018228109, "learning_rate": 9.957436660008187e-06, "loss": 0.5735, "step": 354 }, { "epoch": 0.1762442596499938, "grad_norm": 0.07469151724699816, "learning_rate": 9.957181773104482e-06, "loss": 0.5557, "step": 355 }, { "epoch": 0.1767407223532332, "grad_norm": 0.07800059494765482, "learning_rate": 9.956926128575026e-06, "loss": 0.5756, "step": 356 }, { "epoch": 0.17723718505647262, "grad_norm": 0.07027184828449723, "learning_rate": 9.95666972645889e-06, "loss": 0.5288, "step": 357 }, { "epoch": 0.17773364775971204, "grad_norm": 0.07683077691940579, "learning_rate": 9.95641256679526e-06, "loss": 0.5958, "step": 358 }, { "epoch": 0.17823011046295148, "grad_norm": 0.07576235890205875, "learning_rate": 9.95615464962344e-06, "loss": 0.5925, "step": 359 }, { "epoch": 0.1787265731661909, "grad_norm": 0.07861161538869584, "learning_rate": 9.955895974982848e-06, "loss": 0.5677, "step": 360 }, { "epoch": 0.1792230358694303, "grad_norm": 0.07894396117012514, "learning_rate": 9.955636542913017e-06, "loss": 0.606, "step": 361 }, { "epoch": 0.17971949857266972, "grad_norm": 0.08030622968705446, "learning_rate": 9.955376353453599e-06, "loss": 0.5616, "step": 362 }, { "epoch": 0.18021596127590914, "grad_norm": 0.07372907998809378, "learning_rate": 
9.955115406644357e-06, "loss": 0.5405, "step": 363 }, { "epoch": 0.18071242397914858, "grad_norm": 0.07619467670383653, "learning_rate": 9.954853702525176e-06, "loss": 0.5579, "step": 364 }, { "epoch": 0.181208886682388, "grad_norm": 0.07535913099715885, "learning_rate": 9.95459124113605e-06, "loss": 0.5446, "step": 365 }, { "epoch": 0.1817053493856274, "grad_norm": 0.07285682873619495, "learning_rate": 9.954328022517094e-06, "loss": 0.567, "step": 366 }, { "epoch": 0.18220181208886682, "grad_norm": 0.0794068286097486, "learning_rate": 9.954064046708537e-06, "loss": 0.5818, "step": 367 }, { "epoch": 0.18269827479210624, "grad_norm": 0.07850596036013317, "learning_rate": 9.953799313750723e-06, "loss": 0.5736, "step": 368 }, { "epoch": 0.18319473749534565, "grad_norm": 0.07724909085841658, "learning_rate": 9.953533823684112e-06, "loss": 0.5274, "step": 369 }, { "epoch": 0.1836912001985851, "grad_norm": 0.08096743311474128, "learning_rate": 9.953267576549279e-06, "loss": 0.5494, "step": 370 }, { "epoch": 0.1841876629018245, "grad_norm": 0.07398016211295078, "learning_rate": 9.953000572386916e-06, "loss": 0.6018, "step": 371 }, { "epoch": 0.18468412560506392, "grad_norm": 0.07594761131816251, "learning_rate": 9.952732811237833e-06, "loss": 0.5691, "step": 372 }, { "epoch": 0.18518058830830333, "grad_norm": 0.07473651094317643, "learning_rate": 9.952464293142951e-06, "loss": 0.5498, "step": 373 }, { "epoch": 0.18567705101154275, "grad_norm": 0.07447128483703518, "learning_rate": 9.952195018143308e-06, "loss": 0.5824, "step": 374 }, { "epoch": 0.1861735137147822, "grad_norm": 0.07317042373061025, "learning_rate": 9.951924986280057e-06, "loss": 0.5782, "step": 375 }, { "epoch": 0.1866699764180216, "grad_norm": 0.07459200175934973, "learning_rate": 9.951654197594471e-06, "loss": 0.5294, "step": 376 }, { "epoch": 0.18716643912126102, "grad_norm": 0.07699746371876615, "learning_rate": 9.951382652127935e-06, "loss": 0.5476, "step": 377 }, { "epoch": 0.18766290182450043, "grad_norm": 0.07448223810955458, "learning_rate": 9.951110349921951e-06, "loss": 0.543, "step": 378 }, { "epoch": 0.18815936452773985, "grad_norm": 0.07973769043959483, "learning_rate": 9.950837291018133e-06, "loss": 0.5619, "step": 379 }, { "epoch": 0.18865582723097926, "grad_norm": 0.07310856359312369, "learning_rate": 9.950563475458218e-06, "loss": 0.5514, "step": 380 }, { "epoch": 0.1891522899342187, "grad_norm": 0.07375459505878652, "learning_rate": 9.950288903284049e-06, "loss": 0.5505, "step": 381 }, { "epoch": 0.18964875263745812, "grad_norm": 0.0728782672537732, "learning_rate": 9.950013574537595e-06, "loss": 0.5459, "step": 382 }, { "epoch": 0.19014521534069753, "grad_norm": 0.07740246677875098, "learning_rate": 9.949737489260933e-06, "loss": 0.5548, "step": 383 }, { "epoch": 0.19064167804393695, "grad_norm": 0.07391007909762363, "learning_rate": 9.949460647496258e-06, "loss": 0.5493, "step": 384 }, { "epoch": 0.19113814074717636, "grad_norm": 0.07598698030318797, "learning_rate": 9.949183049285884e-06, "loss": 0.5422, "step": 385 }, { "epoch": 0.19163460345041577, "grad_norm": 0.07479001021475261, "learning_rate": 9.948904694672232e-06, "loss": 0.5607, "step": 386 }, { "epoch": 0.19213106615365522, "grad_norm": 0.07615526983796699, "learning_rate": 9.94862558369785e-06, "loss": 0.5476, "step": 387 }, { "epoch": 0.19262752885689463, "grad_norm": 0.08074869204916085, "learning_rate": 9.94834571640539e-06, "loss": 0.5802, "step": 388 }, { "epoch": 0.19312399156013405, "grad_norm": 0.07415445663241096, "learning_rate": 
9.948065092837631e-06, "loss": 0.5499, "step": 389 }, { "epoch": 0.19362045426337346, "grad_norm": 0.08328624631064929, "learning_rate": 9.947783713037456e-06, "loss": 0.5712, "step": 390 }, { "epoch": 0.19411691696661287, "grad_norm": 0.07891866140239774, "learning_rate": 9.947501577047874e-06, "loss": 0.5556, "step": 391 }, { "epoch": 0.19461337966985232, "grad_norm": 0.07476368567789123, "learning_rate": 9.947218684912001e-06, "loss": 0.5371, "step": 392 }, { "epoch": 0.19510984237309173, "grad_norm": 0.07675097706660683, "learning_rate": 9.946935036673076e-06, "loss": 0.5569, "step": 393 }, { "epoch": 0.19560630507633114, "grad_norm": 0.07496823454348485, "learning_rate": 9.94665063237445e-06, "loss": 0.5464, "step": 394 }, { "epoch": 0.19610276777957056, "grad_norm": 0.07818052652954, "learning_rate": 9.946365472059586e-06, "loss": 0.5942, "step": 395 }, { "epoch": 0.19659923048280997, "grad_norm": 0.07282933318423163, "learning_rate": 9.94607955577207e-06, "loss": 0.5462, "step": 396 }, { "epoch": 0.1970956931860494, "grad_norm": 0.0770165134901903, "learning_rate": 9.945792883555597e-06, "loss": 0.556, "step": 397 }, { "epoch": 0.19759215588928883, "grad_norm": 0.07225926792740466, "learning_rate": 9.945505455453983e-06, "loss": 0.5248, "step": 398 }, { "epoch": 0.19808861859252824, "grad_norm": 0.07352401208203305, "learning_rate": 9.945217271511154e-06, "loss": 0.5481, "step": 399 }, { "epoch": 0.19858508129576766, "grad_norm": 0.07321353135194356, "learning_rate": 9.944928331771157e-06, "loss": 0.5812, "step": 400 }, { "epoch": 0.19908154399900707, "grad_norm": 0.07936442126421304, "learning_rate": 9.944638636278148e-06, "loss": 0.5859, "step": 401 }, { "epoch": 0.19957800670224649, "grad_norm": 0.08462233434104645, "learning_rate": 9.944348185076406e-06, "loss": 0.561, "step": 402 }, { "epoch": 0.2000744694054859, "grad_norm": 0.07418308136608491, "learning_rate": 9.94405697821032e-06, "loss": 0.539, "step": 403 }, { "epoch": 0.20057093210872534, "grad_norm": 0.0734957540044604, "learning_rate": 9.9437650157244e-06, "loss": 0.5333, "step": 404 }, { "epoch": 0.20106739481196476, "grad_norm": 0.07575478601259587, "learning_rate": 9.943472297663262e-06, "loss": 0.5794, "step": 405 }, { "epoch": 0.20156385751520417, "grad_norm": 0.07710822943994676, "learning_rate": 9.943178824071646e-06, "loss": 0.5631, "step": 406 }, { "epoch": 0.20206032021844358, "grad_norm": 0.07942163293112221, "learning_rate": 9.942884594994405e-06, "loss": 0.5554, "step": 407 }, { "epoch": 0.202556782921683, "grad_norm": 0.07804741682603977, "learning_rate": 9.942589610476505e-06, "loss": 0.5989, "step": 408 }, { "epoch": 0.20305324562492244, "grad_norm": 0.07451188573309668, "learning_rate": 9.942293870563033e-06, "loss": 0.5556, "step": 409 }, { "epoch": 0.20354970832816185, "grad_norm": 0.07990449182548652, "learning_rate": 9.941997375299187e-06, "loss": 0.5745, "step": 410 }, { "epoch": 0.20404617103140127, "grad_norm": 0.07288130572381771, "learning_rate": 9.94170012473028e-06, "loss": 0.5495, "step": 411 }, { "epoch": 0.20454263373464068, "grad_norm": 0.08238928905167173, "learning_rate": 9.941402118901743e-06, "loss": 0.5754, "step": 412 }, { "epoch": 0.2050390964378801, "grad_norm": 0.07171358910901696, "learning_rate": 9.941103357859123e-06, "loss": 0.5121, "step": 413 }, { "epoch": 0.2055355591411195, "grad_norm": 0.07325000035957444, "learning_rate": 9.940803841648077e-06, "loss": 0.5408, "step": 414 }, { "epoch": 0.20603202184435895, "grad_norm": 0.07568269297866982, "learning_rate": 
9.940503570314386e-06, "loss": 0.5639, "step": 415 }, { "epoch": 0.20652848454759837, "grad_norm": 0.07535697622710859, "learning_rate": 9.940202543903939e-06, "loss": 0.5552, "step": 416 }, { "epoch": 0.20702494725083778, "grad_norm": 0.07271044147735484, "learning_rate": 9.939900762462741e-06, "loss": 0.5388, "step": 417 }, { "epoch": 0.2075214099540772, "grad_norm": 0.07604343150784937, "learning_rate": 9.939598226036919e-06, "loss": 0.5462, "step": 418 }, { "epoch": 0.2080178726573166, "grad_norm": 0.07562199878785764, "learning_rate": 9.939294934672707e-06, "loss": 0.5484, "step": 419 }, { "epoch": 0.20851433536055602, "grad_norm": 0.07652378289509235, "learning_rate": 9.93899088841646e-06, "loss": 0.5564, "step": 420 }, { "epoch": 0.20901079806379547, "grad_norm": 0.07287884883749866, "learning_rate": 9.938686087314647e-06, "loss": 0.5504, "step": 421 }, { "epoch": 0.20950726076703488, "grad_norm": 0.07792523226972259, "learning_rate": 9.938380531413851e-06, "loss": 0.587, "step": 422 }, { "epoch": 0.2100037234702743, "grad_norm": 0.0762025358754494, "learning_rate": 9.938074220760774e-06, "loss": 0.558, "step": 423 }, { "epoch": 0.2105001861735137, "grad_norm": 0.07611991158494283, "learning_rate": 9.937767155402224e-06, "loss": 0.5858, "step": 424 }, { "epoch": 0.21099664887675312, "grad_norm": 0.07515544902399915, "learning_rate": 9.937459335385137e-06, "loss": 0.5216, "step": 425 }, { "epoch": 0.21149311157999257, "grad_norm": 0.07843087477525403, "learning_rate": 9.93715076075656e-06, "loss": 0.5596, "step": 426 }, { "epoch": 0.21198957428323198, "grad_norm": 0.07327576359699134, "learning_rate": 9.936841431563646e-06, "loss": 0.5336, "step": 427 }, { "epoch": 0.2124860369864714, "grad_norm": 0.07622638457443663, "learning_rate": 9.936531347853677e-06, "loss": 0.5381, "step": 428 }, { "epoch": 0.2129824996897108, "grad_norm": 0.07207576415296685, "learning_rate": 9.936220509674044e-06, "loss": 0.5643, "step": 429 }, { "epoch": 0.21347896239295022, "grad_norm": 0.0754727197146664, "learning_rate": 9.935908917072253e-06, "loss": 0.5492, "step": 430 }, { "epoch": 0.21397542509618964, "grad_norm": 0.07524418858314785, "learning_rate": 9.935596570095923e-06, "loss": 0.5614, "step": 431 }, { "epoch": 0.21447188779942908, "grad_norm": 0.07703493076500291, "learning_rate": 9.935283468792793e-06, "loss": 0.5979, "step": 432 }, { "epoch": 0.2149683505026685, "grad_norm": 0.07870804145032773, "learning_rate": 9.934969613210718e-06, "loss": 0.5497, "step": 433 }, { "epoch": 0.2154648132059079, "grad_norm": 0.07806957664461764, "learning_rate": 9.934655003397663e-06, "loss": 0.5389, "step": 434 }, { "epoch": 0.21596127590914732, "grad_norm": 0.08327261700919904, "learning_rate": 9.934339639401712e-06, "loss": 0.5602, "step": 435 }, { "epoch": 0.21645773861238674, "grad_norm": 0.07699308770199148, "learning_rate": 9.934023521271063e-06, "loss": 0.5289, "step": 436 }, { "epoch": 0.21695420131562618, "grad_norm": 0.0835512172071361, "learning_rate": 9.93370664905403e-06, "loss": 0.5343, "step": 437 }, { "epoch": 0.2174506640188656, "grad_norm": 0.08008747971552731, "learning_rate": 9.933389022799042e-06, "loss": 0.5932, "step": 438 }, { "epoch": 0.217947126722105, "grad_norm": 0.07758508618190278, "learning_rate": 9.933070642554643e-06, "loss": 0.5512, "step": 439 }, { "epoch": 0.21844358942534442, "grad_norm": 0.07692264693388792, "learning_rate": 9.932751508369492e-06, "loss": 0.577, "step": 440 }, { "epoch": 0.21894005212858383, "grad_norm": 0.07702699031893523, "learning_rate": 
9.932431620292363e-06, "loss": 0.5902, "step": 441 }, { "epoch": 0.21943651483182325, "grad_norm": 0.072153289238452, "learning_rate": 9.932110978372145e-06, "loss": 0.5765, "step": 442 }, { "epoch": 0.2199329775350627, "grad_norm": 0.07279626044785806, "learning_rate": 9.931789582657847e-06, "loss": 0.5358, "step": 443 }, { "epoch": 0.2204294402383021, "grad_norm": 0.07892917204214381, "learning_rate": 9.931467433198585e-06, "loss": 0.5756, "step": 444 }, { "epoch": 0.22092590294154152, "grad_norm": 0.07994939514502163, "learning_rate": 9.931144530043597e-06, "loss": 0.5978, "step": 445 }, { "epoch": 0.22142236564478093, "grad_norm": 0.0718213225002427, "learning_rate": 9.93082087324223e-06, "loss": 0.5351, "step": 446 }, { "epoch": 0.22191882834802035, "grad_norm": 0.07658771671069661, "learning_rate": 9.930496462843954e-06, "loss": 0.5683, "step": 447 }, { "epoch": 0.22241529105125976, "grad_norm": 0.07738177647845323, "learning_rate": 9.93017129889835e-06, "loss": 0.5711, "step": 448 }, { "epoch": 0.2229117537544992, "grad_norm": 0.07708589306273327, "learning_rate": 9.92984538145511e-06, "loss": 0.5684, "step": 449 }, { "epoch": 0.22340821645773862, "grad_norm": 0.07332682077027314, "learning_rate": 9.929518710564048e-06, "loss": 0.5464, "step": 450 }, { "epoch": 0.22390467916097803, "grad_norm": 0.0765110806185179, "learning_rate": 9.929191286275088e-06, "loss": 0.5874, "step": 451 }, { "epoch": 0.22440114186421745, "grad_norm": 0.07381967262364783, "learning_rate": 9.928863108638275e-06, "loss": 0.545, "step": 452 }, { "epoch": 0.22489760456745686, "grad_norm": 0.11056903774769831, "learning_rate": 9.928534177703766e-06, "loss": 0.5859, "step": 453 }, { "epoch": 0.2253940672706963, "grad_norm": 0.07627989286727224, "learning_rate": 9.92820449352183e-06, "loss": 0.5719, "step": 454 }, { "epoch": 0.22589052997393572, "grad_norm": 0.07573328286843277, "learning_rate": 9.927874056142854e-06, "loss": 0.5622, "step": 455 }, { "epoch": 0.22638699267717513, "grad_norm": 0.07510309770011216, "learning_rate": 9.92754286561734e-06, "loss": 0.5315, "step": 456 }, { "epoch": 0.22688345538041454, "grad_norm": 0.07782909898671676, "learning_rate": 9.927210921995908e-06, "loss": 0.5428, "step": 457 }, { "epoch": 0.22737991808365396, "grad_norm": 0.07580841682733405, "learning_rate": 9.926878225329288e-06, "loss": 0.5477, "step": 458 }, { "epoch": 0.22787638078689337, "grad_norm": 0.0771663444644485, "learning_rate": 9.926544775668327e-06, "loss": 0.5514, "step": 459 }, { "epoch": 0.22837284349013282, "grad_norm": 0.07642125797083227, "learning_rate": 9.926210573063988e-06, "loss": 0.5357, "step": 460 }, { "epoch": 0.22886930619337223, "grad_norm": 0.07302968349139202, "learning_rate": 9.92587561756735e-06, "loss": 0.5141, "step": 461 }, { "epoch": 0.22936576889661164, "grad_norm": 0.07237104897439163, "learning_rate": 9.925539909229604e-06, "loss": 0.566, "step": 462 }, { "epoch": 0.22986223159985106, "grad_norm": 0.07717542777821745, "learning_rate": 9.925203448102058e-06, "loss": 0.5367, "step": 463 }, { "epoch": 0.23035869430309047, "grad_norm": 0.07671951294820442, "learning_rate": 9.924866234236134e-06, "loss": 0.5584, "step": 464 }, { "epoch": 0.2308551570063299, "grad_norm": 0.07746481421960554, "learning_rate": 9.924528267683372e-06, "loss": 0.5457, "step": 465 }, { "epoch": 0.23135161970956933, "grad_norm": 0.0710642404833701, "learning_rate": 9.92418954849542e-06, "loss": 0.5095, "step": 466 }, { "epoch": 0.23184808241280874, "grad_norm": 0.07191806610081845, "learning_rate": 
9.923850076724051e-06, "loss": 0.5267, "step": 467 }, { "epoch": 0.23234454511604816, "grad_norm": 0.07714488111055401, "learning_rate": 9.923509852421144e-06, "loss": 0.5202, "step": 468 }, { "epoch": 0.23284100781928757, "grad_norm": 0.07325260373990966, "learning_rate": 9.923168875638701e-06, "loss": 0.5433, "step": 469 }, { "epoch": 0.23333747052252699, "grad_norm": 0.07665478368369943, "learning_rate": 9.92282714642883e-06, "loss": 0.5683, "step": 470 }, { "epoch": 0.23383393322576643, "grad_norm": 0.07516189982030862, "learning_rate": 9.922484664843763e-06, "loss": 0.5862, "step": 471 }, { "epoch": 0.23433039592900584, "grad_norm": 0.07760388755225243, "learning_rate": 9.922141430935842e-06, "loss": 0.5648, "step": 472 }, { "epoch": 0.23482685863224526, "grad_norm": 0.07609970967938699, "learning_rate": 9.921797444757521e-06, "loss": 0.5241, "step": 473 }, { "epoch": 0.23532332133548467, "grad_norm": 0.07592265968380732, "learning_rate": 9.921452706361376e-06, "loss": 0.5935, "step": 474 }, { "epoch": 0.23581978403872408, "grad_norm": 0.07410650337647612, "learning_rate": 9.921107215800095e-06, "loss": 0.543, "step": 475 }, { "epoch": 0.2363162467419635, "grad_norm": 0.07506294240734249, "learning_rate": 9.92076097312648e-06, "loss": 0.5379, "step": 476 }, { "epoch": 0.23681270944520294, "grad_norm": 0.07440602775514321, "learning_rate": 9.920413978393449e-06, "loss": 0.5319, "step": 477 }, { "epoch": 0.23730917214844235, "grad_norm": 0.07528653646912088, "learning_rate": 9.920066231654035e-06, "loss": 0.5447, "step": 478 }, { "epoch": 0.23780563485168177, "grad_norm": 0.07376070732880129, "learning_rate": 9.919717732961383e-06, "loss": 0.536, "step": 479 }, { "epoch": 0.23830209755492118, "grad_norm": 0.0742063997969741, "learning_rate": 9.919368482368758e-06, "loss": 0.5376, "step": 480 }, { "epoch": 0.2387985602581606, "grad_norm": 0.07499906235302634, "learning_rate": 9.919018479929536e-06, "loss": 0.5609, "step": 481 }, { "epoch": 0.2392950229614, "grad_norm": 0.07489207589373727, "learning_rate": 9.91866772569721e-06, "loss": 0.5582, "step": 482 }, { "epoch": 0.23979148566463945, "grad_norm": 0.07892405072079473, "learning_rate": 9.918316219725388e-06, "loss": 0.554, "step": 483 }, { "epoch": 0.24028794836787887, "grad_norm": 0.07499498235806762, "learning_rate": 9.91796396206779e-06, "loss": 0.5505, "step": 484 }, { "epoch": 0.24078441107111828, "grad_norm": 0.08189343597090869, "learning_rate": 9.917610952778253e-06, "loss": 0.569, "step": 485 }, { "epoch": 0.2412808737743577, "grad_norm": 0.07400601949803445, "learning_rate": 9.917257191910732e-06, "loss": 0.5384, "step": 486 }, { "epoch": 0.2417773364775971, "grad_norm": 0.07901731252692805, "learning_rate": 9.91690267951929e-06, "loss": 0.5531, "step": 487 }, { "epoch": 0.24227379918083655, "grad_norm": 0.07870573377666493, "learning_rate": 9.916547415658111e-06, "loss": 0.5674, "step": 488 }, { "epoch": 0.24277026188407597, "grad_norm": 0.07911850022053922, "learning_rate": 9.91619140038149e-06, "loss": 0.5721, "step": 489 }, { "epoch": 0.24326672458731538, "grad_norm": 0.0761702583766454, "learning_rate": 9.915834633743838e-06, "loss": 0.5678, "step": 490 }, { "epoch": 0.2437631872905548, "grad_norm": 0.07665990511246726, "learning_rate": 9.915477115799682e-06, "loss": 0.5718, "step": 491 }, { "epoch": 0.2442596499937942, "grad_norm": 0.07872700425419384, "learning_rate": 9.915118846603661e-06, "loss": 0.5889, "step": 492 }, { "epoch": 0.24475611269703362, "grad_norm": 0.07395617425590947, "learning_rate": 
9.914759826210534e-06, "loss": 0.5295, "step": 493 }, { "epoch": 0.24525257540027307, "grad_norm": 0.07483689041527114, "learning_rate": 9.91440005467517e-06, "loss": 0.5562, "step": 494 }, { "epoch": 0.24574903810351248, "grad_norm": 0.07604027911059567, "learning_rate": 9.914039532052553e-06, "loss": 0.5269, "step": 495 }, { "epoch": 0.2462455008067519, "grad_norm": 0.08297691072826975, "learning_rate": 9.913678258397785e-06, "loss": 0.559, "step": 496 }, { "epoch": 0.2467419635099913, "grad_norm": 0.08073844389509381, "learning_rate": 9.91331623376608e-06, "loss": 0.5625, "step": 497 }, { "epoch": 0.24723842621323072, "grad_norm": 0.07564059049053361, "learning_rate": 9.912953458212769e-06, "loss": 0.5461, "step": 498 }, { "epoch": 0.24773488891647016, "grad_norm": 0.07604428278991741, "learning_rate": 9.912589931793294e-06, "loss": 0.5307, "step": 499 }, { "epoch": 0.24823135161970958, "grad_norm": 0.08041158897596114, "learning_rate": 9.912225654563214e-06, "loss": 0.5683, "step": 500 }, { "epoch": 0.248727814322949, "grad_norm": 0.07384877613143606, "learning_rate": 9.911860626578204e-06, "loss": 0.5493, "step": 501 }, { "epoch": 0.2492242770261884, "grad_norm": 0.07510188927857986, "learning_rate": 9.911494847894055e-06, "loss": 0.5718, "step": 502 }, { "epoch": 0.24972073972942782, "grad_norm": 0.07700133387805451, "learning_rate": 9.911128318566668e-06, "loss": 0.5205, "step": 503 }, { "epoch": 0.25021720243266726, "grad_norm": 0.08281754395286774, "learning_rate": 9.91076103865206e-06, "loss": 0.5612, "step": 504 }, { "epoch": 0.25021720243266726, "eval_loss": 0.5554779171943665, "eval_runtime": 259.1788, "eval_samples_per_second": 117.112, "eval_steps_per_second": 14.642, "step": 504 }, { "epoch": 0.25071366513590665, "grad_norm": 0.07858966858013353, "learning_rate": 9.910393008206367e-06, "loss": 0.5472, "step": 505 }, { "epoch": 0.2512101278391461, "grad_norm": 0.07292630081923775, "learning_rate": 9.910024227285832e-06, "loss": 0.5426, "step": 506 }, { "epoch": 0.2517065905423855, "grad_norm": 0.08028416640572211, "learning_rate": 9.909654695946823e-06, "loss": 0.607, "step": 507 }, { "epoch": 0.2522030532456249, "grad_norm": 0.0817807763355325, "learning_rate": 9.909284414245815e-06, "loss": 0.598, "step": 508 }, { "epoch": 0.25269951594886436, "grad_norm": 0.08037659726296809, "learning_rate": 9.908913382239396e-06, "loss": 0.5399, "step": 509 }, { "epoch": 0.25319597865210375, "grad_norm": 0.07398564863526783, "learning_rate": 9.908541599984276e-06, "loss": 0.5296, "step": 510 }, { "epoch": 0.2536924413553432, "grad_norm": 0.07735462650188997, "learning_rate": 9.908169067537274e-06, "loss": 0.5356, "step": 511 }, { "epoch": 0.2541889040585826, "grad_norm": 0.08361063862669793, "learning_rate": 9.907795784955327e-06, "loss": 0.5265, "step": 512 }, { "epoch": 0.254685366761822, "grad_norm": 0.07798942247321251, "learning_rate": 9.907421752295485e-06, "loss": 0.5287, "step": 513 }, { "epoch": 0.25518182946506146, "grad_norm": 0.07672209798270986, "learning_rate": 9.907046969614913e-06, "loss": 0.5767, "step": 514 }, { "epoch": 0.25567829216830085, "grad_norm": 0.07957972986175162, "learning_rate": 9.906671436970891e-06, "loss": 0.5744, "step": 515 }, { "epoch": 0.2561747548715403, "grad_norm": 0.0846841236085876, "learning_rate": 9.906295154420811e-06, "loss": 0.6052, "step": 516 }, { "epoch": 0.2566712175747797, "grad_norm": 0.07794538960974101, "learning_rate": 9.905918122022183e-06, "loss": 0.5237, "step": 517 }, { "epoch": 0.2571676802780191, "grad_norm": 
0.07389212609092594, "learning_rate": 9.905540339832632e-06, "loss": 0.5577, "step": 518 }, { "epoch": 0.25766414298125856, "grad_norm": 0.0747315216036527, "learning_rate": 9.905161807909893e-06, "loss": 0.5305, "step": 519 }, { "epoch": 0.25816060568449795, "grad_norm": 0.0738348078705221, "learning_rate": 9.90478252631182e-06, "loss": 0.5486, "step": 520 }, { "epoch": 0.2586570683877374, "grad_norm": 0.07980710964169403, "learning_rate": 9.90440249509638e-06, "loss": 0.5498, "step": 521 }, { "epoch": 0.2591535310909768, "grad_norm": 0.07444828843776845, "learning_rate": 9.904021714321656e-06, "loss": 0.5437, "step": 522 }, { "epoch": 0.2596499937942162, "grad_norm": 0.07359542898272906, "learning_rate": 9.903640184045842e-06, "loss": 0.5591, "step": 523 }, { "epoch": 0.2601464564974556, "grad_norm": 0.07496639130963002, "learning_rate": 9.90325790432725e-06, "loss": 0.5553, "step": 524 }, { "epoch": 0.26064291920069504, "grad_norm": 0.07983536927042992, "learning_rate": 9.902874875224305e-06, "loss": 0.575, "step": 525 }, { "epoch": 0.2611393819039345, "grad_norm": 0.08616908954969195, "learning_rate": 9.902491096795546e-06, "loss": 0.5634, "step": 526 }, { "epoch": 0.2616358446071739, "grad_norm": 0.07576389919139324, "learning_rate": 9.90210656909963e-06, "loss": 0.5424, "step": 527 }, { "epoch": 0.2621323073104133, "grad_norm": 0.07910649495763554, "learning_rate": 9.901721292195323e-06, "loss": 0.5469, "step": 528 }, { "epoch": 0.2626287700136527, "grad_norm": 0.08224845731683662, "learning_rate": 9.90133526614151e-06, "loss": 0.5707, "step": 529 }, { "epoch": 0.26312523271689214, "grad_norm": 0.07698783008012512, "learning_rate": 9.900948490997188e-06, "loss": 0.51, "step": 530 }, { "epoch": 0.2636216954201316, "grad_norm": 0.07524972064564883, "learning_rate": 9.90056096682147e-06, "loss": 0.5452, "step": 531 }, { "epoch": 0.26411815812337097, "grad_norm": 0.08116567664979311, "learning_rate": 9.900172693673584e-06, "loss": 0.5794, "step": 532 }, { "epoch": 0.2646146208266104, "grad_norm": 0.07193670033849783, "learning_rate": 9.899783671612868e-06, "loss": 0.5361, "step": 533 }, { "epoch": 0.2651110835298498, "grad_norm": 0.08234755603848125, "learning_rate": 9.899393900698781e-06, "loss": 0.5482, "step": 534 }, { "epoch": 0.26560754623308924, "grad_norm": 0.07336446642087938, "learning_rate": 9.899003380990893e-06, "loss": 0.5652, "step": 535 }, { "epoch": 0.2661040089363287, "grad_norm": 0.07633905125421954, "learning_rate": 9.898612112548886e-06, "loss": 0.5608, "step": 536 }, { "epoch": 0.26660047163956807, "grad_norm": 0.07758130640934637, "learning_rate": 9.898220095432562e-06, "loss": 0.567, "step": 537 }, { "epoch": 0.2670969343428075, "grad_norm": 0.07536999521216046, "learning_rate": 9.897827329701834e-06, "loss": 0.5478, "step": 538 }, { "epoch": 0.2675933970460469, "grad_norm": 0.07343119222091014, "learning_rate": 9.897433815416729e-06, "loss": 0.5643, "step": 539 }, { "epoch": 0.26808985974928634, "grad_norm": 0.07673148700528902, "learning_rate": 9.897039552637389e-06, "loss": 0.5488, "step": 540 }, { "epoch": 0.2685863224525257, "grad_norm": 0.07898713801424365, "learning_rate": 9.896644541424071e-06, "loss": 0.5334, "step": 541 }, { "epoch": 0.26908278515576517, "grad_norm": 0.07708722025335718, "learning_rate": 9.896248781837148e-06, "loss": 0.6273, "step": 542 }, { "epoch": 0.2695792478590046, "grad_norm": 0.07444543294418082, "learning_rate": 9.895852273937103e-06, "loss": 0.5379, "step": 543 }, { "epoch": 0.270075710562244, "grad_norm": 0.07292472383292499, 
"learning_rate": 9.895455017784536e-06, "loss": 0.5395, "step": 544 }, { "epoch": 0.27057217326548344, "grad_norm": 0.0765918576393523, "learning_rate": 9.895057013440163e-06, "loss": 0.5501, "step": 545 }, { "epoch": 0.2710686359687228, "grad_norm": 0.07405716714879264, "learning_rate": 9.894658260964814e-06, "loss": 0.512, "step": 546 }, { "epoch": 0.27156509867196227, "grad_norm": 0.07477208653408075, "learning_rate": 9.894258760419427e-06, "loss": 0.5439, "step": 547 }, { "epoch": 0.2720615613752017, "grad_norm": 0.11975148352494779, "learning_rate": 9.893858511865063e-06, "loss": 0.5547, "step": 548 }, { "epoch": 0.2725580240784411, "grad_norm": 0.07579011485998241, "learning_rate": 9.893457515362892e-06, "loss": 0.568, "step": 549 }, { "epoch": 0.27305448678168054, "grad_norm": 0.07553870503848775, "learning_rate": 9.893055770974202e-06, "loss": 0.514, "step": 550 }, { "epoch": 0.2735509494849199, "grad_norm": 0.07576057451860235, "learning_rate": 9.892653278760389e-06, "loss": 0.5584, "step": 551 }, { "epoch": 0.27404741218815937, "grad_norm": 0.07550325207874099, "learning_rate": 9.892250038782972e-06, "loss": 0.5471, "step": 552 }, { "epoch": 0.2745438748913988, "grad_norm": 0.07554409900790758, "learning_rate": 9.891846051103578e-06, "loss": 0.547, "step": 553 }, { "epoch": 0.2750403375946382, "grad_norm": 0.07453073205900222, "learning_rate": 9.89144131578395e-06, "loss": 0.5599, "step": 554 }, { "epoch": 0.27553680029787764, "grad_norm": 0.07678267422238774, "learning_rate": 9.891035832885942e-06, "loss": 0.5784, "step": 555 }, { "epoch": 0.276033263001117, "grad_norm": 0.07675969130014183, "learning_rate": 9.890629602471532e-06, "loss": 0.5526, "step": 556 }, { "epoch": 0.27652972570435647, "grad_norm": 0.07974318262948232, "learning_rate": 9.8902226246028e-06, "loss": 0.576, "step": 557 }, { "epoch": 0.27702618840759585, "grad_norm": 0.08261341445162391, "learning_rate": 9.889814899341951e-06, "loss": 0.5915, "step": 558 }, { "epoch": 0.2775226511108353, "grad_norm": 0.08009330000358027, "learning_rate": 9.889406426751296e-06, "loss": 0.5485, "step": 559 }, { "epoch": 0.27801911381407474, "grad_norm": 0.07270863892033039, "learning_rate": 9.888997206893266e-06, "loss": 0.5429, "step": 560 }, { "epoch": 0.2785155765173141, "grad_norm": 0.07411980046220777, "learning_rate": 9.8885872398304e-06, "loss": 0.5412, "step": 561 }, { "epoch": 0.27901203922055356, "grad_norm": 0.07859149685871875, "learning_rate": 9.888176525625358e-06, "loss": 0.5605, "step": 562 }, { "epoch": 0.27950850192379295, "grad_norm": 0.0750640057546601, "learning_rate": 9.887765064340909e-06, "loss": 0.5223, "step": 563 }, { "epoch": 0.2800049646270324, "grad_norm": 0.07240010518351624, "learning_rate": 9.88735285603994e-06, "loss": 0.5534, "step": 564 }, { "epoch": 0.28050142733027184, "grad_norm": 0.0749384510617718, "learning_rate": 9.886939900785448e-06, "loss": 0.5472, "step": 565 }, { "epoch": 0.2809978900335112, "grad_norm": 0.07800629633952484, "learning_rate": 9.88652619864055e-06, "loss": 0.5542, "step": 566 }, { "epoch": 0.28149435273675066, "grad_norm": 0.07478324010808358, "learning_rate": 9.886111749668472e-06, "loss": 0.508, "step": 567 }, { "epoch": 0.28199081543999005, "grad_norm": 0.07731985483902606, "learning_rate": 9.885696553932556e-06, "loss": 0.5436, "step": 568 }, { "epoch": 0.2824872781432295, "grad_norm": 0.0750464125046347, "learning_rate": 9.885280611496256e-06, "loss": 0.5576, "step": 569 }, { "epoch": 0.28298374084646893, "grad_norm": 0.07461691953178973, "learning_rate": 
9.884863922423147e-06, "loss": 0.5685, "step": 570 }, { "epoch": 0.2834802035497083, "grad_norm": 0.0794903059853572, "learning_rate": 9.884446486776908e-06, "loss": 0.5607, "step": 571 }, { "epoch": 0.28397666625294776, "grad_norm": 0.07287052459802834, "learning_rate": 9.884028304621341e-06, "loss": 0.5501, "step": 572 }, { "epoch": 0.28447312895618715, "grad_norm": 0.0788064114434013, "learning_rate": 9.883609376020356e-06, "loss": 0.5665, "step": 573 }, { "epoch": 0.2849695916594266, "grad_norm": 0.07663493649905155, "learning_rate": 9.883189701037981e-06, "loss": 0.6092, "step": 574 }, { "epoch": 0.285466054362666, "grad_norm": 0.07479983199950002, "learning_rate": 9.882769279738355e-06, "loss": 0.5612, "step": 575 }, { "epoch": 0.2859625170659054, "grad_norm": 0.07959254652565169, "learning_rate": 9.882348112185736e-06, "loss": 0.5183, "step": 576 }, { "epoch": 0.28645897976914486, "grad_norm": 0.0768553794284454, "learning_rate": 9.881926198444489e-06, "loss": 0.5195, "step": 577 }, { "epoch": 0.28695544247238425, "grad_norm": 0.07481572126590574, "learning_rate": 9.8815035385791e-06, "loss": 0.5684, "step": 578 }, { "epoch": 0.2874519051756237, "grad_norm": 0.07493217713328718, "learning_rate": 9.881080132654163e-06, "loss": 0.5947, "step": 579 }, { "epoch": 0.2879483678788631, "grad_norm": 0.07507225285212296, "learning_rate": 9.880655980734391e-06, "loss": 0.5543, "step": 580 }, { "epoch": 0.2884448305821025, "grad_norm": 0.07904490711995418, "learning_rate": 9.880231082884605e-06, "loss": 0.5551, "step": 581 }, { "epoch": 0.28894129328534196, "grad_norm": 0.07295892810312825, "learning_rate": 9.87980543916975e-06, "loss": 0.5464, "step": 582 }, { "epoch": 0.28943775598858135, "grad_norm": 0.07508250816956526, "learning_rate": 9.879379049654872e-06, "loss": 0.5399, "step": 583 }, { "epoch": 0.2899342186918208, "grad_norm": 0.07218965100235024, "learning_rate": 9.878951914405144e-06, "loss": 0.5525, "step": 584 }, { "epoch": 0.2904306813950602, "grad_norm": 0.07712208747682248, "learning_rate": 9.878524033485843e-06, "loss": 0.6231, "step": 585 }, { "epoch": 0.2909271440982996, "grad_norm": 0.07426488021803046, "learning_rate": 9.878095406962364e-06, "loss": 0.5467, "step": 586 }, { "epoch": 0.29142360680153906, "grad_norm": 0.07595195548804266, "learning_rate": 9.877666034900216e-06, "loss": 0.5568, "step": 587 }, { "epoch": 0.29192006950477845, "grad_norm": 0.07309985737679185, "learning_rate": 9.877235917365022e-06, "loss": 0.5023, "step": 588 }, { "epoch": 0.2924165322080179, "grad_norm": 0.07629964951526776, "learning_rate": 9.87680505442252e-06, "loss": 0.5656, "step": 589 }, { "epoch": 0.2929129949112573, "grad_norm": 0.07525170076872399, "learning_rate": 9.876373446138559e-06, "loss": 0.5583, "step": 590 }, { "epoch": 0.2934094576144967, "grad_norm": 0.0719482938902049, "learning_rate": 9.875941092579102e-06, "loss": 0.5843, "step": 591 }, { "epoch": 0.2939059203177361, "grad_norm": 0.07697109291618406, "learning_rate": 9.87550799381023e-06, "loss": 0.5399, "step": 592 }, { "epoch": 0.29440238302097554, "grad_norm": 0.0767166776402075, "learning_rate": 9.875074149898133e-06, "loss": 0.5276, "step": 593 }, { "epoch": 0.294898845724215, "grad_norm": 0.07102348146377147, "learning_rate": 9.874639560909118e-06, "loss": 0.5192, "step": 594 }, { "epoch": 0.2953953084274544, "grad_norm": 0.07279805219484177, "learning_rate": 9.874204226909607e-06, "loss": 0.5372, "step": 595 }, { "epoch": 0.2958917711306938, "grad_norm": 0.07433801339092601, "learning_rate": 9.87376814796613e-06, 
"loss": 0.5315, "step": 596 }, { "epoch": 0.2963882338339332, "grad_norm": 0.07657789334389173, "learning_rate": 9.873331324145337e-06, "loss": 0.5445, "step": 597 }, { "epoch": 0.29688469653717264, "grad_norm": 0.07705144270666044, "learning_rate": 9.872893755513987e-06, "loss": 0.5607, "step": 598 }, { "epoch": 0.2973811592404121, "grad_norm": 0.07917616577221262, "learning_rate": 9.872455442138962e-06, "loss": 0.6034, "step": 599 }, { "epoch": 0.29787762194365147, "grad_norm": 0.07628640882027855, "learning_rate": 9.872016384087243e-06, "loss": 0.5514, "step": 600 }, { "epoch": 0.2983740846468909, "grad_norm": 0.07818376880862994, "learning_rate": 9.871576581425937e-06, "loss": 0.6002, "step": 601 }, { "epoch": 0.2988705473501303, "grad_norm": 0.07665596539606143, "learning_rate": 9.871136034222262e-06, "loss": 0.5429, "step": 602 }, { "epoch": 0.29936701005336974, "grad_norm": 0.07325381782592988, "learning_rate": 9.870694742543544e-06, "loss": 0.5471, "step": 603 }, { "epoch": 0.2998634727566092, "grad_norm": 0.07639459489392834, "learning_rate": 9.870252706457233e-06, "loss": 0.5711, "step": 604 }, { "epoch": 0.30035993545984857, "grad_norm": 0.07623059529194105, "learning_rate": 9.869809926030883e-06, "loss": 0.5278, "step": 605 }, { "epoch": 0.300856398163088, "grad_norm": 0.08082827370502862, "learning_rate": 9.869366401332169e-06, "loss": 0.5308, "step": 606 }, { "epoch": 0.3013528608663274, "grad_norm": 0.07480986187389814, "learning_rate": 9.868922132428871e-06, "loss": 0.5428, "step": 607 }, { "epoch": 0.30184932356956684, "grad_norm": 0.07518561564283756, "learning_rate": 9.868477119388897e-06, "loss": 0.5774, "step": 608 }, { "epoch": 0.3023457862728063, "grad_norm": 0.07628450056698415, "learning_rate": 9.868031362280253e-06, "loss": 0.5611, "step": 609 }, { "epoch": 0.30284224897604567, "grad_norm": 0.0766015170643728, "learning_rate": 9.867584861171067e-06, "loss": 0.5309, "step": 610 }, { "epoch": 0.3033387116792851, "grad_norm": 0.07643369967531052, "learning_rate": 9.867137616129583e-06, "loss": 0.5623, "step": 611 }, { "epoch": 0.3038351743825245, "grad_norm": 0.0728482813967606, "learning_rate": 9.866689627224152e-06, "loss": 0.5233, "step": 612 }, { "epoch": 0.30433163708576394, "grad_norm": 0.08393283031423471, "learning_rate": 9.866240894523242e-06, "loss": 0.5962, "step": 613 }, { "epoch": 0.3048280997890033, "grad_norm": 0.0771796216168662, "learning_rate": 9.865791418095437e-06, "loss": 0.5253, "step": 614 }, { "epoch": 0.30532456249224277, "grad_norm": 0.08018751861730207, "learning_rate": 9.86534119800943e-06, "loss": 0.517, "step": 615 }, { "epoch": 0.3058210251954822, "grad_norm": 0.07787379590523566, "learning_rate": 9.864890234334032e-06, "loss": 0.5463, "step": 616 }, { "epoch": 0.3063174878987216, "grad_norm": 0.07763330346534424, "learning_rate": 9.864438527138163e-06, "loss": 0.5793, "step": 617 }, { "epoch": 0.30681395060196104, "grad_norm": 0.07826557061221215, "learning_rate": 9.86398607649086e-06, "loss": 0.5621, "step": 618 }, { "epoch": 0.3073104133052004, "grad_norm": 0.07926385658236025, "learning_rate": 9.863532882461275e-06, "loss": 0.5326, "step": 619 }, { "epoch": 0.30780687600843987, "grad_norm": 0.07685251032990977, "learning_rate": 9.86307894511867e-06, "loss": 0.5696, "step": 620 }, { "epoch": 0.3083033387116793, "grad_norm": 0.08400275382323562, "learning_rate": 9.86262426453242e-06, "loss": 0.5638, "step": 621 }, { "epoch": 0.3087998014149187, "grad_norm": 0.07637247838560719, "learning_rate": 9.862168840772018e-06, "loss": 0.5353, 
"step": 622 }, { "epoch": 0.30929626411815814, "grad_norm": 0.07567155856874061, "learning_rate": 9.861712673907067e-06, "loss": 0.5624, "step": 623 }, { "epoch": 0.3097927268213975, "grad_norm": 0.07599653080274857, "learning_rate": 9.861255764007288e-06, "loss": 0.5851, "step": 624 }, { "epoch": 0.31028918952463697, "grad_norm": 0.07657107986209026, "learning_rate": 9.860798111142507e-06, "loss": 0.557, "step": 625 }, { "epoch": 0.3107856522278764, "grad_norm": 0.08159401440687102, "learning_rate": 9.860339715382671e-06, "loss": 0.5407, "step": 626 }, { "epoch": 0.3112821149311158, "grad_norm": 0.08214172807970598, "learning_rate": 9.859880576797842e-06, "loss": 0.5532, "step": 627 }, { "epoch": 0.31177857763435524, "grad_norm": 0.07544880084915004, "learning_rate": 9.859420695458187e-06, "loss": 0.5316, "step": 628 }, { "epoch": 0.3122750403375946, "grad_norm": 0.07386885211105122, "learning_rate": 9.858960071433994e-06, "loss": 0.5423, "step": 629 }, { "epoch": 0.31277150304083406, "grad_norm": 0.07501408504334331, "learning_rate": 9.858498704795663e-06, "loss": 0.5534, "step": 630 }, { "epoch": 0.31326796574407345, "grad_norm": 0.0759136680167307, "learning_rate": 9.858036595613704e-06, "loss": 0.5307, "step": 631 }, { "epoch": 0.3137644284473129, "grad_norm": 0.08163074955687628, "learning_rate": 9.857573743958744e-06, "loss": 0.5957, "step": 632 }, { "epoch": 0.31426089115055234, "grad_norm": 0.07935519929681213, "learning_rate": 9.857110149901521e-06, "loss": 0.567, "step": 633 }, { "epoch": 0.3147573538537917, "grad_norm": 0.07839620252500945, "learning_rate": 9.856645813512892e-06, "loss": 0.5368, "step": 634 }, { "epoch": 0.31525381655703116, "grad_norm": 0.07681988893148366, "learning_rate": 9.85618073486382e-06, "loss": 0.504, "step": 635 }, { "epoch": 0.31575027926027055, "grad_norm": 0.0830345969868476, "learning_rate": 9.855714914025386e-06, "loss": 0.5278, "step": 636 }, { "epoch": 0.31624674196351, "grad_norm": 0.08129993849570155, "learning_rate": 9.855248351068781e-06, "loss": 0.5402, "step": 637 }, { "epoch": 0.31674320466674943, "grad_norm": 0.07952213949351684, "learning_rate": 9.854781046065317e-06, "loss": 0.543, "step": 638 }, { "epoch": 0.3172396673699888, "grad_norm": 0.0751788341311726, "learning_rate": 9.854312999086406e-06, "loss": 0.5411, "step": 639 }, { "epoch": 0.31773613007322826, "grad_norm": 0.08444156643795851, "learning_rate": 9.85384421020359e-06, "loss": 0.5884, "step": 640 }, { "epoch": 0.31823259277646765, "grad_norm": 0.07583571988436319, "learning_rate": 9.85337467948851e-06, "loss": 0.5645, "step": 641 }, { "epoch": 0.3187290554797071, "grad_norm": 0.07686002390051062, "learning_rate": 9.852904407012929e-06, "loss": 0.5691, "step": 642 }, { "epoch": 0.31922551818294653, "grad_norm": 0.07795666555244363, "learning_rate": 9.852433392848718e-06, "loss": 0.5192, "step": 643 }, { "epoch": 0.3197219808861859, "grad_norm": 0.08034340447050264, "learning_rate": 9.851961637067869e-06, "loss": 0.6113, "step": 644 }, { "epoch": 0.32021844358942536, "grad_norm": 0.0748943142336664, "learning_rate": 9.851489139742476e-06, "loss": 0.5362, "step": 645 }, { "epoch": 0.32071490629266475, "grad_norm": 0.07507270048060632, "learning_rate": 9.851015900944757e-06, "loss": 0.5509, "step": 646 }, { "epoch": 0.3212113689959042, "grad_norm": 0.07633316575711642, "learning_rate": 9.850541920747037e-06, "loss": 0.6085, "step": 647 }, { "epoch": 0.3217078316991436, "grad_norm": 0.07528076533630637, "learning_rate": 9.850067199221758e-06, "loss": 0.526, "step": 648 }, { 
"epoch": 0.322204294402383, "grad_norm": 0.07707243425328635, "learning_rate": 9.849591736441473e-06, "loss": 0.5695, "step": 649 }, { "epoch": 0.32270075710562246, "grad_norm": 0.07654913980669235, "learning_rate": 9.849115532478848e-06, "loss": 0.5705, "step": 650 }, { "epoch": 0.32319721980886185, "grad_norm": 0.07645374658029656, "learning_rate": 9.848638587406661e-06, "loss": 0.5474, "step": 651 }, { "epoch": 0.3236936825121013, "grad_norm": 0.07807950547456723, "learning_rate": 9.84816090129781e-06, "loss": 0.5887, "step": 652 }, { "epoch": 0.3241901452153407, "grad_norm": 0.07646768926967591, "learning_rate": 9.8476824742253e-06, "loss": 0.5449, "step": 653 }, { "epoch": 0.3246866079185801, "grad_norm": 0.07654133567981597, "learning_rate": 9.84720330626225e-06, "loss": 0.562, "step": 654 }, { "epoch": 0.32518307062181956, "grad_norm": 0.07443762509568126, "learning_rate": 9.846723397481892e-06, "loss": 0.553, "step": 655 }, { "epoch": 0.32567953332505895, "grad_norm": 0.07370022698268587, "learning_rate": 9.846242747957578e-06, "loss": 0.5053, "step": 656 }, { "epoch": 0.3261759960282984, "grad_norm": 0.075692372137261, "learning_rate": 9.84576135776276e-06, "loss": 0.5376, "step": 657 }, { "epoch": 0.3266724587315378, "grad_norm": 0.07705713356659613, "learning_rate": 9.845279226971016e-06, "loss": 0.5515, "step": 658 }, { "epoch": 0.3271689214347772, "grad_norm": 0.07295323071068782, "learning_rate": 9.84479635565603e-06, "loss": 0.5299, "step": 659 }, { "epoch": 0.32766538413801666, "grad_norm": 0.0758470961017997, "learning_rate": 9.8443127438916e-06, "loss": 0.5667, "step": 660 }, { "epoch": 0.32816184684125604, "grad_norm": 0.07599385356522345, "learning_rate": 9.843828391751642e-06, "loss": 0.5288, "step": 661 }, { "epoch": 0.3286583095444955, "grad_norm": 0.0746482852812454, "learning_rate": 9.843343299310177e-06, "loss": 0.517, "step": 662 }, { "epoch": 0.3291547722477349, "grad_norm": 0.07538540101260628, "learning_rate": 9.842857466641348e-06, "loss": 0.5433, "step": 663 }, { "epoch": 0.3296512349509743, "grad_norm": 0.07526342937730707, "learning_rate": 9.842370893819404e-06, "loss": 0.5547, "step": 664 }, { "epoch": 0.3301476976542137, "grad_norm": 0.0789429151978629, "learning_rate": 9.84188358091871e-06, "loss": 0.5368, "step": 665 }, { "epoch": 0.33064416035745314, "grad_norm": 0.07763526839910767, "learning_rate": 9.841395528013744e-06, "loss": 0.5795, "step": 666 }, { "epoch": 0.3311406230606926, "grad_norm": 0.07494340511997437, "learning_rate": 9.840906735179096e-06, "loss": 0.6035, "step": 667 }, { "epoch": 0.33163708576393197, "grad_norm": 0.07910952886635866, "learning_rate": 9.840417202489473e-06, "loss": 0.5854, "step": 668 }, { "epoch": 0.3321335484671714, "grad_norm": 0.07892986798515082, "learning_rate": 9.839926930019692e-06, "loss": 0.5396, "step": 669 }, { "epoch": 0.3326300111704108, "grad_norm": 0.0729563764448872, "learning_rate": 9.839435917844682e-06, "loss": 0.524, "step": 670 }, { "epoch": 0.33312647387365024, "grad_norm": 0.07802658438460508, "learning_rate": 9.838944166039486e-06, "loss": 0.553, "step": 671 }, { "epoch": 0.3336229365768897, "grad_norm": 0.07792180398015403, "learning_rate": 9.83845167467926e-06, "loss": 0.5449, "step": 672 }, { "epoch": 0.33411939928012907, "grad_norm": 0.07574586523670872, "learning_rate": 9.837958443839274e-06, "loss": 0.595, "step": 673 }, { "epoch": 0.3346158619833685, "grad_norm": 0.07591400320048404, "learning_rate": 9.837464473594911e-06, "loss": 0.5292, "step": 674 }, { "epoch": 0.3351123246866079, 
"grad_norm": 0.07644973731060183, "learning_rate": 9.836969764021666e-06, "loss": 0.5455, "step": 675 }, { "epoch": 0.33560878738984734, "grad_norm": 0.08101825987060038, "learning_rate": 9.836474315195148e-06, "loss": 0.5767, "step": 676 }, { "epoch": 0.3361052500930868, "grad_norm": 0.07730742434766155, "learning_rate": 9.835978127191077e-06, "loss": 0.6241, "step": 677 }, { "epoch": 0.33660171279632617, "grad_norm": 0.07874246801059555, "learning_rate": 9.83548120008529e-06, "loss": 0.5278, "step": 678 }, { "epoch": 0.3370981754995656, "grad_norm": 0.07397850549930313, "learning_rate": 9.83498353395373e-06, "loss": 0.4994, "step": 679 }, { "epoch": 0.337594638202805, "grad_norm": 0.07591277824088376, "learning_rate": 9.834485128872462e-06, "loss": 0.5196, "step": 680 }, { "epoch": 0.33809110090604444, "grad_norm": 0.07229658778373127, "learning_rate": 9.833985984917656e-06, "loss": 0.5368, "step": 681 }, { "epoch": 0.3385875636092838, "grad_norm": 0.08099111946438035, "learning_rate": 9.8334861021656e-06, "loss": 0.5697, "step": 682 }, { "epoch": 0.33908402631252327, "grad_norm": 0.07252478515888824, "learning_rate": 9.832985480692691e-06, "loss": 0.5604, "step": 683 }, { "epoch": 0.3395804890157627, "grad_norm": 0.07239879222289877, "learning_rate": 9.832484120575446e-06, "loss": 0.487, "step": 684 }, { "epoch": 0.3400769517190021, "grad_norm": 0.07153328866561025, "learning_rate": 9.831982021890483e-06, "loss": 0.5233, "step": 685 }, { "epoch": 0.34057341442224154, "grad_norm": 0.07439020024051021, "learning_rate": 9.831479184714543e-06, "loss": 0.5375, "step": 686 }, { "epoch": 0.3410698771254809, "grad_norm": 0.07404184993442114, "learning_rate": 9.830975609124477e-06, "loss": 0.5332, "step": 687 }, { "epoch": 0.34156633982872037, "grad_norm": 0.07438693972814182, "learning_rate": 9.830471295197248e-06, "loss": 0.5536, "step": 688 }, { "epoch": 0.3420628025319598, "grad_norm": 0.075698102229209, "learning_rate": 9.829966243009932e-06, "loss": 0.5361, "step": 689 }, { "epoch": 0.3425592652351992, "grad_norm": 0.07580611925918088, "learning_rate": 9.829460452639718e-06, "loss": 0.5569, "step": 690 }, { "epoch": 0.34305572793843864, "grad_norm": 0.07367114112343696, "learning_rate": 9.828953924163908e-06, "loss": 0.557, "step": 691 }, { "epoch": 0.343552190641678, "grad_norm": 0.0733573832874063, "learning_rate": 9.828446657659919e-06, "loss": 0.5857, "step": 692 }, { "epoch": 0.34404865334491747, "grad_norm": 0.08186579423918036, "learning_rate": 9.827938653205275e-06, "loss": 0.5366, "step": 693 }, { "epoch": 0.3445451160481569, "grad_norm": 0.07582312092529461, "learning_rate": 9.82742991087762e-06, "loss": 0.5524, "step": 694 }, { "epoch": 0.3450415787513963, "grad_norm": 0.08324114328209058, "learning_rate": 9.826920430754703e-06, "loss": 0.5471, "step": 695 }, { "epoch": 0.34553804145463574, "grad_norm": 0.07489973171052326, "learning_rate": 9.826410212914393e-06, "loss": 0.5547, "step": 696 }, { "epoch": 0.3460345041578751, "grad_norm": 0.07337731136398072, "learning_rate": 9.825899257434667e-06, "loss": 0.5299, "step": 697 }, { "epoch": 0.34653096686111456, "grad_norm": 0.0813945270751598, "learning_rate": 9.825387564393616e-06, "loss": 0.5469, "step": 698 }, { "epoch": 0.34702742956435395, "grad_norm": 0.07553165147863128, "learning_rate": 9.824875133869447e-06, "loss": 0.5235, "step": 699 }, { "epoch": 0.3475238922675934, "grad_norm": 0.07798665622206621, "learning_rate": 9.824361965940475e-06, "loss": 0.5449, "step": 700 }, { "epoch": 0.34802035497083283, "grad_norm": 
0.07720920154984248, "learning_rate": 9.823848060685125e-06, "loss": 0.5516, "step": 701 }, { "epoch": 0.3485168176740722, "grad_norm": 0.07394946809380538, "learning_rate": 9.823333418181948e-06, "loss": 0.5434, "step": 702 }, { "epoch": 0.34901328037731166, "grad_norm": 0.07443280799785286, "learning_rate": 9.822818038509593e-06, "loss": 0.5206, "step": 703 }, { "epoch": 0.34950974308055105, "grad_norm": 0.07109902087405791, "learning_rate": 9.822301921746829e-06, "loss": 0.5129, "step": 704 }, { "epoch": 0.3500062057837905, "grad_norm": 0.07494737825986637, "learning_rate": 9.821785067972536e-06, "loss": 0.5297, "step": 705 }, { "epoch": 0.35050266848702993, "grad_norm": 0.07233626869785674, "learning_rate": 9.821267477265705e-06, "loss": 0.5418, "step": 706 }, { "epoch": 0.3509991311902693, "grad_norm": 0.07147306841083971, "learning_rate": 9.820749149705445e-06, "loss": 0.5428, "step": 707 }, { "epoch": 0.35149559389350876, "grad_norm": 0.07276839503056264, "learning_rate": 9.820230085370972e-06, "loss": 0.5634, "step": 708 }, { "epoch": 0.35199205659674815, "grad_norm": 0.07575865554460536, "learning_rate": 9.819710284341618e-06, "loss": 0.5377, "step": 709 }, { "epoch": 0.3524885192999876, "grad_norm": 0.0739559499625771, "learning_rate": 9.819189746696823e-06, "loss": 0.5323, "step": 710 }, { "epoch": 0.35298498200322703, "grad_norm": 0.07331544965217528, "learning_rate": 9.818668472516146e-06, "loss": 0.5642, "step": 711 }, { "epoch": 0.3534814447064664, "grad_norm": 0.07851875713635985, "learning_rate": 9.818146461879256e-06, "loss": 0.598, "step": 712 }, { "epoch": 0.35397790740970586, "grad_norm": 0.07382231426622198, "learning_rate": 9.817623714865931e-06, "loss": 0.5683, "step": 713 }, { "epoch": 0.35447437011294525, "grad_norm": 0.07647833075770147, "learning_rate": 9.817100231556065e-06, "loss": 0.5647, "step": 714 }, { "epoch": 0.3549708328161847, "grad_norm": 0.07453844364837098, "learning_rate": 9.816576012029666e-06, "loss": 0.5479, "step": 715 }, { "epoch": 0.3554672955194241, "grad_norm": 0.06994784433794665, "learning_rate": 9.816051056366851e-06, "loss": 0.5545, "step": 716 }, { "epoch": 0.3559637582226635, "grad_norm": 0.0708558727408941, "learning_rate": 9.815525364647853e-06, "loss": 0.5235, "step": 717 }, { "epoch": 0.35646022092590296, "grad_norm": 0.07211829677081709, "learning_rate": 9.814998936953012e-06, "loss": 0.5375, "step": 718 }, { "epoch": 0.35695668362914235, "grad_norm": 0.0778707977537702, "learning_rate": 9.814471773362788e-06, "loss": 0.5003, "step": 719 }, { "epoch": 0.3574531463323818, "grad_norm": 0.07346792577565847, "learning_rate": 9.813943873957748e-06, "loss": 0.5252, "step": 720 }, { "epoch": 0.3579496090356212, "grad_norm": 0.0780037540912097, "learning_rate": 9.81341523881857e-06, "loss": 0.5609, "step": 721 }, { "epoch": 0.3584460717388606, "grad_norm": 0.12649906090457236, "learning_rate": 9.812885868026052e-06, "loss": 0.5611, "step": 722 }, { "epoch": 0.35894253444210006, "grad_norm": 0.07460362088706496, "learning_rate": 9.812355761661096e-06, "loss": 0.538, "step": 723 }, { "epoch": 0.35943899714533945, "grad_norm": 0.0739528024367322, "learning_rate": 9.811824919804725e-06, "loss": 0.5721, "step": 724 }, { "epoch": 0.3599354598485789, "grad_norm": 0.07753894839284191, "learning_rate": 9.811293342538063e-06, "loss": 0.5858, "step": 725 }, { "epoch": 0.3604319225518183, "grad_norm": 0.07368755167074739, "learning_rate": 9.81076102994236e-06, "loss": 0.5774, "step": 726 }, { "epoch": 0.3609283852550577, "grad_norm": 
0.07661834913233602, "learning_rate": 9.810227982098968e-06, "loss": 0.5737, "step": 727 }, { "epoch": 0.36142484795829716, "grad_norm": 0.07973682947552374, "learning_rate": 9.809694199089352e-06, "loss": 0.5407, "step": 728 }, { "epoch": 0.36192131066153654, "grad_norm": 0.07276884200881067, "learning_rate": 9.809159680995098e-06, "loss": 0.5372, "step": 729 }, { "epoch": 0.362417773364776, "grad_norm": 0.07705887562756104, "learning_rate": 9.808624427897896e-06, "loss": 0.5148, "step": 730 }, { "epoch": 0.3629142360680154, "grad_norm": 0.07781544338816433, "learning_rate": 9.80808843987955e-06, "loss": 0.5545, "step": 731 }, { "epoch": 0.3634106987712548, "grad_norm": 0.0806221612612232, "learning_rate": 9.807551717021977e-06, "loss": 0.5428, "step": 732 }, { "epoch": 0.36390716147449426, "grad_norm": 0.07365922527604281, "learning_rate": 9.807014259407209e-06, "loss": 0.5246, "step": 733 }, { "epoch": 0.36440362417773364, "grad_norm": 0.07405742185015944, "learning_rate": 9.806476067117384e-06, "loss": 0.5392, "step": 734 }, { "epoch": 0.3649000868809731, "grad_norm": 0.07652890186972217, "learning_rate": 9.80593714023476e-06, "loss": 0.5149, "step": 735 }, { "epoch": 0.36539654958421247, "grad_norm": 0.07593774024556392, "learning_rate": 9.8053974788417e-06, "loss": 0.5356, "step": 736 }, { "epoch": 0.3658930122874519, "grad_norm": 0.07415413956682762, "learning_rate": 9.804857083020685e-06, "loss": 0.5415, "step": 737 }, { "epoch": 0.3663894749906913, "grad_norm": 0.07595510777957903, "learning_rate": 9.804315952854304e-06, "loss": 0.5259, "step": 738 }, { "epoch": 0.36688593769393074, "grad_norm": 0.07424855325354711, "learning_rate": 9.803774088425262e-06, "loss": 0.5243, "step": 739 }, { "epoch": 0.3673824003971702, "grad_norm": 0.07812406038583773, "learning_rate": 9.803231489816371e-06, "loss": 0.5584, "step": 740 }, { "epoch": 0.36787886310040957, "grad_norm": 0.07240708263932845, "learning_rate": 9.802688157110564e-06, "loss": 0.5248, "step": 741 }, { "epoch": 0.368375325803649, "grad_norm": 0.07605118794849668, "learning_rate": 9.802144090390875e-06, "loss": 0.5226, "step": 742 }, { "epoch": 0.3688717885068884, "grad_norm": 0.15783281913989158, "learning_rate": 9.80159928974046e-06, "loss": 0.5453, "step": 743 }, { "epoch": 0.36936825121012784, "grad_norm": 0.07410517260137793, "learning_rate": 9.80105375524258e-06, "loss": 0.5069, "step": 744 }, { "epoch": 0.3698647139133673, "grad_norm": 0.07734176892139462, "learning_rate": 9.800507486980613e-06, "loss": 0.5359, "step": 745 }, { "epoch": 0.37036117661660667, "grad_norm": 0.07149204470583237, "learning_rate": 9.799960485038047e-06, "loss": 0.4931, "step": 746 }, { "epoch": 0.3708576393198461, "grad_norm": 0.0721698434133166, "learning_rate": 9.799412749498483e-06, "loss": 0.5284, "step": 747 }, { "epoch": 0.3713541020230855, "grad_norm": 0.07167968336400159, "learning_rate": 9.798864280445633e-06, "loss": 0.5619, "step": 748 }, { "epoch": 0.37185056472632494, "grad_norm": 0.07408407334726448, "learning_rate": 9.798315077963321e-06, "loss": 0.5305, "step": 749 }, { "epoch": 0.3723470274295644, "grad_norm": 0.07298928263403225, "learning_rate": 9.797765142135483e-06, "loss": 0.5543, "step": 750 }, { "epoch": 0.37284349013280377, "grad_norm": 0.075371200502978, "learning_rate": 9.797214473046171e-06, "loss": 0.5802, "step": 751 }, { "epoch": 0.3733399528360432, "grad_norm": 0.0738341822930936, "learning_rate": 9.796663070779545e-06, "loss": 0.5377, "step": 752 }, { "epoch": 0.3738364155392826, "grad_norm": 0.07475281086820233, 
"learning_rate": 9.796110935419876e-06, "loss": 0.5433, "step": 753 }, { "epoch": 0.37433287824252204, "grad_norm": 0.07636423039177996, "learning_rate": 9.79555806705155e-06, "loss": 0.5191, "step": 754 }, { "epoch": 0.3748293409457614, "grad_norm": 0.07553329273177121, "learning_rate": 9.795004465759067e-06, "loss": 0.5685, "step": 755 }, { "epoch": 0.37532580364900087, "grad_norm": 0.07669955016383546, "learning_rate": 9.79445013162703e-06, "loss": 0.5391, "step": 756 }, { "epoch": 0.3758222663522403, "grad_norm": 0.07544294491461324, "learning_rate": 9.793895064740166e-06, "loss": 0.5612, "step": 757 }, { "epoch": 0.3763187290554797, "grad_norm": 0.0754930880002628, "learning_rate": 9.793339265183303e-06, "loss": 0.5458, "step": 758 }, { "epoch": 0.37681519175871914, "grad_norm": 0.07749670149783751, "learning_rate": 9.79278273304139e-06, "loss": 0.5937, "step": 759 }, { "epoch": 0.3773116544619585, "grad_norm": 0.07268005398789518, "learning_rate": 9.792225468399485e-06, "loss": 0.556, "step": 760 }, { "epoch": 0.37780811716519797, "grad_norm": 0.07239528164494755, "learning_rate": 9.791667471342752e-06, "loss": 0.5385, "step": 761 }, { "epoch": 0.3783045798684374, "grad_norm": 0.07334739224571676, "learning_rate": 9.791108741956476e-06, "loss": 0.5158, "step": 762 }, { "epoch": 0.3788010425716768, "grad_norm": 0.07033808439598177, "learning_rate": 9.790549280326046e-06, "loss": 0.5481, "step": 763 }, { "epoch": 0.37929750527491624, "grad_norm": 0.07552295599412208, "learning_rate": 9.789989086536971e-06, "loss": 0.5242, "step": 764 }, { "epoch": 0.3797939679781556, "grad_norm": 0.07401484746819591, "learning_rate": 9.789428160674865e-06, "loss": 0.5562, "step": 765 }, { "epoch": 0.38029043068139506, "grad_norm": 0.11414006306157573, "learning_rate": 9.788866502825458e-06, "loss": 0.5698, "step": 766 }, { "epoch": 0.3807868933846345, "grad_norm": 0.07121253600333775, "learning_rate": 9.78830411307459e-06, "loss": 0.5284, "step": 767 }, { "epoch": 0.3812833560878739, "grad_norm": 0.07265804118235417, "learning_rate": 9.787740991508212e-06, "loss": 0.5158, "step": 768 }, { "epoch": 0.38177981879111333, "grad_norm": 0.07254318353819536, "learning_rate": 9.787177138212391e-06, "loss": 0.5091, "step": 769 }, { "epoch": 0.3822762814943527, "grad_norm": 0.07281349514641368, "learning_rate": 9.786612553273298e-06, "loss": 0.5231, "step": 770 }, { "epoch": 0.38277274419759216, "grad_norm": 0.07050000723172054, "learning_rate": 9.786047236777225e-06, "loss": 0.5579, "step": 771 }, { "epoch": 0.38326920690083155, "grad_norm": 0.07541531424572744, "learning_rate": 9.78548118881057e-06, "loss": 0.5373, "step": 772 }, { "epoch": 0.383765669604071, "grad_norm": 0.07453492248282302, "learning_rate": 9.784914409459847e-06, "loss": 0.5556, "step": 773 }, { "epoch": 0.38426213230731043, "grad_norm": 0.0757761604807883, "learning_rate": 9.784346898811675e-06, "loss": 0.5199, "step": 774 }, { "epoch": 0.3847585950105498, "grad_norm": 0.07568484772982484, "learning_rate": 9.78377865695279e-06, "loss": 0.5324, "step": 775 }, { "epoch": 0.38525505771378926, "grad_norm": 0.07364200493411856, "learning_rate": 9.78320968397004e-06, "loss": 0.5339, "step": 776 }, { "epoch": 0.38575152041702865, "grad_norm": 0.07497774875981036, "learning_rate": 9.782639979950382e-06, "loss": 0.5578, "step": 777 }, { "epoch": 0.3862479831202681, "grad_norm": 0.08179279051563958, "learning_rate": 9.782069544980887e-06, "loss": 0.5467, "step": 778 }, { "epoch": 0.38674444582350753, "grad_norm": 0.07280472908410666, "learning_rate": 
9.781498379148738e-06, "loss": 0.501, "step": 779 }, { "epoch": 0.3872409085267469, "grad_norm": 0.07490404669769593, "learning_rate": 9.780926482541227e-06, "loss": 0.5342, "step": 780 }, { "epoch": 0.38773737122998636, "grad_norm": 0.07570507230711694, "learning_rate": 9.780353855245759e-06, "loss": 0.5816, "step": 781 }, { "epoch": 0.38823383393322575, "grad_norm": 0.07622051872508213, "learning_rate": 9.779780497349852e-06, "loss": 0.5522, "step": 782 }, { "epoch": 0.3887302966364652, "grad_norm": 0.0765505749512821, "learning_rate": 9.779206408941131e-06, "loss": 0.5141, "step": 783 }, { "epoch": 0.38922675933970463, "grad_norm": 0.07629599426112826, "learning_rate": 9.778631590107342e-06, "loss": 0.5388, "step": 784 }, { "epoch": 0.389723222042944, "grad_norm": 0.07362072760938124, "learning_rate": 9.778056040936333e-06, "loss": 0.5531, "step": 785 }, { "epoch": 0.39021968474618346, "grad_norm": 0.07712485250124279, "learning_rate": 9.777479761516069e-06, "loss": 0.5088, "step": 786 }, { "epoch": 0.39071614744942285, "grad_norm": 0.07248722869142624, "learning_rate": 9.776902751934625e-06, "loss": 0.5495, "step": 787 }, { "epoch": 0.3912126101526623, "grad_norm": 0.07969653988684422, "learning_rate": 9.776325012280185e-06, "loss": 0.5586, "step": 788 }, { "epoch": 0.3917090728559017, "grad_norm": 0.07887747490682782, "learning_rate": 9.775746542641052e-06, "loss": 0.5469, "step": 789 }, { "epoch": 0.3922055355591411, "grad_norm": 0.07511324662951371, "learning_rate": 9.77516734310563e-06, "loss": 0.5043, "step": 790 }, { "epoch": 0.39270199826238056, "grad_norm": 0.074682710636718, "learning_rate": 9.774587413762448e-06, "loss": 0.573, "step": 791 }, { "epoch": 0.39319846096561994, "grad_norm": 0.07823987539462922, "learning_rate": 9.774006754700132e-06, "loss": 0.5012, "step": 792 }, { "epoch": 0.3936949236688594, "grad_norm": 0.07927560532508765, "learning_rate": 9.77342536600743e-06, "loss": 0.5332, "step": 793 }, { "epoch": 0.3941913863720988, "grad_norm": 0.081075947027259, "learning_rate": 9.772843247773197e-06, "loss": 0.571, "step": 794 }, { "epoch": 0.3946878490753382, "grad_norm": 0.07529856682734566, "learning_rate": 9.772260400086397e-06, "loss": 0.5392, "step": 795 }, { "epoch": 0.39518431177857766, "grad_norm": 0.07593026315167804, "learning_rate": 9.771676823036115e-06, "loss": 0.513, "step": 796 }, { "epoch": 0.39568077448181704, "grad_norm": 0.07488551320818262, "learning_rate": 9.771092516711538e-06, "loss": 0.5658, "step": 797 }, { "epoch": 0.3961772371850565, "grad_norm": 0.07448583103314095, "learning_rate": 9.77050748120197e-06, "loss": 0.5064, "step": 798 }, { "epoch": 0.39667369988829587, "grad_norm": 0.07407263475614544, "learning_rate": 9.76992171659682e-06, "loss": 0.5379, "step": 799 }, { "epoch": 0.3971701625915353, "grad_norm": 0.0748585105071261, "learning_rate": 9.769335222985617e-06, "loss": 0.5195, "step": 800 }, { "epoch": 0.39766662529477476, "grad_norm": 0.07391534395442713, "learning_rate": 9.768748000457996e-06, "loss": 0.5131, "step": 801 }, { "epoch": 0.39816308799801414, "grad_norm": 0.07465639657356582, "learning_rate": 9.768160049103702e-06, "loss": 0.5405, "step": 802 }, { "epoch": 0.3986595507012536, "grad_norm": 0.07476172520797038, "learning_rate": 9.767571369012599e-06, "loss": 0.5231, "step": 803 }, { "epoch": 0.39915601340449297, "grad_norm": 0.08064284315067823, "learning_rate": 9.766981960274653e-06, "loss": 0.5443, "step": 804 }, { "epoch": 0.3996524761077324, "grad_norm": 0.07430645599205549, "learning_rate": 
9.766391822979948e-06, "loss": 0.5142, "step": 805 }, { "epoch": 0.4001489388109718, "grad_norm": 0.07410980228388762, "learning_rate": 9.765800957218677e-06, "loss": 0.5517, "step": 806 }, { "epoch": 0.40064540151421124, "grad_norm": 0.06867453077929601, "learning_rate": 9.765209363081141e-06, "loss": 0.5649, "step": 807 }, { "epoch": 0.4011418642174507, "grad_norm": 0.07353979734131227, "learning_rate": 9.764617040657759e-06, "loss": 0.5204, "step": 808 }, { "epoch": 0.40163832692069007, "grad_norm": 0.07525469655765583, "learning_rate": 9.764023990039058e-06, "loss": 0.5532, "step": 809 }, { "epoch": 0.4021347896239295, "grad_norm": 0.07078684341894576, "learning_rate": 9.763430211315675e-06, "loss": 0.5532, "step": 810 }, { "epoch": 0.4026312523271689, "grad_norm": 0.07734947995505723, "learning_rate": 9.76283570457836e-06, "loss": 0.5731, "step": 811 }, { "epoch": 0.40312771503040834, "grad_norm": 0.0751893291465059, "learning_rate": 9.762240469917972e-06, "loss": 0.5656, "step": 812 }, { "epoch": 0.4036241777336478, "grad_norm": 0.07498423865346433, "learning_rate": 9.761644507425487e-06, "loss": 0.5528, "step": 813 }, { "epoch": 0.40412064043688717, "grad_norm": 0.07905003613185434, "learning_rate": 9.761047817191987e-06, "loss": 0.5376, "step": 814 }, { "epoch": 0.4046171031401266, "grad_norm": 0.0739115656470773, "learning_rate": 9.760450399308662e-06, "loss": 0.5471, "step": 815 }, { "epoch": 0.405113565843366, "grad_norm": 0.07314428919759865, "learning_rate": 9.759852253866825e-06, "loss": 0.5458, "step": 816 }, { "epoch": 0.40561002854660544, "grad_norm": 0.0754779528703246, "learning_rate": 9.759253380957889e-06, "loss": 0.5271, "step": 817 }, { "epoch": 0.4061064912498449, "grad_norm": 0.0767964488723569, "learning_rate": 9.758653780673381e-06, "loss": 0.5481, "step": 818 }, { "epoch": 0.40660295395308427, "grad_norm": 0.08174582451126397, "learning_rate": 9.758053453104943e-06, "loss": 0.5661, "step": 819 }, { "epoch": 0.4070994166563237, "grad_norm": 0.07215132459628594, "learning_rate": 9.757452398344324e-06, "loss": 0.5176, "step": 820 }, { "epoch": 0.4075958793595631, "grad_norm": 0.0716641776433738, "learning_rate": 9.756850616483386e-06, "loss": 0.5315, "step": 821 }, { "epoch": 0.40809234206280254, "grad_norm": 0.07282126077177561, "learning_rate": 9.7562481076141e-06, "loss": 0.5332, "step": 822 }, { "epoch": 0.4085888047660419, "grad_norm": 0.07751227831062067, "learning_rate": 9.755644871828555e-06, "loss": 0.5222, "step": 823 }, { "epoch": 0.40908526746928137, "grad_norm": 0.07485329801902614, "learning_rate": 9.75504090921894e-06, "loss": 0.5188, "step": 824 }, { "epoch": 0.4095817301725208, "grad_norm": 0.07291242005129558, "learning_rate": 9.754436219877564e-06, "loss": 0.5199, "step": 825 }, { "epoch": 0.4100781928757602, "grad_norm": 0.07463161826705483, "learning_rate": 9.753830803896842e-06, "loss": 0.5158, "step": 826 }, { "epoch": 0.41057465557899964, "grad_norm": 0.07832879467891833, "learning_rate": 9.753224661369304e-06, "loss": 0.5607, "step": 827 }, { "epoch": 0.411071118282239, "grad_norm": 0.07111309195620046, "learning_rate": 9.75261779238759e-06, "loss": 0.5544, "step": 828 }, { "epoch": 0.41156758098547847, "grad_norm": 0.0819846829516738, "learning_rate": 9.752010197044448e-06, "loss": 0.5557, "step": 829 }, { "epoch": 0.4120640436887179, "grad_norm": 0.07898817088337341, "learning_rate": 9.75140187543274e-06, "loss": 0.546, "step": 830 }, { "epoch": 0.4125605063919573, "grad_norm": 0.07647522418031873, "learning_rate": 9.750792827645438e-06, 
"loss": 0.5149, "step": 831 }, { "epoch": 0.41305696909519674, "grad_norm": 0.07515552620271704, "learning_rate": 9.750183053775625e-06, "loss": 0.5493, "step": 832 }, { "epoch": 0.4135534317984361, "grad_norm": 0.08341787971048387, "learning_rate": 9.749572553916497e-06, "loss": 0.5505, "step": 833 }, { "epoch": 0.41404989450167556, "grad_norm": 0.07297396297855867, "learning_rate": 9.748961328161358e-06, "loss": 0.5288, "step": 834 }, { "epoch": 0.414546357204915, "grad_norm": 0.0742750111482202, "learning_rate": 9.748349376603622e-06, "loss": 0.541, "step": 835 }, { "epoch": 0.4150428199081544, "grad_norm": 0.0742391348271075, "learning_rate": 9.747736699336819e-06, "loss": 0.5399, "step": 836 }, { "epoch": 0.41553928261139383, "grad_norm": 0.07769022255581656, "learning_rate": 9.747123296454584e-06, "loss": 0.5403, "step": 837 }, { "epoch": 0.4160357453146332, "grad_norm": 0.07478718017828734, "learning_rate": 9.74650916805067e-06, "loss": 0.5456, "step": 838 }, { "epoch": 0.41653220801787266, "grad_norm": 0.07963698330755821, "learning_rate": 9.745894314218933e-06, "loss": 0.5898, "step": 839 }, { "epoch": 0.41702867072111205, "grad_norm": 0.07829076168531228, "learning_rate": 9.745278735053345e-06, "loss": 0.515, "step": 840 }, { "epoch": 0.4175251334243515, "grad_norm": 0.07440206958109287, "learning_rate": 9.744662430647986e-06, "loss": 0.5477, "step": 841 }, { "epoch": 0.41802159612759093, "grad_norm": 0.07089389200900213, "learning_rate": 9.74404540109705e-06, "loss": 0.5084, "step": 842 }, { "epoch": 0.4185180588308303, "grad_norm": 0.07302037721807964, "learning_rate": 9.74342764649484e-06, "loss": 0.5318, "step": 843 }, { "epoch": 0.41901452153406976, "grad_norm": 0.0783625897828163, "learning_rate": 9.742809166935768e-06, "loss": 0.5938, "step": 844 }, { "epoch": 0.41951098423730915, "grad_norm": 0.07369207278532003, "learning_rate": 9.742189962514361e-06, "loss": 0.5423, "step": 845 }, { "epoch": 0.4200074469405486, "grad_norm": 0.07785657874090317, "learning_rate": 9.741570033325254e-06, "loss": 0.5319, "step": 846 }, { "epoch": 0.42050390964378803, "grad_norm": 0.08593813392209855, "learning_rate": 9.740949379463192e-06, "loss": 0.5383, "step": 847 }, { "epoch": 0.4210003723470274, "grad_norm": 0.07225369547298743, "learning_rate": 9.740328001023032e-06, "loss": 0.506, "step": 848 }, { "epoch": 0.42149683505026686, "grad_norm": 0.07409992281897824, "learning_rate": 9.739705898099743e-06, "loss": 0.5119, "step": 849 }, { "epoch": 0.42199329775350625, "grad_norm": 0.07016946900319038, "learning_rate": 9.739083070788405e-06, "loss": 0.5482, "step": 850 }, { "epoch": 0.4224897604567457, "grad_norm": 0.07800094484472588, "learning_rate": 9.738459519184203e-06, "loss": 0.5533, "step": 851 }, { "epoch": 0.42298622315998513, "grad_norm": 0.07530706526398431, "learning_rate": 9.737835243382438e-06, "loss": 0.6289, "step": 852 }, { "epoch": 0.4234826858632245, "grad_norm": 0.07309750256723833, "learning_rate": 9.737210243478522e-06, "loss": 0.5388, "step": 853 }, { "epoch": 0.42397914856646396, "grad_norm": 0.07402449977170224, "learning_rate": 9.736584519567976e-06, "loss": 0.5155, "step": 854 }, { "epoch": 0.42447561126970335, "grad_norm": 0.07780518098869948, "learning_rate": 9.735958071746431e-06, "loss": 0.5241, "step": 855 }, { "epoch": 0.4249720739729428, "grad_norm": 0.07374222764934292, "learning_rate": 9.735330900109631e-06, "loss": 0.5132, "step": 856 }, { "epoch": 0.4254685366761822, "grad_norm": 0.0745070085758739, "learning_rate": 9.734703004753429e-06, "loss": 0.5271, 
"step": 857 }, { "epoch": 0.4259649993794216, "grad_norm": 0.08051238332124257, "learning_rate": 9.734074385773786e-06, "loss": 0.4877, "step": 858 }, { "epoch": 0.42646146208266106, "grad_norm": 0.07259415930172602, "learning_rate": 9.733445043266779e-06, "loss": 0.5827, "step": 859 }, { "epoch": 0.42695792478590044, "grad_norm": 0.07557236145733463, "learning_rate": 9.732814977328593e-06, "loss": 0.5468, "step": 860 }, { "epoch": 0.4274543874891399, "grad_norm": 0.07259157033280643, "learning_rate": 9.732184188055522e-06, "loss": 0.5329, "step": 861 }, { "epoch": 0.4279508501923793, "grad_norm": 0.07549072522153745, "learning_rate": 9.731552675543972e-06, "loss": 0.5211, "step": 862 }, { "epoch": 0.4284473128956187, "grad_norm": 0.07288970060708126, "learning_rate": 9.73092043989046e-06, "loss": 0.5334, "step": 863 }, { "epoch": 0.42894377559885816, "grad_norm": 0.07075634318269011, "learning_rate": 9.730287481191615e-06, "loss": 0.5053, "step": 864 }, { "epoch": 0.42944023830209754, "grad_norm": 0.07614606694018755, "learning_rate": 9.729653799544171e-06, "loss": 0.5587, "step": 865 }, { "epoch": 0.429936701005337, "grad_norm": 0.07316335947524487, "learning_rate": 9.729019395044979e-06, "loss": 0.5342, "step": 866 }, { "epoch": 0.43043316370857637, "grad_norm": 0.07053572019737249, "learning_rate": 9.728384267790997e-06, "loss": 0.5213, "step": 867 }, { "epoch": 0.4309296264118158, "grad_norm": 0.07164245763524565, "learning_rate": 9.727748417879293e-06, "loss": 0.5445, "step": 868 }, { "epoch": 0.43142608911505526, "grad_norm": 0.07308996659537338, "learning_rate": 9.727111845407046e-06, "loss": 0.5487, "step": 869 }, { "epoch": 0.43192255181829464, "grad_norm": 0.0806618601368084, "learning_rate": 9.726474550471549e-06, "loss": 0.5628, "step": 870 }, { "epoch": 0.4324190145215341, "grad_norm": 0.11999394247964658, "learning_rate": 9.725836533170199e-06, "loss": 0.5501, "step": 871 }, { "epoch": 0.43291547722477347, "grad_norm": 0.07338156895848527, "learning_rate": 9.725197793600508e-06, "loss": 0.5529, "step": 872 }, { "epoch": 0.4334119399280129, "grad_norm": 0.07414444706146318, "learning_rate": 9.724558331860097e-06, "loss": 0.522, "step": 873 }, { "epoch": 0.43390840263125235, "grad_norm": 0.07241558922926562, "learning_rate": 9.723918148046696e-06, "loss": 0.4884, "step": 874 }, { "epoch": 0.43440486533449174, "grad_norm": 0.07214285111060169, "learning_rate": 9.723277242258151e-06, "loss": 0.5147, "step": 875 }, { "epoch": 0.4349013280377312, "grad_norm": 0.0754089726353843, "learning_rate": 9.72263561459241e-06, "loss": 0.5296, "step": 876 }, { "epoch": 0.43539779074097057, "grad_norm": 0.07237060558190542, "learning_rate": 9.721993265147539e-06, "loss": 0.5269, "step": 877 }, { "epoch": 0.43589425344421, "grad_norm": 0.07486793573002197, "learning_rate": 9.721350194021705e-06, "loss": 0.5243, "step": 878 }, { "epoch": 0.4363907161474494, "grad_norm": 0.08538442120689331, "learning_rate": 9.720706401313199e-06, "loss": 0.5686, "step": 879 }, { "epoch": 0.43688717885068884, "grad_norm": 0.06871510871572026, "learning_rate": 9.720061887120408e-06, "loss": 0.5311, "step": 880 }, { "epoch": 0.4373836415539283, "grad_norm": 0.07273195512905156, "learning_rate": 9.719416651541839e-06, "loss": 0.527, "step": 881 }, { "epoch": 0.43788010425716767, "grad_norm": 0.07259430458734004, "learning_rate": 9.718770694676103e-06, "loss": 0.5396, "step": 882 }, { "epoch": 0.4383765669604071, "grad_norm": 0.0744120171521945, "learning_rate": 9.718124016621929e-06, "loss": 0.5203, "step": 883 }, 
{ "epoch": 0.4388730296636465, "grad_norm": 0.0766040114467303, "learning_rate": 9.717476617478146e-06, "loss": 0.5254, "step": 884 }, { "epoch": 0.43936949236688594, "grad_norm": 0.07952016478925952, "learning_rate": 9.716828497343702e-06, "loss": 0.6041, "step": 885 }, { "epoch": 0.4398659550701254, "grad_norm": 0.07515893930429524, "learning_rate": 9.716179656317651e-06, "loss": 0.5671, "step": 886 }, { "epoch": 0.44036241777336477, "grad_norm": 0.07275833676676684, "learning_rate": 9.715530094499157e-06, "loss": 0.5301, "step": 887 }, { "epoch": 0.4408588804766042, "grad_norm": 0.07382926331753971, "learning_rate": 9.714879811987496e-06, "loss": 0.5506, "step": 888 }, { "epoch": 0.4413553431798436, "grad_norm": 0.076328236140326, "learning_rate": 9.714228808882054e-06, "loss": 0.5138, "step": 889 }, { "epoch": 0.44185180588308304, "grad_norm": 0.07171275885569235, "learning_rate": 9.713577085282325e-06, "loss": 0.5414, "step": 890 }, { "epoch": 0.4423482685863225, "grad_norm": 0.07231082291098188, "learning_rate": 9.712924641287915e-06, "loss": 0.5006, "step": 891 }, { "epoch": 0.44284473128956187, "grad_norm": 0.0746099129815945, "learning_rate": 9.712271476998538e-06, "loss": 0.5469, "step": 892 }, { "epoch": 0.4433411939928013, "grad_norm": 0.07493537326196857, "learning_rate": 9.711617592514024e-06, "loss": 0.5532, "step": 893 }, { "epoch": 0.4438376566960407, "grad_norm": 0.07910135513976987, "learning_rate": 9.710962987934305e-06, "loss": 0.6094, "step": 894 }, { "epoch": 0.44433411939928014, "grad_norm": 0.07074797883606643, "learning_rate": 9.710307663359426e-06, "loss": 0.5247, "step": 895 }, { "epoch": 0.4448305821025195, "grad_norm": 0.07081481487079631, "learning_rate": 9.709651618889546e-06, "loss": 0.5358, "step": 896 }, { "epoch": 0.44532704480575896, "grad_norm": 0.07530093918611445, "learning_rate": 9.70899485462493e-06, "loss": 0.5725, "step": 897 }, { "epoch": 0.4458235075089984, "grad_norm": 0.07414071773290506, "learning_rate": 9.708337370665954e-06, "loss": 0.5377, "step": 898 }, { "epoch": 0.4463199702122378, "grad_norm": 0.07189120190783159, "learning_rate": 9.707679167113102e-06, "loss": 0.5394, "step": 899 }, { "epoch": 0.44681643291547724, "grad_norm": 0.07351739293690249, "learning_rate": 9.707020244066972e-06, "loss": 0.5597, "step": 900 }, { "epoch": 0.4473128956187166, "grad_norm": 0.07610739851431275, "learning_rate": 9.70636060162827e-06, "loss": 0.5451, "step": 901 }, { "epoch": 0.44780935832195606, "grad_norm": 0.07981647785507498, "learning_rate": 9.705700239897809e-06, "loss": 0.5411, "step": 902 }, { "epoch": 0.4483058210251955, "grad_norm": 0.06992496152308564, "learning_rate": 9.705039158976517e-06, "loss": 0.5144, "step": 903 }, { "epoch": 0.4488022837284349, "grad_norm": 0.0700880272974536, "learning_rate": 9.70437735896543e-06, "loss": 0.5535, "step": 904 }, { "epoch": 0.44929874643167433, "grad_norm": 0.07061113641641646, "learning_rate": 9.70371483996569e-06, "loss": 0.5413, "step": 905 }, { "epoch": 0.4497952091349137, "grad_norm": 0.0736462909733856, "learning_rate": 9.703051602078557e-06, "loss": 0.5498, "step": 906 }, { "epoch": 0.45029167183815316, "grad_norm": 0.07228895838116739, "learning_rate": 9.702387645405396e-06, "loss": 0.5474, "step": 907 }, { "epoch": 0.4507881345413926, "grad_norm": 0.07671904216956826, "learning_rate": 9.701722970047679e-06, "loss": 0.5754, "step": 908 }, { "epoch": 0.451284597244632, "grad_norm": 0.07480903224238651, "learning_rate": 9.701057576106991e-06, "loss": 0.5272, "step": 909 }, { "epoch": 
0.45178105994787143, "grad_norm": 0.0748176099276939, "learning_rate": 9.700391463685029e-06, "loss": 0.5494, "step": 910 }, { "epoch": 0.4522775226511108, "grad_norm": 0.09152256842941854, "learning_rate": 9.699724632883598e-06, "loss": 0.5582, "step": 911 }, { "epoch": 0.45277398535435026, "grad_norm": 0.07611999755029966, "learning_rate": 9.699057083804609e-06, "loss": 0.5313, "step": 912 }, { "epoch": 0.45327044805758965, "grad_norm": 0.07116184510313514, "learning_rate": 9.69838881655009e-06, "loss": 0.56, "step": 913 }, { "epoch": 0.4537669107608291, "grad_norm": 0.07594954740931627, "learning_rate": 9.697719831222173e-06, "loss": 0.5031, "step": 914 }, { "epoch": 0.45426337346406853, "grad_norm": 0.0746375214764949, "learning_rate": 9.697050127923102e-06, "loss": 0.556, "step": 915 }, { "epoch": 0.4547598361673079, "grad_norm": 0.07771038247489137, "learning_rate": 9.69637970675523e-06, "loss": 0.5592, "step": 916 }, { "epoch": 0.45525629887054736, "grad_norm": 0.07327276935059207, "learning_rate": 9.695708567821021e-06, "loss": 0.5774, "step": 917 }, { "epoch": 0.45575276157378675, "grad_norm": 0.07594839409251314, "learning_rate": 9.695036711223049e-06, "loss": 0.6068, "step": 918 }, { "epoch": 0.4562492242770262, "grad_norm": 0.07720893984030859, "learning_rate": 9.694364137063993e-06, "loss": 0.5783, "step": 919 }, { "epoch": 0.45674568698026563, "grad_norm": 0.07205569253228451, "learning_rate": 9.693690845446647e-06, "loss": 0.5348, "step": 920 }, { "epoch": 0.457242149683505, "grad_norm": 0.07485535224592804, "learning_rate": 9.693016836473913e-06, "loss": 0.506, "step": 921 }, { "epoch": 0.45773861238674446, "grad_norm": 0.07966542633741693, "learning_rate": 9.692342110248802e-06, "loss": 0.5605, "step": 922 }, { "epoch": 0.45823507508998385, "grad_norm": 0.07722893541906527, "learning_rate": 9.691666666874438e-06, "loss": 0.5184, "step": 923 }, { "epoch": 0.4587315377932233, "grad_norm": 0.0737626275345602, "learning_rate": 9.690990506454045e-06, "loss": 0.5655, "step": 924 }, { "epoch": 0.45922800049646273, "grad_norm": 0.07332977419296617, "learning_rate": 9.69031362909097e-06, "loss": 0.5281, "step": 925 }, { "epoch": 0.4597244631997021, "grad_norm": 0.07823065260085653, "learning_rate": 9.689636034888662e-06, "loss": 0.5293, "step": 926 }, { "epoch": 0.46022092590294156, "grad_norm": 0.07332590189924752, "learning_rate": 9.688957723950675e-06, "loss": 0.5392, "step": 927 }, { "epoch": 0.46071738860618094, "grad_norm": 0.07084330672852263, "learning_rate": 9.688278696380684e-06, "loss": 0.5319, "step": 928 }, { "epoch": 0.4612138513094204, "grad_norm": 0.0737821820663325, "learning_rate": 9.687598952282462e-06, "loss": 0.5817, "step": 929 }, { "epoch": 0.4617103140126598, "grad_norm": 0.08094122475419381, "learning_rate": 9.686918491759904e-06, "loss": 0.5447, "step": 930 }, { "epoch": 0.4622067767158992, "grad_norm": 0.07590994311402643, "learning_rate": 9.686237314917e-06, "loss": 0.5669, "step": 931 }, { "epoch": 0.46270323941913866, "grad_norm": 0.07528128755147503, "learning_rate": 9.685555421857864e-06, "loss": 0.5497, "step": 932 }, { "epoch": 0.46319970212237804, "grad_norm": 0.07434670733221073, "learning_rate": 9.684872812686706e-06, "loss": 0.5615, "step": 933 }, { "epoch": 0.4636961648256175, "grad_norm": 0.0690160514658518, "learning_rate": 9.684189487507857e-06, "loss": 0.5144, "step": 934 }, { "epoch": 0.46419262752885687, "grad_norm": 0.07220825561231929, "learning_rate": 9.683505446425749e-06, "loss": 0.5099, "step": 935 }, { "epoch": 0.4646890902320963, 
"grad_norm": 0.07408178533769717, "learning_rate": 9.682820689544927e-06, "loss": 0.5517, "step": 936 }, { "epoch": 0.46518555293533576, "grad_norm": 0.0760943537779404, "learning_rate": 9.682135216970048e-06, "loss": 0.5589, "step": 937 }, { "epoch": 0.46568201563857514, "grad_norm": 0.07109341085075326, "learning_rate": 9.681449028805872e-06, "loss": 0.5042, "step": 938 }, { "epoch": 0.4661784783418146, "grad_norm": 0.07783378438863178, "learning_rate": 9.680762125157273e-06, "loss": 0.5291, "step": 939 }, { "epoch": 0.46667494104505397, "grad_norm": 0.07299282855701139, "learning_rate": 9.680074506129235e-06, "loss": 0.5279, "step": 940 }, { "epoch": 0.4671714037482934, "grad_norm": 0.13566447811720078, "learning_rate": 9.679386171826846e-06, "loss": 0.5316, "step": 941 }, { "epoch": 0.46766786645153285, "grad_norm": 0.07187672347320467, "learning_rate": 9.678697122355311e-06, "loss": 0.5478, "step": 942 }, { "epoch": 0.46816432915477224, "grad_norm": 0.07340560611836003, "learning_rate": 9.678007357819936e-06, "loss": 0.5629, "step": 943 }, { "epoch": 0.4686607918580117, "grad_norm": 0.08015408546831505, "learning_rate": 9.677316878326144e-06, "loss": 0.5513, "step": 944 }, { "epoch": 0.46915725456125107, "grad_norm": 0.07401280117648018, "learning_rate": 9.676625683979462e-06, "loss": 0.5633, "step": 945 }, { "epoch": 0.4696537172644905, "grad_norm": 0.07324544085941354, "learning_rate": 9.675933774885529e-06, "loss": 0.5684, "step": 946 }, { "epoch": 0.4701501799677299, "grad_norm": 0.07175023640004917, "learning_rate": 9.67524115115009e-06, "loss": 0.5248, "step": 947 }, { "epoch": 0.47064664267096934, "grad_norm": 0.07095640867631502, "learning_rate": 9.674547812879002e-06, "loss": 0.519, "step": 948 }, { "epoch": 0.4711431053742088, "grad_norm": 0.07508488266902727, "learning_rate": 9.673853760178233e-06, "loss": 0.5225, "step": 949 }, { "epoch": 0.47163956807744817, "grad_norm": 0.07858038399219275, "learning_rate": 9.673158993153857e-06, "loss": 0.5636, "step": 950 }, { "epoch": 0.4721360307806876, "grad_norm": 0.07043208671603315, "learning_rate": 9.672463511912056e-06, "loss": 0.5295, "step": 951 }, { "epoch": 0.472632493483927, "grad_norm": 0.07784227872499093, "learning_rate": 9.671767316559124e-06, "loss": 0.5456, "step": 952 }, { "epoch": 0.47312895618716644, "grad_norm": 0.07574228547831556, "learning_rate": 9.671070407201465e-06, "loss": 0.5656, "step": 953 }, { "epoch": 0.4736254188904059, "grad_norm": 0.06957012762899452, "learning_rate": 9.67037278394559e-06, "loss": 0.5382, "step": 954 }, { "epoch": 0.47412188159364527, "grad_norm": 0.09679534685898586, "learning_rate": 9.669674446898116e-06, "loss": 0.5197, "step": 955 }, { "epoch": 0.4746183442968847, "grad_norm": 0.06803644728978617, "learning_rate": 9.668975396165776e-06, "loss": 0.4857, "step": 956 }, { "epoch": 0.4751148070001241, "grad_norm": 0.07328282551492919, "learning_rate": 9.66827563185541e-06, "loss": 0.5177, "step": 957 }, { "epoch": 0.47561126970336354, "grad_norm": 0.06901815690540343, "learning_rate": 9.667575154073962e-06, "loss": 0.5271, "step": 958 }, { "epoch": 0.476107732406603, "grad_norm": 0.071811828342126, "learning_rate": 9.666873962928491e-06, "loss": 0.5402, "step": 959 }, { "epoch": 0.47660419510984237, "grad_norm": 0.0745044806945251, "learning_rate": 9.666172058526162e-06, "loss": 0.5485, "step": 960 }, { "epoch": 0.4771006578130818, "grad_norm": 0.06727332445949166, "learning_rate": 9.66546944097425e-06, "loss": 0.5302, "step": 961 }, { "epoch": 0.4775971205163212, "grad_norm": 
0.0723126514873621, "learning_rate": 9.664766110380141e-06, "loss": 0.5599, "step": 962 }, { "epoch": 0.47809358321956064, "grad_norm": 0.07395350429498122, "learning_rate": 9.664062066851325e-06, "loss": 0.5192, "step": 963 }, { "epoch": 0.4785900459228, "grad_norm": 0.07378023026691215, "learning_rate": 9.663357310495404e-06, "loss": 0.5469, "step": 964 }, { "epoch": 0.47908650862603946, "grad_norm": 0.07156838728300292, "learning_rate": 9.66265184142009e-06, "loss": 0.5467, "step": 965 }, { "epoch": 0.4795829713292789, "grad_norm": 0.07649205641151963, "learning_rate": 9.661945659733201e-06, "loss": 0.5472, "step": 966 }, { "epoch": 0.4800794340325183, "grad_norm": 0.0729425278563696, "learning_rate": 9.661238765542668e-06, "loss": 0.5397, "step": 967 }, { "epoch": 0.48057589673575773, "grad_norm": 0.07096620529347483, "learning_rate": 9.660531158956525e-06, "loss": 0.5676, "step": 968 }, { "epoch": 0.4810723594389971, "grad_norm": 0.07513824604507036, "learning_rate": 9.659822840082922e-06, "loss": 0.5125, "step": 969 }, { "epoch": 0.48156882214223656, "grad_norm": 0.07212660742880082, "learning_rate": 9.659113809030112e-06, "loss": 0.5174, "step": 970 }, { "epoch": 0.482065284845476, "grad_norm": 0.07547232447246666, "learning_rate": 9.65840406590646e-06, "loss": 0.5458, "step": 971 }, { "epoch": 0.4825617475487154, "grad_norm": 0.07145748264825304, "learning_rate": 9.657693610820437e-06, "loss": 0.5544, "step": 972 }, { "epoch": 0.48305821025195483, "grad_norm": 0.07035145889253784, "learning_rate": 9.656982443880626e-06, "loss": 0.5336, "step": 973 }, { "epoch": 0.4835546729551942, "grad_norm": 0.07483673669785795, "learning_rate": 9.65627056519572e-06, "loss": 0.5237, "step": 974 }, { "epoch": 0.48405113565843366, "grad_norm": 0.07455887409410537, "learning_rate": 9.655557974874512e-06, "loss": 0.5963, "step": 975 }, { "epoch": 0.4845475983616731, "grad_norm": 0.09159875498697193, "learning_rate": 9.654844673025917e-06, "loss": 0.6166, "step": 976 }, { "epoch": 0.4850440610649125, "grad_norm": 0.07885063412079849, "learning_rate": 9.654130659758947e-06, "loss": 0.5867, "step": 977 }, { "epoch": 0.48554052376815193, "grad_norm": 0.07055769737141665, "learning_rate": 9.653415935182728e-06, "loss": 0.5366, "step": 978 }, { "epoch": 0.4860369864713913, "grad_norm": 0.07245998065157473, "learning_rate": 9.652700499406497e-06, "loss": 0.5324, "step": 979 }, { "epoch": 0.48653344917463076, "grad_norm": 0.07611782049187152, "learning_rate": 9.651984352539595e-06, "loss": 0.5655, "step": 980 }, { "epoch": 0.48702991187787015, "grad_norm": 0.07353257519946095, "learning_rate": 9.651267494691471e-06, "loss": 0.5203, "step": 981 }, { "epoch": 0.4875263745811096, "grad_norm": 0.07449068355216579, "learning_rate": 9.65054992597169e-06, "loss": 0.5287, "step": 982 }, { "epoch": 0.48802283728434903, "grad_norm": 0.07332839129254907, "learning_rate": 9.64983164648992e-06, "loss": 0.5633, "step": 983 }, { "epoch": 0.4885192999875884, "grad_norm": 0.07099306409257887, "learning_rate": 9.649112656355936e-06, "loss": 0.5225, "step": 984 }, { "epoch": 0.48901576269082786, "grad_norm": 0.07467585354112222, "learning_rate": 9.648392955679624e-06, "loss": 0.5589, "step": 985 }, { "epoch": 0.48951222539406725, "grad_norm": 0.07407066249938493, "learning_rate": 9.647672544570981e-06, "loss": 0.5499, "step": 986 }, { "epoch": 0.4900086880973067, "grad_norm": 0.06921009250090104, "learning_rate": 9.64695142314011e-06, "loss": 0.5514, "step": 987 }, { "epoch": 0.49050515080054613, "grad_norm": 
0.07365692083114214, "learning_rate": 9.646229591497222e-06, "loss": 0.5355, "step": 988 }, { "epoch": 0.4910016135037855, "grad_norm": 0.0718859256778461, "learning_rate": 9.645507049752637e-06, "loss": 0.4963, "step": 989 }, { "epoch": 0.49149807620702496, "grad_norm": 0.07324913526100718, "learning_rate": 9.644783798016785e-06, "loss": 0.5726, "step": 990 }, { "epoch": 0.49199453891026435, "grad_norm": 0.08062267007727522, "learning_rate": 9.644059836400203e-06, "loss": 0.5604, "step": 991 }, { "epoch": 0.4924910016135038, "grad_norm": 0.0721895312156475, "learning_rate": 9.643335165013536e-06, "loss": 0.5275, "step": 992 }, { "epoch": 0.49298746431674323, "grad_norm": 0.07242784574028176, "learning_rate": 9.642609783967539e-06, "loss": 0.5313, "step": 993 }, { "epoch": 0.4934839270199826, "grad_norm": 0.07572575082005072, "learning_rate": 9.641883693373077e-06, "loss": 0.5442, "step": 994 }, { "epoch": 0.49398038972322206, "grad_norm": 0.07228182174445916, "learning_rate": 9.641156893341117e-06, "loss": 0.5612, "step": 995 }, { "epoch": 0.49447685242646144, "grad_norm": 0.07245474432940825, "learning_rate": 9.640429383982743e-06, "loss": 0.522, "step": 996 }, { "epoch": 0.4949733151297009, "grad_norm": 0.07547168525249354, "learning_rate": 9.63970116540914e-06, "loss": 0.5637, "step": 997 }, { "epoch": 0.49546977783294033, "grad_norm": 0.07571162844471516, "learning_rate": 9.638972237731608e-06, "loss": 0.5458, "step": 998 }, { "epoch": 0.4959662405361797, "grad_norm": 0.06792756036208669, "learning_rate": 9.638242601061547e-06, "loss": 0.5037, "step": 999 }, { "epoch": 0.49646270323941916, "grad_norm": 0.07543208718463203, "learning_rate": 9.637512255510475e-06, "loss": 0.5627, "step": 1000 }, { "epoch": 0.49695916594265854, "grad_norm": 0.0744355804303214, "learning_rate": 9.63678120119001e-06, "loss": 0.5292, "step": 1001 }, { "epoch": 0.497455628645898, "grad_norm": 0.07343036450567166, "learning_rate": 9.636049438211883e-06, "loss": 0.5386, "step": 1002 }, { "epoch": 0.49795209134913737, "grad_norm": 0.0705676280818943, "learning_rate": 9.635316966687935e-06, "loss": 0.5206, "step": 1003 }, { "epoch": 0.4984485540523768, "grad_norm": 0.07682429996586587, "learning_rate": 9.63458378673011e-06, "loss": 0.5468, "step": 1004 }, { "epoch": 0.49894501675561626, "grad_norm": 0.07274415834069523, "learning_rate": 9.633849898450463e-06, "loss": 0.5605, "step": 1005 }, { "epoch": 0.49944147945885564, "grad_norm": 0.06851151007874164, "learning_rate": 9.633115301961156e-06, "loss": 0.4902, "step": 1006 }, { "epoch": 0.4999379421620951, "grad_norm": 0.07046304690001261, "learning_rate": 9.632379997374462e-06, "loss": 0.496, "step": 1007 }, { "epoch": 0.5004344048653345, "grad_norm": 0.07583845542191688, "learning_rate": 9.63164398480276e-06, "loss": 0.5543, "step": 1008 }, { "epoch": 0.5004344048653345, "eval_loss": 0.5403582453727722, "eval_runtime": 259.5561, "eval_samples_per_second": 116.942, "eval_steps_per_second": 14.621, "step": 1008 }, { "epoch": 0.5009308675685739, "grad_norm": 0.07170711724531288, "learning_rate": 9.630907264358538e-06, "loss": 0.5503, "step": 1009 }, { "epoch": 0.5014273302718133, "grad_norm": 0.07524070502561764, "learning_rate": 9.630169836154391e-06, "loss": 0.5661, "step": 1010 }, { "epoch": 0.5019237929750527, "grad_norm": 0.07608869983603515, "learning_rate": 9.629431700303025e-06, "loss": 0.5455, "step": 1011 }, { "epoch": 0.5024202556782922, "grad_norm": 0.07104754400679542, "learning_rate": 9.628692856917249e-06, "loss": 0.5146, "step": 1012 }, { "epoch": 
0.5029167183815316, "grad_norm": 0.0782620771521833, "learning_rate": 9.627953306109985e-06, "loss": 0.5954, "step": 1013 }, { "epoch": 0.503413181084771, "grad_norm": 0.07064620528254402, "learning_rate": 9.627213047994265e-06, "loss": 0.5271, "step": 1014 }, { "epoch": 0.5039096437880104, "grad_norm": 0.0728348121611852, "learning_rate": 9.62647208268322e-06, "loss": 0.5188, "step": 1015 }, { "epoch": 0.5044061064912498, "grad_norm": 0.076035071497151, "learning_rate": 9.625730410290097e-06, "loss": 0.5236, "step": 1016 }, { "epoch": 0.5049025691944893, "grad_norm": 0.07007830649545077, "learning_rate": 9.624988030928248e-06, "loss": 0.539, "step": 1017 }, { "epoch": 0.5053990318977287, "grad_norm": 0.07422124598706549, "learning_rate": 9.624244944711137e-06, "loss": 0.5177, "step": 1018 }, { "epoch": 0.505895494600968, "grad_norm": 0.07464012356026203, "learning_rate": 9.623501151752329e-06, "loss": 0.5557, "step": 1019 }, { "epoch": 0.5063919573042075, "grad_norm": 0.07069286385577062, "learning_rate": 9.622756652165501e-06, "loss": 0.5034, "step": 1020 }, { "epoch": 0.5068884200074469, "grad_norm": 0.0731620752819763, "learning_rate": 9.622011446064439e-06, "loss": 0.5405, "step": 1021 }, { "epoch": 0.5073848827106864, "grad_norm": 0.0747099004906738, "learning_rate": 9.621265533563038e-06, "loss": 0.5481, "step": 1022 }, { "epoch": 0.5078813454139258, "grad_norm": 0.0724421928759601, "learning_rate": 9.620518914775295e-06, "loss": 0.53, "step": 1023 }, { "epoch": 0.5083778081171652, "grad_norm": 0.07314562621509882, "learning_rate": 9.61977158981532e-06, "loss": 0.5443, "step": 1024 }, { "epoch": 0.5088742708204046, "grad_norm": 0.07300052887850178, "learning_rate": 9.61902355879733e-06, "loss": 0.5279, "step": 1025 }, { "epoch": 0.509370733523644, "grad_norm": 0.06762167908780276, "learning_rate": 9.61827482183565e-06, "loss": 0.4825, "step": 1026 }, { "epoch": 0.5098671962268835, "grad_norm": 0.07544837802714413, "learning_rate": 9.617525379044712e-06, "loss": 0.5472, "step": 1027 }, { "epoch": 0.5103636589301229, "grad_norm": 0.07456712402407543, "learning_rate": 9.616775230539057e-06, "loss": 0.5526, "step": 1028 }, { "epoch": 0.5108601216333623, "grad_norm": 0.0686351546215355, "learning_rate": 9.61602437643333e-06, "loss": 0.5186, "step": 1029 }, { "epoch": 0.5113565843366017, "grad_norm": 0.07819276782161572, "learning_rate": 9.615272816842292e-06, "loss": 0.5653, "step": 1030 }, { "epoch": 0.5118530470398411, "grad_norm": 0.07382695755499852, "learning_rate": 9.614520551880802e-06, "loss": 0.5593, "step": 1031 }, { "epoch": 0.5123495097430806, "grad_norm": 0.07567047708008438, "learning_rate": 9.613767581663836e-06, "loss": 0.5334, "step": 1032 }, { "epoch": 0.51284597244632, "grad_norm": 0.07617923757435242, "learning_rate": 9.61301390630647e-06, "loss": 0.5528, "step": 1033 }, { "epoch": 0.5133424351495594, "grad_norm": 0.07073358897699591, "learning_rate": 9.612259525923893e-06, "loss": 0.5084, "step": 1034 }, { "epoch": 0.5138388978527988, "grad_norm": 0.08154535797533109, "learning_rate": 9.611504440631398e-06, "loss": 0.5354, "step": 1035 }, { "epoch": 0.5143353605560382, "grad_norm": 0.07259202484825346, "learning_rate": 9.610748650544391e-06, "loss": 0.531, "step": 1036 }, { "epoch": 0.5148318232592777, "grad_norm": 0.07867424820741396, "learning_rate": 9.609992155778377e-06, "loss": 0.5624, "step": 1037 }, { "epoch": 0.5153282859625171, "grad_norm": 0.07292637512788251, "learning_rate": 9.609234956448983e-06, "loss": 0.5455, "step": 1038 }, { "epoch": 
0.5158247486657564, "grad_norm": 0.07415662463746647, "learning_rate": 9.608477052671926e-06, "loss": 0.5407, "step": 1039 }, { "epoch": 0.5163212113689959, "grad_norm": 0.07079322361068474, "learning_rate": 9.607718444563044e-06, "loss": 0.5477, "step": 1040 }, { "epoch": 0.5168176740722353, "grad_norm": 0.07276915746515902, "learning_rate": 9.606959132238276e-06, "loss": 0.5467, "step": 1041 }, { "epoch": 0.5173141367754748, "grad_norm": 0.07502606002689027, "learning_rate": 9.606199115813672e-06, "loss": 0.5336, "step": 1042 }, { "epoch": 0.5178105994787141, "grad_norm": 0.07326432550036611, "learning_rate": 9.605438395405388e-06, "loss": 0.557, "step": 1043 }, { "epoch": 0.5183070621819535, "grad_norm": 0.07453174120545882, "learning_rate": 9.604676971129687e-06, "loss": 0.5426, "step": 1044 }, { "epoch": 0.518803524885193, "grad_norm": 0.07528215530131163, "learning_rate": 9.603914843102941e-06, "loss": 0.5746, "step": 1045 }, { "epoch": 0.5192999875884324, "grad_norm": 0.07100176094569843, "learning_rate": 9.603152011441631e-06, "loss": 0.5121, "step": 1046 }, { "epoch": 0.5197964502916719, "grad_norm": 0.07327171771917512, "learning_rate": 9.602388476262342e-06, "loss": 0.507, "step": 1047 }, { "epoch": 0.5202929129949112, "grad_norm": 0.0748795466452681, "learning_rate": 9.601624237681769e-06, "loss": 0.5467, "step": 1048 }, { "epoch": 0.5207893756981506, "grad_norm": 0.07123730635813282, "learning_rate": 9.600859295816708e-06, "loss": 0.5062, "step": 1049 }, { "epoch": 0.5212858384013901, "grad_norm": 0.07552662977747024, "learning_rate": 9.600093650784077e-06, "loss": 0.541, "step": 1050 }, { "epoch": 0.5217823011046295, "grad_norm": 0.069532099508212, "learning_rate": 9.599327302700888e-06, "loss": 0.5098, "step": 1051 }, { "epoch": 0.522278763807869, "grad_norm": 0.07438165535163875, "learning_rate": 9.598560251684265e-06, "loss": 0.5462, "step": 1052 }, { "epoch": 0.5227752265111083, "grad_norm": 0.07334192360035025, "learning_rate": 9.59779249785144e-06, "loss": 0.5125, "step": 1053 }, { "epoch": 0.5232716892143477, "grad_norm": 0.0712734477574473, "learning_rate": 9.597024041319752e-06, "loss": 0.5463, "step": 1054 }, { "epoch": 0.5237681519175872, "grad_norm": 0.0737924898215157, "learning_rate": 9.596254882206645e-06, "loss": 0.5285, "step": 1055 }, { "epoch": 0.5242646146208266, "grad_norm": 0.07618318262965273, "learning_rate": 9.595485020629676e-06, "loss": 0.5492, "step": 1056 }, { "epoch": 0.5247610773240661, "grad_norm": 0.07006285871473625, "learning_rate": 9.594714456706504e-06, "loss": 0.504, "step": 1057 }, { "epoch": 0.5252575400273054, "grad_norm": 0.07399743684753544, "learning_rate": 9.593943190554899e-06, "loss": 0.5292, "step": 1058 }, { "epoch": 0.5257540027305448, "grad_norm": 0.07619518300934514, "learning_rate": 9.593171222292734e-06, "loss": 0.5795, "step": 1059 }, { "epoch": 0.5262504654337843, "grad_norm": 0.071100769018996, "learning_rate": 9.592398552037995e-06, "loss": 0.5385, "step": 1060 }, { "epoch": 0.5267469281370237, "grad_norm": 0.06999405161525964, "learning_rate": 9.59162517990877e-06, "loss": 0.5259, "step": 1061 }, { "epoch": 0.5272433908402632, "grad_norm": 0.0741871177310929, "learning_rate": 9.590851106023257e-06, "loss": 0.5522, "step": 1062 }, { "epoch": 0.5277398535435025, "grad_norm": 0.06944998603394864, "learning_rate": 9.590076330499763e-06, "loss": 0.5268, "step": 1063 }, { "epoch": 0.5282363162467419, "grad_norm": 0.07174352781617274, "learning_rate": 9.589300853456698e-06, "loss": 0.5529, "step": 1064 }, { "epoch": 
0.5287327789499814, "grad_norm": 0.07175214299724526, "learning_rate": 9.58852467501258e-06, "loss": 0.5444, "step": 1065 }, { "epoch": 0.5292292416532208, "grad_norm": 0.07400820280039133, "learning_rate": 9.587747795286037e-06, "loss": 0.5185, "step": 1066 }, { "epoch": 0.5297257043564603, "grad_norm": 0.07475007237984427, "learning_rate": 9.586970214395804e-06, "loss": 0.5226, "step": 1067 }, { "epoch": 0.5302221670596996, "grad_norm": 0.07264770723943843, "learning_rate": 9.586191932460718e-06, "loss": 0.529, "step": 1068 }, { "epoch": 0.530718629762939, "grad_norm": 0.07050995046572377, "learning_rate": 9.58541294959973e-06, "loss": 0.4913, "step": 1069 }, { "epoch": 0.5312150924661785, "grad_norm": 0.07586926085270411, "learning_rate": 9.584633265931894e-06, "loss": 0.5405, "step": 1070 }, { "epoch": 0.5317115551694179, "grad_norm": 0.06959496775952001, "learning_rate": 9.583852881576372e-06, "loss": 0.5308, "step": 1071 }, { "epoch": 0.5322080178726574, "grad_norm": 0.078697865178032, "learning_rate": 9.583071796652434e-06, "loss": 0.5671, "step": 1072 }, { "epoch": 0.5327044805758967, "grad_norm": 0.0706600680730896, "learning_rate": 9.582290011279457e-06, "loss": 0.5489, "step": 1073 }, { "epoch": 0.5332009432791361, "grad_norm": 0.07534168289206372, "learning_rate": 9.581507525576922e-06, "loss": 0.502, "step": 1074 }, { "epoch": 0.5336974059823756, "grad_norm": 0.07268063895409936, "learning_rate": 9.58072433966442e-06, "loss": 0.5371, "step": 1075 }, { "epoch": 0.534193868685615, "grad_norm": 0.07665390496410063, "learning_rate": 9.57994045366165e-06, "loss": 0.5468, "step": 1076 }, { "epoch": 0.5346903313888545, "grad_norm": 0.06923978802164854, "learning_rate": 9.579155867688415e-06, "loss": 0.522, "step": 1077 }, { "epoch": 0.5351867940920938, "grad_norm": 0.07948336708054222, "learning_rate": 9.578370581864627e-06, "loss": 0.5454, "step": 1078 }, { "epoch": 0.5356832567953332, "grad_norm": 0.07172413843820762, "learning_rate": 9.577584596310305e-06, "loss": 0.5099, "step": 1079 }, { "epoch": 0.5361797194985727, "grad_norm": 0.07080159293682067, "learning_rate": 9.576797911145572e-06, "loss": 0.5212, "step": 1080 }, { "epoch": 0.5366761822018121, "grad_norm": 0.07070283593873587, "learning_rate": 9.576010526490662e-06, "loss": 0.5213, "step": 1081 }, { "epoch": 0.5371726449050515, "grad_norm": 0.07139818749133874, "learning_rate": 9.575222442465915e-06, "loss": 0.5458, "step": 1082 }, { "epoch": 0.5376691076082909, "grad_norm": 0.07288959818990044, "learning_rate": 9.574433659191775e-06, "loss": 0.5262, "step": 1083 }, { "epoch": 0.5381655703115303, "grad_norm": 0.0727858790407329, "learning_rate": 9.573644176788795e-06, "loss": 0.5592, "step": 1084 }, { "epoch": 0.5386620330147698, "grad_norm": 0.07284011673299268, "learning_rate": 9.572853995377635e-06, "loss": 0.5534, "step": 1085 }, { "epoch": 0.5391584957180092, "grad_norm": 0.07010630635054382, "learning_rate": 9.572063115079063e-06, "loss": 0.5031, "step": 1086 }, { "epoch": 0.5396549584212486, "grad_norm": 0.07812136413943721, "learning_rate": 9.57127153601395e-06, "loss": 0.5893, "step": 1087 }, { "epoch": 0.540151421124488, "grad_norm": 0.07326998894383631, "learning_rate": 9.57047925830328e-06, "loss": 0.5561, "step": 1088 }, { "epoch": 0.5406478838277274, "grad_norm": 0.06966971851917615, "learning_rate": 9.569686282068135e-06, "loss": 0.5104, "step": 1089 }, { "epoch": 0.5411443465309669, "grad_norm": 0.07507880630445724, "learning_rate": 9.568892607429712e-06, "loss": 0.5341, "step": 1090 }, { "epoch": 
0.5416408092342063, "grad_norm": 0.07202610071319031, "learning_rate": 9.568098234509312e-06, "loss": 0.555, "step": 1091 }, { "epoch": 0.5421372719374457, "grad_norm": 0.07189145724012203, "learning_rate": 9.567303163428338e-06, "loss": 0.539, "step": 1092 }, { "epoch": 0.5426337346406851, "grad_norm": 0.07689989643038242, "learning_rate": 9.566507394308309e-06, "loss": 0.5525, "step": 1093 }, { "epoch": 0.5431301973439245, "grad_norm": 0.07067817954470944, "learning_rate": 9.565710927270843e-06, "loss": 0.5403, "step": 1094 }, { "epoch": 0.543626660047164, "grad_norm": 0.07050407504479753, "learning_rate": 9.564913762437667e-06, "loss": 0.5197, "step": 1095 }, { "epoch": 0.5441231227504034, "grad_norm": 0.07475858179014994, "learning_rate": 9.564115899930614e-06, "loss": 0.54, "step": 1096 }, { "epoch": 0.5446195854536428, "grad_norm": 0.07052195765532154, "learning_rate": 9.563317339871626e-06, "loss": 0.5485, "step": 1097 }, { "epoch": 0.5451160481568822, "grad_norm": 0.07254645987577656, "learning_rate": 9.562518082382751e-06, "loss": 0.5237, "step": 1098 }, { "epoch": 0.5456125108601216, "grad_norm": 0.07384503839511744, "learning_rate": 9.561718127586141e-06, "loss": 0.5251, "step": 1099 }, { "epoch": 0.5461089735633611, "grad_norm": 0.07695343336131556, "learning_rate": 9.560917475604057e-06, "loss": 0.5794, "step": 1100 }, { "epoch": 0.5466054362666005, "grad_norm": 0.07275898006626658, "learning_rate": 9.560116126558864e-06, "loss": 0.5573, "step": 1101 }, { "epoch": 0.5471018989698399, "grad_norm": 0.07389739244860348, "learning_rate": 9.559314080573038e-06, "loss": 0.5215, "step": 1102 }, { "epoch": 0.5475983616730793, "grad_norm": 0.07810033073554441, "learning_rate": 9.558511337769158e-06, "loss": 0.539, "step": 1103 }, { "epoch": 0.5480948243763187, "grad_norm": 0.0731889932716798, "learning_rate": 9.557707898269912e-06, "loss": 0.5211, "step": 1104 }, { "epoch": 0.5485912870795582, "grad_norm": 0.07607334826510194, "learning_rate": 9.55690376219809e-06, "loss": 0.5702, "step": 1105 }, { "epoch": 0.5490877497827976, "grad_norm": 0.07426832365368147, "learning_rate": 9.556098929676591e-06, "loss": 0.5429, "step": 1106 }, { "epoch": 0.549584212486037, "grad_norm": 0.07795795275763351, "learning_rate": 9.555293400828422e-06, "loss": 0.5335, "step": 1107 }, { "epoch": 0.5500806751892764, "grad_norm": 0.07490574809345175, "learning_rate": 9.554487175776697e-06, "loss": 0.5489, "step": 1108 }, { "epoch": 0.5505771378925158, "grad_norm": 0.07261446271483263, "learning_rate": 9.553680254644631e-06, "loss": 0.5622, "step": 1109 }, { "epoch": 0.5510736005957553, "grad_norm": 0.07465238791494326, "learning_rate": 9.552872637555553e-06, "loss": 0.5249, "step": 1110 }, { "epoch": 0.5515700632989947, "grad_norm": 0.07195004690331216, "learning_rate": 9.55206432463289e-06, "loss": 0.5016, "step": 1111 }, { "epoch": 0.552066526002234, "grad_norm": 0.07826854935318284, "learning_rate": 9.551255316000183e-06, "loss": 0.5317, "step": 1112 }, { "epoch": 0.5525629887054735, "grad_norm": 0.07676533756395308, "learning_rate": 9.550445611781073e-06, "loss": 0.569, "step": 1113 }, { "epoch": 0.5530594514087129, "grad_norm": 0.07932871190286644, "learning_rate": 9.549635212099315e-06, "loss": 0.5872, "step": 1114 }, { "epoch": 0.5535559141119524, "grad_norm": 0.0730298940147842, "learning_rate": 9.54882411707876e-06, "loss": 0.594, "step": 1115 }, { "epoch": 0.5540523768151917, "grad_norm": 0.07036776038592685, "learning_rate": 9.548012326843374e-06, "loss": 0.5188, "step": 1116 }, { "epoch": 
0.5545488395184311, "grad_norm": 0.0714932448289326, "learning_rate": 9.547199841517228e-06, "loss": 0.5192, "step": 1117 }, { "epoch": 0.5550453022216706, "grad_norm": 0.0750650728139119, "learning_rate": 9.546386661224492e-06, "loss": 0.5213, "step": 1118 }, { "epoch": 0.55554176492491, "grad_norm": 0.0718806510750882, "learning_rate": 9.545572786089452e-06, "loss": 0.5466, "step": 1119 }, { "epoch": 0.5560382276281495, "grad_norm": 0.07230351199367406, "learning_rate": 9.544758216236494e-06, "loss": 0.5218, "step": 1120 }, { "epoch": 0.5565346903313888, "grad_norm": 0.06979791491219231, "learning_rate": 9.543942951790113e-06, "loss": 0.5168, "step": 1121 }, { "epoch": 0.5570311530346282, "grad_norm": 0.07957089363539423, "learning_rate": 9.543126992874909e-06, "loss": 0.5539, "step": 1122 }, { "epoch": 0.5575276157378677, "grad_norm": 0.07492299237904469, "learning_rate": 9.542310339615586e-06, "loss": 0.5481, "step": 1123 }, { "epoch": 0.5580240784411071, "grad_norm": 0.07431977064563716, "learning_rate": 9.541492992136958e-06, "loss": 0.5316, "step": 1124 }, { "epoch": 0.5585205411443466, "grad_norm": 0.07458363210156903, "learning_rate": 9.540674950563943e-06, "loss": 0.4946, "step": 1125 }, { "epoch": 0.5590170038475859, "grad_norm": 0.07055614119573127, "learning_rate": 9.539856215021568e-06, "loss": 0.5266, "step": 1126 }, { "epoch": 0.5595134665508253, "grad_norm": 0.07321974300708112, "learning_rate": 9.539036785634961e-06, "loss": 0.5345, "step": 1127 }, { "epoch": 0.5600099292540648, "grad_norm": 0.06997988509814153, "learning_rate": 9.53821666252936e-06, "loss": 0.543, "step": 1128 }, { "epoch": 0.5605063919573042, "grad_norm": 0.06901480956636961, "learning_rate": 9.537395845830105e-06, "loss": 0.5296, "step": 1129 }, { "epoch": 0.5610028546605437, "grad_norm": 0.06729539521159718, "learning_rate": 9.536574335662647e-06, "loss": 0.508, "step": 1130 }, { "epoch": 0.561499317363783, "grad_norm": 0.07692036381800664, "learning_rate": 9.535752132152542e-06, "loss": 0.5671, "step": 1131 }, { "epoch": 0.5619957800670224, "grad_norm": 0.07201167584301307, "learning_rate": 9.534929235425447e-06, "loss": 0.5745, "step": 1132 }, { "epoch": 0.5624922427702619, "grad_norm": 0.07778363775383548, "learning_rate": 9.53410564560713e-06, "loss": 0.5466, "step": 1133 }, { "epoch": 0.5629887054735013, "grad_norm": 0.07348871677637435, "learning_rate": 9.533281362823465e-06, "loss": 0.5215, "step": 1134 }, { "epoch": 0.5634851681767408, "grad_norm": 0.07291607632635455, "learning_rate": 9.532456387200431e-06, "loss": 0.5355, "step": 1135 }, { "epoch": 0.5639816308799801, "grad_norm": 0.07240540144940869, "learning_rate": 9.531630718864108e-06, "loss": 0.5696, "step": 1136 }, { "epoch": 0.5644780935832195, "grad_norm": 0.08262533186527253, "learning_rate": 9.53080435794069e-06, "loss": 0.5232, "step": 1137 }, { "epoch": 0.564974556286459, "grad_norm": 0.07723729011533378, "learning_rate": 9.52997730455647e-06, "loss": 0.5464, "step": 1138 }, { "epoch": 0.5654710189896984, "grad_norm": 0.07315058133761201, "learning_rate": 9.529149558837853e-06, "loss": 0.5389, "step": 1139 }, { "epoch": 0.5659674816929379, "grad_norm": 0.06782611091937678, "learning_rate": 9.528321120911345e-06, "loss": 0.4988, "step": 1140 }, { "epoch": 0.5664639443961772, "grad_norm": 0.07674985492643183, "learning_rate": 9.527491990903562e-06, "loss": 0.5416, "step": 1141 }, { "epoch": 0.5669604070994166, "grad_norm": 0.07480549990375804, "learning_rate": 9.526662168941219e-06, "loss": 0.5337, "step": 1142 }, { "epoch": 
0.5674568698026561, "grad_norm": 0.07146162705187283, "learning_rate": 9.525831655151143e-06, "loss": 0.5153, "step": 1143 }, { "epoch": 0.5679533325058955, "grad_norm": 0.07313978051760994, "learning_rate": 9.525000449660264e-06, "loss": 0.5112, "step": 1144 }, { "epoch": 0.568449795209135, "grad_norm": 0.07549591293564042, "learning_rate": 9.524168552595621e-06, "loss": 0.5301, "step": 1145 }, { "epoch": 0.5689462579123743, "grad_norm": 0.07296790622958364, "learning_rate": 9.523335964084352e-06, "loss": 0.5089, "step": 1146 }, { "epoch": 0.5694427206156137, "grad_norm": 0.074992297201754, "learning_rate": 9.522502684253709e-06, "loss": 0.543, "step": 1147 }, { "epoch": 0.5699391833188532, "grad_norm": 0.07555928711395048, "learning_rate": 9.521668713231042e-06, "loss": 0.5485, "step": 1148 }, { "epoch": 0.5704356460220926, "grad_norm": 0.07238072731934045, "learning_rate": 9.520834051143814e-06, "loss": 0.5176, "step": 1149 }, { "epoch": 0.570932108725332, "grad_norm": 0.07105926905059287, "learning_rate": 9.519998698119586e-06, "loss": 0.5119, "step": 1150 }, { "epoch": 0.5714285714285714, "grad_norm": 0.07693800848614878, "learning_rate": 9.51916265428603e-06, "loss": 0.5527, "step": 1151 }, { "epoch": 0.5719250341318108, "grad_norm": 0.07346670069398563, "learning_rate": 9.518325919770919e-06, "loss": 0.574, "step": 1152 }, { "epoch": 0.5724214968350503, "grad_norm": 0.06977185406549447, "learning_rate": 9.517488494702141e-06, "loss": 0.5303, "step": 1153 }, { "epoch": 0.5729179595382897, "grad_norm": 0.07082572887503037, "learning_rate": 9.516650379207677e-06, "loss": 0.4947, "step": 1154 }, { "epoch": 0.573414422241529, "grad_norm": 0.07376675912920903, "learning_rate": 9.515811573415621e-06, "loss": 0.5449, "step": 1155 }, { "epoch": 0.5739108849447685, "grad_norm": 0.07164889694586393, "learning_rate": 9.514972077454171e-06, "loss": 0.5522, "step": 1156 }, { "epoch": 0.5744073476480079, "grad_norm": 0.07192965800935237, "learning_rate": 9.514131891451632e-06, "loss": 0.5016, "step": 1157 }, { "epoch": 0.5749038103512474, "grad_norm": 0.07115596024143077, "learning_rate": 9.513291015536413e-06, "loss": 0.516, "step": 1158 }, { "epoch": 0.5754002730544868, "grad_norm": 0.07566922011447912, "learning_rate": 9.512449449837026e-06, "loss": 0.5476, "step": 1159 }, { "epoch": 0.5758967357577262, "grad_norm": 0.07194571071415225, "learning_rate": 9.511607194482093e-06, "loss": 0.5302, "step": 1160 }, { "epoch": 0.5763931984609656, "grad_norm": 0.07100717600020252, "learning_rate": 9.510764249600339e-06, "loss": 0.5227, "step": 1161 }, { "epoch": 0.576889661164205, "grad_norm": 0.07209205885217734, "learning_rate": 9.509920615320593e-06, "loss": 0.5497, "step": 1162 }, { "epoch": 0.5773861238674445, "grad_norm": 0.07457087613777853, "learning_rate": 9.509076291771793e-06, "loss": 0.5196, "step": 1163 }, { "epoch": 0.5778825865706839, "grad_norm": 0.07480024464625332, "learning_rate": 9.508231279082978e-06, "loss": 0.5484, "step": 1164 }, { "epoch": 0.5783790492739233, "grad_norm": 0.07444790838817751, "learning_rate": 9.507385577383297e-06, "loss": 0.5677, "step": 1165 }, { "epoch": 0.5788755119771627, "grad_norm": 0.07413996735633248, "learning_rate": 9.506539186802e-06, "loss": 0.5577, "step": 1166 }, { "epoch": 0.5793719746804021, "grad_norm": 0.0750851696775246, "learning_rate": 9.505692107468446e-06, "loss": 0.5659, "step": 1167 }, { "epoch": 0.5798684373836416, "grad_norm": 0.07516933716182636, "learning_rate": 9.504844339512096e-06, "loss": 0.5069, "step": 1168 }, { "epoch": 
0.580364900086881, "grad_norm": 0.07516454409413244, "learning_rate": 9.503995883062519e-06, "loss": 0.5497, "step": 1169 }, { "epoch": 0.5808613627901204, "grad_norm": 0.074536429781554, "learning_rate": 9.503146738249386e-06, "loss": 0.5364, "step": 1170 }, { "epoch": 0.5813578254933598, "grad_norm": 0.07812964313209408, "learning_rate": 9.502296905202479e-06, "loss": 0.5796, "step": 1171 }, { "epoch": 0.5818542881965992, "grad_norm": 0.07219971092833606, "learning_rate": 9.501446384051678e-06, "loss": 0.5287, "step": 1172 }, { "epoch": 0.5823507508998387, "grad_norm": 0.07519688233518797, "learning_rate": 9.500595174926972e-06, "loss": 0.5642, "step": 1173 }, { "epoch": 0.5828472136030781, "grad_norm": 0.07012348080736677, "learning_rate": 9.499743277958453e-06, "loss": 0.4809, "step": 1174 }, { "epoch": 0.5833436763063174, "grad_norm": 0.06959138377666908, "learning_rate": 9.498890693276326e-06, "loss": 0.5087, "step": 1175 }, { "epoch": 0.5838401390095569, "grad_norm": 0.07524711459895272, "learning_rate": 9.498037421010888e-06, "loss": 0.539, "step": 1176 }, { "epoch": 0.5843366017127963, "grad_norm": 0.07190994525714839, "learning_rate": 9.497183461292552e-06, "loss": 0.5616, "step": 1177 }, { "epoch": 0.5848330644160358, "grad_norm": 0.07368602441399731, "learning_rate": 9.49632881425183e-06, "loss": 0.5239, "step": 1178 }, { "epoch": 0.5853295271192752, "grad_norm": 0.07519071489636418, "learning_rate": 9.495473480019341e-06, "loss": 0.5277, "step": 1179 }, { "epoch": 0.5858259898225145, "grad_norm": 0.07510671666338055, "learning_rate": 9.494617458725812e-06, "loss": 0.5218, "step": 1180 }, { "epoch": 0.586322452525754, "grad_norm": 0.07479722093999988, "learning_rate": 9.493760750502068e-06, "loss": 0.5061, "step": 1181 }, { "epoch": 0.5868189152289934, "grad_norm": 0.07429647836890872, "learning_rate": 9.492903355479047e-06, "loss": 0.5201, "step": 1182 }, { "epoch": 0.5873153779322329, "grad_norm": 0.07243145250868639, "learning_rate": 9.492045273787787e-06, "loss": 0.5262, "step": 1183 }, { "epoch": 0.5878118406354722, "grad_norm": 0.07714760997242102, "learning_rate": 9.49118650555943e-06, "loss": 0.5683, "step": 1184 }, { "epoch": 0.5883083033387116, "grad_norm": 0.07303272362298142, "learning_rate": 9.490327050925225e-06, "loss": 0.5389, "step": 1185 }, { "epoch": 0.5888047660419511, "grad_norm": 0.07126612983172714, "learning_rate": 9.48946691001653e-06, "loss": 0.5098, "step": 1186 }, { "epoch": 0.5893012287451905, "grad_norm": 0.07285329202992465, "learning_rate": 9.4886060829648e-06, "loss": 0.5297, "step": 1187 }, { "epoch": 0.58979769144843, "grad_norm": 0.07647879877688098, "learning_rate": 9.487744569901598e-06, "loss": 0.5249, "step": 1188 }, { "epoch": 0.5902941541516693, "grad_norm": 0.07188442696621349, "learning_rate": 9.486882370958596e-06, "loss": 0.5199, "step": 1189 }, { "epoch": 0.5907906168549087, "grad_norm": 0.07496456315600855, "learning_rate": 9.486019486267563e-06, "loss": 0.5416, "step": 1190 }, { "epoch": 0.5912870795581482, "grad_norm": 0.07576728726087202, "learning_rate": 9.485155915960383e-06, "loss": 0.5398, "step": 1191 }, { "epoch": 0.5917835422613876, "grad_norm": 0.07305444440941704, "learning_rate": 9.484291660169031e-06, "loss": 0.5462, "step": 1192 }, { "epoch": 0.5922800049646271, "grad_norm": 0.07182476967325477, "learning_rate": 9.4834267190256e-06, "loss": 0.5501, "step": 1193 }, { "epoch": 0.5927764676678664, "grad_norm": 0.07024451668694567, "learning_rate": 9.482561092662284e-06, "loss": 0.532, "step": 1194 }, { "epoch": 
0.5932729303711058, "grad_norm": 0.07155857699067651, "learning_rate": 9.481694781211375e-06, "loss": 0.5501, "step": 1195 }, { "epoch": 0.5937693930743453, "grad_norm": 0.07197262136863748, "learning_rate": 9.480827784805278e-06, "loss": 0.5288, "step": 1196 }, { "epoch": 0.5942658557775847, "grad_norm": 0.07585902794239587, "learning_rate": 9.4799601035765e-06, "loss": 0.5626, "step": 1197 }, { "epoch": 0.5947623184808242, "grad_norm": 0.07401948158206198, "learning_rate": 9.479091737657649e-06, "loss": 0.5111, "step": 1198 }, { "epoch": 0.5952587811840635, "grad_norm": 0.07407351265240464, "learning_rate": 9.478222687181444e-06, "loss": 0.5287, "step": 1199 }, { "epoch": 0.5957552438873029, "grad_norm": 0.07262590404108443, "learning_rate": 9.477352952280703e-06, "loss": 0.5257, "step": 1200 }, { "epoch": 0.5962517065905424, "grad_norm": 0.07358832154964039, "learning_rate": 9.476482533088351e-06, "loss": 0.5292, "step": 1201 }, { "epoch": 0.5967481692937818, "grad_norm": 0.07178211125756767, "learning_rate": 9.475611429737422e-06, "loss": 0.5368, "step": 1202 }, { "epoch": 0.5972446319970213, "grad_norm": 0.07339341623133162, "learning_rate": 9.474739642361043e-06, "loss": 0.5549, "step": 1203 }, { "epoch": 0.5977410947002606, "grad_norm": 0.07412432663341953, "learning_rate": 9.473867171092458e-06, "loss": 0.5407, "step": 1204 }, { "epoch": 0.5982375574035, "grad_norm": 0.07436474908834359, "learning_rate": 9.47299401606501e-06, "loss": 0.5611, "step": 1205 }, { "epoch": 0.5987340201067395, "grad_norm": 0.07736756388873084, "learning_rate": 9.472120177412147e-06, "loss": 0.5572, "step": 1206 }, { "epoch": 0.5992304828099789, "grad_norm": 0.07576101680111423, "learning_rate": 9.471245655267419e-06, "loss": 0.5409, "step": 1207 }, { "epoch": 0.5997269455132184, "grad_norm": 0.07436447613810461, "learning_rate": 9.470370449764481e-06, "loss": 0.5538, "step": 1208 }, { "epoch": 0.6002234082164577, "grad_norm": 0.07528145343081451, "learning_rate": 9.469494561037097e-06, "loss": 0.5238, "step": 1209 }, { "epoch": 0.6007198709196971, "grad_norm": 0.06902241269601483, "learning_rate": 9.468617989219136e-06, "loss": 0.5147, "step": 1210 }, { "epoch": 0.6012163336229366, "grad_norm": 0.07674309299936645, "learning_rate": 9.46774073444456e-06, "loss": 0.5588, "step": 1211 }, { "epoch": 0.601712796326176, "grad_norm": 0.07272454622505915, "learning_rate": 9.46686279684745e-06, "loss": 0.5379, "step": 1212 }, { "epoch": 0.6022092590294155, "grad_norm": 0.07281798474656986, "learning_rate": 9.465984176561982e-06, "loss": 0.5513, "step": 1213 }, { "epoch": 0.6027057217326548, "grad_norm": 0.07450970928418141, "learning_rate": 9.46510487372244e-06, "loss": 0.544, "step": 1214 }, { "epoch": 0.6032021844358942, "grad_norm": 0.08725930469560868, "learning_rate": 9.464224888463208e-06, "loss": 0.5453, "step": 1215 }, { "epoch": 0.6036986471391337, "grad_norm": 0.07862582216165505, "learning_rate": 9.463344220918781e-06, "loss": 0.5306, "step": 1216 }, { "epoch": 0.6041951098423731, "grad_norm": 0.07528725574920143, "learning_rate": 9.462462871223755e-06, "loss": 0.5449, "step": 1217 }, { "epoch": 0.6046915725456126, "grad_norm": 0.07316670669943198, "learning_rate": 9.461580839512829e-06, "loss": 0.5591, "step": 1218 }, { "epoch": 0.6051880352488519, "grad_norm": 0.07230039876916608, "learning_rate": 9.46069812592081e-06, "loss": 0.4814, "step": 1219 }, { "epoch": 0.6056844979520913, "grad_norm": 0.07364861679682433, "learning_rate": 9.459814730582599e-06, "loss": 0.5321, "step": 1220 }, { "epoch": 
0.6061809606553308, "grad_norm": 0.07241383458550736, "learning_rate": 9.458930653633218e-06, "loss": 0.5114, "step": 1221 }, { "epoch": 0.6066774233585702, "grad_norm": 0.0731938940819185, "learning_rate": 9.45804589520778e-06, "loss": 0.577, "step": 1222 }, { "epoch": 0.6071738860618096, "grad_norm": 0.0723275474160098, "learning_rate": 9.457160455441505e-06, "loss": 0.5483, "step": 1223 }, { "epoch": 0.607670348765049, "grad_norm": 0.07637854588101618, "learning_rate": 9.45627433446972e-06, "loss": 0.5549, "step": 1224 }, { "epoch": 0.6081668114682884, "grad_norm": 0.07879115938829902, "learning_rate": 9.455387532427854e-06, "loss": 0.6051, "step": 1225 }, { "epoch": 0.6086632741715279, "grad_norm": 0.07309959660536655, "learning_rate": 9.45450004945144e-06, "loss": 0.5086, "step": 1226 }, { "epoch": 0.6091597368747673, "grad_norm": 0.07694131558099677, "learning_rate": 9.453611885676115e-06, "loss": 0.5543, "step": 1227 }, { "epoch": 0.6096561995780067, "grad_norm": 0.0755960681922189, "learning_rate": 9.452723041237624e-06, "loss": 0.5145, "step": 1228 }, { "epoch": 0.6101526622812461, "grad_norm": 0.07295682109883318, "learning_rate": 9.45183351627181e-06, "loss": 0.5224, "step": 1229 }, { "epoch": 0.6106491249844855, "grad_norm": 0.07128517723475501, "learning_rate": 9.45094331091462e-06, "loss": 0.5116, "step": 1230 }, { "epoch": 0.611145587687725, "grad_norm": 0.08115875461185187, "learning_rate": 9.450052425302112e-06, "loss": 0.513, "step": 1231 }, { "epoch": 0.6116420503909644, "grad_norm": 0.07377422236820412, "learning_rate": 9.44916085957044e-06, "loss": 0.5435, "step": 1232 }, { "epoch": 0.6121385130942038, "grad_norm": 0.07775316082126749, "learning_rate": 9.448268613855871e-06, "loss": 0.5434, "step": 1233 }, { "epoch": 0.6126349757974432, "grad_norm": 0.0731516601538171, "learning_rate": 9.447375688294765e-06, "loss": 0.5603, "step": 1234 }, { "epoch": 0.6131314385006826, "grad_norm": 0.07441731588595143, "learning_rate": 9.446482083023594e-06, "loss": 0.5506, "step": 1235 }, { "epoch": 0.6136279012039221, "grad_norm": 0.07198200814304813, "learning_rate": 9.44558779817893e-06, "loss": 0.5357, "step": 1236 }, { "epoch": 0.6141243639071615, "grad_norm": 0.07315295770611031, "learning_rate": 9.444692833897451e-06, "loss": 0.5532, "step": 1237 }, { "epoch": 0.6146208266104008, "grad_norm": 0.07174351007581338, "learning_rate": 9.443797190315938e-06, "loss": 0.5331, "step": 1238 }, { "epoch": 0.6151172893136403, "grad_norm": 0.06979723079086682, "learning_rate": 9.442900867571274e-06, "loss": 0.5135, "step": 1239 }, { "epoch": 0.6156137520168797, "grad_norm": 0.07178731116164377, "learning_rate": 9.442003865800448e-06, "loss": 0.4953, "step": 1240 }, { "epoch": 0.6161102147201192, "grad_norm": 0.07054593892362157, "learning_rate": 9.441106185140557e-06, "loss": 0.5395, "step": 1241 }, { "epoch": 0.6166066774233586, "grad_norm": 0.0736531086968794, "learning_rate": 9.44020782572879e-06, "loss": 0.5721, "step": 1242 }, { "epoch": 0.617103140126598, "grad_norm": 0.07586757881170157, "learning_rate": 9.43930878770245e-06, "loss": 0.5557, "step": 1243 }, { "epoch": 0.6175996028298374, "grad_norm": 0.07191430014087756, "learning_rate": 9.438409071198944e-06, "loss": 0.5094, "step": 1244 }, { "epoch": 0.6180960655330768, "grad_norm": 0.07452596818342855, "learning_rate": 9.437508676355774e-06, "loss": 0.5543, "step": 1245 }, { "epoch": 0.6185925282363163, "grad_norm": 0.08010039702376043, "learning_rate": 9.436607603310553e-06, "loss": 0.5622, "step": 1246 }, { "epoch": 
0.6190889909395557, "grad_norm": 0.07695327823931182, "learning_rate": 9.435705852200994e-06, "loss": 0.5231, "step": 1247 }, { "epoch": 0.619585453642795, "grad_norm": 0.07276200871027631, "learning_rate": 9.434803423164917e-06, "loss": 0.518, "step": 1248 }, { "epoch": 0.6200819163460345, "grad_norm": 0.07104138436967752, "learning_rate": 9.433900316340246e-06, "loss": 0.4994, "step": 1249 }, { "epoch": 0.6205783790492739, "grad_norm": 0.07404188758170115, "learning_rate": 9.432996531865001e-06, "loss": 0.5293, "step": 1250 }, { "epoch": 0.6210748417525134, "grad_norm": 0.0716565229661667, "learning_rate": 9.432092069877315e-06, "loss": 0.5112, "step": 1251 }, { "epoch": 0.6215713044557528, "grad_norm": 0.08010756662952559, "learning_rate": 9.431186930515419e-06, "loss": 0.5259, "step": 1252 }, { "epoch": 0.6220677671589921, "grad_norm": 0.07413909114414897, "learning_rate": 9.430281113917649e-06, "loss": 0.5255, "step": 1253 }, { "epoch": 0.6225642298622316, "grad_norm": 0.07327540089614044, "learning_rate": 9.429374620222448e-06, "loss": 0.53, "step": 1254 }, { "epoch": 0.623060692565471, "grad_norm": 0.07091853000727939, "learning_rate": 9.428467449568352e-06, "loss": 0.5135, "step": 1255 }, { "epoch": 0.6235571552687105, "grad_norm": 0.07410900989671004, "learning_rate": 9.427559602094011e-06, "loss": 0.5276, "step": 1256 }, { "epoch": 0.6240536179719498, "grad_norm": 0.07515041114499699, "learning_rate": 9.426651077938178e-06, "loss": 0.528, "step": 1257 }, { "epoch": 0.6245500806751892, "grad_norm": 0.07444010187119487, "learning_rate": 9.425741877239703e-06, "loss": 0.5479, "step": 1258 }, { "epoch": 0.6250465433784287, "grad_norm": 0.07370774747030723, "learning_rate": 9.424832000137542e-06, "loss": 0.5541, "step": 1259 }, { "epoch": 0.6255430060816681, "grad_norm": 0.0714035032190127, "learning_rate": 9.423921446770759e-06, "loss": 0.4864, "step": 1260 }, { "epoch": 0.6260394687849076, "grad_norm": 0.07730674579399738, "learning_rate": 9.423010217278515e-06, "loss": 0.5534, "step": 1261 }, { "epoch": 0.6265359314881469, "grad_norm": 0.07138774544095353, "learning_rate": 9.422098311800074e-06, "loss": 0.5399, "step": 1262 }, { "epoch": 0.6270323941913863, "grad_norm": 0.06852941610757017, "learning_rate": 9.421185730474811e-06, "loss": 0.519, "step": 1263 }, { "epoch": 0.6275288568946258, "grad_norm": 0.07367955240222782, "learning_rate": 9.420272473442198e-06, "loss": 0.5456, "step": 1264 }, { "epoch": 0.6280253195978652, "grad_norm": 0.07541228844186217, "learning_rate": 9.41935854084181e-06, "loss": 0.5314, "step": 1265 }, { "epoch": 0.6285217823011047, "grad_norm": 0.07435558350951817, "learning_rate": 9.418443932813328e-06, "loss": 0.5269, "step": 1266 }, { "epoch": 0.629018245004344, "grad_norm": 0.0728163151724603, "learning_rate": 9.417528649496535e-06, "loss": 0.547, "step": 1267 }, { "epoch": 0.6295147077075834, "grad_norm": 0.07338689354836506, "learning_rate": 9.41661269103132e-06, "loss": 0.5555, "step": 1268 }, { "epoch": 0.6300111704108229, "grad_norm": 0.07076974627201735, "learning_rate": 9.415696057557667e-06, "loss": 0.5584, "step": 1269 }, { "epoch": 0.6305076331140623, "grad_norm": 0.07256759974301101, "learning_rate": 9.414778749215673e-06, "loss": 0.5451, "step": 1270 }, { "epoch": 0.6310040958173018, "grad_norm": 0.07319635272423798, "learning_rate": 9.413860766145533e-06, "loss": 0.5177, "step": 1271 }, { "epoch": 0.6315005585205411, "grad_norm": 0.07689312407289102, "learning_rate": 9.412942108487545e-06, "loss": 0.5209, "step": 1272 }, { "epoch": 
0.6319970212237805, "grad_norm": 0.0734130755794059, "learning_rate": 9.412022776382113e-06, "loss": 0.5675, "step": 1273 }, { "epoch": 0.63249348392702, "grad_norm": 0.07228991038198292, "learning_rate": 9.411102769969742e-06, "loss": 0.5065, "step": 1274 }, { "epoch": 0.6329899466302594, "grad_norm": 0.07467310042619545, "learning_rate": 9.410182089391039e-06, "loss": 0.5473, "step": 1275 }, { "epoch": 0.6334864093334989, "grad_norm": 0.0730424746593062, "learning_rate": 9.409260734786713e-06, "loss": 0.5342, "step": 1276 }, { "epoch": 0.6339828720367382, "grad_norm": 0.07040514946923192, "learning_rate": 9.408338706297581e-06, "loss": 0.5214, "step": 1277 }, { "epoch": 0.6344793347399776, "grad_norm": 0.07277326050517614, "learning_rate": 9.407416004064562e-06, "loss": 0.5741, "step": 1278 }, { "epoch": 0.6349757974432171, "grad_norm": 0.07219826740008305, "learning_rate": 9.406492628228674e-06, "loss": 0.5273, "step": 1279 }, { "epoch": 0.6354722601464565, "grad_norm": 0.06850912173296844, "learning_rate": 9.405568578931042e-06, "loss": 0.5055, "step": 1280 }, { "epoch": 0.635968722849696, "grad_norm": 0.07229403813315836, "learning_rate": 9.404643856312887e-06, "loss": 0.5369, "step": 1281 }, { "epoch": 0.6364651855529353, "grad_norm": 0.06994299050471268, "learning_rate": 9.403718460515544e-06, "loss": 0.5254, "step": 1282 }, { "epoch": 0.6369616482561747, "grad_norm": 0.06823196465634472, "learning_rate": 9.402792391680443e-06, "loss": 0.4799, "step": 1283 }, { "epoch": 0.6374581109594142, "grad_norm": 0.07588187341041024, "learning_rate": 9.401865649949116e-06, "loss": 0.5718, "step": 1284 }, { "epoch": 0.6379545736626536, "grad_norm": 0.07594938287429129, "learning_rate": 9.400938235463203e-06, "loss": 0.526, "step": 1285 }, { "epoch": 0.6384510363658931, "grad_norm": 0.07332308146260103, "learning_rate": 9.400010148364447e-06, "loss": 0.5429, "step": 1286 }, { "epoch": 0.6389474990691324, "grad_norm": 0.07072214782920225, "learning_rate": 9.399081388794688e-06, "loss": 0.5202, "step": 1287 }, { "epoch": 0.6394439617723718, "grad_norm": 0.0746382662046732, "learning_rate": 9.398151956895872e-06, "loss": 0.5072, "step": 1288 }, { "epoch": 0.6399404244756113, "grad_norm": 0.0735533069677649, "learning_rate": 9.397221852810049e-06, "loss": 0.5329, "step": 1289 }, { "epoch": 0.6404368871788507, "grad_norm": 0.07615515782140855, "learning_rate": 9.396291076679369e-06, "loss": 0.5768, "step": 1290 }, { "epoch": 0.64093334988209, "grad_norm": 0.07001774866514658, "learning_rate": 9.395359628646087e-06, "loss": 0.5257, "step": 1291 }, { "epoch": 0.6414298125853295, "grad_norm": 0.07375326519499116, "learning_rate": 9.39442750885256e-06, "loss": 0.5092, "step": 1292 }, { "epoch": 0.6419262752885689, "grad_norm": 0.07259307932350538, "learning_rate": 9.393494717441248e-06, "loss": 0.5599, "step": 1293 }, { "epoch": 0.6424227379918084, "grad_norm": 0.07088258695341772, "learning_rate": 9.392561254554712e-06, "loss": 0.4888, "step": 1294 }, { "epoch": 0.6429192006950478, "grad_norm": 0.07264741884460572, "learning_rate": 9.391627120335618e-06, "loss": 0.5168, "step": 1295 }, { "epoch": 0.6434156633982872, "grad_norm": 0.07619595372888417, "learning_rate": 9.390692314926734e-06, "loss": 0.5409, "step": 1296 }, { "epoch": 0.6439121261015266, "grad_norm": 0.07051164086514715, "learning_rate": 9.389756838470929e-06, "loss": 0.5437, "step": 1297 }, { "epoch": 0.644408588804766, "grad_norm": 0.07311117048289703, "learning_rate": 9.388820691111175e-06, "loss": 0.4895, "step": 1298 }, { "epoch": 
0.6449050515080055, "grad_norm": 0.07300862751347871, "learning_rate": 9.387883872990547e-06, "loss": 0.5208, "step": 1299 }, { "epoch": 0.6454015142112449, "grad_norm": 0.07600173346135768, "learning_rate": 9.386946384252225e-06, "loss": 0.5377, "step": 1300 }, { "epoch": 0.6458979769144843, "grad_norm": 0.06913090974935486, "learning_rate": 9.386008225039486e-06, "loss": 0.549, "step": 1301 }, { "epoch": 0.6463944396177237, "grad_norm": 0.07293082318748247, "learning_rate": 9.385069395495715e-06, "loss": 0.5494, "step": 1302 }, { "epoch": 0.6468909023209631, "grad_norm": 0.07192021828481457, "learning_rate": 9.384129895764396e-06, "loss": 0.5539, "step": 1303 }, { "epoch": 0.6473873650242026, "grad_norm": 0.0677715996313107, "learning_rate": 9.383189725989117e-06, "loss": 0.4885, "step": 1304 }, { "epoch": 0.647883827727442, "grad_norm": 0.06993925668460728, "learning_rate": 9.382248886313568e-06, "loss": 0.5666, "step": 1305 }, { "epoch": 0.6483802904306813, "grad_norm": 0.0732675510502527, "learning_rate": 9.381307376881538e-06, "loss": 0.5244, "step": 1306 }, { "epoch": 0.6488767531339208, "grad_norm": 0.07833454832413211, "learning_rate": 9.380365197836927e-06, "loss": 0.5291, "step": 1307 }, { "epoch": 0.6493732158371602, "grad_norm": 0.07246897473704703, "learning_rate": 9.379422349323728e-06, "loss": 0.5336, "step": 1308 }, { "epoch": 0.6498696785403997, "grad_norm": 0.08306837527169122, "learning_rate": 9.378478831486042e-06, "loss": 0.5433, "step": 1309 }, { "epoch": 0.6503661412436391, "grad_norm": 0.07296698699268978, "learning_rate": 9.37753464446807e-06, "loss": 0.5181, "step": 1310 }, { "epoch": 0.6508626039468784, "grad_norm": 0.07287062275676794, "learning_rate": 9.376589788414116e-06, "loss": 0.5266, "step": 1311 }, { "epoch": 0.6513590666501179, "grad_norm": 0.07284381245821252, "learning_rate": 9.375644263468586e-06, "loss": 0.547, "step": 1312 }, { "epoch": 0.6518555293533573, "grad_norm": 0.07656099448383531, "learning_rate": 9.374698069775989e-06, "loss": 0.5341, "step": 1313 }, { "epoch": 0.6523519920565968, "grad_norm": 0.0708265851795634, "learning_rate": 9.373751207480935e-06, "loss": 0.5366, "step": 1314 }, { "epoch": 0.6528484547598362, "grad_norm": 0.07593480603910471, "learning_rate": 9.372803676728138e-06, "loss": 0.5393, "step": 1315 }, { "epoch": 0.6533449174630755, "grad_norm": 0.07672621233019099, "learning_rate": 9.371855477662409e-06, "loss": 0.5437, "step": 1316 }, { "epoch": 0.653841380166315, "grad_norm": 0.07057674890321015, "learning_rate": 9.37090661042867e-06, "loss": 0.5431, "step": 1317 }, { "epoch": 0.6543378428695544, "grad_norm": 0.07035606365375213, "learning_rate": 9.369957075171935e-06, "loss": 0.5158, "step": 1318 }, { "epoch": 0.6548343055727939, "grad_norm": 0.07424226230822006, "learning_rate": 9.369006872037329e-06, "loss": 0.533, "step": 1319 }, { "epoch": 0.6553307682760333, "grad_norm": 0.07381144539992311, "learning_rate": 9.368056001170077e-06, "loss": 0.5094, "step": 1320 }, { "epoch": 0.6558272309792726, "grad_norm": 0.06949672074730547, "learning_rate": 9.367104462715498e-06, "loss": 0.5161, "step": 1321 }, { "epoch": 0.6563236936825121, "grad_norm": 0.07454285891198276, "learning_rate": 9.366152256819025e-06, "loss": 0.5661, "step": 1322 }, { "epoch": 0.6568201563857515, "grad_norm": 0.0710626384867322, "learning_rate": 9.365199383626184e-06, "loss": 0.5173, "step": 1323 }, { "epoch": 0.657316619088991, "grad_norm": 0.07589984877939893, "learning_rate": 9.36424584328261e-06, "loss": 0.5494, "step": 1324 }, { "epoch": 
0.6578130817922304, "grad_norm": 0.07614774842121315, "learning_rate": 9.363291635934033e-06, "loss": 0.5115, "step": 1325 }, { "epoch": 0.6583095444954697, "grad_norm": 0.07569403053633093, "learning_rate": 9.36233676172629e-06, "loss": 0.5351, "step": 1326 }, { "epoch": 0.6588060071987092, "grad_norm": 0.07563821512274932, "learning_rate": 9.361381220805317e-06, "loss": 0.5736, "step": 1327 }, { "epoch": 0.6593024699019486, "grad_norm": 0.07013748950442422, "learning_rate": 9.360425013317153e-06, "loss": 0.4882, "step": 1328 }, { "epoch": 0.6597989326051881, "grad_norm": 0.07779315687414581, "learning_rate": 9.359468139407942e-06, "loss": 0.5296, "step": 1329 }, { "epoch": 0.6602953953084274, "grad_norm": 0.07400049233265299, "learning_rate": 9.358510599223922e-06, "loss": 0.5161, "step": 1330 }, { "epoch": 0.6607918580116668, "grad_norm": 0.06767323424859467, "learning_rate": 9.357552392911444e-06, "loss": 0.5141, "step": 1331 }, { "epoch": 0.6612883207149063, "grad_norm": 0.07102033958900007, "learning_rate": 9.356593520616948e-06, "loss": 0.5587, "step": 1332 }, { "epoch": 0.6617847834181457, "grad_norm": 0.07588371408730442, "learning_rate": 9.355633982486986e-06, "loss": 0.5885, "step": 1333 }, { "epoch": 0.6622812461213852, "grad_norm": 0.07715392657364961, "learning_rate": 9.354673778668206e-06, "loss": 0.5403, "step": 1334 }, { "epoch": 0.6627777088246245, "grad_norm": 0.07455958529451663, "learning_rate": 9.353712909307361e-06, "loss": 0.5134, "step": 1335 }, { "epoch": 0.6632741715278639, "grad_norm": 0.07191533309938422, "learning_rate": 9.352751374551305e-06, "loss": 0.5087, "step": 1336 }, { "epoch": 0.6637706342311034, "grad_norm": 0.07186294289718925, "learning_rate": 9.351789174546993e-06, "loss": 0.539, "step": 1337 }, { "epoch": 0.6642670969343428, "grad_norm": 0.0754358841299627, "learning_rate": 9.350826309441481e-06, "loss": 0.4992, "step": 1338 }, { "epoch": 0.6647635596375823, "grad_norm": 0.07627703380934699, "learning_rate": 9.349862779381926e-06, "loss": 0.5061, "step": 1339 }, { "epoch": 0.6652600223408216, "grad_norm": 0.0734366299613896, "learning_rate": 9.348898584515593e-06, "loss": 0.5316, "step": 1340 }, { "epoch": 0.665756485044061, "grad_norm": 0.07674901990204858, "learning_rate": 9.347933724989839e-06, "loss": 0.5538, "step": 1341 }, { "epoch": 0.6662529477473005, "grad_norm": 0.07020692260151944, "learning_rate": 9.346968200952129e-06, "loss": 0.5035, "step": 1342 }, { "epoch": 0.6667494104505399, "grad_norm": 0.07093936922660374, "learning_rate": 9.346002012550027e-06, "loss": 0.5251, "step": 1343 }, { "epoch": 0.6672458731537794, "grad_norm": 0.07673979882696808, "learning_rate": 9.345035159931201e-06, "loss": 0.5519, "step": 1344 }, { "epoch": 0.6677423358570187, "grad_norm": 0.06821342541699567, "learning_rate": 9.344067643243419e-06, "loss": 0.5204, "step": 1345 }, { "epoch": 0.6682387985602581, "grad_norm": 0.07188197485984522, "learning_rate": 9.343099462634548e-06, "loss": 0.5607, "step": 1346 }, { "epoch": 0.6687352612634976, "grad_norm": 0.07745552205514158, "learning_rate": 9.34213061825256e-06, "loss": 0.5732, "step": 1347 }, { "epoch": 0.669231723966737, "grad_norm": 0.07068348754676128, "learning_rate": 9.34116111024553e-06, "loss": 0.4946, "step": 1348 }, { "epoch": 0.6697281866699765, "grad_norm": 0.07112019014542299, "learning_rate": 9.340190938761628e-06, "loss": 0.5266, "step": 1349 }, { "epoch": 0.6702246493732158, "grad_norm": 0.07273078228929754, "learning_rate": 9.339220103949132e-06, "loss": 0.5487, "step": 1350 }, { "epoch": 
0.6707211120764552, "grad_norm": 0.07688997752683249, "learning_rate": 9.338248605956416e-06, "loss": 0.5285, "step": 1351 }, { "epoch": 0.6712175747796947, "grad_norm": 0.06971510713006104, "learning_rate": 9.337276444931959e-06, "loss": 0.4981, "step": 1352 }, { "epoch": 0.6717140374829341, "grad_norm": 0.0722029002376623, "learning_rate": 9.33630362102434e-06, "loss": 0.5059, "step": 1353 }, { "epoch": 0.6722105001861736, "grad_norm": 0.08013613409130184, "learning_rate": 9.335330134382242e-06, "loss": 0.5564, "step": 1354 }, { "epoch": 0.6727069628894129, "grad_norm": 0.07975096625901282, "learning_rate": 9.334355985154444e-06, "loss": 0.5727, "step": 1355 }, { "epoch": 0.6732034255926523, "grad_norm": 0.07276394010589855, "learning_rate": 9.333381173489828e-06, "loss": 0.5428, "step": 1356 }, { "epoch": 0.6736998882958918, "grad_norm": 0.07450651270520357, "learning_rate": 9.332405699537382e-06, "loss": 0.5589, "step": 1357 }, { "epoch": 0.6741963509991312, "grad_norm": 0.07503406849766957, "learning_rate": 9.331429563446189e-06, "loss": 0.5258, "step": 1358 }, { "epoch": 0.6746928137023707, "grad_norm": 0.07217189294540513, "learning_rate": 9.330452765365436e-06, "loss": 0.5145, "step": 1359 }, { "epoch": 0.67518927640561, "grad_norm": 0.07214669475300672, "learning_rate": 9.32947530544441e-06, "loss": 0.5306, "step": 1360 }, { "epoch": 0.6756857391088494, "grad_norm": 0.07055049962573819, "learning_rate": 9.328497183832505e-06, "loss": 0.5267, "step": 1361 }, { "epoch": 0.6761822018120889, "grad_norm": 0.07106910357432814, "learning_rate": 9.327518400679206e-06, "loss": 0.52, "step": 1362 }, { "epoch": 0.6766786645153283, "grad_norm": 0.07301694174327626, "learning_rate": 9.326538956134106e-06, "loss": 0.5302, "step": 1363 }, { "epoch": 0.6771751272185677, "grad_norm": 0.07032215298745946, "learning_rate": 9.325558850346897e-06, "loss": 0.5197, "step": 1364 }, { "epoch": 0.6776715899218071, "grad_norm": 0.06935838081944155, "learning_rate": 9.324578083467372e-06, "loss": 0.5363, "step": 1365 }, { "epoch": 0.6781680526250465, "grad_norm": 0.07423776754352923, "learning_rate": 9.323596655645427e-06, "loss": 0.5485, "step": 1366 }, { "epoch": 0.678664515328286, "grad_norm": 0.07315712610205513, "learning_rate": 9.322614567031056e-06, "loss": 0.5104, "step": 1367 }, { "epoch": 0.6791609780315254, "grad_norm": 0.07445358870399109, "learning_rate": 9.321631817774358e-06, "loss": 0.5472, "step": 1368 }, { "epoch": 0.6796574407347648, "grad_norm": 0.06898749856725994, "learning_rate": 9.320648408025528e-06, "loss": 0.4936, "step": 1369 }, { "epoch": 0.6801539034380042, "grad_norm": 0.07381816406555464, "learning_rate": 9.319664337934865e-06, "loss": 0.5203, "step": 1370 }, { "epoch": 0.6806503661412436, "grad_norm": 0.07735751452122537, "learning_rate": 9.318679607652768e-06, "loss": 0.5364, "step": 1371 }, { "epoch": 0.6811468288444831, "grad_norm": 0.07326646920478877, "learning_rate": 9.317694217329737e-06, "loss": 0.5142, "step": 1372 }, { "epoch": 0.6816432915477225, "grad_norm": 0.073764703258718, "learning_rate": 9.316708167116377e-06, "loss": 0.5554, "step": 1373 }, { "epoch": 0.6821397542509618, "grad_norm": 0.07390414723701343, "learning_rate": 9.315721457163384e-06, "loss": 0.567, "step": 1374 }, { "epoch": 0.6826362169542013, "grad_norm": 0.07393942919613289, "learning_rate": 9.314734087621566e-06, "loss": 0.5123, "step": 1375 }, { "epoch": 0.6831326796574407, "grad_norm": 0.07620045589780511, "learning_rate": 9.313746058641822e-06, "loss": 0.5372, "step": 1376 }, { "epoch": 
0.6836291423606802, "grad_norm": 0.07583882363513543, "learning_rate": 9.312757370375159e-06, "loss": 0.5732, "step": 1377 }, { "epoch": 0.6841256050639196, "grad_norm": 0.07726479654671939, "learning_rate": 9.311768022972682e-06, "loss": 0.5422, "step": 1378 }, { "epoch": 0.684622067767159, "grad_norm": 0.0760180487581988, "learning_rate": 9.310778016585597e-06, "loss": 0.558, "step": 1379 }, { "epoch": 0.6851185304703984, "grad_norm": 0.0691920787710429, "learning_rate": 9.30978735136521e-06, "loss": 0.4954, "step": 1380 }, { "epoch": 0.6856149931736378, "grad_norm": 0.07452724187979319, "learning_rate": 9.308796027462928e-06, "loss": 0.5316, "step": 1381 }, { "epoch": 0.6861114558768773, "grad_norm": 0.0756477838695681, "learning_rate": 9.30780404503026e-06, "loss": 0.5423, "step": 1382 }, { "epoch": 0.6866079185801167, "grad_norm": 0.07516990810148905, "learning_rate": 9.306811404218814e-06, "loss": 0.5624, "step": 1383 }, { "epoch": 0.687104381283356, "grad_norm": 0.07346320934704692, "learning_rate": 9.3058181051803e-06, "loss": 0.5434, "step": 1384 }, { "epoch": 0.6876008439865955, "grad_norm": 0.07279939873545586, "learning_rate": 9.304824148066526e-06, "loss": 0.5349, "step": 1385 }, { "epoch": 0.6880973066898349, "grad_norm": 0.08384339788396475, "learning_rate": 9.303829533029406e-06, "loss": 0.5348, "step": 1386 }, { "epoch": 0.6885937693930744, "grad_norm": 0.07095408782774806, "learning_rate": 9.302834260220945e-06, "loss": 0.4933, "step": 1387 }, { "epoch": 0.6890902320963138, "grad_norm": 0.07549768019197384, "learning_rate": 9.30183832979326e-06, "loss": 0.5506, "step": 1388 }, { "epoch": 0.6895866947995531, "grad_norm": 0.0766368547269983, "learning_rate": 9.30084174189856e-06, "loss": 0.5791, "step": 1389 }, { "epoch": 0.6900831575027926, "grad_norm": 0.07073419123255918, "learning_rate": 9.29984449668916e-06, "loss": 0.4825, "step": 1390 }, { "epoch": 0.690579620206032, "grad_norm": 0.07395858346898046, "learning_rate": 9.298846594317471e-06, "loss": 0.5832, "step": 1391 }, { "epoch": 0.6910760829092715, "grad_norm": 0.06889946396344103, "learning_rate": 9.297848034936007e-06, "loss": 0.5146, "step": 1392 }, { "epoch": 0.6915725456125109, "grad_norm": 0.07150224954000071, "learning_rate": 9.296848818697381e-06, "loss": 0.5077, "step": 1393 }, { "epoch": 0.6920690083157502, "grad_norm": 0.07339810430184905, "learning_rate": 9.295848945754308e-06, "loss": 0.5329, "step": 1394 }, { "epoch": 0.6925654710189897, "grad_norm": 0.06917499027819218, "learning_rate": 9.294848416259603e-06, "loss": 0.515, "step": 1395 }, { "epoch": 0.6930619337222291, "grad_norm": 0.07238605456545845, "learning_rate": 9.293847230366178e-06, "loss": 0.553, "step": 1396 }, { "epoch": 0.6935583964254686, "grad_norm": 0.07049884661873568, "learning_rate": 9.292845388227052e-06, "loss": 0.5143, "step": 1397 }, { "epoch": 0.6940548591287079, "grad_norm": 0.07546666489246001, "learning_rate": 9.291842889995339e-06, "loss": 0.5639, "step": 1398 }, { "epoch": 0.6945513218319473, "grad_norm": 0.0751436423514526, "learning_rate": 9.290839735824254e-06, "loss": 0.5102, "step": 1399 }, { "epoch": 0.6950477845351868, "grad_norm": 0.07389138982609979, "learning_rate": 9.289835925867116e-06, "loss": 0.528, "step": 1400 }, { "epoch": 0.6955442472384262, "grad_norm": 0.06625971976683562, "learning_rate": 9.288831460277337e-06, "loss": 0.5063, "step": 1401 }, { "epoch": 0.6960407099416657, "grad_norm": 0.07734680372898531, "learning_rate": 9.287826339208436e-06, "loss": 0.5726, "step": 1402 }, { "epoch": 
0.696537172644905, "grad_norm": 0.06951228908969029, "learning_rate": 9.286820562814029e-06, "loss": 0.5191, "step": 1403 }, { "epoch": 0.6970336353481444, "grad_norm": 0.07287713737550759, "learning_rate": 9.285814131247831e-06, "loss": 0.5476, "step": 1404 }, { "epoch": 0.6975300980513839, "grad_norm": 0.06956108091891039, "learning_rate": 9.284807044663663e-06, "loss": 0.5176, "step": 1405 }, { "epoch": 0.6980265607546233, "grad_norm": 0.08380941217622373, "learning_rate": 9.283799303215442e-06, "loss": 0.5468, "step": 1406 }, { "epoch": 0.6985230234578628, "grad_norm": 0.07138672000727564, "learning_rate": 9.28279090705718e-06, "loss": 0.4963, "step": 1407 }, { "epoch": 0.6990194861611021, "grad_norm": 0.07292069605598205, "learning_rate": 9.281781856342998e-06, "loss": 0.542, "step": 1408 }, { "epoch": 0.6995159488643415, "grad_norm": 0.07332813612145221, "learning_rate": 9.280772151227112e-06, "loss": 0.5548, "step": 1409 }, { "epoch": 0.700012411567581, "grad_norm": 0.07246818987746459, "learning_rate": 9.279761791863839e-06, "loss": 0.5503, "step": 1410 }, { "epoch": 0.7005088742708204, "grad_norm": 0.07005478459710343, "learning_rate": 9.2787507784076e-06, "loss": 0.5004, "step": 1411 }, { "epoch": 0.7010053369740599, "grad_norm": 0.06934183608207299, "learning_rate": 9.277739111012905e-06, "loss": 0.5528, "step": 1412 }, { "epoch": 0.7015017996772992, "grad_norm": 0.07052000189461478, "learning_rate": 9.276726789834378e-06, "loss": 0.5087, "step": 1413 }, { "epoch": 0.7019982623805386, "grad_norm": 0.07301837264825195, "learning_rate": 9.275713815026732e-06, "loss": 0.53, "step": 1414 }, { "epoch": 0.7024947250837781, "grad_norm": 0.0722789778678723, "learning_rate": 9.274700186744786e-06, "loss": 0.5401, "step": 1415 }, { "epoch": 0.7029911877870175, "grad_norm": 0.07527888887630624, "learning_rate": 9.273685905143454e-06, "loss": 0.5287, "step": 1416 }, { "epoch": 0.703487650490257, "grad_norm": 0.07285186180656061, "learning_rate": 9.272670970377758e-06, "loss": 0.5305, "step": 1417 }, { "epoch": 0.7039841131934963, "grad_norm": 0.07059684009978115, "learning_rate": 9.271655382602809e-06, "loss": 0.5263, "step": 1418 }, { "epoch": 0.7044805758967357, "grad_norm": 0.07370364807475144, "learning_rate": 9.270639141973826e-06, "loss": 0.536, "step": 1419 }, { "epoch": 0.7049770385999752, "grad_norm": 0.07584063600861424, "learning_rate": 9.269622248646124e-06, "loss": 0.5108, "step": 1420 }, { "epoch": 0.7054735013032146, "grad_norm": 0.11190360389061123, "learning_rate": 9.26860470277512e-06, "loss": 0.5217, "step": 1421 }, { "epoch": 0.7059699640064541, "grad_norm": 0.07173583317483305, "learning_rate": 9.267586504516331e-06, "loss": 0.5281, "step": 1422 }, { "epoch": 0.7064664267096934, "grad_norm": 0.07299583837400489, "learning_rate": 9.266567654025369e-06, "loss": 0.5074, "step": 1423 }, { "epoch": 0.7069628894129328, "grad_norm": 0.07453659993742369, "learning_rate": 9.265548151457949e-06, "loss": 0.5803, "step": 1424 }, { "epoch": 0.7074593521161723, "grad_norm": 0.07321720883021786, "learning_rate": 9.264527996969888e-06, "loss": 0.5109, "step": 1425 }, { "epoch": 0.7079558148194117, "grad_norm": 0.07222177910537633, "learning_rate": 9.2635071907171e-06, "loss": 0.5059, "step": 1426 }, { "epoch": 0.7084522775226512, "grad_norm": 0.07247620140102892, "learning_rate": 9.262485732855597e-06, "loss": 0.5467, "step": 1427 }, { "epoch": 0.7089487402258905, "grad_norm": 0.07077818847784131, "learning_rate": 9.261463623541493e-06, "loss": 0.5223, "step": 1428 }, { "epoch": 
0.7094452029291299, "grad_norm": 0.07067550789715772, "learning_rate": 9.260440862931002e-06, "loss": 0.4934, "step": 1429 }, { "epoch": 0.7099416656323694, "grad_norm": 0.07365823742825753, "learning_rate": 9.259417451180437e-06, "loss": 0.5329, "step": 1430 }, { "epoch": 0.7104381283356088, "grad_norm": 0.07127326575777367, "learning_rate": 9.258393388446208e-06, "loss": 0.5535, "step": 1431 }, { "epoch": 0.7109345910388482, "grad_norm": 0.07488318035883733, "learning_rate": 9.257368674884829e-06, "loss": 0.5271, "step": 1432 }, { "epoch": 0.7114310537420876, "grad_norm": 0.07724788510343007, "learning_rate": 9.256343310652907e-06, "loss": 0.5321, "step": 1433 }, { "epoch": 0.711927516445327, "grad_norm": 0.07923242956914898, "learning_rate": 9.255317295907158e-06, "loss": 0.547, "step": 1434 }, { "epoch": 0.7124239791485665, "grad_norm": 0.07674530249324395, "learning_rate": 9.254290630804387e-06, "loss": 0.5312, "step": 1435 }, { "epoch": 0.7129204418518059, "grad_norm": 0.07180989622822076, "learning_rate": 9.253263315501508e-06, "loss": 0.5144, "step": 1436 }, { "epoch": 0.7134169045550453, "grad_norm": 0.09719368557984771, "learning_rate": 9.252235350155524e-06, "loss": 0.5375, "step": 1437 }, { "epoch": 0.7139133672582847, "grad_norm": 0.07200959908689893, "learning_rate": 9.25120673492355e-06, "loss": 0.5303, "step": 1438 }, { "epoch": 0.7144098299615241, "grad_norm": 0.07490489574435455, "learning_rate": 9.250177469962787e-06, "loss": 0.5515, "step": 1439 }, { "epoch": 0.7149062926647636, "grad_norm": 0.07704389871771329, "learning_rate": 9.249147555430545e-06, "loss": 0.5139, "step": 1440 }, { "epoch": 0.715402755368003, "grad_norm": 0.07348214641843492, "learning_rate": 9.24811699148423e-06, "loss": 0.5126, "step": 1441 }, { "epoch": 0.7158992180712423, "grad_norm": 0.07586501625655766, "learning_rate": 9.247085778281342e-06, "loss": 0.55, "step": 1442 }, { "epoch": 0.7163956807744818, "grad_norm": 0.07483427178766174, "learning_rate": 9.246053915979492e-06, "loss": 0.5241, "step": 1443 }, { "epoch": 0.7168921434777212, "grad_norm": 0.07846284947721777, "learning_rate": 9.245021404736382e-06, "loss": 0.5398, "step": 1444 }, { "epoch": 0.7173886061809607, "grad_norm": 0.06935259803429858, "learning_rate": 9.243988244709815e-06, "loss": 0.5141, "step": 1445 }, { "epoch": 0.7178850688842001, "grad_norm": 0.07133423082003033, "learning_rate": 9.24295443605769e-06, "loss": 0.5239, "step": 1446 }, { "epoch": 0.7183815315874394, "grad_norm": 0.0789951285815156, "learning_rate": 9.24191997893801e-06, "loss": 0.5489, "step": 1447 }, { "epoch": 0.7188779942906789, "grad_norm": 0.07789738837985204, "learning_rate": 9.240884873508876e-06, "loss": 0.5433, "step": 1448 }, { "epoch": 0.7193744569939183, "grad_norm": 0.07407387509806816, "learning_rate": 9.239849119928486e-06, "loss": 0.515, "step": 1449 }, { "epoch": 0.7198709196971578, "grad_norm": 0.07376724827013004, "learning_rate": 9.23881271835514e-06, "loss": 0.5465, "step": 1450 }, { "epoch": 0.7203673824003972, "grad_norm": 0.07614036890621281, "learning_rate": 9.237775668947233e-06, "loss": 0.5728, "step": 1451 }, { "epoch": 0.7208638451036365, "grad_norm": 0.07088363970569353, "learning_rate": 9.236737971863263e-06, "loss": 0.506, "step": 1452 }, { "epoch": 0.721360307806876, "grad_norm": 0.0731833784182166, "learning_rate": 9.235699627261825e-06, "loss": 0.485, "step": 1453 }, { "epoch": 0.7218567705101154, "grad_norm": 0.07542331219080749, "learning_rate": 9.234660635301613e-06, "loss": 0.5164, "step": 1454 }, { "epoch": 
0.7223532332133549, "grad_norm": 0.06950421429090972, "learning_rate": 9.233620996141421e-06, "loss": 0.509, "step": 1455 }, { "epoch": 0.7228496959165943, "grad_norm": 0.06914956109999934, "learning_rate": 9.23258070994014e-06, "loss": 0.5121, "step": 1456 }, { "epoch": 0.7233461586198336, "grad_norm": 0.07448552133247559, "learning_rate": 9.231539776856764e-06, "loss": 0.5373, "step": 1457 }, { "epoch": 0.7238426213230731, "grad_norm": 0.07433314213228254, "learning_rate": 9.230498197050377e-06, "loss": 0.5063, "step": 1458 }, { "epoch": 0.7243390840263125, "grad_norm": 0.07175828404165463, "learning_rate": 9.229455970680175e-06, "loss": 0.4841, "step": 1459 }, { "epoch": 0.724835546729552, "grad_norm": 0.07405100866158164, "learning_rate": 9.22841309790544e-06, "loss": 0.543, "step": 1460 }, { "epoch": 0.7253320094327914, "grad_norm": 0.07068543317117736, "learning_rate": 9.227369578885561e-06, "loss": 0.5044, "step": 1461 }, { "epoch": 0.7258284721360307, "grad_norm": 0.07369786122206998, "learning_rate": 9.226325413780021e-06, "loss": 0.5316, "step": 1462 }, { "epoch": 0.7263249348392702, "grad_norm": 0.07139828290659295, "learning_rate": 9.225280602748408e-06, "loss": 0.5262, "step": 1463 }, { "epoch": 0.7268213975425096, "grad_norm": 0.06840457873936871, "learning_rate": 9.2242351459504e-06, "loss": 0.521, "step": 1464 }, { "epoch": 0.7273178602457491, "grad_norm": 0.0730202453066913, "learning_rate": 9.223189043545783e-06, "loss": 0.4982, "step": 1465 }, { "epoch": 0.7278143229489885, "grad_norm": 0.07738082853275433, "learning_rate": 9.222142295694432e-06, "loss": 0.5325, "step": 1466 }, { "epoch": 0.7283107856522278, "grad_norm": 0.10268618786682156, "learning_rate": 9.221094902556329e-06, "loss": 0.5434, "step": 1467 }, { "epoch": 0.7288072483554673, "grad_norm": 0.07328876949375858, "learning_rate": 9.220046864291549e-06, "loss": 0.5175, "step": 1468 }, { "epoch": 0.7293037110587067, "grad_norm": 0.07254546474047666, "learning_rate": 9.218998181060271e-06, "loss": 0.5406, "step": 1469 }, { "epoch": 0.7298001737619462, "grad_norm": 0.07561106514048177, "learning_rate": 9.217948853022766e-06, "loss": 0.5229, "step": 1470 }, { "epoch": 0.7302966364651855, "grad_norm": 0.08213765210369488, "learning_rate": 9.21689888033941e-06, "loss": 0.5214, "step": 1471 }, { "epoch": 0.7307930991684249, "grad_norm": 0.07417262119469888, "learning_rate": 9.215848263170672e-06, "loss": 0.5309, "step": 1472 }, { "epoch": 0.7312895618716644, "grad_norm": 0.06898938755371146, "learning_rate": 9.214797001677122e-06, "loss": 0.4872, "step": 1473 }, { "epoch": 0.7317860245749038, "grad_norm": 0.1042117632122199, "learning_rate": 9.213745096019432e-06, "loss": 0.5298, "step": 1474 }, { "epoch": 0.7322824872781433, "grad_norm": 0.07050395956024427, "learning_rate": 9.212692546358364e-06, "loss": 0.491, "step": 1475 }, { "epoch": 0.7327789499813826, "grad_norm": 0.07451854816196715, "learning_rate": 9.211639352854786e-06, "loss": 0.5502, "step": 1476 }, { "epoch": 0.733275412684622, "grad_norm": 0.06968755487318913, "learning_rate": 9.210585515669664e-06, "loss": 0.4877, "step": 1477 }, { "epoch": 0.7337718753878615, "grad_norm": 0.07622242603043373, "learning_rate": 9.209531034964055e-06, "loss": 0.555, "step": 1478 }, { "epoch": 0.7342683380911009, "grad_norm": 0.06999707168192029, "learning_rate": 9.208475910899121e-06, "loss": 0.5393, "step": 1479 }, { "epoch": 0.7347648007943404, "grad_norm": 0.07107633271501804, "learning_rate": 9.207420143636124e-06, "loss": 0.484, "step": 1480 }, { "epoch": 
0.7352612634975797, "grad_norm": 0.07559982394848742, "learning_rate": 9.206363733336419e-06, "loss": 0.5636, "step": 1481 }, { "epoch": 0.7357577262008191, "grad_norm": 0.2065710551000181, "learning_rate": 9.20530668016146e-06, "loss": 0.5293, "step": 1482 }, { "epoch": 0.7362541889040586, "grad_norm": 0.07006483401468061, "learning_rate": 9.204248984272802e-06, "loss": 0.5436, "step": 1483 }, { "epoch": 0.736750651607298, "grad_norm": 0.08039091210606134, "learning_rate": 9.203190645832098e-06, "loss": 0.5436, "step": 1484 }, { "epoch": 0.7372471143105375, "grad_norm": 0.07361816607337918, "learning_rate": 9.202131665001096e-06, "loss": 0.5386, "step": 1485 }, { "epoch": 0.7377435770137768, "grad_norm": 0.07428889038193494, "learning_rate": 9.201072041941644e-06, "loss": 0.5428, "step": 1486 }, { "epoch": 0.7382400397170162, "grad_norm": 0.0741239104187118, "learning_rate": 9.200011776815691e-06, "loss": 0.564, "step": 1487 }, { "epoch": 0.7387365024202557, "grad_norm": 0.0761608368835551, "learning_rate": 9.19895086978528e-06, "loss": 0.511, "step": 1488 }, { "epoch": 0.7392329651234951, "grad_norm": 0.07634971872131308, "learning_rate": 9.197889321012552e-06, "loss": 0.5189, "step": 1489 }, { "epoch": 0.7397294278267346, "grad_norm": 0.07451573011243572, "learning_rate": 9.196827130659752e-06, "loss": 0.5581, "step": 1490 }, { "epoch": 0.7402258905299739, "grad_norm": 0.0752122816148327, "learning_rate": 9.195764298889213e-06, "loss": 0.5336, "step": 1491 }, { "epoch": 0.7407223532332133, "grad_norm": 0.07846376180300381, "learning_rate": 9.194700825863377e-06, "loss": 0.5395, "step": 1492 }, { "epoch": 0.7412188159364528, "grad_norm": 0.0724115759426848, "learning_rate": 9.193636711744775e-06, "loss": 0.5405, "step": 1493 }, { "epoch": 0.7417152786396922, "grad_norm": 0.07415138326255291, "learning_rate": 9.192571956696044e-06, "loss": 0.5617, "step": 1494 }, { "epoch": 0.7422117413429317, "grad_norm": 0.07195572234281511, "learning_rate": 9.19150656087991e-06, "loss": 0.5277, "step": 1495 }, { "epoch": 0.742708204046171, "grad_norm": 0.07114858026308328, "learning_rate": 9.190440524459203e-06, "loss": 0.5088, "step": 1496 }, { "epoch": 0.7432046667494104, "grad_norm": 0.07622345987455985, "learning_rate": 9.189373847596853e-06, "loss": 0.534, "step": 1497 }, { "epoch": 0.7437011294526499, "grad_norm": 0.07656296285911206, "learning_rate": 9.188306530455882e-06, "loss": 0.4895, "step": 1498 }, { "epoch": 0.7441975921558893, "grad_norm": 0.07356439663118937, "learning_rate": 9.187238573199411e-06, "loss": 0.5347, "step": 1499 }, { "epoch": 0.7446940548591288, "grad_norm": 0.07131573683028186, "learning_rate": 9.18616997599066e-06, "loss": 0.5155, "step": 1500 }, { "epoch": 0.7451905175623681, "grad_norm": 0.07289576946445583, "learning_rate": 9.18510073899295e-06, "loss": 0.5572, "step": 1501 }, { "epoch": 0.7456869802656075, "grad_norm": 0.26584442872430764, "learning_rate": 9.184030862369694e-06, "loss": 0.5308, "step": 1502 }, { "epoch": 0.746183442968847, "grad_norm": 0.07300736520636775, "learning_rate": 9.182960346284408e-06, "loss": 0.5297, "step": 1503 }, { "epoch": 0.7466799056720864, "grad_norm": 0.07341036866622214, "learning_rate": 9.181889190900702e-06, "loss": 0.5043, "step": 1504 }, { "epoch": 0.7471763683753258, "grad_norm": 0.07191242330312968, "learning_rate": 9.180817396382283e-06, "loss": 0.5342, "step": 1505 }, { "epoch": 0.7476728310785652, "grad_norm": 0.0695672717047009, "learning_rate": 9.17974496289296e-06, "loss": 0.5275, "step": 1506 }, { "epoch": 
0.7481692937818046, "grad_norm": 0.07559861194849239, "learning_rate": 9.178671890596636e-06, "loss": 0.553, "step": 1507 }, { "epoch": 0.7486657564850441, "grad_norm": 0.08380571084657057, "learning_rate": 9.177598179657314e-06, "loss": 0.5684, "step": 1508 }, { "epoch": 0.7491622191882835, "grad_norm": 0.07901289401940544, "learning_rate": 9.176523830239093e-06, "loss": 0.5268, "step": 1509 }, { "epoch": 0.7496586818915228, "grad_norm": 0.0688523499951963, "learning_rate": 9.17544884250617e-06, "loss": 0.5068, "step": 1510 }, { "epoch": 0.7501551445947623, "grad_norm": 0.06947079013396563, "learning_rate": 9.174373216622841e-06, "loss": 0.4972, "step": 1511 }, { "epoch": 0.7506516072980017, "grad_norm": 0.07146732680663952, "learning_rate": 9.173296952753494e-06, "loss": 0.5429, "step": 1512 }, { "epoch": 0.7506516072980017, "eval_loss": 0.5325629115104675, "eval_runtime": 259.2283, "eval_samples_per_second": 117.09, "eval_steps_per_second": 14.64, "step": 1512 }, { "epoch": 0.7511480700012412, "grad_norm": 0.07362115281936982, "learning_rate": 9.172220051062624e-06, "loss": 0.4972, "step": 1513 }, { "epoch": 0.7516445327044806, "grad_norm": 0.07398697191188404, "learning_rate": 9.171142511714815e-06, "loss": 0.5469, "step": 1514 }, { "epoch": 0.75214099540772, "grad_norm": 0.0726269568320206, "learning_rate": 9.17006433487475e-06, "loss": 0.524, "step": 1515 }, { "epoch": 0.7526374581109594, "grad_norm": 0.07055758850826166, "learning_rate": 9.168985520707215e-06, "loss": 0.5181, "step": 1516 }, { "epoch": 0.7531339208141988, "grad_norm": 0.07245448024401645, "learning_rate": 9.167906069377088e-06, "loss": 0.5171, "step": 1517 }, { "epoch": 0.7536303835174383, "grad_norm": 0.07100972584744263, "learning_rate": 9.166825981049345e-06, "loss": 0.5148, "step": 1518 }, { "epoch": 0.7541268462206777, "grad_norm": 0.0724354876187257, "learning_rate": 9.165745255889062e-06, "loss": 0.5152, "step": 1519 }, { "epoch": 0.754623308923917, "grad_norm": 0.07341777423839119, "learning_rate": 9.164663894061408e-06, "loss": 0.5398, "step": 1520 }, { "epoch": 0.7551197716271565, "grad_norm": 0.08055372063895748, "learning_rate": 9.163581895731654e-06, "loss": 0.5235, "step": 1521 }, { "epoch": 0.7556162343303959, "grad_norm": 0.07199240388013335, "learning_rate": 9.162499261065164e-06, "loss": 0.5076, "step": 1522 }, { "epoch": 0.7561126970336354, "grad_norm": 0.06963714899756415, "learning_rate": 9.161415990227405e-06, "loss": 0.5237, "step": 1523 }, { "epoch": 0.7566091597368748, "grad_norm": 0.0770440952254681, "learning_rate": 9.160332083383933e-06, "loss": 0.5329, "step": 1524 }, { "epoch": 0.7571056224401141, "grad_norm": 0.07151756468144972, "learning_rate": 9.15924754070041e-06, "loss": 0.5189, "step": 1525 }, { "epoch": 0.7576020851433536, "grad_norm": 0.0729147292162753, "learning_rate": 9.158162362342584e-06, "loss": 0.5355, "step": 1526 }, { "epoch": 0.758098547846593, "grad_norm": 0.07368817324840382, "learning_rate": 9.157076548476317e-06, "loss": 0.5393, "step": 1527 }, { "epoch": 0.7585950105498325, "grad_norm": 0.0968328493278401, "learning_rate": 9.155990099267551e-06, "loss": 0.4916, "step": 1528 }, { "epoch": 0.7590914732530719, "grad_norm": 0.06967081513272996, "learning_rate": 9.154903014882334e-06, "loss": 0.512, "step": 1529 }, { "epoch": 0.7595879359563112, "grad_norm": 0.06960396037018117, "learning_rate": 9.153815295486811e-06, "loss": 0.5085, "step": 1530 }, { "epoch": 0.7600843986595507, "grad_norm": 0.06801654295562962, "learning_rate": 9.152726941247223e-06, "loss": 0.5055, 
"step": 1531 }, { "epoch": 0.7605808613627901, "grad_norm": 0.0752300824946068, "learning_rate": 9.151637952329903e-06, "loss": 0.5427, "step": 1532 }, { "epoch": 0.7610773240660296, "grad_norm": 0.07452158768511066, "learning_rate": 9.15054832890129e-06, "loss": 0.5597, "step": 1533 }, { "epoch": 0.761573786769269, "grad_norm": 0.07028632531598884, "learning_rate": 9.149458071127914e-06, "loss": 0.5202, "step": 1534 }, { "epoch": 0.7620702494725083, "grad_norm": 0.07239151178732862, "learning_rate": 9.148367179176405e-06, "loss": 0.5144, "step": 1535 }, { "epoch": 0.7625667121757478, "grad_norm": 0.07357075200741311, "learning_rate": 9.147275653213484e-06, "loss": 0.5582, "step": 1536 }, { "epoch": 0.7630631748789872, "grad_norm": 0.07390560783887343, "learning_rate": 9.146183493405976e-06, "loss": 0.5662, "step": 1537 }, { "epoch": 0.7635596375822267, "grad_norm": 0.07055852319994438, "learning_rate": 9.145090699920801e-06, "loss": 0.5061, "step": 1538 }, { "epoch": 0.764056100285466, "grad_norm": 0.0736301172340416, "learning_rate": 9.143997272924974e-06, "loss": 0.5179, "step": 1539 }, { "epoch": 0.7645525629887054, "grad_norm": 0.06884159473420354, "learning_rate": 9.142903212585607e-06, "loss": 0.5153, "step": 1540 }, { "epoch": 0.7650490256919449, "grad_norm": 0.07591997410813622, "learning_rate": 9.14180851906991e-06, "loss": 0.5573, "step": 1541 }, { "epoch": 0.7655454883951843, "grad_norm": 0.06765520349623405, "learning_rate": 9.140713192545193e-06, "loss": 0.5003, "step": 1542 }, { "epoch": 0.7660419510984238, "grad_norm": 0.07139622939405862, "learning_rate": 9.139617233178853e-06, "loss": 0.54, "step": 1543 }, { "epoch": 0.7665384138016631, "grad_norm": 0.0734046035089621, "learning_rate": 9.138520641138391e-06, "loss": 0.5298, "step": 1544 }, { "epoch": 0.7670348765049025, "grad_norm": 0.07129862203399848, "learning_rate": 9.137423416591408e-06, "loss": 0.537, "step": 1545 }, { "epoch": 0.767531339208142, "grad_norm": 0.07123101461490239, "learning_rate": 9.136325559705593e-06, "loss": 0.5424, "step": 1546 }, { "epoch": 0.7680278019113814, "grad_norm": 0.0680050575809704, "learning_rate": 9.135227070648737e-06, "loss": 0.507, "step": 1547 }, { "epoch": 0.7685242646146209, "grad_norm": 0.07185264556036576, "learning_rate": 9.134127949588727e-06, "loss": 0.5327, "step": 1548 }, { "epoch": 0.7690207273178602, "grad_norm": 0.07539063274123536, "learning_rate": 9.133028196693548e-06, "loss": 0.5232, "step": 1549 }, { "epoch": 0.7695171900210996, "grad_norm": 0.07386130365503506, "learning_rate": 9.131927812131273e-06, "loss": 0.5364, "step": 1550 }, { "epoch": 0.7700136527243391, "grad_norm": 0.06818654256924661, "learning_rate": 9.130826796070085e-06, "loss": 0.5088, "step": 1551 }, { "epoch": 0.7705101154275785, "grad_norm": 0.06906607950611202, "learning_rate": 9.129725148678252e-06, "loss": 0.497, "step": 1552 }, { "epoch": 0.771006578130818, "grad_norm": 7.740988013167634, "learning_rate": 9.128622870124147e-06, "loss": 0.5968, "step": 1553 }, { "epoch": 0.7715030408340573, "grad_norm": 0.07199899183090658, "learning_rate": 9.127519960576234e-06, "loss": 0.5153, "step": 1554 }, { "epoch": 0.7719995035372967, "grad_norm": 0.07229222543077109, "learning_rate": 9.126416420203072e-06, "loss": 0.5207, "step": 1555 }, { "epoch": 0.7724959662405362, "grad_norm": 0.07716579011178949, "learning_rate": 9.125312249173325e-06, "loss": 0.5099, "step": 1556 }, { "epoch": 0.7729924289437756, "grad_norm": 0.07253532602344766, "learning_rate": 9.124207447655744e-06, "loss": 0.5323, "step": 
1557 }, { "epoch": 0.7734888916470151, "grad_norm": 0.07497907003215884, "learning_rate": 9.123102015819184e-06, "loss": 0.5201, "step": 1558 }, { "epoch": 0.7739853543502544, "grad_norm": 0.07354183539985416, "learning_rate": 9.121995953832585e-06, "loss": 0.5053, "step": 1559 }, { "epoch": 0.7744818170534938, "grad_norm": 0.07035972316181158, "learning_rate": 9.120889261864999e-06, "loss": 0.5156, "step": 1560 }, { "epoch": 0.7749782797567333, "grad_norm": 0.08187819899227436, "learning_rate": 9.119781940085561e-06, "loss": 0.5326, "step": 1561 }, { "epoch": 0.7754747424599727, "grad_norm": 0.0736392754058841, "learning_rate": 9.11867398866351e-06, "loss": 0.5146, "step": 1562 }, { "epoch": 0.7759712051632122, "grad_norm": 0.07456619741450733, "learning_rate": 9.117565407768178e-06, "loss": 0.5476, "step": 1563 }, { "epoch": 0.7764676678664515, "grad_norm": 0.07319757416197845, "learning_rate": 9.116456197568993e-06, "loss": 0.5591, "step": 1564 }, { "epoch": 0.7769641305696909, "grad_norm": 0.06997134853606297, "learning_rate": 9.11534635823548e-06, "loss": 0.4941, "step": 1565 }, { "epoch": 0.7774605932729304, "grad_norm": 0.07205444260596973, "learning_rate": 9.114235889937262e-06, "loss": 0.5427, "step": 1566 }, { "epoch": 0.7779570559761698, "grad_norm": 0.07788512971157865, "learning_rate": 9.113124792844053e-06, "loss": 0.5207, "step": 1567 }, { "epoch": 0.7784535186794093, "grad_norm": 0.08002426048799419, "learning_rate": 9.112013067125671e-06, "loss": 0.5307, "step": 1568 }, { "epoch": 0.7789499813826486, "grad_norm": 0.07468268399263038, "learning_rate": 9.11090071295202e-06, "loss": 0.5262, "step": 1569 }, { "epoch": 0.779446444085888, "grad_norm": 0.07298474341759807, "learning_rate": 9.109787730493111e-06, "loss": 0.5407, "step": 1570 }, { "epoch": 0.7799429067891275, "grad_norm": 0.07728077776334637, "learning_rate": 9.10867411991904e-06, "loss": 0.5579, "step": 1571 }, { "epoch": 0.7804393694923669, "grad_norm": 0.07344685556951021, "learning_rate": 9.10755988140001e-06, "loss": 0.5205, "step": 1572 }, { "epoch": 0.7809358321956062, "grad_norm": 0.07014883880403816, "learning_rate": 9.10644501510631e-06, "loss": 0.4727, "step": 1573 }, { "epoch": 0.7814322948988457, "grad_norm": 0.07213557704529419, "learning_rate": 9.105329521208334e-06, "loss": 0.5279, "step": 1574 }, { "epoch": 0.7819287576020851, "grad_norm": 0.0757860432054162, "learning_rate": 9.104213399876562e-06, "loss": 0.5387, "step": 1575 }, { "epoch": 0.7824252203053246, "grad_norm": 0.07057256214391969, "learning_rate": 9.103096651281578e-06, "loss": 0.5345, "step": 1576 }, { "epoch": 0.782921683008564, "grad_norm": 0.07132201540873075, "learning_rate": 9.101979275594061e-06, "loss": 0.5098, "step": 1577 }, { "epoch": 0.7834181457118033, "grad_norm": 0.07152535103531563, "learning_rate": 9.10086127298478e-06, "loss": 0.5062, "step": 1578 }, { "epoch": 0.7839146084150428, "grad_norm": 0.07093279415393061, "learning_rate": 9.099742643624607e-06, "loss": 0.5225, "step": 1579 }, { "epoch": 0.7844110711182822, "grad_norm": 0.07636988653396815, "learning_rate": 9.098623387684504e-06, "loss": 0.5901, "step": 1580 }, { "epoch": 0.7849075338215217, "grad_norm": 0.0726902485186396, "learning_rate": 9.097503505335534e-06, "loss": 0.5992, "step": 1581 }, { "epoch": 0.7854039965247611, "grad_norm": 0.07148367002169637, "learning_rate": 9.09638299674885e-06, "loss": 0.5249, "step": 1582 }, { "epoch": 0.7859004592280004, "grad_norm": 0.07187191854489607, "learning_rate": 9.095261862095706e-06, "loss": 0.5149, "step": 1583 
}, { "epoch": 0.7863969219312399, "grad_norm": 0.07433331234158458, "learning_rate": 9.09414010154745e-06, "loss": 0.4996, "step": 1584 }, { "epoch": 0.7868933846344793, "grad_norm": 0.07421628137732915, "learning_rate": 9.093017715275523e-06, "loss": 0.5373, "step": 1585 }, { "epoch": 0.7873898473377188, "grad_norm": 0.0745071207582901, "learning_rate": 9.091894703451464e-06, "loss": 0.5044, "step": 1586 }, { "epoch": 0.7878863100409582, "grad_norm": 0.07565857135893618, "learning_rate": 9.090771066246911e-06, "loss": 0.536, "step": 1587 }, { "epoch": 0.7883827727441975, "grad_norm": 0.07130484120336626, "learning_rate": 9.089646803833589e-06, "loss": 0.4945, "step": 1588 }, { "epoch": 0.788879235447437, "grad_norm": 0.07351509883364295, "learning_rate": 9.088521916383326e-06, "loss": 0.544, "step": 1589 }, { "epoch": 0.7893756981506764, "grad_norm": 0.07229735716128917, "learning_rate": 9.087396404068043e-06, "loss": 0.5031, "step": 1590 }, { "epoch": 0.7898721608539159, "grad_norm": 0.07640419900882879, "learning_rate": 9.086270267059755e-06, "loss": 0.5397, "step": 1591 }, { "epoch": 0.7903686235571553, "grad_norm": 0.07396099607093055, "learning_rate": 9.085143505530576e-06, "loss": 0.564, "step": 1592 }, { "epoch": 0.7908650862603946, "grad_norm": 0.06974958002197082, "learning_rate": 9.084016119652711e-06, "loss": 0.5277, "step": 1593 }, { "epoch": 0.7913615489636341, "grad_norm": 0.07711746671171255, "learning_rate": 9.082888109598465e-06, "loss": 0.52, "step": 1594 }, { "epoch": 0.7918580116668735, "grad_norm": 0.07562328283238078, "learning_rate": 9.081759475540236e-06, "loss": 0.5187, "step": 1595 }, { "epoch": 0.792354474370113, "grad_norm": 0.07163646761900765, "learning_rate": 9.080630217650516e-06, "loss": 0.5116, "step": 1596 }, { "epoch": 0.7928509370733524, "grad_norm": 0.07470949012871686, "learning_rate": 9.079500336101898e-06, "loss": 0.511, "step": 1597 }, { "epoch": 0.7933473997765917, "grad_norm": 0.07250875886295932, "learning_rate": 9.078369831067062e-06, "loss": 0.5349, "step": 1598 }, { "epoch": 0.7938438624798312, "grad_norm": 0.0739653329382637, "learning_rate": 9.077238702718786e-06, "loss": 0.5229, "step": 1599 }, { "epoch": 0.7943403251830706, "grad_norm": 0.07107778628692231, "learning_rate": 9.076106951229952e-06, "loss": 0.516, "step": 1600 }, { "epoch": 0.7948367878863101, "grad_norm": 0.0725789052221172, "learning_rate": 9.074974576773525e-06, "loss": 0.5106, "step": 1601 }, { "epoch": 0.7953332505895495, "grad_norm": 0.07305095035904646, "learning_rate": 9.073841579522571e-06, "loss": 0.5172, "step": 1602 }, { "epoch": 0.7958297132927888, "grad_norm": 0.07264990359957456, "learning_rate": 9.07270795965025e-06, "loss": 0.508, "step": 1603 }, { "epoch": 0.7963261759960283, "grad_norm": 0.0826982852729199, "learning_rate": 9.071573717329818e-06, "loss": 0.5049, "step": 1604 }, { "epoch": 0.7968226386992677, "grad_norm": 0.07656668555306413, "learning_rate": 9.070438852734627e-06, "loss": 0.5313, "step": 1605 }, { "epoch": 0.7973191014025072, "grad_norm": 0.07069294942555418, "learning_rate": 9.069303366038122e-06, "loss": 0.5177, "step": 1606 }, { "epoch": 0.7978155641057466, "grad_norm": 0.0777471019112174, "learning_rate": 9.068167257413842e-06, "loss": 0.5439, "step": 1607 }, { "epoch": 0.7983120268089859, "grad_norm": 0.07682960008512643, "learning_rate": 9.067030527035426e-06, "loss": 0.5406, "step": 1608 }, { "epoch": 0.7988084895122254, "grad_norm": 0.0807223836950902, "learning_rate": 9.065893175076604e-06, "loss": 0.561, "step": 1609 }, { 
"epoch": 0.7993049522154648, "grad_norm": 0.07047014648160267, "learning_rate": 9.064755201711202e-06, "loss": 0.5154, "step": 1610 }, { "epoch": 0.7998014149187043, "grad_norm": 0.07595726981086956, "learning_rate": 9.06361660711314e-06, "loss": 0.5027, "step": 1611 }, { "epoch": 0.8002978776219436, "grad_norm": 0.08005685606981752, "learning_rate": 9.062477391456436e-06, "loss": 0.5118, "step": 1612 }, { "epoch": 0.800794340325183, "grad_norm": 0.07718134842573832, "learning_rate": 9.0613375549152e-06, "loss": 0.565, "step": 1613 }, { "epoch": 0.8012908030284225, "grad_norm": 0.07158710304795181, "learning_rate": 9.060197097663634e-06, "loss": 0.5297, "step": 1614 }, { "epoch": 0.8017872657316619, "grad_norm": 0.07203230378497506, "learning_rate": 9.059056019876044e-06, "loss": 0.5164, "step": 1615 }, { "epoch": 0.8022837284349014, "grad_norm": 0.07406422518581404, "learning_rate": 9.057914321726824e-06, "loss": 0.5327, "step": 1616 }, { "epoch": 0.8027801911381407, "grad_norm": 0.07251157226369823, "learning_rate": 9.056772003390464e-06, "loss": 0.5002, "step": 1617 }, { "epoch": 0.8032766538413801, "grad_norm": 0.07167096759529387, "learning_rate": 9.055629065041547e-06, "loss": 0.492, "step": 1618 }, { "epoch": 0.8037731165446196, "grad_norm": 0.07455451424426704, "learning_rate": 9.054485506854756e-06, "loss": 0.5225, "step": 1619 }, { "epoch": 0.804269579247859, "grad_norm": 0.07080913355805624, "learning_rate": 9.053341329004863e-06, "loss": 0.5133, "step": 1620 }, { "epoch": 0.8047660419510985, "grad_norm": 0.07413789776565281, "learning_rate": 9.05219653166674e-06, "loss": 0.5035, "step": 1621 }, { "epoch": 0.8052625046543378, "grad_norm": 0.0703480239098969, "learning_rate": 9.051051115015346e-06, "loss": 0.5276, "step": 1622 }, { "epoch": 0.8057589673575772, "grad_norm": 0.0765687761483791, "learning_rate": 9.049905079225744e-06, "loss": 0.5311, "step": 1623 }, { "epoch": 0.8062554300608167, "grad_norm": 0.07143355651802613, "learning_rate": 9.048758424473088e-06, "loss": 0.5458, "step": 1624 }, { "epoch": 0.8067518927640561, "grad_norm": 0.07465766054033428, "learning_rate": 9.047611150932621e-06, "loss": 0.5764, "step": 1625 }, { "epoch": 0.8072483554672956, "grad_norm": 0.07113501350075914, "learning_rate": 9.04646325877969e-06, "loss": 0.5159, "step": 1626 }, { "epoch": 0.8077448181705349, "grad_norm": 0.0710495387744484, "learning_rate": 9.045314748189728e-06, "loss": 0.5323, "step": 1627 }, { "epoch": 0.8082412808737743, "grad_norm": 0.07185246674290095, "learning_rate": 9.04416561933827e-06, "loss": 0.5299, "step": 1628 }, { "epoch": 0.8087377435770138, "grad_norm": 0.06825471797050817, "learning_rate": 9.04301587240094e-06, "loss": 0.4911, "step": 1629 }, { "epoch": 0.8092342062802532, "grad_norm": 0.07128246257879234, "learning_rate": 9.041865507553458e-06, "loss": 0.5186, "step": 1630 }, { "epoch": 0.8097306689834927, "grad_norm": 0.0731020684241034, "learning_rate": 9.04071452497164e-06, "loss": 0.5209, "step": 1631 }, { "epoch": 0.810227131686732, "grad_norm": 0.07250812004228605, "learning_rate": 9.039562924831395e-06, "loss": 0.5277, "step": 1632 }, { "epoch": 0.8107235943899714, "grad_norm": 0.0768123954655183, "learning_rate": 9.038410707308727e-06, "loss": 0.5584, "step": 1633 }, { "epoch": 0.8112200570932109, "grad_norm": 0.07224607675157957, "learning_rate": 9.037257872579733e-06, "loss": 0.5241, "step": 1634 }, { "epoch": 0.8117165197964503, "grad_norm": 0.07069975133524385, "learning_rate": 9.036104420820606e-06, "loss": 0.5242, "step": 1635 }, { "epoch": 
0.8122129824996898, "grad_norm": 0.07184499072459798, "learning_rate": 9.034950352207632e-06, "loss": 0.5434, "step": 1636 }, { "epoch": 0.8127094452029291, "grad_norm": 0.07144391298900397, "learning_rate": 9.033795666917191e-06, "loss": 0.5169, "step": 1637 }, { "epoch": 0.8132059079061685, "grad_norm": 0.06912538260937971, "learning_rate": 9.032640365125761e-06, "loss": 0.5312, "step": 1638 }, { "epoch": 0.813702370609408, "grad_norm": 0.07046782884763288, "learning_rate": 9.031484447009908e-06, "loss": 0.5351, "step": 1639 }, { "epoch": 0.8141988333126474, "grad_norm": 0.06976119450345018, "learning_rate": 9.0303279127463e-06, "loss": 0.4855, "step": 1640 }, { "epoch": 0.8146952960158869, "grad_norm": 0.06869843149728422, "learning_rate": 9.02917076251169e-06, "loss": 0.5057, "step": 1641 }, { "epoch": 0.8151917587191262, "grad_norm": 0.07434287432224491, "learning_rate": 9.02801299648293e-06, "loss": 0.5234, "step": 1642 }, { "epoch": 0.8156882214223656, "grad_norm": 0.06957914337939076, "learning_rate": 9.02685461483697e-06, "loss": 0.541, "step": 1643 }, { "epoch": 0.8161846841256051, "grad_norm": 0.07218018252814719, "learning_rate": 9.025695617750848e-06, "loss": 0.5486, "step": 1644 }, { "epoch": 0.8166811468288445, "grad_norm": 0.07168288119557607, "learning_rate": 9.024536005401697e-06, "loss": 0.5013, "step": 1645 }, { "epoch": 0.8171776095320838, "grad_norm": 0.0750914692398203, "learning_rate": 9.023375777966747e-06, "loss": 0.5278, "step": 1646 }, { "epoch": 0.8176740722353233, "grad_norm": 0.07583223590925382, "learning_rate": 9.022214935623318e-06, "loss": 0.5198, "step": 1647 }, { "epoch": 0.8181705349385627, "grad_norm": 0.07173398671171025, "learning_rate": 9.02105347854883e-06, "loss": 0.5073, "step": 1648 }, { "epoch": 0.8186669976418022, "grad_norm": 0.0745847654854046, "learning_rate": 9.019891406920788e-06, "loss": 0.5383, "step": 1649 }, { "epoch": 0.8191634603450416, "grad_norm": 0.07086350747239142, "learning_rate": 9.018728720916798e-06, "loss": 0.5287, "step": 1650 }, { "epoch": 0.819659923048281, "grad_norm": 0.07777382699121162, "learning_rate": 9.01756542071456e-06, "loss": 0.5203, "step": 1651 }, { "epoch": 0.8201563857515204, "grad_norm": 0.07603501493619808, "learning_rate": 9.016401506491863e-06, "loss": 0.5363, "step": 1652 }, { "epoch": 0.8206528484547598, "grad_norm": 0.07394262339412508, "learning_rate": 9.015236978426595e-06, "loss": 0.5478, "step": 1653 }, { "epoch": 0.8211493111579993, "grad_norm": 0.07074353754605808, "learning_rate": 9.014071836696734e-06, "loss": 0.4966, "step": 1654 }, { "epoch": 0.8216457738612387, "grad_norm": 0.07423057235044962, "learning_rate": 9.012906081480354e-06, "loss": 0.5172, "step": 1655 }, { "epoch": 0.822142236564478, "grad_norm": 0.07522410434633135, "learning_rate": 9.011739712955621e-06, "loss": 0.5228, "step": 1656 }, { "epoch": 0.8226386992677175, "grad_norm": 0.07547811533928037, "learning_rate": 9.010572731300796e-06, "loss": 0.5376, "step": 1657 }, { "epoch": 0.8231351619709569, "grad_norm": 0.07471384072083828, "learning_rate": 9.009405136694234e-06, "loss": 0.5194, "step": 1658 }, { "epoch": 0.8236316246741964, "grad_norm": 0.07608351907222413, "learning_rate": 9.008236929314383e-06, "loss": 0.5367, "step": 1659 }, { "epoch": 0.8241280873774358, "grad_norm": 0.07635352589296011, "learning_rate": 9.007068109339783e-06, "loss": 0.5292, "step": 1660 }, { "epoch": 0.8246245500806751, "grad_norm": 0.07282084669410545, "learning_rate": 9.005898676949073e-06, "loss": 0.5448, "step": 1661 }, { "epoch": 
0.8251210127839146, "grad_norm": 0.0707264456584643, "learning_rate": 9.00472863232098e-06, "loss": 0.492, "step": 1662 }, { "epoch": 0.825617475487154, "grad_norm": 0.0731668244630585, "learning_rate": 9.003557975634325e-06, "loss": 0.5227, "step": 1663 }, { "epoch": 0.8261139381903935, "grad_norm": 0.07211067458154444, "learning_rate": 9.002386707068026e-06, "loss": 0.5333, "step": 1664 }, { "epoch": 0.8266104008936329, "grad_norm": 0.08009101745689773, "learning_rate": 9.001214826801092e-06, "loss": 0.5483, "step": 1665 }, { "epoch": 0.8271068635968722, "grad_norm": 0.0698658302792023, "learning_rate": 9.000042335012627e-06, "loss": 0.537, "step": 1666 }, { "epoch": 0.8276033263001117, "grad_norm": 0.0739225167598179, "learning_rate": 8.998869231881827e-06, "loss": 0.5246, "step": 1667 }, { "epoch": 0.8280997890033511, "grad_norm": 0.07383535775288107, "learning_rate": 8.997695517587981e-06, "loss": 0.5328, "step": 1668 }, { "epoch": 0.8285962517065906, "grad_norm": 0.07322555009863138, "learning_rate": 8.996521192310474e-06, "loss": 0.5529, "step": 1669 }, { "epoch": 0.82909271440983, "grad_norm": 0.07112105943789013, "learning_rate": 8.995346256228782e-06, "loss": 0.5422, "step": 1670 }, { "epoch": 0.8295891771130693, "grad_norm": 0.07542664568841825, "learning_rate": 8.994170709522473e-06, "loss": 0.5439, "step": 1671 }, { "epoch": 0.8300856398163088, "grad_norm": 0.07050701387729236, "learning_rate": 8.992994552371217e-06, "loss": 0.5269, "step": 1672 }, { "epoch": 0.8305821025195482, "grad_norm": 0.07092483775981985, "learning_rate": 8.991817784954764e-06, "loss": 0.5481, "step": 1673 }, { "epoch": 0.8310785652227877, "grad_norm": 0.06851211677419224, "learning_rate": 8.990640407452966e-06, "loss": 0.5005, "step": 1674 }, { "epoch": 0.8315750279260271, "grad_norm": 0.074848670606186, "learning_rate": 8.989462420045768e-06, "loss": 0.5087, "step": 1675 }, { "epoch": 0.8320714906292664, "grad_norm": 0.07915295738445463, "learning_rate": 8.988283822913205e-06, "loss": 0.5564, "step": 1676 }, { "epoch": 0.8325679533325059, "grad_norm": 0.07375116283085385, "learning_rate": 8.987104616235407e-06, "loss": 0.5184, "step": 1677 }, { "epoch": 0.8330644160357453, "grad_norm": 0.07598742406736596, "learning_rate": 8.985924800192597e-06, "loss": 0.5732, "step": 1678 }, { "epoch": 0.8335608787389848, "grad_norm": 0.0733126506443, "learning_rate": 8.98474437496509e-06, "loss": 0.596, "step": 1679 }, { "epoch": 0.8340573414422241, "grad_norm": 0.07315827370163273, "learning_rate": 8.983563340733296e-06, "loss": 0.5723, "step": 1680 }, { "epoch": 0.8345538041454635, "grad_norm": 0.07562637951263981, "learning_rate": 8.982381697677717e-06, "loss": 0.5499, "step": 1681 }, { "epoch": 0.835050266848703, "grad_norm": 0.07599468393821707, "learning_rate": 8.981199445978947e-06, "loss": 0.5214, "step": 1682 }, { "epoch": 0.8355467295519424, "grad_norm": 0.07295503864472273, "learning_rate": 8.980016585817677e-06, "loss": 0.5008, "step": 1683 }, { "epoch": 0.8360431922551819, "grad_norm": 0.07626131253063163, "learning_rate": 8.978833117374685e-06, "loss": 0.5449, "step": 1684 }, { "epoch": 0.8365396549584212, "grad_norm": 0.06955667754604454, "learning_rate": 8.97764904083085e-06, "loss": 0.5214, "step": 1685 }, { "epoch": 0.8370361176616606, "grad_norm": 0.0730561593210296, "learning_rate": 8.976464356367133e-06, "loss": 0.548, "step": 1686 }, { "epoch": 0.8375325803649001, "grad_norm": 0.06996728191269769, "learning_rate": 8.975279064164597e-06, "loss": 0.5328, "step": 1687 }, { "epoch": 
0.8380290430681395, "grad_norm": 0.07041962149072396, "learning_rate": 8.974093164404396e-06, "loss": 0.5198, "step": 1688 }, { "epoch": 0.838525505771379, "grad_norm": 0.07163834216651387, "learning_rate": 8.972906657267773e-06, "loss": 0.5137, "step": 1689 }, { "epoch": 0.8390219684746183, "grad_norm": 0.07387087026768005, "learning_rate": 8.97171954293607e-06, "loss": 0.6089, "step": 1690 }, { "epoch": 0.8395184311778577, "grad_norm": 0.07356742299332733, "learning_rate": 8.970531821590715e-06, "loss": 0.5234, "step": 1691 }, { "epoch": 0.8400148938810972, "grad_norm": 0.07259975026238215, "learning_rate": 8.969343493413234e-06, "loss": 0.5185, "step": 1692 }, { "epoch": 0.8405113565843366, "grad_norm": 0.07374705958699512, "learning_rate": 8.968154558585244e-06, "loss": 0.5354, "step": 1693 }, { "epoch": 0.8410078192875761, "grad_norm": 0.07018652218556445, "learning_rate": 8.966965017288456e-06, "loss": 0.5333, "step": 1694 }, { "epoch": 0.8415042819908154, "grad_norm": 0.0723460304446156, "learning_rate": 8.965774869704669e-06, "loss": 0.549, "step": 1695 }, { "epoch": 0.8420007446940548, "grad_norm": 0.07257751184105775, "learning_rate": 8.964584116015777e-06, "loss": 0.5398, "step": 1696 }, { "epoch": 0.8424972073972943, "grad_norm": 0.07080762620664613, "learning_rate": 8.963392756403774e-06, "loss": 0.5359, "step": 1697 }, { "epoch": 0.8429936701005337, "grad_norm": 0.0716877086229164, "learning_rate": 8.962200791050734e-06, "loss": 0.5142, "step": 1698 }, { "epoch": 0.8434901328037732, "grad_norm": 0.07454251146437897, "learning_rate": 8.961008220138833e-06, "loss": 0.5383, "step": 1699 }, { "epoch": 0.8439865955070125, "grad_norm": 0.0736072882247968, "learning_rate": 8.959815043850336e-06, "loss": 0.545, "step": 1700 }, { "epoch": 0.8444830582102519, "grad_norm": 0.07201680235850795, "learning_rate": 8.9586212623676e-06, "loss": 0.5033, "step": 1701 }, { "epoch": 0.8449795209134914, "grad_norm": 0.07282467805491234, "learning_rate": 8.957426875873075e-06, "loss": 0.5163, "step": 1702 }, { "epoch": 0.8454759836167308, "grad_norm": 0.07208805465158266, "learning_rate": 8.956231884549304e-06, "loss": 0.502, "step": 1703 }, { "epoch": 0.8459724463199703, "grad_norm": 0.07232790534446927, "learning_rate": 8.955036288578924e-06, "loss": 0.5406, "step": 1704 }, { "epoch": 0.8464689090232096, "grad_norm": 0.07481224199301578, "learning_rate": 8.953840088144663e-06, "loss": 0.546, "step": 1705 }, { "epoch": 0.846965371726449, "grad_norm": 0.06953858909134908, "learning_rate": 8.952643283429337e-06, "loss": 0.4947, "step": 1706 }, { "epoch": 0.8474618344296885, "grad_norm": 0.07353616438918857, "learning_rate": 8.951445874615862e-06, "loss": 0.5537, "step": 1707 }, { "epoch": 0.8479582971329279, "grad_norm": 0.06906196166656396, "learning_rate": 8.950247861887242e-06, "loss": 0.4881, "step": 1708 }, { "epoch": 0.8484547598361674, "grad_norm": 0.07103588869751419, "learning_rate": 8.949049245426573e-06, "loss": 0.4984, "step": 1709 }, { "epoch": 0.8489512225394067, "grad_norm": 0.07620017467523198, "learning_rate": 8.947850025417044e-06, "loss": 0.5356, "step": 1710 }, { "epoch": 0.8494476852426461, "grad_norm": 0.07734988462561786, "learning_rate": 8.94665020204194e-06, "loss": 0.5357, "step": 1711 }, { "epoch": 0.8499441479458856, "grad_norm": 0.07415145536146497, "learning_rate": 8.945449775484631e-06, "loss": 0.5621, "step": 1712 }, { "epoch": 0.850440610649125, "grad_norm": 0.07228473869129433, "learning_rate": 8.944248745928584e-06, "loss": 0.5114, "step": 1713 }, { "epoch": 
0.8509370733523643, "grad_norm": 0.06940824642647671, "learning_rate": 8.943047113557358e-06, "loss": 0.5199, "step": 1714 }, { "epoch": 0.8514335360556038, "grad_norm": 0.07607055327588681, "learning_rate": 8.941844878554602e-06, "loss": 0.5556, "step": 1715 }, { "epoch": 0.8519299987588432, "grad_norm": 0.07198339251155979, "learning_rate": 8.94064204110406e-06, "loss": 0.5257, "step": 1716 }, { "epoch": 0.8524264614620827, "grad_norm": 0.06934540644110943, "learning_rate": 8.939438601389566e-06, "loss": 0.5085, "step": 1717 }, { "epoch": 0.8529229241653221, "grad_norm": 0.07389823313277775, "learning_rate": 8.938234559595044e-06, "loss": 0.5853, "step": 1718 }, { "epoch": 0.8534193868685614, "grad_norm": 0.07106273787523364, "learning_rate": 8.937029915904515e-06, "loss": 0.5172, "step": 1719 }, { "epoch": 0.8539158495718009, "grad_norm": 0.07626576874496946, "learning_rate": 8.93582467050209e-06, "loss": 0.5902, "step": 1720 }, { "epoch": 0.8544123122750403, "grad_norm": 0.07104800686298383, "learning_rate": 8.934618823571968e-06, "loss": 0.5669, "step": 1721 }, { "epoch": 0.8549087749782798, "grad_norm": 0.07098134787197038, "learning_rate": 8.933412375298447e-06, "loss": 0.5071, "step": 1722 }, { "epoch": 0.8554052376815192, "grad_norm": 0.07178430609385278, "learning_rate": 8.932205325865912e-06, "loss": 0.5663, "step": 1723 }, { "epoch": 0.8559017003847585, "grad_norm": 0.07321592545745911, "learning_rate": 8.93099767545884e-06, "loss": 0.5684, "step": 1724 }, { "epoch": 0.856398163087998, "grad_norm": 0.07405468733080081, "learning_rate": 8.929789424261804e-06, "loss": 0.5093, "step": 1725 }, { "epoch": 0.8568946257912374, "grad_norm": 0.06958960035844272, "learning_rate": 8.928580572459462e-06, "loss": 0.516, "step": 1726 }, { "epoch": 0.8573910884944769, "grad_norm": 0.07477229658813306, "learning_rate": 8.92737112023657e-06, "loss": 0.5131, "step": 1727 }, { "epoch": 0.8578875511977163, "grad_norm": 0.07175257894439932, "learning_rate": 8.926161067777973e-06, "loss": 0.5119, "step": 1728 }, { "epoch": 0.8583840139009556, "grad_norm": 0.07751441304214718, "learning_rate": 8.924950415268609e-06, "loss": 0.563, "step": 1729 }, { "epoch": 0.8588804766041951, "grad_norm": 0.07420172426697907, "learning_rate": 8.923739162893505e-06, "loss": 0.5324, "step": 1730 }, { "epoch": 0.8593769393074345, "grad_norm": 0.07407924635811906, "learning_rate": 8.922527310837782e-06, "loss": 0.4887, "step": 1731 }, { "epoch": 0.859873402010674, "grad_norm": 0.07350646391304734, "learning_rate": 8.921314859286651e-06, "loss": 0.5163, "step": 1732 }, { "epoch": 0.8603698647139134, "grad_norm": 0.07043961460651518, "learning_rate": 8.920101808425422e-06, "loss": 0.5057, "step": 1733 }, { "epoch": 0.8608663274171527, "grad_norm": 0.17234638710081665, "learning_rate": 8.91888815843948e-06, "loss": 0.5448, "step": 1734 }, { "epoch": 0.8613627901203922, "grad_norm": 0.0732175517309694, "learning_rate": 8.917673909514321e-06, "loss": 0.5299, "step": 1735 }, { "epoch": 0.8618592528236316, "grad_norm": 0.07453686320781494, "learning_rate": 8.916459061835519e-06, "loss": 0.546, "step": 1736 }, { "epoch": 0.8623557155268711, "grad_norm": 0.07567184139704818, "learning_rate": 8.915243615588745e-06, "loss": 0.5669, "step": 1737 }, { "epoch": 0.8628521782301105, "grad_norm": 0.07425811097859154, "learning_rate": 8.914027570959762e-06, "loss": 0.5029, "step": 1738 }, { "epoch": 0.8633486409333498, "grad_norm": 0.07391851841657099, "learning_rate": 8.91281092813442e-06, "loss": 0.532, "step": 1739 }, { "epoch": 
0.8638451036365893, "grad_norm": 0.09916635466093679, "learning_rate": 8.911593687298665e-06, "loss": 0.5352, "step": 1740 }, { "epoch": 0.8643415663398287, "grad_norm": 0.0723850348145785, "learning_rate": 8.910375848638532e-06, "loss": 0.5222, "step": 1741 }, { "epoch": 0.8648380290430682, "grad_norm": 0.0707452473794512, "learning_rate": 8.90915741234015e-06, "loss": 0.5296, "step": 1742 }, { "epoch": 0.8653344917463076, "grad_norm": 0.07299743926849714, "learning_rate": 8.907938378589735e-06, "loss": 0.5235, "step": 1743 }, { "epoch": 0.8658309544495469, "grad_norm": 0.0714751596943647, "learning_rate": 8.906718747573598e-06, "loss": 0.5276, "step": 1744 }, { "epoch": 0.8663274171527864, "grad_norm": 0.07178899786010984, "learning_rate": 8.90549851947814e-06, "loss": 0.5128, "step": 1745 }, { "epoch": 0.8668238798560258, "grad_norm": 0.0731518645963831, "learning_rate": 8.904277694489853e-06, "loss": 0.5194, "step": 1746 }, { "epoch": 0.8673203425592653, "grad_norm": 0.07149517704501569, "learning_rate": 8.903056272795322e-06, "loss": 0.5135, "step": 1747 }, { "epoch": 0.8678168052625047, "grad_norm": 0.0706661260761562, "learning_rate": 8.901834254581219e-06, "loss": 0.5, "step": 1748 }, { "epoch": 0.868313267965744, "grad_norm": 0.0699038656327828, "learning_rate": 8.900611640034313e-06, "loss": 0.5424, "step": 1749 }, { "epoch": 0.8688097306689835, "grad_norm": 0.07062531372070045, "learning_rate": 8.899388429341459e-06, "loss": 0.5001, "step": 1750 }, { "epoch": 0.8693061933722229, "grad_norm": 0.0734084689002575, "learning_rate": 8.898164622689604e-06, "loss": 0.4946, "step": 1751 }, { "epoch": 0.8698026560754624, "grad_norm": 0.07054036867742379, "learning_rate": 8.896940220265789e-06, "loss": 0.4898, "step": 1752 }, { "epoch": 0.8702991187787017, "grad_norm": 0.07268417521194488, "learning_rate": 8.895715222257144e-06, "loss": 0.539, "step": 1753 }, { "epoch": 0.8707955814819411, "grad_norm": 0.08161951973156148, "learning_rate": 8.894489628850891e-06, "loss": 0.501, "step": 1754 }, { "epoch": 0.8712920441851806, "grad_norm": 0.07063657796104031, "learning_rate": 8.893263440234341e-06, "loss": 0.5108, "step": 1755 }, { "epoch": 0.87178850688842, "grad_norm": 0.07093034108492971, "learning_rate": 8.892036656594898e-06, "loss": 0.5045, "step": 1756 }, { "epoch": 0.8722849695916595, "grad_norm": 0.07173587549839525, "learning_rate": 8.890809278120056e-06, "loss": 0.5442, "step": 1757 }, { "epoch": 0.8727814322948988, "grad_norm": 0.07594294618433388, "learning_rate": 8.889581304997401e-06, "loss": 0.5691, "step": 1758 }, { "epoch": 0.8732778949981382, "grad_norm": 0.07023217187033555, "learning_rate": 8.88835273741461e-06, "loss": 0.494, "step": 1759 }, { "epoch": 0.8737743577013777, "grad_norm": 0.0747344031630131, "learning_rate": 8.887123575559445e-06, "loss": 0.5287, "step": 1760 }, { "epoch": 0.8742708204046171, "grad_norm": 0.070787626979846, "learning_rate": 8.885893819619768e-06, "loss": 0.4994, "step": 1761 }, { "epoch": 0.8747672831078566, "grad_norm": 0.07053403609249528, "learning_rate": 8.884663469783526e-06, "loss": 0.5206, "step": 1762 }, { "epoch": 0.8752637458110959, "grad_norm": 0.07093547787233846, "learning_rate": 8.883432526238757e-06, "loss": 0.5518, "step": 1763 }, { "epoch": 0.8757602085143353, "grad_norm": 0.07623805022634478, "learning_rate": 8.882200989173595e-06, "loss": 0.5243, "step": 1764 }, { "epoch": 0.8762566712175748, "grad_norm": 0.07046322887327075, "learning_rate": 8.880968858776257e-06, "loss": 0.5434, "step": 1765 }, { "epoch": 
0.8767531339208142, "grad_norm": 0.07111009011290098, "learning_rate": 8.879736135235055e-06, "loss": 0.5327, "step": 1766 }, { "epoch": 0.8772495966240537, "grad_norm": 0.07166896583234532, "learning_rate": 8.878502818738393e-06, "loss": 0.5299, "step": 1767 }, { "epoch": 0.877746059327293, "grad_norm": 0.07148408961382326, "learning_rate": 8.87726890947476e-06, "loss": 0.5107, "step": 1768 }, { "epoch": 0.8782425220305324, "grad_norm": 0.07238265024706987, "learning_rate": 8.876034407632743e-06, "loss": 0.5135, "step": 1769 }, { "epoch": 0.8787389847337719, "grad_norm": 0.07047522806209866, "learning_rate": 8.874799313401014e-06, "loss": 0.5153, "step": 1770 }, { "epoch": 0.8792354474370113, "grad_norm": 0.07174926494461373, "learning_rate": 8.873563626968337e-06, "loss": 0.5344, "step": 1771 }, { "epoch": 0.8797319101402508, "grad_norm": 0.07469035136029122, "learning_rate": 8.87232734852357e-06, "loss": 0.5491, "step": 1772 }, { "epoch": 0.8802283728434901, "grad_norm": 0.07023138060368163, "learning_rate": 8.871090478255654e-06, "loss": 0.5239, "step": 1773 }, { "epoch": 0.8807248355467295, "grad_norm": 0.07037733604789666, "learning_rate": 8.869853016353627e-06, "loss": 0.5273, "step": 1774 }, { "epoch": 0.881221298249969, "grad_norm": 0.06824188101306318, "learning_rate": 8.868614963006615e-06, "loss": 0.5118, "step": 1775 }, { "epoch": 0.8817177609532084, "grad_norm": 0.06912832231818924, "learning_rate": 8.867376318403834e-06, "loss": 0.4811, "step": 1776 }, { "epoch": 0.8822142236564479, "grad_norm": 0.07774906294910376, "learning_rate": 8.866137082734591e-06, "loss": 0.5466, "step": 1777 }, { "epoch": 0.8827106863596872, "grad_norm": 0.07045859944368564, "learning_rate": 8.864897256188283e-06, "loss": 0.5163, "step": 1778 }, { "epoch": 0.8832071490629266, "grad_norm": 0.07003492490692469, "learning_rate": 8.8636568389544e-06, "loss": 0.5414, "step": 1779 }, { "epoch": 0.8837036117661661, "grad_norm": 0.07307705639634823, "learning_rate": 8.862415831222518e-06, "loss": 0.5571, "step": 1780 }, { "epoch": 0.8842000744694055, "grad_norm": 0.07483261513383652, "learning_rate": 8.861174233182303e-06, "loss": 0.5431, "step": 1781 }, { "epoch": 0.884696537172645, "grad_norm": 0.07173435811049399, "learning_rate": 8.859932045023518e-06, "loss": 0.5266, "step": 1782 }, { "epoch": 0.8851929998758843, "grad_norm": 0.07344114936635032, "learning_rate": 8.85868926693601e-06, "loss": 0.5074, "step": 1783 }, { "epoch": 0.8856894625791237, "grad_norm": 0.0749870011007574, "learning_rate": 8.857445899109716e-06, "loss": 0.537, "step": 1784 }, { "epoch": 0.8861859252823632, "grad_norm": 0.07353741724151482, "learning_rate": 8.856201941734664e-06, "loss": 0.5381, "step": 1785 }, { "epoch": 0.8866823879856026, "grad_norm": 0.06935062733647068, "learning_rate": 8.854957395000977e-06, "loss": 0.5384, "step": 1786 }, { "epoch": 0.887178850688842, "grad_norm": 0.06984395557189015, "learning_rate": 8.853712259098862e-06, "loss": 0.5432, "step": 1787 }, { "epoch": 0.8876753133920814, "grad_norm": 0.07009190296729087, "learning_rate": 8.852466534218615e-06, "loss": 0.5282, "step": 1788 }, { "epoch": 0.8881717760953208, "grad_norm": 0.0713077561850042, "learning_rate": 8.851220220550631e-06, "loss": 0.5399, "step": 1789 }, { "epoch": 0.8886682387985603, "grad_norm": 0.07335240913305834, "learning_rate": 8.849973318285382e-06, "loss": 0.5419, "step": 1790 }, { "epoch": 0.8891647015017997, "grad_norm": 0.06929966245122514, "learning_rate": 8.848725827613445e-06, "loss": 0.547, "step": 1791 }, { "epoch": 
0.889661164205039, "grad_norm": 0.06926234064819861, "learning_rate": 8.847477748725473e-06, "loss": 0.5263, "step": 1792 }, { "epoch": 0.8901576269082785, "grad_norm": 0.07213756111054107, "learning_rate": 8.846229081812217e-06, "loss": 0.5545, "step": 1793 }, { "epoch": 0.8906540896115179, "grad_norm": 0.07418020782540029, "learning_rate": 8.844979827064517e-06, "loss": 0.4989, "step": 1794 }, { "epoch": 0.8911505523147574, "grad_norm": 0.07369827208226862, "learning_rate": 8.843729984673296e-06, "loss": 0.511, "step": 1795 }, { "epoch": 0.8916470150179968, "grad_norm": 0.0731866798675951, "learning_rate": 8.842479554829579e-06, "loss": 0.509, "step": 1796 }, { "epoch": 0.8921434777212361, "grad_norm": 0.07227353713539185, "learning_rate": 8.841228537724473e-06, "loss": 0.5569, "step": 1797 }, { "epoch": 0.8926399404244756, "grad_norm": 0.07354933028438398, "learning_rate": 8.839976933549173e-06, "loss": 0.5679, "step": 1798 }, { "epoch": 0.893136403127715, "grad_norm": 0.07250987685680538, "learning_rate": 8.838724742494966e-06, "loss": 0.4917, "step": 1799 }, { "epoch": 0.8936328658309545, "grad_norm": 0.07615518221178341, "learning_rate": 8.837471964753234e-06, "loss": 0.5601, "step": 1800 }, { "epoch": 0.8941293285341939, "grad_norm": 0.07152492656736423, "learning_rate": 8.83621860051544e-06, "loss": 0.5565, "step": 1801 }, { "epoch": 0.8946257912374332, "grad_norm": 0.07224335571893448, "learning_rate": 8.834964649973144e-06, "loss": 0.5633, "step": 1802 }, { "epoch": 0.8951222539406727, "grad_norm": 0.07269768082488755, "learning_rate": 8.833710113317988e-06, "loss": 0.5332, "step": 1803 }, { "epoch": 0.8956187166439121, "grad_norm": 0.07315786298863783, "learning_rate": 8.83245499074171e-06, "loss": 0.4924, "step": 1804 }, { "epoch": 0.8961151793471516, "grad_norm": 0.06806955055055416, "learning_rate": 8.831199282436136e-06, "loss": 0.5016, "step": 1805 }, { "epoch": 0.896611642050391, "grad_norm": 0.07498148185530268, "learning_rate": 8.829942988593181e-06, "loss": 0.5304, "step": 1806 }, { "epoch": 0.8971081047536303, "grad_norm": 0.07445796417367874, "learning_rate": 8.828686109404848e-06, "loss": 0.5823, "step": 1807 }, { "epoch": 0.8976045674568698, "grad_norm": 0.07278033300821443, "learning_rate": 8.827428645063231e-06, "loss": 0.5328, "step": 1808 }, { "epoch": 0.8981010301601092, "grad_norm": 0.07392660401254864, "learning_rate": 8.826170595760515e-06, "loss": 0.5118, "step": 1809 }, { "epoch": 0.8985974928633487, "grad_norm": 0.07188258391302393, "learning_rate": 8.824911961688971e-06, "loss": 0.5152, "step": 1810 }, { "epoch": 0.8990939555665881, "grad_norm": 0.07434385570738812, "learning_rate": 8.823652743040961e-06, "loss": 0.5152, "step": 1811 }, { "epoch": 0.8995904182698274, "grad_norm": 0.07354820990692497, "learning_rate": 8.822392940008937e-06, "loss": 0.5516, "step": 1812 }, { "epoch": 0.9000868809730669, "grad_norm": 0.07453766204422067, "learning_rate": 8.82113255278544e-06, "loss": 0.5199, "step": 1813 }, { "epoch": 0.9005833436763063, "grad_norm": 0.07462682684863291, "learning_rate": 8.819871581563098e-06, "loss": 0.5774, "step": 1814 }, { "epoch": 0.9010798063795458, "grad_norm": 0.07218553639506078, "learning_rate": 8.818610026534633e-06, "loss": 0.5245, "step": 1815 }, { "epoch": 0.9015762690827852, "grad_norm": 0.07027550804968397, "learning_rate": 8.817347887892852e-06, "loss": 0.531, "step": 1816 }, { "epoch": 0.9020727317860245, "grad_norm": 0.06859338763619163, "learning_rate": 8.816085165830654e-06, "loss": 0.5051, "step": 1817 }, { "epoch": 
0.902569194489264, "grad_norm": 0.07183332754922603, "learning_rate": 8.814821860541024e-06, "loss": 0.5111, "step": 1818 }, { "epoch": 0.9030656571925034, "grad_norm": 0.07322285377385977, "learning_rate": 8.813557972217038e-06, "loss": 0.5105, "step": 1819 }, { "epoch": 0.9035621198957429, "grad_norm": 0.07539086388590406, "learning_rate": 8.812293501051862e-06, "loss": 0.5726, "step": 1820 }, { "epoch": 0.9040585825989822, "grad_norm": 0.07114256778119082, "learning_rate": 8.81102844723875e-06, "loss": 0.5378, "step": 1821 }, { "epoch": 0.9045550453022216, "grad_norm": 0.07529377230408558, "learning_rate": 8.809762810971044e-06, "loss": 0.5446, "step": 1822 }, { "epoch": 0.9050515080054611, "grad_norm": 0.0744774028212076, "learning_rate": 8.808496592442178e-06, "loss": 0.5322, "step": 1823 }, { "epoch": 0.9055479707087005, "grad_norm": 0.07169001776450037, "learning_rate": 8.807229791845673e-06, "loss": 0.5588, "step": 1824 }, { "epoch": 0.90604443341194, "grad_norm": 0.07010171567219378, "learning_rate": 8.805962409375138e-06, "loss": 0.5316, "step": 1825 }, { "epoch": 0.9065408961151793, "grad_norm": 0.07048707759482971, "learning_rate": 8.804694445224274e-06, "loss": 0.5199, "step": 1826 }, { "epoch": 0.9070373588184187, "grad_norm": 0.08194347977828213, "learning_rate": 8.803425899586865e-06, "loss": 0.6072, "step": 1827 }, { "epoch": 0.9075338215216582, "grad_norm": 0.07117753158187239, "learning_rate": 8.802156772656793e-06, "loss": 0.5572, "step": 1828 }, { "epoch": 0.9080302842248976, "grad_norm": 0.06968828674618492, "learning_rate": 8.80088706462802e-06, "loss": 0.5047, "step": 1829 }, { "epoch": 0.9085267469281371, "grad_norm": 0.07433432012948353, "learning_rate": 8.799616775694601e-06, "loss": 0.4883, "step": 1830 }, { "epoch": 0.9090232096313764, "grad_norm": 0.06853377833470747, "learning_rate": 8.798345906050683e-06, "loss": 0.4933, "step": 1831 }, { "epoch": 0.9095196723346158, "grad_norm": 0.07198218840214864, "learning_rate": 8.797074455890493e-06, "loss": 0.5172, "step": 1832 }, { "epoch": 0.9100161350378553, "grad_norm": 0.07144054209959581, "learning_rate": 8.795802425408352e-06, "loss": 0.5285, "step": 1833 }, { "epoch": 0.9105125977410947, "grad_norm": 0.07241882710808062, "learning_rate": 8.794529814798674e-06, "loss": 0.5499, "step": 1834 }, { "epoch": 0.9110090604443342, "grad_norm": 0.07128523526894211, "learning_rate": 8.793256624255954e-06, "loss": 0.4895, "step": 1835 }, { "epoch": 0.9115055231475735, "grad_norm": 0.07431748476081376, "learning_rate": 8.79198285397478e-06, "loss": 0.531, "step": 1836 }, { "epoch": 0.9120019858508129, "grad_norm": 0.07264275110965394, "learning_rate": 8.790708504149825e-06, "loss": 0.5144, "step": 1837 }, { "epoch": 0.9124984485540524, "grad_norm": 0.07383415119938352, "learning_rate": 8.789433574975856e-06, "loss": 0.5325, "step": 1838 }, { "epoch": 0.9129949112572918, "grad_norm": 0.0732949206741093, "learning_rate": 8.788158066647724e-06, "loss": 0.5266, "step": 1839 }, { "epoch": 0.9134913739605313, "grad_norm": 0.07200921781459274, "learning_rate": 8.786881979360368e-06, "loss": 0.5584, "step": 1840 }, { "epoch": 0.9139878366637706, "grad_norm": 0.0723153842242348, "learning_rate": 8.78560531330882e-06, "loss": 0.5425, "step": 1841 }, { "epoch": 0.91448429936701, "grad_norm": 0.07059693624782344, "learning_rate": 8.784328068688199e-06, "loss": 0.5163, "step": 1842 }, { "epoch": 0.9149807620702495, "grad_norm": 0.07088592353046123, "learning_rate": 8.78305024569371e-06, "loss": 0.5158, "step": 1843 }, { "epoch": 
0.9154772247734889, "grad_norm": 0.07026501902575008, "learning_rate": 8.781771844520646e-06, "loss": 0.4983, "step": 1844 }, { "epoch": 0.9159736874767284, "grad_norm": 0.07359600617343613, "learning_rate": 8.780492865364392e-06, "loss": 0.5598, "step": 1845 }, { "epoch": 0.9164701501799677, "grad_norm": 0.0729931146847884, "learning_rate": 8.779213308420418e-06, "loss": 0.5495, "step": 1846 }, { "epoch": 0.9169666128832071, "grad_norm": 0.07492975377099075, "learning_rate": 8.777933173884288e-06, "loss": 0.5328, "step": 1847 }, { "epoch": 0.9174630755864466, "grad_norm": 0.07235530445499065, "learning_rate": 8.776652461951644e-06, "loss": 0.4969, "step": 1848 }, { "epoch": 0.917959538289686, "grad_norm": 0.08357922589860901, "learning_rate": 8.775371172818226e-06, "loss": 0.5883, "step": 1849 }, { "epoch": 0.9184560009929255, "grad_norm": 0.07047535892914779, "learning_rate": 8.774089306679859e-06, "loss": 0.5016, "step": 1850 }, { "epoch": 0.9189524636961648, "grad_norm": 0.07455933744307272, "learning_rate": 8.772806863732454e-06, "loss": 0.4909, "step": 1851 }, { "epoch": 0.9194489263994042, "grad_norm": 0.0718812451067364, "learning_rate": 8.771523844172012e-06, "loss": 0.5203, "step": 1852 }, { "epoch": 0.9199453891026437, "grad_norm": 0.07798810160053044, "learning_rate": 8.770240248194622e-06, "loss": 0.5235, "step": 1853 }, { "epoch": 0.9204418518058831, "grad_norm": 0.07655454615769729, "learning_rate": 8.76895607599646e-06, "loss": 0.5527, "step": 1854 }, { "epoch": 0.9209383145091226, "grad_norm": 0.07714897953371495, "learning_rate": 8.767671327773793e-06, "loss": 0.5689, "step": 1855 }, { "epoch": 0.9214347772123619, "grad_norm": 0.06788417157684977, "learning_rate": 8.766386003722975e-06, "loss": 0.5284, "step": 1856 }, { "epoch": 0.9219312399156013, "grad_norm": 0.07465274826817246, "learning_rate": 8.765100104040446e-06, "loss": 0.5368, "step": 1857 }, { "epoch": 0.9224277026188408, "grad_norm": 0.06873679249158743, "learning_rate": 8.763813628922732e-06, "loss": 0.5331, "step": 1858 }, { "epoch": 0.9229241653220802, "grad_norm": 0.07176820186024782, "learning_rate": 8.762526578566455e-06, "loss": 0.5211, "step": 1859 }, { "epoch": 0.9234206280253195, "grad_norm": 0.07147325275088282, "learning_rate": 8.76123895316832e-06, "loss": 0.5231, "step": 1860 }, { "epoch": 0.923917090728559, "grad_norm": 0.06921441012509941, "learning_rate": 8.759950752925114e-06, "loss": 0.5153, "step": 1861 }, { "epoch": 0.9244135534317984, "grad_norm": 0.07374828268990942, "learning_rate": 8.758661978033723e-06, "loss": 0.5341, "step": 1862 }, { "epoch": 0.9249100161350379, "grad_norm": 0.07350976624933628, "learning_rate": 8.757372628691115e-06, "loss": 0.5232, "step": 1863 }, { "epoch": 0.9254064788382773, "grad_norm": 0.07539026737326146, "learning_rate": 8.756082705094344e-06, "loss": 0.5454, "step": 1864 }, { "epoch": 0.9259029415415166, "grad_norm": 0.07085472578928324, "learning_rate": 8.754792207440557e-06, "loss": 0.5424, "step": 1865 }, { "epoch": 0.9263994042447561, "grad_norm": 0.07878932254764395, "learning_rate": 8.753501135926985e-06, "loss": 0.5218, "step": 1866 }, { "epoch": 0.9268958669479955, "grad_norm": 0.06901455357252793, "learning_rate": 8.752209490750947e-06, "loss": 0.5258, "step": 1867 }, { "epoch": 0.927392329651235, "grad_norm": 0.0713378413072867, "learning_rate": 8.750917272109849e-06, "loss": 0.5579, "step": 1868 }, { "epoch": 0.9278887923544744, "grad_norm": 0.07256870817533925, "learning_rate": 8.749624480201188e-06, "loss": 0.5068, "step": 1869 }, { "epoch": 
0.9283852550577137, "grad_norm": 0.07103653504023608, "learning_rate": 8.748331115222546e-06, "loss": 0.515, "step": 1870 }, { "epoch": 0.9288817177609532, "grad_norm": 0.0738251937073301, "learning_rate": 8.747037177371593e-06, "loss": 0.5185, "step": 1871 }, { "epoch": 0.9293781804641926, "grad_norm": 0.07390345582909139, "learning_rate": 8.745742666846088e-06, "loss": 0.5159, "step": 1872 }, { "epoch": 0.9298746431674321, "grad_norm": 0.07285618531105278, "learning_rate": 8.744447583843874e-06, "loss": 0.5519, "step": 1873 }, { "epoch": 0.9303711058706715, "grad_norm": 0.06928191759599445, "learning_rate": 8.743151928562883e-06, "loss": 0.5162, "step": 1874 }, { "epoch": 0.9308675685739108, "grad_norm": 0.07672934530948829, "learning_rate": 8.741855701201138e-06, "loss": 0.5844, "step": 1875 }, { "epoch": 0.9313640312771503, "grad_norm": 0.07565381558885703, "learning_rate": 8.740558901956745e-06, "loss": 0.512, "step": 1876 }, { "epoch": 0.9318604939803897, "grad_norm": 0.07428988447726668, "learning_rate": 8.739261531027899e-06, "loss": 0.4979, "step": 1877 }, { "epoch": 0.9323569566836292, "grad_norm": 0.07272692110414847, "learning_rate": 8.737963588612882e-06, "loss": 0.5256, "step": 1878 }, { "epoch": 0.9328534193868686, "grad_norm": 0.07394818427713572, "learning_rate": 8.736665074910064e-06, "loss": 0.5442, "step": 1879 }, { "epoch": 0.9333498820901079, "grad_norm": 0.07489595106458445, "learning_rate": 8.735365990117904e-06, "loss": 0.5009, "step": 1880 }, { "epoch": 0.9338463447933474, "grad_norm": 0.07379647498011625, "learning_rate": 8.734066334434944e-06, "loss": 0.4879, "step": 1881 }, { "epoch": 0.9343428074965868, "grad_norm": 0.07110250204025945, "learning_rate": 8.732766108059814e-06, "loss": 0.5298, "step": 1882 }, { "epoch": 0.9348392701998263, "grad_norm": 0.07081073435018918, "learning_rate": 8.731465311191237e-06, "loss": 0.5221, "step": 1883 }, { "epoch": 0.9353357329030657, "grad_norm": 0.07000873579274325, "learning_rate": 8.730163944028013e-06, "loss": 0.5187, "step": 1884 }, { "epoch": 0.935832195606305, "grad_norm": 0.07240937120624676, "learning_rate": 8.728862006769043e-06, "loss": 0.5041, "step": 1885 }, { "epoch": 0.9363286583095445, "grad_norm": 0.0746119118897645, "learning_rate": 8.7275594996133e-06, "loss": 0.5379, "step": 1886 }, { "epoch": 0.9368251210127839, "grad_norm": 0.07015740462906615, "learning_rate": 8.726256422759857e-06, "loss": 0.5386, "step": 1887 }, { "epoch": 0.9373215837160234, "grad_norm": 0.06777154576680991, "learning_rate": 8.724952776407864e-06, "loss": 0.4789, "step": 1888 }, { "epoch": 0.9378180464192628, "grad_norm": 0.07200557741488726, "learning_rate": 8.723648560756565e-06, "loss": 0.5483, "step": 1889 }, { "epoch": 0.9383145091225021, "grad_norm": 0.07146897275049505, "learning_rate": 8.722343776005288e-06, "loss": 0.4842, "step": 1890 }, { "epoch": 0.9388109718257416, "grad_norm": 0.07349104098205175, "learning_rate": 8.721038422353447e-06, "loss": 0.5368, "step": 1891 }, { "epoch": 0.939307434528981, "grad_norm": 0.07452676274413209, "learning_rate": 8.719732500000547e-06, "loss": 0.4931, "step": 1892 }, { "epoch": 0.9398038972322205, "grad_norm": 0.07592669585842883, "learning_rate": 8.718426009146174e-06, "loss": 0.5249, "step": 1893 }, { "epoch": 0.9403003599354598, "grad_norm": 0.06953574446858567, "learning_rate": 8.717118949990006e-06, "loss": 0.4989, "step": 1894 }, { "epoch": 0.9407968226386992, "grad_norm": 0.06939013011968077, "learning_rate": 8.715811322731808e-06, "loss": 0.5243, "step": 1895 }, { "epoch": 
0.9412932853419387, "grad_norm": 0.07460509153996832, "learning_rate": 8.714503127571425e-06, "loss": 0.55, "step": 1896 }, { "epoch": 0.9417897480451781, "grad_norm": 0.07492121354167032, "learning_rate": 8.713194364708799e-06, "loss": 0.5162, "step": 1897 }, { "epoch": 0.9422862107484176, "grad_norm": 0.07201694827718819, "learning_rate": 8.71188503434395e-06, "loss": 0.5435, "step": 1898 }, { "epoch": 0.9427826734516569, "grad_norm": 0.0698106849011963, "learning_rate": 8.710575136676988e-06, "loss": 0.5325, "step": 1899 }, { "epoch": 0.9432791361548963, "grad_norm": 0.07457961112892264, "learning_rate": 8.709264671908113e-06, "loss": 0.5269, "step": 1900 }, { "epoch": 0.9437755988581358, "grad_norm": 0.07363246085888084, "learning_rate": 8.707953640237605e-06, "loss": 0.5467, "step": 1901 }, { "epoch": 0.9442720615613752, "grad_norm": 0.07217700611369388, "learning_rate": 8.706642041865836e-06, "loss": 0.5387, "step": 1902 }, { "epoch": 0.9447685242646147, "grad_norm": 0.0692734553474916, "learning_rate": 8.705329876993262e-06, "loss": 0.506, "step": 1903 }, { "epoch": 0.945264986967854, "grad_norm": 0.07196674390301115, "learning_rate": 8.704017145820427e-06, "loss": 0.549, "step": 1904 }, { "epoch": 0.9457614496710934, "grad_norm": 0.07512068711896185, "learning_rate": 8.70270384854796e-06, "loss": 0.5663, "step": 1905 }, { "epoch": 0.9462579123743329, "grad_norm": 0.07421738591859378, "learning_rate": 8.701389985376578e-06, "loss": 0.5622, "step": 1906 }, { "epoch": 0.9467543750775723, "grad_norm": 0.06964931407494165, "learning_rate": 8.700075556507085e-06, "loss": 0.4877, "step": 1907 }, { "epoch": 0.9472508377808118, "grad_norm": 0.07205777339082249, "learning_rate": 8.698760562140369e-06, "loss": 0.5148, "step": 1908 }, { "epoch": 0.9477473004840511, "grad_norm": 0.06972774499188475, "learning_rate": 8.697445002477408e-06, "loss": 0.5172, "step": 1909 }, { "epoch": 0.9482437631872905, "grad_norm": 0.0730143838918345, "learning_rate": 8.696128877719258e-06, "loss": 0.5825, "step": 1910 }, { "epoch": 0.94874022589053, "grad_norm": 0.06930828954474016, "learning_rate": 8.694812188067077e-06, "loss": 0.5488, "step": 1911 }, { "epoch": 0.9492366885937694, "grad_norm": 0.06817526079016235, "learning_rate": 8.693494933722091e-06, "loss": 0.5137, "step": 1912 }, { "epoch": 0.9497331512970089, "grad_norm": 0.07130049301765591, "learning_rate": 8.692177114885626e-06, "loss": 0.5108, "step": 1913 }, { "epoch": 0.9502296140002482, "grad_norm": 0.06952714928758798, "learning_rate": 8.69085873175909e-06, "loss": 0.5593, "step": 1914 }, { "epoch": 0.9507260767034876, "grad_norm": 0.07028375429072933, "learning_rate": 8.689539784543975e-06, "loss": 0.5354, "step": 1915 }, { "epoch": 0.9512225394067271, "grad_norm": 0.06774708549780749, "learning_rate": 8.68822027344186e-06, "loss": 0.4871, "step": 1916 }, { "epoch": 0.9517190021099665, "grad_norm": 0.07171222831888815, "learning_rate": 8.686900198654413e-06, "loss": 0.5434, "step": 1917 }, { "epoch": 0.952215464813206, "grad_norm": 0.07452854923353851, "learning_rate": 8.685579560383386e-06, "loss": 0.5635, "step": 1918 }, { "epoch": 0.9527119275164453, "grad_norm": 0.07754262739318235, "learning_rate": 8.684258358830617e-06, "loss": 0.5291, "step": 1919 }, { "epoch": 0.9532083902196847, "grad_norm": 0.07210569027672796, "learning_rate": 8.682936594198029e-06, "loss": 0.5101, "step": 1920 }, { "epoch": 0.9537048529229242, "grad_norm": 0.07585209431668576, "learning_rate": 8.681614266687634e-06, "loss": 0.5186, "step": 1921 }, { "epoch": 
0.9542013156261636, "grad_norm": 0.07508056085158579, "learning_rate": 8.680291376501531e-06, "loss": 0.5519, "step": 1922 }, { "epoch": 0.9546977783294031, "grad_norm": 0.06911989074015167, "learning_rate": 8.678967923841897e-06, "loss": 0.5203, "step": 1923 }, { "epoch": 0.9551942410326424, "grad_norm": 0.07000431990955368, "learning_rate": 8.677643908911007e-06, "loss": 0.5072, "step": 1924 }, { "epoch": 0.9556907037358818, "grad_norm": 0.0706651754552142, "learning_rate": 8.67631933191121e-06, "loss": 0.5236, "step": 1925 }, { "epoch": 0.9561871664391213, "grad_norm": 0.07233656910668078, "learning_rate": 8.674994193044947e-06, "loss": 0.5408, "step": 1926 }, { "epoch": 0.9566836291423607, "grad_norm": 0.07165155095334301, "learning_rate": 8.673668492514748e-06, "loss": 0.5361, "step": 1927 }, { "epoch": 0.9571800918456, "grad_norm": 0.07104402132471122, "learning_rate": 8.672342230523222e-06, "loss": 0.5252, "step": 1928 }, { "epoch": 0.9576765545488395, "grad_norm": 0.07062543323954087, "learning_rate": 8.671015407273067e-06, "loss": 0.5412, "step": 1929 }, { "epoch": 0.9581730172520789, "grad_norm": 0.06926853707239683, "learning_rate": 8.669688022967068e-06, "loss": 0.5511, "step": 1930 }, { "epoch": 0.9586694799553184, "grad_norm": 0.07159547068682602, "learning_rate": 8.668360077808093e-06, "loss": 0.5512, "step": 1931 }, { "epoch": 0.9591659426585578, "grad_norm": 0.07115586840594067, "learning_rate": 8.667031571999098e-06, "loss": 0.5133, "step": 1932 }, { "epoch": 0.9596624053617971, "grad_norm": 0.07200342418348525, "learning_rate": 8.665702505743125e-06, "loss": 0.5322, "step": 1933 }, { "epoch": 0.9601588680650366, "grad_norm": 0.06825765717088793, "learning_rate": 8.664372879243297e-06, "loss": 0.5292, "step": 1934 }, { "epoch": 0.960655330768276, "grad_norm": 0.07283728005005825, "learning_rate": 8.66304269270283e-06, "loss": 0.5449, "step": 1935 }, { "epoch": 0.9611517934715155, "grad_norm": 0.07304900897173891, "learning_rate": 8.661711946325018e-06, "loss": 0.5512, "step": 1936 }, { "epoch": 0.9616482561747549, "grad_norm": 0.06853811178509077, "learning_rate": 8.660380640313247e-06, "loss": 0.4965, "step": 1937 }, { "epoch": 0.9621447188779942, "grad_norm": 0.0751622154001969, "learning_rate": 8.659048774870986e-06, "loss": 0.5175, "step": 1938 }, { "epoch": 0.9626411815812337, "grad_norm": 0.07096935969983968, "learning_rate": 8.657716350201786e-06, "loss": 0.5653, "step": 1939 }, { "epoch": 0.9631376442844731, "grad_norm": 0.07358237864616872, "learning_rate": 8.656383366509292e-06, "loss": 0.5213, "step": 1940 }, { "epoch": 0.9636341069877126, "grad_norm": 0.06865112950199855, "learning_rate": 8.655049823997222e-06, "loss": 0.5004, "step": 1941 }, { "epoch": 0.964130569690952, "grad_norm": 0.06913087429662826, "learning_rate": 8.653715722869394e-06, "loss": 0.5091, "step": 1942 }, { "epoch": 0.9646270323941913, "grad_norm": 0.07334073189778997, "learning_rate": 8.652381063329697e-06, "loss": 0.5273, "step": 1943 }, { "epoch": 0.9651234950974308, "grad_norm": 0.07154596833896756, "learning_rate": 8.65104584558212e-06, "loss": 0.5221, "step": 1944 }, { "epoch": 0.9656199578006702, "grad_norm": 0.0669766137567477, "learning_rate": 8.649710069830723e-06, "loss": 0.4856, "step": 1945 }, { "epoch": 0.9661164205039097, "grad_norm": 0.06695881028521661, "learning_rate": 8.648373736279662e-06, "loss": 0.5006, "step": 1946 }, { "epoch": 0.9666128832071491, "grad_norm": 0.07056372764666827, "learning_rate": 8.647036845133171e-06, "loss": 0.5038, "step": 1947 }, { "epoch": 
0.9671093459103884, "grad_norm": 0.06721853167954736, "learning_rate": 8.645699396595574e-06, "loss": 0.5192, "step": 1948 }, { "epoch": 0.9676058086136279, "grad_norm": 0.07297678022854343, "learning_rate": 8.644361390871281e-06, "loss": 0.5508, "step": 1949 }, { "epoch": 0.9681022713168673, "grad_norm": 0.07278377444176168, "learning_rate": 8.64302282816478e-06, "loss": 0.5509, "step": 1950 }, { "epoch": 0.9685987340201068, "grad_norm": 0.06846087625519698, "learning_rate": 8.641683708680653e-06, "loss": 0.4928, "step": 1951 }, { "epoch": 0.9690951967233462, "grad_norm": 0.06533270866026043, "learning_rate": 8.64034403262356e-06, "loss": 0.485, "step": 1952 }, { "epoch": 0.9695916594265855, "grad_norm": 0.07265110323601376, "learning_rate": 8.63900380019825e-06, "loss": 0.5153, "step": 1953 }, { "epoch": 0.970088122129825, "grad_norm": 0.07263717621963031, "learning_rate": 8.637663011609556e-06, "loss": 0.5068, "step": 1954 }, { "epoch": 0.9705845848330644, "grad_norm": 0.06931969752092298, "learning_rate": 8.636321667062398e-06, "loss": 0.4819, "step": 1955 }, { "epoch": 0.9710810475363039, "grad_norm": 0.07329123137887754, "learning_rate": 8.634979766761775e-06, "loss": 0.5111, "step": 1956 }, { "epoch": 0.9715775102395433, "grad_norm": 0.0776376074481898, "learning_rate": 8.633637310912777e-06, "loss": 0.5299, "step": 1957 }, { "epoch": 0.9720739729427826, "grad_norm": 0.07357164179881222, "learning_rate": 8.632294299720578e-06, "loss": 0.5242, "step": 1958 }, { "epoch": 0.9725704356460221, "grad_norm": 0.06918945191060966, "learning_rate": 8.630950733390434e-06, "loss": 0.5112, "step": 1959 }, { "epoch": 0.9730668983492615, "grad_norm": 0.07259280346654948, "learning_rate": 8.62960661212769e-06, "loss": 0.5416, "step": 1960 }, { "epoch": 0.973563361052501, "grad_norm": 0.0710695063103676, "learning_rate": 8.628261936137769e-06, "loss": 0.543, "step": 1961 }, { "epoch": 0.9740598237557403, "grad_norm": 0.07278661425053735, "learning_rate": 8.626916705626186e-06, "loss": 0.5096, "step": 1962 }, { "epoch": 0.9745562864589797, "grad_norm": 0.07217270046199414, "learning_rate": 8.62557092079854e-06, "loss": 0.5251, "step": 1963 }, { "epoch": 0.9750527491622192, "grad_norm": 0.07443189862074188, "learning_rate": 8.62422458186051e-06, "loss": 0.5263, "step": 1964 }, { "epoch": 0.9755492118654586, "grad_norm": 0.0720078190705775, "learning_rate": 8.62287768901786e-06, "loss": 0.5163, "step": 1965 }, { "epoch": 0.9760456745686981, "grad_norm": 0.07216624095705255, "learning_rate": 8.621530242476446e-06, "loss": 0.5491, "step": 1966 }, { "epoch": 0.9765421372719374, "grad_norm": 0.07025665529312518, "learning_rate": 8.620182242442202e-06, "loss": 0.5003, "step": 1967 }, { "epoch": 0.9770385999751768, "grad_norm": 0.07990291482485633, "learning_rate": 8.618833689121147e-06, "loss": 0.5196, "step": 1968 }, { "epoch": 0.9775350626784163, "grad_norm": 0.07182987590456455, "learning_rate": 8.617484582719384e-06, "loss": 0.4937, "step": 1969 }, { "epoch": 0.9780315253816557, "grad_norm": 0.07065785829898263, "learning_rate": 8.616134923443107e-06, "loss": 0.5059, "step": 1970 }, { "epoch": 0.9785279880848952, "grad_norm": 0.07052696820744597, "learning_rate": 8.614784711498586e-06, "loss": 0.5154, "step": 1971 }, { "epoch": 0.9790244507881345, "grad_norm": 0.07208828528634863, "learning_rate": 8.61343394709218e-06, "loss": 0.5148, "step": 1972 }, { "epoch": 0.9795209134913739, "grad_norm": 0.07218283214224182, "learning_rate": 8.612082630430333e-06, "loss": 0.5254, "step": 1973 }, { "epoch": 
0.9800173761946134, "grad_norm": 0.0700016743353793, "learning_rate": 8.610730761719573e-06, "loss": 0.524, "step": 1974 }, { "epoch": 0.9805138388978528, "grad_norm": 0.06872083929445431, "learning_rate": 8.609378341166508e-06, "loss": 0.5152, "step": 1975 }, { "epoch": 0.9810103016010923, "grad_norm": 0.07548071827402791, "learning_rate": 8.608025368977834e-06, "loss": 0.5443, "step": 1976 }, { "epoch": 0.9815067643043316, "grad_norm": 0.07492045099973432, "learning_rate": 8.606671845360334e-06, "loss": 0.5235, "step": 1977 }, { "epoch": 0.982003227007571, "grad_norm": 0.0725796525707599, "learning_rate": 8.605317770520871e-06, "loss": 0.5093, "step": 1978 }, { "epoch": 0.9824996897108105, "grad_norm": 0.0716698247485905, "learning_rate": 8.603963144666393e-06, "loss": 0.537, "step": 1979 }, { "epoch": 0.9829961524140499, "grad_norm": 0.07084273949601609, "learning_rate": 8.602607968003935e-06, "loss": 0.5064, "step": 1980 }, { "epoch": 0.9834926151172894, "grad_norm": 0.07171289777818614, "learning_rate": 8.601252240740611e-06, "loss": 0.5377, "step": 1981 }, { "epoch": 0.9839890778205287, "grad_norm": 0.07193685972435261, "learning_rate": 8.599895963083627e-06, "loss": 0.5365, "step": 1982 }, { "epoch": 0.9844855405237681, "grad_norm": 0.0716912144267546, "learning_rate": 8.598539135240263e-06, "loss": 0.501, "step": 1983 }, { "epoch": 0.9849820032270076, "grad_norm": 0.07125142000863152, "learning_rate": 8.597181757417889e-06, "loss": 0.5495, "step": 1984 }, { "epoch": 0.985478465930247, "grad_norm": 0.07060360439051618, "learning_rate": 8.59582382982396e-06, "loss": 0.5344, "step": 1985 }, { "epoch": 0.9859749286334865, "grad_norm": 0.07445172176295355, "learning_rate": 8.594465352666015e-06, "loss": 0.57, "step": 1986 }, { "epoch": 0.9864713913367258, "grad_norm": 0.07371532452638814, "learning_rate": 8.593106326151672e-06, "loss": 0.5311, "step": 1987 }, { "epoch": 0.9869678540399652, "grad_norm": 0.0696119232310848, "learning_rate": 8.591746750488639e-06, "loss": 0.4858, "step": 1988 }, { "epoch": 0.9874643167432047, "grad_norm": 0.07048082002862621, "learning_rate": 8.590386625884703e-06, "loss": 0.5171, "step": 1989 }, { "epoch": 0.9879607794464441, "grad_norm": 0.07171447996292699, "learning_rate": 8.58902595254774e-06, "loss": 0.5028, "step": 1990 }, { "epoch": 0.9884572421496836, "grad_norm": 0.07039308554220271, "learning_rate": 8.587664730685707e-06, "loss": 0.5248, "step": 1991 }, { "epoch": 0.9889537048529229, "grad_norm": 0.07283522641859247, "learning_rate": 8.586302960506643e-06, "loss": 0.5451, "step": 1992 }, { "epoch": 0.9894501675561623, "grad_norm": 0.07227866851044816, "learning_rate": 8.584940642218672e-06, "loss": 0.5738, "step": 1993 }, { "epoch": 0.9899466302594018, "grad_norm": 0.07329916195849069, "learning_rate": 8.583577776030005e-06, "loss": 0.5331, "step": 1994 }, { "epoch": 0.9904430929626412, "grad_norm": 0.07113529640045456, "learning_rate": 8.582214362148932e-06, "loss": 0.4991, "step": 1995 }, { "epoch": 0.9909395556658807, "grad_norm": 0.07416831869367692, "learning_rate": 8.580850400783833e-06, "loss": 0.5589, "step": 1996 }, { "epoch": 0.99143601836912, "grad_norm": 0.0719901887328746, "learning_rate": 8.579485892143163e-06, "loss": 0.535, "step": 1997 }, { "epoch": 0.9919324810723594, "grad_norm": 0.07290232210396123, "learning_rate": 8.578120836435467e-06, "loss": 0.5309, "step": 1998 }, { "epoch": 0.9924289437755989, "grad_norm": 0.07268201060562912, "learning_rate": 8.576755233869372e-06, "loss": 0.5039, "step": 1999 }, { "epoch": 
0.9929254064788383, "grad_norm": 0.069235314012733, "learning_rate": 8.57538908465359e-06, "loss": 0.4977, "step": 2000 }, { "epoch": 0.9934218691820776, "grad_norm": 0.07348961416704078, "learning_rate": 8.574022388996913e-06, "loss": 0.5569, "step": 2001 }, { "epoch": 0.9939183318853171, "grad_norm": 0.0729022760562524, "learning_rate": 8.572655147108217e-06, "loss": 0.5113, "step": 2002 }, { "epoch": 0.9944147945885565, "grad_norm": 0.07172086453966273, "learning_rate": 8.571287359196466e-06, "loss": 0.5105, "step": 2003 }, { "epoch": 0.994911257291796, "grad_norm": 0.06853033454269805, "learning_rate": 8.569919025470704e-06, "loss": 0.4886, "step": 2004 }, { "epoch": 0.9954077199950354, "grad_norm": 0.06903654454945618, "learning_rate": 8.568550146140056e-06, "loss": 0.5157, "step": 2005 }, { "epoch": 0.9959041826982747, "grad_norm": 0.07258062579104714, "learning_rate": 8.567180721413736e-06, "loss": 0.5318, "step": 2006 }, { "epoch": 0.9964006454015142, "grad_norm": 0.07499795243636775, "learning_rate": 8.56581075150104e-06, "loss": 0.5757, "step": 2007 }, { "epoch": 0.9968971081047536, "grad_norm": 0.07104244022202039, "learning_rate": 8.564440236611344e-06, "loss": 0.4889, "step": 2008 }, { "epoch": 0.9973935708079931, "grad_norm": 0.07249943092428517, "learning_rate": 8.563069176954108e-06, "loss": 0.552, "step": 2009 }, { "epoch": 0.9978900335112325, "grad_norm": 0.07144120704844, "learning_rate": 8.561697572738878e-06, "loss": 0.5075, "step": 2010 }, { "epoch": 0.9983864962144718, "grad_norm": 0.0742860541852747, "learning_rate": 8.560325424175282e-06, "loss": 0.501, "step": 2011 }, { "epoch": 0.9988829589177113, "grad_norm": 0.06847104594866535, "learning_rate": 8.558952731473031e-06, "loss": 0.5224, "step": 2012 }, { "epoch": 0.9993794216209507, "grad_norm": 0.07003316710867157, "learning_rate": 8.557579494841918e-06, "loss": 0.5415, "step": 2013 }, { "epoch": 0.9998758843241902, "grad_norm": 0.0748671769203952, "learning_rate": 8.55620571449182e-06, "loss": 0.6028, "step": 2014 }, { "epoch": 1.0, "grad_norm": 0.0748671769203952, "learning_rate": 8.5548313906327e-06, "loss": 0.1345, "step": 2015 }, { "epoch": 1.0003723470274295, "grad_norm": 0.07559504504545322, "learning_rate": 8.553456523474596e-06, "loss": 0.4075, "step": 2016 }, { "epoch": 1.0003723470274295, "eval_loss": 0.5260358452796936, "eval_runtime": 258.7864, "eval_samples_per_second": 117.29, "eval_steps_per_second": 14.665, "step": 2016 }, { "epoch": 1.0004964627032393, "grad_norm": 0.09547152201342973, "learning_rate": 8.55208111322764e-06, "loss": 0.4972, "step": 2017 }, { "epoch": 1.0009929254064789, "grad_norm": 0.08024846390865537, "learning_rate": 8.550705160102037e-06, "loss": 0.4913, "step": 2018 }, { "epoch": 1.0014893881097182, "grad_norm": 0.07164772320273062, "learning_rate": 8.549328664308084e-06, "loss": 0.4719, "step": 2019 }, { "epoch": 1.0019858508129578, "grad_norm": 0.0791799829849966, "learning_rate": 8.547951626056152e-06, "loss": 0.4754, "step": 2020 }, { "epoch": 1.002482313516197, "grad_norm": 0.08827479564010493, "learning_rate": 8.546574045556702e-06, "loss": 0.5106, "step": 2021 }, { "epoch": 1.0029787762194364, "grad_norm": 0.08009835838102199, "learning_rate": 8.545195923020273e-06, "loss": 0.4828, "step": 2022 }, { "epoch": 1.003475238922676, "grad_norm": 0.07696883614047778, "learning_rate": 8.543817258657493e-06, "loss": 0.4869, "step": 2023 }, { "epoch": 1.0039717016259153, "grad_norm": 0.08329873785631868, "learning_rate": 8.542438052679063e-06, "loss": 0.5088, "step": 2024 }, { 
"epoch": 1.0044681643291549, "grad_norm": 0.08107257398321653, "learning_rate": 8.541058305295777e-06, "loss": 0.5295, "step": 2025 }, { "epoch": 1.0049646270323942, "grad_norm": 0.07242648419214391, "learning_rate": 8.539678016718505e-06, "loss": 0.4733, "step": 2026 }, { "epoch": 1.0054610897356335, "grad_norm": 0.07294681808712751, "learning_rate": 8.538297187158202e-06, "loss": 0.4937, "step": 2027 }, { "epoch": 1.005957552438873, "grad_norm": 0.07774560057953825, "learning_rate": 8.536915816825906e-06, "loss": 0.4661, "step": 2028 }, { "epoch": 1.0064540151421124, "grad_norm": 0.07321460359687063, "learning_rate": 8.535533905932739e-06, "loss": 0.4938, "step": 2029 }, { "epoch": 1.006950477845352, "grad_norm": 0.07353088039144523, "learning_rate": 8.534151454689901e-06, "loss": 0.4904, "step": 2030 }, { "epoch": 1.0074469405485913, "grad_norm": 0.07221853677357111, "learning_rate": 8.532768463308679e-06, "loss": 0.4859, "step": 2031 }, { "epoch": 1.0079434032518306, "grad_norm": 0.07626837115175593, "learning_rate": 8.531384932000442e-06, "loss": 0.5063, "step": 2032 }, { "epoch": 1.0084398659550702, "grad_norm": 0.07611380745129669, "learning_rate": 8.530000860976639e-06, "loss": 0.5051, "step": 2033 }, { "epoch": 1.0089363286583095, "grad_norm": 0.07174543909991783, "learning_rate": 8.528616250448805e-06, "loss": 0.4955, "step": 2034 }, { "epoch": 1.009432791361549, "grad_norm": 0.07442453470054473, "learning_rate": 8.527231100628553e-06, "loss": 0.4755, "step": 2035 }, { "epoch": 1.0099292540647884, "grad_norm": 0.0724289281191574, "learning_rate": 8.525845411727581e-06, "loss": 0.4908, "step": 2036 }, { "epoch": 1.0104257167680277, "grad_norm": 0.07872472635051855, "learning_rate": 8.524459183957673e-06, "loss": 0.5112, "step": 2037 }, { "epoch": 1.0109221794712673, "grad_norm": 0.06971999213309993, "learning_rate": 8.523072417530686e-06, "loss": 0.4562, "step": 2038 }, { "epoch": 1.0114186421745066, "grad_norm": 0.07312421396196309, "learning_rate": 8.52168511265857e-06, "loss": 0.5046, "step": 2039 }, { "epoch": 1.0119151048777462, "grad_norm": 0.07995669672272424, "learning_rate": 8.52029726955335e-06, "loss": 0.5058, "step": 2040 }, { "epoch": 1.0124115675809855, "grad_norm": 0.0752474945014717, "learning_rate": 8.518908888427137e-06, "loss": 0.4896, "step": 2041 }, { "epoch": 1.0129080302842248, "grad_norm": 0.07367661958562566, "learning_rate": 8.51751996949212e-06, "loss": 0.5159, "step": 2042 }, { "epoch": 1.0134044929874644, "grad_norm": 0.06979249530045643, "learning_rate": 8.516130512960576e-06, "loss": 0.4974, "step": 2043 }, { "epoch": 1.0139009556907037, "grad_norm": 0.07119884891727783, "learning_rate": 8.51474051904486e-06, "loss": 0.4919, "step": 2044 }, { "epoch": 1.0143974183939433, "grad_norm": 0.07609361710536897, "learning_rate": 8.513349987957411e-06, "loss": 0.4723, "step": 2045 }, { "epoch": 1.0148938810971826, "grad_norm": 0.06703588203654677, "learning_rate": 8.511958919910748e-06, "loss": 0.4399, "step": 2046 }, { "epoch": 1.015390343800422, "grad_norm": 0.07309725028188202, "learning_rate": 8.510567315117472e-06, "loss": 0.4804, "step": 2047 }, { "epoch": 1.0158868065036615, "grad_norm": 0.068314468194369, "learning_rate": 8.509175173790271e-06, "loss": 0.4567, "step": 2048 }, { "epoch": 1.0163832692069008, "grad_norm": 0.07906321230711548, "learning_rate": 8.507782496141911e-06, "loss": 0.5249, "step": 2049 }, { "epoch": 1.0168797319101404, "grad_norm": 0.07035411917162568, "learning_rate": 8.506389282385242e-06, "loss": 0.4569, "step": 2050 }, { 
"epoch": 1.0173761946133797, "grad_norm": 0.07087902433432516, "learning_rate": 8.504995532733187e-06, "loss": 0.4752, "step": 2051 }, { "epoch": 1.017872657316619, "grad_norm": 0.09428637704107151, "learning_rate": 8.503601247398765e-06, "loss": 0.5069, "step": 2052 }, { "epoch": 1.0183691200198586, "grad_norm": 0.07313205171308455, "learning_rate": 8.502206426595069e-06, "loss": 0.48, "step": 2053 }, { "epoch": 1.018865582723098, "grad_norm": 0.07444256500494913, "learning_rate": 8.500811070535271e-06, "loss": 0.505, "step": 2054 }, { "epoch": 1.0193620454263372, "grad_norm": 0.07187260748173696, "learning_rate": 8.499415179432635e-06, "loss": 0.4878, "step": 2055 }, { "epoch": 1.0198585081295768, "grad_norm": 0.07521015775447627, "learning_rate": 8.4980187535005e-06, "loss": 0.5347, "step": 2056 }, { "epoch": 1.0203549708328161, "grad_norm": 0.07519459961664571, "learning_rate": 8.49662179295228e-06, "loss": 0.4939, "step": 2057 }, { "epoch": 1.0208514335360557, "grad_norm": 0.07478071090183817, "learning_rate": 8.495224298001487e-06, "loss": 0.4952, "step": 2058 }, { "epoch": 1.021347896239295, "grad_norm": 0.0730378035645826, "learning_rate": 8.4938262688617e-06, "loss": 0.4901, "step": 2059 }, { "epoch": 1.0218443589425343, "grad_norm": 0.07650825318167469, "learning_rate": 8.492427705746587e-06, "loss": 0.4727, "step": 2060 }, { "epoch": 1.022340821645774, "grad_norm": 0.07265259240781462, "learning_rate": 8.491028608869895e-06, "loss": 0.4697, "step": 2061 }, { "epoch": 1.0228372843490132, "grad_norm": 0.07192624745986412, "learning_rate": 8.489628978445456e-06, "loss": 0.475, "step": 2062 }, { "epoch": 1.0233337470522528, "grad_norm": 0.07911963050617478, "learning_rate": 8.488228814687178e-06, "loss": 0.5122, "step": 2063 }, { "epoch": 1.023830209755492, "grad_norm": 0.0741852579227699, "learning_rate": 8.486828117809057e-06, "loss": 0.5003, "step": 2064 }, { "epoch": 1.0243266724587314, "grad_norm": 0.07311097676007818, "learning_rate": 8.485426888025166e-06, "loss": 0.5349, "step": 2065 }, { "epoch": 1.024823135161971, "grad_norm": 0.07532799313479474, "learning_rate": 8.484025125549658e-06, "loss": 0.4889, "step": 2066 }, { "epoch": 1.0253195978652103, "grad_norm": 0.08160693762566441, "learning_rate": 8.482622830596772e-06, "loss": 0.5053, "step": 2067 }, { "epoch": 1.0258160605684499, "grad_norm": 0.07068127338378674, "learning_rate": 8.481220003380826e-06, "loss": 0.5036, "step": 2068 }, { "epoch": 1.0263125232716892, "grad_norm": 0.07424244914892054, "learning_rate": 8.479816644116218e-06, "loss": 0.4704, "step": 2069 }, { "epoch": 1.0268089859749285, "grad_norm": 0.07315128718542926, "learning_rate": 8.478412753017433e-06, "loss": 0.4754, "step": 2070 }, { "epoch": 1.027305448678168, "grad_norm": 0.07772853008795041, "learning_rate": 8.47700833029903e-06, "loss": 0.5077, "step": 2071 }, { "epoch": 1.0278019113814074, "grad_norm": 0.07141108432687718, "learning_rate": 8.475603376175654e-06, "loss": 0.4918, "step": 2072 }, { "epoch": 1.028298374084647, "grad_norm": 0.08059188331356318, "learning_rate": 8.474197890862028e-06, "loss": 0.5103, "step": 2073 }, { "epoch": 1.0287948367878863, "grad_norm": 0.07290453191714018, "learning_rate": 8.472791874572958e-06, "loss": 0.4811, "step": 2074 }, { "epoch": 1.0292912994911256, "grad_norm": 0.07126996934228423, "learning_rate": 8.471385327523333e-06, "loss": 0.4988, "step": 2075 }, { "epoch": 1.0297877621943652, "grad_norm": 0.07682736893829598, "learning_rate": 8.469978249928122e-06, "loss": 0.4967, "step": 2076 }, { "epoch": 
1.0302842248976045, "grad_norm": 0.07159491409294527, "learning_rate": 8.46857064200237e-06, "loss": 0.5015, "step": 2077 }, { "epoch": 1.030780687600844, "grad_norm": 0.07150838517569402, "learning_rate": 8.467162503961209e-06, "loss": 0.5317, "step": 2078 }, { "epoch": 1.0312771503040834, "grad_norm": 0.07504062599965208, "learning_rate": 8.465753836019853e-06, "loss": 0.4991, "step": 2079 }, { "epoch": 1.0317736130073227, "grad_norm": 0.06785624085900396, "learning_rate": 8.46434463839359e-06, "loss": 0.4836, "step": 2080 }, { "epoch": 1.0322700757105623, "grad_norm": 0.07273111308848569, "learning_rate": 8.462934911297797e-06, "loss": 0.5016, "step": 2081 }, { "epoch": 1.0327665384138016, "grad_norm": 0.07520341003875997, "learning_rate": 8.461524654947927e-06, "loss": 0.4808, "step": 2082 }, { "epoch": 1.0332630011170412, "grad_norm": 0.07682200973431808, "learning_rate": 8.460113869559517e-06, "loss": 0.4908, "step": 2083 }, { "epoch": 1.0337594638202805, "grad_norm": 0.07078292265875742, "learning_rate": 8.458702555348176e-06, "loss": 0.4962, "step": 2084 }, { "epoch": 1.0342559265235198, "grad_norm": 0.0772565123724033, "learning_rate": 8.45729071252961e-06, "loss": 0.5454, "step": 2085 }, { "epoch": 1.0347523892267594, "grad_norm": 0.076563402523467, "learning_rate": 8.45587834131959e-06, "loss": 0.517, "step": 2086 }, { "epoch": 1.0352488519299987, "grad_norm": 0.0725286596603905, "learning_rate": 8.454465441933976e-06, "loss": 0.4779, "step": 2087 }, { "epoch": 1.0357453146332383, "grad_norm": 0.07453353990137378, "learning_rate": 8.453052014588707e-06, "loss": 0.477, "step": 2088 }, { "epoch": 1.0362417773364776, "grad_norm": 0.07335625469651262, "learning_rate": 8.451638059499803e-06, "loss": 0.5118, "step": 2089 }, { "epoch": 1.036738240039717, "grad_norm": 0.07587133404150129, "learning_rate": 8.450223576883365e-06, "loss": 0.5302, "step": 2090 }, { "epoch": 1.0372347027429565, "grad_norm": 0.07153404212574806, "learning_rate": 8.448808566955575e-06, "loss": 0.4998, "step": 2091 }, { "epoch": 1.0377311654461958, "grad_norm": 0.07745704974069888, "learning_rate": 8.447393029932692e-06, "loss": 0.5115, "step": 2092 }, { "epoch": 1.0382276281494354, "grad_norm": 0.07431588797598375, "learning_rate": 8.445976966031057e-06, "loss": 0.4692, "step": 2093 }, { "epoch": 1.0387240908526747, "grad_norm": 0.07436571219611558, "learning_rate": 8.444560375467098e-06, "loss": 0.5276, "step": 2094 }, { "epoch": 1.039220553555914, "grad_norm": 0.07294391240711395, "learning_rate": 8.443143258457311e-06, "loss": 0.4775, "step": 2095 }, { "epoch": 1.0397170162591536, "grad_norm": 0.07387829012012691, "learning_rate": 8.441725615218287e-06, "loss": 0.4563, "step": 2096 }, { "epoch": 1.040213478962393, "grad_norm": 0.07242229489258531, "learning_rate": 8.440307445966684e-06, "loss": 0.4983, "step": 2097 }, { "epoch": 1.0407099416656325, "grad_norm": 0.07064880963925171, "learning_rate": 8.438888750919252e-06, "loss": 0.5024, "step": 2098 }, { "epoch": 1.0412064043688718, "grad_norm": 0.07072758643070051, "learning_rate": 8.43746953029281e-06, "loss": 0.4797, "step": 2099 }, { "epoch": 1.0417028670721111, "grad_norm": 0.07118181371356039, "learning_rate": 8.436049784304268e-06, "loss": 0.4881, "step": 2100 }, { "epoch": 1.0421993297753507, "grad_norm": 0.07017625392750985, "learning_rate": 8.43462951317061e-06, "loss": 0.4812, "step": 2101 }, { "epoch": 1.04269579247859, "grad_norm": 0.07377904358026054, "learning_rate": 8.433208717108899e-06, "loss": 0.5008, "step": 2102 }, { "epoch": 
1.0431922551818296, "grad_norm": 0.07445763458420322, "learning_rate": 8.431787396336283e-06, "loss": 0.4829, "step": 2103 }, { "epoch": 1.043688717885069, "grad_norm": 0.07470552441745613, "learning_rate": 8.430365551069989e-06, "loss": 0.5315, "step": 2104 }, { "epoch": 1.0441851805883082, "grad_norm": 0.07295084291229291, "learning_rate": 8.42894318152732e-06, "loss": 0.5156, "step": 2105 }, { "epoch": 1.0446816432915478, "grad_norm": 0.07646343472351082, "learning_rate": 8.427520287925669e-06, "loss": 0.5085, "step": 2106 }, { "epoch": 1.045178105994787, "grad_norm": 0.07191136555573914, "learning_rate": 8.426096870482495e-06, "loss": 0.4682, "step": 2107 }, { "epoch": 1.0456745686980267, "grad_norm": 0.07060992139300228, "learning_rate": 8.424672929415347e-06, "loss": 0.4731, "step": 2108 }, { "epoch": 1.046171031401266, "grad_norm": 0.07325524222008324, "learning_rate": 8.423248464941854e-06, "loss": 0.5259, "step": 2109 }, { "epoch": 1.0466674941045053, "grad_norm": 0.07085481033742728, "learning_rate": 8.421823477279719e-06, "loss": 0.4817, "step": 2110 }, { "epoch": 1.0471639568077449, "grad_norm": 0.07275605555140592, "learning_rate": 8.420397966646732e-06, "loss": 0.5319, "step": 2111 }, { "epoch": 1.0476604195109842, "grad_norm": 0.07397160654166765, "learning_rate": 8.418971933260755e-06, "loss": 0.4864, "step": 2112 }, { "epoch": 1.0481568822142238, "grad_norm": 0.07219789497661702, "learning_rate": 8.417545377339739e-06, "loss": 0.5307, "step": 2113 }, { "epoch": 1.048653344917463, "grad_norm": 0.06817862255465229, "learning_rate": 8.41611829910171e-06, "loss": 0.5066, "step": 2114 }, { "epoch": 1.0491498076207024, "grad_norm": 0.07474161367633145, "learning_rate": 8.41469069876477e-06, "loss": 0.5049, "step": 2115 }, { "epoch": 1.049646270323942, "grad_norm": 0.07441496396684265, "learning_rate": 8.413262576547108e-06, "loss": 0.4809, "step": 2116 }, { "epoch": 1.0501427330271813, "grad_norm": 0.07004502242911621, "learning_rate": 8.411833932666989e-06, "loss": 0.4678, "step": 2117 }, { "epoch": 1.0506391957304206, "grad_norm": 0.0737960103890059, "learning_rate": 8.410404767342757e-06, "loss": 0.496, "step": 2118 }, { "epoch": 1.0511356584336602, "grad_norm": 0.07332685142857046, "learning_rate": 8.408975080792839e-06, "loss": 0.462, "step": 2119 }, { "epoch": 1.0516321211368995, "grad_norm": 0.07372161091283125, "learning_rate": 8.407544873235736e-06, "loss": 0.5025, "step": 2120 }, { "epoch": 1.052128583840139, "grad_norm": 0.07211336962240478, "learning_rate": 8.406114144890038e-06, "loss": 0.4897, "step": 2121 }, { "epoch": 1.0526250465433784, "grad_norm": 0.07149511493399731, "learning_rate": 8.404682895974404e-06, "loss": 0.4974, "step": 2122 }, { "epoch": 1.0531215092466177, "grad_norm": 0.07374106989538137, "learning_rate": 8.403251126707581e-06, "loss": 0.5235, "step": 2123 }, { "epoch": 1.0536179719498573, "grad_norm": 0.07446852193118496, "learning_rate": 8.401818837308388e-06, "loss": 0.4998, "step": 2124 }, { "epoch": 1.0541144346530966, "grad_norm": 0.07002579988117166, "learning_rate": 8.400386027995732e-06, "loss": 0.4651, "step": 2125 }, { "epoch": 1.0546108973563362, "grad_norm": 0.07254323023643783, "learning_rate": 8.398952698988592e-06, "loss": 0.4839, "step": 2126 }, { "epoch": 1.0551073600595755, "grad_norm": 0.07723775990560815, "learning_rate": 8.39751885050603e-06, "loss": 0.4951, "step": 2127 }, { "epoch": 1.0556038227628148, "grad_norm": 0.07673925620439852, "learning_rate": 8.396084482767186e-06, "loss": 0.5034, "step": 2128 }, { "epoch": 
1.0561002854660544, "grad_norm": 0.07694300866783188, "learning_rate": 8.39464959599128e-06, "loss": 0.4752, "step": 2129 }, { "epoch": 1.0565967481692937, "grad_norm": 0.07237850482434734, "learning_rate": 8.393214190397615e-06, "loss": 0.5097, "step": 2130 }, { "epoch": 1.0570932108725333, "grad_norm": 0.07628338215290212, "learning_rate": 8.391778266205565e-06, "loss": 0.5026, "step": 2131 }, { "epoch": 1.0575896735757726, "grad_norm": 0.07604150356394877, "learning_rate": 8.390341823634591e-06, "loss": 0.5094, "step": 2132 }, { "epoch": 1.058086136279012, "grad_norm": 0.07318041529403152, "learning_rate": 8.38890486290423e-06, "loss": 0.513, "step": 2133 }, { "epoch": 1.0585825989822515, "grad_norm": 0.07150538307929709, "learning_rate": 8.387467384234096e-06, "loss": 0.495, "step": 2134 }, { "epoch": 1.0590790616854908, "grad_norm": 0.0738979649614615, "learning_rate": 8.386029387843888e-06, "loss": 0.4943, "step": 2135 }, { "epoch": 1.0595755243887304, "grad_norm": 0.07223352302598267, "learning_rate": 8.384590873953376e-06, "loss": 0.5002, "step": 2136 }, { "epoch": 1.0600719870919697, "grad_norm": 0.07703756058610264, "learning_rate": 8.38315184278242e-06, "loss": 0.4888, "step": 2137 }, { "epoch": 1.060568449795209, "grad_norm": 0.06992834400071618, "learning_rate": 8.381712294550948e-06, "loss": 0.4591, "step": 2138 }, { "epoch": 1.0610649124984486, "grad_norm": 0.07346783438489786, "learning_rate": 8.380272229478974e-06, "loss": 0.4877, "step": 2139 }, { "epoch": 1.061561375201688, "grad_norm": 0.07244028096269002, "learning_rate": 8.378831647786586e-06, "loss": 0.4724, "step": 2140 }, { "epoch": 1.0620578379049275, "grad_norm": 0.07201885190546764, "learning_rate": 8.377390549693959e-06, "loss": 0.493, "step": 2141 }, { "epoch": 1.0625543006081668, "grad_norm": 0.07556559260892799, "learning_rate": 8.375948935421337e-06, "loss": 0.491, "step": 2142 }, { "epoch": 1.0630507633114061, "grad_norm": 0.07291427292666455, "learning_rate": 8.374506805189051e-06, "loss": 0.5273, "step": 2143 }, { "epoch": 1.0635472260146457, "grad_norm": 0.07549265071798042, "learning_rate": 8.373064159217506e-06, "loss": 0.5096, "step": 2144 }, { "epoch": 1.064043688717885, "grad_norm": 0.07475791606852278, "learning_rate": 8.371620997727184e-06, "loss": 0.4717, "step": 2145 }, { "epoch": 1.0645401514211246, "grad_norm": 0.07413928468511259, "learning_rate": 8.370177320938656e-06, "loss": 0.5129, "step": 2146 }, { "epoch": 1.065036614124364, "grad_norm": 0.07800399161763176, "learning_rate": 8.36873312907256e-06, "loss": 0.5194, "step": 2147 }, { "epoch": 1.0655330768276032, "grad_norm": 0.07524291986138339, "learning_rate": 8.367288422349617e-06, "loss": 0.5008, "step": 2148 }, { "epoch": 1.0660295395308428, "grad_norm": 0.07511255026748975, "learning_rate": 8.365843200990632e-06, "loss": 0.4985, "step": 2149 }, { "epoch": 1.0665260022340821, "grad_norm": 0.07054144916309989, "learning_rate": 8.364397465216479e-06, "loss": 0.4743, "step": 2150 }, { "epoch": 1.0670224649373217, "grad_norm": 0.07193153540851546, "learning_rate": 8.362951215248118e-06, "loss": 0.5347, "step": 2151 }, { "epoch": 1.067518927640561, "grad_norm": 0.07176991910719552, "learning_rate": 8.361504451306585e-06, "loss": 0.5031, "step": 2152 }, { "epoch": 1.0680153903438003, "grad_norm": 0.07170040930551358, "learning_rate": 8.360057173612993e-06, "loss": 0.492, "step": 2153 }, { "epoch": 1.0685118530470399, "grad_norm": 0.07157465397253797, "learning_rate": 8.358609382388538e-06, "loss": 0.5148, "step": 2154 }, { "epoch": 
1.0690083157502792, "grad_norm": 0.07006770452583463, "learning_rate": 8.35716107785449e-06, "loss": 0.4604, "step": 2155 }, { "epoch": 1.0695047784535188, "grad_norm": 0.07385970105527723, "learning_rate": 8.355712260232197e-06, "loss": 0.5139, "step": 2156 }, { "epoch": 1.070001241156758, "grad_norm": 0.07298273056849365, "learning_rate": 8.35426292974309e-06, "loss": 0.4724, "step": 2157 }, { "epoch": 1.0704977038599974, "grad_norm": 0.07226179617360365, "learning_rate": 8.352813086608678e-06, "loss": 0.4996, "step": 2158 }, { "epoch": 1.070994166563237, "grad_norm": 0.07083077084368294, "learning_rate": 8.351362731050542e-06, "loss": 0.4659, "step": 2159 }, { "epoch": 1.0714906292664763, "grad_norm": 0.07357266169838603, "learning_rate": 8.34991186329035e-06, "loss": 0.4889, "step": 2160 }, { "epoch": 1.0719870919697159, "grad_norm": 0.07734031191337602, "learning_rate": 8.348460483549841e-06, "loss": 0.5021, "step": 2161 }, { "epoch": 1.0724835546729552, "grad_norm": 0.07378794415957397, "learning_rate": 8.347008592050834e-06, "loss": 0.5122, "step": 2162 }, { "epoch": 1.0729800173761945, "grad_norm": 0.07201934091861806, "learning_rate": 8.345556189015231e-06, "loss": 0.4443, "step": 2163 }, { "epoch": 1.073476480079434, "grad_norm": 0.07040976076289147, "learning_rate": 8.344103274665002e-06, "loss": 0.4995, "step": 2164 }, { "epoch": 1.0739729427826734, "grad_norm": 0.07088121726877565, "learning_rate": 8.34264984922221e-06, "loss": 0.4891, "step": 2165 }, { "epoch": 1.074469405485913, "grad_norm": 0.07686251463455322, "learning_rate": 8.341195912908984e-06, "loss": 0.5565, "step": 2166 }, { "epoch": 1.0749658681891523, "grad_norm": 0.07252309150564194, "learning_rate": 8.339741465947533e-06, "loss": 0.4759, "step": 2167 }, { "epoch": 1.0754623308923916, "grad_norm": 0.07087699232546477, "learning_rate": 8.338286508560148e-06, "loss": 0.4977, "step": 2168 }, { "epoch": 1.0759587935956312, "grad_norm": 0.07432595022478114, "learning_rate": 8.336831040969196e-06, "loss": 0.5416, "step": 2169 }, { "epoch": 1.0764552562988705, "grad_norm": 0.0734004314269313, "learning_rate": 8.335375063397123e-06, "loss": 0.4883, "step": 2170 }, { "epoch": 1.07695171900211, "grad_norm": 0.07405951233615124, "learning_rate": 8.333918576066446e-06, "loss": 0.4907, "step": 2171 }, { "epoch": 1.0774481817053494, "grad_norm": 0.07060359600736023, "learning_rate": 8.332461579199773e-06, "loss": 0.4841, "step": 2172 }, { "epoch": 1.0779446444085887, "grad_norm": 0.06899558938544381, "learning_rate": 8.33100407301978e-06, "loss": 0.4777, "step": 2173 }, { "epoch": 1.0784411071118283, "grad_norm": 0.07481858213782207, "learning_rate": 8.32954605774922e-06, "loss": 0.4673, "step": 2174 }, { "epoch": 1.0789375698150676, "grad_norm": 0.075847623970544, "learning_rate": 8.328087533610933e-06, "loss": 0.4756, "step": 2175 }, { "epoch": 1.0794340325183072, "grad_norm": 0.07120553324275082, "learning_rate": 8.326628500827826e-06, "loss": 0.4693, "step": 2176 }, { "epoch": 1.0799304952215465, "grad_norm": 0.07415639230784418, "learning_rate": 8.325168959622893e-06, "loss": 0.4867, "step": 2177 }, { "epoch": 1.0804269579247858, "grad_norm": 0.07374489312987371, "learning_rate": 8.323708910219201e-06, "loss": 0.4627, "step": 2178 }, { "epoch": 1.0809234206280254, "grad_norm": 0.07383508955193396, "learning_rate": 8.322248352839893e-06, "loss": 0.5022, "step": 2179 }, { "epoch": 1.0814198833312647, "grad_norm": 0.0708623476675988, "learning_rate": 8.32078728770819e-06, "loss": 0.4897, "step": 2180 }, { "epoch": 
1.0819163460345043, "grad_norm": 0.07441658954175509, "learning_rate": 8.319325715047394e-06, "loss": 0.4839, "step": 2181 }, { "epoch": 1.0824128087377436, "grad_norm": 0.07083548626853245, "learning_rate": 8.317863635080886e-06, "loss": 0.5122, "step": 2182 }, { "epoch": 1.082909271440983, "grad_norm": 0.07421527166175029, "learning_rate": 8.316401048032121e-06, "loss": 0.4773, "step": 2183 }, { "epoch": 1.0834057341442225, "grad_norm": 0.07327130466932932, "learning_rate": 8.314937954124629e-06, "loss": 0.5122, "step": 2184 }, { "epoch": 1.0839021968474618, "grad_norm": 0.07536245786155, "learning_rate": 8.31347435358202e-06, "loss": 0.499, "step": 2185 }, { "epoch": 1.0843986595507014, "grad_norm": 0.07593717142738794, "learning_rate": 8.312010246627986e-06, "loss": 0.485, "step": 2186 }, { "epoch": 1.0848951222539407, "grad_norm": 0.07167997632106814, "learning_rate": 8.31054563348629e-06, "loss": 0.459, "step": 2187 }, { "epoch": 1.08539158495718, "grad_norm": 0.07270113397492473, "learning_rate": 8.309080514380771e-06, "loss": 0.4727, "step": 2188 }, { "epoch": 1.0858880476604196, "grad_norm": 0.07627450526912384, "learning_rate": 8.307614889535354e-06, "loss": 0.4767, "step": 2189 }, { "epoch": 1.086384510363659, "grad_norm": 0.08054828675705718, "learning_rate": 8.306148759174036e-06, "loss": 0.5516, "step": 2190 }, { "epoch": 1.0868809730668985, "grad_norm": 0.07374128808206212, "learning_rate": 8.30468212352089e-06, "loss": 0.4905, "step": 2191 }, { "epoch": 1.0873774357701378, "grad_norm": 0.07487202792279594, "learning_rate": 8.303214982800067e-06, "loss": 0.5264, "step": 2192 }, { "epoch": 1.0878738984733771, "grad_norm": 0.07381112821428028, "learning_rate": 8.301747337235798e-06, "loss": 0.4757, "step": 2193 }, { "epoch": 1.0883703611766167, "grad_norm": 0.07339639597409396, "learning_rate": 8.300279187052386e-06, "loss": 0.48, "step": 2194 }, { "epoch": 1.088866823879856, "grad_norm": 0.076185034343091, "learning_rate": 8.298810532474218e-06, "loss": 0.4781, "step": 2195 }, { "epoch": 1.0893632865830956, "grad_norm": 0.068948413488983, "learning_rate": 8.297341373725754e-06, "loss": 0.4594, "step": 2196 }, { "epoch": 1.089859749286335, "grad_norm": 0.07517076494598852, "learning_rate": 8.295871711031527e-06, "loss": 0.5134, "step": 2197 }, { "epoch": 1.0903562119895742, "grad_norm": 0.07353154295953639, "learning_rate": 8.294401544616155e-06, "loss": 0.5114, "step": 2198 }, { "epoch": 1.0908526746928138, "grad_norm": 0.07625897388957875, "learning_rate": 8.292930874704328e-06, "loss": 0.4849, "step": 2199 }, { "epoch": 1.091349137396053, "grad_norm": 0.07626683441495624, "learning_rate": 8.291459701520816e-06, "loss": 0.4931, "step": 2200 }, { "epoch": 1.0918456000992927, "grad_norm": 0.07301701910481151, "learning_rate": 8.289988025290463e-06, "loss": 0.4972, "step": 2201 }, { "epoch": 1.092342062802532, "grad_norm": 0.0725810819115238, "learning_rate": 8.288515846238193e-06, "loss": 0.4686, "step": 2202 }, { "epoch": 1.0928385255057713, "grad_norm": 0.07479074559006994, "learning_rate": 8.287043164589001e-06, "loss": 0.5003, "step": 2203 }, { "epoch": 1.0933349882090109, "grad_norm": 0.07312842594353675, "learning_rate": 8.285569980567965e-06, "loss": 0.4772, "step": 2204 }, { "epoch": 1.0938314509122502, "grad_norm": 0.07054737968378164, "learning_rate": 8.284096294400238e-06, "loss": 0.5029, "step": 2205 }, { "epoch": 1.0943279136154895, "grad_norm": 0.07641103035051312, "learning_rate": 8.282622106311049e-06, "loss": 0.5053, "step": 2206 }, { "epoch": 
1.094824376318729, "grad_norm": 0.07449614033992102, "learning_rate": 8.281147416525704e-06, "loss": 0.5007, "step": 2207 }, { "epoch": 1.0953208390219684, "grad_norm": 0.0757024611392332, "learning_rate": 8.279672225269584e-06, "loss": 0.5168, "step": 2208 }, { "epoch": 1.095817301725208, "grad_norm": 0.07543978367284243, "learning_rate": 8.278196532768152e-06, "loss": 0.4986, "step": 2209 }, { "epoch": 1.0963137644284473, "grad_norm": 0.07885663632789892, "learning_rate": 8.276720339246942e-06, "loss": 0.5038, "step": 2210 }, { "epoch": 1.0968102271316866, "grad_norm": 0.07595383345135583, "learning_rate": 8.275243644931565e-06, "loss": 0.4982, "step": 2211 }, { "epoch": 1.0973066898349262, "grad_norm": 0.0739185354406121, "learning_rate": 8.273766450047713e-06, "loss": 0.493, "step": 2212 }, { "epoch": 1.0978031525381655, "grad_norm": 0.07391837862098494, "learning_rate": 8.272288754821149e-06, "loss": 0.533, "step": 2213 }, { "epoch": 1.098299615241405, "grad_norm": 0.08110713367122144, "learning_rate": 8.270810559477716e-06, "loss": 0.4997, "step": 2214 }, { "epoch": 1.0987960779446444, "grad_norm": 0.0758720474097381, "learning_rate": 8.26933186424333e-06, "loss": 0.5158, "step": 2215 }, { "epoch": 1.0992925406478837, "grad_norm": 0.0718223545522586, "learning_rate": 8.267852669343991e-06, "loss": 0.4516, "step": 2216 }, { "epoch": 1.0997890033511233, "grad_norm": 0.07859584280020389, "learning_rate": 8.266372975005768e-06, "loss": 0.5332, "step": 2217 }, { "epoch": 1.1002854660543626, "grad_norm": 0.07291038437725997, "learning_rate": 8.264892781454807e-06, "loss": 0.4982, "step": 2218 }, { "epoch": 1.1007819287576022, "grad_norm": 0.07372572346588169, "learning_rate": 8.263412088917333e-06, "loss": 0.4804, "step": 2219 }, { "epoch": 1.1012783914608415, "grad_norm": 0.06939509607738924, "learning_rate": 8.261930897619647e-06, "loss": 0.4397, "step": 2220 }, { "epoch": 1.1017748541640808, "grad_norm": 0.0728653700593787, "learning_rate": 8.260449207788124e-06, "loss": 0.4753, "step": 2221 }, { "epoch": 1.1022713168673204, "grad_norm": 0.07454856223228062, "learning_rate": 8.258967019649216e-06, "loss": 0.5085, "step": 2222 }, { "epoch": 1.1027677795705597, "grad_norm": 0.074275864871691, "learning_rate": 8.257484333429452e-06, "loss": 0.4788, "step": 2223 }, { "epoch": 1.1032642422737993, "grad_norm": 0.07351109388785956, "learning_rate": 8.256001149355439e-06, "loss": 0.5037, "step": 2224 }, { "epoch": 1.1037607049770386, "grad_norm": 0.07596144114073311, "learning_rate": 8.254517467653858e-06, "loss": 0.4851, "step": 2225 }, { "epoch": 1.104257167680278, "grad_norm": 0.07320020631956403, "learning_rate": 8.253033288551463e-06, "loss": 0.5113, "step": 2226 }, { "epoch": 1.1047536303835175, "grad_norm": 0.07343420892205225, "learning_rate": 8.251548612275086e-06, "loss": 0.5087, "step": 2227 }, { "epoch": 1.1052500930867568, "grad_norm": 0.0763838063102248, "learning_rate": 8.25006343905164e-06, "loss": 0.4955, "step": 2228 }, { "epoch": 1.1057465557899964, "grad_norm": 0.07339654182978661, "learning_rate": 8.248577769108106e-06, "loss": 0.4988, "step": 2229 }, { "epoch": 1.1062430184932357, "grad_norm": 0.07362892885262858, "learning_rate": 8.247091602671551e-06, "loss": 0.4855, "step": 2230 }, { "epoch": 1.106739481196475, "grad_norm": 0.06953026409193383, "learning_rate": 8.245604939969104e-06, "loss": 0.4896, "step": 2231 }, { "epoch": 1.1072359438997146, "grad_norm": 0.07111640843092504, "learning_rate": 8.244117781227982e-06, "loss": 0.5095, "step": 2232 }, { "epoch": 
1.107732406602954, "grad_norm": 0.07481021626932494, "learning_rate": 8.242630126675475e-06, "loss": 0.5125, "step": 2233 }, { "epoch": 1.1082288693061935, "grad_norm": 0.07395283841841717, "learning_rate": 8.241141976538944e-06, "loss": 0.5416, "step": 2234 }, { "epoch": 1.1087253320094328, "grad_norm": 0.07130473116026037, "learning_rate": 8.239653331045827e-06, "loss": 0.4824, "step": 2235 }, { "epoch": 1.1092217947126721, "grad_norm": 0.06947962941312745, "learning_rate": 8.238164190423645e-06, "loss": 0.4668, "step": 2236 }, { "epoch": 1.1097182574159117, "grad_norm": 0.07191577528119829, "learning_rate": 8.236674554899985e-06, "loss": 0.4853, "step": 2237 }, { "epoch": 1.110214720119151, "grad_norm": 0.07496165833604053, "learning_rate": 8.235184424702516e-06, "loss": 0.4787, "step": 2238 }, { "epoch": 1.1107111828223906, "grad_norm": 0.07213792735155145, "learning_rate": 8.23369380005898e-06, "loss": 0.5311, "step": 2239 }, { "epoch": 1.11120764552563, "grad_norm": 0.07053117748052583, "learning_rate": 8.232202681197194e-06, "loss": 0.4654, "step": 2240 }, { "epoch": 1.1117041082288692, "grad_norm": 0.07528814724049783, "learning_rate": 8.230711068345055e-06, "loss": 0.5237, "step": 2241 }, { "epoch": 1.1122005709321088, "grad_norm": 0.06950506610855729, "learning_rate": 8.229218961730527e-06, "loss": 0.4976, "step": 2242 }, { "epoch": 1.112697033635348, "grad_norm": 0.07195918947291367, "learning_rate": 8.227726361581659e-06, "loss": 0.4846, "step": 2243 }, { "epoch": 1.1131934963385877, "grad_norm": 0.06959712282406523, "learning_rate": 8.22623326812657e-06, "loss": 0.4849, "step": 2244 }, { "epoch": 1.113689959041827, "grad_norm": 0.07051471449226025, "learning_rate": 8.224739681593453e-06, "loss": 0.4855, "step": 2245 }, { "epoch": 1.1141864217450663, "grad_norm": 0.07379793580293327, "learning_rate": 8.22324560221058e-06, "loss": 0.5028, "step": 2246 }, { "epoch": 1.1146828844483059, "grad_norm": 0.07176973461803826, "learning_rate": 8.221751030206297e-06, "loss": 0.4903, "step": 2247 }, { "epoch": 1.1151793471515452, "grad_norm": 0.07613295929530475, "learning_rate": 8.220255965809027e-06, "loss": 0.498, "step": 2248 }, { "epoch": 1.1156758098547848, "grad_norm": 0.07481677436822458, "learning_rate": 8.218760409247267e-06, "loss": 0.4896, "step": 2249 }, { "epoch": 1.116172272558024, "grad_norm": 0.07606898831692863, "learning_rate": 8.217264360749587e-06, "loss": 0.4571, "step": 2250 }, { "epoch": 1.1166687352612634, "grad_norm": 0.07151037999784068, "learning_rate": 8.215767820544633e-06, "loss": 0.5038, "step": 2251 }, { "epoch": 1.117165197964503, "grad_norm": 0.0750769122463959, "learning_rate": 8.21427078886113e-06, "loss": 0.5074, "step": 2252 }, { "epoch": 1.1176616606677423, "grad_norm": 0.0749081827284495, "learning_rate": 8.21277326592787e-06, "loss": 0.4922, "step": 2253 }, { "epoch": 1.1181581233709816, "grad_norm": 0.07152106501318276, "learning_rate": 8.211275251973734e-06, "loss": 0.4843, "step": 2254 }, { "epoch": 1.1186545860742212, "grad_norm": 0.07098442469069642, "learning_rate": 8.209776747227663e-06, "loss": 0.4735, "step": 2255 }, { "epoch": 1.1191510487774605, "grad_norm": 0.0719612578777751, "learning_rate": 8.20827775191868e-06, "loss": 0.5094, "step": 2256 }, { "epoch": 1.1196475114807, "grad_norm": 0.07179858593800634, "learning_rate": 8.206778266275885e-06, "loss": 0.488, "step": 2257 }, { "epoch": 1.1201439741839394, "grad_norm": 0.07118495482126498, "learning_rate": 8.205278290528446e-06, "loss": 0.465, "step": 2258 }, { "epoch": 
1.1206404368871787, "grad_norm": 0.07419162410687083, "learning_rate": 8.203777824905617e-06, "loss": 0.4747, "step": 2259 }, { "epoch": 1.1211368995904183, "grad_norm": 0.0710781813716537, "learning_rate": 8.202276869636713e-06, "loss": 0.494, "step": 2260 }, { "epoch": 1.1216333622936576, "grad_norm": 0.07732360536034513, "learning_rate": 8.200775424951137e-06, "loss": 0.504, "step": 2261 }, { "epoch": 1.1221298249968972, "grad_norm": 0.06777372530014544, "learning_rate": 8.199273491078355e-06, "loss": 0.4637, "step": 2262 }, { "epoch": 1.1226262877001365, "grad_norm": 0.07084811372777965, "learning_rate": 8.197771068247917e-06, "loss": 0.485, "step": 2263 }, { "epoch": 1.1231227504033758, "grad_norm": 0.07658689251813063, "learning_rate": 8.196268156689444e-06, "loss": 0.5295, "step": 2264 }, { "epoch": 1.1236192131066154, "grad_norm": 0.07540282347213605, "learning_rate": 8.194764756632632e-06, "loss": 0.4946, "step": 2265 }, { "epoch": 1.1241156758098547, "grad_norm": 0.07431439448491374, "learning_rate": 8.193260868307251e-06, "loss": 0.5025, "step": 2266 }, { "epoch": 1.1246121385130943, "grad_norm": 0.08623105926325221, "learning_rate": 8.191756491943146e-06, "loss": 0.5448, "step": 2267 }, { "epoch": 1.1251086012163336, "grad_norm": 0.07666257532724999, "learning_rate": 8.190251627770237e-06, "loss": 0.5008, "step": 2268 }, { "epoch": 1.125605063919573, "grad_norm": 0.0760010099142199, "learning_rate": 8.188746276018518e-06, "loss": 0.4786, "step": 2269 }, { "epoch": 1.1261015266228125, "grad_norm": 0.07651610079909663, "learning_rate": 8.187240436918057e-06, "loss": 0.4941, "step": 2270 }, { "epoch": 1.1265979893260518, "grad_norm": 0.07217604222108294, "learning_rate": 8.185734110699002e-06, "loss": 0.4981, "step": 2271 }, { "epoch": 1.1270944520292914, "grad_norm": 0.07313906323366962, "learning_rate": 8.184227297591568e-06, "loss": 0.4865, "step": 2272 }, { "epoch": 1.1275909147325307, "grad_norm": 0.07579452675806608, "learning_rate": 8.182719997826043e-06, "loss": 0.4815, "step": 2273 }, { "epoch": 1.12808737743577, "grad_norm": 0.07607016220528141, "learning_rate": 8.1812122116328e-06, "loss": 0.4973, "step": 2274 }, { "epoch": 1.1285838401390096, "grad_norm": 0.06925031457091706, "learning_rate": 8.179703939242276e-06, "loss": 0.47, "step": 2275 }, { "epoch": 1.129080302842249, "grad_norm": 0.07533759199309992, "learning_rate": 8.178195180884989e-06, "loss": 0.4871, "step": 2276 }, { "epoch": 1.1295767655454885, "grad_norm": 0.0749165977272755, "learning_rate": 8.176685936791526e-06, "loss": 0.4734, "step": 2277 }, { "epoch": 1.1300732282487278, "grad_norm": 0.07436005370185177, "learning_rate": 8.175176207192552e-06, "loss": 0.5079, "step": 2278 }, { "epoch": 1.1305696909519671, "grad_norm": 0.07557004980259102, "learning_rate": 8.173665992318805e-06, "loss": 0.5002, "step": 2279 }, { "epoch": 1.1310661536552067, "grad_norm": 0.07567715209828088, "learning_rate": 8.172155292401096e-06, "loss": 0.5125, "step": 2280 }, { "epoch": 1.131562616358446, "grad_norm": 0.07764282615456619, "learning_rate": 8.170644107670313e-06, "loss": 0.4966, "step": 2281 }, { "epoch": 1.1320590790616856, "grad_norm": 0.07372865116940043, "learning_rate": 8.169132438357416e-06, "loss": 0.5107, "step": 2282 }, { "epoch": 1.132555541764925, "grad_norm": 0.07638680398573897, "learning_rate": 8.16762028469344e-06, "loss": 0.4882, "step": 2283 }, { "epoch": 1.1330520044681642, "grad_norm": 0.07121039450346492, "learning_rate": 8.166107646909491e-06, "loss": 0.5046, "step": 2284 }, { "epoch": 
1.1335484671714038, "grad_norm": 0.07401683052681642, "learning_rate": 8.164594525236752e-06, "loss": 0.4988, "step": 2285 }, { "epoch": 1.1340449298746431, "grad_norm": 0.07398370488195749, "learning_rate": 8.163080919906482e-06, "loss": 0.4866, "step": 2286 }, { "epoch": 1.1345413925778827, "grad_norm": 0.07422405936633944, "learning_rate": 8.161566831150007e-06, "loss": 0.4901, "step": 2287 }, { "epoch": 1.135037855281122, "grad_norm": 0.06830231848502405, "learning_rate": 8.160052259198737e-06, "loss": 0.4868, "step": 2288 }, { "epoch": 1.1355343179843613, "grad_norm": 0.07658053146961001, "learning_rate": 8.158537204284145e-06, "loss": 0.4899, "step": 2289 }, { "epoch": 1.1360307806876009, "grad_norm": 0.07266971889913688, "learning_rate": 8.157021666637783e-06, "loss": 0.4585, "step": 2290 }, { "epoch": 1.1365272433908402, "grad_norm": 0.07396138422667788, "learning_rate": 8.155505646491282e-06, "loss": 0.4892, "step": 2291 }, { "epoch": 1.1370237060940798, "grad_norm": 0.07774988658009194, "learning_rate": 8.153989144076335e-06, "loss": 0.5008, "step": 2292 }, { "epoch": 1.137520168797319, "grad_norm": 0.07188359362083234, "learning_rate": 8.152472159624718e-06, "loss": 0.5156, "step": 2293 }, { "epoch": 1.1380166315005584, "grad_norm": 0.07587558951701104, "learning_rate": 8.150954693368278e-06, "loss": 0.4923, "step": 2294 }, { "epoch": 1.138513094203798, "grad_norm": 0.07985374664068505, "learning_rate": 8.149436745538934e-06, "loss": 0.5302, "step": 2295 }, { "epoch": 1.1390095569070373, "grad_norm": 0.07351485081740546, "learning_rate": 8.14791831636868e-06, "loss": 0.5146, "step": 2296 }, { "epoch": 1.1395060196102769, "grad_norm": 0.07543744388533724, "learning_rate": 8.146399406089587e-06, "loss": 0.5112, "step": 2297 }, { "epoch": 1.1400024823135162, "grad_norm": 0.07015283401717387, "learning_rate": 8.144880014933791e-06, "loss": 0.4671, "step": 2298 }, { "epoch": 1.1404989450167555, "grad_norm": 0.07424863703283004, "learning_rate": 8.143360143133512e-06, "loss": 0.5043, "step": 2299 }, { "epoch": 1.140995407719995, "grad_norm": 0.07182043679790903, "learning_rate": 8.141839790921033e-06, "loss": 0.4948, "step": 2300 }, { "epoch": 1.1414918704232344, "grad_norm": 0.07320854472829157, "learning_rate": 8.140318958528717e-06, "loss": 0.4831, "step": 2301 }, { "epoch": 1.141988333126474, "grad_norm": 0.07046526965939379, "learning_rate": 8.138797646189e-06, "loss": 0.5034, "step": 2302 }, { "epoch": 1.1424847958297133, "grad_norm": 0.0738887751278947, "learning_rate": 8.137275854134391e-06, "loss": 0.5195, "step": 2303 }, { "epoch": 1.1429812585329526, "grad_norm": 0.0734698833161908, "learning_rate": 8.135753582597468e-06, "loss": 0.5302, "step": 2304 }, { "epoch": 1.1434777212361922, "grad_norm": 0.07969162891614834, "learning_rate": 8.13423083181089e-06, "loss": 0.5185, "step": 2305 }, { "epoch": 1.1439741839394315, "grad_norm": 0.07469688708542015, "learning_rate": 8.132707602007381e-06, "loss": 0.5084, "step": 2306 }, { "epoch": 1.144470646642671, "grad_norm": 0.07579153342194478, "learning_rate": 8.131183893419746e-06, "loss": 0.505, "step": 2307 }, { "epoch": 1.1449671093459104, "grad_norm": 0.06906370518919978, "learning_rate": 8.129659706280856e-06, "loss": 0.4814, "step": 2308 }, { "epoch": 1.1454635720491497, "grad_norm": 0.07068549367332468, "learning_rate": 8.128135040823661e-06, "loss": 0.4601, "step": 2309 }, { "epoch": 1.1459600347523893, "grad_norm": 0.07520549253264013, "learning_rate": 8.12660989728118e-06, "loss": 0.5018, "step": 2310 }, { "epoch": 
1.1464564974556286, "grad_norm": 0.07694693218495899, "learning_rate": 8.125084275886507e-06, "loss": 0.5548, "step": 2311 }, { "epoch": 1.1469529601588682, "grad_norm": 0.08540895386379119, "learning_rate": 8.123558176872812e-06, "loss": 0.4933, "step": 2312 }, { "epoch": 1.1474494228621075, "grad_norm": 0.07170201494666945, "learning_rate": 8.12203160047333e-06, "loss": 0.4806, "step": 2313 }, { "epoch": 1.1479458855653468, "grad_norm": 0.07479344185796176, "learning_rate": 8.120504546921377e-06, "loss": 0.495, "step": 2314 }, { "epoch": 1.1484423482685864, "grad_norm": 0.07586036128037843, "learning_rate": 8.118977016450337e-06, "loss": 0.4885, "step": 2315 }, { "epoch": 1.1489388109718257, "grad_norm": 0.07298836535827842, "learning_rate": 8.117449009293668e-06, "loss": 0.4822, "step": 2316 }, { "epoch": 1.1494352736750653, "grad_norm": 0.07424610160942582, "learning_rate": 8.115920525684904e-06, "loss": 0.4881, "step": 2317 }, { "epoch": 1.1499317363783046, "grad_norm": 0.07711573726644844, "learning_rate": 8.114391565857647e-06, "loss": 0.4964, "step": 2318 }, { "epoch": 1.150428199081544, "grad_norm": 0.07179898256858769, "learning_rate": 8.112862130045574e-06, "loss": 0.496, "step": 2319 }, { "epoch": 1.1509246617847835, "grad_norm": 0.06936652859448023, "learning_rate": 8.111332218482436e-06, "loss": 0.4672, "step": 2320 }, { "epoch": 1.1514211244880228, "grad_norm": 0.07455917920637714, "learning_rate": 8.109801831402056e-06, "loss": 0.4856, "step": 2321 }, { "epoch": 1.1519175871912624, "grad_norm": 0.07669239126681408, "learning_rate": 8.108270969038326e-06, "loss": 0.467, "step": 2322 }, { "epoch": 1.1524140498945017, "grad_norm": 0.0754927369489174, "learning_rate": 8.106739631625216e-06, "loss": 0.4973, "step": 2323 }, { "epoch": 1.152910512597741, "grad_norm": 0.07373289963214065, "learning_rate": 8.105207819396767e-06, "loss": 0.4617, "step": 2324 }, { "epoch": 1.1534069753009806, "grad_norm": 0.07214062657617648, "learning_rate": 8.10367553258709e-06, "loss": 0.517, "step": 2325 }, { "epoch": 1.15390343800422, "grad_norm": 0.07328961733541389, "learning_rate": 8.102142771430373e-06, "loss": 0.5141, "step": 2326 }, { "epoch": 1.1543999007074595, "grad_norm": 0.07648118911976606, "learning_rate": 8.100609536160871e-06, "loss": 0.5044, "step": 2327 }, { "epoch": 1.1548963634106988, "grad_norm": 0.07367333108806713, "learning_rate": 8.099075827012917e-06, "loss": 0.4904, "step": 2328 }, { "epoch": 1.1553928261139381, "grad_norm": 0.07368647392363427, "learning_rate": 8.097541644220912e-06, "loss": 0.4628, "step": 2329 }, { "epoch": 1.1558892888171777, "grad_norm": 0.07439036359702104, "learning_rate": 8.096006988019331e-06, "loss": 0.497, "step": 2330 }, { "epoch": 1.156385751520417, "grad_norm": 0.0698387579277891, "learning_rate": 8.094471858642726e-06, "loss": 0.4735, "step": 2331 }, { "epoch": 1.1568822142236566, "grad_norm": 0.07457852845134195, "learning_rate": 8.092936256325709e-06, "loss": 0.466, "step": 2332 }, { "epoch": 1.1573786769268959, "grad_norm": 0.07697323290341726, "learning_rate": 8.09140018130298e-06, "loss": 0.5094, "step": 2333 }, { "epoch": 1.1578751396301352, "grad_norm": 0.06998446316294323, "learning_rate": 8.089863633809298e-06, "loss": 0.4626, "step": 2334 }, { "epoch": 1.1583716023333748, "grad_norm": 0.07158518666753631, "learning_rate": 8.088326614079503e-06, "loss": 0.4773, "step": 2335 }, { "epoch": 1.158868065036614, "grad_norm": 0.07123059869176439, "learning_rate": 8.086789122348504e-06, "loss": 0.4914, "step": 2336 }, { "epoch": 
1.1593645277398537, "grad_norm": 0.07132547049758975, "learning_rate": 8.085251158851278e-06, "loss": 0.4692, "step": 2337 }, { "epoch": 1.159860990443093, "grad_norm": 0.07362286127579233, "learning_rate": 8.08371272382288e-06, "loss": 0.5461, "step": 2338 }, { "epoch": 1.1603574531463323, "grad_norm": 0.07510848961258125, "learning_rate": 8.08217381749844e-06, "loss": 0.495, "step": 2339 }, { "epoch": 1.1608539158495719, "grad_norm": 0.06994507718975675, "learning_rate": 8.080634440113147e-06, "loss": 0.4877, "step": 2340 }, { "epoch": 1.1613503785528112, "grad_norm": 0.07484978381716109, "learning_rate": 8.079094591902275e-06, "loss": 0.4768, "step": 2341 }, { "epoch": 1.1618468412560508, "grad_norm": 0.07331691403913798, "learning_rate": 8.077554273101165e-06, "loss": 0.4996, "step": 2342 }, { "epoch": 1.16234330395929, "grad_norm": 0.07655827308992964, "learning_rate": 8.076013483945228e-06, "loss": 0.5221, "step": 2343 }, { "epoch": 1.1628397666625294, "grad_norm": 0.07121203009120934, "learning_rate": 8.074472224669952e-06, "loss": 0.4673, "step": 2344 }, { "epoch": 1.163336229365769, "grad_norm": 0.07461640470824088, "learning_rate": 8.072930495510888e-06, "loss": 0.4544, "step": 2345 }, { "epoch": 1.1638326920690083, "grad_norm": 0.07204944499795159, "learning_rate": 8.071388296703672e-06, "loss": 0.4868, "step": 2346 }, { "epoch": 1.1643291547722479, "grad_norm": 0.0716988652101321, "learning_rate": 8.069845628484002e-06, "loss": 0.4686, "step": 2347 }, { "epoch": 1.1648256174754872, "grad_norm": 0.077316414868866, "learning_rate": 8.068302491087645e-06, "loss": 0.4858, "step": 2348 }, { "epoch": 1.1653220801787265, "grad_norm": 0.07517214266739652, "learning_rate": 8.06675888475045e-06, "loss": 0.5266, "step": 2349 }, { "epoch": 1.165818542881966, "grad_norm": 0.07561697197455328, "learning_rate": 8.065214809708332e-06, "loss": 0.4932, "step": 2350 }, { "epoch": 1.1663150055852054, "grad_norm": 0.07393071703889437, "learning_rate": 8.063670266197278e-06, "loss": 0.4945, "step": 2351 }, { "epoch": 1.166811468288445, "grad_norm": 0.07018592225608884, "learning_rate": 8.062125254453343e-06, "loss": 0.4697, "step": 2352 }, { "epoch": 1.1673079309916843, "grad_norm": 0.0704428825737941, "learning_rate": 8.060579774712664e-06, "loss": 0.4706, "step": 2353 }, { "epoch": 1.1678043936949236, "grad_norm": 0.07140627571096919, "learning_rate": 8.059033827211438e-06, "loss": 0.496, "step": 2354 }, { "epoch": 1.1683008563981632, "grad_norm": 0.0734276480348377, "learning_rate": 8.057487412185937e-06, "loss": 0.5266, "step": 2355 }, { "epoch": 1.1687973191014025, "grad_norm": 0.0770116552694326, "learning_rate": 8.055940529872512e-06, "loss": 0.4968, "step": 2356 }, { "epoch": 1.1692937818046418, "grad_norm": 0.0752006355628804, "learning_rate": 8.054393180507572e-06, "loss": 0.5169, "step": 2357 }, { "epoch": 1.1697902445078814, "grad_norm": 0.07104098237478916, "learning_rate": 8.052845364327609e-06, "loss": 0.4847, "step": 2358 }, { "epoch": 1.1702867072111207, "grad_norm": 0.0732561308111403, "learning_rate": 8.05129708156918e-06, "loss": 0.4757, "step": 2359 }, { "epoch": 1.1707831699143603, "grad_norm": 0.0726216716118377, "learning_rate": 8.049748332468917e-06, "loss": 0.4785, "step": 2360 }, { "epoch": 1.1712796326175996, "grad_norm": 0.07151328576643422, "learning_rate": 8.04819911726352e-06, "loss": 0.5001, "step": 2361 }, { "epoch": 1.171776095320839, "grad_norm": 0.0711654992225538, "learning_rate": 8.046649436189763e-06, "loss": 0.4915, "step": 2362 }, { "epoch": 
1.1722725580240785, "grad_norm": 0.07352594087862271, "learning_rate": 8.045099289484488e-06, "loss": 0.4671, "step": 2363 }, { "epoch": 1.1727690207273178, "grad_norm": 0.07484080237772299, "learning_rate": 8.043548677384611e-06, "loss": 0.4861, "step": 2364 }, { "epoch": 1.1732654834305574, "grad_norm": 0.07550160233111058, "learning_rate": 8.041997600127118e-06, "loss": 0.5023, "step": 2365 }, { "epoch": 1.1737619461337967, "grad_norm": 0.07114299993253431, "learning_rate": 8.040446057949067e-06, "loss": 0.5116, "step": 2366 }, { "epoch": 1.174258408837036, "grad_norm": 0.07022660054047274, "learning_rate": 8.038894051087587e-06, "loss": 0.4845, "step": 2367 }, { "epoch": 1.1747548715402756, "grad_norm": 0.08090894423687531, "learning_rate": 8.037341579779875e-06, "loss": 0.5432, "step": 2368 }, { "epoch": 1.175251334243515, "grad_norm": 0.07285277362700689, "learning_rate": 8.035788644263203e-06, "loss": 0.4921, "step": 2369 }, { "epoch": 1.1757477969467545, "grad_norm": 0.07243641654934144, "learning_rate": 8.034235244774911e-06, "loss": 0.431, "step": 2370 }, { "epoch": 1.1762442596499938, "grad_norm": 0.07669360482068058, "learning_rate": 8.032681381552415e-06, "loss": 0.5152, "step": 2371 }, { "epoch": 1.1767407223532331, "grad_norm": 0.07436716811386304, "learning_rate": 8.031127054833192e-06, "loss": 0.5089, "step": 2372 }, { "epoch": 1.1772371850564727, "grad_norm": 0.0720366346955759, "learning_rate": 8.029572264854799e-06, "loss": 0.4832, "step": 2373 }, { "epoch": 1.177733647759712, "grad_norm": 0.0702137374413376, "learning_rate": 8.028017011854861e-06, "loss": 0.4817, "step": 2374 }, { "epoch": 1.1782301104629516, "grad_norm": 0.0720483512228606, "learning_rate": 8.026461296071075e-06, "loss": 0.4676, "step": 2375 }, { "epoch": 1.178726573166191, "grad_norm": 0.07397180830163641, "learning_rate": 8.024905117741204e-06, "loss": 0.5442, "step": 2376 }, { "epoch": 1.1792230358694302, "grad_norm": 0.07716130964866487, "learning_rate": 8.023348477103088e-06, "loss": 0.5113, "step": 2377 }, { "epoch": 1.1797194985726698, "grad_norm": 0.07335130875745567, "learning_rate": 8.021791374394631e-06, "loss": 0.5032, "step": 2378 }, { "epoch": 1.180215961275909, "grad_norm": 0.07146794267621975, "learning_rate": 8.020233809853815e-06, "loss": 0.4932, "step": 2379 }, { "epoch": 1.1807124239791487, "grad_norm": 0.07252371872661585, "learning_rate": 8.018675783718686e-06, "loss": 0.5086, "step": 2380 }, { "epoch": 1.181208886682388, "grad_norm": 0.07261609295457848, "learning_rate": 8.017117296227364e-06, "loss": 0.4856, "step": 2381 }, { "epoch": 1.1817053493856273, "grad_norm": 0.07734803531243527, "learning_rate": 8.015558347618039e-06, "loss": 0.5243, "step": 2382 }, { "epoch": 1.1822018120888669, "grad_norm": 0.06979903430013636, "learning_rate": 8.013998938128973e-06, "loss": 0.4699, "step": 2383 }, { "epoch": 1.1826982747921062, "grad_norm": 0.07386585758579457, "learning_rate": 8.012439067998494e-06, "loss": 0.4932, "step": 2384 }, { "epoch": 1.1831947374953455, "grad_norm": 0.07541637636041616, "learning_rate": 8.010878737465003e-06, "loss": 0.5202, "step": 2385 }, { "epoch": 1.183691200198585, "grad_norm": 0.07199214991500814, "learning_rate": 8.009317946766975e-06, "loss": 0.4632, "step": 2386 }, { "epoch": 1.1841876629018244, "grad_norm": 0.07387274022313904, "learning_rate": 8.007756696142948e-06, "loss": 0.4928, "step": 2387 }, { "epoch": 1.184684125605064, "grad_norm": 0.0709506582818121, "learning_rate": 8.006194985831537e-06, "loss": 0.4685, "step": 2388 }, { "epoch": 
1.1851805883083033, "grad_norm": 0.07741639300132216, "learning_rate": 8.004632816071422e-06, "loss": 0.4852, "step": 2389 }, { "epoch": 1.1856770510115426, "grad_norm": 0.07182104672096723, "learning_rate": 8.003070187101356e-06, "loss": 0.4759, "step": 2390 }, { "epoch": 1.1861735137147822, "grad_norm": 0.0745573604205288, "learning_rate": 8.001507099160164e-06, "loss": 0.4728, "step": 2391 }, { "epoch": 1.1866699764180215, "grad_norm": 0.07176520107798823, "learning_rate": 7.999943552486737e-06, "loss": 0.4857, "step": 2392 }, { "epoch": 1.187166439121261, "grad_norm": 0.07181856926868868, "learning_rate": 7.998379547320038e-06, "loss": 0.4712, "step": 2393 }, { "epoch": 1.1876629018245004, "grad_norm": 0.07050318637166479, "learning_rate": 7.996815083899102e-06, "loss": 0.4546, "step": 2394 }, { "epoch": 1.1881593645277397, "grad_norm": 0.07293329762823501, "learning_rate": 7.995250162463028e-06, "loss": 0.4993, "step": 2395 }, { "epoch": 1.1886558272309793, "grad_norm": 0.07689205680729931, "learning_rate": 7.993684783250994e-06, "loss": 0.4835, "step": 2396 }, { "epoch": 1.1891522899342186, "grad_norm": 0.07560318192495996, "learning_rate": 7.99211894650224e-06, "loss": 0.461, "step": 2397 }, { "epoch": 1.1896487526374582, "grad_norm": 0.07309599735206149, "learning_rate": 7.99055265245608e-06, "loss": 0.4779, "step": 2398 }, { "epoch": 1.1901452153406975, "grad_norm": 0.07095152261698759, "learning_rate": 7.988985901351898e-06, "loss": 0.4905, "step": 2399 }, { "epoch": 1.1906416780439368, "grad_norm": 0.07164890116119788, "learning_rate": 7.987418693429145e-06, "loss": 0.5099, "step": 2400 }, { "epoch": 1.1911381407471764, "grad_norm": 0.07124693531064728, "learning_rate": 7.985851028927344e-06, "loss": 0.4604, "step": 2401 }, { "epoch": 1.1916346034504157, "grad_norm": 0.07435149050158306, "learning_rate": 7.98428290808609e-06, "loss": 0.4801, "step": 2402 }, { "epoch": 1.1921310661536553, "grad_norm": 0.07294313694584678, "learning_rate": 7.98271433114504e-06, "loss": 0.509, "step": 2403 }, { "epoch": 1.1926275288568946, "grad_norm": 0.06978769812413184, "learning_rate": 7.981145298343929e-06, "loss": 0.4892, "step": 2404 }, { "epoch": 1.193123991560134, "grad_norm": 0.06912663308966963, "learning_rate": 7.979575809922559e-06, "loss": 0.4898, "step": 2405 }, { "epoch": 1.1936204542633735, "grad_norm": 0.07301215898745836, "learning_rate": 7.9780058661208e-06, "loss": 0.4756, "step": 2406 }, { "epoch": 1.1941169169666128, "grad_norm": 0.07229724679847097, "learning_rate": 7.976435467178592e-06, "loss": 0.5056, "step": 2407 }, { "epoch": 1.1946133796698524, "grad_norm": 0.07301499091902912, "learning_rate": 7.97486461333595e-06, "loss": 0.5187, "step": 2408 }, { "epoch": 1.1951098423730917, "grad_norm": 0.07395695237003778, "learning_rate": 7.973293304832946e-06, "loss": 0.4859, "step": 2409 }, { "epoch": 1.195606305076331, "grad_norm": 0.07147072664149673, "learning_rate": 7.971721541909734e-06, "loss": 0.4762, "step": 2410 }, { "epoch": 1.1961027677795706, "grad_norm": 0.0710124719486651, "learning_rate": 7.970149324806535e-06, "loss": 0.4821, "step": 2411 }, { "epoch": 1.19659923048281, "grad_norm": 0.06924011820147728, "learning_rate": 7.968576653763633e-06, "loss": 0.4777, "step": 2412 }, { "epoch": 1.1970956931860495, "grad_norm": 0.07631306801702314, "learning_rate": 7.967003529021386e-06, "loss": 0.4963, "step": 2413 }, { "epoch": 1.1975921558892888, "grad_norm": 0.0724947892734663, "learning_rate": 7.965429950820222e-06, "loss": 0.4936, "step": 2414 }, { "epoch": 
1.1980886185925281, "grad_norm": 0.07164010492469318, "learning_rate": 7.963855919400639e-06, "loss": 0.4679, "step": 2415 }, { "epoch": 1.1985850812957677, "grad_norm": 0.06825874139662513, "learning_rate": 7.962281435003199e-06, "loss": 0.4449, "step": 2416 }, { "epoch": 1.199081543999007, "grad_norm": 0.07409910389087015, "learning_rate": 7.960706497868537e-06, "loss": 0.4958, "step": 2417 }, { "epoch": 1.1995780067022466, "grad_norm": 0.0739553558009401, "learning_rate": 7.959131108237361e-06, "loss": 0.5019, "step": 2418 }, { "epoch": 1.200074469405486, "grad_norm": 0.07170215535474178, "learning_rate": 7.95755526635044e-06, "loss": 0.4887, "step": 2419 }, { "epoch": 1.2005709321087252, "grad_norm": 0.07683710435298069, "learning_rate": 7.955978972448618e-06, "loss": 0.4925, "step": 2420 }, { "epoch": 1.2010673948119648, "grad_norm": 0.07377097983405335, "learning_rate": 7.954402226772804e-06, "loss": 0.4788, "step": 2421 }, { "epoch": 1.2015638575152041, "grad_norm": 0.07655586471619548, "learning_rate": 7.95282502956398e-06, "loss": 0.5347, "step": 2422 }, { "epoch": 1.2020603202184437, "grad_norm": 0.07406558881865472, "learning_rate": 7.951247381063195e-06, "loss": 0.5173, "step": 2423 }, { "epoch": 1.202556782921683, "grad_norm": 0.07322830499207313, "learning_rate": 7.949669281511569e-06, "loss": 0.4733, "step": 2424 }, { "epoch": 1.2030532456249223, "grad_norm": 0.07039907851339344, "learning_rate": 7.948090731150287e-06, "loss": 0.477, "step": 2425 }, { "epoch": 1.2035497083281619, "grad_norm": 0.07037418167901223, "learning_rate": 7.946511730220605e-06, "loss": 0.4692, "step": 2426 }, { "epoch": 1.2040461710314012, "grad_norm": 0.07368459831288712, "learning_rate": 7.94493227896385e-06, "loss": 0.4857, "step": 2427 }, { "epoch": 1.2045426337346408, "grad_norm": 0.07426055953297137, "learning_rate": 7.943352377621414e-06, "loss": 0.489, "step": 2428 }, { "epoch": 1.20503909643788, "grad_norm": 0.06945786010896897, "learning_rate": 7.941772026434759e-06, "loss": 0.4716, "step": 2429 }, { "epoch": 1.2055355591411194, "grad_norm": 0.07502359795174329, "learning_rate": 7.94019122564542e-06, "loss": 0.4873, "step": 2430 }, { "epoch": 1.206032021844359, "grad_norm": 0.07253641226610841, "learning_rate": 7.938609975494992e-06, "loss": 0.4767, "step": 2431 }, { "epoch": 1.2065284845475983, "grad_norm": 0.07172366435546958, "learning_rate": 7.937028276225149e-06, "loss": 0.475, "step": 2432 }, { "epoch": 1.2070249472508379, "grad_norm": 0.07075938334855661, "learning_rate": 7.935446128077624e-06, "loss": 0.4911, "step": 2433 }, { "epoch": 1.2075214099540772, "grad_norm": 0.0729907659202211, "learning_rate": 7.933863531294224e-06, "loss": 0.4841, "step": 2434 }, { "epoch": 1.2080178726573165, "grad_norm": 0.07374115829562204, "learning_rate": 7.932280486116825e-06, "loss": 0.4853, "step": 2435 }, { "epoch": 1.208514335360556, "grad_norm": 0.07806369217645258, "learning_rate": 7.93069699278737e-06, "loss": 0.5576, "step": 2436 }, { "epoch": 1.2090107980637954, "grad_norm": 0.0718331092080611, "learning_rate": 7.92911305154787e-06, "loss": 0.4735, "step": 2437 }, { "epoch": 1.209507260767035, "grad_norm": 0.06731176175485117, "learning_rate": 7.927528662640402e-06, "loss": 0.4893, "step": 2438 }, { "epoch": 1.2100037234702743, "grad_norm": 0.07186021670426744, "learning_rate": 7.925943826307119e-06, "loss": 0.5123, "step": 2439 }, { "epoch": 1.2105001861735136, "grad_norm": 0.07473382580979585, "learning_rate": 7.924358542790236e-06, "loss": 0.5024, "step": 2440 }, { "epoch": 
1.2109966488767532, "grad_norm": 0.07386057347004561, "learning_rate": 7.922772812332038e-06, "loss": 0.4854, "step": 2441 }, { "epoch": 1.2114931115799925, "grad_norm": 0.07206280929710557, "learning_rate": 7.92118663517488e-06, "loss": 0.5094, "step": 2442 }, { "epoch": 1.211989574283232, "grad_norm": 0.07061427887302714, "learning_rate": 7.919600011561181e-06, "loss": 0.4914, "step": 2443 }, { "epoch": 1.2124860369864714, "grad_norm": 0.07364474032932036, "learning_rate": 7.918012941733434e-06, "loss": 0.5141, "step": 2444 }, { "epoch": 1.2129824996897107, "grad_norm": 0.07157728506982447, "learning_rate": 7.916425425934195e-06, "loss": 0.5009, "step": 2445 }, { "epoch": 1.2134789623929503, "grad_norm": 0.07574472192509478, "learning_rate": 7.91483746440609e-06, "loss": 0.4826, "step": 2446 }, { "epoch": 1.2139754250961896, "grad_norm": 0.07518967003949961, "learning_rate": 7.913249057391815e-06, "loss": 0.5062, "step": 2447 }, { "epoch": 1.2144718877994292, "grad_norm": 0.07260291659745174, "learning_rate": 7.911660205134132e-06, "loss": 0.5081, "step": 2448 }, { "epoch": 1.2149683505026685, "grad_norm": 0.07482200230732527, "learning_rate": 7.910070907875871e-06, "loss": 0.4947, "step": 2449 }, { "epoch": 1.2154648132059078, "grad_norm": 0.07251676217475807, "learning_rate": 7.90848116585993e-06, "loss": 0.479, "step": 2450 }, { "epoch": 1.2159612759091474, "grad_norm": 0.0737268000170673, "learning_rate": 7.906890979329282e-06, "loss": 0.5006, "step": 2451 }, { "epoch": 1.2164577386123867, "grad_norm": 0.07387770278193574, "learning_rate": 7.905300348526951e-06, "loss": 0.4883, "step": 2452 }, { "epoch": 1.2169542013156263, "grad_norm": 0.07216039271740472, "learning_rate": 7.903709273696047e-06, "loss": 0.469, "step": 2453 }, { "epoch": 1.2174506640188656, "grad_norm": 0.07259470892183868, "learning_rate": 7.902117755079738e-06, "loss": 0.4811, "step": 2454 }, { "epoch": 1.217947126722105, "grad_norm": 0.07458876832011727, "learning_rate": 7.90052579292126e-06, "loss": 0.5109, "step": 2455 }, { "epoch": 1.2184435894253445, "grad_norm": 0.06901029085737141, "learning_rate": 7.898933387463924e-06, "loss": 0.5012, "step": 2456 }, { "epoch": 1.2189400521285838, "grad_norm": 0.07324817525795049, "learning_rate": 7.897340538951099e-06, "loss": 0.4823, "step": 2457 }, { "epoch": 1.2194365148318234, "grad_norm": 0.08003178451935818, "learning_rate": 7.895747247626228e-06, "loss": 0.5125, "step": 2458 }, { "epoch": 1.2199329775350627, "grad_norm": 0.07274596398321198, "learning_rate": 7.89415351373282e-06, "loss": 0.4934, "step": 2459 }, { "epoch": 1.220429440238302, "grad_norm": 0.06964652576032265, "learning_rate": 7.892559337514451e-06, "loss": 0.4658, "step": 2460 }, { "epoch": 1.2209259029415416, "grad_norm": 0.07116022602457564, "learning_rate": 7.890964719214767e-06, "loss": 0.4764, "step": 2461 }, { "epoch": 1.221422365644781, "grad_norm": 0.06952619399608187, "learning_rate": 7.88936965907748e-06, "loss": 0.4558, "step": 2462 }, { "epoch": 1.2219188283480205, "grad_norm": 0.07518655995107235, "learning_rate": 7.887774157346365e-06, "loss": 0.4865, "step": 2463 }, { "epoch": 1.2224152910512598, "grad_norm": 0.07376865854194593, "learning_rate": 7.886178214265274e-06, "loss": 0.4992, "step": 2464 }, { "epoch": 1.2229117537544991, "grad_norm": 0.07342072581864792, "learning_rate": 7.884581830078118e-06, "loss": 0.5262, "step": 2465 }, { "epoch": 1.2234082164577387, "grad_norm": 0.07167032318175433, "learning_rate": 7.88298500502888e-06, "loss": 0.5072, "step": 2466 }, { "epoch": 
1.223904679160978, "grad_norm": 0.07097269675747088, "learning_rate": 7.88138773936161e-06, "loss": 0.4793, "step": 2467 }, { "epoch": 1.2244011418642176, "grad_norm": 0.07013672385308536, "learning_rate": 7.879790033320424e-06, "loss": 0.4929, "step": 2468 }, { "epoch": 1.2248976045674569, "grad_norm": 0.07223102583442119, "learning_rate": 7.878191887149504e-06, "loss": 0.511, "step": 2469 }, { "epoch": 1.2253940672706962, "grad_norm": 0.07669917513271436, "learning_rate": 7.876593301093104e-06, "loss": 0.5289, "step": 2470 }, { "epoch": 1.2258905299739358, "grad_norm": 0.07446008605700466, "learning_rate": 7.87499427539554e-06, "loss": 0.5093, "step": 2471 }, { "epoch": 1.226386992677175, "grad_norm": 0.07618064450545196, "learning_rate": 7.873394810301198e-06, "loss": 0.4884, "step": 2472 }, { "epoch": 1.2268834553804147, "grad_norm": 0.07167284442434069, "learning_rate": 7.87179490605453e-06, "loss": 0.4948, "step": 2473 }, { "epoch": 1.227379918083654, "grad_norm": 0.07401798466940147, "learning_rate": 7.870194562900055e-06, "loss": 0.4849, "step": 2474 }, { "epoch": 1.2278763807868933, "grad_norm": 0.07186829556894118, "learning_rate": 7.868593781082364e-06, "loss": 0.5079, "step": 2475 }, { "epoch": 1.2283728434901329, "grad_norm": 0.06863177399733751, "learning_rate": 7.866992560846107e-06, "loss": 0.4458, "step": 2476 }, { "epoch": 1.2288693061933722, "grad_norm": 0.07297315411470365, "learning_rate": 7.865390902436005e-06, "loss": 0.4827, "step": 2477 }, { "epoch": 1.2293657688966118, "grad_norm": 0.07130156565218151, "learning_rate": 7.863788806096847e-06, "loss": 0.4858, "step": 2478 }, { "epoch": 1.229862231599851, "grad_norm": 0.07309681317328279, "learning_rate": 7.862186272073489e-06, "loss": 0.4791, "step": 2479 }, { "epoch": 1.2303586943030904, "grad_norm": 0.07059455181087342, "learning_rate": 7.860583300610849e-06, "loss": 0.4786, "step": 2480 }, { "epoch": 1.23085515700633, "grad_norm": 0.07278780610477634, "learning_rate": 7.858979891953918e-06, "loss": 0.487, "step": 2481 }, { "epoch": 1.2313516197095693, "grad_norm": 0.07228312983604328, "learning_rate": 7.85737604634775e-06, "loss": 0.4954, "step": 2482 }, { "epoch": 1.2318480824128089, "grad_norm": 0.07191368746552866, "learning_rate": 7.85577176403747e-06, "loss": 0.4872, "step": 2483 }, { "epoch": 1.2323445451160482, "grad_norm": 0.07136655375077475, "learning_rate": 7.854167045268265e-06, "loss": 0.502, "step": 2484 }, { "epoch": 1.2328410078192875, "grad_norm": 0.07208287450277875, "learning_rate": 7.852561890285385e-06, "loss": 0.4857, "step": 2485 }, { "epoch": 1.233337470522527, "grad_norm": 0.07296424887863194, "learning_rate": 7.850956299334162e-06, "loss": 0.5175, "step": 2486 }, { "epoch": 1.2338339332257664, "grad_norm": 0.07366881702863355, "learning_rate": 7.84935027265998e-06, "loss": 0.5011, "step": 2487 }, { "epoch": 1.234330395929006, "grad_norm": 0.07599137022216883, "learning_rate": 7.847743810508292e-06, "loss": 0.5042, "step": 2488 }, { "epoch": 1.2348268586322453, "grad_norm": 0.07111617274127456, "learning_rate": 7.846136913124627e-06, "loss": 0.4698, "step": 2489 }, { "epoch": 1.2353233213354846, "grad_norm": 0.07201232200572243, "learning_rate": 7.844529580754566e-06, "loss": 0.472, "step": 2490 }, { "epoch": 1.2358197840387242, "grad_norm": 0.07341460404524983, "learning_rate": 7.842921813643767e-06, "loss": 0.4736, "step": 2491 }, { "epoch": 1.2363162467419635, "grad_norm": 0.07516599673488107, "learning_rate": 7.841313612037953e-06, "loss": 0.536, "step": 2492 }, { "epoch": 
1.236812709445203, "grad_norm": 0.07612536960775762, "learning_rate": 7.83970497618291e-06, "loss": 0.5107, "step": 2493 }, { "epoch": 1.2373091721484424, "grad_norm": 0.0712196177649546, "learning_rate": 7.838095906324493e-06, "loss": 0.5272, "step": 2494 }, { "epoch": 1.2378056348516817, "grad_norm": 0.07174382506085918, "learning_rate": 7.83648640270862e-06, "loss": 0.4699, "step": 2495 }, { "epoch": 1.2383020975549213, "grad_norm": 0.07176500264816929, "learning_rate": 7.834876465581283e-06, "loss": 0.4689, "step": 2496 }, { "epoch": 1.2387985602581606, "grad_norm": 0.07375994219799964, "learning_rate": 7.83326609518853e-06, "loss": 0.4781, "step": 2497 }, { "epoch": 1.2392950229614, "grad_norm": 0.0706960013355042, "learning_rate": 7.831655291776484e-06, "loss": 0.493, "step": 2498 }, { "epoch": 1.2397914856646395, "grad_norm": 0.07198655800007961, "learning_rate": 7.830044055591326e-06, "loss": 0.4644, "step": 2499 }, { "epoch": 1.2402879483678788, "grad_norm": 0.07948572448179321, "learning_rate": 7.828432386879314e-06, "loss": 0.5537, "step": 2500 }, { "epoch": 1.2407844110711184, "grad_norm": 0.07649974228423813, "learning_rate": 7.82682028588676e-06, "loss": 0.508, "step": 2501 }, { "epoch": 1.2412808737743577, "grad_norm": 0.07004967280284491, "learning_rate": 7.82520775286005e-06, "loss": 0.4786, "step": 2502 }, { "epoch": 1.241777336477597, "grad_norm": 0.07152110383482235, "learning_rate": 7.823594788045633e-06, "loss": 0.5309, "step": 2503 }, { "epoch": 1.2422737991808366, "grad_norm": 0.07063608077865494, "learning_rate": 7.821981391690026e-06, "loss": 0.4706, "step": 2504 }, { "epoch": 1.242770261884076, "grad_norm": 0.07701316889800286, "learning_rate": 7.82036756403981e-06, "loss": 0.5144, "step": 2505 }, { "epoch": 1.2432667245873155, "grad_norm": 0.07226679143037919, "learning_rate": 7.818753305341635e-06, "loss": 0.5024, "step": 2506 }, { "epoch": 1.2437631872905548, "grad_norm": 0.07865435918533266, "learning_rate": 7.817138615842212e-06, "loss": 0.5544, "step": 2507 }, { "epoch": 1.2442596499937941, "grad_norm": 0.07980179701888628, "learning_rate": 7.81552349578832e-06, "loss": 0.5101, "step": 2508 }, { "epoch": 1.2447561126970337, "grad_norm": 0.0720486211523932, "learning_rate": 7.813907945426806e-06, "loss": 0.4818, "step": 2509 }, { "epoch": 1.245252575400273, "grad_norm": 0.0697060574436996, "learning_rate": 7.81229196500458e-06, "loss": 0.4905, "step": 2510 }, { "epoch": 1.2457490381035126, "grad_norm": 0.0712762778017296, "learning_rate": 7.810675554768616e-06, "loss": 0.4704, "step": 2511 }, { "epoch": 1.246245500806752, "grad_norm": 0.07355863446675028, "learning_rate": 7.809058714965962e-06, "loss": 0.479, "step": 2512 }, { "epoch": 1.2467419635099912, "grad_norm": 0.07255719671250284, "learning_rate": 7.807441445843723e-06, "loss": 0.4959, "step": 2513 }, { "epoch": 1.2472384262132308, "grad_norm": 0.07805962130769145, "learning_rate": 7.805823747649073e-06, "loss": 0.4964, "step": 2514 }, { "epoch": 1.24773488891647, "grad_norm": 0.07922959031722938, "learning_rate": 7.80420562062925e-06, "loss": 0.4982, "step": 2515 }, { "epoch": 1.2482313516197097, "grad_norm": 0.07529660466681241, "learning_rate": 7.802587065031561e-06, "loss": 0.4909, "step": 2516 }, { "epoch": 1.248727814322949, "grad_norm": 0.07433614085477507, "learning_rate": 7.800968081103375e-06, "loss": 0.5054, "step": 2517 }, { "epoch": 1.2492242770261883, "grad_norm": 0.07091809704367669, "learning_rate": 7.799348669092128e-06, "loss": 0.4713, "step": 2518 }, { "epoch": 1.2497207397294279, 
"grad_norm": 0.07280705234842633, "learning_rate": 7.797728829245321e-06, "loss": 0.4936, "step": 2519 }, { "epoch": 1.2502172024326672, "grad_norm": 0.07158759022082693, "learning_rate": 7.79610856181052e-06, "loss": 0.4401, "step": 2520 }, { "epoch": 1.2502172024326672, "eval_loss": 0.5242970585823059, "eval_runtime": 259.1392, "eval_samples_per_second": 117.13, "eval_steps_per_second": 14.645, "step": 2520 }, { "epoch": 1.2507136651359065, "grad_norm": 0.07377314632312748, "learning_rate": 7.794487867035358e-06, "loss": 0.4947, "step": 2521 }, { "epoch": 1.251210127839146, "grad_norm": 0.07476997296963254, "learning_rate": 7.792866745167532e-06, "loss": 0.4837, "step": 2522 }, { "epoch": 1.2517065905423854, "grad_norm": 0.06860856143991592, "learning_rate": 7.791245196454803e-06, "loss": 0.4677, "step": 2523 }, { "epoch": 1.252203053245625, "grad_norm": 0.07706477950077137, "learning_rate": 7.789623221145002e-06, "loss": 0.5447, "step": 2524 }, { "epoch": 1.2526995159488643, "grad_norm": 0.07191297047243664, "learning_rate": 7.788000819486019e-06, "loss": 0.4662, "step": 2525 }, { "epoch": 1.2531959786521036, "grad_norm": 0.07723494482816311, "learning_rate": 7.786377991725813e-06, "loss": 0.5004, "step": 2526 }, { "epoch": 1.2536924413553432, "grad_norm": 0.07573264886896905, "learning_rate": 7.784754738112406e-06, "loss": 0.4696, "step": 2527 }, { "epoch": 1.2541889040585825, "grad_norm": 0.07425027782995233, "learning_rate": 7.783131058893889e-06, "loss": 0.5003, "step": 2528 }, { "epoch": 1.254685366761822, "grad_norm": 0.07825615066186287, "learning_rate": 7.781506954318413e-06, "loss": 0.5098, "step": 2529 }, { "epoch": 1.2551818294650614, "grad_norm": 0.08341839776678486, "learning_rate": 7.779882424634197e-06, "loss": 0.5447, "step": 2530 }, { "epoch": 1.2556782921683007, "grad_norm": 0.07146841653369992, "learning_rate": 7.778257470089524e-06, "loss": 0.5097, "step": 2531 }, { "epoch": 1.2561747548715403, "grad_norm": 0.07257801387976101, "learning_rate": 7.776632090932745e-06, "loss": 0.4839, "step": 2532 }, { "epoch": 1.2566712175747796, "grad_norm": 0.0735635305923275, "learning_rate": 7.775006287412268e-06, "loss": 0.4542, "step": 2533 }, { "epoch": 1.2571676802780192, "grad_norm": 0.07223004129649539, "learning_rate": 7.773380059776575e-06, "loss": 0.4778, "step": 2534 }, { "epoch": 1.2576641429812585, "grad_norm": 0.07769649472723285, "learning_rate": 7.771753408274208e-06, "loss": 0.5006, "step": 2535 }, { "epoch": 1.2581606056844978, "grad_norm": 0.0733052305747097, "learning_rate": 7.770126333153772e-06, "loss": 0.5167, "step": 2536 }, { "epoch": 1.2586570683877374, "grad_norm": 0.07351854004910238, "learning_rate": 7.768498834663945e-06, "loss": 0.4859, "step": 2537 }, { "epoch": 1.2591535310909767, "grad_norm": 0.07657201822534493, "learning_rate": 7.766870913053456e-06, "loss": 0.4979, "step": 2538 }, { "epoch": 1.2596499937942163, "grad_norm": 0.0742793733950408, "learning_rate": 7.765242568571116e-06, "loss": 0.5173, "step": 2539 }, { "epoch": 1.2601464564974556, "grad_norm": 0.07628534364392862, "learning_rate": 7.763613801465785e-06, "loss": 0.4761, "step": 2540 }, { "epoch": 1.260642919200695, "grad_norm": 0.07099537482619052, "learning_rate": 7.761984611986396e-06, "loss": 0.4917, "step": 2541 }, { "epoch": 1.2611393819039345, "grad_norm": 0.07077673464762375, "learning_rate": 7.760355000381942e-06, "loss": 0.5155, "step": 2542 }, { "epoch": 1.2616358446071738, "grad_norm": 0.07090392931999898, "learning_rate": 7.758724966901487e-06, "loss": 0.4918, "step": 
2543 }, { "epoch": 1.2621323073104134, "grad_norm": 0.07125484939936946, "learning_rate": 7.757094511794155e-06, "loss": 0.472, "step": 2544 }, { "epoch": 1.2626287700136527, "grad_norm": 0.0718501582192958, "learning_rate": 7.755463635309131e-06, "loss": 0.5081, "step": 2545 }, { "epoch": 1.263125232716892, "grad_norm": 0.07819740213883132, "learning_rate": 7.753832337695672e-06, "loss": 0.524, "step": 2546 }, { "epoch": 1.2636216954201316, "grad_norm": 0.07653866302850325, "learning_rate": 7.752200619203094e-06, "loss": 0.5639, "step": 2547 }, { "epoch": 1.264118158123371, "grad_norm": 0.07348045181586392, "learning_rate": 7.75056848008078e-06, "loss": 0.4803, "step": 2548 }, { "epoch": 1.2646146208266105, "grad_norm": 0.07076246994678682, "learning_rate": 7.748935920578176e-06, "loss": 0.4713, "step": 2549 }, { "epoch": 1.2651110835298498, "grad_norm": 0.07313613013227323, "learning_rate": 7.747302940944791e-06, "loss": 0.5083, "step": 2550 }, { "epoch": 1.2656075462330891, "grad_norm": 0.07349928306237019, "learning_rate": 7.745669541430204e-06, "loss": 0.5042, "step": 2551 }, { "epoch": 1.2661040089363287, "grad_norm": 0.07157914735485121, "learning_rate": 7.744035722284049e-06, "loss": 0.5005, "step": 2552 }, { "epoch": 1.266600471639568, "grad_norm": 0.07520177710019567, "learning_rate": 7.74240148375603e-06, "loss": 0.5022, "step": 2553 }, { "epoch": 1.2670969343428076, "grad_norm": 0.07163077315954698, "learning_rate": 7.740766826095918e-06, "loss": 0.4607, "step": 2554 }, { "epoch": 1.267593397046047, "grad_norm": 0.0743815357073748, "learning_rate": 7.73913174955354e-06, "loss": 0.4875, "step": 2555 }, { "epoch": 1.2680898597492862, "grad_norm": 0.06912879169110557, "learning_rate": 7.737496254378794e-06, "loss": 0.4677, "step": 2556 }, { "epoch": 1.2685863224525258, "grad_norm": 0.07262641887138097, "learning_rate": 7.735860340821635e-06, "loss": 0.496, "step": 2557 }, { "epoch": 1.2690827851557651, "grad_norm": 0.07555540297786063, "learning_rate": 7.734224009132091e-06, "loss": 0.4857, "step": 2558 }, { "epoch": 1.2695792478590047, "grad_norm": 0.07481020040902342, "learning_rate": 7.732587259560247e-06, "loss": 0.4845, "step": 2559 }, { "epoch": 1.270075710562244, "grad_norm": 0.07347608023900698, "learning_rate": 7.730950092356254e-06, "loss": 0.4766, "step": 2560 }, { "epoch": 1.2705721732654833, "grad_norm": 0.07310849706261968, "learning_rate": 7.729312507770326e-06, "loss": 0.4632, "step": 2561 }, { "epoch": 1.2710686359687229, "grad_norm": 0.07375541777210778, "learning_rate": 7.727674506052744e-06, "loss": 0.5228, "step": 2562 }, { "epoch": 1.2715650986719622, "grad_norm": 0.07333928626803109, "learning_rate": 7.726036087453848e-06, "loss": 0.4628, "step": 2563 }, { "epoch": 1.2720615613752018, "grad_norm": 0.07561533426069027, "learning_rate": 7.724397252224045e-06, "loss": 0.5148, "step": 2564 }, { "epoch": 1.272558024078441, "grad_norm": 0.07474417547503953, "learning_rate": 7.722758000613804e-06, "loss": 0.5097, "step": 2565 }, { "epoch": 1.2730544867816804, "grad_norm": 0.07102407219450897, "learning_rate": 7.721118332873659e-06, "loss": 0.476, "step": 2566 }, { "epoch": 1.27355094948492, "grad_norm": 0.07287426452910022, "learning_rate": 7.719478249254206e-06, "loss": 0.47, "step": 2567 }, { "epoch": 1.2740474121881593, "grad_norm": 0.07176497136363207, "learning_rate": 7.717837750006106e-06, "loss": 0.4692, "step": 2568 }, { "epoch": 1.2745438748913989, "grad_norm": 0.07219630348957971, "learning_rate": 7.716196835380084e-06, "loss": 0.4837, "step": 2569 }, { 
"epoch": 1.2750403375946382, "grad_norm": 0.07536620917998516, "learning_rate": 7.714555505626927e-06, "loss": 0.5168, "step": 2570 }, { "epoch": 1.2755368002978775, "grad_norm": 0.0714646589840732, "learning_rate": 7.712913760997484e-06, "loss": 0.4711, "step": 2571 }, { "epoch": 1.276033263001117, "grad_norm": 0.07502031913388156, "learning_rate": 7.71127160174267e-06, "loss": 0.5066, "step": 2572 }, { "epoch": 1.2765297257043564, "grad_norm": 0.07134378711895495, "learning_rate": 7.709629028113468e-06, "loss": 0.5159, "step": 2573 }, { "epoch": 1.277026188407596, "grad_norm": 0.07332695455794223, "learning_rate": 7.707986040360911e-06, "loss": 0.5021, "step": 2574 }, { "epoch": 1.2775226511108353, "grad_norm": 0.07145066063460748, "learning_rate": 7.706342638736108e-06, "loss": 0.512, "step": 2575 }, { "epoch": 1.2780191138140746, "grad_norm": 0.07354359727508271, "learning_rate": 7.704698823490226e-06, "loss": 0.4953, "step": 2576 }, { "epoch": 1.2785155765173142, "grad_norm": 0.07270371860886368, "learning_rate": 7.703054594874495e-06, "loss": 0.5099, "step": 2577 }, { "epoch": 1.2790120392205535, "grad_norm": 0.06624808865885844, "learning_rate": 7.701409953140209e-06, "loss": 0.4564, "step": 2578 }, { "epoch": 1.279508501923793, "grad_norm": 0.07412750614095082, "learning_rate": 7.699764898538726e-06, "loss": 0.5055, "step": 2579 }, { "epoch": 1.2800049646270324, "grad_norm": 0.07275195995178917, "learning_rate": 7.698119431321464e-06, "loss": 0.4772, "step": 2580 }, { "epoch": 1.2805014273302717, "grad_norm": 0.07425362001957296, "learning_rate": 7.69647355173991e-06, "loss": 0.5233, "step": 2581 }, { "epoch": 1.2809978900335113, "grad_norm": 0.07469451138696086, "learning_rate": 7.694827260045608e-06, "loss": 0.4987, "step": 2582 }, { "epoch": 1.2814943527367506, "grad_norm": 0.0708213062123937, "learning_rate": 7.693180556490167e-06, "loss": 0.5357, "step": 2583 }, { "epoch": 1.2819908154399902, "grad_norm": 0.07497939814476405, "learning_rate": 7.691533441325261e-06, "loss": 0.5359, "step": 2584 }, { "epoch": 1.2824872781432295, "grad_norm": 0.07164073703137543, "learning_rate": 7.689885914802622e-06, "loss": 0.4742, "step": 2585 }, { "epoch": 1.2829837408464688, "grad_norm": 0.07677183840172884, "learning_rate": 7.68823797717405e-06, "loss": 0.4812, "step": 2586 }, { "epoch": 1.2834802035497084, "grad_norm": 0.07077909405950275, "learning_rate": 7.68658962869141e-06, "loss": 0.4696, "step": 2587 }, { "epoch": 1.2839766662529477, "grad_norm": 0.07354323740522496, "learning_rate": 7.684940869606617e-06, "loss": 0.4724, "step": 2588 }, { "epoch": 1.2844731289561873, "grad_norm": 0.06993500834216944, "learning_rate": 7.683291700171663e-06, "loss": 0.4929, "step": 2589 }, { "epoch": 1.2849695916594266, "grad_norm": 0.07345783185058559, "learning_rate": 7.681642120638596e-06, "loss": 0.507, "step": 2590 }, { "epoch": 1.285466054362666, "grad_norm": 0.07367018462400143, "learning_rate": 7.679992131259528e-06, "loss": 0.497, "step": 2591 }, { "epoch": 1.2859625170659055, "grad_norm": 0.07077074820163252, "learning_rate": 7.678341732286633e-06, "loss": 0.4722, "step": 2592 }, { "epoch": 1.2864589797691448, "grad_norm": 0.07093567714122909, "learning_rate": 7.676690923972148e-06, "loss": 0.4881, "step": 2593 }, { "epoch": 1.2869554424723844, "grad_norm": 0.0692286985895299, "learning_rate": 7.675039706568373e-06, "loss": 0.4654, "step": 2594 }, { "epoch": 1.2874519051756237, "grad_norm": 0.07470922721879901, "learning_rate": 7.673388080327669e-06, "loss": 0.464, "step": 2595 }, { 
"epoch": 1.287948367878863, "grad_norm": 0.07267808804570815, "learning_rate": 7.671736045502462e-06, "loss": 0.525, "step": 2596 }, { "epoch": 1.2884448305821026, "grad_norm": 0.07322570248638664, "learning_rate": 7.670083602345239e-06, "loss": 0.4661, "step": 2597 }, { "epoch": 1.288941293285342, "grad_norm": 0.07374076711966049, "learning_rate": 7.66843075110855e-06, "loss": 0.4996, "step": 2598 }, { "epoch": 1.2894377559885815, "grad_norm": 0.07305715181136896, "learning_rate": 7.666777492045003e-06, "loss": 0.4805, "step": 2599 }, { "epoch": 1.2899342186918208, "grad_norm": 0.07041169265149858, "learning_rate": 7.665123825407276e-06, "loss": 0.465, "step": 2600 }, { "epoch": 1.2904306813950601, "grad_norm": 0.07711313194745137, "learning_rate": 7.663469751448104e-06, "loss": 0.4995, "step": 2601 }, { "epoch": 1.2909271440982997, "grad_norm": 0.07276220005027466, "learning_rate": 7.661815270420286e-06, "loss": 0.5004, "step": 2602 }, { "epoch": 1.291423606801539, "grad_norm": 0.06923528529241481, "learning_rate": 7.660160382576683e-06, "loss": 0.4784, "step": 2603 }, { "epoch": 1.2919200695047786, "grad_norm": 0.07204538421004412, "learning_rate": 7.65850508817022e-06, "loss": 0.504, "step": 2604 }, { "epoch": 1.2924165322080179, "grad_norm": 0.0729160710003885, "learning_rate": 7.656849387453878e-06, "loss": 0.4906, "step": 2605 }, { "epoch": 1.2929129949112572, "grad_norm": 0.07535310209672472, "learning_rate": 7.655193280680706e-06, "loss": 0.5436, "step": 2606 }, { "epoch": 1.2934094576144968, "grad_norm": 0.0726456432774452, "learning_rate": 7.653536768103814e-06, "loss": 0.4971, "step": 2607 }, { "epoch": 1.293905920317736, "grad_norm": 0.07302223221909274, "learning_rate": 7.651879849976374e-06, "loss": 0.4966, "step": 2608 }, { "epoch": 1.2944023830209757, "grad_norm": 0.07004922600667704, "learning_rate": 7.650222526551618e-06, "loss": 0.4657, "step": 2609 }, { "epoch": 1.294898845724215, "grad_norm": 0.07015347440478134, "learning_rate": 7.648564798082842e-06, "loss": 0.459, "step": 2610 }, { "epoch": 1.2953953084274543, "grad_norm": 0.0745936418355579, "learning_rate": 7.646906664823403e-06, "loss": 0.4771, "step": 2611 }, { "epoch": 1.2958917711306939, "grad_norm": 0.07667816509699976, "learning_rate": 7.645248127026723e-06, "loss": 0.5219, "step": 2612 }, { "epoch": 1.2963882338339332, "grad_norm": 0.07029635717147024, "learning_rate": 7.643589184946277e-06, "loss": 0.4832, "step": 2613 }, { "epoch": 1.2968846965371728, "grad_norm": 0.07366949480303261, "learning_rate": 7.641929838835613e-06, "loss": 0.5084, "step": 2614 }, { "epoch": 1.297381159240412, "grad_norm": 0.07378733016384503, "learning_rate": 7.640270088948332e-06, "loss": 0.502, "step": 2615 }, { "epoch": 1.2978776219436514, "grad_norm": 0.07359555858747464, "learning_rate": 7.6386099355381e-06, "loss": 0.4991, "step": 2616 }, { "epoch": 1.298374084646891, "grad_norm": 0.07487529791386079, "learning_rate": 7.636949378858647e-06, "loss": 0.5023, "step": 2617 }, { "epoch": 1.2988705473501303, "grad_norm": 0.07261861801231229, "learning_rate": 7.635288419163763e-06, "loss": 0.4968, "step": 2618 }, { "epoch": 1.2993670100533699, "grad_norm": 0.07170644816281213, "learning_rate": 7.633627056707297e-06, "loss": 0.495, "step": 2619 }, { "epoch": 1.2998634727566092, "grad_norm": 0.07326782739072436, "learning_rate": 7.631965291743163e-06, "loss": 0.4749, "step": 2620 }, { "epoch": 1.3003599354598485, "grad_norm": 0.07354949520465813, "learning_rate": 7.630303124525333e-06, "loss": 0.4887, "step": 2621 }, { "epoch": 
1.300856398163088, "grad_norm": 0.07677994339072212, "learning_rate": 7.628640555307845e-06, "loss": 0.4941, "step": 2622 }, { "epoch": 1.3013528608663274, "grad_norm": 0.07331225391921062, "learning_rate": 7.626977584344795e-06, "loss": 0.4858, "step": 2623 }, { "epoch": 1.301849323569567, "grad_norm": 0.07170856926012528, "learning_rate": 7.625314211890342e-06, "loss": 0.4455, "step": 2624 }, { "epoch": 1.3023457862728063, "grad_norm": 0.07475766562711376, "learning_rate": 7.623650438198707e-06, "loss": 0.4724, "step": 2625 }, { "epoch": 1.3028422489760456, "grad_norm": 0.07303322858619267, "learning_rate": 7.621986263524166e-06, "loss": 0.5025, "step": 2626 }, { "epoch": 1.3033387116792852, "grad_norm": 0.07209015796425527, "learning_rate": 7.620321688121066e-06, "loss": 0.533, "step": 2627 }, { "epoch": 1.3038351743825245, "grad_norm": 0.07418574085022885, "learning_rate": 7.618656712243813e-06, "loss": 0.479, "step": 2628 }, { "epoch": 1.304331637085764, "grad_norm": 0.07566746702664336, "learning_rate": 7.616991336146864e-06, "loss": 0.4857, "step": 2629 }, { "epoch": 1.3048280997890034, "grad_norm": 0.06931794377147632, "learning_rate": 7.615325560084752e-06, "loss": 0.4614, "step": 2630 }, { "epoch": 1.3053245624922427, "grad_norm": 0.07616031124079053, "learning_rate": 7.613659384312062e-06, "loss": 0.5274, "step": 2631 }, { "epoch": 1.3058210251954823, "grad_norm": 0.07085893200757314, "learning_rate": 7.611992809083439e-06, "loss": 0.4691, "step": 2632 }, { "epoch": 1.3063174878987216, "grad_norm": 0.07319646074018087, "learning_rate": 7.610325834653598e-06, "loss": 0.517, "step": 2633 }, { "epoch": 1.3068139506019611, "grad_norm": 0.07459044022717175, "learning_rate": 7.6086584612773055e-06, "loss": 0.4582, "step": 2634 }, { "epoch": 1.3073104133052005, "grad_norm": 0.07570613366111596, "learning_rate": 7.606990689209395e-06, "loss": 0.5111, "step": 2635 }, { "epoch": 1.3078068760084398, "grad_norm": 0.07428287381621476, "learning_rate": 7.605322518704759e-06, "loss": 0.4913, "step": 2636 }, { "epoch": 1.3083033387116794, "grad_norm": 0.0734226364221571, "learning_rate": 7.603653950018346e-06, "loss": 0.4762, "step": 2637 }, { "epoch": 1.3087998014149187, "grad_norm": 0.07332280586009639, "learning_rate": 7.601984983405173e-06, "loss": 0.4946, "step": 2638 }, { "epoch": 1.3092962641181582, "grad_norm": 0.07120489183911462, "learning_rate": 7.600315619120317e-06, "loss": 0.4762, "step": 2639 }, { "epoch": 1.3097927268213976, "grad_norm": 0.07303767095401219, "learning_rate": 7.59864585741891e-06, "loss": 0.475, "step": 2640 }, { "epoch": 1.310289189524637, "grad_norm": 0.07187071930757646, "learning_rate": 7.596975698556151e-06, "loss": 0.474, "step": 2641 }, { "epoch": 1.3107856522278765, "grad_norm": 0.07415230799455923, "learning_rate": 7.595305142787294e-06, "loss": 0.5229, "step": 2642 }, { "epoch": 1.3112821149311158, "grad_norm": 0.07447483592407889, "learning_rate": 7.59363419036766e-06, "loss": 0.5035, "step": 2643 }, { "epoch": 1.3117785776343553, "grad_norm": 0.07040880107400416, "learning_rate": 7.591962841552627e-06, "loss": 0.4725, "step": 2644 }, { "epoch": 1.3122750403375947, "grad_norm": 0.06793138068471151, "learning_rate": 7.590291096597631e-06, "loss": 0.4412, "step": 2645 }, { "epoch": 1.312771503040834, "grad_norm": 0.07495097651516075, "learning_rate": 7.588618955758173e-06, "loss": 0.5002, "step": 2646 }, { "epoch": 1.3132679657440733, "grad_norm": 0.07565513056199885, "learning_rate": 7.586946419289813e-06, "loss": 0.495, "step": 2647 }, { "epoch": 
1.313764428447313, "grad_norm": 0.07846426523907774, "learning_rate": 7.58527348744817e-06, "loss": 0.4963, "step": 2648 }, { "epoch": 1.3142608911505524, "grad_norm": 0.07726618154433754, "learning_rate": 7.583600160488929e-06, "loss": 0.5038, "step": 2649 }, { "epoch": 1.3147573538537918, "grad_norm": 0.07177971035598558, "learning_rate": 7.581926438667826e-06, "loss": 0.4977, "step": 2650 }, { "epoch": 1.315253816557031, "grad_norm": 0.07234611189725769, "learning_rate": 7.580252322240666e-06, "loss": 0.4719, "step": 2651 }, { "epoch": 1.3157502792602704, "grad_norm": 0.07201594842948567, "learning_rate": 7.57857781146331e-06, "loss": 0.5089, "step": 2652 }, { "epoch": 1.31624674196351, "grad_norm": 0.07498288918290671, "learning_rate": 7.57690290659168e-06, "loss": 0.4814, "step": 2653 }, { "epoch": 1.3167432046667495, "grad_norm": 0.07618518497314423, "learning_rate": 7.575227607881757e-06, "loss": 0.494, "step": 2654 }, { "epoch": 1.3172396673699889, "grad_norm": 0.07245035215910761, "learning_rate": 7.573551915589586e-06, "loss": 0.5111, "step": 2655 }, { "epoch": 1.3177361300732282, "grad_norm": 0.07063014586866156, "learning_rate": 7.571875829971267e-06, "loss": 0.5076, "step": 2656 }, { "epoch": 1.3182325927764675, "grad_norm": 0.07384313400197565, "learning_rate": 7.5701993512829664e-06, "loss": 0.4998, "step": 2657 }, { "epoch": 1.318729055479707, "grad_norm": 0.0752424884466584, "learning_rate": 7.568522479780903e-06, "loss": 0.4881, "step": 2658 }, { "epoch": 1.3192255181829466, "grad_norm": 0.08052341158611934, "learning_rate": 7.566845215721362e-06, "loss": 0.495, "step": 2659 }, { "epoch": 1.319721980886186, "grad_norm": 0.07208145165414205, "learning_rate": 7.5651675593606876e-06, "loss": 0.4981, "step": 2660 }, { "epoch": 1.3202184435894253, "grad_norm": 0.07313301936435801, "learning_rate": 7.5634895109552795e-06, "loss": 0.4965, "step": 2661 }, { "epoch": 1.3207149062926646, "grad_norm": 0.07556523446833772, "learning_rate": 7.561811070761602e-06, "loss": 0.5176, "step": 2662 }, { "epoch": 1.3212113689959042, "grad_norm": 0.07319725779490793, "learning_rate": 7.56013223903618e-06, "loss": 0.4585, "step": 2663 }, { "epoch": 1.3217078316991435, "grad_norm": 0.07083563037225474, "learning_rate": 7.558453016035592e-06, "loss": 0.482, "step": 2664 }, { "epoch": 1.322204294402383, "grad_norm": 0.07220725027684433, "learning_rate": 7.556773402016482e-06, "loss": 0.5062, "step": 2665 }, { "epoch": 1.3227007571056224, "grad_norm": 0.07152061737141101, "learning_rate": 7.555093397235553e-06, "loss": 0.4993, "step": 2666 }, { "epoch": 1.3231972198088617, "grad_norm": 0.0731269231629725, "learning_rate": 7.553413001949566e-06, "loss": 0.5071, "step": 2667 }, { "epoch": 1.3236936825121013, "grad_norm": 0.07211546920533103, "learning_rate": 7.551732216415342e-06, "loss": 0.4899, "step": 2668 }, { "epoch": 1.3241901452153406, "grad_norm": 0.07427869438620326, "learning_rate": 7.5500510408897634e-06, "loss": 0.518, "step": 2669 }, { "epoch": 1.3246866079185802, "grad_norm": 0.07411425941651224, "learning_rate": 7.548369475629769e-06, "loss": 0.5099, "step": 2670 }, { "epoch": 1.3251830706218195, "grad_norm": 0.07102919893674516, "learning_rate": 7.546687520892361e-06, "loss": 0.4577, "step": 2671 }, { "epoch": 1.3256795333250588, "grad_norm": 0.07124317917931709, "learning_rate": 7.545005176934597e-06, "loss": 0.4857, "step": 2672 }, { "epoch": 1.3261759960282984, "grad_norm": 0.06769496175031949, "learning_rate": 7.543322444013601e-06, "loss": 0.4641, "step": 2673 }, { "epoch": 
1.3266724587315377, "grad_norm": 0.06899557389748309, "learning_rate": 7.541639322386546e-06, "loss": 0.4664, "step": 2674 }, { "epoch": 1.3271689214347773, "grad_norm": 0.07481434699614627, "learning_rate": 7.539955812310673e-06, "loss": 0.4744, "step": 2675 }, { "epoch": 1.3276653841380166, "grad_norm": 0.07304587484098414, "learning_rate": 7.538271914043281e-06, "loss": 0.4559, "step": 2676 }, { "epoch": 1.328161846841256, "grad_norm": 0.07415770338497507, "learning_rate": 7.536587627841723e-06, "loss": 0.5121, "step": 2677 }, { "epoch": 1.3286583095444955, "grad_norm": 0.07228770607947253, "learning_rate": 7.534902953963417e-06, "loss": 0.5157, "step": 2678 }, { "epoch": 1.3291547722477348, "grad_norm": 0.07319401385123882, "learning_rate": 7.533217892665839e-06, "loss": 0.504, "step": 2679 }, { "epoch": 1.3296512349509744, "grad_norm": 0.07045236666155964, "learning_rate": 7.531532444206524e-06, "loss": 0.5008, "step": 2680 }, { "epoch": 1.3301476976542137, "grad_norm": 0.07117873637101506, "learning_rate": 7.529846608843063e-06, "loss": 0.4847, "step": 2681 }, { "epoch": 1.330644160357453, "grad_norm": 0.07067616323712225, "learning_rate": 7.528160386833112e-06, "loss": 0.4804, "step": 2682 }, { "epoch": 1.3311406230606926, "grad_norm": 0.0759415536662072, "learning_rate": 7.526473778434383e-06, "loss": 0.4969, "step": 2683 }, { "epoch": 1.331637085763932, "grad_norm": 0.07086435551103258, "learning_rate": 7.524786783904645e-06, "loss": 0.4638, "step": 2684 }, { "epoch": 1.3321335484671715, "grad_norm": 0.07035449113846659, "learning_rate": 7.52309940350173e-06, "loss": 0.4932, "step": 2685 }, { "epoch": 1.3326300111704108, "grad_norm": 0.07525591700961363, "learning_rate": 7.521411637483525e-06, "loss": 0.4691, "step": 2686 }, { "epoch": 1.3331264738736501, "grad_norm": 0.07621426823298727, "learning_rate": 7.519723486107977e-06, "loss": 0.5114, "step": 2687 }, { "epoch": 1.3336229365768897, "grad_norm": 0.07019664010034184, "learning_rate": 7.518034949633097e-06, "loss": 0.4966, "step": 2688 }, { "epoch": 1.334119399280129, "grad_norm": 0.07454034140674029, "learning_rate": 7.51634602831695e-06, "loss": 0.4781, "step": 2689 }, { "epoch": 1.3346158619833686, "grad_norm": 0.07418990783466832, "learning_rate": 7.514656722417656e-06, "loss": 0.5156, "step": 2690 }, { "epoch": 1.335112324686608, "grad_norm": 0.07262590410613991, "learning_rate": 7.512967032193404e-06, "loss": 0.4759, "step": 2691 }, { "epoch": 1.3356087873898472, "grad_norm": 0.07326886517366066, "learning_rate": 7.511276957902431e-06, "loss": 0.5035, "step": 2692 }, { "epoch": 1.3361052500930868, "grad_norm": 0.07557567814539083, "learning_rate": 7.509586499803042e-06, "loss": 0.5098, "step": 2693 }, { "epoch": 1.3366017127963261, "grad_norm": 0.07269601973403042, "learning_rate": 7.507895658153594e-06, "loss": 0.4628, "step": 2694 }, { "epoch": 1.3370981754995657, "grad_norm": 0.0717655547977608, "learning_rate": 7.5062044332125076e-06, "loss": 0.5035, "step": 2695 }, { "epoch": 1.337594638202805, "grad_norm": 0.06961351252939559, "learning_rate": 7.504512825238255e-06, "loss": 0.47, "step": 2696 }, { "epoch": 1.3380911009060443, "grad_norm": 0.07492553610892763, "learning_rate": 7.502820834489374e-06, "loss": 0.5463, "step": 2697 }, { "epoch": 1.3385875636092839, "grad_norm": 0.07210512256673479, "learning_rate": 7.5011284612244585e-06, "loss": 0.4816, "step": 2698 }, { "epoch": 1.3390840263125232, "grad_norm": 0.0779198103694556, "learning_rate": 7.499435705702161e-06, "loss": 0.462, "step": 2699 }, { "epoch": 
1.3395804890157628, "grad_norm": 0.07158341114436462, "learning_rate": 7.497742568181191e-06, "loss": 0.4965, "step": 2700 }, { "epoch": 1.340076951719002, "grad_norm": 0.07158678321680322, "learning_rate": 7.496049048920317e-06, "loss": 0.4917, "step": 2701 }, { "epoch": 1.3405734144222414, "grad_norm": 0.07424116914927884, "learning_rate": 7.494355148178368e-06, "loss": 0.5064, "step": 2702 }, { "epoch": 1.341069877125481, "grad_norm": 0.08002397795499651, "learning_rate": 7.492660866214228e-06, "loss": 0.5164, "step": 2703 }, { "epoch": 1.3415663398287203, "grad_norm": 0.07652500765458362, "learning_rate": 7.490966203286841e-06, "loss": 0.4886, "step": 2704 }, { "epoch": 1.3420628025319599, "grad_norm": 0.075845565077813, "learning_rate": 7.489271159655212e-06, "loss": 0.5047, "step": 2705 }, { "epoch": 1.3425592652351992, "grad_norm": 0.07487393856531731, "learning_rate": 7.4875757355783955e-06, "loss": 0.4791, "step": 2706 }, { "epoch": 1.3430557279384385, "grad_norm": 0.07701656968563915, "learning_rate": 7.485879931315514e-06, "loss": 0.5021, "step": 2707 }, { "epoch": 1.343552190641678, "grad_norm": 0.07312866184878818, "learning_rate": 7.484183747125743e-06, "loss": 0.5169, "step": 2708 }, { "epoch": 1.3440486533449174, "grad_norm": 0.07242385146219911, "learning_rate": 7.482487183268318e-06, "loss": 0.5085, "step": 2709 }, { "epoch": 1.344545116048157, "grad_norm": 0.07506511763062933, "learning_rate": 7.480790240002529e-06, "loss": 0.4956, "step": 2710 }, { "epoch": 1.3450415787513963, "grad_norm": 0.07677366310399905, "learning_rate": 7.4790929175877305e-06, "loss": 0.5031, "step": 2711 }, { "epoch": 1.3455380414546356, "grad_norm": 0.07216912197089925, "learning_rate": 7.477395216283328e-06, "loss": 0.4683, "step": 2712 }, { "epoch": 1.3460345041578752, "grad_norm": 0.07071635918706311, "learning_rate": 7.475697136348787e-06, "loss": 0.4681, "step": 2713 }, { "epoch": 1.3465309668611145, "grad_norm": 0.07775974984507308, "learning_rate": 7.4739986780436345e-06, "loss": 0.5062, "step": 2714 }, { "epoch": 1.347027429564354, "grad_norm": 0.07973251419899774, "learning_rate": 7.472299841627452e-06, "loss": 0.4732, "step": 2715 }, { "epoch": 1.3475238922675934, "grad_norm": 0.0776256133543414, "learning_rate": 7.470600627359879e-06, "loss": 0.4989, "step": 2716 }, { "epoch": 1.3480203549708327, "grad_norm": 0.07218639025971221, "learning_rate": 7.468901035500613e-06, "loss": 0.4695, "step": 2717 }, { "epoch": 1.3485168176740723, "grad_norm": 0.07177156812514981, "learning_rate": 7.46720106630941e-06, "loss": 0.4525, "step": 2718 }, { "epoch": 1.3490132803773116, "grad_norm": 0.0743645465552789, "learning_rate": 7.465500720046082e-06, "loss": 0.4782, "step": 2719 }, { "epoch": 1.3495097430805512, "grad_norm": 0.07670649036827379, "learning_rate": 7.4637999969705e-06, "loss": 0.4898, "step": 2720 }, { "epoch": 1.3500062057837905, "grad_norm": 0.08775144330304403, "learning_rate": 7.462098897342593e-06, "loss": 0.4931, "step": 2721 }, { "epoch": 1.3505026684870298, "grad_norm": 0.07840866502242834, "learning_rate": 7.460397421422346e-06, "loss": 0.5265, "step": 2722 }, { "epoch": 1.3509991311902694, "grad_norm": 0.07196359299149803, "learning_rate": 7.458695569469802e-06, "loss": 0.4771, "step": 2723 }, { "epoch": 1.3514955938935087, "grad_norm": 0.06953071754532851, "learning_rate": 7.456993341745063e-06, "loss": 0.4897, "step": 2724 }, { "epoch": 1.3519920565967483, "grad_norm": 0.07063937728790774, "learning_rate": 7.455290738508288e-06, "loss": 0.4808, "step": 2725 }, { "epoch": 
1.3524885192999876, "grad_norm": 0.08088110766023791, "learning_rate": 7.453587760019691e-06, "loss": 0.4938, "step": 2726 }, { "epoch": 1.352984982003227, "grad_norm": 0.0736673081073693, "learning_rate": 7.451884406539545e-06, "loss": 0.4748, "step": 2727 }, { "epoch": 1.3534814447064665, "grad_norm": 0.06947625515604816, "learning_rate": 7.4501806783281785e-06, "loss": 0.4733, "step": 2728 }, { "epoch": 1.3539779074097058, "grad_norm": 0.07218112018283263, "learning_rate": 7.448476575645982e-06, "loss": 0.5189, "step": 2729 }, { "epoch": 1.3544743701129454, "grad_norm": 0.07421666853034066, "learning_rate": 7.446772098753398e-06, "loss": 0.4864, "step": 2730 }, { "epoch": 1.3549708328161847, "grad_norm": 0.0689127465215305, "learning_rate": 7.445067247910931e-06, "loss": 0.4633, "step": 2731 }, { "epoch": 1.355467295519424, "grad_norm": 0.07185351280383354, "learning_rate": 7.44336202337914e-06, "loss": 0.4856, "step": 2732 }, { "epoch": 1.3559637582226636, "grad_norm": 0.07237908595669429, "learning_rate": 7.441656425418639e-06, "loss": 0.4672, "step": 2733 }, { "epoch": 1.356460220925903, "grad_norm": 0.0717292512341406, "learning_rate": 7.439950454290103e-06, "loss": 0.4777, "step": 2734 }, { "epoch": 1.3569566836291425, "grad_norm": 0.07388599914969228, "learning_rate": 7.43824411025426e-06, "loss": 0.4854, "step": 2735 }, { "epoch": 1.3574531463323818, "grad_norm": 0.07748978640369678, "learning_rate": 7.4365373935719e-06, "loss": 0.4898, "step": 2736 }, { "epoch": 1.3579496090356211, "grad_norm": 0.07054638978491355, "learning_rate": 7.434830304503866e-06, "loss": 0.4763, "step": 2737 }, { "epoch": 1.3584460717388607, "grad_norm": 0.07292503890545296, "learning_rate": 7.43312284331106e-06, "loss": 0.5078, "step": 2738 }, { "epoch": 1.3589425344421, "grad_norm": 0.07105075474486573, "learning_rate": 7.431415010254439e-06, "loss": 0.4839, "step": 2739 }, { "epoch": 1.3594389971453396, "grad_norm": 0.07334885127452315, "learning_rate": 7.42970680559502e-06, "loss": 0.5086, "step": 2740 }, { "epoch": 1.3599354598485789, "grad_norm": 0.07527855807145163, "learning_rate": 7.427998229593873e-06, "loss": 0.4987, "step": 2741 }, { "epoch": 1.3604319225518182, "grad_norm": 0.07325157344838848, "learning_rate": 7.426289282512125e-06, "loss": 0.5008, "step": 2742 }, { "epoch": 1.3609283852550578, "grad_norm": 0.07267084590873454, "learning_rate": 7.424579964610963e-06, "loss": 0.4874, "step": 2743 }, { "epoch": 1.361424847958297, "grad_norm": 0.07193623146080504, "learning_rate": 7.422870276151629e-06, "loss": 0.471, "step": 2744 }, { "epoch": 1.3619213106615367, "grad_norm": 0.07179868825468037, "learning_rate": 7.42116021739542e-06, "loss": 0.497, "step": 2745 }, { "epoch": 1.362417773364776, "grad_norm": 0.0734773074878661, "learning_rate": 7.419449788603693e-06, "loss": 0.4774, "step": 2746 }, { "epoch": 1.3629142360680153, "grad_norm": 0.07023057493932133, "learning_rate": 7.417738990037859e-06, "loss": 0.4829, "step": 2747 }, { "epoch": 1.3634106987712549, "grad_norm": 0.07282086501091081, "learning_rate": 7.416027821959387e-06, "loss": 0.4958, "step": 2748 }, { "epoch": 1.3639071614744942, "grad_norm": 0.07089773487569606, "learning_rate": 7.414316284629799e-06, "loss": 0.4971, "step": 2749 }, { "epoch": 1.3644036241777338, "grad_norm": 0.0677423809061766, "learning_rate": 7.412604378310677e-06, "loss": 0.496, "step": 2750 }, { "epoch": 1.364900086880973, "grad_norm": 0.06777700725873004, "learning_rate": 7.4108921032636605e-06, "loss": 0.4729, "step": 2751 }, { "epoch": 
1.3653965495842124, "grad_norm": 0.07065655577723433, "learning_rate": 7.409179459750439e-06, "loss": 0.521, "step": 2752 }, { "epoch": 1.365893012287452, "grad_norm": 0.07392407970241713, "learning_rate": 7.407466448032768e-06, "loss": 0.4896, "step": 2753 }, { "epoch": 1.3663894749906913, "grad_norm": 0.06934154352736671, "learning_rate": 7.405753068372451e-06, "loss": 0.4528, "step": 2754 }, { "epoch": 1.3668859376939309, "grad_norm": 0.06887110619053861, "learning_rate": 7.40403932103135e-06, "loss": 0.4637, "step": 2755 }, { "epoch": 1.3673824003971702, "grad_norm": 0.07123362248000452, "learning_rate": 7.402325206271385e-06, "loss": 0.5373, "step": 2756 }, { "epoch": 1.3678788631004095, "grad_norm": 0.07431889053149492, "learning_rate": 7.400610724354531e-06, "loss": 0.4968, "step": 2757 }, { "epoch": 1.368375325803649, "grad_norm": 0.07044786714598965, "learning_rate": 7.398895875542818e-06, "loss": 0.4992, "step": 2758 }, { "epoch": 1.3688717885068884, "grad_norm": 0.08173014827815345, "learning_rate": 7.397180660098334e-06, "loss": 0.5075, "step": 2759 }, { "epoch": 1.369368251210128, "grad_norm": 0.07510902820516983, "learning_rate": 7.395465078283222e-06, "loss": 0.4954, "step": 2760 }, { "epoch": 1.3698647139133673, "grad_norm": 0.0717772886882652, "learning_rate": 7.393749130359681e-06, "loss": 0.5002, "step": 2761 }, { "epoch": 1.3703611766166066, "grad_norm": 0.07860853949103024, "learning_rate": 7.392032816589965e-06, "loss": 0.5241, "step": 2762 }, { "epoch": 1.3708576393198462, "grad_norm": 0.1974487115378719, "learning_rate": 7.390316137236389e-06, "loss": 0.5133, "step": 2763 }, { "epoch": 1.3713541020230855, "grad_norm": 0.07342381822412332, "learning_rate": 7.388599092561315e-06, "loss": 0.5204, "step": 2764 }, { "epoch": 1.371850564726325, "grad_norm": 0.07415428627463654, "learning_rate": 7.38688168282717e-06, "loss": 0.4918, "step": 2765 }, { "epoch": 1.3723470274295644, "grad_norm": 0.07375186245968954, "learning_rate": 7.3851639082964285e-06, "loss": 0.4917, "step": 2766 }, { "epoch": 1.3728434901328037, "grad_norm": 0.07311946684531592, "learning_rate": 7.383445769231628e-06, "loss": 0.54, "step": 2767 }, { "epoch": 1.3733399528360433, "grad_norm": 0.07154186959129635, "learning_rate": 7.381727265895356e-06, "loss": 0.4945, "step": 2768 }, { "epoch": 1.3738364155392826, "grad_norm": 0.07311048103614315, "learning_rate": 7.38000839855026e-06, "loss": 0.5018, "step": 2769 }, { "epoch": 1.3743328782425221, "grad_norm": 0.07076489048383193, "learning_rate": 7.378289167459043e-06, "loss": 0.5027, "step": 2770 }, { "epoch": 1.3748293409457615, "grad_norm": 0.07211214606420607, "learning_rate": 7.376569572884457e-06, "loss": 0.4903, "step": 2771 }, { "epoch": 1.3753258036490008, "grad_norm": 0.07315568940001275, "learning_rate": 7.374849615089318e-06, "loss": 0.497, "step": 2772 }, { "epoch": 1.3758222663522404, "grad_norm": 0.07074353994217372, "learning_rate": 7.373129294336494e-06, "loss": 0.4647, "step": 2773 }, { "epoch": 1.3763187290554797, "grad_norm": 0.07573576492885328, "learning_rate": 7.371408610888907e-06, "loss": 0.4607, "step": 2774 }, { "epoch": 1.3768151917587192, "grad_norm": 0.07374494787897591, "learning_rate": 7.3696875650095355e-06, "loss": 0.474, "step": 2775 }, { "epoch": 1.3773116544619586, "grad_norm": 0.06844979894385408, "learning_rate": 7.367966156961417e-06, "loss": 0.463, "step": 2776 }, { "epoch": 1.377808117165198, "grad_norm": 0.07494696046611961, "learning_rate": 7.366244387007637e-06, "loss": 0.4904, "step": 2777 }, { "epoch": 
1.3783045798684375, "grad_norm": 0.07245749426844175, "learning_rate": 7.364522255411342e-06, "loss": 0.4692, "step": 2778 }, { "epoch": 1.3788010425716768, "grad_norm": 0.07166509196102683, "learning_rate": 7.362799762435733e-06, "loss": 0.4576, "step": 2779 }, { "epoch": 1.3792975052749163, "grad_norm": 0.0692664814713353, "learning_rate": 7.361076908344066e-06, "loss": 0.481, "step": 2780 }, { "epoch": 1.3797939679781557, "grad_norm": 0.07418340449606887, "learning_rate": 7.359353693399651e-06, "loss": 0.4919, "step": 2781 }, { "epoch": 1.380290430681395, "grad_norm": 0.07669373600032768, "learning_rate": 7.357630117865852e-06, "loss": 0.5127, "step": 2782 }, { "epoch": 1.3807868933846346, "grad_norm": 0.07323914105177598, "learning_rate": 7.355906182006091e-06, "loss": 0.4794, "step": 2783 }, { "epoch": 1.381283356087874, "grad_norm": 0.07130416980727011, "learning_rate": 7.354181886083843e-06, "loss": 0.5066, "step": 2784 }, { "epoch": 1.3817798187911134, "grad_norm": 0.07414140959170529, "learning_rate": 7.3524572303626415e-06, "loss": 0.481, "step": 2785 }, { "epoch": 1.3822762814943528, "grad_norm": 0.07282023842599468, "learning_rate": 7.3507322151060725e-06, "loss": 0.5111, "step": 2786 }, { "epoch": 1.382772744197592, "grad_norm": 0.07371876509619173, "learning_rate": 7.3490068405777736e-06, "loss": 0.4518, "step": 2787 }, { "epoch": 1.3832692069008314, "grad_norm": 0.07242717641607478, "learning_rate": 7.347281107041443e-06, "loss": 0.4797, "step": 2788 }, { "epoch": 1.383765669604071, "grad_norm": 0.0745013084369651, "learning_rate": 7.345555014760832e-06, "loss": 0.5479, "step": 2789 }, { "epoch": 1.3842621323073105, "grad_norm": 0.07221130133499926, "learning_rate": 7.343828563999744e-06, "loss": 0.4875, "step": 2790 }, { "epoch": 1.3847585950105499, "grad_norm": 0.07154952038380806, "learning_rate": 7.342101755022041e-06, "loss": 0.4759, "step": 2791 }, { "epoch": 1.3852550577137892, "grad_norm": 0.07185399247769826, "learning_rate": 7.340374588091638e-06, "loss": 0.4831, "step": 2792 }, { "epoch": 1.3857515204170285, "grad_norm": 0.07022152259713238, "learning_rate": 7.338647063472503e-06, "loss": 0.4607, "step": 2793 }, { "epoch": 1.386247983120268, "grad_norm": 0.07530734274608732, "learning_rate": 7.336919181428661e-06, "loss": 0.4538, "step": 2794 }, { "epoch": 1.3867444458235076, "grad_norm": 0.0800273980847199, "learning_rate": 7.335190942224193e-06, "loss": 0.5545, "step": 2795 }, { "epoch": 1.387240908526747, "grad_norm": 0.07118403274784829, "learning_rate": 7.333462346123232e-06, "loss": 0.51, "step": 2796 }, { "epoch": 1.3877373712299863, "grad_norm": 0.07267712142071889, "learning_rate": 7.331733393389965e-06, "loss": 0.4748, "step": 2797 }, { "epoch": 1.3882338339332256, "grad_norm": 0.07444760909833327, "learning_rate": 7.330004084288636e-06, "loss": 0.5047, "step": 2798 }, { "epoch": 1.3887302966364652, "grad_norm": 0.07464387036798184, "learning_rate": 7.328274419083541e-06, "loss": 0.5287, "step": 2799 }, { "epoch": 1.3892267593397047, "grad_norm": 0.07055343310815258, "learning_rate": 7.326544398039032e-06, "loss": 0.4614, "step": 2800 }, { "epoch": 1.389723222042944, "grad_norm": 0.07357320411155402, "learning_rate": 7.324814021419514e-06, "loss": 0.4733, "step": 2801 }, { "epoch": 1.3902196847461834, "grad_norm": 0.06994120383038073, "learning_rate": 7.32308328948945e-06, "loss": 0.4627, "step": 2802 }, { "epoch": 1.3907161474494227, "grad_norm": 0.07092734310261352, "learning_rate": 7.321352202513352e-06, "loss": 0.4506, "step": 2803 }, { "epoch": 
1.3912126101526623, "grad_norm": 0.0725892321942883, "learning_rate": 7.31962076075579e-06, "loss": 0.4783, "step": 2804 }, { "epoch": 1.3917090728559016, "grad_norm": 0.07270086524200547, "learning_rate": 7.3178889644813875e-06, "loss": 0.5169, "step": 2805 }, { "epoch": 1.3922055355591412, "grad_norm": 0.0710477813020673, "learning_rate": 7.316156813954821e-06, "loss": 0.4634, "step": 2806 }, { "epoch": 1.3927019982623805, "grad_norm": 0.0737346523243725, "learning_rate": 7.314424309440822e-06, "loss": 0.5189, "step": 2807 }, { "epoch": 1.3931984609656198, "grad_norm": 0.07447635620495711, "learning_rate": 7.312691451204178e-06, "loss": 0.496, "step": 2808 }, { "epoch": 1.3936949236688594, "grad_norm": 0.07035201190670005, "learning_rate": 7.310958239509725e-06, "loss": 0.477, "step": 2809 }, { "epoch": 1.3941913863720987, "grad_norm": 0.07253712416056099, "learning_rate": 7.309224674622358e-06, "loss": 0.4931, "step": 2810 }, { "epoch": 1.3946878490753383, "grad_norm": 0.07026343239920593, "learning_rate": 7.3074907568070266e-06, "loss": 0.4995, "step": 2811 }, { "epoch": 1.3951843117785776, "grad_norm": 0.07375095563668935, "learning_rate": 7.3057564863287304e-06, "loss": 0.4753, "step": 2812 }, { "epoch": 1.395680774481817, "grad_norm": 0.0723516046829901, "learning_rate": 7.304021863452525e-06, "loss": 0.4861, "step": 2813 }, { "epoch": 1.3961772371850565, "grad_norm": 0.07226116513343771, "learning_rate": 7.30228688844352e-06, "loss": 0.5004, "step": 2814 }, { "epoch": 1.3966736998882958, "grad_norm": 0.07093942414377284, "learning_rate": 7.3005515615668785e-06, "loss": 0.5041, "step": 2815 }, { "epoch": 1.3971701625915354, "grad_norm": 0.07556771300420517, "learning_rate": 7.2988158830878174e-06, "loss": 0.5133, "step": 2816 }, { "epoch": 1.3976666252947747, "grad_norm": 0.07498104781592332, "learning_rate": 7.297079853271607e-06, "loss": 0.508, "step": 2817 }, { "epoch": 1.398163087998014, "grad_norm": 0.07281928634469736, "learning_rate": 7.295343472383573e-06, "loss": 0.4827, "step": 2818 }, { "epoch": 1.3986595507012536, "grad_norm": 0.07150001155960911, "learning_rate": 7.293606740689091e-06, "loss": 0.4767, "step": 2819 }, { "epoch": 1.399156013404493, "grad_norm": 0.0696624964643031, "learning_rate": 7.291869658453594e-06, "loss": 0.4941, "step": 2820 }, { "epoch": 1.3996524761077325, "grad_norm": 0.07346208294226515, "learning_rate": 7.2901322259425675e-06, "loss": 0.5148, "step": 2821 }, { "epoch": 1.4001489388109718, "grad_norm": 0.07387505696804075, "learning_rate": 7.28839444342155e-06, "loss": 0.5054, "step": 2822 }, { "epoch": 1.4006454015142111, "grad_norm": 0.07202026689241921, "learning_rate": 7.286656311156133e-06, "loss": 0.5098, "step": 2823 }, { "epoch": 1.4011418642174507, "grad_norm": 0.07254707901648703, "learning_rate": 7.2849178294119635e-06, "loss": 0.4801, "step": 2824 }, { "epoch": 1.40163832692069, "grad_norm": 0.07426847450752543, "learning_rate": 7.283178998454738e-06, "loss": 0.5112, "step": 2825 }, { "epoch": 1.4021347896239296, "grad_norm": 0.0742033526393686, "learning_rate": 7.281439818550211e-06, "loss": 0.5388, "step": 2826 }, { "epoch": 1.402631252327169, "grad_norm": 0.07310257455356693, "learning_rate": 7.279700289964187e-06, "loss": 0.4672, "step": 2827 }, { "epoch": 1.4031277150304082, "grad_norm": 0.07575871999901848, "learning_rate": 7.277960412962528e-06, "loss": 0.4735, "step": 2828 }, { "epoch": 1.4036241777336478, "grad_norm": 0.07061926156970885, "learning_rate": 7.276220187811144e-06, "loss": 0.5155, "step": 2829 }, { "epoch": 
1.4041206404368871, "grad_norm": 0.0745984279597669, "learning_rate": 7.274479614776001e-06, "loss": 0.4973, "step": 2830 }, { "epoch": 1.4046171031401267, "grad_norm": 0.07292084635183517, "learning_rate": 7.272738694123116e-06, "loss": 0.5167, "step": 2831 }, { "epoch": 1.405113565843366, "grad_norm": 0.07291076477353618, "learning_rate": 7.270997426118563e-06, "loss": 0.4759, "step": 2832 }, { "epoch": 1.4056100285466053, "grad_norm": 0.06936979630980541, "learning_rate": 7.269255811028464e-06, "loss": 0.4838, "step": 2833 }, { "epoch": 1.4061064912498449, "grad_norm": 0.06729679362346919, "learning_rate": 7.267513849119001e-06, "loss": 0.466, "step": 2834 }, { "epoch": 1.4066029539530842, "grad_norm": 0.07515873936232569, "learning_rate": 7.265771540656404e-06, "loss": 0.5145, "step": 2835 }, { "epoch": 1.4070994166563238, "grad_norm": 0.07335281716005654, "learning_rate": 7.264028885906953e-06, "loss": 0.5136, "step": 2836 }, { "epoch": 1.407595879359563, "grad_norm": 0.07390288119336746, "learning_rate": 7.26228588513699e-06, "loss": 0.4892, "step": 2837 }, { "epoch": 1.4080923420628024, "grad_norm": 0.07085400460434918, "learning_rate": 7.260542538612902e-06, "loss": 0.4625, "step": 2838 }, { "epoch": 1.408588804766042, "grad_norm": 0.07153303266929528, "learning_rate": 7.258798846601132e-06, "loss": 0.4791, "step": 2839 }, { "epoch": 1.4090852674692813, "grad_norm": 0.07431484763570721, "learning_rate": 7.257054809368176e-06, "loss": 0.4823, "step": 2840 }, { "epoch": 1.4095817301725209, "grad_norm": 0.07242693206760291, "learning_rate": 7.255310427180579e-06, "loss": 0.4911, "step": 2841 }, { "epoch": 1.4100781928757602, "grad_norm": 0.06913102542680617, "learning_rate": 7.253565700304946e-06, "loss": 0.4698, "step": 2842 }, { "epoch": 1.4105746555789995, "grad_norm": 0.07432955588039054, "learning_rate": 7.25182062900793e-06, "loss": 0.4929, "step": 2843 }, { "epoch": 1.411071118282239, "grad_norm": 0.07030319072588502, "learning_rate": 7.250075213556234e-06, "loss": 0.5069, "step": 2844 }, { "epoch": 1.4115675809854784, "grad_norm": 0.06783577787923013, "learning_rate": 7.24832945421662e-06, "loss": 0.4538, "step": 2845 }, { "epoch": 1.412064043688718, "grad_norm": 0.07226793652217142, "learning_rate": 7.246583351255899e-06, "loss": 0.4924, "step": 2846 }, { "epoch": 1.4125605063919573, "grad_norm": 0.07379520096028509, "learning_rate": 7.244836904940933e-06, "loss": 0.4852, "step": 2847 }, { "epoch": 1.4130569690951966, "grad_norm": 0.0696803836747072, "learning_rate": 7.243090115538639e-06, "loss": 0.4686, "step": 2848 }, { "epoch": 1.4135534317984362, "grad_norm": 0.07439574493493696, "learning_rate": 7.241342983315985e-06, "loss": 0.517, "step": 2849 }, { "epoch": 1.4140498945016755, "grad_norm": 0.07341222404947037, "learning_rate": 7.239595508539995e-06, "loss": 0.4851, "step": 2850 }, { "epoch": 1.414546357204915, "grad_norm": 0.07253343281665837, "learning_rate": 7.237847691477741e-06, "loss": 0.4916, "step": 2851 }, { "epoch": 1.4150428199081544, "grad_norm": 0.07444987740405767, "learning_rate": 7.236099532396347e-06, "loss": 0.4789, "step": 2852 }, { "epoch": 1.4155392826113937, "grad_norm": 0.07095562862050957, "learning_rate": 7.234351031562994e-06, "loss": 0.4731, "step": 2853 }, { "epoch": 1.4160357453146333, "grad_norm": 0.07390168113853861, "learning_rate": 7.2326021892449105e-06, "loss": 0.4769, "step": 2854 }, { "epoch": 1.4165322080178726, "grad_norm": 0.07134364630264015, "learning_rate": 7.230853005709378e-06, "loss": 0.4674, "step": 2855 }, { "epoch": 
1.4170286707211122, "grad_norm": 0.07607452559927425, "learning_rate": 7.229103481223735e-06, "loss": 0.4917, "step": 2856 }, { "epoch": 1.4175251334243515, "grad_norm": 0.07022996173204776, "learning_rate": 7.227353616055364e-06, "loss": 0.4808, "step": 2857 }, { "epoch": 1.4180215961275908, "grad_norm": 0.06904514897261274, "learning_rate": 7.225603410471707e-06, "loss": 0.5062, "step": 2858 }, { "epoch": 1.4185180588308304, "grad_norm": 0.07173966517539798, "learning_rate": 7.223852864740251e-06, "loss": 0.4777, "step": 2859 }, { "epoch": 1.4190145215340697, "grad_norm": 0.07105840102585351, "learning_rate": 7.222101979128544e-06, "loss": 0.4838, "step": 2860 }, { "epoch": 1.4195109842373093, "grad_norm": 0.0693002905559142, "learning_rate": 7.220350753904177e-06, "loss": 0.4807, "step": 2861 }, { "epoch": 1.4200074469405486, "grad_norm": 0.07366922326583022, "learning_rate": 7.218599189334799e-06, "loss": 0.4974, "step": 2862 }, { "epoch": 1.420503909643788, "grad_norm": 0.07105657624613147, "learning_rate": 7.216847285688106e-06, "loss": 0.495, "step": 2863 }, { "epoch": 1.4210003723470275, "grad_norm": 0.0759918025493373, "learning_rate": 7.215095043231852e-06, "loss": 0.5141, "step": 2864 }, { "epoch": 1.4214968350502668, "grad_norm": 0.07233086192834333, "learning_rate": 7.213342462233835e-06, "loss": 0.508, "step": 2865 }, { "epoch": 1.4219932977535064, "grad_norm": 0.07490086058497883, "learning_rate": 7.211589542961911e-06, "loss": 0.5053, "step": 2866 }, { "epoch": 1.4224897604567457, "grad_norm": 0.0710248696653551, "learning_rate": 7.209836285683987e-06, "loss": 0.4889, "step": 2867 }, { "epoch": 1.422986223159985, "grad_norm": 0.07134268814850037, "learning_rate": 7.208082690668017e-06, "loss": 0.4672, "step": 2868 }, { "epoch": 1.4234826858632246, "grad_norm": 0.0724334537858711, "learning_rate": 7.206328758182013e-06, "loss": 0.4637, "step": 2869 }, { "epoch": 1.423979148566464, "grad_norm": 0.07165662584752473, "learning_rate": 7.204574488494034e-06, "loss": 0.473, "step": 2870 }, { "epoch": 1.4244756112697035, "grad_norm": 0.07574200923260407, "learning_rate": 7.202819881872191e-06, "loss": 0.5067, "step": 2871 }, { "epoch": 1.4249720739729428, "grad_norm": 0.07262595793489623, "learning_rate": 7.2010649385846484e-06, "loss": 0.4653, "step": 2872 }, { "epoch": 1.4254685366761821, "grad_norm": 0.07190810770150632, "learning_rate": 7.199309658899623e-06, "loss": 0.5003, "step": 2873 }, { "epoch": 1.4259649993794217, "grad_norm": 0.07146006958831883, "learning_rate": 7.197554043085378e-06, "loss": 0.477, "step": 2874 }, { "epoch": 1.426461462082661, "grad_norm": 0.07287315250247191, "learning_rate": 7.195798091410233e-06, "loss": 0.4694, "step": 2875 }, { "epoch": 1.4269579247859006, "grad_norm": 0.07145354823728287, "learning_rate": 7.194041804142556e-06, "loss": 0.5067, "step": 2876 }, { "epoch": 1.4274543874891399, "grad_norm": 0.07374432502560274, "learning_rate": 7.19228518155077e-06, "loss": 0.5045, "step": 2877 }, { "epoch": 1.4279508501923792, "grad_norm": 0.06905942432712643, "learning_rate": 7.190528223903345e-06, "loss": 0.4945, "step": 2878 }, { "epoch": 1.4284473128956188, "grad_norm": 0.07235972820329792, "learning_rate": 7.188770931468802e-06, "loss": 0.4949, "step": 2879 }, { "epoch": 1.428943775598858, "grad_norm": 0.07379607473437916, "learning_rate": 7.187013304515715e-06, "loss": 0.4942, "step": 2880 }, { "epoch": 1.4294402383020977, "grad_norm": 0.07148160710339348, "learning_rate": 7.185255343312712e-06, "loss": 0.4643, "step": 2881 }, { "epoch": 
1.429936701005337, "grad_norm": 0.07115263600446331, "learning_rate": 7.183497048128467e-06, "loss": 0.4849, "step": 2882 }, { "epoch": 1.4304331637085763, "grad_norm": 0.07313485288182806, "learning_rate": 7.181738419231708e-06, "loss": 0.4904, "step": 2883 }, { "epoch": 1.4309296264118159, "grad_norm": 0.07403245647540267, "learning_rate": 7.179979456891214e-06, "loss": 0.5176, "step": 2884 }, { "epoch": 1.4314260891150552, "grad_norm": 0.07430889143593936, "learning_rate": 7.178220161375814e-06, "loss": 0.4958, "step": 2885 }, { "epoch": 1.4319225518182948, "grad_norm": 0.07146626429946829, "learning_rate": 7.176460532954386e-06, "loss": 0.4735, "step": 2886 }, { "epoch": 1.432419014521534, "grad_norm": 0.07133541636516295, "learning_rate": 7.174700571895863e-06, "loss": 0.4905, "step": 2887 }, { "epoch": 1.4329154772247734, "grad_norm": 0.07064549164282272, "learning_rate": 7.172940278469225e-06, "loss": 0.483, "step": 2888 }, { "epoch": 1.433411939928013, "grad_norm": 0.0718501679320543, "learning_rate": 7.171179652943507e-06, "loss": 0.4555, "step": 2889 }, { "epoch": 1.4339084026312523, "grad_norm": 0.07331695386537057, "learning_rate": 7.169418695587791e-06, "loss": 0.4912, "step": 2890 }, { "epoch": 1.4344048653344919, "grad_norm": 0.07150200496801334, "learning_rate": 7.167657406671212e-06, "loss": 0.4946, "step": 2891 }, { "epoch": 1.4349013280377312, "grad_norm": 0.06955567648220769, "learning_rate": 7.165895786462953e-06, "loss": 0.4716, "step": 2892 }, { "epoch": 1.4353977907409705, "grad_norm": 0.07185399643784783, "learning_rate": 7.164133835232252e-06, "loss": 0.491, "step": 2893 }, { "epoch": 1.43589425344421, "grad_norm": 0.0705794713571087, "learning_rate": 7.162371553248393e-06, "loss": 0.474, "step": 2894 }, { "epoch": 1.4363907161474494, "grad_norm": 0.07619159211445736, "learning_rate": 7.160608940780713e-06, "loss": 0.4653, "step": 2895 }, { "epoch": 1.436887178850689, "grad_norm": 0.07272819639781909, "learning_rate": 7.158845998098598e-06, "loss": 0.478, "step": 2896 }, { "epoch": 1.4373836415539283, "grad_norm": 0.07353706245344895, "learning_rate": 7.157082725471488e-06, "loss": 0.5007, "step": 2897 }, { "epoch": 1.4378801042571676, "grad_norm": 0.0710932004339634, "learning_rate": 7.155319123168869e-06, "loss": 0.5049, "step": 2898 }, { "epoch": 1.4383765669604072, "grad_norm": 0.07036816763446126, "learning_rate": 7.1535551914602804e-06, "loss": 0.5002, "step": 2899 }, { "epoch": 1.4388730296636465, "grad_norm": 0.07323068489466551, "learning_rate": 7.15179093061531e-06, "loss": 0.4988, "step": 2900 }, { "epoch": 1.439369492366886, "grad_norm": 0.07103226205608117, "learning_rate": 7.150026340903597e-06, "loss": 0.4955, "step": 2901 }, { "epoch": 1.4398659550701254, "grad_norm": 0.07179736174595919, "learning_rate": 7.148261422594832e-06, "loss": 0.4768, "step": 2902 }, { "epoch": 1.4403624177733647, "grad_norm": 0.07171588950765849, "learning_rate": 7.146496175958753e-06, "loss": 0.4781, "step": 2903 }, { "epoch": 1.4408588804766043, "grad_norm": 0.07386979687028543, "learning_rate": 7.144730601265148e-06, "loss": 0.4838, "step": 2904 }, { "epoch": 1.4413553431798436, "grad_norm": 0.07078290659161332, "learning_rate": 7.142964698783861e-06, "loss": 0.4747, "step": 2905 }, { "epoch": 1.4418518058830831, "grad_norm": 0.07123642642593622, "learning_rate": 7.141198468784778e-06, "loss": 0.4782, "step": 2906 }, { "epoch": 1.4423482685863225, "grad_norm": 0.07365790707179352, "learning_rate": 7.139431911537842e-06, "loss": 0.4699, "step": 2907 }, { "epoch": 
1.4428447312895618, "grad_norm": 0.07225394246849842, "learning_rate": 7.13766502731304e-06, "loss": 0.5103, "step": 2908 }, { "epoch": 1.4433411939928014, "grad_norm": 0.07235066461489845, "learning_rate": 7.135897816380415e-06, "loss": 0.5005, "step": 2909 }, { "epoch": 1.4438376566960407, "grad_norm": 0.07100236241952648, "learning_rate": 7.1341302790100546e-06, "loss": 0.478, "step": 2910 }, { "epoch": 1.4443341193992802, "grad_norm": 0.0699430966374579, "learning_rate": 7.132362415472099e-06, "loss": 0.5328, "step": 2911 }, { "epoch": 1.4448305821025196, "grad_norm": 0.07467905849188111, "learning_rate": 7.130594226036739e-06, "loss": 0.5144, "step": 2912 }, { "epoch": 1.445327044805759, "grad_norm": 0.06929574174293675, "learning_rate": 7.128825710974212e-06, "loss": 0.4824, "step": 2913 }, { "epoch": 1.4458235075089985, "grad_norm": 0.07491153198596696, "learning_rate": 7.127056870554807e-06, "loss": 0.5402, "step": 2914 }, { "epoch": 1.4463199702122378, "grad_norm": 0.07041488508688841, "learning_rate": 7.125287705048867e-06, "loss": 0.4973, "step": 2915 }, { "epoch": 1.4468164329154773, "grad_norm": 0.07148072216701858, "learning_rate": 7.123518214726775e-06, "loss": 0.5064, "step": 2916 }, { "epoch": 1.4473128956187167, "grad_norm": 0.07176842480825346, "learning_rate": 7.121748399858974e-06, "loss": 0.504, "step": 2917 }, { "epoch": 1.447809358321956, "grad_norm": 0.07080899273892569, "learning_rate": 7.1199782607159494e-06, "loss": 0.4491, "step": 2918 }, { "epoch": 1.4483058210251956, "grad_norm": 0.07295946872020888, "learning_rate": 7.118207797568238e-06, "loss": 0.5054, "step": 2919 }, { "epoch": 1.448802283728435, "grad_norm": 0.07196455584447783, "learning_rate": 7.116437010686427e-06, "loss": 0.4746, "step": 2920 }, { "epoch": 1.4492987464316744, "grad_norm": 0.0711831036893069, "learning_rate": 7.1146659003411554e-06, "loss": 0.5072, "step": 2921 }, { "epoch": 1.4497952091349138, "grad_norm": 0.07047858314978178, "learning_rate": 7.112894466803106e-06, "loss": 0.4919, "step": 2922 }, { "epoch": 1.450291671838153, "grad_norm": 0.07333316027850086, "learning_rate": 7.1111227103430145e-06, "loss": 0.5077, "step": 2923 }, { "epoch": 1.4507881345413927, "grad_norm": 0.07565813630229153, "learning_rate": 7.109350631231666e-06, "loss": 0.4875, "step": 2924 }, { "epoch": 1.451284597244632, "grad_norm": 0.07422857378436408, "learning_rate": 7.107578229739895e-06, "loss": 0.5018, "step": 2925 }, { "epoch": 1.4517810599478715, "grad_norm": 0.07004713204598993, "learning_rate": 7.105805506138586e-06, "loss": 0.4845, "step": 2926 }, { "epoch": 1.4522775226511109, "grad_norm": 0.07332153238042952, "learning_rate": 7.104032460698668e-06, "loss": 0.5136, "step": 2927 }, { "epoch": 1.4527739853543502, "grad_norm": 0.07400446058276221, "learning_rate": 7.102259093691122e-06, "loss": 0.4801, "step": 2928 }, { "epoch": 1.4532704480575895, "grad_norm": 0.0774114970002744, "learning_rate": 7.100485405386982e-06, "loss": 0.4807, "step": 2929 }, { "epoch": 1.453766910760829, "grad_norm": 0.0734254664750455, "learning_rate": 7.098711396057326e-06, "loss": 0.4963, "step": 2930 }, { "epoch": 1.4542633734640686, "grad_norm": 0.0694875881388645, "learning_rate": 7.096937065973285e-06, "loss": 0.4768, "step": 2931 }, { "epoch": 1.454759836167308, "grad_norm": 0.07529738097359195, "learning_rate": 7.095162415406034e-06, "loss": 0.5126, "step": 2932 }, { "epoch": 1.4552562988705473, "grad_norm": 0.07667108396414482, "learning_rate": 7.093387444626801e-06, "loss": 0.4962, "step": 2933 }, { "epoch": 
1.4557527615737866, "grad_norm": 0.07234479864658316, "learning_rate": 7.0916121539068635e-06, "loss": 0.4739, "step": 2934 }, { "epoch": 1.4562492242770262, "grad_norm": 0.07242903221297743, "learning_rate": 7.0898365435175435e-06, "loss": 0.515, "step": 2935 }, { "epoch": 1.4567456869802657, "grad_norm": 0.07507384079677762, "learning_rate": 7.088060613730215e-06, "loss": 0.5275, "step": 2936 }, { "epoch": 1.457242149683505, "grad_norm": 0.07458928181042776, "learning_rate": 7.0862843648163024e-06, "loss": 0.5047, "step": 2937 }, { "epoch": 1.4577386123867444, "grad_norm": 0.07596881392589007, "learning_rate": 7.084507797047276e-06, "loss": 0.5599, "step": 2938 }, { "epoch": 1.4582350750899837, "grad_norm": 0.07243129312968741, "learning_rate": 7.082730910694655e-06, "loss": 0.4748, "step": 2939 }, { "epoch": 1.4587315377932233, "grad_norm": 0.07248717359550946, "learning_rate": 7.080953706030007e-06, "loss": 0.5158, "step": 2940 }, { "epoch": 1.4592280004964628, "grad_norm": 0.0731657949003018, "learning_rate": 7.079176183324952e-06, "loss": 0.4821, "step": 2941 }, { "epoch": 1.4597244631997022, "grad_norm": 0.07581383406250891, "learning_rate": 7.077398342851155e-06, "loss": 0.5324, "step": 2942 }, { "epoch": 1.4602209259029415, "grad_norm": 0.06881955653615653, "learning_rate": 7.07562018488033e-06, "loss": 0.4593, "step": 2943 }, { "epoch": 1.4607173886061808, "grad_norm": 0.07432003151885896, "learning_rate": 7.073841709684238e-06, "loss": 0.5089, "step": 2944 }, { "epoch": 1.4612138513094204, "grad_norm": 0.06822754389937877, "learning_rate": 7.072062917534693e-06, "loss": 0.5, "step": 2945 }, { "epoch": 1.4617103140126597, "grad_norm": 0.0747311313862388, "learning_rate": 7.070283808703553e-06, "loss": 0.5058, "step": 2946 }, { "epoch": 1.4622067767158993, "grad_norm": 0.07102663976968088, "learning_rate": 7.068504383462729e-06, "loss": 0.4682, "step": 2947 }, { "epoch": 1.4627032394191386, "grad_norm": 0.07725801668567019, "learning_rate": 7.0667246420841754e-06, "loss": 0.5274, "step": 2948 }, { "epoch": 1.463199702122378, "grad_norm": 0.07117806160550469, "learning_rate": 7.064944584839898e-06, "loss": 0.5052, "step": 2949 }, { "epoch": 1.4636961648256175, "grad_norm": 0.07271174891546413, "learning_rate": 7.06316421200195e-06, "loss": 0.4849, "step": 2950 }, { "epoch": 1.4641926275288568, "grad_norm": 0.07361228563816642, "learning_rate": 7.061383523842431e-06, "loss": 0.4672, "step": 2951 }, { "epoch": 1.4646890902320964, "grad_norm": 0.07345309673612148, "learning_rate": 7.0596025206334925e-06, "loss": 0.4988, "step": 2952 }, { "epoch": 1.4651855529353357, "grad_norm": 0.0750803661826231, "learning_rate": 7.057821202647332e-06, "loss": 0.4906, "step": 2953 }, { "epoch": 1.465682015638575, "grad_norm": 0.0725746788290917, "learning_rate": 7.056039570156197e-06, "loss": 0.491, "step": 2954 }, { "epoch": 1.4661784783418146, "grad_norm": 0.07240507467234208, "learning_rate": 7.054257623432378e-06, "loss": 0.5088, "step": 2955 }, { "epoch": 1.466674941045054, "grad_norm": 0.07216315731512568, "learning_rate": 7.052475362748219e-06, "loss": 0.4697, "step": 2956 }, { "epoch": 1.4671714037482935, "grad_norm": 0.07076545303447647, "learning_rate": 7.05069278837611e-06, "loss": 0.4697, "step": 2957 }, { "epoch": 1.4676678664515328, "grad_norm": 0.07005482997181905, "learning_rate": 7.048909900588488e-06, "loss": 0.4614, "step": 2958 }, { "epoch": 1.4681643291547721, "grad_norm": 0.07570364017599765, "learning_rate": 7.047126699657842e-06, "loss": 0.5188, "step": 2959 }, { "epoch": 
1.4686607918580117, "grad_norm": 0.07325286216717752, "learning_rate": 7.045343185856701e-06, "loss": 0.4743, "step": 2960 }, { "epoch": 1.469157254561251, "grad_norm": 0.07542132612527033, "learning_rate": 7.043559359457648e-06, "loss": 0.5081, "step": 2961 }, { "epoch": 1.4696537172644906, "grad_norm": 0.10995884031213667, "learning_rate": 7.041775220733313e-06, "loss": 0.4853, "step": 2962 }, { "epoch": 1.47015017996773, "grad_norm": 0.07255210762978338, "learning_rate": 7.039990769956374e-06, "loss": 0.4904, "step": 2963 }, { "epoch": 1.4706466426709692, "grad_norm": 0.07431973502861508, "learning_rate": 7.038206007399555e-06, "loss": 0.5069, "step": 2964 }, { "epoch": 1.4711431053742088, "grad_norm": 0.07192623705516626, "learning_rate": 7.036420933335627e-06, "loss": 0.4764, "step": 2965 }, { "epoch": 1.4716395680774481, "grad_norm": 0.07182207582734508, "learning_rate": 7.034635548037412e-06, "loss": 0.4933, "step": 2966 }, { "epoch": 1.4721360307806877, "grad_norm": 0.07525017370629115, "learning_rate": 7.032849851777774e-06, "loss": 0.4918, "step": 2967 }, { "epoch": 1.472632493483927, "grad_norm": 0.07277899449762383, "learning_rate": 7.031063844829632e-06, "loss": 0.4659, "step": 2968 }, { "epoch": 1.4731289561871663, "grad_norm": 0.0699788593870598, "learning_rate": 7.029277527465948e-06, "loss": 0.4704, "step": 2969 }, { "epoch": 1.4736254188904059, "grad_norm": 0.07548125236861504, "learning_rate": 7.027490899959729e-06, "loss": 0.5407, "step": 2970 }, { "epoch": 1.4741218815936452, "grad_norm": 0.07361918602319495, "learning_rate": 7.025703962584035e-06, "loss": 0.5167, "step": 2971 }, { "epoch": 1.4746183442968848, "grad_norm": 0.07164684601867893, "learning_rate": 7.023916715611969e-06, "loss": 0.476, "step": 2972 }, { "epoch": 1.475114807000124, "grad_norm": 0.07342930896696674, "learning_rate": 7.022129159316685e-06, "loss": 0.4862, "step": 2973 }, { "epoch": 1.4756112697033634, "grad_norm": 0.07448008055751498, "learning_rate": 7.020341293971383e-06, "loss": 0.5077, "step": 2974 }, { "epoch": 1.476107732406603, "grad_norm": 0.07590357611172563, "learning_rate": 7.018553119849306e-06, "loss": 0.4977, "step": 2975 }, { "epoch": 1.4766041951098423, "grad_norm": 0.07000464301210924, "learning_rate": 7.0167646372237495e-06, "loss": 0.4695, "step": 2976 }, { "epoch": 1.4771006578130819, "grad_norm": 0.06744354825036339, "learning_rate": 7.014975846368055e-06, "loss": 0.4525, "step": 2977 }, { "epoch": 1.4775971205163212, "grad_norm": 0.07206037360333768, "learning_rate": 7.013186747555611e-06, "loss": 0.4837, "step": 2978 }, { "epoch": 1.4780935832195605, "grad_norm": 0.07395975276717683, "learning_rate": 7.01139734105985e-06, "loss": 0.4924, "step": 2979 }, { "epoch": 1.4785900459228, "grad_norm": 0.07113804829814217, "learning_rate": 7.009607627154257e-06, "loss": 0.4946, "step": 2980 }, { "epoch": 1.4790865086260394, "grad_norm": 0.07233617024386177, "learning_rate": 7.0078176061123595e-06, "loss": 0.5144, "step": 2981 }, { "epoch": 1.479582971329279, "grad_norm": 0.07350380568403078, "learning_rate": 7.006027278207734e-06, "loss": 0.4951, "step": 2982 }, { "epoch": 1.4800794340325183, "grad_norm": 0.07026993393268165, "learning_rate": 7.004236643714002e-06, "loss": 0.4697, "step": 2983 }, { "epoch": 1.4805758967357576, "grad_norm": 0.07222164558693017, "learning_rate": 7.002445702904835e-06, "loss": 0.4953, "step": 2984 }, { "epoch": 1.4810723594389972, "grad_norm": 0.07143971798306151, "learning_rate": 7.000654456053949e-06, "loss": 0.4687, "step": 2985 }, { "epoch": 
1.4815688221422365, "grad_norm": 0.07459913209718101, "learning_rate": 6.998862903435109e-06, "loss": 0.4708, "step": 2986 }, { "epoch": 1.482065284845476, "grad_norm": 0.07352362619317326, "learning_rate": 6.997071045322123e-06, "loss": 0.5225, "step": 2987 }, { "epoch": 1.4825617475487154, "grad_norm": 0.07326362477169654, "learning_rate": 6.995278881988847e-06, "loss": 0.4651, "step": 2988 }, { "epoch": 1.4830582102519547, "grad_norm": 0.07617706210167993, "learning_rate": 6.993486413709187e-06, "loss": 0.4713, "step": 2989 }, { "epoch": 1.4835546729551943, "grad_norm": 0.07272746269808053, "learning_rate": 6.991693640757091e-06, "loss": 0.5036, "step": 2990 }, { "epoch": 1.4840511356584336, "grad_norm": 0.0735407652235623, "learning_rate": 6.989900563406557e-06, "loss": 0.5171, "step": 2991 }, { "epoch": 1.4845475983616732, "grad_norm": 0.07481908248468541, "learning_rate": 6.988107181931627e-06, "loss": 0.4865, "step": 2992 }, { "epoch": 1.4850440610649125, "grad_norm": 0.07569078793936909, "learning_rate": 6.986313496606392e-06, "loss": 0.5161, "step": 2993 }, { "epoch": 1.4855405237681518, "grad_norm": 0.06977321199854734, "learning_rate": 6.984519507704985e-06, "loss": 0.5026, "step": 2994 }, { "epoch": 1.4860369864713914, "grad_norm": 0.07160834128804278, "learning_rate": 6.982725215501592e-06, "loss": 0.4607, "step": 2995 }, { "epoch": 1.4865334491746307, "grad_norm": 0.07161162488316418, "learning_rate": 6.980930620270441e-06, "loss": 0.518, "step": 2996 }, { "epoch": 1.4870299118778703, "grad_norm": 0.07373692206462576, "learning_rate": 6.9791357222858054e-06, "loss": 0.4838, "step": 2997 }, { "epoch": 1.4875263745811096, "grad_norm": 0.07201153669770845, "learning_rate": 6.977340521822009e-06, "loss": 0.4788, "step": 2998 }, { "epoch": 1.488022837284349, "grad_norm": 0.07630818774232069, "learning_rate": 6.975545019153418e-06, "loss": 0.4986, "step": 2999 }, { "epoch": 1.4885192999875885, "grad_norm": 0.07161586866226113, "learning_rate": 6.973749214554445e-06, "loss": 0.4932, "step": 3000 }, { "epoch": 1.4890157626908278, "grad_norm": 0.07103890527803151, "learning_rate": 6.9719531082995516e-06, "loss": 0.4894, "step": 3001 }, { "epoch": 1.4895122253940674, "grad_norm": 0.070312542505285, "learning_rate": 6.970156700663244e-06, "loss": 0.481, "step": 3002 }, { "epoch": 1.4900086880973067, "grad_norm": 0.07497732442687259, "learning_rate": 6.968359991920073e-06, "loss": 0.5113, "step": 3003 }, { "epoch": 1.490505150800546, "grad_norm": 0.07381885890576345, "learning_rate": 6.9665629823446375e-06, "loss": 0.4943, "step": 3004 }, { "epoch": 1.4910016135037856, "grad_norm": 0.07282606616955749, "learning_rate": 6.964765672211582e-06, "loss": 0.4979, "step": 3005 }, { "epoch": 1.491498076207025, "grad_norm": 0.07314271604221277, "learning_rate": 6.962968061795596e-06, "loss": 0.564, "step": 3006 }, { "epoch": 1.4919945389102645, "grad_norm": 0.07348280758443781, "learning_rate": 6.9611701513714165e-06, "loss": 0.5347, "step": 3007 }, { "epoch": 1.4924910016135038, "grad_norm": 0.07602272659194373, "learning_rate": 6.959371941213824e-06, "loss": 0.4952, "step": 3008 }, { "epoch": 1.4929874643167431, "grad_norm": 0.07447850718939307, "learning_rate": 6.957573431597646e-06, "loss": 0.519, "step": 3009 }, { "epoch": 1.4934839270199827, "grad_norm": 0.06810881816124986, "learning_rate": 6.955774622797755e-06, "loss": 0.4765, "step": 3010 }, { "epoch": 1.493980389723222, "grad_norm": 0.07211306225922738, "learning_rate": 6.953975515089073e-06, "loss": 0.4819, "step": 3011 }, { "epoch": 
1.4944768524264616, "grad_norm": 0.07325489823289166, "learning_rate": 6.952176108746563e-06, "loss": 0.4665, "step": 3012 }, { "epoch": 1.4949733151297009, "grad_norm": 0.07668839966145699, "learning_rate": 6.950376404045235e-06, "loss": 0.5321, "step": 3013 }, { "epoch": 1.4954697778329402, "grad_norm": 0.06965813102342724, "learning_rate": 6.948576401260147e-06, "loss": 0.456, "step": 3014 }, { "epoch": 1.4959662405361798, "grad_norm": 0.0714214738156806, "learning_rate": 6.946776100666397e-06, "loss": 0.4824, "step": 3015 }, { "epoch": 1.496462703239419, "grad_norm": 0.0728874617313902, "learning_rate": 6.9449755025391355e-06, "loss": 0.4896, "step": 3016 }, { "epoch": 1.4969591659426587, "grad_norm": 0.07702335377688295, "learning_rate": 6.943174607153553e-06, "loss": 0.4881, "step": 3017 }, { "epoch": 1.497455628645898, "grad_norm": 0.07239817990267242, "learning_rate": 6.941373414784889e-06, "loss": 0.4867, "step": 3018 }, { "epoch": 1.4979520913491373, "grad_norm": 0.07323119244865894, "learning_rate": 6.939571925708426e-06, "loss": 0.493, "step": 3019 }, { "epoch": 1.4984485540523769, "grad_norm": 0.07165300199457361, "learning_rate": 6.937770140199491e-06, "loss": 0.4982, "step": 3020 }, { "epoch": 1.4989450167556162, "grad_norm": 0.06964794952545811, "learning_rate": 6.935968058533462e-06, "loss": 0.4833, "step": 3021 }, { "epoch": 1.4994414794588558, "grad_norm": 0.07611127268165498, "learning_rate": 6.934165680985756e-06, "loss": 0.5043, "step": 3022 }, { "epoch": 1.499937942162095, "grad_norm": 0.07052388962092157, "learning_rate": 6.932363007831837e-06, "loss": 0.4756, "step": 3023 }, { "epoch": 1.5004344048653344, "grad_norm": 0.06976733179848893, "learning_rate": 6.930560039347216e-06, "loss": 0.477, "step": 3024 }, { "epoch": 1.5004344048653344, "eval_loss": 0.5206817984580994, "eval_runtime": 259.3872, "eval_samples_per_second": 117.018, "eval_steps_per_second": 14.631, "step": 3024 }, { "epoch": 1.5009308675685737, "grad_norm": 0.07903128042684261, "learning_rate": 6.928756775807447e-06, "loss": 0.4959, "step": 3025 }, { "epoch": 1.5014273302718133, "grad_norm": 0.08126205695713004, "learning_rate": 6.926953217488129e-06, "loss": 0.5213, "step": 3026 }, { "epoch": 1.5019237929750529, "grad_norm": 0.07352870292325349, "learning_rate": 6.925149364664909e-06, "loss": 0.4891, "step": 3027 }, { "epoch": 1.5024202556782922, "grad_norm": 0.07176814755640172, "learning_rate": 6.923345217613477e-06, "loss": 0.5191, "step": 3028 }, { "epoch": 1.5029167183815315, "grad_norm": 0.07282967027183829, "learning_rate": 6.921540776609564e-06, "loss": 0.4886, "step": 3029 }, { "epoch": 1.5034131810847708, "grad_norm": 0.07341157454941727, "learning_rate": 6.919736041928956e-06, "loss": 0.4969, "step": 3030 }, { "epoch": 1.5039096437880104, "grad_norm": 0.07174957878205697, "learning_rate": 6.9179310138474734e-06, "loss": 0.5151, "step": 3031 }, { "epoch": 1.50440610649125, "grad_norm": 0.07186055210570085, "learning_rate": 6.916125692640987e-06, "loss": 0.4882, "step": 3032 }, { "epoch": 1.5049025691944893, "grad_norm": 0.06818602296286949, "learning_rate": 6.91432007858541e-06, "loss": 0.4667, "step": 3033 }, { "epoch": 1.5053990318977286, "grad_norm": 0.07199100368580628, "learning_rate": 6.912514171956704e-06, "loss": 0.5049, "step": 3034 }, { "epoch": 1.505895494600968, "grad_norm": 0.07260935212384795, "learning_rate": 6.9107079730308724e-06, "loss": 0.4931, "step": 3035 }, { "epoch": 1.5063919573042075, "grad_norm": 0.0727951239071628, "learning_rate": 6.908901482083961e-06, "loss": 
0.5121, "step": 3036 }, { "epoch": 1.506888420007447, "grad_norm": 0.07189572060344285, "learning_rate": 6.907094699392066e-06, "loss": 0.4765, "step": 3037 }, { "epoch": 1.5073848827106864, "grad_norm": 0.07077554594688146, "learning_rate": 6.905287625231325e-06, "loss": 0.4861, "step": 3038 }, { "epoch": 1.5078813454139257, "grad_norm": 0.07032031769025249, "learning_rate": 6.90348025987792e-06, "loss": 0.4684, "step": 3039 }, { "epoch": 1.508377808117165, "grad_norm": 0.07212837688355783, "learning_rate": 6.901672603608076e-06, "loss": 0.493, "step": 3040 }, { "epoch": 1.5088742708204046, "grad_norm": 0.07459184873506873, "learning_rate": 6.899864656698066e-06, "loss": 0.4786, "step": 3041 }, { "epoch": 1.5093707335236441, "grad_norm": 0.0695658996301349, "learning_rate": 6.898056419424204e-06, "loss": 0.4674, "step": 3042 }, { "epoch": 1.5098671962268835, "grad_norm": 0.07066812767150565, "learning_rate": 6.8962478920628505e-06, "loss": 0.4729, "step": 3043 }, { "epoch": 1.5103636589301228, "grad_norm": 0.07346041759217889, "learning_rate": 6.894439074890413e-06, "loss": 0.5077, "step": 3044 }, { "epoch": 1.5108601216333621, "grad_norm": 0.07619677857322961, "learning_rate": 6.892629968183338e-06, "loss": 0.513, "step": 3045 }, { "epoch": 1.5113565843366017, "grad_norm": 0.07663086607755477, "learning_rate": 6.890820572218118e-06, "loss": 0.4849, "step": 3046 }, { "epoch": 1.5118530470398412, "grad_norm": 0.07527384356601459, "learning_rate": 6.88901088727129e-06, "loss": 0.488, "step": 3047 }, { "epoch": 1.5123495097430806, "grad_norm": 0.07613931254967439, "learning_rate": 6.887200913619435e-06, "loss": 0.5246, "step": 3048 }, { "epoch": 1.51284597244632, "grad_norm": 0.07351039591100084, "learning_rate": 6.885390651539181e-06, "loss": 0.4986, "step": 3049 }, { "epoch": 1.5133424351495592, "grad_norm": 0.07283507380458602, "learning_rate": 6.883580101307195e-06, "loss": 0.4696, "step": 3050 }, { "epoch": 1.5138388978527988, "grad_norm": 0.07057028032237944, "learning_rate": 6.881769263200192e-06, "loss": 0.4747, "step": 3051 }, { "epoch": 1.5143353605560383, "grad_norm": 0.07179846588719133, "learning_rate": 6.8799581374949276e-06, "loss": 0.4866, "step": 3052 }, { "epoch": 1.5148318232592777, "grad_norm": 0.07496502673400987, "learning_rate": 6.878146724468205e-06, "loss": 0.5174, "step": 3053 }, { "epoch": 1.515328285962517, "grad_norm": 0.07430870063782538, "learning_rate": 6.876335024396872e-06, "loss": 0.5139, "step": 3054 }, { "epoch": 1.5158247486657563, "grad_norm": 0.07581456093878954, "learning_rate": 6.874523037557812e-06, "loss": 0.4792, "step": 3055 }, { "epoch": 1.516321211368996, "grad_norm": 0.07193302206420817, "learning_rate": 6.8727107642279645e-06, "loss": 0.4771, "step": 3056 }, { "epoch": 1.5168176740722354, "grad_norm": 0.07423534271844814, "learning_rate": 6.8708982046843005e-06, "loss": 0.4922, "step": 3057 }, { "epoch": 1.5173141367754748, "grad_norm": 0.06944464704893721, "learning_rate": 6.869085359203844e-06, "loss": 0.4767, "step": 3058 }, { "epoch": 1.517810599478714, "grad_norm": 0.07422967148546548, "learning_rate": 6.8672722280636595e-06, "loss": 0.4891, "step": 3059 }, { "epoch": 1.5183070621819534, "grad_norm": 0.07764248517174992, "learning_rate": 6.865458811540854e-06, "loss": 0.5052, "step": 3060 }, { "epoch": 1.518803524885193, "grad_norm": 0.07449831386708612, "learning_rate": 6.863645109912581e-06, "loss": 0.4694, "step": 3061 }, { "epoch": 1.5192999875884325, "grad_norm": 0.06960098049635088, "learning_rate": 6.861831123456033e-06, "loss": 
0.4961, "step": 3062 }, { "epoch": 1.5197964502916719, "grad_norm": 0.0726928648817254, "learning_rate": 6.86001685244845e-06, "loss": 0.4877, "step": 3063 }, { "epoch": 1.5202929129949112, "grad_norm": 0.07271711349404424, "learning_rate": 6.858202297167114e-06, "loss": 0.4965, "step": 3064 }, { "epoch": 1.5207893756981505, "grad_norm": 0.07172467737507537, "learning_rate": 6.8563874578893505e-06, "loss": 0.4776, "step": 3065 }, { "epoch": 1.52128583840139, "grad_norm": 0.06994923402408372, "learning_rate": 6.854572334892531e-06, "loss": 0.4631, "step": 3066 }, { "epoch": 1.5217823011046296, "grad_norm": 0.07151187949027787, "learning_rate": 6.852756928454064e-06, "loss": 0.4661, "step": 3067 }, { "epoch": 1.522278763807869, "grad_norm": 0.07336019831993339, "learning_rate": 6.850941238851408e-06, "loss": 0.4841, "step": 3068 }, { "epoch": 1.5227752265111083, "grad_norm": 0.07386078183875347, "learning_rate": 6.84912526636206e-06, "loss": 0.523, "step": 3069 }, { "epoch": 1.5232716892143476, "grad_norm": 0.07181896557591305, "learning_rate": 6.8473090112635656e-06, "loss": 0.4877, "step": 3070 }, { "epoch": 1.5237681519175872, "grad_norm": 0.07203431208701555, "learning_rate": 6.845492473833506e-06, "loss": 0.4741, "step": 3071 }, { "epoch": 1.5242646146208267, "grad_norm": 0.07472847969899919, "learning_rate": 6.843675654349513e-06, "loss": 0.4788, "step": 3072 }, { "epoch": 1.524761077324066, "grad_norm": 0.07256373704505083, "learning_rate": 6.841858553089258e-06, "loss": 0.4665, "step": 3073 }, { "epoch": 1.5252575400273054, "grad_norm": 0.0722750199855971, "learning_rate": 6.840041170330454e-06, "loss": 0.4601, "step": 3074 }, { "epoch": 1.5257540027305447, "grad_norm": 0.0721037848913084, "learning_rate": 6.838223506350859e-06, "loss": 0.4798, "step": 3075 }, { "epoch": 1.5262504654337843, "grad_norm": 0.07215571549842134, "learning_rate": 6.836405561428276e-06, "loss": 0.4738, "step": 3076 }, { "epoch": 1.5267469281370238, "grad_norm": 0.07342767459463316, "learning_rate": 6.834587335840549e-06, "loss": 0.5264, "step": 3077 }, { "epoch": 1.5272433908402632, "grad_norm": 0.0707351475232259, "learning_rate": 6.832768829865561e-06, "loss": 0.4981, "step": 3078 }, { "epoch": 1.5277398535435025, "grad_norm": 0.07421547342728028, "learning_rate": 6.830950043781245e-06, "loss": 0.5003, "step": 3079 }, { "epoch": 1.5282363162467418, "grad_norm": 0.07731539811606819, "learning_rate": 6.82913097786557e-06, "loss": 0.4683, "step": 3080 }, { "epoch": 1.5287327789499814, "grad_norm": 0.06928467278023695, "learning_rate": 6.827311632396553e-06, "loss": 0.4626, "step": 3081 }, { "epoch": 1.529229241653221, "grad_norm": 0.0745738000242575, "learning_rate": 6.825492007652255e-06, "loss": 0.5103, "step": 3082 }, { "epoch": 1.5297257043564603, "grad_norm": 0.07364417846410318, "learning_rate": 6.82367210391077e-06, "loss": 0.4731, "step": 3083 }, { "epoch": 1.5302221670596996, "grad_norm": 0.07028000247402512, "learning_rate": 6.821851921450246e-06, "loss": 0.4717, "step": 3084 }, { "epoch": 1.530718629762939, "grad_norm": 0.07445663844660433, "learning_rate": 6.820031460548865e-06, "loss": 0.4728, "step": 3085 }, { "epoch": 1.5312150924661785, "grad_norm": 0.07642475577058762, "learning_rate": 6.818210721484859e-06, "loss": 0.4866, "step": 3086 }, { "epoch": 1.531711555169418, "grad_norm": 0.07272701730211181, "learning_rate": 6.816389704536496e-06, "loss": 0.5144, "step": 3087 }, { "epoch": 1.5322080178726574, "grad_norm": 0.07226212774546704, "learning_rate": 6.8145684099820915e-06, "loss": 
0.4789, "step": 3088 }, { "epoch": 1.5327044805758967, "grad_norm": 0.06972510732857357, "learning_rate": 6.812746838099998e-06, "loss": 0.4781, "step": 3089 }, { "epoch": 1.533200943279136, "grad_norm": 0.07047457761812748, "learning_rate": 6.8109249891686165e-06, "loss": 0.4789, "step": 3090 }, { "epoch": 1.5336974059823756, "grad_norm": 0.07024127145574385, "learning_rate": 6.809102863466386e-06, "loss": 0.471, "step": 3091 }, { "epoch": 1.5341938686856151, "grad_norm": 0.07162497294503184, "learning_rate": 6.80728046127179e-06, "loss": 0.5013, "step": 3092 }, { "epoch": 1.5346903313888545, "grad_norm": 0.07229883941244337, "learning_rate": 6.805457782863354e-06, "loss": 0.497, "step": 3093 }, { "epoch": 1.5351867940920938, "grad_norm": 0.07135143607274454, "learning_rate": 6.803634828519643e-06, "loss": 0.4552, "step": 3094 }, { "epoch": 1.5356832567953331, "grad_norm": 0.0664211340599298, "learning_rate": 6.801811598519268e-06, "loss": 0.4231, "step": 3095 }, { "epoch": 1.5361797194985727, "grad_norm": 0.07023830081055071, "learning_rate": 6.799988093140879e-06, "loss": 0.489, "step": 3096 }, { "epoch": 1.5366761822018122, "grad_norm": 0.07314527625696632, "learning_rate": 6.7981643126631714e-06, "loss": 0.5141, "step": 3097 }, { "epoch": 1.5371726449050516, "grad_norm": 0.07361069625191462, "learning_rate": 6.796340257364879e-06, "loss": 0.5258, "step": 3098 }, { "epoch": 1.537669107608291, "grad_norm": 0.07198866226971654, "learning_rate": 6.794515927524783e-06, "loss": 0.4824, "step": 3099 }, { "epoch": 1.5381655703115302, "grad_norm": 0.0690803990888876, "learning_rate": 6.792691323421698e-06, "loss": 0.4608, "step": 3100 }, { "epoch": 1.5386620330147698, "grad_norm": 0.07486582879406296, "learning_rate": 6.790866445334489e-06, "loss": 0.5164, "step": 3101 }, { "epoch": 1.5391584957180093, "grad_norm": 0.07599038459310221, "learning_rate": 6.789041293542058e-06, "loss": 0.516, "step": 3102 }, { "epoch": 1.5396549584212487, "grad_norm": 0.07342375419107655, "learning_rate": 6.787215868323349e-06, "loss": 0.4985, "step": 3103 }, { "epoch": 1.540151421124488, "grad_norm": 0.07336496676278476, "learning_rate": 6.785390169957354e-06, "loss": 0.4882, "step": 3104 }, { "epoch": 1.5406478838277273, "grad_norm": 0.0733944918690652, "learning_rate": 6.783564198723094e-06, "loss": 0.4793, "step": 3105 }, { "epoch": 1.5411443465309669, "grad_norm": 0.07278949180379689, "learning_rate": 6.781737954899644e-06, "loss": 0.5187, "step": 3106 }, { "epoch": 1.5416408092342064, "grad_norm": 0.06996250280626114, "learning_rate": 6.779911438766117e-06, "loss": 0.4756, "step": 3107 }, { "epoch": 1.5421372719374458, "grad_norm": 0.07095598603026688, "learning_rate": 6.778084650601664e-06, "loss": 0.508, "step": 3108 }, { "epoch": 1.542633734640685, "grad_norm": 0.07131546994283615, "learning_rate": 6.776257590685485e-06, "loss": 0.4602, "step": 3109 }, { "epoch": 1.5431301973439244, "grad_norm": 0.07280569367230365, "learning_rate": 6.77443025929681e-06, "loss": 0.5203, "step": 3110 }, { "epoch": 1.543626660047164, "grad_norm": 0.0707173933711939, "learning_rate": 6.772602656714922e-06, "loss": 0.4857, "step": 3111 }, { "epoch": 1.5441231227504035, "grad_norm": 0.07197803101114433, "learning_rate": 6.770774783219139e-06, "loss": 0.4693, "step": 3112 }, { "epoch": 1.5446195854536429, "grad_norm": 0.07114587955675691, "learning_rate": 6.7689466390888216e-06, "loss": 0.4832, "step": 3113 }, { "epoch": 1.5451160481568822, "grad_norm": 0.07396760660848735, "learning_rate": 6.767118224603374e-06, "loss": 
0.4828, "step": 3114 }, { "epoch": 1.5456125108601215, "grad_norm": 0.07517982329949065, "learning_rate": 6.76528954004224e-06, "loss": 0.4974, "step": 3115 }, { "epoch": 1.546108973563361, "grad_norm": 0.07043798379945213, "learning_rate": 6.763460585684903e-06, "loss": 0.4972, "step": 3116 }, { "epoch": 1.5466054362666006, "grad_norm": 0.07254963724358023, "learning_rate": 6.761631361810892e-06, "loss": 0.4786, "step": 3117 }, { "epoch": 1.54710189896984, "grad_norm": 0.07611923909342257, "learning_rate": 6.7598018686997725e-06, "loss": 0.537, "step": 3118 }, { "epoch": 1.5475983616730793, "grad_norm": 0.07540310011286495, "learning_rate": 6.757972106631153e-06, "loss": 0.4943, "step": 3119 }, { "epoch": 1.5480948243763186, "grad_norm": 0.07568744832809565, "learning_rate": 6.756142075884685e-06, "loss": 0.4948, "step": 3120 }, { "epoch": 1.5485912870795582, "grad_norm": 0.07178159273392977, "learning_rate": 6.754311776740057e-06, "loss": 0.493, "step": 3121 }, { "epoch": 1.5490877497827977, "grad_norm": 0.07210617256147947, "learning_rate": 6.7524812094770024e-06, "loss": 0.4569, "step": 3122 }, { "epoch": 1.549584212486037, "grad_norm": 0.07661066086921471, "learning_rate": 6.750650374375293e-06, "loss": 0.5068, "step": 3123 }, { "epoch": 1.5500806751892764, "grad_norm": 0.07394053303684799, "learning_rate": 6.748819271714745e-06, "loss": 0.5012, "step": 3124 }, { "epoch": 1.5505771378925157, "grad_norm": 0.07531830271554768, "learning_rate": 6.746987901775211e-06, "loss": 0.5163, "step": 3125 }, { "epoch": 1.5510736005957553, "grad_norm": 0.07362018369668626, "learning_rate": 6.745156264836589e-06, "loss": 0.4705, "step": 3126 }, { "epoch": 1.5515700632989948, "grad_norm": 0.07498203965559165, "learning_rate": 6.743324361178811e-06, "loss": 0.5239, "step": 3127 }, { "epoch": 1.5520665260022342, "grad_norm": 0.07425384522239706, "learning_rate": 6.741492191081856e-06, "loss": 0.4918, "step": 3128 }, { "epoch": 1.5525629887054735, "grad_norm": 0.07268237609705687, "learning_rate": 6.739659754825742e-06, "loss": 0.5026, "step": 3129 }, { "epoch": 1.5530594514087128, "grad_norm": 0.0743248504811937, "learning_rate": 6.73782705269053e-06, "loss": 0.4999, "step": 3130 }, { "epoch": 1.5535559141119524, "grad_norm": 0.07084637700551116, "learning_rate": 6.735994084956317e-06, "loss": 0.4823, "step": 3131 }, { "epoch": 1.5540523768151917, "grad_norm": 0.07331080609658902, "learning_rate": 6.734160851903241e-06, "loss": 0.4629, "step": 3132 }, { "epoch": 1.5545488395184313, "grad_norm": 0.07280582900730688, "learning_rate": 6.732327353811484e-06, "loss": 0.4908, "step": 3133 }, { "epoch": 1.5550453022216706, "grad_norm": 0.07356499549409012, "learning_rate": 6.730493590961268e-06, "loss": 0.5275, "step": 3134 }, { "epoch": 1.55554176492491, "grad_norm": 0.06992613113899178, "learning_rate": 6.728659563632853e-06, "loss": 0.4796, "step": 3135 }, { "epoch": 1.5560382276281495, "grad_norm": 0.07211726923050378, "learning_rate": 6.726825272106539e-06, "loss": 0.5181, "step": 3136 }, { "epoch": 1.5565346903313888, "grad_norm": 0.07333092531161171, "learning_rate": 6.724990716662672e-06, "loss": 0.4988, "step": 3137 }, { "epoch": 1.5570311530346284, "grad_norm": 0.07593080180263549, "learning_rate": 6.7231558975816294e-06, "loss": 0.5345, "step": 3138 }, { "epoch": 1.5575276157378677, "grad_norm": 0.06944979950702822, "learning_rate": 6.721320815143837e-06, "loss": 0.4673, "step": 3139 }, { "epoch": 1.558024078441107, "grad_norm": 0.07427402559622737, "learning_rate": 6.719485469629758e-06, "loss": 
0.4742, "step": 3140 }, { "epoch": 1.5585205411443466, "grad_norm": 0.07355680254283553, "learning_rate": 6.717649861319896e-06, "loss": 0.4671, "step": 3141 }, { "epoch": 1.559017003847586, "grad_norm": 0.07307892379944664, "learning_rate": 6.715813990494793e-06, "loss": 0.4799, "step": 3142 }, { "epoch": 1.5595134665508255, "grad_norm": 0.06942389465400658, "learning_rate": 6.713977857435031e-06, "loss": 0.4788, "step": 3143 }, { "epoch": 1.5600099292540648, "grad_norm": 0.07329210200157485, "learning_rate": 6.712141462421236e-06, "loss": 0.4861, "step": 3144 }, { "epoch": 1.5605063919573041, "grad_norm": 0.07106259480596694, "learning_rate": 6.7103048057340696e-06, "loss": 0.5108, "step": 3145 }, { "epoch": 1.5610028546605437, "grad_norm": 0.07588428796875119, "learning_rate": 6.708467887654237e-06, "loss": 0.5474, "step": 3146 }, { "epoch": 1.561499317363783, "grad_norm": 0.07137860313281386, "learning_rate": 6.70663070846248e-06, "loss": 0.4952, "step": 3147 }, { "epoch": 1.5619957800670226, "grad_norm": 0.06988977725921999, "learning_rate": 6.704793268439584e-06, "loss": 0.4708, "step": 3148 }, { "epoch": 1.5624922427702619, "grad_norm": 0.0709502308910497, "learning_rate": 6.702955567866372e-06, "loss": 0.5429, "step": 3149 }, { "epoch": 1.5629887054735012, "grad_norm": 0.07302469676059627, "learning_rate": 6.7011176070237035e-06, "loss": 0.4873, "step": 3150 }, { "epoch": 1.5634851681767408, "grad_norm": 0.07060128527275819, "learning_rate": 6.699279386192487e-06, "loss": 0.4626, "step": 3151 }, { "epoch": 1.56398163087998, "grad_norm": 0.07539679176181197, "learning_rate": 6.6974409056536605e-06, "loss": 0.4675, "step": 3152 }, { "epoch": 1.5644780935832197, "grad_norm": 0.0745636123342702, "learning_rate": 6.69560216568821e-06, "loss": 0.481, "step": 3153 }, { "epoch": 1.564974556286459, "grad_norm": 0.06906599990319741, "learning_rate": 6.6937631665771545e-06, "loss": 0.4497, "step": 3154 }, { "epoch": 1.5654710189896983, "grad_norm": 0.07462061344881958, "learning_rate": 6.6919239086015545e-06, "loss": 0.4897, "step": 3155 }, { "epoch": 1.5659674816929379, "grad_norm": 0.07077635712710982, "learning_rate": 6.690084392042514e-06, "loss": 0.5235, "step": 3156 }, { "epoch": 1.5664639443961772, "grad_norm": 0.06944489107858984, "learning_rate": 6.6882446171811746e-06, "loss": 0.4841, "step": 3157 }, { "epoch": 1.5669604070994168, "grad_norm": 0.07012427338846192, "learning_rate": 6.686404584298711e-06, "loss": 0.4871, "step": 3158 }, { "epoch": 1.567456869802656, "grad_norm": 0.07176612922444559, "learning_rate": 6.68456429367635e-06, "loss": 0.4732, "step": 3159 }, { "epoch": 1.5679533325058954, "grad_norm": 0.07298056998831044, "learning_rate": 6.6827237455953435e-06, "loss": 0.5041, "step": 3160 }, { "epoch": 1.568449795209135, "grad_norm": 0.06987812192112296, "learning_rate": 6.680882940336993e-06, "loss": 0.4722, "step": 3161 }, { "epoch": 1.5689462579123743, "grad_norm": 0.0760172434224406, "learning_rate": 6.679041878182637e-06, "loss": 0.5077, "step": 3162 }, { "epoch": 1.5694427206156139, "grad_norm": 0.06996288342494414, "learning_rate": 6.677200559413652e-06, "loss": 0.4952, "step": 3163 }, { "epoch": 1.5699391833188532, "grad_norm": 0.06879675843507983, "learning_rate": 6.675358984311453e-06, "loss": 0.4839, "step": 3164 }, { "epoch": 1.5704356460220925, "grad_norm": 0.0720529922914632, "learning_rate": 6.673517153157495e-06, "loss": 0.4694, "step": 3165 }, { "epoch": 1.5709321087253318, "grad_norm": 0.0728027783553152, "learning_rate": 6.671675066233273e-06, "loss": 
0.511, "step": 3166 }, { "epoch": 1.5714285714285714, "grad_norm": 0.07316311296453923, "learning_rate": 6.669832723820321e-06, "loss": 0.4949, "step": 3167 }, { "epoch": 1.571925034131811, "grad_norm": 0.0780852417204941, "learning_rate": 6.66799012620021e-06, "loss": 0.4751, "step": 3168 }, { "epoch": 1.5724214968350503, "grad_norm": 0.07366023643229092, "learning_rate": 6.666147273654554e-06, "loss": 0.4773, "step": 3169 }, { "epoch": 1.5729179595382896, "grad_norm": 0.07005105873310478, "learning_rate": 6.664304166465e-06, "loss": 0.4912, "step": 3170 }, { "epoch": 1.573414422241529, "grad_norm": 0.07330296651903939, "learning_rate": 6.66246080491324e-06, "loss": 0.4769, "step": 3171 }, { "epoch": 1.5739108849447685, "grad_norm": 0.07135761963420995, "learning_rate": 6.660617189281001e-06, "loss": 0.506, "step": 3172 }, { "epoch": 1.574407347648008, "grad_norm": 0.07246079314389194, "learning_rate": 6.65877331985005e-06, "loss": 0.5013, "step": 3173 }, { "epoch": 1.5749038103512474, "grad_norm": 0.07514102098587554, "learning_rate": 6.656929196902195e-06, "loss": 0.4871, "step": 3174 }, { "epoch": 1.5754002730544867, "grad_norm": 0.07291536990626205, "learning_rate": 6.65508482071928e-06, "loss": 0.4911, "step": 3175 }, { "epoch": 1.575896735757726, "grad_norm": 0.07250201842234996, "learning_rate": 6.6532401915831855e-06, "loss": 0.4773, "step": 3176 }, { "epoch": 1.5763931984609656, "grad_norm": 0.07149374200149058, "learning_rate": 6.651395309775837e-06, "loss": 0.4966, "step": 3177 }, { "epoch": 1.5768896611642051, "grad_norm": 0.07309712040914346, "learning_rate": 6.649550175579191e-06, "loss": 0.4702, "step": 3178 }, { "epoch": 1.5773861238674445, "grad_norm": 0.07264584874883896, "learning_rate": 6.647704789275251e-06, "loss": 0.4663, "step": 3179 }, { "epoch": 1.5778825865706838, "grad_norm": 0.07569991453689745, "learning_rate": 6.645859151146052e-06, "loss": 0.4978, "step": 3180 }, { "epoch": 1.5783790492739231, "grad_norm": 0.0703518000533615, "learning_rate": 6.644013261473672e-06, "loss": 0.4821, "step": 3181 }, { "epoch": 1.5788755119771627, "grad_norm": 0.07370794045437885, "learning_rate": 6.642167120540224e-06, "loss": 0.5049, "step": 3182 }, { "epoch": 1.5793719746804022, "grad_norm": 0.07207862109622967, "learning_rate": 6.640320728627863e-06, "loss": 0.5071, "step": 3183 }, { "epoch": 1.5798684373836416, "grad_norm": 0.07270111894209261, "learning_rate": 6.638474086018778e-06, "loss": 0.4872, "step": 3184 }, { "epoch": 1.580364900086881, "grad_norm": 0.07115484824977912, "learning_rate": 6.636627192995201e-06, "loss": 0.4982, "step": 3185 }, { "epoch": 1.5808613627901202, "grad_norm": 0.07401912439597283, "learning_rate": 6.634780049839397e-06, "loss": 0.5041, "step": 3186 }, { "epoch": 1.5813578254933598, "grad_norm": 0.07265465103238414, "learning_rate": 6.6329326568336764e-06, "loss": 0.4624, "step": 3187 }, { "epoch": 1.5818542881965993, "grad_norm": 0.07348558258067905, "learning_rate": 6.631085014260379e-06, "loss": 0.4777, "step": 3188 }, { "epoch": 1.5823507508998387, "grad_norm": 0.06853816265650765, "learning_rate": 6.629237122401891e-06, "loss": 0.4736, "step": 3189 }, { "epoch": 1.582847213603078, "grad_norm": 0.07117687394840508, "learning_rate": 6.627388981540632e-06, "loss": 0.4864, "step": 3190 }, { "epoch": 1.5833436763063173, "grad_norm": 0.07322973094031512, "learning_rate": 6.62554059195906e-06, "loss": 0.4937, "step": 3191 }, { "epoch": 1.583840139009557, "grad_norm": 0.07230162933491331, "learning_rate": 6.623691953939672e-06, "loss": 0.503, 
"step": 3192 }, { "epoch": 1.5843366017127964, "grad_norm": 0.08004626806270065, "learning_rate": 6.6218430677650015e-06, "loss": 0.5251, "step": 3193 }, { "epoch": 1.5848330644160358, "grad_norm": 0.07451252608556116, "learning_rate": 6.619993933717623e-06, "loss": 0.5029, "step": 3194 }, { "epoch": 1.585329527119275, "grad_norm": 0.07152721415325579, "learning_rate": 6.618144552080148e-06, "loss": 0.5249, "step": 3195 }, { "epoch": 1.5858259898225144, "grad_norm": 0.06830156338501069, "learning_rate": 6.616294923135221e-06, "loss": 0.4986, "step": 3196 }, { "epoch": 1.586322452525754, "grad_norm": 0.07340013043109206, "learning_rate": 6.614445047165532e-06, "loss": 0.4847, "step": 3197 }, { "epoch": 1.5868189152289935, "grad_norm": 0.07484398073094696, "learning_rate": 6.612594924453801e-06, "loss": 0.5109, "step": 3198 }, { "epoch": 1.5873153779322329, "grad_norm": 0.07501950325032929, "learning_rate": 6.6107445552827955e-06, "loss": 0.4683, "step": 3199 }, { "epoch": 1.5878118406354722, "grad_norm": 0.07061001694221351, "learning_rate": 6.608893939935308e-06, "loss": 0.4614, "step": 3200 }, { "epoch": 1.5883083033387115, "grad_norm": 0.07201468221091423, "learning_rate": 6.607043078694179e-06, "loss": 0.4681, "step": 3201 }, { "epoch": 1.588804766041951, "grad_norm": 0.07102035784420759, "learning_rate": 6.605191971842283e-06, "loss": 0.4838, "step": 3202 }, { "epoch": 1.5893012287451906, "grad_norm": 0.07356265885096674, "learning_rate": 6.6033406196625306e-06, "loss": 0.5241, "step": 3203 }, { "epoch": 1.58979769144843, "grad_norm": 0.07013443767166846, "learning_rate": 6.6014890224378724e-06, "loss": 0.4752, "step": 3204 }, { "epoch": 1.5902941541516693, "grad_norm": 0.07009450309863739, "learning_rate": 6.599637180451295e-06, "loss": 0.4941, "step": 3205 }, { "epoch": 1.5907906168549086, "grad_norm": 0.07345078430314567, "learning_rate": 6.597785093985824e-06, "loss": 0.5187, "step": 3206 }, { "epoch": 1.5912870795581482, "grad_norm": 0.07212784017604812, "learning_rate": 6.59593276332452e-06, "loss": 0.4917, "step": 3207 }, { "epoch": 1.5917835422613877, "grad_norm": 0.07108259073549884, "learning_rate": 6.59408018875048e-06, "loss": 0.4793, "step": 3208 }, { "epoch": 1.592280004964627, "grad_norm": 0.0715048074956627, "learning_rate": 6.592227370546843e-06, "loss": 0.5008, "step": 3209 }, { "epoch": 1.5927764676678664, "grad_norm": 0.07546006913679203, "learning_rate": 6.590374308996782e-06, "loss": 0.4917, "step": 3210 }, { "epoch": 1.5932729303711057, "grad_norm": 0.0716530556098064, "learning_rate": 6.588521004383507e-06, "loss": 0.4968, "step": 3211 }, { "epoch": 1.5937693930743453, "grad_norm": 0.07083408847887936, "learning_rate": 6.5866674569902676e-06, "loss": 0.5028, "step": 3212 }, { "epoch": 1.5942658557775848, "grad_norm": 0.07115108608146581, "learning_rate": 6.584813667100347e-06, "loss": 0.5092, "step": 3213 }, { "epoch": 1.5947623184808242, "grad_norm": 0.07038895988678812, "learning_rate": 6.582959634997068e-06, "loss": 0.4479, "step": 3214 }, { "epoch": 1.5952587811840635, "grad_norm": 0.06975115394749672, "learning_rate": 6.581105360963791e-06, "loss": 0.4841, "step": 3215 }, { "epoch": 1.5957552438873028, "grad_norm": 0.07197478829125538, "learning_rate": 6.579250845283909e-06, "loss": 0.4971, "step": 3216 }, { "epoch": 1.5962517065905424, "grad_norm": 0.0711203164305087, "learning_rate": 6.577396088240857e-06, "loss": 0.4959, "step": 3217 }, { "epoch": 1.596748169293782, "grad_norm": 0.07016673110860351, "learning_rate": 6.575541090118105e-06, "loss": 0.4606, 
"step": 3218 }, { "epoch": 1.5972446319970213, "grad_norm": 0.0717158281669078, "learning_rate": 6.5736858511991585e-06, "loss": 0.5236, "step": 3219 }, { "epoch": 1.5977410947002606, "grad_norm": 0.07207805937373091, "learning_rate": 6.5718303717675614e-06, "loss": 0.4935, "step": 3220 }, { "epoch": 1.5982375574035, "grad_norm": 0.07306215422798554, "learning_rate": 6.5699746521068944e-06, "loss": 0.4995, "step": 3221 }, { "epoch": 1.5987340201067395, "grad_norm": 0.07178842625175158, "learning_rate": 6.5681186925007755e-06, "loss": 0.4961, "step": 3222 }, { "epoch": 1.599230482809979, "grad_norm": 0.07054850163936612, "learning_rate": 6.566262493232858e-06, "loss": 0.47, "step": 3223 }, { "epoch": 1.5997269455132184, "grad_norm": 0.07202065099054866, "learning_rate": 6.564406054586831e-06, "loss": 0.512, "step": 3224 }, { "epoch": 1.6002234082164577, "grad_norm": 0.07115337760806216, "learning_rate": 6.562549376846421e-06, "loss": 0.4661, "step": 3225 }, { "epoch": 1.600719870919697, "grad_norm": 0.07217097163771145, "learning_rate": 6.5606924602953925e-06, "loss": 0.4872, "step": 3226 }, { "epoch": 1.6012163336229366, "grad_norm": 0.07184519047556386, "learning_rate": 6.558835305217545e-06, "loss": 0.4918, "step": 3227 }, { "epoch": 1.6017127963261761, "grad_norm": 0.068517155266461, "learning_rate": 6.556977911896716e-06, "loss": 0.4715, "step": 3228 }, { "epoch": 1.6022092590294155, "grad_norm": 0.06873824143142059, "learning_rate": 6.555120280616777e-06, "loss": 0.4702, "step": 3229 }, { "epoch": 1.6027057217326548, "grad_norm": 0.07054371263644203, "learning_rate": 6.553262411661636e-06, "loss": 0.476, "step": 3230 }, { "epoch": 1.6032021844358941, "grad_norm": 0.07448870421576329, "learning_rate": 6.551404305315243e-06, "loss": 0.4909, "step": 3231 }, { "epoch": 1.6036986471391337, "grad_norm": 0.07600252195798972, "learning_rate": 6.5495459618615756e-06, "loss": 0.5184, "step": 3232 }, { "epoch": 1.6041951098423732, "grad_norm": 0.0748000187133278, "learning_rate": 6.547687381584653e-06, "loss": 0.4955, "step": 3233 }, { "epoch": 1.6046915725456126, "grad_norm": 0.07127300203261505, "learning_rate": 6.545828564768529e-06, "loss": 0.4823, "step": 3234 }, { "epoch": 1.605188035248852, "grad_norm": 0.07232653631996022, "learning_rate": 6.543969511697295e-06, "loss": 0.5055, "step": 3235 }, { "epoch": 1.6056844979520912, "grad_norm": 0.07128404071201468, "learning_rate": 6.542110222655076e-06, "loss": 0.4794, "step": 3236 }, { "epoch": 1.6061809606553308, "grad_norm": 0.06928059050306892, "learning_rate": 6.540250697926035e-06, "loss": 0.4699, "step": 3237 }, { "epoch": 1.6066774233585703, "grad_norm": 0.06907166245498234, "learning_rate": 6.5383909377943725e-06, "loss": 0.4755, "step": 3238 }, { "epoch": 1.6071738860618097, "grad_norm": 0.07215048758101493, "learning_rate": 6.536530942544321e-06, "loss": 0.5169, "step": 3239 }, { "epoch": 1.607670348765049, "grad_norm": 0.07093218907508307, "learning_rate": 6.534670712460151e-06, "loss": 0.4936, "step": 3240 }, { "epoch": 1.6081668114682883, "grad_norm": 0.07423867373297897, "learning_rate": 6.532810247826168e-06, "loss": 0.5185, "step": 3241 }, { "epoch": 1.6086632741715279, "grad_norm": 0.07419926462973128, "learning_rate": 6.530949548926716e-06, "loss": 0.4977, "step": 3242 }, { "epoch": 1.6091597368747674, "grad_norm": 0.07426546852534963, "learning_rate": 6.529088616046172e-06, "loss": 0.4703, "step": 3243 }, { "epoch": 1.6096561995780068, "grad_norm": 0.07032622346168317, "learning_rate": 6.527227449468951e-06, "loss": 0.4602, 
"step": 3244 }, { "epoch": 1.610152662281246, "grad_norm": 0.0739855696239581, "learning_rate": 6.525366049479501e-06, "loss": 0.4915, "step": 3245 }, { "epoch": 1.6106491249844854, "grad_norm": 0.07666709761082861, "learning_rate": 6.523504416362308e-06, "loss": 0.4845, "step": 3246 }, { "epoch": 1.611145587687725, "grad_norm": 0.07131154560337846, "learning_rate": 6.521642550401894e-06, "loss": 0.4763, "step": 3247 }, { "epoch": 1.6116420503909645, "grad_norm": 0.07160752523694873, "learning_rate": 6.519780451882811e-06, "loss": 0.4801, "step": 3248 }, { "epoch": 1.6121385130942039, "grad_norm": 0.07150793823658054, "learning_rate": 6.517918121089656e-06, "loss": 0.467, "step": 3249 }, { "epoch": 1.6126349757974432, "grad_norm": 0.0729422427737503, "learning_rate": 6.516055558307054e-06, "loss": 0.5076, "step": 3250 }, { "epoch": 1.6131314385006825, "grad_norm": 0.0727548197435697, "learning_rate": 6.5141927638196665e-06, "loss": 0.5082, "step": 3251 }, { "epoch": 1.613627901203922, "grad_norm": 0.07179615875120154, "learning_rate": 6.5123297379121944e-06, "loss": 0.4962, "step": 3252 }, { "epoch": 1.6141243639071616, "grad_norm": 0.07058848713675607, "learning_rate": 6.510466480869369e-06, "loss": 0.5126, "step": 3253 }, { "epoch": 1.614620826610401, "grad_norm": 0.07140445583852305, "learning_rate": 6.508602992975963e-06, "loss": 0.4587, "step": 3254 }, { "epoch": 1.6151172893136403, "grad_norm": 0.07630979470934252, "learning_rate": 6.506739274516777e-06, "loss": 0.5185, "step": 3255 }, { "epoch": 1.6156137520168796, "grad_norm": 0.07288163814992277, "learning_rate": 6.504875325776651e-06, "loss": 0.5012, "step": 3256 }, { "epoch": 1.6161102147201192, "grad_norm": 0.06951093014179835, "learning_rate": 6.50301114704046e-06, "loss": 0.4635, "step": 3257 }, { "epoch": 1.6166066774233587, "grad_norm": 0.07246702259048228, "learning_rate": 6.501146738593114e-06, "loss": 0.495, "step": 3258 }, { "epoch": 1.617103140126598, "grad_norm": 0.0772067141521757, "learning_rate": 6.499282100719558e-06, "loss": 0.5023, "step": 3259 }, { "epoch": 1.6175996028298374, "grad_norm": 0.07720777262425242, "learning_rate": 6.497417233704774e-06, "loss": 0.5386, "step": 3260 }, { "epoch": 1.6180960655330767, "grad_norm": 0.07009815130142644, "learning_rate": 6.495552137833774e-06, "loss": 0.4827, "step": 3261 }, { "epoch": 1.6185925282363163, "grad_norm": 0.07666255501778249, "learning_rate": 6.493686813391608e-06, "loss": 0.4698, "step": 3262 }, { "epoch": 1.6190889909395558, "grad_norm": 0.07156342643723003, "learning_rate": 6.491821260663364e-06, "loss": 0.5408, "step": 3263 }, { "epoch": 1.6195854536427952, "grad_norm": 0.0681821239446925, "learning_rate": 6.4899554799341576e-06, "loss": 0.476, "step": 3264 }, { "epoch": 1.6200819163460345, "grad_norm": 0.07136397273408063, "learning_rate": 6.488089471489147e-06, "loss": 0.5122, "step": 3265 }, { "epoch": 1.6205783790492738, "grad_norm": 0.07370498350984535, "learning_rate": 6.486223235613522e-06, "loss": 0.5162, "step": 3266 }, { "epoch": 1.6210748417525134, "grad_norm": 0.07631744016069217, "learning_rate": 6.4843567725925025e-06, "loss": 0.5008, "step": 3267 }, { "epoch": 1.621571304455753, "grad_norm": 0.06930606236752354, "learning_rate": 6.4824900827113506e-06, "loss": 0.4766, "step": 3268 }, { "epoch": 1.6220677671589923, "grad_norm": 0.0751654947836453, "learning_rate": 6.48062316625536e-06, "loss": 0.5066, "step": 3269 }, { "epoch": 1.6225642298622316, "grad_norm": 0.06943801964539678, "learning_rate": 6.478756023509859e-06, "loss": 0.476, 
"step": 3270 }, { "epoch": 1.623060692565471, "grad_norm": 0.07004301982561809, "learning_rate": 6.47688865476021e-06, "loss": 0.4733, "step": 3271 }, { "epoch": 1.6235571552687105, "grad_norm": 0.07182265834599942, "learning_rate": 6.475021060291809e-06, "loss": 0.5008, "step": 3272 }, { "epoch": 1.6240536179719498, "grad_norm": 0.07533202201536007, "learning_rate": 6.47315324039009e-06, "loss": 0.4876, "step": 3273 }, { "epoch": 1.6245500806751894, "grad_norm": 0.07085573161894548, "learning_rate": 6.471285195340517e-06, "loss": 0.4778, "step": 3274 }, { "epoch": 1.6250465433784287, "grad_norm": 0.07293460795213716, "learning_rate": 6.469416925428593e-06, "loss": 0.4879, "step": 3275 }, { "epoch": 1.625543006081668, "grad_norm": 0.06991347889183441, "learning_rate": 6.467548430939854e-06, "loss": 0.4428, "step": 3276 }, { "epoch": 1.6260394687849076, "grad_norm": 0.07308541333754964, "learning_rate": 6.4656797121598655e-06, "loss": 0.4782, "step": 3277 }, { "epoch": 1.626535931488147, "grad_norm": 0.07263307742040882, "learning_rate": 6.463810769374234e-06, "loss": 0.4984, "step": 3278 }, { "epoch": 1.6270323941913865, "grad_norm": 0.06995720887521895, "learning_rate": 6.461941602868597e-06, "loss": 0.4469, "step": 3279 }, { "epoch": 1.6275288568946258, "grad_norm": 0.07229481627052717, "learning_rate": 6.4600722129286266e-06, "loss": 0.4694, "step": 3280 }, { "epoch": 1.6280253195978651, "grad_norm": 0.07009091230720142, "learning_rate": 6.458202599840028e-06, "loss": 0.4849, "step": 3281 }, { "epoch": 1.6285217823011047, "grad_norm": 0.07308347394869531, "learning_rate": 6.456332763888544e-06, "loss": 0.5112, "step": 3282 }, { "epoch": 1.629018245004344, "grad_norm": 0.06933218981123568, "learning_rate": 6.454462705359946e-06, "loss": 0.4848, "step": 3283 }, { "epoch": 1.6295147077075836, "grad_norm": 0.07618867420111537, "learning_rate": 6.452592424540045e-06, "loss": 0.506, "step": 3284 }, { "epoch": 1.6300111704108229, "grad_norm": 0.07143326454103777, "learning_rate": 6.4507219217146825e-06, "loss": 0.4859, "step": 3285 }, { "epoch": 1.6305076331140622, "grad_norm": 0.07379665108370889, "learning_rate": 6.448851197169733e-06, "loss": 0.4789, "step": 3286 }, { "epoch": 1.6310040958173018, "grad_norm": 0.07383433957267552, "learning_rate": 6.446980251191111e-06, "loss": 0.5042, "step": 3287 }, { "epoch": 1.631500558520541, "grad_norm": 0.0695968961937613, "learning_rate": 6.445109084064758e-06, "loss": 0.4803, "step": 3288 }, { "epoch": 1.6319970212237807, "grad_norm": 0.07369256598687489, "learning_rate": 6.443237696076652e-06, "loss": 0.4869, "step": 3289 }, { "epoch": 1.63249348392702, "grad_norm": 0.07215440570531192, "learning_rate": 6.441366087512804e-06, "loss": 0.4975, "step": 3290 }, { "epoch": 1.6329899466302593, "grad_norm": 0.07468599079538513, "learning_rate": 6.439494258659259e-06, "loss": 0.5109, "step": 3291 }, { "epoch": 1.6334864093334989, "grad_norm": 0.06772469651576586, "learning_rate": 6.437622209802099e-06, "loss": 0.4995, "step": 3292 }, { "epoch": 1.6339828720367382, "grad_norm": 0.07521859544196922, "learning_rate": 6.435749941227434e-06, "loss": 0.5283, "step": 3293 }, { "epoch": 1.6344793347399778, "grad_norm": 0.0692286722927468, "learning_rate": 6.43387745322141e-06, "loss": 0.4664, "step": 3294 }, { "epoch": 1.634975797443217, "grad_norm": 0.06903400899827464, "learning_rate": 6.432004746070209e-06, "loss": 0.4662, "step": 3295 }, { "epoch": 1.6354722601464564, "grad_norm": 0.06963374521958657, "learning_rate": 6.430131820060043e-06, "loss": 0.4785, 
"step": 3296 }, { "epoch": 1.635968722849696, "grad_norm": 0.0702109802058609, "learning_rate": 6.428258675477158e-06, "loss": 0.4896, "step": 3297 }, { "epoch": 1.6364651855529353, "grad_norm": 0.07196211846484113, "learning_rate": 6.426385312607837e-06, "loss": 0.5137, "step": 3298 }, { "epoch": 1.6369616482561749, "grad_norm": 0.07225873948893603, "learning_rate": 6.424511731738389e-06, "loss": 0.4952, "step": 3299 }, { "epoch": 1.6374581109594142, "grad_norm": 0.0759441267344298, "learning_rate": 6.4226379331551625e-06, "loss": 0.4939, "step": 3300 }, { "epoch": 1.6379545736626535, "grad_norm": 0.07168285148556738, "learning_rate": 6.420763917144539e-06, "loss": 0.4928, "step": 3301 }, { "epoch": 1.638451036365893, "grad_norm": 0.07063050009803296, "learning_rate": 6.4188896839929314e-06, "loss": 0.4916, "step": 3302 }, { "epoch": 1.6389474990691324, "grad_norm": 0.07020108879688203, "learning_rate": 6.417015233986786e-06, "loss": 0.4688, "step": 3303 }, { "epoch": 1.639443961772372, "grad_norm": 0.07062473758265202, "learning_rate": 6.415140567412583e-06, "loss": 0.4642, "step": 3304 }, { "epoch": 1.6399404244756113, "grad_norm": 0.07243674409944932, "learning_rate": 6.413265684556833e-06, "loss": 0.496, "step": 3305 }, { "epoch": 1.6404368871788506, "grad_norm": 0.0748780185500087, "learning_rate": 6.4113905857060835e-06, "loss": 0.5193, "step": 3306 }, { "epoch": 1.64093334988209, "grad_norm": 0.07086725699518998, "learning_rate": 6.409515271146912e-06, "loss": 0.4582, "step": 3307 }, { "epoch": 1.6414298125853295, "grad_norm": 0.07281673976046933, "learning_rate": 6.4076397411659316e-06, "loss": 0.4832, "step": 3308 }, { "epoch": 1.641926275288569, "grad_norm": 0.07024856454439375, "learning_rate": 6.405763996049788e-06, "loss": 0.4531, "step": 3309 }, { "epoch": 1.6424227379918084, "grad_norm": 0.07035789172828415, "learning_rate": 6.403888036085155e-06, "loss": 0.4604, "step": 3310 }, { "epoch": 1.6429192006950477, "grad_norm": 0.07489507707353497, "learning_rate": 6.402011861558748e-06, "loss": 0.5046, "step": 3311 }, { "epoch": 1.643415663398287, "grad_norm": 0.07344936173620556, "learning_rate": 6.400135472757305e-06, "loss": 0.468, "step": 3312 }, { "epoch": 1.6439121261015266, "grad_norm": 0.07071152024432985, "learning_rate": 6.398258869967606e-06, "loss": 0.4573, "step": 3313 }, { "epoch": 1.6444085888047661, "grad_norm": 0.07228608382103503, "learning_rate": 6.396382053476459e-06, "loss": 0.519, "step": 3314 }, { "epoch": 1.6449050515080055, "grad_norm": 0.07024217535197713, "learning_rate": 6.394505023570702e-06, "loss": 0.5054, "step": 3315 }, { "epoch": 1.6454015142112448, "grad_norm": 0.07022134255927119, "learning_rate": 6.392627780537212e-06, "loss": 0.4658, "step": 3316 }, { "epoch": 1.6458979769144841, "grad_norm": 0.0717953161572256, "learning_rate": 6.390750324662895e-06, "loss": 0.4899, "step": 3317 }, { "epoch": 1.6463944396177237, "grad_norm": 0.07671132428182728, "learning_rate": 6.388872656234689e-06, "loss": 0.5285, "step": 3318 }, { "epoch": 1.6468909023209632, "grad_norm": 0.07014994651509567, "learning_rate": 6.386994775539569e-06, "loss": 0.4845, "step": 3319 }, { "epoch": 1.6473873650242026, "grad_norm": 0.0717433473722914, "learning_rate": 6.3851166828645354e-06, "loss": 0.5168, "step": 3320 }, { "epoch": 1.647883827727442, "grad_norm": 0.07534612081049234, "learning_rate": 6.383238378496624e-06, "loss": 0.5067, "step": 3321 }, { "epoch": 1.6483802904306812, "grad_norm": 0.07416068493187741, "learning_rate": 6.381359862722905e-06, "loss": 0.5052, 
"step": 3322 }, { "epoch": 1.6488767531339208, "grad_norm": 0.0738384042184749, "learning_rate": 6.379481135830481e-06, "loss": 0.4627, "step": 3323 }, { "epoch": 1.6493732158371603, "grad_norm": 0.07395306527127492, "learning_rate": 6.3776021981064825e-06, "loss": 0.48, "step": 3324 }, { "epoch": 1.6498696785403997, "grad_norm": 0.06933352802336462, "learning_rate": 6.375723049838077e-06, "loss": 0.4645, "step": 3325 }, { "epoch": 1.650366141243639, "grad_norm": 0.07507598422114344, "learning_rate": 6.37384369131246e-06, "loss": 0.4641, "step": 3326 }, { "epoch": 1.6508626039468783, "grad_norm": 0.07092097177784223, "learning_rate": 6.371964122816865e-06, "loss": 0.4931, "step": 3327 }, { "epoch": 1.651359066650118, "grad_norm": 0.0717024682861258, "learning_rate": 6.3700843446385495e-06, "loss": 0.4808, "step": 3328 }, { "epoch": 1.6518555293533574, "grad_norm": 0.07215914605506774, "learning_rate": 6.3682043570648115e-06, "loss": 0.5071, "step": 3329 }, { "epoch": 1.6523519920565968, "grad_norm": 0.07186036631806916, "learning_rate": 6.366324160382974e-06, "loss": 0.5117, "step": 3330 }, { "epoch": 1.652848454759836, "grad_norm": 0.07690482622531732, "learning_rate": 6.364443754880395e-06, "loss": 0.4808, "step": 3331 }, { "epoch": 1.6533449174630754, "grad_norm": 0.0730255508982856, "learning_rate": 6.362563140844465e-06, "loss": 0.4823, "step": 3332 }, { "epoch": 1.653841380166315, "grad_norm": 0.07022155162274443, "learning_rate": 6.360682318562607e-06, "loss": 0.4958, "step": 3333 }, { "epoch": 1.6543378428695545, "grad_norm": 0.0742115762662353, "learning_rate": 6.358801288322274e-06, "loss": 0.5135, "step": 3334 }, { "epoch": 1.6548343055727939, "grad_norm": 0.0718887982711449, "learning_rate": 6.3569200504109505e-06, "loss": 0.5174, "step": 3335 }, { "epoch": 1.6553307682760332, "grad_norm": 0.07166615114113524, "learning_rate": 6.355038605116155e-06, "loss": 0.4916, "step": 3336 }, { "epoch": 1.6558272309792725, "grad_norm": 0.0691153247633263, "learning_rate": 6.353156952725432e-06, "loss": 0.476, "step": 3337 }, { "epoch": 1.656323693682512, "grad_norm": 0.07031968237098923, "learning_rate": 6.3512750935263664e-06, "loss": 0.5051, "step": 3338 }, { "epoch": 1.6568201563857516, "grad_norm": 0.0726129063754758, "learning_rate": 6.349393027806569e-06, "loss": 0.4881, "step": 3339 }, { "epoch": 1.657316619088991, "grad_norm": 0.07360865260049655, "learning_rate": 6.347510755853683e-06, "loss": 0.5138, "step": 3340 }, { "epoch": 1.6578130817922303, "grad_norm": 0.07271768270838276, "learning_rate": 6.345628277955384e-06, "loss": 0.4731, "step": 3341 }, { "epoch": 1.6583095444954696, "grad_norm": 0.0714115408833875, "learning_rate": 6.3437455943993785e-06, "loss": 0.4505, "step": 3342 }, { "epoch": 1.6588060071987092, "grad_norm": 0.0707543409992033, "learning_rate": 6.341862705473405e-06, "loss": 0.5006, "step": 3343 }, { "epoch": 1.6593024699019487, "grad_norm": 0.07128065259021685, "learning_rate": 6.339979611465231e-06, "loss": 0.4637, "step": 3344 }, { "epoch": 1.659798932605188, "grad_norm": 0.06932455519727876, "learning_rate": 6.338096312662658e-06, "loss": 0.4929, "step": 3345 }, { "epoch": 1.6602953953084274, "grad_norm": 0.07738642167451999, "learning_rate": 6.336212809353518e-06, "loss": 0.5045, "step": 3346 }, { "epoch": 1.6607918580116667, "grad_norm": 0.07115439790368483, "learning_rate": 6.334329101825676e-06, "loss": 0.5263, "step": 3347 }, { "epoch": 1.6612883207149063, "grad_norm": 0.06896941474755712, "learning_rate": 6.332445190367025e-06, "loss": 0.4706, 
"step": 3348 }, { "epoch": 1.6617847834181458, "grad_norm": 0.06890612750608131, "learning_rate": 6.330561075265489e-06, "loss": 0.4599, "step": 3349 }, { "epoch": 1.6622812461213852, "grad_norm": 0.0713066211215429, "learning_rate": 6.328676756809028e-06, "loss": 0.5373, "step": 3350 }, { "epoch": 1.6627777088246245, "grad_norm": 0.07394098379135165, "learning_rate": 6.326792235285628e-06, "loss": 0.494, "step": 3351 }, { "epoch": 1.6632741715278638, "grad_norm": 0.06792132504176879, "learning_rate": 6.32490751098331e-06, "loss": 0.4471, "step": 3352 }, { "epoch": 1.6637706342311034, "grad_norm": 0.06981735971850177, "learning_rate": 6.323022584190121e-06, "loss": 0.4812, "step": 3353 }, { "epoch": 1.664267096934343, "grad_norm": 0.07156250252199886, "learning_rate": 6.321137455194142e-06, "loss": 0.4741, "step": 3354 }, { "epoch": 1.6647635596375823, "grad_norm": 0.07252274014404607, "learning_rate": 6.319252124283486e-06, "loss": 0.4727, "step": 3355 }, { "epoch": 1.6652600223408216, "grad_norm": 0.07105885580438999, "learning_rate": 6.317366591746296e-06, "loss": 0.513, "step": 3356 }, { "epoch": 1.665756485044061, "grad_norm": 0.06892471295198868, "learning_rate": 6.315480857870746e-06, "loss": 0.4579, "step": 3357 }, { "epoch": 1.6662529477473005, "grad_norm": 0.07137113284562825, "learning_rate": 6.313594922945036e-06, "loss": 0.4955, "step": 3358 }, { "epoch": 1.66674941045054, "grad_norm": 0.07259766337382742, "learning_rate": 6.311708787257408e-06, "loss": 0.4861, "step": 3359 }, { "epoch": 1.6672458731537794, "grad_norm": 0.06910578629300906, "learning_rate": 6.30982245109612e-06, "loss": 0.4665, "step": 3360 }, { "epoch": 1.6677423358570187, "grad_norm": 0.06959429004837796, "learning_rate": 6.307935914749473e-06, "loss": 0.4966, "step": 3361 }, { "epoch": 1.668238798560258, "grad_norm": 0.07166836668037606, "learning_rate": 6.306049178505793e-06, "loss": 0.4742, "step": 3362 }, { "epoch": 1.6687352612634976, "grad_norm": 0.07386543999500932, "learning_rate": 6.304162242653437e-06, "loss": 0.4785, "step": 3363 }, { "epoch": 1.6692317239667371, "grad_norm": 0.07432261885864039, "learning_rate": 6.302275107480792e-06, "loss": 0.4758, "step": 3364 }, { "epoch": 1.6697281866699765, "grad_norm": 0.07336508318681913, "learning_rate": 6.300387773276278e-06, "loss": 0.4744, "step": 3365 }, { "epoch": 1.6702246493732158, "grad_norm": 0.07083645305442617, "learning_rate": 6.298500240328342e-06, "loss": 0.4725, "step": 3366 }, { "epoch": 1.6707211120764551, "grad_norm": 0.07163096680010762, "learning_rate": 6.296612508925466e-06, "loss": 0.4807, "step": 3367 }, { "epoch": 1.6712175747796947, "grad_norm": 0.06990183990931721, "learning_rate": 6.294724579356157e-06, "loss": 0.4683, "step": 3368 }, { "epoch": 1.6717140374829342, "grad_norm": 0.07405777347762747, "learning_rate": 6.292836451908955e-06, "loss": 0.5003, "step": 3369 }, { "epoch": 1.6722105001861736, "grad_norm": 0.07315404944387507, "learning_rate": 6.290948126872429e-06, "loss": 0.5086, "step": 3370 }, { "epoch": 1.672706962889413, "grad_norm": 0.06840865988701067, "learning_rate": 6.289059604535182e-06, "loss": 0.489, "step": 3371 }, { "epoch": 1.6732034255926522, "grad_norm": 0.07184124769888703, "learning_rate": 6.2871708851858414e-06, "loss": 0.4989, "step": 3372 }, { "epoch": 1.6736998882958918, "grad_norm": 0.07060758468264917, "learning_rate": 6.285281969113072e-06, "loss": 0.4608, "step": 3373 }, { "epoch": 1.6741963509991313, "grad_norm": 0.0693710380692579, "learning_rate": 6.283392856605559e-06, "loss": 0.4521, 
"step": 3374 }, { "epoch": 1.6746928137023707, "grad_norm": 0.0738048416478676, "learning_rate": 6.281503547952027e-06, "loss": 0.4936, "step": 3375 }, { "epoch": 1.67518927640561, "grad_norm": 0.07735478008819657, "learning_rate": 6.279614043441226e-06, "loss": 0.5336, "step": 3376 }, { "epoch": 1.6756857391088493, "grad_norm": 0.06963549926932651, "learning_rate": 6.277724343361933e-06, "loss": 0.4604, "step": 3377 }, { "epoch": 1.6761822018120889, "grad_norm": 0.06947653830726935, "learning_rate": 6.275834448002962e-06, "loss": 0.4765, "step": 3378 }, { "epoch": 1.6766786645153284, "grad_norm": 0.06876300652452302, "learning_rate": 6.273944357653152e-06, "loss": 0.4757, "step": 3379 }, { "epoch": 1.6771751272185678, "grad_norm": 0.07182670922356638, "learning_rate": 6.272054072601374e-06, "loss": 0.4886, "step": 3380 }, { "epoch": 1.677671589921807, "grad_norm": 0.07003890530333579, "learning_rate": 6.270163593136525e-06, "loss": 0.5033, "step": 3381 }, { "epoch": 1.6781680526250464, "grad_norm": 0.07243728795553099, "learning_rate": 6.268272919547537e-06, "loss": 0.5108, "step": 3382 }, { "epoch": 1.678664515328286, "grad_norm": 0.07219023545275664, "learning_rate": 6.266382052123369e-06, "loss": 0.5075, "step": 3383 }, { "epoch": 1.6791609780315255, "grad_norm": 0.07226117961469808, "learning_rate": 6.26449099115301e-06, "loss": 0.4954, "step": 3384 }, { "epoch": 1.6796574407347649, "grad_norm": 0.07054829849640398, "learning_rate": 6.2625997369254765e-06, "loss": 0.4919, "step": 3385 }, { "epoch": 1.6801539034380042, "grad_norm": 0.07199319774173396, "learning_rate": 6.260708289729818e-06, "loss": 0.503, "step": 3386 }, { "epoch": 1.6806503661412435, "grad_norm": 0.07052930419325945, "learning_rate": 6.258816649855109e-06, "loss": 0.4896, "step": 3387 }, { "epoch": 1.681146828844483, "grad_norm": 0.06905137996462095, "learning_rate": 6.2569248175904615e-06, "loss": 0.4785, "step": 3388 }, { "epoch": 1.6816432915477226, "grad_norm": 0.07053248386925157, "learning_rate": 6.2550327932250085e-06, "loss": 0.4925, "step": 3389 }, { "epoch": 1.682139754250962, "grad_norm": 0.07224569722130686, "learning_rate": 6.2531405770479146e-06, "loss": 0.4706, "step": 3390 }, { "epoch": 1.6826362169542013, "grad_norm": 0.07274752966145347, "learning_rate": 6.251248169348376e-06, "loss": 0.4741, "step": 3391 }, { "epoch": 1.6831326796574406, "grad_norm": 0.07165526373877322, "learning_rate": 6.24935557041562e-06, "loss": 0.4764, "step": 3392 }, { "epoch": 1.6836291423606802, "grad_norm": 0.06999372308594969, "learning_rate": 6.247462780538893e-06, "loss": 0.4656, "step": 3393 }, { "epoch": 1.6841256050639197, "grad_norm": 0.07399732812656597, "learning_rate": 6.245569800007484e-06, "loss": 0.5117, "step": 3394 }, { "epoch": 1.684622067767159, "grad_norm": 0.07362880549471516, "learning_rate": 6.243676629110702e-06, "loss": 0.5538, "step": 3395 }, { "epoch": 1.6851185304703984, "grad_norm": 0.0727603851572235, "learning_rate": 6.241783268137888e-06, "loss": 0.5069, "step": 3396 }, { "epoch": 1.6856149931736377, "grad_norm": 0.07428177312369587, "learning_rate": 6.239889717378411e-06, "loss": 0.5192, "step": 3397 }, { "epoch": 1.6861114558768773, "grad_norm": 0.07459257178624475, "learning_rate": 6.2379959771216716e-06, "loss": 0.4922, "step": 3398 }, { "epoch": 1.6866079185801168, "grad_norm": 0.06924537401803085, "learning_rate": 6.236102047657096e-06, "loss": 0.5061, "step": 3399 }, { "epoch": 1.6871043812833562, "grad_norm": 0.07243627093577376, "learning_rate": 6.234207929274143e-06, "loss": 0.4906, 
"step": 3400 }, { "epoch": 1.6876008439865955, "grad_norm": 0.07172235410923561, "learning_rate": 6.232313622262297e-06, "loss": 0.4827, "step": 3401 }, { "epoch": 1.6880973066898348, "grad_norm": 0.06923536972014138, "learning_rate": 6.230419126911072e-06, "loss": 0.4546, "step": 3402 }, { "epoch": 1.6885937693930744, "grad_norm": 0.071292437387297, "learning_rate": 6.228524443510011e-06, "loss": 0.5121, "step": 3403 }, { "epoch": 1.689090232096314, "grad_norm": 0.06941047555292386, "learning_rate": 6.226629572348687e-06, "loss": 0.4657, "step": 3404 }, { "epoch": 1.6895866947995533, "grad_norm": 0.07432595643381897, "learning_rate": 6.224734513716702e-06, "loss": 0.4883, "step": 3405 }, { "epoch": 1.6900831575027926, "grad_norm": 0.07122928064283822, "learning_rate": 6.222839267903682e-06, "loss": 0.4678, "step": 3406 }, { "epoch": 1.690579620206032, "grad_norm": 0.06841699929757633, "learning_rate": 6.220943835199286e-06, "loss": 0.4791, "step": 3407 }, { "epoch": 1.6910760829092715, "grad_norm": 0.06823369275178195, "learning_rate": 6.219048215893204e-06, "loss": 0.4514, "step": 3408 }, { "epoch": 1.691572545612511, "grad_norm": 0.06835513269888316, "learning_rate": 6.2171524102751454e-06, "loss": 0.47, "step": 3409 }, { "epoch": 1.6920690083157504, "grad_norm": 0.07859367473920947, "learning_rate": 6.215256418634858e-06, "loss": 0.5113, "step": 3410 }, { "epoch": 1.6925654710189897, "grad_norm": 0.07080399011507223, "learning_rate": 6.2133602412621116e-06, "loss": 0.4849, "step": 3411 }, { "epoch": 1.693061933722229, "grad_norm": 0.06993443237391803, "learning_rate": 6.211463878446708e-06, "loss": 0.4665, "step": 3412 }, { "epoch": 1.6935583964254686, "grad_norm": 0.07103193818369047, "learning_rate": 6.209567330478473e-06, "loss": 0.4787, "step": 3413 }, { "epoch": 1.694054859128708, "grad_norm": 0.0718499238641342, "learning_rate": 6.207670597647266e-06, "loss": 0.4775, "step": 3414 }, { "epoch": 1.6945513218319475, "grad_norm": 0.0713450002416761, "learning_rate": 6.2057736802429724e-06, "loss": 0.4963, "step": 3415 }, { "epoch": 1.6950477845351868, "grad_norm": 0.07188266897364559, "learning_rate": 6.203876578555506e-06, "loss": 0.477, "step": 3416 }, { "epoch": 1.6955442472384261, "grad_norm": 0.07065734028417589, "learning_rate": 6.201979292874805e-06, "loss": 0.4843, "step": 3417 }, { "epoch": 1.6960407099416657, "grad_norm": 0.07454692730874736, "learning_rate": 6.200081823490842e-06, "loss": 0.496, "step": 3418 }, { "epoch": 1.696537172644905, "grad_norm": 0.07376467418188615, "learning_rate": 6.198184170693615e-06, "loss": 0.5022, "step": 3419 }, { "epoch": 1.6970336353481446, "grad_norm": 0.07109089901080787, "learning_rate": 6.196286334773148e-06, "loss": 0.454, "step": 3420 }, { "epoch": 1.6975300980513839, "grad_norm": 0.07441012396251873, "learning_rate": 6.194388316019495e-06, "loss": 0.4949, "step": 3421 }, { "epoch": 1.6980265607546232, "grad_norm": 0.0737655799446165, "learning_rate": 6.192490114722741e-06, "loss": 0.5255, "step": 3422 }, { "epoch": 1.6985230234578628, "grad_norm": 0.07027598683919967, "learning_rate": 6.1905917311729915e-06, "loss": 0.476, "step": 3423 }, { "epoch": 1.699019486161102, "grad_norm": 0.07501683188473765, "learning_rate": 6.188693165660387e-06, "loss": 0.5079, "step": 3424 }, { "epoch": 1.6995159488643417, "grad_norm": 0.07110659680282254, "learning_rate": 6.1867944184750894e-06, "loss": 0.4953, "step": 3425 }, { "epoch": 1.700012411567581, "grad_norm": 0.0711979716541948, "learning_rate": 6.184895489907293e-06, "loss": 0.4677, "step": 
3426 }, { "epoch": 1.7005088742708203, "grad_norm": 0.07512243922895441, "learning_rate": 6.182996380247223e-06, "loss": 0.4969, "step": 3427 }, { "epoch": 1.7010053369740599, "grad_norm": 0.07291393767148943, "learning_rate": 6.181097089785121e-06, "loss": 0.4645, "step": 3428 }, { "epoch": 1.7015017996772992, "grad_norm": 0.071546062557215, "learning_rate": 6.179197618811267e-06, "loss": 0.4995, "step": 3429 }, { "epoch": 1.7019982623805388, "grad_norm": 0.07167043597024016, "learning_rate": 6.177297967615964e-06, "loss": 0.4999, "step": 3430 }, { "epoch": 1.702494725083778, "grad_norm": 0.07257860964521468, "learning_rate": 6.175398136489542e-06, "loss": 0.5015, "step": 3431 }, { "epoch": 1.7029911877870174, "grad_norm": 0.07284246143070047, "learning_rate": 6.173498125722363e-06, "loss": 0.484, "step": 3432 }, { "epoch": 1.703487650490257, "grad_norm": 0.0708350136827653, "learning_rate": 6.171597935604811e-06, "loss": 0.5185, "step": 3433 }, { "epoch": 1.7039841131934963, "grad_norm": 0.07204652200212407, "learning_rate": 6.1696975664273e-06, "loss": 0.5097, "step": 3434 }, { "epoch": 1.7044805758967358, "grad_norm": 0.07273453576826858, "learning_rate": 6.167797018480268e-06, "loss": 0.4757, "step": 3435 }, { "epoch": 1.7049770385999752, "grad_norm": 0.07288642192421328, "learning_rate": 6.1658962920541875e-06, "loss": 0.5086, "step": 3436 }, { "epoch": 1.7054735013032145, "grad_norm": 0.06952770295584065, "learning_rate": 6.1639953874395534e-06, "loss": 0.4691, "step": 3437 }, { "epoch": 1.705969964006454, "grad_norm": 0.07091606836576105, "learning_rate": 6.1620943049268865e-06, "loss": 0.4848, "step": 3438 }, { "epoch": 1.7064664267096934, "grad_norm": 0.07492107718253822, "learning_rate": 6.160193044806738e-06, "loss": 0.5146, "step": 3439 }, { "epoch": 1.706962889412933, "grad_norm": 0.07019956876848898, "learning_rate": 6.158291607369686e-06, "loss": 0.5008, "step": 3440 }, { "epoch": 1.7074593521161723, "grad_norm": 0.07279278421648618, "learning_rate": 6.156389992906332e-06, "loss": 0.4932, "step": 3441 }, { "epoch": 1.7079558148194116, "grad_norm": 0.07522974421670806, "learning_rate": 6.154488201707309e-06, "loss": 0.4831, "step": 3442 }, { "epoch": 1.7084522775226512, "grad_norm": 0.07153871451762317, "learning_rate": 6.152586234063277e-06, "loss": 0.4855, "step": 3443 }, { "epoch": 1.7089487402258905, "grad_norm": 0.07244260575924184, "learning_rate": 6.150684090264918e-06, "loss": 0.502, "step": 3444 }, { "epoch": 1.70944520292913, "grad_norm": 0.07312446444547795, "learning_rate": 6.148781770602945e-06, "loss": 0.4932, "step": 3445 }, { "epoch": 1.7099416656323694, "grad_norm": 0.07183132744757643, "learning_rate": 6.146879275368098e-06, "loss": 0.4644, "step": 3446 }, { "epoch": 1.7104381283356087, "grad_norm": 0.0728091776261513, "learning_rate": 6.144976604851143e-06, "loss": 0.4797, "step": 3447 }, { "epoch": 1.710934591038848, "grad_norm": 0.07686865515523557, "learning_rate": 6.143073759342872e-06, "loss": 0.4613, "step": 3448 }, { "epoch": 1.7114310537420876, "grad_norm": 0.06862548003626924, "learning_rate": 6.141170739134107e-06, "loss": 0.4723, "step": 3449 }, { "epoch": 1.7119275164453271, "grad_norm": 0.0698538911972819, "learning_rate": 6.139267544515689e-06, "loss": 0.4839, "step": 3450 }, { "epoch": 1.7124239791485665, "grad_norm": 0.07008192081244582, "learning_rate": 6.1373641757784945e-06, "loss": 0.468, "step": 3451 }, { "epoch": 1.7129204418518058, "grad_norm": 0.07095298359236782, "learning_rate": 6.135460633213422e-06, "loss": 0.5054, "step": 3452 
}, { "epoch": 1.7134169045550451, "grad_norm": 0.07446355810920537, "learning_rate": 6.133556917111396e-06, "loss": 0.5204, "step": 3453 }, { "epoch": 1.7139133672582847, "grad_norm": 0.07115588859641515, "learning_rate": 6.131653027763372e-06, "loss": 0.4861, "step": 3454 }, { "epoch": 1.7144098299615242, "grad_norm": 0.0730323307012295, "learning_rate": 6.129748965460327e-06, "loss": 0.531, "step": 3455 }, { "epoch": 1.7149062926647636, "grad_norm": 0.0728929348845048, "learning_rate": 6.127844730493267e-06, "loss": 0.5021, "step": 3456 }, { "epoch": 1.715402755368003, "grad_norm": 0.07088824802373886, "learning_rate": 6.125940323153223e-06, "loss": 0.4749, "step": 3457 }, { "epoch": 1.7158992180712422, "grad_norm": 0.07616952737021909, "learning_rate": 6.1240357437312544e-06, "loss": 0.4908, "step": 3458 }, { "epoch": 1.7163956807744818, "grad_norm": 0.06961257299018885, "learning_rate": 6.122130992518444e-06, "loss": 0.4518, "step": 3459 }, { "epoch": 1.7168921434777213, "grad_norm": 0.06954236191260457, "learning_rate": 6.120226069805904e-06, "loss": 0.4813, "step": 3460 }, { "epoch": 1.7173886061809607, "grad_norm": 0.07114725015517909, "learning_rate": 6.1183209758847715e-06, "loss": 0.4787, "step": 3461 }, { "epoch": 1.7178850688842, "grad_norm": 0.06980692059756774, "learning_rate": 6.116415711046208e-06, "loss": 0.4688, "step": 3462 }, { "epoch": 1.7183815315874393, "grad_norm": 0.07553272063236754, "learning_rate": 6.114510275581402e-06, "loss": 0.5312, "step": 3463 }, { "epoch": 1.718877994290679, "grad_norm": 0.07750265108845414, "learning_rate": 6.112604669781572e-06, "loss": 0.5201, "step": 3464 }, { "epoch": 1.7193744569939184, "grad_norm": 0.07239752989184631, "learning_rate": 6.1106988939379584e-06, "loss": 0.4487, "step": 3465 }, { "epoch": 1.7198709196971578, "grad_norm": 0.07267185258184378, "learning_rate": 6.108792948341826e-06, "loss": 0.5139, "step": 3466 }, { "epoch": 1.720367382400397, "grad_norm": 0.06984553929988284, "learning_rate": 6.106886833284469e-06, "loss": 0.488, "step": 3467 }, { "epoch": 1.7208638451036364, "grad_norm": 0.07633088437476376, "learning_rate": 6.104980549057208e-06, "loss": 0.5496, "step": 3468 }, { "epoch": 1.721360307806876, "grad_norm": 0.07185447533511291, "learning_rate": 6.103074095951387e-06, "loss": 0.4559, "step": 3469 }, { "epoch": 1.7218567705101155, "grad_norm": 0.07159619149542636, "learning_rate": 6.101167474258377e-06, "loss": 0.4655, "step": 3470 }, { "epoch": 1.7223532332133549, "grad_norm": 0.07021731374652906, "learning_rate": 6.0992606842695745e-06, "loss": 0.4529, "step": 3471 }, { "epoch": 1.7228496959165942, "grad_norm": 0.07932591092305574, "learning_rate": 6.0973537262764024e-06, "loss": 0.5198, "step": 3472 }, { "epoch": 1.7233461586198335, "grad_norm": 0.07128172870853265, "learning_rate": 6.095446600570306e-06, "loss": 0.4945, "step": 3473 }, { "epoch": 1.723842621323073, "grad_norm": 0.07099044484886086, "learning_rate": 6.093539307442762e-06, "loss": 0.4538, "step": 3474 }, { "epoch": 1.7243390840263126, "grad_norm": 0.06842920295839011, "learning_rate": 6.091631847185268e-06, "loss": 0.4562, "step": 3475 }, { "epoch": 1.724835546729552, "grad_norm": 0.0739575588141388, "learning_rate": 6.089724220089351e-06, "loss": 0.5191, "step": 3476 }, { "epoch": 1.7253320094327913, "grad_norm": 0.08136158471753265, "learning_rate": 6.087816426446557e-06, "loss": 0.4862, "step": 3477 }, { "epoch": 1.7258284721360306, "grad_norm": 0.07013805452078947, "learning_rate": 6.0859084665484645e-06, "loss": 0.4773, "step": 3478 
}, { "epoch": 1.7263249348392702, "grad_norm": 0.0739863259677037, "learning_rate": 6.084000340686674e-06, "loss": 0.4895, "step": 3479 }, { "epoch": 1.7268213975425097, "grad_norm": 0.07078698868835145, "learning_rate": 6.082092049152813e-06, "loss": 0.4581, "step": 3480 }, { "epoch": 1.727317860245749, "grad_norm": 0.07130648089053174, "learning_rate": 6.080183592238533e-06, "loss": 0.4964, "step": 3481 }, { "epoch": 1.7278143229489884, "grad_norm": 0.07373187736910049, "learning_rate": 6.078274970235509e-06, "loss": 0.4881, "step": 3482 }, { "epoch": 1.7283107856522277, "grad_norm": 0.07263393261901284, "learning_rate": 6.076366183435445e-06, "loss": 0.4675, "step": 3483 }, { "epoch": 1.7288072483554673, "grad_norm": 0.07232290837907768, "learning_rate": 6.074457232130067e-06, "loss": 0.5158, "step": 3484 }, { "epoch": 1.7293037110587068, "grad_norm": 0.07569196194716082, "learning_rate": 6.07254811661113e-06, "loss": 0.5033, "step": 3485 }, { "epoch": 1.7298001737619462, "grad_norm": 0.0737678773736528, "learning_rate": 6.0706388371704104e-06, "loss": 0.5094, "step": 3486 }, { "epoch": 1.7302966364651855, "grad_norm": 0.0767658102737874, "learning_rate": 6.068729394099711e-06, "loss": 0.5083, "step": 3487 }, { "epoch": 1.7307930991684248, "grad_norm": 0.07118568963078373, "learning_rate": 6.066819787690859e-06, "loss": 0.4844, "step": 3488 }, { "epoch": 1.7312895618716644, "grad_norm": 0.0733650587823233, "learning_rate": 6.064910018235707e-06, "loss": 0.5007, "step": 3489 }, { "epoch": 1.731786024574904, "grad_norm": 0.07419443969678913, "learning_rate": 6.063000086026134e-06, "loss": 0.4936, "step": 3490 }, { "epoch": 1.7322824872781433, "grad_norm": 0.07144300280941726, "learning_rate": 6.061089991354041e-06, "loss": 0.5035, "step": 3491 }, { "epoch": 1.7327789499813826, "grad_norm": 0.07269441526800977, "learning_rate": 6.059179734511357e-06, "loss": 0.5006, "step": 3492 }, { "epoch": 1.733275412684622, "grad_norm": 0.06735909256472328, "learning_rate": 6.057269315790033e-06, "loss": 0.4787, "step": 3493 }, { "epoch": 1.7337718753878615, "grad_norm": 0.07101972753252063, "learning_rate": 6.055358735482045e-06, "loss": 0.5026, "step": 3494 }, { "epoch": 1.734268338091101, "grad_norm": 0.07316133121592043, "learning_rate": 6.053447993879397e-06, "loss": 0.5288, "step": 3495 }, { "epoch": 1.7347648007943404, "grad_norm": 0.07072090733545723, "learning_rate": 6.051537091274115e-06, "loss": 0.477, "step": 3496 }, { "epoch": 1.7352612634975797, "grad_norm": 0.06911247409586789, "learning_rate": 6.049626027958246e-06, "loss": 0.4501, "step": 3497 }, { "epoch": 1.735757726200819, "grad_norm": 0.07632319474987508, "learning_rate": 6.047714804223871e-06, "loss": 0.5038, "step": 3498 }, { "epoch": 1.7362541889040586, "grad_norm": 0.07068492441512336, "learning_rate": 6.045803420363085e-06, "loss": 0.4719, "step": 3499 }, { "epoch": 1.7367506516072981, "grad_norm": 0.08074542221028214, "learning_rate": 6.043891876668015e-06, "loss": 0.5156, "step": 3500 }, { "epoch": 1.7372471143105375, "grad_norm": 0.07228894317585616, "learning_rate": 6.0419801734308085e-06, "loss": 0.5086, "step": 3501 }, { "epoch": 1.7377435770137768, "grad_norm": 0.07240394720933138, "learning_rate": 6.04006831094364e-06, "loss": 0.4768, "step": 3502 }, { "epoch": 1.7382400397170161, "grad_norm": 0.07257800871912748, "learning_rate": 6.038156289498705e-06, "loss": 0.4864, "step": 3503 }, { "epoch": 1.7387365024202557, "grad_norm": 0.07693716933243037, "learning_rate": 6.03624410938823e-06, "loss": 0.5101, "step": 3504 }, 
{ "epoch": 1.7392329651234952, "grad_norm": 0.07042241058097551, "learning_rate": 6.034331770904455e-06, "loss": 0.4866, "step": 3505 }, { "epoch": 1.7397294278267346, "grad_norm": 0.07029230350366913, "learning_rate": 6.032419274339654e-06, "loss": 0.5054, "step": 3506 }, { "epoch": 1.740225890529974, "grad_norm": 0.07177352673078713, "learning_rate": 6.03050661998612e-06, "loss": 0.4853, "step": 3507 }, { "epoch": 1.7407223532332132, "grad_norm": 0.07412862457534308, "learning_rate": 6.028593808136173e-06, "loss": 0.5043, "step": 3508 }, { "epoch": 1.7412188159364528, "grad_norm": 0.07261567809098946, "learning_rate": 6.026680839082153e-06, "loss": 0.5034, "step": 3509 }, { "epoch": 1.7417152786396923, "grad_norm": 0.07011596539977943, "learning_rate": 6.024767713116429e-06, "loss": 0.4814, "step": 3510 }, { "epoch": 1.7422117413429317, "grad_norm": 0.0715432820186117, "learning_rate": 6.022854430531392e-06, "loss": 0.4729, "step": 3511 }, { "epoch": 1.742708204046171, "grad_norm": 0.0713065455423351, "learning_rate": 6.020940991619455e-06, "loss": 0.506, "step": 3512 }, { "epoch": 1.7432046667494103, "grad_norm": 0.07050208269041043, "learning_rate": 6.019027396673058e-06, "loss": 0.4828, "step": 3513 }, { "epoch": 1.7437011294526499, "grad_norm": 0.07360098624736137, "learning_rate": 6.01711364598466e-06, "loss": 0.5075, "step": 3514 }, { "epoch": 1.7441975921558894, "grad_norm": 0.07351220296533653, "learning_rate": 6.015199739846751e-06, "loss": 0.4904, "step": 3515 }, { "epoch": 1.7446940548591288, "grad_norm": 0.07781887095221011, "learning_rate": 6.013285678551838e-06, "loss": 0.4954, "step": 3516 }, { "epoch": 1.745190517562368, "grad_norm": 0.07444681722627283, "learning_rate": 6.011371462392457e-06, "loss": 0.4682, "step": 3517 }, { "epoch": 1.7456869802656074, "grad_norm": 0.07313248542856911, "learning_rate": 6.0094570916611635e-06, "loss": 0.4997, "step": 3518 }, { "epoch": 1.746183442968847, "grad_norm": 0.07056610694243752, "learning_rate": 6.007542566650539e-06, "loss": 0.4805, "step": 3519 }, { "epoch": 1.7466799056720865, "grad_norm": 0.0731856933833494, "learning_rate": 6.005627887653189e-06, "loss": 0.4672, "step": 3520 }, { "epoch": 1.7471763683753259, "grad_norm": 0.0733689880039657, "learning_rate": 6.00371305496174e-06, "loss": 0.5161, "step": 3521 }, { "epoch": 1.7476728310785652, "grad_norm": 0.07171031605313356, "learning_rate": 6.001798068868842e-06, "loss": 0.4733, "step": 3522 }, { "epoch": 1.7481692937818045, "grad_norm": 0.07327381780949192, "learning_rate": 5.999882929667173e-06, "loss": 0.4934, "step": 3523 }, { "epoch": 1.748665756485044, "grad_norm": 0.06983487291449605, "learning_rate": 5.997967637649431e-06, "loss": 0.4889, "step": 3524 }, { "epoch": 1.7491622191882836, "grad_norm": 0.06937374272648494, "learning_rate": 5.996052193108336e-06, "loss": 0.4744, "step": 3525 }, { "epoch": 1.749658681891523, "grad_norm": 0.07137647015287153, "learning_rate": 5.994136596336633e-06, "loss": 0.5007, "step": 3526 }, { "epoch": 1.7501551445947623, "grad_norm": 0.07233499968766187, "learning_rate": 5.9922208476270914e-06, "loss": 0.4782, "step": 3527 }, { "epoch": 1.7506516072980016, "grad_norm": 0.07007113610888752, "learning_rate": 5.990304947272503e-06, "loss": 0.4633, "step": 3528 }, { "epoch": 1.7506516072980016, "eval_loss": 0.5171855092048645, "eval_runtime": 259.2374, "eval_samples_per_second": 117.086, "eval_steps_per_second": 14.639, "step": 3528 }, { "epoch": 1.7511480700012412, "grad_norm": 0.0726685919128279, "learning_rate": 5.988388895565681e-06, 
"loss": 0.4911, "step": 3529 }, { "epoch": 1.7516445327044807, "grad_norm": 0.07643078823588302, "learning_rate": 5.986472692799465e-06, "loss": 0.5087, "step": 3530 }, { "epoch": 1.75214099540772, "grad_norm": 0.06621160646927267, "learning_rate": 5.984556339266714e-06, "loss": 0.458, "step": 3531 }, { "epoch": 1.7526374581109594, "grad_norm": 0.06975490562081263, "learning_rate": 5.9826398352603134e-06, "loss": 0.4903, "step": 3532 }, { "epoch": 1.7531339208141987, "grad_norm": 0.07050926318317503, "learning_rate": 5.980723181073168e-06, "loss": 0.4669, "step": 3533 }, { "epoch": 1.7536303835174383, "grad_norm": 0.06895855481629098, "learning_rate": 5.978806376998209e-06, "loss": 0.4757, "step": 3534 }, { "epoch": 1.7541268462206778, "grad_norm": 0.06917867418657715, "learning_rate": 5.976889423328391e-06, "loss": 0.4751, "step": 3535 }, { "epoch": 1.7546233089239172, "grad_norm": 0.07451106891830402, "learning_rate": 5.974972320356688e-06, "loss": 0.5079, "step": 3536 }, { "epoch": 1.7551197716271565, "grad_norm": 0.06940933717231007, "learning_rate": 5.973055068376097e-06, "loss": 0.4865, "step": 3537 }, { "epoch": 1.7556162343303958, "grad_norm": 0.07457921708551443, "learning_rate": 5.9711376676796404e-06, "loss": 0.471, "step": 3538 }, { "epoch": 1.7561126970336354, "grad_norm": 0.0743437605915209, "learning_rate": 5.969220118560363e-06, "loss": 0.4884, "step": 3539 }, { "epoch": 1.756609159736875, "grad_norm": 0.07290986659316935, "learning_rate": 5.967302421311331e-06, "loss": 0.4943, "step": 3540 }, { "epoch": 1.7571056224401143, "grad_norm": 0.0715462893233084, "learning_rate": 5.965384576225632e-06, "loss": 0.4962, "step": 3541 }, { "epoch": 1.7576020851433536, "grad_norm": 0.07183399679200113, "learning_rate": 5.96346658359638e-06, "loss": 0.4726, "step": 3542 }, { "epoch": 1.758098547846593, "grad_norm": 0.06982319209707803, "learning_rate": 5.961548443716709e-06, "loss": 0.4699, "step": 3543 }, { "epoch": 1.7585950105498325, "grad_norm": 0.07251622340048915, "learning_rate": 5.959630156879777e-06, "loss": 0.4907, "step": 3544 }, { "epoch": 1.759091473253072, "grad_norm": 0.07250648567166573, "learning_rate": 5.957711723378759e-06, "loss": 0.484, "step": 3545 }, { "epoch": 1.7595879359563114, "grad_norm": 0.07596698660963411, "learning_rate": 5.955793143506863e-06, "loss": 0.4825, "step": 3546 }, { "epoch": 1.7600843986595507, "grad_norm": 0.07171805789313515, "learning_rate": 5.953874417557308e-06, "loss": 0.4784, "step": 3547 }, { "epoch": 1.76058086136279, "grad_norm": 0.07651650904469937, "learning_rate": 5.951955545823342e-06, "loss": 0.5252, "step": 3548 }, { "epoch": 1.7610773240660296, "grad_norm": 0.07565542672573962, "learning_rate": 5.950036528598235e-06, "loss": 0.4815, "step": 3549 }, { "epoch": 1.7615737867692691, "grad_norm": 0.07570080552039914, "learning_rate": 5.948117366175278e-06, "loss": 0.4925, "step": 3550 }, { "epoch": 1.7620702494725085, "grad_norm": 0.07570945061559178, "learning_rate": 5.946198058847783e-06, "loss": 0.5056, "step": 3551 }, { "epoch": 1.7625667121757478, "grad_norm": 0.0709021037213143, "learning_rate": 5.944278606909086e-06, "loss": 0.5034, "step": 3552 }, { "epoch": 1.7630631748789871, "grad_norm": 0.07323276956843093, "learning_rate": 5.942359010652544e-06, "loss": 0.4951, "step": 3553 }, { "epoch": 1.7635596375822267, "grad_norm": 0.07419262441731611, "learning_rate": 5.940439270371538e-06, "loss": 0.5276, "step": 3554 }, { "epoch": 1.764056100285466, "grad_norm": 0.07025387603832438, "learning_rate": 5.938519386359466e-06, 
"loss": 0.4855, "step": 3555 }, { "epoch": 1.7645525629887056, "grad_norm": 0.07550630555995716, "learning_rate": 5.936599358909756e-06, "loss": 0.4906, "step": 3556 }, { "epoch": 1.7650490256919449, "grad_norm": 0.07080434868326954, "learning_rate": 5.93467918831585e-06, "loss": 0.4966, "step": 3557 }, { "epoch": 1.7655454883951842, "grad_norm": 0.07189099890804855, "learning_rate": 5.9327588748712165e-06, "loss": 0.4963, "step": 3558 }, { "epoch": 1.7660419510984238, "grad_norm": 0.07785509762182191, "learning_rate": 5.930838418869343e-06, "loss": 0.4484, "step": 3559 }, { "epoch": 1.766538413801663, "grad_norm": 0.07505577734044602, "learning_rate": 5.9289178206037456e-06, "loss": 0.4751, "step": 3560 }, { "epoch": 1.7670348765049027, "grad_norm": 0.07452023513954975, "learning_rate": 5.926997080367951e-06, "loss": 0.4901, "step": 3561 }, { "epoch": 1.767531339208142, "grad_norm": 0.07419747069466309, "learning_rate": 5.925076198455517e-06, "loss": 0.4958, "step": 3562 }, { "epoch": 1.7680278019113813, "grad_norm": 0.06962384440658652, "learning_rate": 5.923155175160018e-06, "loss": 0.4948, "step": 3563 }, { "epoch": 1.7685242646146209, "grad_norm": 0.07400393848807546, "learning_rate": 5.921234010775052e-06, "loss": 0.5317, "step": 3564 }, { "epoch": 1.7690207273178602, "grad_norm": 0.07358460565070393, "learning_rate": 5.919312705594239e-06, "loss": 0.5093, "step": 3565 }, { "epoch": 1.7695171900210998, "grad_norm": 0.07520962763925339, "learning_rate": 5.917391259911219e-06, "loss": 0.5008, "step": 3566 }, { "epoch": 1.770013652724339, "grad_norm": 0.07202658913675364, "learning_rate": 5.915469674019654e-06, "loss": 0.5081, "step": 3567 }, { "epoch": 1.7705101154275784, "grad_norm": 0.07185311565574021, "learning_rate": 5.913547948213227e-06, "loss": 0.4871, "step": 3568 }, { "epoch": 1.771006578130818, "grad_norm": 0.0735637908156275, "learning_rate": 5.911626082785644e-06, "loss": 0.4835, "step": 3569 }, { "epoch": 1.7715030408340573, "grad_norm": 0.0735907052019009, "learning_rate": 5.909704078030631e-06, "loss": 0.4879, "step": 3570 }, { "epoch": 1.7719995035372968, "grad_norm": 0.07008428137817518, "learning_rate": 5.907781934241937e-06, "loss": 0.5005, "step": 3571 }, { "epoch": 1.7724959662405362, "grad_norm": 0.07729615402227608, "learning_rate": 5.905859651713328e-06, "loss": 0.4932, "step": 3572 }, { "epoch": 1.7729924289437755, "grad_norm": 0.06997736005764132, "learning_rate": 5.903937230738597e-06, "loss": 0.4608, "step": 3573 }, { "epoch": 1.773488891647015, "grad_norm": 0.0778643616820918, "learning_rate": 5.902014671611553e-06, "loss": 0.5364, "step": 3574 }, { "epoch": 1.7739853543502544, "grad_norm": 0.07699448930722969, "learning_rate": 5.900091974626028e-06, "loss": 0.54, "step": 3575 }, { "epoch": 1.774481817053494, "grad_norm": 0.07271609012983173, "learning_rate": 5.898169140075878e-06, "loss": 0.4869, "step": 3576 }, { "epoch": 1.7749782797567333, "grad_norm": 0.07564281998394239, "learning_rate": 5.896246168254976e-06, "loss": 0.5295, "step": 3577 }, { "epoch": 1.7754747424599726, "grad_norm": 0.07263423530612817, "learning_rate": 5.894323059457218e-06, "loss": 0.458, "step": 3578 }, { "epoch": 1.7759712051632122, "grad_norm": 0.07281173591445123, "learning_rate": 5.892399813976518e-06, "loss": 0.4815, "step": 3579 }, { "epoch": 1.7764676678664515, "grad_norm": 0.07438654268139841, "learning_rate": 5.890476432106815e-06, "loss": 0.4532, "step": 3580 }, { "epoch": 1.776964130569691, "grad_norm": 0.07132895431017902, "learning_rate": 5.8885529141420685e-06, 
"loss": 0.4681, "step": 3581 }, { "epoch": 1.7774605932729304, "grad_norm": 0.07337836587851508, "learning_rate": 5.886629260376254e-06, "loss": 0.5104, "step": 3582 }, { "epoch": 1.7779570559761697, "grad_norm": 0.06888317038226811, "learning_rate": 5.884705471103376e-06, "loss": 0.5026, "step": 3583 }, { "epoch": 1.7784535186794093, "grad_norm": 0.0704125385766139, "learning_rate": 5.882781546617451e-06, "loss": 0.5052, "step": 3584 }, { "epoch": 1.7789499813826486, "grad_norm": 0.07913813709508663, "learning_rate": 5.880857487212519e-06, "loss": 0.4758, "step": 3585 }, { "epoch": 1.7794464440858881, "grad_norm": 0.07299620364036855, "learning_rate": 5.878933293182645e-06, "loss": 0.4898, "step": 3586 }, { "epoch": 1.7799429067891275, "grad_norm": 0.07113118681039178, "learning_rate": 5.877008964821909e-06, "loss": 0.4707, "step": 3587 }, { "epoch": 1.7804393694923668, "grad_norm": 0.07609742154417785, "learning_rate": 5.875084502424414e-06, "loss": 0.5312, "step": 3588 }, { "epoch": 1.7809358321956061, "grad_norm": 0.07232100575504447, "learning_rate": 5.873159906284286e-06, "loss": 0.4906, "step": 3589 }, { "epoch": 1.7814322948988457, "grad_norm": 0.07262820041435715, "learning_rate": 5.871235176695664e-06, "loss": 0.4596, "step": 3590 }, { "epoch": 1.7819287576020852, "grad_norm": 0.07247576171994086, "learning_rate": 5.869310313952717e-06, "loss": 0.4768, "step": 3591 }, { "epoch": 1.7824252203053246, "grad_norm": 0.07203298175946792, "learning_rate": 5.867385318349623e-06, "loss": 0.477, "step": 3592 }, { "epoch": 1.782921683008564, "grad_norm": 0.07392881293705612, "learning_rate": 5.865460190180594e-06, "loss": 0.5153, "step": 3593 }, { "epoch": 1.7834181457118032, "grad_norm": 0.0756318218212652, "learning_rate": 5.863534929739852e-06, "loss": 0.4821, "step": 3594 }, { "epoch": 1.7839146084150428, "grad_norm": 0.0747502853802229, "learning_rate": 5.86160953732164e-06, "loss": 0.4766, "step": 3595 }, { "epoch": 1.7844110711182823, "grad_norm": 0.07297460569575788, "learning_rate": 5.859684013220225e-06, "loss": 0.4807, "step": 3596 }, { "epoch": 1.7849075338215217, "grad_norm": 0.07155856829311029, "learning_rate": 5.857758357729892e-06, "loss": 0.4797, "step": 3597 }, { "epoch": 1.785403996524761, "grad_norm": 0.07177962375247923, "learning_rate": 5.855832571144947e-06, "loss": 0.5285, "step": 3598 }, { "epoch": 1.7859004592280003, "grad_norm": 0.0718258720492854, "learning_rate": 5.853906653759718e-06, "loss": 0.5025, "step": 3599 }, { "epoch": 1.78639692193124, "grad_norm": 0.07145170610796107, "learning_rate": 5.851980605868547e-06, "loss": 0.5119, "step": 3600 }, { "epoch": 1.7868933846344794, "grad_norm": 0.0711317878064843, "learning_rate": 5.850054427765801e-06, "loss": 0.4873, "step": 3601 }, { "epoch": 1.7873898473377188, "grad_norm": 0.07248680936032043, "learning_rate": 5.848128119745865e-06, "loss": 0.4688, "step": 3602 }, { "epoch": 1.787886310040958, "grad_norm": 0.07010165537492177, "learning_rate": 5.846201682103144e-06, "loss": 0.4681, "step": 3603 }, { "epoch": 1.7883827727441974, "grad_norm": 0.0758110653402738, "learning_rate": 5.844275115132064e-06, "loss": 0.4998, "step": 3604 }, { "epoch": 1.788879235447437, "grad_norm": 0.07165309101536645, "learning_rate": 5.8423484191270705e-06, "loss": 0.4885, "step": 3605 }, { "epoch": 1.7893756981506765, "grad_norm": 0.0684537758607228, "learning_rate": 5.840421594382627e-06, "loss": 0.4421, "step": 3606 }, { "epoch": 1.7898721608539159, "grad_norm": 0.07043974964585102, "learning_rate": 5.838494641193217e-06, 
"loss": 0.4978, "step": 3607 }, { "epoch": 1.7903686235571552, "grad_norm": 0.07343477895002559, "learning_rate": 5.836567559853346e-06, "loss": 0.506, "step": 3608 }, { "epoch": 1.7908650862603945, "grad_norm": 0.07131137027745556, "learning_rate": 5.834640350657538e-06, "loss": 0.5276, "step": 3609 }, { "epoch": 1.791361548963634, "grad_norm": 0.06975660850835906, "learning_rate": 5.832713013900333e-06, "loss": 0.487, "step": 3610 }, { "epoch": 1.7918580116668736, "grad_norm": 0.07013597758903344, "learning_rate": 5.830785549876296e-06, "loss": 0.4871, "step": 3611 }, { "epoch": 1.792354474370113, "grad_norm": 0.07033271915719036, "learning_rate": 5.828857958880008e-06, "loss": 0.5121, "step": 3612 }, { "epoch": 1.7928509370733523, "grad_norm": 0.07292135205875179, "learning_rate": 5.826930241206071e-06, "loss": 0.4784, "step": 3613 }, { "epoch": 1.7933473997765916, "grad_norm": 0.07427049941543128, "learning_rate": 5.825002397149105e-06, "loss": 0.5119, "step": 3614 }, { "epoch": 1.7938438624798312, "grad_norm": 0.07167706019268992, "learning_rate": 5.823074427003752e-06, "loss": 0.5509, "step": 3615 }, { "epoch": 1.7943403251830707, "grad_norm": 0.07024605078269222, "learning_rate": 5.821146331064669e-06, "loss": 0.4837, "step": 3616 }, { "epoch": 1.79483678788631, "grad_norm": 0.07174070828931835, "learning_rate": 5.8192181096265355e-06, "loss": 0.4731, "step": 3617 }, { "epoch": 1.7953332505895494, "grad_norm": 0.07256450684278515, "learning_rate": 5.817289762984048e-06, "loss": 0.5094, "step": 3618 }, { "epoch": 1.7958297132927887, "grad_norm": 0.07124042333943453, "learning_rate": 5.8153612914319255e-06, "loss": 0.479, "step": 3619 }, { "epoch": 1.7963261759960283, "grad_norm": 0.07289082282917042, "learning_rate": 5.813432695264903e-06, "loss": 0.511, "step": 3620 }, { "epoch": 1.7968226386992678, "grad_norm": 0.0685871218786059, "learning_rate": 5.811503974777736e-06, "loss": 0.4704, "step": 3621 }, { "epoch": 1.7973191014025072, "grad_norm": 0.07621899432181643, "learning_rate": 5.809575130265196e-06, "loss": 0.5003, "step": 3622 }, { "epoch": 1.7978155641057465, "grad_norm": 0.07243606226840418, "learning_rate": 5.807646162022078e-06, "loss": 0.4806, "step": 3623 }, { "epoch": 1.7983120268089858, "grad_norm": 0.07334689387257744, "learning_rate": 5.805717070343195e-06, "loss": 0.502, "step": 3624 }, { "epoch": 1.7988084895122254, "grad_norm": 0.07236811202655051, "learning_rate": 5.803787855523377e-06, "loss": 0.5212, "step": 3625 }, { "epoch": 1.799304952215465, "grad_norm": 0.07021878117951692, "learning_rate": 5.8018585178574714e-06, "loss": 0.4777, "step": 3626 }, { "epoch": 1.7998014149187043, "grad_norm": 0.07026592126708119, "learning_rate": 5.79992905764035e-06, "loss": 0.5112, "step": 3627 }, { "epoch": 1.8002978776219436, "grad_norm": 0.07219620537052175, "learning_rate": 5.797999475166897e-06, "loss": 0.5062, "step": 3628 }, { "epoch": 1.800794340325183, "grad_norm": 0.07311395206059088, "learning_rate": 5.796069770732019e-06, "loss": 0.4729, "step": 3629 }, { "epoch": 1.8012908030284225, "grad_norm": 0.0728361404160052, "learning_rate": 5.79413994463064e-06, "loss": 0.4749, "step": 3630 }, { "epoch": 1.801787265731662, "grad_norm": 0.07109436647193143, "learning_rate": 5.792209997157705e-06, "loss": 0.4888, "step": 3631 }, { "epoch": 1.8022837284349014, "grad_norm": 0.07019275246724026, "learning_rate": 5.790279928608173e-06, "loss": 0.5022, "step": 3632 }, { "epoch": 1.8027801911381407, "grad_norm": 0.07247375032569164, "learning_rate": 5.788349739277026e-06, 
"loss": 0.5117, "step": 3633 }, { "epoch": 1.80327665384138, "grad_norm": 0.0699804846033475, "learning_rate": 5.7864194294592615e-06, "loss": 0.491, "step": 3634 }, { "epoch": 1.8037731165446196, "grad_norm": 0.06993980001240863, "learning_rate": 5.7844889994498955e-06, "loss": 0.4906, "step": 3635 }, { "epoch": 1.8042695792478591, "grad_norm": 0.07775193298527062, "learning_rate": 5.782558449543964e-06, "loss": 0.5035, "step": 3636 }, { "epoch": 1.8047660419510985, "grad_norm": 0.0754723195092835, "learning_rate": 5.780627780036523e-06, "loss": 0.5056, "step": 3637 }, { "epoch": 1.8052625046543378, "grad_norm": 0.08173656319877305, "learning_rate": 5.77869699122264e-06, "loss": 0.476, "step": 3638 }, { "epoch": 1.8057589673575771, "grad_norm": 0.07211057900448045, "learning_rate": 5.776766083397409e-06, "loss": 0.5001, "step": 3639 }, { "epoch": 1.8062554300608167, "grad_norm": 0.07357175060489121, "learning_rate": 5.774835056855934e-06, "loss": 0.4846, "step": 3640 }, { "epoch": 1.8067518927640562, "grad_norm": 0.07392807260783969, "learning_rate": 5.7729039118933476e-06, "loss": 0.5086, "step": 3641 }, { "epoch": 1.8072483554672956, "grad_norm": 0.0725775134465148, "learning_rate": 5.770972648804789e-06, "loss": 0.5274, "step": 3642 }, { "epoch": 1.807744818170535, "grad_norm": 0.07163353524333979, "learning_rate": 5.769041267885424e-06, "loss": 0.4717, "step": 3643 }, { "epoch": 1.8082412808737742, "grad_norm": 0.07105769685417251, "learning_rate": 5.767109769430429e-06, "loss": 0.4874, "step": 3644 }, { "epoch": 1.8087377435770138, "grad_norm": 0.06975242191548883, "learning_rate": 5.765178153735007e-06, "loss": 0.4722, "step": 3645 }, { "epoch": 1.8092342062802533, "grad_norm": 0.07347601381891362, "learning_rate": 5.763246421094373e-06, "loss": 0.496, "step": 3646 }, { "epoch": 1.8097306689834927, "grad_norm": 0.07590220268232373, "learning_rate": 5.761314571803761e-06, "loss": 0.5011, "step": 3647 }, { "epoch": 1.810227131686732, "grad_norm": 0.07242839670144252, "learning_rate": 5.759382606158423e-06, "loss": 0.478, "step": 3648 }, { "epoch": 1.8107235943899713, "grad_norm": 0.07593439548121685, "learning_rate": 5.757450524453632e-06, "loss": 0.497, "step": 3649 }, { "epoch": 1.8112200570932109, "grad_norm": 0.07167454149194932, "learning_rate": 5.755518326984671e-06, "loss": 0.4845, "step": 3650 }, { "epoch": 1.8117165197964504, "grad_norm": 0.07071788965467532, "learning_rate": 5.753586014046847e-06, "loss": 0.4751, "step": 3651 }, { "epoch": 1.8122129824996898, "grad_norm": 0.07152437833377714, "learning_rate": 5.7516535859354835e-06, "loss": 0.515, "step": 3652 }, { "epoch": 1.812709445202929, "grad_norm": 0.07164234780108387, "learning_rate": 5.749721042945924e-06, "loss": 0.4768, "step": 3653 }, { "epoch": 1.8132059079061684, "grad_norm": 0.06924108464675693, "learning_rate": 5.747788385373522e-06, "loss": 0.4778, "step": 3654 }, { "epoch": 1.813702370609408, "grad_norm": 0.07367629715089329, "learning_rate": 5.7458556135136545e-06, "loss": 0.4942, "step": 3655 }, { "epoch": 1.8141988333126475, "grad_norm": 0.07021653950442595, "learning_rate": 5.743922727661716e-06, "loss": 0.5148, "step": 3656 }, { "epoch": 1.8146952960158869, "grad_norm": 0.07417466927168129, "learning_rate": 5.7419897281131164e-06, "loss": 0.5172, "step": 3657 }, { "epoch": 1.8151917587191262, "grad_norm": 0.0678717013795942, "learning_rate": 5.740056615163284e-06, "loss": 0.4566, "step": 3658 }, { "epoch": 1.8156882214223655, "grad_norm": 0.06873133916950154, "learning_rate": 5.738123389107665e-06, 
"loss": 0.4481, "step": 3659 }, { "epoch": 1.816184684125605, "grad_norm": 0.07135619019377491, "learning_rate": 5.736190050241719e-06, "loss": 0.4767, "step": 3660 }, { "epoch": 1.8166811468288446, "grad_norm": 0.0728194684169538, "learning_rate": 5.7342565988609275e-06, "loss": 0.4569, "step": 3661 }, { "epoch": 1.817177609532084, "grad_norm": 0.07250024973531223, "learning_rate": 5.732323035260789e-06, "loss": 0.4853, "step": 3662 }, { "epoch": 1.8176740722353233, "grad_norm": 0.06919852772378363, "learning_rate": 5.730389359736816e-06, "loss": 0.493, "step": 3663 }, { "epoch": 1.8181705349385626, "grad_norm": 0.07065617648477643, "learning_rate": 5.7284555725845405e-06, "loss": 0.4756, "step": 3664 }, { "epoch": 1.8186669976418022, "grad_norm": 0.07232159318569668, "learning_rate": 5.726521674099511e-06, "loss": 0.5141, "step": 3665 }, { "epoch": 1.8191634603450417, "grad_norm": 0.07221720276118149, "learning_rate": 5.724587664577292e-06, "loss": 0.4652, "step": 3666 }, { "epoch": 1.819659923048281, "grad_norm": 0.07053346570081506, "learning_rate": 5.722653544313467e-06, "loss": 0.5112, "step": 3667 }, { "epoch": 1.8201563857515204, "grad_norm": 0.07023432041629968, "learning_rate": 5.720719313603633e-06, "loss": 0.4875, "step": 3668 }, { "epoch": 1.8206528484547597, "grad_norm": 0.07095615034595218, "learning_rate": 5.71878497274341e-06, "loss": 0.4999, "step": 3669 }, { "epoch": 1.8211493111579993, "grad_norm": 0.07274255009602926, "learning_rate": 5.7168505220284266e-06, "loss": 0.507, "step": 3670 }, { "epoch": 1.8216457738612388, "grad_norm": 0.07247280332754438, "learning_rate": 5.714915961754335e-06, "loss": 0.4726, "step": 3671 }, { "epoch": 1.8221422365644782, "grad_norm": 0.06954349998822817, "learning_rate": 5.712981292216803e-06, "loss": 0.4388, "step": 3672 }, { "epoch": 1.8226386992677175, "grad_norm": 0.07008709511894415, "learning_rate": 5.711046513711512e-06, "loss": 0.4896, "step": 3673 }, { "epoch": 1.8231351619709568, "grad_norm": 0.07146176222719076, "learning_rate": 5.709111626534161e-06, "loss": 0.471, "step": 3674 }, { "epoch": 1.8236316246741964, "grad_norm": 0.06966762407120558, "learning_rate": 5.707176630980469e-06, "loss": 0.4756, "step": 3675 }, { "epoch": 1.824128087377436, "grad_norm": 0.06975242002832147, "learning_rate": 5.705241527346166e-06, "loss": 0.4964, "step": 3676 }, { "epoch": 1.8246245500806753, "grad_norm": 0.07114687927174249, "learning_rate": 5.703306315927004e-06, "loss": 0.4552, "step": 3677 }, { "epoch": 1.8251210127839146, "grad_norm": 0.07157645828419164, "learning_rate": 5.701370997018748e-06, "loss": 0.5142, "step": 3678 }, { "epoch": 1.825617475487154, "grad_norm": 0.06880701494843049, "learning_rate": 5.69943557091718e-06, "loss": 0.4483, "step": 3679 }, { "epoch": 1.8261139381903935, "grad_norm": 0.07089399642352692, "learning_rate": 5.6975000379181025e-06, "loss": 0.4823, "step": 3680 }, { "epoch": 1.826610400893633, "grad_norm": 0.07140090532891165, "learning_rate": 5.695564398317326e-06, "loss": 0.4902, "step": 3681 }, { "epoch": 1.8271068635968724, "grad_norm": 0.07649533474683105, "learning_rate": 5.693628652410683e-06, "loss": 0.49, "step": 3682 }, { "epoch": 1.8276033263001117, "grad_norm": 0.07226480459106793, "learning_rate": 5.691692800494023e-06, "loss": 0.5069, "step": 3683 }, { "epoch": 1.828099789003351, "grad_norm": 0.07464166753150865, "learning_rate": 5.689756842863208e-06, "loss": 0.4629, "step": 3684 }, { "epoch": 1.8285962517065906, "grad_norm": 0.07013676743303268, "learning_rate": 5.687820779814119e-06, 
"loss": 0.4533, "step": 3685 }, { "epoch": 1.8290927144098301, "grad_norm": 0.07186995524898125, "learning_rate": 5.6858846116426535e-06, "loss": 0.4833, "step": 3686 }, { "epoch": 1.8295891771130695, "grad_norm": 0.07153345606512074, "learning_rate": 5.683948338644721e-06, "loss": 0.4836, "step": 3687 }, { "epoch": 1.8300856398163088, "grad_norm": 0.07438658965655393, "learning_rate": 5.6820119611162515e-06, "loss": 0.511, "step": 3688 }, { "epoch": 1.8305821025195481, "grad_norm": 0.07184279444597692, "learning_rate": 5.68007547935319e-06, "loss": 0.4793, "step": 3689 }, { "epoch": 1.8310785652227877, "grad_norm": 0.0723669994788689, "learning_rate": 5.678138893651495e-06, "loss": 0.5094, "step": 3690 }, { "epoch": 1.8315750279260272, "grad_norm": 0.07299871934038199, "learning_rate": 5.676202204307144e-06, "loss": 0.4871, "step": 3691 }, { "epoch": 1.8320714906292666, "grad_norm": 0.06980833513308857, "learning_rate": 5.674265411616127e-06, "loss": 0.4917, "step": 3692 }, { "epoch": 1.8325679533325059, "grad_norm": 0.07292298451496881, "learning_rate": 5.672328515874452e-06, "loss": 0.4529, "step": 3693 }, { "epoch": 1.8330644160357452, "grad_norm": 0.07138317703404169, "learning_rate": 5.670391517378145e-06, "loss": 0.5169, "step": 3694 }, { "epoch": 1.8335608787389848, "grad_norm": 0.07162547405202699, "learning_rate": 5.668454416423243e-06, "loss": 0.4893, "step": 3695 }, { "epoch": 1.834057341442224, "grad_norm": 0.07316600393252785, "learning_rate": 5.666517213305802e-06, "loss": 0.505, "step": 3696 }, { "epoch": 1.8345538041454637, "grad_norm": 0.06988135104934977, "learning_rate": 5.6645799083218915e-06, "loss": 0.5128, "step": 3697 }, { "epoch": 1.835050266848703, "grad_norm": 0.07303979907617231, "learning_rate": 5.662642501767597e-06, "loss": 0.4678, "step": 3698 }, { "epoch": 1.8355467295519423, "grad_norm": 0.075149991058931, "learning_rate": 5.66070499393902e-06, "loss": 0.4879, "step": 3699 }, { "epoch": 1.8360431922551819, "grad_norm": 0.07133476017601827, "learning_rate": 5.65876738513228e-06, "loss": 0.4976, "step": 3700 }, { "epoch": 1.8365396549584212, "grad_norm": 0.07100746394861222, "learning_rate": 5.656829675643506e-06, "loss": 0.481, "step": 3701 }, { "epoch": 1.8370361176616608, "grad_norm": 0.06893065620889177, "learning_rate": 5.65489186576885e-06, "loss": 0.4677, "step": 3702 }, { "epoch": 1.8375325803649, "grad_norm": 0.07043388719856646, "learning_rate": 5.652953955804471e-06, "loss": 0.486, "step": 3703 }, { "epoch": 1.8380290430681394, "grad_norm": 0.07186042040863846, "learning_rate": 5.6510159460465485e-06, "loss": 0.5171, "step": 3704 }, { "epoch": 1.838525505771379, "grad_norm": 0.06942013042023604, "learning_rate": 5.649077836791279e-06, "loss": 0.4661, "step": 3705 }, { "epoch": 1.8390219684746183, "grad_norm": 0.07320920107605887, "learning_rate": 5.6471396283348676e-06, "loss": 0.5008, "step": 3706 }, { "epoch": 1.8395184311778578, "grad_norm": 0.06922693618002337, "learning_rate": 5.645201320973541e-06, "loss": 0.475, "step": 3707 }, { "epoch": 1.8400148938810972, "grad_norm": 0.07336728917335142, "learning_rate": 5.643262915003538e-06, "loss": 0.5215, "step": 3708 }, { "epoch": 1.8405113565843365, "grad_norm": 0.0716664034498282, "learning_rate": 5.64132441072111e-06, "loss": 0.5243, "step": 3709 }, { "epoch": 1.841007819287576, "grad_norm": 0.07202044980774482, "learning_rate": 5.6393858084225305e-06, "loss": 0.4886, "step": 3710 }, { "epoch": 1.8415042819908154, "grad_norm": 0.07266065385472024, "learning_rate": 5.637447108404082e-06, "loss": 
0.4862, "step": 3711 }, { "epoch": 1.842000744694055, "grad_norm": 0.0706724033050316, "learning_rate": 5.635508310962064e-06, "loss": 0.483, "step": 3712 }, { "epoch": 1.8424972073972943, "grad_norm": 0.07233950101249319, "learning_rate": 5.63356941639279e-06, "loss": 0.4836, "step": 3713 }, { "epoch": 1.8429936701005336, "grad_norm": 0.07601798497496463, "learning_rate": 5.631630424992588e-06, "loss": 0.5224, "step": 3714 }, { "epoch": 1.8434901328037732, "grad_norm": 0.07146589515735953, "learning_rate": 5.629691337057803e-06, "loss": 0.5011, "step": 3715 }, { "epoch": 1.8439865955070125, "grad_norm": 0.0700451438479701, "learning_rate": 5.627752152884794e-06, "loss": 0.4834, "step": 3716 }, { "epoch": 1.844483058210252, "grad_norm": 0.06743704292605564, "learning_rate": 5.625812872769935e-06, "loss": 0.4562, "step": 3717 }, { "epoch": 1.8449795209134914, "grad_norm": 0.06997430470081448, "learning_rate": 5.623873497009612e-06, "loss": 0.5186, "step": 3718 }, { "epoch": 1.8454759836167307, "grad_norm": 0.07339075549752148, "learning_rate": 5.621934025900226e-06, "loss": 0.4918, "step": 3719 }, { "epoch": 1.8459724463199703, "grad_norm": 0.0710906084904975, "learning_rate": 5.619994459738198e-06, "loss": 0.4829, "step": 3720 }, { "epoch": 1.8464689090232096, "grad_norm": 0.07016695505135577, "learning_rate": 5.6180547988199586e-06, "loss": 0.4903, "step": 3721 }, { "epoch": 1.8469653717264491, "grad_norm": 0.07234070412472808, "learning_rate": 5.616115043441951e-06, "loss": 0.478, "step": 3722 }, { "epoch": 1.8474618344296885, "grad_norm": 0.07055140091964066, "learning_rate": 5.614175193900639e-06, "loss": 0.4875, "step": 3723 }, { "epoch": 1.8479582971329278, "grad_norm": 0.0765106043318054, "learning_rate": 5.612235250492495e-06, "loss": 0.4839, "step": 3724 }, { "epoch": 1.8484547598361674, "grad_norm": 0.07073929439268867, "learning_rate": 5.61029521351401e-06, "loss": 0.4836, "step": 3725 }, { "epoch": 1.8489512225394067, "grad_norm": 0.07189512081581409, "learning_rate": 5.608355083261686e-06, "loss": 0.4724, "step": 3726 }, { "epoch": 1.8494476852426462, "grad_norm": 0.07079012203165752, "learning_rate": 5.606414860032042e-06, "loss": 0.512, "step": 3727 }, { "epoch": 1.8499441479458856, "grad_norm": 0.07272943202601717, "learning_rate": 5.604474544121612e-06, "loss": 0.4789, "step": 3728 }, { "epoch": 1.850440610649125, "grad_norm": 0.06777523021078664, "learning_rate": 5.602534135826939e-06, "loss": 0.469, "step": 3729 }, { "epoch": 1.8509370733523642, "grad_norm": 0.073353001049987, "learning_rate": 5.600593635444583e-06, "loss": 0.4993, "step": 3730 }, { "epoch": 1.8514335360556038, "grad_norm": 0.07248977863476927, "learning_rate": 5.5986530432711195e-06, "loss": 0.4717, "step": 3731 }, { "epoch": 1.8519299987588433, "grad_norm": 0.07234349061832071, "learning_rate": 5.596712359603138e-06, "loss": 0.4963, "step": 3732 }, { "epoch": 1.8524264614620827, "grad_norm": 0.06920179818135419, "learning_rate": 5.5947715847372385e-06, "loss": 0.4582, "step": 3733 }, { "epoch": 1.852922924165322, "grad_norm": 0.07187699072861928, "learning_rate": 5.5928307189700415e-06, "loss": 0.4571, "step": 3734 }, { "epoch": 1.8534193868685613, "grad_norm": 0.07080879521086189, "learning_rate": 5.590889762598171e-06, "loss": 0.5281, "step": 3735 }, { "epoch": 1.853915849571801, "grad_norm": 0.07195862744667644, "learning_rate": 5.588948715918277e-06, "loss": 0.527, "step": 3736 }, { "epoch": 1.8544123122750404, "grad_norm": 0.07215211385422904, "learning_rate": 5.587007579227014e-06, "loss": 
0.5411, "step": 3737 }, { "epoch": 1.8549087749782798, "grad_norm": 0.07117465488308353, "learning_rate": 5.5850663528210545e-06, "loss": 0.4886, "step": 3738 }, { "epoch": 1.855405237681519, "grad_norm": 0.07165570397884258, "learning_rate": 5.583125036997084e-06, "loss": 0.4918, "step": 3739 }, { "epoch": 1.8559017003847584, "grad_norm": 0.06806064422601031, "learning_rate": 5.581183632051801e-06, "loss": 0.4638, "step": 3740 }, { "epoch": 1.856398163087998, "grad_norm": 0.06938300711407791, "learning_rate": 5.579242138281918e-06, "loss": 0.488, "step": 3741 }, { "epoch": 1.8568946257912375, "grad_norm": 0.07263388401977534, "learning_rate": 5.577300555984162e-06, "loss": 0.5041, "step": 3742 }, { "epoch": 1.8573910884944769, "grad_norm": 0.07291712802999431, "learning_rate": 5.5753588854552724e-06, "loss": 0.4973, "step": 3743 }, { "epoch": 1.8578875511977162, "grad_norm": 0.06984653551222911, "learning_rate": 5.573417126992004e-06, "loss": 0.4702, "step": 3744 }, { "epoch": 1.8583840139009555, "grad_norm": 0.07593350841767885, "learning_rate": 5.57147528089112e-06, "loss": 0.4966, "step": 3745 }, { "epoch": 1.858880476604195, "grad_norm": 0.07160799166246536, "learning_rate": 5.5695333474494015e-06, "loss": 0.4811, "step": 3746 }, { "epoch": 1.8593769393074346, "grad_norm": 0.06904328199683979, "learning_rate": 5.567591326963644e-06, "loss": 0.4753, "step": 3747 }, { "epoch": 1.859873402010674, "grad_norm": 0.06929606497626255, "learning_rate": 5.565649219730651e-06, "loss": 0.5292, "step": 3748 }, { "epoch": 1.8603698647139133, "grad_norm": 0.07444165806160696, "learning_rate": 5.563707026047246e-06, "loss": 0.4787, "step": 3749 }, { "epoch": 1.8608663274171526, "grad_norm": 0.06887681183296859, "learning_rate": 5.561764746210261e-06, "loss": 0.4982, "step": 3750 }, { "epoch": 1.8613627901203922, "grad_norm": 0.07082675162940927, "learning_rate": 5.559822380516539e-06, "loss": 0.4702, "step": 3751 }, { "epoch": 1.8618592528236317, "grad_norm": 0.07144978926454551, "learning_rate": 5.5578799292629446e-06, "loss": 0.501, "step": 3752 }, { "epoch": 1.862355715526871, "grad_norm": 0.07049843632887456, "learning_rate": 5.5559373927463476e-06, "loss": 0.5032, "step": 3753 }, { "epoch": 1.8628521782301104, "grad_norm": 0.07256530112167905, "learning_rate": 5.553994771263633e-06, "loss": 0.4928, "step": 3754 }, { "epoch": 1.8633486409333497, "grad_norm": 0.07088436352644081, "learning_rate": 5.5520520651117014e-06, "loss": 0.4669, "step": 3755 }, { "epoch": 1.8638451036365893, "grad_norm": 0.07330689752559463, "learning_rate": 5.550109274587463e-06, "loss": 0.4836, "step": 3756 }, { "epoch": 1.8643415663398288, "grad_norm": 0.07300119259335101, "learning_rate": 5.548166399987842e-06, "loss": 0.5022, "step": 3757 }, { "epoch": 1.8648380290430682, "grad_norm": 0.07139240282413961, "learning_rate": 5.546223441609775e-06, "loss": 0.45, "step": 3758 }, { "epoch": 1.8653344917463075, "grad_norm": 0.07306394150942447, "learning_rate": 5.544280399750214e-06, "loss": 0.4938, "step": 3759 }, { "epoch": 1.8658309544495468, "grad_norm": 0.07654037658961671, "learning_rate": 5.54233727470612e-06, "loss": 0.4927, "step": 3760 }, { "epoch": 1.8663274171527864, "grad_norm": 0.0723075236426715, "learning_rate": 5.540394066774471e-06, "loss": 0.4759, "step": 3761 }, { "epoch": 1.866823879856026, "grad_norm": 0.07004287361289634, "learning_rate": 5.538450776252252e-06, "loss": 0.4718, "step": 3762 }, { "epoch": 1.8673203425592653, "grad_norm": 0.07222653031062394, "learning_rate": 5.536507403436465e-06, "loss": 
0.4792, "step": 3763 }, { "epoch": 1.8678168052625046, "grad_norm": 0.07278259090790781, "learning_rate": 5.534563948624124e-06, "loss": 0.4775, "step": 3764 }, { "epoch": 1.868313267965744, "grad_norm": 0.07010260861928222, "learning_rate": 5.532620412112255e-06, "loss": 0.483, "step": 3765 }, { "epoch": 1.8688097306689835, "grad_norm": 0.07351482506516754, "learning_rate": 5.530676794197895e-06, "loss": 0.4741, "step": 3766 }, { "epoch": 1.869306193372223, "grad_norm": 0.07191108493092994, "learning_rate": 5.528733095178097e-06, "loss": 0.4639, "step": 3767 }, { "epoch": 1.8698026560754624, "grad_norm": 0.07046867812955843, "learning_rate": 5.526789315349922e-06, "loss": 0.474, "step": 3768 }, { "epoch": 1.8702991187787017, "grad_norm": 0.07249463782600657, "learning_rate": 5.524845455010448e-06, "loss": 0.5159, "step": 3769 }, { "epoch": 1.870795581481941, "grad_norm": 0.07291403318196778, "learning_rate": 5.52290151445676e-06, "loss": 0.4721, "step": 3770 }, { "epoch": 1.8712920441851806, "grad_norm": 0.07363004384944376, "learning_rate": 5.52095749398596e-06, "loss": 0.5124, "step": 3771 }, { "epoch": 1.8717885068884201, "grad_norm": 0.07402791452077123, "learning_rate": 5.51901339389516e-06, "loss": 0.4947, "step": 3772 }, { "epoch": 1.8722849695916595, "grad_norm": 0.07386850215254852, "learning_rate": 5.5170692144814844e-06, "loss": 0.5007, "step": 3773 }, { "epoch": 1.8727814322948988, "grad_norm": 0.07282838700658707, "learning_rate": 5.51512495604207e-06, "loss": 0.4936, "step": 3774 }, { "epoch": 1.8732778949981381, "grad_norm": 0.070148810597566, "learning_rate": 5.513180618874066e-06, "loss": 0.4793, "step": 3775 }, { "epoch": 1.8737743577013777, "grad_norm": 0.07261217534361884, "learning_rate": 5.51123620327463e-06, "loss": 0.4862, "step": 3776 }, { "epoch": 1.8742708204046172, "grad_norm": 0.06946317885723177, "learning_rate": 5.509291709540942e-06, "loss": 0.497, "step": 3777 }, { "epoch": 1.8747672831078566, "grad_norm": 0.06961836136385555, "learning_rate": 5.50734713797018e-06, "loss": 0.4604, "step": 3778 }, { "epoch": 1.875263745811096, "grad_norm": 0.07178654847839235, "learning_rate": 5.5054024888595415e-06, "loss": 0.4991, "step": 3779 }, { "epoch": 1.8757602085143352, "grad_norm": 0.07408092919562587, "learning_rate": 5.503457762506236e-06, "loss": 0.5154, "step": 3780 }, { "epoch": 1.8762566712175748, "grad_norm": 0.07588664418284227, "learning_rate": 5.501512959207485e-06, "loss": 0.5019, "step": 3781 }, { "epoch": 1.8767531339208143, "grad_norm": 0.07277845131973065, "learning_rate": 5.499568079260519e-06, "loss": 0.5079, "step": 3782 }, { "epoch": 1.8772495966240537, "grad_norm": 0.0754058108393957, "learning_rate": 5.497623122962582e-06, "loss": 0.5014, "step": 3783 }, { "epoch": 1.877746059327293, "grad_norm": 0.07288657771449725, "learning_rate": 5.495678090610929e-06, "loss": 0.4912, "step": 3784 }, { "epoch": 1.8782425220305323, "grad_norm": 0.07547192333018429, "learning_rate": 5.493732982502828e-06, "loss": 0.5102, "step": 3785 }, { "epoch": 1.8787389847337719, "grad_norm": 0.07101553044945602, "learning_rate": 5.491787798935557e-06, "loss": 0.516, "step": 3786 }, { "epoch": 1.8792354474370114, "grad_norm": 0.0755631628504027, "learning_rate": 5.489842540206406e-06, "loss": 0.5123, "step": 3787 }, { "epoch": 1.8797319101402508, "grad_norm": 0.06839328149157925, "learning_rate": 5.487897206612678e-06, "loss": 0.4766, "step": 3788 }, { "epoch": 1.88022837284349, "grad_norm": 0.07225884897896602, "learning_rate": 5.485951798451683e-06, "loss": 0.4921, 
"step": 3789 }, { "epoch": 1.8807248355467294, "grad_norm": 0.07247762275978963, "learning_rate": 5.484006316020747e-06, "loss": 0.4911, "step": 3790 }, { "epoch": 1.881221298249969, "grad_norm": 0.07250223736563732, "learning_rate": 5.482060759617207e-06, "loss": 0.4837, "step": 3791 }, { "epoch": 1.8817177609532085, "grad_norm": 0.07185564268743984, "learning_rate": 5.480115129538409e-06, "loss": 0.5248, "step": 3792 }, { "epoch": 1.8822142236564479, "grad_norm": 0.0713794358085212, "learning_rate": 5.478169426081712e-06, "loss": 0.4907, "step": 3793 }, { "epoch": 1.8827106863596872, "grad_norm": 0.06919626880513847, "learning_rate": 5.476223649544485e-06, "loss": 0.4611, "step": 3794 }, { "epoch": 1.8832071490629265, "grad_norm": 0.07072586859293258, "learning_rate": 5.474277800224109e-06, "loss": 0.4829, "step": 3795 }, { "epoch": 1.883703611766166, "grad_norm": 0.06988680449488685, "learning_rate": 5.472331878417974e-06, "loss": 0.4537, "step": 3796 }, { "epoch": 1.8842000744694056, "grad_norm": 0.06903359042318398, "learning_rate": 5.470385884423486e-06, "loss": 0.5165, "step": 3797 }, { "epoch": 1.884696537172645, "grad_norm": 0.07130170675257452, "learning_rate": 5.468439818538057e-06, "loss": 0.4838, "step": 3798 }, { "epoch": 1.8851929998758843, "grad_norm": 0.07051483223367049, "learning_rate": 5.466493681059114e-06, "loss": 0.4792, "step": 3799 }, { "epoch": 1.8856894625791236, "grad_norm": 0.06919703127980216, "learning_rate": 5.464547472284091e-06, "loss": 0.458, "step": 3800 }, { "epoch": 1.8861859252823632, "grad_norm": 0.07022779374561813, "learning_rate": 5.462601192510435e-06, "loss": 0.5066, "step": 3801 }, { "epoch": 1.8866823879856027, "grad_norm": 0.07212467257348683, "learning_rate": 5.4606548420356046e-06, "loss": 0.4953, "step": 3802 }, { "epoch": 1.887178850688842, "grad_norm": 0.07085298955939363, "learning_rate": 5.458708421157066e-06, "loss": 0.5051, "step": 3803 }, { "epoch": 1.8876753133920814, "grad_norm": 0.07090198392557463, "learning_rate": 5.4567619301723015e-06, "loss": 0.4989, "step": 3804 }, { "epoch": 1.8881717760953207, "grad_norm": 0.07218297703303209, "learning_rate": 5.454815369378798e-06, "loss": 0.5297, "step": 3805 }, { "epoch": 1.8886682387985603, "grad_norm": 0.07024232525534924, "learning_rate": 5.452868739074059e-06, "loss": 0.5068, "step": 3806 }, { "epoch": 1.8891647015017998, "grad_norm": 0.06920823610833787, "learning_rate": 5.450922039555594e-06, "loss": 0.4614, "step": 3807 }, { "epoch": 1.8896611642050392, "grad_norm": 0.07763806617888123, "learning_rate": 5.448975271120925e-06, "loss": 0.4939, "step": 3808 }, { "epoch": 1.8901576269082785, "grad_norm": 0.07052981043474175, "learning_rate": 5.447028434067586e-06, "loss": 0.5062, "step": 3809 }, { "epoch": 1.8906540896115178, "grad_norm": 0.07014190175993225, "learning_rate": 5.445081528693118e-06, "loss": 0.5001, "step": 3810 }, { "epoch": 1.8911505523147574, "grad_norm": 0.07208974706034772, "learning_rate": 5.443134555295075e-06, "loss": 0.5052, "step": 3811 }, { "epoch": 1.891647015017997, "grad_norm": 0.07343737439730191, "learning_rate": 5.441187514171018e-06, "loss": 0.5319, "step": 3812 }, { "epoch": 1.8921434777212363, "grad_norm": 0.06983822109409202, "learning_rate": 5.439240405618524e-06, "loss": 0.5003, "step": 3813 }, { "epoch": 1.8926399404244756, "grad_norm": 0.07100147800127123, "learning_rate": 5.437293229935178e-06, "loss": 0.4855, "step": 3814 }, { "epoch": 1.893136403127715, "grad_norm": 0.06982351812557983, "learning_rate": 5.4353459874185735e-06, "loss": 
0.4685, "step": 3815 }, { "epoch": 1.8936328658309545, "grad_norm": 0.07161346724602752, "learning_rate": 5.433398678366314e-06, "loss": 0.451, "step": 3816 }, { "epoch": 1.894129328534194, "grad_norm": 0.07198803784772664, "learning_rate": 5.431451303076015e-06, "loss": 0.5078, "step": 3817 }, { "epoch": 1.8946257912374334, "grad_norm": 0.07011876526269005, "learning_rate": 5.429503861845305e-06, "loss": 0.4891, "step": 3818 }, { "epoch": 1.8951222539406727, "grad_norm": 0.07097934662924628, "learning_rate": 5.427556354971812e-06, "loss": 0.4812, "step": 3819 }, { "epoch": 1.895618716643912, "grad_norm": 0.07026743873520488, "learning_rate": 5.425608782753188e-06, "loss": 0.5061, "step": 3820 }, { "epoch": 1.8961151793471516, "grad_norm": 0.07349726995712234, "learning_rate": 5.4236611454870865e-06, "loss": 0.4738, "step": 3821 }, { "epoch": 1.8966116420503911, "grad_norm": 0.07126025580824183, "learning_rate": 5.42171344347117e-06, "loss": 0.4577, "step": 3822 }, { "epoch": 1.8971081047536305, "grad_norm": 0.06904179244008007, "learning_rate": 5.419765677003116e-06, "loss": 0.4556, "step": 3823 }, { "epoch": 1.8976045674568698, "grad_norm": 0.0740816990577368, "learning_rate": 5.417817846380609e-06, "loss": 0.4814, "step": 3824 }, { "epoch": 1.8981010301601091, "grad_norm": 0.0730309332342059, "learning_rate": 5.415869951901344e-06, "loss": 0.4987, "step": 3825 }, { "epoch": 1.8985974928633487, "grad_norm": 0.07214431782979727, "learning_rate": 5.413921993863024e-06, "loss": 0.4878, "step": 3826 }, { "epoch": 1.8990939555665882, "grad_norm": 0.07279291804221866, "learning_rate": 5.411973972563363e-06, "loss": 0.485, "step": 3827 }, { "epoch": 1.8995904182698276, "grad_norm": 0.07274491253000365, "learning_rate": 5.4100258883000874e-06, "loss": 0.4864, "step": 3828 }, { "epoch": 1.9000868809730669, "grad_norm": 0.07047067985378323, "learning_rate": 5.408077741370927e-06, "loss": 0.4782, "step": 3829 }, { "epoch": 1.9005833436763062, "grad_norm": 0.07326147656237637, "learning_rate": 5.406129532073628e-06, "loss": 0.4662, "step": 3830 }, { "epoch": 1.9010798063795458, "grad_norm": 0.07648989726494484, "learning_rate": 5.4041812607059444e-06, "loss": 0.484, "step": 3831 }, { "epoch": 1.9015762690827853, "grad_norm": 0.07201495853310386, "learning_rate": 5.402232927565632e-06, "loss": 0.4919, "step": 3832 }, { "epoch": 1.9020727317860247, "grad_norm": 0.07217460792172264, "learning_rate": 5.4002845329504675e-06, "loss": 0.4848, "step": 3833 }, { "epoch": 1.902569194489264, "grad_norm": 0.07370157153449013, "learning_rate": 5.398336077158231e-06, "loss": 0.4753, "step": 3834 }, { "epoch": 1.9030656571925033, "grad_norm": 0.06977914565580395, "learning_rate": 5.39638756048671e-06, "loss": 0.4935, "step": 3835 }, { "epoch": 1.9035621198957429, "grad_norm": 0.07335495629369132, "learning_rate": 5.394438983233707e-06, "loss": 0.4682, "step": 3836 }, { "epoch": 1.9040585825989822, "grad_norm": 0.07093032314632058, "learning_rate": 5.39249034569703e-06, "loss": 0.4617, "step": 3837 }, { "epoch": 1.9045550453022217, "grad_norm": 0.07253844538726559, "learning_rate": 5.390541648174495e-06, "loss": 0.5027, "step": 3838 }, { "epoch": 1.905051508005461, "grad_norm": 0.06916797666897612, "learning_rate": 5.388592890963933e-06, "loss": 0.4521, "step": 3839 }, { "epoch": 1.9055479707087004, "grad_norm": 0.07497205907931132, "learning_rate": 5.386644074363176e-06, "loss": 0.4871, "step": 3840 }, { "epoch": 1.90604443341194, "grad_norm": 0.07361931403289883, "learning_rate": 5.384695198670074e-06, "loss": 
0.5075, "step": 3841 }, { "epoch": 1.9065408961151793, "grad_norm": 0.07108507274036159, "learning_rate": 5.38274626418248e-06, "loss": 0.4663, "step": 3842 }, { "epoch": 1.9070373588184188, "grad_norm": 0.06947858409999921, "learning_rate": 5.380797271198253e-06, "loss": 0.4917, "step": 3843 }, { "epoch": 1.9075338215216582, "grad_norm": 0.07108447466202403, "learning_rate": 5.378848220015271e-06, "loss": 0.481, "step": 3844 }, { "epoch": 1.9080302842248975, "grad_norm": 0.07108083929393204, "learning_rate": 5.3768991109314115e-06, "loss": 0.5002, "step": 3845 }, { "epoch": 1.908526746928137, "grad_norm": 0.0717715304157745, "learning_rate": 5.374949944244566e-06, "loss": 0.4854, "step": 3846 }, { "epoch": 1.9090232096313764, "grad_norm": 0.0720786792030362, "learning_rate": 5.373000720252635e-06, "loss": 0.5155, "step": 3847 }, { "epoch": 1.909519672334616, "grad_norm": 0.07023062512373864, "learning_rate": 5.371051439253524e-06, "loss": 0.491, "step": 3848 }, { "epoch": 1.9100161350378553, "grad_norm": 0.07508624689000723, "learning_rate": 5.3691021015451494e-06, "loss": 0.4648, "step": 3849 }, { "epoch": 1.9105125977410946, "grad_norm": 0.0730348579251482, "learning_rate": 5.367152707425437e-06, "loss": 0.4769, "step": 3850 }, { "epoch": 1.9110090604443342, "grad_norm": 0.07212915215112067, "learning_rate": 5.36520325719232e-06, "loss": 0.4645, "step": 3851 }, { "epoch": 1.9115055231475735, "grad_norm": 0.07249343057488641, "learning_rate": 5.36325375114374e-06, "loss": 0.5165, "step": 3852 }, { "epoch": 1.912001985850813, "grad_norm": 0.07200471814438766, "learning_rate": 5.36130418957765e-06, "loss": 0.4757, "step": 3853 }, { "epoch": 1.9124984485540524, "grad_norm": 0.07433250128939686, "learning_rate": 5.359354572792006e-06, "loss": 0.5093, "step": 3854 }, { "epoch": 1.9129949112572917, "grad_norm": 0.07198352611168042, "learning_rate": 5.357404901084778e-06, "loss": 0.489, "step": 3855 }, { "epoch": 1.9134913739605313, "grad_norm": 0.07463196244659961, "learning_rate": 5.355455174753941e-06, "loss": 0.5198, "step": 3856 }, { "epoch": 1.9139878366637706, "grad_norm": 0.0691576326182084, "learning_rate": 5.35350539409748e-06, "loss": 0.4683, "step": 3857 }, { "epoch": 1.9144842993670101, "grad_norm": 0.07564132823230729, "learning_rate": 5.351555559413389e-06, "loss": 0.4893, "step": 3858 }, { "epoch": 1.9149807620702495, "grad_norm": 0.07390246796901016, "learning_rate": 5.349605670999667e-06, "loss": 0.4801, "step": 3859 }, { "epoch": 1.9154772247734888, "grad_norm": 0.0717750115158868, "learning_rate": 5.347655729154323e-06, "loss": 0.4849, "step": 3860 }, { "epoch": 1.9159736874767284, "grad_norm": 0.07277950154580684, "learning_rate": 5.345705734175375e-06, "loss": 0.4754, "step": 3861 }, { "epoch": 1.9164701501799677, "grad_norm": 0.07131228553584106, "learning_rate": 5.343755686360849e-06, "loss": 0.4685, "step": 3862 }, { "epoch": 1.9169666128832072, "grad_norm": 0.07097194395346555, "learning_rate": 5.341805586008778e-06, "loss": 0.4539, "step": 3863 }, { "epoch": 1.9174630755864466, "grad_norm": 0.07101003749169538, "learning_rate": 5.339855433417203e-06, "loss": 0.4888, "step": 3864 }, { "epoch": 1.917959538289686, "grad_norm": 0.07298877591361808, "learning_rate": 5.337905228884174e-06, "loss": 0.4787, "step": 3865 }, { "epoch": 1.9184560009929255, "grad_norm": 0.07395924862347028, "learning_rate": 5.335954972707749e-06, "loss": 0.4932, "step": 3866 }, { "epoch": 1.9189524636961648, "grad_norm": 0.07140031977681557, "learning_rate": 5.334004665185994e-06, "loss": 0.4982, 
"step": 3867 }, { "epoch": 1.9194489263994043, "grad_norm": 0.07305491864396065, "learning_rate": 5.332054306616979e-06, "loss": 0.4917, "step": 3868 }, { "epoch": 1.9199453891026437, "grad_norm": 0.07182300787383228, "learning_rate": 5.330103897298791e-06, "loss": 0.5242, "step": 3869 }, { "epoch": 1.920441851805883, "grad_norm": 0.0703090063753603, "learning_rate": 5.328153437529512e-06, "loss": 0.496, "step": 3870 }, { "epoch": 1.9209383145091226, "grad_norm": 0.06929915693165271, "learning_rate": 5.326202927607242e-06, "loss": 0.4823, "step": 3871 }, { "epoch": 1.9214347772123619, "grad_norm": 0.07318562935704766, "learning_rate": 5.324252367830085e-06, "loss": 0.5222, "step": 3872 }, { "epoch": 1.9219312399156014, "grad_norm": 0.070345300692849, "learning_rate": 5.322301758496153e-06, "loss": 0.4886, "step": 3873 }, { "epoch": 1.9224277026188408, "grad_norm": 0.0733085359507376, "learning_rate": 5.320351099903565e-06, "loss": 0.512, "step": 3874 }, { "epoch": 1.92292416532208, "grad_norm": 0.0719498700081074, "learning_rate": 5.318400392350449e-06, "loss": 0.4699, "step": 3875 }, { "epoch": 1.9234206280253194, "grad_norm": 0.06824895945528683, "learning_rate": 5.316449636134936e-06, "loss": 0.4729, "step": 3876 }, { "epoch": 1.923917090728559, "grad_norm": 0.07041042686305006, "learning_rate": 5.31449883155517e-06, "loss": 0.476, "step": 3877 }, { "epoch": 1.9244135534317985, "grad_norm": 0.067579216909461, "learning_rate": 5.3125479789093014e-06, "loss": 0.4677, "step": 3878 }, { "epoch": 1.9249100161350379, "grad_norm": 0.06917056104636418, "learning_rate": 5.310597078495485e-06, "loss": 0.5167, "step": 3879 }, { "epoch": 1.9254064788382772, "grad_norm": 0.07520188972413051, "learning_rate": 5.308646130611885e-06, "loss": 0.5043, "step": 3880 }, { "epoch": 1.9259029415415165, "grad_norm": 0.07522798508907527, "learning_rate": 5.306695135556673e-06, "loss": 0.5008, "step": 3881 }, { "epoch": 1.926399404244756, "grad_norm": 0.07326055869597388, "learning_rate": 5.304744093628028e-06, "loss": 0.5286, "step": 3882 }, { "epoch": 1.9268958669479956, "grad_norm": 0.0719762524355997, "learning_rate": 5.302793005124134e-06, "loss": 0.5473, "step": 3883 }, { "epoch": 1.927392329651235, "grad_norm": 0.07057243925086228, "learning_rate": 5.300841870343183e-06, "loss": 0.4878, "step": 3884 }, { "epoch": 1.9278887923544743, "grad_norm": 0.07456395275600818, "learning_rate": 5.298890689583377e-06, "loss": 0.4613, "step": 3885 }, { "epoch": 1.9283852550577136, "grad_norm": 0.0739750197425005, "learning_rate": 5.2969394631429205e-06, "loss": 0.4888, "step": 3886 }, { "epoch": 1.9288817177609532, "grad_norm": 0.06859930114929563, "learning_rate": 5.294988191320029e-06, "loss": 0.4502, "step": 3887 }, { "epoch": 1.9293781804641927, "grad_norm": 0.07333117565055676, "learning_rate": 5.29303687441292e-06, "loss": 0.5159, "step": 3888 }, { "epoch": 1.929874643167432, "grad_norm": 0.07881910644631798, "learning_rate": 5.2910855127198255e-06, "loss": 0.5053, "step": 3889 }, { "epoch": 1.9303711058706714, "grad_norm": 0.07125205611992942, "learning_rate": 5.289134106538978e-06, "loss": 0.4817, "step": 3890 }, { "epoch": 1.9308675685739107, "grad_norm": 0.07168797442151044, "learning_rate": 5.287182656168618e-06, "loss": 0.4976, "step": 3891 }, { "epoch": 1.9313640312771503, "grad_norm": 0.07173357891598954, "learning_rate": 5.2852311619069915e-06, "loss": 0.4605, "step": 3892 }, { "epoch": 1.9318604939803898, "grad_norm": 0.07179667762070725, "learning_rate": 5.2832796240523565e-06, "loss": 0.4384, "step": 
3893 }, { "epoch": 1.9323569566836292, "grad_norm": 0.0771564342106695, "learning_rate": 5.281328042902973e-06, "loss": 0.4836, "step": 3894 }, { "epoch": 1.9328534193868685, "grad_norm": 0.06971413584196552, "learning_rate": 5.279376418757108e-06, "loss": 0.4616, "step": 3895 }, { "epoch": 1.9333498820901078, "grad_norm": 0.07134917459980415, "learning_rate": 5.27742475191304e-06, "loss": 0.4853, "step": 3896 }, { "epoch": 1.9338463447933474, "grad_norm": 0.06948966703103633, "learning_rate": 5.275473042669043e-06, "loss": 0.4525, "step": 3897 }, { "epoch": 1.934342807496587, "grad_norm": 0.09123313507126177, "learning_rate": 5.273521291323411e-06, "loss": 0.4759, "step": 3898 }, { "epoch": 1.9348392701998263, "grad_norm": 0.07182320974046329, "learning_rate": 5.271569498174435e-06, "loss": 0.4837, "step": 3899 }, { "epoch": 1.9353357329030656, "grad_norm": 0.07167426782717073, "learning_rate": 5.269617663520414e-06, "loss": 0.4925, "step": 3900 }, { "epoch": 1.935832195606305, "grad_norm": 0.06971566479492058, "learning_rate": 5.2676657876596575e-06, "loss": 0.4939, "step": 3901 }, { "epoch": 1.9363286583095445, "grad_norm": 0.068449292316369, "learning_rate": 5.265713870890476e-06, "loss": 0.4435, "step": 3902 }, { "epoch": 1.936825121012784, "grad_norm": 0.07115671276963907, "learning_rate": 5.263761913511189e-06, "loss": 0.4797, "step": 3903 }, { "epoch": 1.9373215837160234, "grad_norm": 0.07092945923206197, "learning_rate": 5.261809915820124e-06, "loss": 0.4768, "step": 3904 }, { "epoch": 1.9378180464192627, "grad_norm": 0.07288485555482782, "learning_rate": 5.259857878115611e-06, "loss": 0.5005, "step": 3905 }, { "epoch": 1.938314509122502, "grad_norm": 0.06857200953594583, "learning_rate": 5.257905800695988e-06, "loss": 0.4786, "step": 3906 }, { "epoch": 1.9388109718257416, "grad_norm": 0.07014718654146762, "learning_rate": 5.2559536838595995e-06, "loss": 0.4651, "step": 3907 }, { "epoch": 1.9393074345289811, "grad_norm": 0.0701723829531734, "learning_rate": 5.254001527904793e-06, "loss": 0.4626, "step": 3908 }, { "epoch": 1.9398038972322205, "grad_norm": 0.07084434226107171, "learning_rate": 5.252049333129925e-06, "loss": 0.4729, "step": 3909 }, { "epoch": 1.9403003599354598, "grad_norm": 0.07160630955321064, "learning_rate": 5.250097099833358e-06, "loss": 0.484, "step": 3910 }, { "epoch": 1.9407968226386991, "grad_norm": 0.07034163421346201, "learning_rate": 5.248144828313459e-06, "loss": 0.51, "step": 3911 }, { "epoch": 1.9412932853419387, "grad_norm": 0.06976530549909565, "learning_rate": 5.2461925188686035e-06, "loss": 0.4295, "step": 3912 }, { "epoch": 1.9417897480451782, "grad_norm": 0.07191014314418367, "learning_rate": 5.244240171797168e-06, "loss": 0.4845, "step": 3913 }, { "epoch": 1.9422862107484176, "grad_norm": 0.07058958360248195, "learning_rate": 5.2422877873975384e-06, "loss": 0.4803, "step": 3914 }, { "epoch": 1.942782673451657, "grad_norm": 0.07252410993239682, "learning_rate": 5.240335365968104e-06, "loss": 0.5167, "step": 3915 }, { "epoch": 1.9432791361548962, "grad_norm": 0.07298165209234508, "learning_rate": 5.2383829078072635e-06, "loss": 0.5214, "step": 3916 }, { "epoch": 1.9437755988581358, "grad_norm": 0.07366922809256125, "learning_rate": 5.236430413213419e-06, "loss": 0.5065, "step": 3917 }, { "epoch": 1.9442720615613753, "grad_norm": 0.0743591532780005, "learning_rate": 5.234477882484975e-06, "loss": 0.5747, "step": 3918 }, { "epoch": 1.9447685242646147, "grad_norm": 0.0752519340136121, "learning_rate": 5.232525315920346e-06, "loss": 0.5071, "step": 
3919 }, { "epoch": 1.945264986967854, "grad_norm": 0.07115434487269699, "learning_rate": 5.230572713817951e-06, "loss": 0.4723, "step": 3920 }, { "epoch": 1.9457614496710933, "grad_norm": 0.07085415232697705, "learning_rate": 5.228620076476214e-06, "loss": 0.4882, "step": 3921 }, { "epoch": 1.9462579123743329, "grad_norm": 0.07250241964423546, "learning_rate": 5.226667404193564e-06, "loss": 0.5011, "step": 3922 }, { "epoch": 1.9467543750775724, "grad_norm": 0.06899533098215475, "learning_rate": 5.224714697268437e-06, "loss": 0.4682, "step": 3923 }, { "epoch": 1.9472508377808118, "grad_norm": 0.07378228905532908, "learning_rate": 5.222761955999269e-06, "loss": 0.4722, "step": 3924 }, { "epoch": 1.947747300484051, "grad_norm": 0.07267710646256538, "learning_rate": 5.220809180684508e-06, "loss": 0.4793, "step": 3925 }, { "epoch": 1.9482437631872904, "grad_norm": 0.07188532716957385, "learning_rate": 5.218856371622605e-06, "loss": 0.4758, "step": 3926 }, { "epoch": 1.94874022589053, "grad_norm": 0.0748892540261098, "learning_rate": 5.216903529112015e-06, "loss": 0.4768, "step": 3927 }, { "epoch": 1.9492366885937695, "grad_norm": 0.07361587591698933, "learning_rate": 5.214950653451199e-06, "loss": 0.4958, "step": 3928 }, { "epoch": 1.9497331512970089, "grad_norm": 0.0750386427247887, "learning_rate": 5.21299774493862e-06, "loss": 0.5139, "step": 3929 }, { "epoch": 1.9502296140002482, "grad_norm": 0.07157581527430623, "learning_rate": 5.211044803872752e-06, "loss": 0.5005, "step": 3930 }, { "epoch": 1.9507260767034875, "grad_norm": 0.07122801864538819, "learning_rate": 5.20909183055207e-06, "loss": 0.4836, "step": 3931 }, { "epoch": 1.951222539406727, "grad_norm": 0.073775379701217, "learning_rate": 5.207138825275053e-06, "loss": 0.5205, "step": 3932 }, { "epoch": 1.9517190021099666, "grad_norm": 0.0699335599632651, "learning_rate": 5.205185788340189e-06, "loss": 0.4552, "step": 3933 }, { "epoch": 1.952215464813206, "grad_norm": 0.0747012833856705, "learning_rate": 5.2032327200459665e-06, "loss": 0.5261, "step": 3934 }, { "epoch": 1.9527119275164453, "grad_norm": 0.06986301985581114, "learning_rate": 5.201279620690881e-06, "loss": 0.4645, "step": 3935 }, { "epoch": 1.9532083902196846, "grad_norm": 0.07083169381180109, "learning_rate": 5.199326490573433e-06, "loss": 0.4753, "step": 3936 }, { "epoch": 1.9537048529229242, "grad_norm": 0.07316821283838185, "learning_rate": 5.197373329992127e-06, "loss": 0.5037, "step": 3937 }, { "epoch": 1.9542013156261637, "grad_norm": 0.0709068153288116, "learning_rate": 5.195420139245472e-06, "loss": 0.46, "step": 3938 }, { "epoch": 1.954697778329403, "grad_norm": 0.07258328229639797, "learning_rate": 5.193466918631984e-06, "loss": 0.4768, "step": 3939 }, { "epoch": 1.9551942410326424, "grad_norm": 0.07013691797325394, "learning_rate": 5.191513668450178e-06, "loss": 0.4537, "step": 3940 }, { "epoch": 1.9556907037358817, "grad_norm": 0.0691432700044006, "learning_rate": 5.189560388998578e-06, "loss": 0.4689, "step": 3941 }, { "epoch": 1.9561871664391213, "grad_norm": 0.07255706197612641, "learning_rate": 5.187607080575712e-06, "loss": 0.4471, "step": 3942 }, { "epoch": 1.9566836291423608, "grad_norm": 0.07177608806087026, "learning_rate": 5.185653743480112e-06, "loss": 0.449, "step": 3943 }, { "epoch": 1.9571800918456002, "grad_norm": 0.07710954560772512, "learning_rate": 5.183700378010315e-06, "loss": 0.4822, "step": 3944 }, { "epoch": 1.9576765545488395, "grad_norm": 0.07169664734250977, "learning_rate": 5.1817469844648585e-06, "loss": 0.4972, "step": 3945 }, { 
"epoch": 1.9581730172520788, "grad_norm": 0.07193769644389156, "learning_rate": 5.179793563142291e-06, "loss": 0.468, "step": 3946 }, { "epoch": 1.9586694799553184, "grad_norm": 0.07188270892453336, "learning_rate": 5.17784011434116e-06, "loss": 0.5197, "step": 3947 }, { "epoch": 1.959165942658558, "grad_norm": 0.07174487240214936, "learning_rate": 5.1758866383600185e-06, "loss": 0.49, "step": 3948 }, { "epoch": 1.9596624053617973, "grad_norm": 0.06851866889793691, "learning_rate": 5.1739331354974245e-06, "loss": 0.4647, "step": 3949 }, { "epoch": 1.9601588680650366, "grad_norm": 0.07355462858315683, "learning_rate": 5.17197960605194e-06, "loss": 0.4867, "step": 3950 }, { "epoch": 1.960655330768276, "grad_norm": 0.07129939537738009, "learning_rate": 5.17002605032213e-06, "loss": 0.4696, "step": 3951 }, { "epoch": 1.9611517934715155, "grad_norm": 0.07265310768420902, "learning_rate": 5.168072468606564e-06, "loss": 0.5057, "step": 3952 }, { "epoch": 1.961648256174755, "grad_norm": 0.07219085002056012, "learning_rate": 5.166118861203816e-06, "loss": 0.4757, "step": 3953 }, { "epoch": 1.9621447188779944, "grad_norm": 0.07188896917633172, "learning_rate": 5.1641652284124645e-06, "loss": 0.4623, "step": 3954 }, { "epoch": 1.9626411815812337, "grad_norm": 0.07047459964076412, "learning_rate": 5.16221157053109e-06, "loss": 0.4917, "step": 3955 }, { "epoch": 1.963137644284473, "grad_norm": 0.07207176246858632, "learning_rate": 5.160257887858278e-06, "loss": 0.4835, "step": 3956 }, { "epoch": 1.9636341069877126, "grad_norm": 0.07245058141331831, "learning_rate": 5.158304180692615e-06, "loss": 0.5134, "step": 3957 }, { "epoch": 1.9641305696909521, "grad_norm": 0.0709051816408688, "learning_rate": 5.156350449332698e-06, "loss": 0.4731, "step": 3958 }, { "epoch": 1.9646270323941915, "grad_norm": 0.0716399263483255, "learning_rate": 5.154396694077121e-06, "loss": 0.477, "step": 3959 }, { "epoch": 1.9651234950974308, "grad_norm": 0.070990095924848, "learning_rate": 5.152442915224486e-06, "loss": 0.4813, "step": 3960 }, { "epoch": 1.9656199578006701, "grad_norm": 0.07019738338899009, "learning_rate": 5.150489113073394e-06, "loss": 0.5073, "step": 3961 }, { "epoch": 1.9661164205039097, "grad_norm": 0.07049687491155082, "learning_rate": 5.148535287922457e-06, "loss": 0.4801, "step": 3962 }, { "epoch": 1.9666128832071492, "grad_norm": 0.07129508411132399, "learning_rate": 5.1465814400702804e-06, "loss": 0.484, "step": 3963 }, { "epoch": 1.9671093459103886, "grad_norm": 0.07490675464094516, "learning_rate": 5.144627569815481e-06, "loss": 0.4956, "step": 3964 }, { "epoch": 1.9676058086136279, "grad_norm": 0.07152667854181811, "learning_rate": 5.142673677456676e-06, "loss": 0.4937, "step": 3965 }, { "epoch": 1.9681022713168672, "grad_norm": 0.07213237931738262, "learning_rate": 5.1407197632924885e-06, "loss": 0.5074, "step": 3966 }, { "epoch": 1.9685987340201068, "grad_norm": 0.07476637301318777, "learning_rate": 5.138765827621541e-06, "loss": 0.5058, "step": 3967 }, { "epoch": 1.9690951967233463, "grad_norm": 0.07153425116188193, "learning_rate": 5.136811870742462e-06, "loss": 0.4935, "step": 3968 }, { "epoch": 1.9695916594265857, "grad_norm": 0.07285692288676107, "learning_rate": 5.134857892953881e-06, "loss": 0.4776, "step": 3969 }, { "epoch": 1.970088122129825, "grad_norm": 0.07130975929675355, "learning_rate": 5.132903894554434e-06, "loss": 0.4711, "step": 3970 }, { "epoch": 1.9705845848330643, "grad_norm": 0.06927463689733188, "learning_rate": 5.130949875842758e-06, "loss": 0.4741, "step": 3971 }, { 
"epoch": 1.9710810475363039, "grad_norm": 0.07226451812060516, "learning_rate": 5.128995837117493e-06, "loss": 0.4993, "step": 3972 }, { "epoch": 1.9715775102395434, "grad_norm": 0.06814833359691802, "learning_rate": 5.127041778677283e-06, "loss": 0.4552, "step": 3973 }, { "epoch": 1.9720739729427827, "grad_norm": 0.07386779362943056, "learning_rate": 5.1250877008207725e-06, "loss": 0.4943, "step": 3974 }, { "epoch": 1.972570435646022, "grad_norm": 0.07245457245389018, "learning_rate": 5.123133603846613e-06, "loss": 0.4883, "step": 3975 }, { "epoch": 1.9730668983492614, "grad_norm": 0.0729979245483732, "learning_rate": 5.121179488053458e-06, "loss": 0.4817, "step": 3976 }, { "epoch": 1.973563361052501, "grad_norm": 0.0718325924027625, "learning_rate": 5.1192253537399595e-06, "loss": 0.4552, "step": 3977 }, { "epoch": 1.9740598237557403, "grad_norm": 0.07326345383485158, "learning_rate": 5.117271201204779e-06, "loss": 0.5166, "step": 3978 }, { "epoch": 1.9745562864589798, "grad_norm": 0.07098301430790274, "learning_rate": 5.115317030746575e-06, "loss": 0.4545, "step": 3979 }, { "epoch": 1.9750527491622192, "grad_norm": 0.07394770312842015, "learning_rate": 5.11336284266401e-06, "loss": 0.4882, "step": 3980 }, { "epoch": 1.9755492118654585, "grad_norm": 0.07205746944153327, "learning_rate": 5.111408637255754e-06, "loss": 0.494, "step": 3981 }, { "epoch": 1.976045674568698, "grad_norm": 0.06869822675283942, "learning_rate": 5.109454414820475e-06, "loss": 0.4724, "step": 3982 }, { "epoch": 1.9765421372719374, "grad_norm": 0.07308322700351189, "learning_rate": 5.107500175656842e-06, "loss": 0.4904, "step": 3983 }, { "epoch": 1.977038599975177, "grad_norm": 0.07165097342539073, "learning_rate": 5.10554592006353e-06, "loss": 0.4908, "step": 3984 }, { "epoch": 1.9775350626784163, "grad_norm": 0.07454745150610671, "learning_rate": 5.103591648339218e-06, "loss": 0.4706, "step": 3985 }, { "epoch": 1.9780315253816556, "grad_norm": 0.06909233417316848, "learning_rate": 5.101637360782584e-06, "loss": 0.4881, "step": 3986 }, { "epoch": 1.9785279880848952, "grad_norm": 0.07547889134109059, "learning_rate": 5.0996830576923075e-06, "loss": 0.4967, "step": 3987 }, { "epoch": 1.9790244507881345, "grad_norm": 0.07184250001754491, "learning_rate": 5.097728739367076e-06, "loss": 0.4631, "step": 3988 }, { "epoch": 1.979520913491374, "grad_norm": 0.07246338867423593, "learning_rate": 5.095774406105572e-06, "loss": 0.5352, "step": 3989 }, { "epoch": 1.9800173761946134, "grad_norm": 0.07117800861507659, "learning_rate": 5.0938200582064846e-06, "loss": 0.4887, "step": 3990 }, { "epoch": 1.9805138388978527, "grad_norm": 0.07386014244542119, "learning_rate": 5.091865695968508e-06, "loss": 0.4824, "step": 3991 }, { "epoch": 1.9810103016010923, "grad_norm": 0.07463772727319021, "learning_rate": 5.089911319690331e-06, "loss": 0.4731, "step": 3992 }, { "epoch": 1.9815067643043316, "grad_norm": 0.07409681863302726, "learning_rate": 5.087956929670651e-06, "loss": 0.5116, "step": 3993 }, { "epoch": 1.9820032270075711, "grad_norm": 0.07102473269761052, "learning_rate": 5.086002526208166e-06, "loss": 0.4988, "step": 3994 }, { "epoch": 1.9824996897108105, "grad_norm": 0.07270851135800403, "learning_rate": 5.084048109601571e-06, "loss": 0.5075, "step": 3995 }, { "epoch": 1.9829961524140498, "grad_norm": 0.07287188630853728, "learning_rate": 5.0820936801495716e-06, "loss": 0.5128, "step": 3996 }, { "epoch": 1.9834926151172894, "grad_norm": 0.07061601120193683, "learning_rate": 5.080139238150869e-06, "loss": 0.4846, "step": 3997 }, 
{ "epoch": 1.9839890778205287, "grad_norm": 0.07260115702119305, "learning_rate": 5.07818478390417e-06, "loss": 0.4818, "step": 3998 }, { "epoch": 1.9844855405237682, "grad_norm": 0.07763074208464117, "learning_rate": 5.076230317708179e-06, "loss": 0.5172, "step": 3999 }, { "epoch": 1.9849820032270076, "grad_norm": 0.06975414599004237, "learning_rate": 5.074275839861606e-06, "loss": 0.4785, "step": 4000 }, { "epoch": 1.985478465930247, "grad_norm": 0.07380231976084484, "learning_rate": 5.072321350663163e-06, "loss": 0.4678, "step": 4001 }, { "epoch": 1.9859749286334865, "grad_norm": 0.07237761374407202, "learning_rate": 5.070366850411561e-06, "loss": 0.4779, "step": 4002 }, { "epoch": 1.9864713913367258, "grad_norm": 0.07057959775998889, "learning_rate": 5.068412339405514e-06, "loss": 0.4809, "step": 4003 }, { "epoch": 1.9869678540399653, "grad_norm": 0.0715457070565392, "learning_rate": 5.066457817943738e-06, "loss": 0.4992, "step": 4004 }, { "epoch": 1.9874643167432047, "grad_norm": 0.07391983194928121, "learning_rate": 5.06450328632495e-06, "loss": 0.5153, "step": 4005 }, { "epoch": 1.987960779446444, "grad_norm": 0.07503637951833564, "learning_rate": 5.062548744847867e-06, "loss": 0.5113, "step": 4006 }, { "epoch": 1.9884572421496836, "grad_norm": 0.07074043181554573, "learning_rate": 5.0605941938112135e-06, "loss": 0.4669, "step": 4007 }, { "epoch": 1.9889537048529229, "grad_norm": 0.07169585364157281, "learning_rate": 5.058639633513708e-06, "loss": 0.4969, "step": 4008 }, { "epoch": 1.9894501675561624, "grad_norm": 0.06785942658601277, "learning_rate": 5.056685064254075e-06, "loss": 0.4561, "step": 4009 }, { "epoch": 1.9899466302594018, "grad_norm": 0.06918926184232858, "learning_rate": 5.054730486331041e-06, "loss": 0.4759, "step": 4010 }, { "epoch": 1.990443092962641, "grad_norm": 0.0749865895276791, "learning_rate": 5.052775900043326e-06, "loss": 0.5023, "step": 4011 }, { "epoch": 1.9909395556658807, "grad_norm": 0.07200649155022336, "learning_rate": 5.050821305689662e-06, "loss": 0.4835, "step": 4012 }, { "epoch": 1.99143601836912, "grad_norm": 0.06885032143514215, "learning_rate": 5.048866703568778e-06, "loss": 0.4554, "step": 4013 }, { "epoch": 1.9919324810723595, "grad_norm": 0.07501649054063417, "learning_rate": 5.046912093979402e-06, "loss": 0.5018, "step": 4014 }, { "epoch": 1.9924289437755989, "grad_norm": 0.07143933594119112, "learning_rate": 5.044957477220261e-06, "loss": 0.4684, "step": 4015 }, { "epoch": 1.9929254064788382, "grad_norm": 0.07164911960402352, "learning_rate": 5.043002853590093e-06, "loss": 0.4778, "step": 4016 }, { "epoch": 1.9934218691820775, "grad_norm": 0.0724278620954663, "learning_rate": 5.0410482233876275e-06, "loss": 0.4862, "step": 4017 }, { "epoch": 1.993918331885317, "grad_norm": 0.07324551513366404, "learning_rate": 5.0390935869116006e-06, "loss": 0.4954, "step": 4018 }, { "epoch": 1.9944147945885566, "grad_norm": 0.07243750592972426, "learning_rate": 5.0371389444607455e-06, "loss": 0.4751, "step": 4019 }, { "epoch": 1.994911257291796, "grad_norm": 0.07093125056962983, "learning_rate": 5.035184296333798e-06, "loss": 0.5011, "step": 4020 }, { "epoch": 1.9954077199950353, "grad_norm": 0.07146838278764876, "learning_rate": 5.033229642829494e-06, "loss": 0.4697, "step": 4021 }, { "epoch": 1.9959041826982746, "grad_norm": 0.0680395947687067, "learning_rate": 5.0312749842465725e-06, "loss": 0.4548, "step": 4022 }, { "epoch": 1.9964006454015142, "grad_norm": 0.07297368724052139, "learning_rate": 5.029320320883771e-06, "loss": 0.5171, "step": 4023 }, 
{ "epoch": 1.9968971081047537, "grad_norm": 0.07341048772340283, "learning_rate": 5.0273656530398285e-06, "loss": 0.4846, "step": 4024 }, { "epoch": 1.997393570807993, "grad_norm": 0.07150096630499465, "learning_rate": 5.025410981013486e-06, "loss": 0.4866, "step": 4025 }, { "epoch": 1.9978900335112324, "grad_norm": 0.06923746655335533, "learning_rate": 5.023456305103482e-06, "loss": 0.4842, "step": 4026 }, { "epoch": 1.9983864962144717, "grad_norm": 0.07353105275938862, "learning_rate": 5.021501625608557e-06, "loss": 0.4865, "step": 4027 }, { "epoch": 1.9988829589177113, "grad_norm": 0.07262147086529945, "learning_rate": 5.019546942827452e-06, "loss": 0.5071, "step": 4028 }, { "epoch": 1.9993794216209508, "grad_norm": 0.07009105495484086, "learning_rate": 5.017592257058912e-06, "loss": 0.4571, "step": 4029 }, { "epoch": 1.9998758843241902, "grad_norm": 0.0717265778317302, "learning_rate": 5.015637568601678e-06, "loss": 0.4721, "step": 4030 }, { "epoch": 2.0, "grad_norm": 0.0717265778317302, "learning_rate": 5.013682877754491e-06, "loss": 0.1317, "step": 4031 }, { "epoch": 2.0003723470274295, "grad_norm": 0.07314792877813803, "learning_rate": 5.011728184816096e-06, "loss": 0.3638, "step": 4032 }, { "epoch": 2.0003723470274295, "eval_loss": 0.5145248770713806, "eval_runtime": 258.8663, "eval_samples_per_second": 117.254, "eval_steps_per_second": 14.66, "step": 4032 }, { "epoch": 2.0004964627032393, "grad_norm": 0.08562217509650846, "learning_rate": 5.009773490085236e-06, "loss": 0.4738, "step": 4033 }, { "epoch": 2.0009929254064787, "grad_norm": 0.08037551216927723, "learning_rate": 5.007818793860656e-06, "loss": 0.4554, "step": 4034 }, { "epoch": 2.0014893881097184, "grad_norm": 0.07699272192544572, "learning_rate": 5.0058640964410975e-06, "loss": 0.4558, "step": 4035 }, { "epoch": 2.0019858508129578, "grad_norm": 0.07490333477454107, "learning_rate": 5.003909398125306e-06, "loss": 0.4492, "step": 4036 }, { "epoch": 2.002482313516197, "grad_norm": 0.07484042574828073, "learning_rate": 5.001954699212026e-06, "loss": 0.467, "step": 4037 }, { "epoch": 2.0029787762194364, "grad_norm": 0.07985101916527011, "learning_rate": 5e-06, "loss": 0.4856, "step": 4038 }, { "epoch": 2.0034752389226758, "grad_norm": 0.07776863307734547, "learning_rate": 4.998045300787976e-06, "loss": 0.4442, "step": 4039 }, { "epoch": 2.0039717016259155, "grad_norm": 0.08421470369676873, "learning_rate": 4.996090601874695e-06, "loss": 0.4537, "step": 4040 }, { "epoch": 2.004468164329155, "grad_norm": 0.07856954374286516, "learning_rate": 4.994135903558904e-06, "loss": 0.4599, "step": 4041 }, { "epoch": 2.004964627032394, "grad_norm": 0.07287925161116053, "learning_rate": 4.9921812061393454e-06, "loss": 0.4298, "step": 4042 }, { "epoch": 2.0054610897356335, "grad_norm": 0.07811975249794761, "learning_rate": 4.990226509914764e-06, "loss": 0.4746, "step": 4043 }, { "epoch": 2.005957552438873, "grad_norm": 0.08367632748009121, "learning_rate": 4.9882718151839045e-06, "loss": 0.4859, "step": 4044 }, { "epoch": 2.0064540151421126, "grad_norm": 0.07739267038787806, "learning_rate": 4.986317122245508e-06, "loss": 0.4506, "step": 4045 }, { "epoch": 2.006950477845352, "grad_norm": 0.07603906485692118, "learning_rate": 4.984362431398324e-06, "loss": 0.4648, "step": 4046 }, { "epoch": 2.0074469405485913, "grad_norm": 0.07055244361688247, "learning_rate": 4.9824077429410895e-06, "loss": 0.4481, "step": 4047 }, { "epoch": 2.0079434032518306, "grad_norm": 0.07596314574372579, "learning_rate": 4.98045305717255e-06, "loss": 0.4705, "step": 
4048 }, { "epoch": 2.00843986595507, "grad_norm": 0.07041469030095589, "learning_rate": 4.978498374391446e-06, "loss": 0.4667, "step": 4049 }, { "epoch": 2.0089363286583097, "grad_norm": 0.07212901412219706, "learning_rate": 4.976543694896521e-06, "loss": 0.4696, "step": 4050 }, { "epoch": 2.009432791361549, "grad_norm": 0.07731331158034714, "learning_rate": 4.974589018986516e-06, "loss": 0.4541, "step": 4051 }, { "epoch": 2.0099292540647884, "grad_norm": 0.072220754341368, "learning_rate": 4.972634346960173e-06, "loss": 0.4462, "step": 4052 }, { "epoch": 2.0104257167680277, "grad_norm": 0.07403397940617826, "learning_rate": 4.97067967911623e-06, "loss": 0.45, "step": 4053 }, { "epoch": 2.010922179471267, "grad_norm": 0.0732440572549554, "learning_rate": 4.968725015753429e-06, "loss": 0.4322, "step": 4054 }, { "epoch": 2.011418642174507, "grad_norm": 0.07231534439947165, "learning_rate": 4.966770357170507e-06, "loss": 0.4715, "step": 4055 }, { "epoch": 2.011915104877746, "grad_norm": 0.0713353452872365, "learning_rate": 4.9648157036662035e-06, "loss": 0.4629, "step": 4056 }, { "epoch": 2.0124115675809855, "grad_norm": 0.07330641016780033, "learning_rate": 4.962861055539256e-06, "loss": 0.4704, "step": 4057 }, { "epoch": 2.012908030284225, "grad_norm": 0.07421035285517524, "learning_rate": 4.9609064130884e-06, "loss": 0.4709, "step": 4058 }, { "epoch": 2.013404492987464, "grad_norm": 0.07083522565061746, "learning_rate": 4.9589517766123725e-06, "loss": 0.4426, "step": 4059 }, { "epoch": 2.013900955690704, "grad_norm": 0.07256789965761337, "learning_rate": 4.956997146409907e-06, "loss": 0.453, "step": 4060 }, { "epoch": 2.0143974183939433, "grad_norm": 0.07401292502068656, "learning_rate": 4.955042522779739e-06, "loss": 0.4725, "step": 4061 }, { "epoch": 2.0148938810971826, "grad_norm": 0.07293397586128852, "learning_rate": 4.953087906020601e-06, "loss": 0.4625, "step": 4062 }, { "epoch": 2.015390343800422, "grad_norm": 0.07198059298195802, "learning_rate": 4.951133296431224e-06, "loss": 0.4573, "step": 4063 }, { "epoch": 2.0158868065036613, "grad_norm": 0.07547173794053258, "learning_rate": 4.949178694310339e-06, "loss": 0.4486, "step": 4064 }, { "epoch": 2.016383269206901, "grad_norm": 0.07199537230690754, "learning_rate": 4.9472240999566755e-06, "loss": 0.4355, "step": 4065 }, { "epoch": 2.0168797319101404, "grad_norm": 0.07316013752955937, "learning_rate": 4.945269513668962e-06, "loss": 0.4571, "step": 4066 }, { "epoch": 2.0173761946133797, "grad_norm": 0.07005123491908063, "learning_rate": 4.943314935745925e-06, "loss": 0.4537, "step": 4067 }, { "epoch": 2.017872657316619, "grad_norm": 0.07114454027164603, "learning_rate": 4.941360366486294e-06, "loss": 0.4365, "step": 4068 }, { "epoch": 2.0183691200198584, "grad_norm": 0.0718199562081555, "learning_rate": 4.939405806188788e-06, "loss": 0.4993, "step": 4069 }, { "epoch": 2.018865582723098, "grad_norm": 0.07727220254428632, "learning_rate": 4.9374512551521335e-06, "loss": 0.4716, "step": 4070 }, { "epoch": 2.0193620454263375, "grad_norm": 0.07394436337606848, "learning_rate": 4.935496713675052e-06, "loss": 0.4612, "step": 4071 }, { "epoch": 2.019858508129577, "grad_norm": 0.0713432047748325, "learning_rate": 4.9335421820562635e-06, "loss": 0.5075, "step": 4072 }, { "epoch": 2.020354970832816, "grad_norm": 0.07297875830518837, "learning_rate": 4.931587660594488e-06, "loss": 0.4054, "step": 4073 }, { "epoch": 2.0208514335360555, "grad_norm": 0.0742995803324647, "learning_rate": 4.929633149588441e-06, "loss": 0.4802, "step": 4074 }, { 
"epoch": 2.0213478962392952, "grad_norm": 0.07235310266525152, "learning_rate": 4.927678649336838e-06, "loss": 0.4755, "step": 4075 }, { "epoch": 2.0218443589425346, "grad_norm": 0.07469287262717912, "learning_rate": 4.925724160138394e-06, "loss": 0.4663, "step": 4076 }, { "epoch": 2.022340821645774, "grad_norm": 0.07378999311999616, "learning_rate": 4.923769682291822e-06, "loss": 0.4718, "step": 4077 }, { "epoch": 2.022837284349013, "grad_norm": 0.07371766271655966, "learning_rate": 4.921815216095832e-06, "loss": 0.4573, "step": 4078 }, { "epoch": 2.0233337470522526, "grad_norm": 0.0818546782159551, "learning_rate": 4.919860761849132e-06, "loss": 0.4857, "step": 4079 }, { "epoch": 2.0238302097554923, "grad_norm": 0.07398217930028606, "learning_rate": 4.917906319850431e-06, "loss": 0.4513, "step": 4080 }, { "epoch": 2.0243266724587317, "grad_norm": 0.07234169176030968, "learning_rate": 4.915951890398431e-06, "loss": 0.4686, "step": 4081 }, { "epoch": 2.024823135161971, "grad_norm": 0.07138272648374208, "learning_rate": 4.913997473791837e-06, "loss": 0.4842, "step": 4082 }, { "epoch": 2.0253195978652103, "grad_norm": 0.07389413192702614, "learning_rate": 4.9120430703293504e-06, "loss": 0.4866, "step": 4083 }, { "epoch": 2.0258160605684497, "grad_norm": 0.07146788366049606, "learning_rate": 4.9100886803096696e-06, "loss": 0.4646, "step": 4084 }, { "epoch": 2.0263125232716894, "grad_norm": 0.07093367028537836, "learning_rate": 4.908134304031495e-06, "loss": 0.4583, "step": 4085 }, { "epoch": 2.0268089859749288, "grad_norm": 0.07429306592375522, "learning_rate": 4.906179941793516e-06, "loss": 0.4953, "step": 4086 }, { "epoch": 2.027305448678168, "grad_norm": 0.07360985294937847, "learning_rate": 4.90422559389443e-06, "loss": 0.4464, "step": 4087 }, { "epoch": 2.0278019113814074, "grad_norm": 0.0718166852132005, "learning_rate": 4.902271260632926e-06, "loss": 0.457, "step": 4088 }, { "epoch": 2.0282983740846467, "grad_norm": 0.07030065358374502, "learning_rate": 4.900316942307693e-06, "loss": 0.4384, "step": 4089 }, { "epoch": 2.0287948367878865, "grad_norm": 0.07571066072847284, "learning_rate": 4.898362639217417e-06, "loss": 0.4567, "step": 4090 }, { "epoch": 2.029291299491126, "grad_norm": 0.07956518040265176, "learning_rate": 4.896408351660783e-06, "loss": 0.4577, "step": 4091 }, { "epoch": 2.029787762194365, "grad_norm": 0.0742603702620764, "learning_rate": 4.89445407993647e-06, "loss": 0.45, "step": 4092 }, { "epoch": 2.0302842248976045, "grad_norm": 0.07054575013845489, "learning_rate": 4.892499824343159e-06, "loss": 0.457, "step": 4093 }, { "epoch": 2.030780687600844, "grad_norm": 0.07252488643464008, "learning_rate": 4.890545585179527e-06, "loss": 0.4622, "step": 4094 }, { "epoch": 2.0312771503040836, "grad_norm": 0.07143595162426333, "learning_rate": 4.888591362744247e-06, "loss": 0.4477, "step": 4095 }, { "epoch": 2.031773613007323, "grad_norm": 0.07159588677019353, "learning_rate": 4.886637157335992e-06, "loss": 0.4622, "step": 4096 }, { "epoch": 2.0322700757105623, "grad_norm": 0.07040447114523164, "learning_rate": 4.884682969253428e-06, "loss": 0.4579, "step": 4097 }, { "epoch": 2.0327665384138016, "grad_norm": 0.07411670769151289, "learning_rate": 4.882728798795223e-06, "loss": 0.4516, "step": 4098 }, { "epoch": 2.033263001117041, "grad_norm": 0.06959189954785697, "learning_rate": 4.880774646260041e-06, "loss": 0.4221, "step": 4099 }, { "epoch": 2.0337594638202807, "grad_norm": 0.07199309706450548, "learning_rate": 4.878820511946543e-06, "loss": 0.4712, "step": 4100 }, { "epoch": 
2.03425592652352, "grad_norm": 0.07325495710301079, "learning_rate": 4.876866396153388e-06, "loss": 0.4794, "step": 4101 }, { "epoch": 2.0347523892267594, "grad_norm": 0.0717853143430827, "learning_rate": 4.874912299179228e-06, "loss": 0.4739, "step": 4102 }, { "epoch": 2.0352488519299987, "grad_norm": 0.07407765387786804, "learning_rate": 4.872958221322719e-06, "loss": 0.4842, "step": 4103 }, { "epoch": 2.035745314633238, "grad_norm": 0.0694922585009176, "learning_rate": 4.871004162882508e-06, "loss": 0.4404, "step": 4104 }, { "epoch": 2.0362417773364774, "grad_norm": 0.07157290293449331, "learning_rate": 4.869050124157244e-06, "loss": 0.4553, "step": 4105 }, { "epoch": 2.036738240039717, "grad_norm": 0.07266934587376413, "learning_rate": 4.8670961054455666e-06, "loss": 0.4485, "step": 4106 }, { "epoch": 2.0372347027429565, "grad_norm": 0.071777351529913, "learning_rate": 4.86514210704612e-06, "loss": 0.4649, "step": 4107 }, { "epoch": 2.037731165446196, "grad_norm": 0.07503740354924117, "learning_rate": 4.863188129257539e-06, "loss": 0.4373, "step": 4108 }, { "epoch": 2.038227628149435, "grad_norm": 0.07386719941049365, "learning_rate": 4.8612341723784586e-06, "loss": 0.4776, "step": 4109 }, { "epoch": 2.0387240908526745, "grad_norm": 0.07248900897774578, "learning_rate": 4.859280236707512e-06, "loss": 0.452, "step": 4110 }, { "epoch": 2.0392205535559143, "grad_norm": 0.07179491353728072, "learning_rate": 4.857326322543325e-06, "loss": 0.4478, "step": 4111 }, { "epoch": 2.0397170162591536, "grad_norm": 0.07174429454012216, "learning_rate": 4.855372430184522e-06, "loss": 0.4343, "step": 4112 }, { "epoch": 2.040213478962393, "grad_norm": 0.07519278914658567, "learning_rate": 4.853418559929722e-06, "loss": 0.474, "step": 4113 }, { "epoch": 2.0407099416656322, "grad_norm": 0.07138808068817071, "learning_rate": 4.851464712077546e-06, "loss": 0.4454, "step": 4114 }, { "epoch": 2.0412064043688716, "grad_norm": 0.07314394649356787, "learning_rate": 4.849510886926606e-06, "loss": 0.4659, "step": 4115 }, { "epoch": 2.0417028670721113, "grad_norm": 0.07387286339499641, "learning_rate": 4.847557084775515e-06, "loss": 0.4686, "step": 4116 }, { "epoch": 2.0421993297753507, "grad_norm": 0.0707490447383383, "learning_rate": 4.84560330592288e-06, "loss": 0.5006, "step": 4117 }, { "epoch": 2.04269579247859, "grad_norm": 0.07012577898465304, "learning_rate": 4.843649550667304e-06, "loss": 0.4308, "step": 4118 }, { "epoch": 2.0431922551818293, "grad_norm": 0.07275514929361539, "learning_rate": 4.841695819307386e-06, "loss": 0.4929, "step": 4119 }, { "epoch": 2.0436887178850687, "grad_norm": 0.07248163580645842, "learning_rate": 4.839742112141725e-06, "loss": 0.4415, "step": 4120 }, { "epoch": 2.0441851805883084, "grad_norm": 0.07201595951270065, "learning_rate": 4.8377884294689114e-06, "loss": 0.4482, "step": 4121 }, { "epoch": 2.044681643291548, "grad_norm": 0.07366828864299017, "learning_rate": 4.835834771587537e-06, "loss": 0.4547, "step": 4122 }, { "epoch": 2.045178105994787, "grad_norm": 0.07590040021361757, "learning_rate": 4.833881138796185e-06, "loss": 0.4332, "step": 4123 }, { "epoch": 2.0456745686980264, "grad_norm": 0.07052536062844234, "learning_rate": 4.831927531393437e-06, "loss": 0.4399, "step": 4124 }, { "epoch": 2.0461710314012658, "grad_norm": 0.07317965073722368, "learning_rate": 4.829973949677871e-06, "loss": 0.4596, "step": 4125 }, { "epoch": 2.0466674941045055, "grad_norm": 0.07648174917490391, "learning_rate": 4.828020393948061e-06, "loss": 0.4876, "step": 4126 }, { "epoch": 
2.047163956807745, "grad_norm": 0.07241753151580768, "learning_rate": 4.826066864502578e-06, "loss": 0.4543, "step": 4127 }, { "epoch": 2.047660419510984, "grad_norm": 0.07405990811154906, "learning_rate": 4.824113361639984e-06, "loss": 0.4693, "step": 4128 }, { "epoch": 2.0481568822142235, "grad_norm": 0.07489984157760932, "learning_rate": 4.822159885658843e-06, "loss": 0.432, "step": 4129 }, { "epoch": 2.048653344917463, "grad_norm": 0.06986823081594058, "learning_rate": 4.8202064368577116e-06, "loss": 0.4457, "step": 4130 }, { "epoch": 2.0491498076207026, "grad_norm": 0.07234328790911547, "learning_rate": 4.818253015535143e-06, "loss": 0.4661, "step": 4131 }, { "epoch": 2.049646270323942, "grad_norm": 0.07155335491576369, "learning_rate": 4.816299621989687e-06, "loss": 0.4741, "step": 4132 }, { "epoch": 2.0501427330271813, "grad_norm": 0.07599145080679516, "learning_rate": 4.81434625651989e-06, "loss": 0.4851, "step": 4133 }, { "epoch": 2.0506391957304206, "grad_norm": 0.07201140956511509, "learning_rate": 4.81239291942429e-06, "loss": 0.4552, "step": 4134 }, { "epoch": 2.05113565843366, "grad_norm": 0.07298263836130801, "learning_rate": 4.810439611001423e-06, "loss": 0.4642, "step": 4135 }, { "epoch": 2.0516321211368997, "grad_norm": 0.07558026313940648, "learning_rate": 4.808486331549824e-06, "loss": 0.4708, "step": 4136 }, { "epoch": 2.052128583840139, "grad_norm": 0.07086733181913618, "learning_rate": 4.806533081368017e-06, "loss": 0.4487, "step": 4137 }, { "epoch": 2.0526250465433784, "grad_norm": 0.07450006007010589, "learning_rate": 4.804579860754528e-06, "loss": 0.4851, "step": 4138 }, { "epoch": 2.0531215092466177, "grad_norm": 0.07069277786323414, "learning_rate": 4.802626670007873e-06, "loss": 0.434, "step": 4139 }, { "epoch": 2.053617971949857, "grad_norm": 0.06861088663923186, "learning_rate": 4.800673509426567e-06, "loss": 0.4435, "step": 4140 }, { "epoch": 2.054114434653097, "grad_norm": 0.07350117652911003, "learning_rate": 4.7987203793091186e-06, "loss": 0.4843, "step": 4141 }, { "epoch": 2.054610897356336, "grad_norm": 0.07202045677553137, "learning_rate": 4.796767279954034e-06, "loss": 0.4352, "step": 4142 }, { "epoch": 2.0551073600595755, "grad_norm": 0.07171344406046876, "learning_rate": 4.794814211659813e-06, "loss": 0.4538, "step": 4143 }, { "epoch": 2.055603822762815, "grad_norm": 0.07280878355114719, "learning_rate": 4.7928611747249495e-06, "loss": 0.4429, "step": 4144 }, { "epoch": 2.056100285466054, "grad_norm": 0.07357463543017809, "learning_rate": 4.790908169447933e-06, "loss": 0.4816, "step": 4145 }, { "epoch": 2.056596748169294, "grad_norm": 0.07156764439656021, "learning_rate": 4.78895519612725e-06, "loss": 0.458, "step": 4146 }, { "epoch": 2.0570932108725333, "grad_norm": 0.07225385651619044, "learning_rate": 4.787002255061381e-06, "loss": 0.4581, "step": 4147 }, { "epoch": 2.0575896735757726, "grad_norm": 0.07084771506549391, "learning_rate": 4.785049346548803e-06, "loss": 0.4373, "step": 4148 }, { "epoch": 2.058086136279012, "grad_norm": 0.07376134194351902, "learning_rate": 4.783096470887987e-06, "loss": 0.4264, "step": 4149 }, { "epoch": 2.0585825989822513, "grad_norm": 0.07512222501568963, "learning_rate": 4.781143628377396e-06, "loss": 0.4394, "step": 4150 }, { "epoch": 2.059079061685491, "grad_norm": 0.07189691667984506, "learning_rate": 4.779190819315493e-06, "loss": 0.4604, "step": 4151 }, { "epoch": 2.0595755243887304, "grad_norm": 0.07219890185194544, "learning_rate": 4.777238044000732e-06, "loss": 0.4514, "step": 4152 }, { "epoch": 
2.0600719870919697, "grad_norm": 0.07489990717793613, "learning_rate": 4.775285302731565e-06, "loss": 0.4493, "step": 4153 }, { "epoch": 2.060568449795209, "grad_norm": 0.07554161871208594, "learning_rate": 4.773332595806437e-06, "loss": 0.4458, "step": 4154 }, { "epoch": 2.0610649124984484, "grad_norm": 0.0720993346206349, "learning_rate": 4.771379923523786e-06, "loss": 0.4399, "step": 4155 }, { "epoch": 2.061561375201688, "grad_norm": 0.07289060496223992, "learning_rate": 4.76942728618205e-06, "loss": 0.4335, "step": 4156 }, { "epoch": 2.0620578379049275, "grad_norm": 0.07282969468184067, "learning_rate": 4.767474684079655e-06, "loss": 0.4732, "step": 4157 }, { "epoch": 2.062554300608167, "grad_norm": 0.07354514479733838, "learning_rate": 4.765522117515026e-06, "loss": 0.4434, "step": 4158 }, { "epoch": 2.063050763311406, "grad_norm": 0.07316254525944305, "learning_rate": 4.763569586786584e-06, "loss": 0.4721, "step": 4159 }, { "epoch": 2.0635472260146455, "grad_norm": 0.07388552565859352, "learning_rate": 4.761617092192737e-06, "loss": 0.447, "step": 4160 }, { "epoch": 2.0640436887178852, "grad_norm": 0.07238240778799938, "learning_rate": 4.759664634031897e-06, "loss": 0.4706, "step": 4161 }, { "epoch": 2.0645401514211246, "grad_norm": 0.07372646052049893, "learning_rate": 4.757712212602464e-06, "loss": 0.4518, "step": 4162 }, { "epoch": 2.065036614124364, "grad_norm": 0.07457439512766961, "learning_rate": 4.755759828202834e-06, "loss": 0.4712, "step": 4163 }, { "epoch": 2.0655330768276032, "grad_norm": 0.07399410722838877, "learning_rate": 4.753807481131398e-06, "loss": 0.4604, "step": 4164 }, { "epoch": 2.0660295395308426, "grad_norm": 0.07082211955770341, "learning_rate": 4.751855171686542e-06, "loss": 0.4498, "step": 4165 }, { "epoch": 2.0665260022340823, "grad_norm": 0.07398347450665645, "learning_rate": 4.7499029001666435e-06, "loss": 0.464, "step": 4166 }, { "epoch": 2.0670224649373217, "grad_norm": 0.07175179899612245, "learning_rate": 4.747950666870076e-06, "loss": 0.45, "step": 4167 }, { "epoch": 2.067518927640561, "grad_norm": 0.0725543113621323, "learning_rate": 4.745998472095209e-06, "loss": 0.4459, "step": 4168 }, { "epoch": 2.0680153903438003, "grad_norm": 0.07356433723103205, "learning_rate": 4.744046316140402e-06, "loss": 0.4871, "step": 4169 }, { "epoch": 2.0685118530470397, "grad_norm": 0.07077237681782449, "learning_rate": 4.742094199304013e-06, "loss": 0.4189, "step": 4170 }, { "epoch": 2.0690083157502794, "grad_norm": 0.07397114345236917, "learning_rate": 4.74014212188439e-06, "loss": 0.4607, "step": 4171 }, { "epoch": 2.0695047784535188, "grad_norm": 0.07633194862451866, "learning_rate": 4.738190084179876e-06, "loss": 0.4775, "step": 4172 }, { "epoch": 2.070001241156758, "grad_norm": 0.0742711453109311, "learning_rate": 4.73623808648881e-06, "loss": 0.4769, "step": 4173 }, { "epoch": 2.0704977038599974, "grad_norm": 0.07170333053549788, "learning_rate": 4.7342861291095244e-06, "loss": 0.4608, "step": 4174 }, { "epoch": 2.0709941665632368, "grad_norm": 0.0748163728180664, "learning_rate": 4.732334212340345e-06, "loss": 0.4928, "step": 4175 }, { "epoch": 2.0714906292664765, "grad_norm": 0.07541859685661392, "learning_rate": 4.7303823364795874e-06, "loss": 0.4681, "step": 4176 }, { "epoch": 2.071987091969716, "grad_norm": 0.0733725681131633, "learning_rate": 4.728430501825567e-06, "loss": 0.4728, "step": 4177 }, { "epoch": 2.072483554672955, "grad_norm": 0.07409349750433514, "learning_rate": 4.72647870867659e-06, "loss": 0.4497, "step": 4178 }, { "epoch": 
2.0729800173761945, "grad_norm": 0.07399731870180891, "learning_rate": 4.7245269573309575e-06, "loss": 0.4831, "step": 4179 }, { "epoch": 2.073476480079434, "grad_norm": 0.07655395352644413, "learning_rate": 4.722575248086962e-06, "loss": 0.4491, "step": 4180 }, { "epoch": 2.0739729427826736, "grad_norm": 0.0749787725080781, "learning_rate": 4.720623581242893e-06, "loss": 0.4489, "step": 4181 }, { "epoch": 2.074469405485913, "grad_norm": 0.07444718116125149, "learning_rate": 4.7186719570970285e-06, "loss": 0.4766, "step": 4182 }, { "epoch": 2.0749658681891523, "grad_norm": 0.07366724823253058, "learning_rate": 4.716720375947644e-06, "loss": 0.463, "step": 4183 }, { "epoch": 2.0754623308923916, "grad_norm": 0.0713571217812567, "learning_rate": 4.714768838093009e-06, "loss": 0.449, "step": 4184 }, { "epoch": 2.075958793595631, "grad_norm": 0.07236983736508307, "learning_rate": 4.712817343831384e-06, "loss": 0.4451, "step": 4185 }, { "epoch": 2.0764552562988707, "grad_norm": 0.07472833997107589, "learning_rate": 4.710865893461024e-06, "loss": 0.4833, "step": 4186 }, { "epoch": 2.07695171900211, "grad_norm": 0.07565048657385896, "learning_rate": 4.708914487280175e-06, "loss": 0.4785, "step": 4187 }, { "epoch": 2.0774481817053494, "grad_norm": 0.07173844405137295, "learning_rate": 4.706963125587079e-06, "loss": 0.4531, "step": 4188 }, { "epoch": 2.0779446444085887, "grad_norm": 0.07287877873442934, "learning_rate": 4.705011808679972e-06, "loss": 0.4379, "step": 4189 }, { "epoch": 2.078441107111828, "grad_norm": 0.0716401851616967, "learning_rate": 4.7030605368570795e-06, "loss": 0.4344, "step": 4190 }, { "epoch": 2.078937569815068, "grad_norm": 0.0713923972030575, "learning_rate": 4.701109310416626e-06, "loss": 0.4426, "step": 4191 }, { "epoch": 2.079434032518307, "grad_norm": 0.07268447504285887, "learning_rate": 4.699158129656818e-06, "loss": 0.4577, "step": 4192 }, { "epoch": 2.0799304952215465, "grad_norm": 0.06999453896271132, "learning_rate": 4.697206994875869e-06, "loss": 0.4345, "step": 4193 }, { "epoch": 2.080426957924786, "grad_norm": 0.07212199517441456, "learning_rate": 4.695255906371974e-06, "loss": 0.4577, "step": 4194 }, { "epoch": 2.080923420628025, "grad_norm": 0.07328964439731311, "learning_rate": 4.693304864443328e-06, "loss": 0.4832, "step": 4195 }, { "epoch": 2.081419883331265, "grad_norm": 0.07344622598547146, "learning_rate": 4.6913538693881155e-06, "loss": 0.4568, "step": 4196 }, { "epoch": 2.0819163460345043, "grad_norm": 0.07094064393577482, "learning_rate": 4.689402921504516e-06, "loss": 0.4739, "step": 4197 }, { "epoch": 2.0824128087377436, "grad_norm": 0.07297562583866672, "learning_rate": 4.6874520210907e-06, "loss": 0.4617, "step": 4198 }, { "epoch": 2.082909271440983, "grad_norm": 0.07116335621343138, "learning_rate": 4.685501168444831e-06, "loss": 0.4518, "step": 4199 }, { "epoch": 2.0834057341442223, "grad_norm": 0.07456698856493216, "learning_rate": 4.683550363865065e-06, "loss": 0.4774, "step": 4200 }, { "epoch": 2.083902196847462, "grad_norm": 0.06963389093636772, "learning_rate": 4.681599607649553e-06, "loss": 0.4249, "step": 4201 }, { "epoch": 2.0843986595507014, "grad_norm": 0.07174632242610744, "learning_rate": 4.679648900096436e-06, "loss": 0.4459, "step": 4202 }, { "epoch": 2.0848951222539407, "grad_norm": 0.07098206575623335, "learning_rate": 4.677698241503847e-06, "loss": 0.4341, "step": 4203 }, { "epoch": 2.08539158495718, "grad_norm": 0.06976961364064482, "learning_rate": 4.675747632169916e-06, "loss": 0.4402, "step": 4204 }, { "epoch": 
2.0858880476604194, "grad_norm": 0.07452720372868127, "learning_rate": 4.673797072392759e-06, "loss": 0.4773, "step": 4205 }, { "epoch": 2.086384510363659, "grad_norm": 0.07246580630946493, "learning_rate": 4.671846562470489e-06, "loss": 0.44, "step": 4206 }, { "epoch": 2.0868809730668985, "grad_norm": 0.0757655362478522, "learning_rate": 4.669896102701212e-06, "loss": 0.4277, "step": 4207 }, { "epoch": 2.087377435770138, "grad_norm": 0.07543467059805738, "learning_rate": 4.667945693383022e-06, "loss": 0.4902, "step": 4208 }, { "epoch": 2.087873898473377, "grad_norm": 0.07464446756126804, "learning_rate": 4.665995334814009e-06, "loss": 0.4925, "step": 4209 }, { "epoch": 2.0883703611766165, "grad_norm": 0.07342049375383967, "learning_rate": 4.664045027292252e-06, "loss": 0.4497, "step": 4210 }, { "epoch": 2.0888668238798562, "grad_norm": 0.07066862656094619, "learning_rate": 4.662094771115828e-06, "loss": 0.4516, "step": 4211 }, { "epoch": 2.0893632865830956, "grad_norm": 0.07338215598137313, "learning_rate": 4.660144566582799e-06, "loss": 0.4427, "step": 4212 }, { "epoch": 2.089859749286335, "grad_norm": 0.07355061364313716, "learning_rate": 4.658194413991224e-06, "loss": 0.4551, "step": 4213 }, { "epoch": 2.090356211989574, "grad_norm": 0.07318149318093226, "learning_rate": 4.656244313639153e-06, "loss": 0.4288, "step": 4214 }, { "epoch": 2.0908526746928136, "grad_norm": 0.07415826471442223, "learning_rate": 4.654294265824627e-06, "loss": 0.5057, "step": 4215 }, { "epoch": 2.0913491373960533, "grad_norm": 0.07366828306193732, "learning_rate": 4.652344270845679e-06, "loss": 0.4383, "step": 4216 }, { "epoch": 2.0918456000992927, "grad_norm": 0.07147693654112407, "learning_rate": 4.650394329000334e-06, "loss": 0.4739, "step": 4217 }, { "epoch": 2.092342062802532, "grad_norm": 0.07286515685522153, "learning_rate": 4.648444440586612e-06, "loss": 0.4832, "step": 4218 }, { "epoch": 2.0928385255057713, "grad_norm": 0.07170150457742781, "learning_rate": 4.6464946059025205e-06, "loss": 0.4367, "step": 4219 }, { "epoch": 2.0933349882090106, "grad_norm": 0.07026426443694084, "learning_rate": 4.644544825246059e-06, "loss": 0.4491, "step": 4220 }, { "epoch": 2.0938314509122504, "grad_norm": 0.07262877550044473, "learning_rate": 4.6425950989152225e-06, "loss": 0.4325, "step": 4221 }, { "epoch": 2.0943279136154898, "grad_norm": 0.07352269126408716, "learning_rate": 4.640645427207995e-06, "loss": 0.4918, "step": 4222 }, { "epoch": 2.094824376318729, "grad_norm": 0.0768702240002514, "learning_rate": 4.638695810422353e-06, "loss": 0.5017, "step": 4223 }, { "epoch": 2.0953208390219684, "grad_norm": 0.0721775418798943, "learning_rate": 4.6367462488562614e-06, "loss": 0.4525, "step": 4224 }, { "epoch": 2.0958173017252077, "grad_norm": 0.07251926836651529, "learning_rate": 4.634796742807683e-06, "loss": 0.4445, "step": 4225 }, { "epoch": 2.0963137644284475, "grad_norm": 0.07281659816445292, "learning_rate": 4.632847292574565e-06, "loss": 0.4292, "step": 4226 }, { "epoch": 2.096810227131687, "grad_norm": 0.07524695595787657, "learning_rate": 4.630897898454852e-06, "loss": 0.4568, "step": 4227 }, { "epoch": 2.097306689834926, "grad_norm": 0.07048486967770481, "learning_rate": 4.628948560746477e-06, "loss": 0.4311, "step": 4228 }, { "epoch": 2.0978031525381655, "grad_norm": 0.07302922412326243, "learning_rate": 4.6269992797473655e-06, "loss": 0.4369, "step": 4229 }, { "epoch": 2.098299615241405, "grad_norm": 0.07451648437833568, "learning_rate": 4.6250500557554346e-06, "loss": 0.4324, "step": 4230 }, { "epoch": 
2.0987960779446446, "grad_norm": 0.07491305988077178, "learning_rate": 4.623100889068589e-06, "loss": 0.4622, "step": 4231 }, { "epoch": 2.099292540647884, "grad_norm": 0.07198956615781928, "learning_rate": 4.6211517799847304e-06, "loss": 0.447, "step": 4232 }, { "epoch": 2.0997890033511233, "grad_norm": 0.07299965844460025, "learning_rate": 4.6192027288017475e-06, "loss": 0.4849, "step": 4233 }, { "epoch": 2.1002854660543626, "grad_norm": 0.07247630851840142, "learning_rate": 4.617253735817522e-06, "loss": 0.4775, "step": 4234 }, { "epoch": 2.100781928757602, "grad_norm": 0.07203360554721318, "learning_rate": 4.6153048013299266e-06, "loss": 0.4304, "step": 4235 }, { "epoch": 2.1012783914608413, "grad_norm": 0.07326018253851636, "learning_rate": 4.613355925636823e-06, "loss": 0.4263, "step": 4236 }, { "epoch": 2.101774854164081, "grad_norm": 0.07376985663412329, "learning_rate": 4.611407109036067e-06, "loss": 0.4495, "step": 4237 }, { "epoch": 2.1022713168673204, "grad_norm": 0.0692281617188793, "learning_rate": 4.609458351825505e-06, "loss": 0.424, "step": 4238 }, { "epoch": 2.1027677795705597, "grad_norm": 0.07246012599168375, "learning_rate": 4.607509654302973e-06, "loss": 0.4617, "step": 4239 }, { "epoch": 2.103264242273799, "grad_norm": 0.07411607321908148, "learning_rate": 4.605561016766295e-06, "loss": 0.4908, "step": 4240 }, { "epoch": 2.103760704977039, "grad_norm": 0.07717174587548606, "learning_rate": 4.603612439513293e-06, "loss": 0.4774, "step": 4241 }, { "epoch": 2.104257167680278, "grad_norm": 0.07097666906897472, "learning_rate": 4.6016639228417726e-06, "loss": 0.4691, "step": 4242 }, { "epoch": 2.1047536303835175, "grad_norm": 0.07450003788778216, "learning_rate": 4.599715467049534e-06, "loss": 0.4802, "step": 4243 }, { "epoch": 2.105250093086757, "grad_norm": 0.0742256173702752, "learning_rate": 4.597767072434369e-06, "loss": 0.4748, "step": 4244 }, { "epoch": 2.105746555789996, "grad_norm": 0.07224204359839762, "learning_rate": 4.595818739294058e-06, "loss": 0.4576, "step": 4245 }, { "epoch": 2.1062430184932355, "grad_norm": 0.07318531262067612, "learning_rate": 4.593870467926373e-06, "loss": 0.4626, "step": 4246 }, { "epoch": 2.1067394811964752, "grad_norm": 0.07430147251059871, "learning_rate": 4.591922258629074e-06, "loss": 0.4571, "step": 4247 }, { "epoch": 2.1072359438997146, "grad_norm": 0.07254417646526728, "learning_rate": 4.589974111699914e-06, "loss": 0.4794, "step": 4248 }, { "epoch": 2.107732406602954, "grad_norm": 0.07515164952717794, "learning_rate": 4.5880260274366375e-06, "loss": 0.4692, "step": 4249 }, { "epoch": 2.1082288693061932, "grad_norm": 0.07383658645149892, "learning_rate": 4.586078006136977e-06, "loss": 0.4874, "step": 4250 }, { "epoch": 2.108725332009433, "grad_norm": 0.07257460141396958, "learning_rate": 4.584130048098658e-06, "loss": 0.4819, "step": 4251 }, { "epoch": 2.1092217947126723, "grad_norm": 0.07337974249936584, "learning_rate": 4.582182153619392e-06, "loss": 0.4706, "step": 4252 }, { "epoch": 2.1097182574159117, "grad_norm": 0.07199015701301469, "learning_rate": 4.5802343229968845e-06, "loss": 0.4666, "step": 4253 }, { "epoch": 2.110214720119151, "grad_norm": 0.07286868193978167, "learning_rate": 4.5782865565288296e-06, "loss": 0.436, "step": 4254 }, { "epoch": 2.1107111828223903, "grad_norm": 0.06980078227341749, "learning_rate": 4.576338854512916e-06, "loss": 0.446, "step": 4255 }, { "epoch": 2.1112076455256297, "grad_norm": 0.07755006219874234, "learning_rate": 4.574391217246813e-06, "loss": 0.4662, "step": 4256 }, { "epoch": 
2.1117041082288694, "grad_norm": 0.07399265860382301, "learning_rate": 4.5724436450281895e-06, "loss": 0.4495, "step": 4257 }, { "epoch": 2.112200570932109, "grad_norm": 0.0746046076715519, "learning_rate": 4.570496138154699e-06, "loss": 0.4606, "step": 4258 }, { "epoch": 2.112697033635348, "grad_norm": 0.0716888428047803, "learning_rate": 4.568548696923986e-06, "loss": 0.4414, "step": 4259 }, { "epoch": 2.1131934963385874, "grad_norm": 0.06988503629826938, "learning_rate": 4.566601321633688e-06, "loss": 0.4656, "step": 4260 }, { "epoch": 2.1136899590418268, "grad_norm": 0.07322255277171939, "learning_rate": 4.564654012581428e-06, "loss": 0.4289, "step": 4261 }, { "epoch": 2.1141864217450665, "grad_norm": 0.07170049650607539, "learning_rate": 4.562706770064824e-06, "loss": 0.456, "step": 4262 }, { "epoch": 2.114682884448306, "grad_norm": 0.07498256697277747, "learning_rate": 4.560759594381477e-06, "loss": 0.464, "step": 4263 }, { "epoch": 2.115179347151545, "grad_norm": 0.07575026511841638, "learning_rate": 4.558812485828983e-06, "loss": 0.4769, "step": 4264 }, { "epoch": 2.1156758098547845, "grad_norm": 0.07380441248442098, "learning_rate": 4.556865444704928e-06, "loss": 0.4638, "step": 4265 }, { "epoch": 2.116172272558024, "grad_norm": 0.0696430455459803, "learning_rate": 4.554918471306883e-06, "loss": 0.4171, "step": 4266 }, { "epoch": 2.1166687352612636, "grad_norm": 0.07436796696460703, "learning_rate": 4.552971565932416e-06, "loss": 0.4443, "step": 4267 }, { "epoch": 2.117165197964503, "grad_norm": 0.07578969996317546, "learning_rate": 4.551024728879075e-06, "loss": 0.4688, "step": 4268 }, { "epoch": 2.1176616606677423, "grad_norm": 0.07395243694691826, "learning_rate": 4.549077960444407e-06, "loss": 0.4541, "step": 4269 }, { "epoch": 2.1181581233709816, "grad_norm": 0.07110739817951225, "learning_rate": 4.547131260925941e-06, "loss": 0.4581, "step": 4270 }, { "epoch": 2.118654586074221, "grad_norm": 0.0741125823175704, "learning_rate": 4.545184630621202e-06, "loss": 0.4824, "step": 4271 }, { "epoch": 2.1191510487774607, "grad_norm": 0.07664528627796972, "learning_rate": 4.543238069827701e-06, "loss": 0.4501, "step": 4272 }, { "epoch": 2.1196475114807, "grad_norm": 0.07570433962985741, "learning_rate": 4.541291578842936e-06, "loss": 0.497, "step": 4273 }, { "epoch": 2.1201439741839394, "grad_norm": 0.07617127656102472, "learning_rate": 4.539345157964399e-06, "loss": 0.4631, "step": 4274 }, { "epoch": 2.1206404368871787, "grad_norm": 0.07313940894695849, "learning_rate": 4.537398807489568e-06, "loss": 0.4678, "step": 4275 }, { "epoch": 2.121136899590418, "grad_norm": 0.07300910218402296, "learning_rate": 4.535452527715911e-06, "loss": 0.4542, "step": 4276 }, { "epoch": 2.121633362293658, "grad_norm": 0.07492994635211063, "learning_rate": 4.533506318940888e-06, "loss": 0.4396, "step": 4277 }, { "epoch": 2.122129824996897, "grad_norm": 0.07652124159341604, "learning_rate": 4.531560181461944e-06, "loss": 0.4225, "step": 4278 }, { "epoch": 2.1226262877001365, "grad_norm": 0.07259064899112683, "learning_rate": 4.529614115576515e-06, "loss": 0.4752, "step": 4279 }, { "epoch": 2.123122750403376, "grad_norm": 0.07304094179424345, "learning_rate": 4.5276681215820266e-06, "loss": 0.4583, "step": 4280 }, { "epoch": 2.123619213106615, "grad_norm": 0.07441873823770657, "learning_rate": 4.525722199775893e-06, "loss": 0.4626, "step": 4281 }, { "epoch": 2.124115675809855, "grad_norm": 0.07328536701423397, "learning_rate": 4.523776350455516e-06, "loss": 0.4899, "step": 4282 }, { "epoch": 
2.1246121385130943, "grad_norm": 0.0703197526894117, "learning_rate": 4.521830573918289e-06, "loss": 0.4233, "step": 4283 }, { "epoch": 2.1251086012163336, "grad_norm": 0.07408961544325211, "learning_rate": 4.5198848704615915e-06, "loss": 0.4929, "step": 4284 }, { "epoch": 2.125605063919573, "grad_norm": 0.07399250331444931, "learning_rate": 4.517939240382793e-06, "loss": 0.4982, "step": 4285 }, { "epoch": 2.1261015266228123, "grad_norm": 0.07313344981416467, "learning_rate": 4.515993683979253e-06, "loss": 0.4701, "step": 4286 }, { "epoch": 2.126597989326052, "grad_norm": 0.07221818239629586, "learning_rate": 4.514048201548318e-06, "loss": 0.4547, "step": 4287 }, { "epoch": 2.1270944520292914, "grad_norm": 0.07303252754167958, "learning_rate": 4.512102793387325e-06, "loss": 0.4578, "step": 4288 }, { "epoch": 2.1275909147325307, "grad_norm": 0.11487612831188249, "learning_rate": 4.510157459793596e-06, "loss": 0.4151, "step": 4289 }, { "epoch": 2.12808737743577, "grad_norm": 0.07265820335361244, "learning_rate": 4.508212201064446e-06, "loss": 0.4341, "step": 4290 }, { "epoch": 2.1285838401390094, "grad_norm": 0.074447871083182, "learning_rate": 4.506267017497174e-06, "loss": 0.481, "step": 4291 }, { "epoch": 2.129080302842249, "grad_norm": 0.07330142691315944, "learning_rate": 4.504321909389072e-06, "loss": 0.4345, "step": 4292 }, { "epoch": 2.1295767655454885, "grad_norm": 0.07169395950659548, "learning_rate": 4.5023768770374195e-06, "loss": 0.4803, "step": 4293 }, { "epoch": 2.130073228248728, "grad_norm": 0.07144159301453944, "learning_rate": 4.500431920739482e-06, "loss": 0.4379, "step": 4294 }, { "epoch": 2.130569690951967, "grad_norm": 0.07462658435580564, "learning_rate": 4.498487040792517e-06, "loss": 0.4455, "step": 4295 }, { "epoch": 2.1310661536552065, "grad_norm": 0.07373163804771965, "learning_rate": 4.496542237493765e-06, "loss": 0.4269, "step": 4296 }, { "epoch": 2.1315626163584462, "grad_norm": 0.07622904870198172, "learning_rate": 4.49459751114046e-06, "loss": 0.5137, "step": 4297 }, { "epoch": 2.1320590790616856, "grad_norm": 0.07372742998907207, "learning_rate": 4.492652862029822e-06, "loss": 0.5211, "step": 4298 }, { "epoch": 2.132555541764925, "grad_norm": 0.07370616974666123, "learning_rate": 4.490708290459061e-06, "loss": 0.4474, "step": 4299 }, { "epoch": 2.1330520044681642, "grad_norm": 0.07458809789142895, "learning_rate": 4.488763796725369e-06, "loss": 0.4831, "step": 4300 }, { "epoch": 2.1335484671714036, "grad_norm": 0.07398203891052577, "learning_rate": 4.486819381125935e-06, "loss": 0.4833, "step": 4301 }, { "epoch": 2.1340449298746433, "grad_norm": 0.07415054973135388, "learning_rate": 4.48487504395793e-06, "loss": 0.4858, "step": 4302 }, { "epoch": 2.1345413925778827, "grad_norm": 0.07379822859689968, "learning_rate": 4.4829307855185155e-06, "loss": 0.4328, "step": 4303 }, { "epoch": 2.135037855281122, "grad_norm": 0.07379863650798167, "learning_rate": 4.480986606104842e-06, "loss": 0.4874, "step": 4304 }, { "epoch": 2.1355343179843613, "grad_norm": 0.07413874016115828, "learning_rate": 4.479042506014042e-06, "loss": 0.4744, "step": 4305 }, { "epoch": 2.1360307806876007, "grad_norm": 0.07332637014689809, "learning_rate": 4.477098485543242e-06, "loss": 0.4842, "step": 4306 }, { "epoch": 2.1365272433908404, "grad_norm": 0.07626723329610915, "learning_rate": 4.475154544989554e-06, "loss": 0.4637, "step": 4307 }, { "epoch": 2.1370237060940798, "grad_norm": 0.07124366540358651, "learning_rate": 4.473210684650079e-06, "loss": 0.4553, "step": 4308 }, { "epoch": 
2.137520168797319, "grad_norm": 0.07106600874663206, "learning_rate": 4.471266904821904e-06, "loss": 0.4579, "step": 4309 }, { "epoch": 2.1380166315005584, "grad_norm": 0.07320126920930652, "learning_rate": 4.469323205802105e-06, "loss": 0.4761, "step": 4310 }, { "epoch": 2.1385130942037978, "grad_norm": 0.0748936232395386, "learning_rate": 4.467379587887747e-06, "loss": 0.49, "step": 4311 }, { "epoch": 2.1390095569070375, "grad_norm": 0.070655034353901, "learning_rate": 4.465436051375877e-06, "loss": 0.4706, "step": 4312 }, { "epoch": 2.139506019610277, "grad_norm": 0.07154275743223767, "learning_rate": 4.463492596563536e-06, "loss": 0.4721, "step": 4313 }, { "epoch": 2.140002482313516, "grad_norm": 0.06944606811288602, "learning_rate": 4.46154922374775e-06, "loss": 0.4293, "step": 4314 }, { "epoch": 2.1404989450167555, "grad_norm": 0.0695554372896349, "learning_rate": 4.459605933225531e-06, "loss": 0.4095, "step": 4315 }, { "epoch": 2.140995407719995, "grad_norm": 0.07387663952272498, "learning_rate": 4.4576627252938805e-06, "loss": 0.5105, "step": 4316 }, { "epoch": 2.1414918704232346, "grad_norm": 0.07296540945926891, "learning_rate": 4.455719600249787e-06, "loss": 0.4748, "step": 4317 }, { "epoch": 2.141988333126474, "grad_norm": 0.07444996417793436, "learning_rate": 4.453776558390225e-06, "loss": 0.4476, "step": 4318 }, { "epoch": 2.1424847958297133, "grad_norm": 0.07412269708771786, "learning_rate": 4.4518336000121585e-06, "loss": 0.4835, "step": 4319 }, { "epoch": 2.1429812585329526, "grad_norm": 0.07223272136559325, "learning_rate": 4.4498907254125394e-06, "loss": 0.4209, "step": 4320 }, { "epoch": 2.143477721236192, "grad_norm": 0.07511790139542897, "learning_rate": 4.4479479348883e-06, "loss": 0.4588, "step": 4321 }, { "epoch": 2.1439741839394317, "grad_norm": 0.07007837954759942, "learning_rate": 4.446005228736368e-06, "loss": 0.4573, "step": 4322 }, { "epoch": 2.144470646642671, "grad_norm": 0.0728602451708517, "learning_rate": 4.444062607253655e-06, "loss": 0.5192, "step": 4323 }, { "epoch": 2.1449671093459104, "grad_norm": 0.07341402010785382, "learning_rate": 4.442120070737057e-06, "loss": 0.459, "step": 4324 }, { "epoch": 2.1454635720491497, "grad_norm": 0.07380914908331342, "learning_rate": 4.4401776194834615e-06, "loss": 0.4823, "step": 4325 }, { "epoch": 2.145960034752389, "grad_norm": 0.07298131712761911, "learning_rate": 4.438235253789741e-06, "loss": 0.4405, "step": 4326 }, { "epoch": 2.146456497455629, "grad_norm": 0.07533361099651531, "learning_rate": 4.436292973952755e-06, "loss": 0.4602, "step": 4327 }, { "epoch": 2.146952960158868, "grad_norm": 0.07118691927368932, "learning_rate": 4.43435078026935e-06, "loss": 0.4753, "step": 4328 }, { "epoch": 2.1474494228621075, "grad_norm": 0.07142698648583826, "learning_rate": 4.432408673036358e-06, "loss": 0.4595, "step": 4329 }, { "epoch": 2.147945885565347, "grad_norm": 0.07637953793097572, "learning_rate": 4.430466652550599e-06, "loss": 0.4579, "step": 4330 }, { "epoch": 2.148442348268586, "grad_norm": 0.07424465808567507, "learning_rate": 4.4285247191088815e-06, "loss": 0.4672, "step": 4331 }, { "epoch": 2.148938810971826, "grad_norm": 0.07278996829833347, "learning_rate": 4.426582873007999e-06, "loss": 0.474, "step": 4332 }, { "epoch": 2.1494352736750653, "grad_norm": 0.0717818181339654, "learning_rate": 4.4246411145447275e-06, "loss": 0.4381, "step": 4333 }, { "epoch": 2.1499317363783046, "grad_norm": 0.0730160088959314, "learning_rate": 4.422699444015838e-06, "loss": 0.4499, "step": 4334 }, { "epoch": 
2.150428199081544, "grad_norm": 0.07393221348452107, "learning_rate": 4.420757861718082e-06, "loss": 0.4661, "step": 4335 }, { "epoch": 2.1509246617847833, "grad_norm": 0.07274849802941666, "learning_rate": 4.418816367948201e-06, "loss": 0.4393, "step": 4336 }, { "epoch": 2.151421124488023, "grad_norm": 0.07902231214358871, "learning_rate": 4.416874963002918e-06, "loss": 0.4608, "step": 4337 }, { "epoch": 2.1519175871912624, "grad_norm": 0.07283494180291966, "learning_rate": 4.414933647178948e-06, "loss": 0.4765, "step": 4338 }, { "epoch": 2.1524140498945017, "grad_norm": 0.0701883235014208, "learning_rate": 4.412992420772988e-06, "loss": 0.4504, "step": 4339 }, { "epoch": 2.152910512597741, "grad_norm": 0.07421158965760369, "learning_rate": 4.411051284081725e-06, "loss": 0.4522, "step": 4340 }, { "epoch": 2.1534069753009804, "grad_norm": 0.07571375423025403, "learning_rate": 4.4091102374018295e-06, "loss": 0.4634, "step": 4341 }, { "epoch": 2.15390343800422, "grad_norm": 0.07236772001970997, "learning_rate": 4.40716928102996e-06, "loss": 0.4478, "step": 4342 }, { "epoch": 2.1543999007074595, "grad_norm": 0.07188417221152126, "learning_rate": 4.405228415262763e-06, "loss": 0.4645, "step": 4343 }, { "epoch": 2.154896363410699, "grad_norm": 0.07382921539610335, "learning_rate": 4.403287640396864e-06, "loss": 0.4807, "step": 4344 }, { "epoch": 2.155392826113938, "grad_norm": 0.06982201525129672, "learning_rate": 4.401346956728881e-06, "loss": 0.4393, "step": 4345 }, { "epoch": 2.1558892888171775, "grad_norm": 0.07054004972844154, "learning_rate": 4.3994063645554185e-06, "loss": 0.4309, "step": 4346 }, { "epoch": 2.1563857515204172, "grad_norm": 0.07132879012840733, "learning_rate": 4.397465864173063e-06, "loss": 0.4381, "step": 4347 }, { "epoch": 2.1568822142236566, "grad_norm": 0.07432312035402415, "learning_rate": 4.39552545587839e-06, "loss": 0.4612, "step": 4348 }, { "epoch": 2.157378676926896, "grad_norm": 0.07411331173872626, "learning_rate": 4.393585139967958e-06, "loss": 0.4989, "step": 4349 }, { "epoch": 2.157875139630135, "grad_norm": 0.07267337288227944, "learning_rate": 4.391644916738314e-06, "loss": 0.4461, "step": 4350 }, { "epoch": 2.1583716023333746, "grad_norm": 0.07331622047891374, "learning_rate": 4.38970478648599e-06, "loss": 0.459, "step": 4351 }, { "epoch": 2.1588680650366143, "grad_norm": 0.07453181757687845, "learning_rate": 4.387764749507507e-06, "loss": 0.4597, "step": 4352 }, { "epoch": 2.1593645277398537, "grad_norm": 0.07315319571236073, "learning_rate": 4.385824806099364e-06, "loss": 0.4769, "step": 4353 }, { "epoch": 2.159860990443093, "grad_norm": 0.07693153678377337, "learning_rate": 4.383884956558051e-06, "loss": 0.4861, "step": 4354 }, { "epoch": 2.1603574531463323, "grad_norm": 0.07417595641227936, "learning_rate": 4.381945201180045e-06, "loss": 0.4691, "step": 4355 }, { "epoch": 2.1608539158495716, "grad_norm": 0.07529427254296801, "learning_rate": 4.380005540261803e-06, "loss": 0.4748, "step": 4356 }, { "epoch": 2.1613503785528114, "grad_norm": 0.07267164548374914, "learning_rate": 4.378065974099775e-06, "loss": 0.4566, "step": 4357 }, { "epoch": 2.1618468412560508, "grad_norm": 0.07203923664865745, "learning_rate": 4.37612650299039e-06, "loss": 0.4685, "step": 4358 }, { "epoch": 2.16234330395929, "grad_norm": 0.07187394235177122, "learning_rate": 4.374187127230068e-06, "loss": 0.4557, "step": 4359 }, { "epoch": 2.1628397666625294, "grad_norm": 0.07011681787618959, "learning_rate": 4.3722478471152065e-06, "loss": 0.4321, "step": 4360 }, { "epoch": 
2.1633362293657687, "grad_norm": 0.07272195445241587, "learning_rate": 4.370308662942198e-06, "loss": 0.4518, "step": 4361 }, { "epoch": 2.1638326920690085, "grad_norm": 0.07460252071373111, "learning_rate": 4.368369575007413e-06, "loss": 0.4907, "step": 4362 }, { "epoch": 2.164329154772248, "grad_norm": 0.07561379198675501, "learning_rate": 4.3664305836072116e-06, "loss": 0.4796, "step": 4363 }, { "epoch": 2.164825617475487, "grad_norm": 0.07136976585613097, "learning_rate": 4.364491689037938e-06, "loss": 0.439, "step": 4364 }, { "epoch": 2.1653220801787265, "grad_norm": 0.07162067869532027, "learning_rate": 4.362552891595919e-06, "loss": 0.4359, "step": 4365 }, { "epoch": 2.165818542881966, "grad_norm": 0.0733281484806096, "learning_rate": 4.3606141915774695e-06, "loss": 0.4771, "step": 4366 }, { "epoch": 2.166315005585205, "grad_norm": 0.07224633811997479, "learning_rate": 4.3586755892788896e-06, "loss": 0.4372, "step": 4367 }, { "epoch": 2.166811468288445, "grad_norm": 0.07028756349782972, "learning_rate": 4.356737084996465e-06, "loss": 0.448, "step": 4368 }, { "epoch": 2.1673079309916843, "grad_norm": 0.07108239096848315, "learning_rate": 4.354798679026461e-06, "loss": 0.4479, "step": 4369 }, { "epoch": 2.1678043936949236, "grad_norm": 0.07372307764488349, "learning_rate": 4.352860371665134e-06, "loss": 0.4601, "step": 4370 }, { "epoch": 2.168300856398163, "grad_norm": 0.06975696761984575, "learning_rate": 4.350922163208724e-06, "loss": 0.4494, "step": 4371 }, { "epoch": 2.1687973191014027, "grad_norm": 0.07546779879045096, "learning_rate": 4.348984053953453e-06, "loss": 0.4524, "step": 4372 }, { "epoch": 2.169293781804642, "grad_norm": 0.07280842926844215, "learning_rate": 4.3470460441955306e-06, "loss": 0.4536, "step": 4373 }, { "epoch": 2.1697902445078814, "grad_norm": 0.07703084007865943, "learning_rate": 4.345108134231152e-06, "loss": 0.4747, "step": 4374 }, { "epoch": 2.1702867072111207, "grad_norm": 0.07110900628117096, "learning_rate": 4.343170324356495e-06, "loss": 0.4499, "step": 4375 }, { "epoch": 2.17078316991436, "grad_norm": 0.07369518298980911, "learning_rate": 4.341232614867722e-06, "loss": 0.457, "step": 4376 }, { "epoch": 2.1712796326175994, "grad_norm": 0.07157372517615651, "learning_rate": 4.3392950060609804e-06, "loss": 0.4473, "step": 4377 }, { "epoch": 2.171776095320839, "grad_norm": 0.0717701752371705, "learning_rate": 4.337357498232405e-06, "loss": 0.4516, "step": 4378 }, { "epoch": 2.1722725580240785, "grad_norm": 0.07316611230340865, "learning_rate": 4.335420091678109e-06, "loss": 0.4419, "step": 4379 }, { "epoch": 2.172769020727318, "grad_norm": 0.0756994196452662, "learning_rate": 4.3334827866941995e-06, "loss": 0.475, "step": 4380 }, { "epoch": 2.173265483430557, "grad_norm": 0.0785324379774332, "learning_rate": 4.331545583576758e-06, "loss": 0.4879, "step": 4381 }, { "epoch": 2.173761946133797, "grad_norm": 0.07380761788530164, "learning_rate": 4.329608482621855e-06, "loss": 0.4705, "step": 4382 }, { "epoch": 2.1742584088370362, "grad_norm": 0.07386854839808037, "learning_rate": 4.327671484125548e-06, "loss": 0.4466, "step": 4383 }, { "epoch": 2.1747548715402756, "grad_norm": 0.07355308329216982, "learning_rate": 4.325734588383876e-06, "loss": 0.4671, "step": 4384 }, { "epoch": 2.175251334243515, "grad_norm": 0.07717137637860379, "learning_rate": 4.323797795692859e-06, "loss": 0.4768, "step": 4385 }, { "epoch": 2.1757477969467542, "grad_norm": 0.10530308493803685, "learning_rate": 4.321861106348507e-06, "loss": 0.4548, "step": 4386 }, { "epoch": 
2.1762442596499936, "grad_norm": 0.07624838294925372, "learning_rate": 4.319924520646812e-06, "loss": 0.464, "step": 4387 }, { "epoch": 2.1767407223532333, "grad_norm": 0.07398353032038894, "learning_rate": 4.31798803888375e-06, "loss": 0.4362, "step": 4388 }, { "epoch": 2.1772371850564727, "grad_norm": 0.07362322859625167, "learning_rate": 4.31605166135528e-06, "loss": 0.4702, "step": 4389 }, { "epoch": 2.177733647759712, "grad_norm": 0.07241274976591916, "learning_rate": 4.314115388357348e-06, "loss": 0.45, "step": 4390 }, { "epoch": 2.1782301104629513, "grad_norm": 0.06990581661486783, "learning_rate": 4.312179220185883e-06, "loss": 0.4299, "step": 4391 }, { "epoch": 2.178726573166191, "grad_norm": 0.07375544071049109, "learning_rate": 4.310243157136794e-06, "loss": 0.4975, "step": 4392 }, { "epoch": 2.1792230358694304, "grad_norm": 0.07164232045346458, "learning_rate": 4.308307199505979e-06, "loss": 0.4429, "step": 4393 }, { "epoch": 2.17971949857267, "grad_norm": 0.07286003848759594, "learning_rate": 4.306371347589318e-06, "loss": 0.4513, "step": 4394 }, { "epoch": 2.180215961275909, "grad_norm": 0.07478821375829875, "learning_rate": 4.304435601682675e-06, "loss": 0.4706, "step": 4395 }, { "epoch": 2.1807124239791484, "grad_norm": 0.07402377964070475, "learning_rate": 4.3024999620819e-06, "loss": 0.4735, "step": 4396 }, { "epoch": 2.1812088866823878, "grad_norm": 0.07372832051354254, "learning_rate": 4.30056442908282e-06, "loss": 0.4672, "step": 4397 }, { "epoch": 2.1817053493856275, "grad_norm": 0.0723792667046002, "learning_rate": 4.298629002981253e-06, "loss": 0.4732, "step": 4398 }, { "epoch": 2.182201812088867, "grad_norm": 0.07305135775888415, "learning_rate": 4.296693684072997e-06, "loss": 0.4385, "step": 4399 }, { "epoch": 2.182698274792106, "grad_norm": 0.07387898291563187, "learning_rate": 4.294758472653837e-06, "loss": 0.4795, "step": 4400 }, { "epoch": 2.1831947374953455, "grad_norm": 0.07342139584048227, "learning_rate": 4.2928233690195345e-06, "loss": 0.4317, "step": 4401 }, { "epoch": 2.1836912001985853, "grad_norm": 0.07323848425271148, "learning_rate": 4.290888373465841e-06, "loss": 0.4771, "step": 4402 }, { "epoch": 2.1841876629018246, "grad_norm": 0.07217563028765697, "learning_rate": 4.288953486288491e-06, "loss": 0.4176, "step": 4403 }, { "epoch": 2.184684125605064, "grad_norm": 0.07588623975529438, "learning_rate": 4.2870187077832e-06, "loss": 0.4761, "step": 4404 }, { "epoch": 2.1851805883083033, "grad_norm": 0.07679164359797928, "learning_rate": 4.2850840382456656e-06, "loss": 0.4809, "step": 4405 }, { "epoch": 2.1856770510115426, "grad_norm": 0.07357731906381813, "learning_rate": 4.283149477971575e-06, "loss": 0.4922, "step": 4406 }, { "epoch": 2.186173513714782, "grad_norm": 0.07414935840899083, "learning_rate": 4.281215027256592e-06, "loss": 0.4418, "step": 4407 }, { "epoch": 2.1866699764180217, "grad_norm": 0.07168473465744193, "learning_rate": 4.2792806863963685e-06, "loss": 0.4318, "step": 4408 }, { "epoch": 2.187166439121261, "grad_norm": 0.07499491492610341, "learning_rate": 4.277346455686535e-06, "loss": 0.4517, "step": 4409 }, { "epoch": 2.1876629018245004, "grad_norm": 0.07423494586098568, "learning_rate": 4.275412335422709e-06, "loss": 0.4415, "step": 4410 }, { "epoch": 2.1881593645277397, "grad_norm": 0.0729238681152025, "learning_rate": 4.27347832590049e-06, "loss": 0.4583, "step": 4411 }, { "epoch": 2.188655827230979, "grad_norm": 0.07542756084684298, "learning_rate": 4.27154442741546e-06, "loss": 0.4458, "step": 4412 }, { "epoch": 
2.189152289934219, "grad_norm": 0.07608446572149048, "learning_rate": 4.269610640263185e-06, "loss": 0.4653, "step": 4413 }, { "epoch": 2.189648752637458, "grad_norm": 0.07361868469969725, "learning_rate": 4.267676964739211e-06, "loss": 0.4667, "step": 4414 }, { "epoch": 2.1901452153406975, "grad_norm": 0.0687170502926834, "learning_rate": 4.2657434011390725e-06, "loss": 0.4419, "step": 4415 }, { "epoch": 2.190641678043937, "grad_norm": 0.07000188477823314, "learning_rate": 4.2638099497582835e-06, "loss": 0.4715, "step": 4416 }, { "epoch": 2.191138140747176, "grad_norm": 0.07273493307479256, "learning_rate": 4.2618766108923385e-06, "loss": 0.4671, "step": 4417 }, { "epoch": 2.191634603450416, "grad_norm": 0.07397448831392173, "learning_rate": 4.259943384836717e-06, "loss": 0.4562, "step": 4418 }, { "epoch": 2.1921310661536553, "grad_norm": 0.07195426602600087, "learning_rate": 4.258010271886885e-06, "loss": 0.4525, "step": 4419 }, { "epoch": 2.1926275288568946, "grad_norm": 0.07397602097862697, "learning_rate": 4.256077272338286e-06, "loss": 0.4471, "step": 4420 }, { "epoch": 2.193123991560134, "grad_norm": 0.07350928801561125, "learning_rate": 4.254144386486347e-06, "loss": 0.4875, "step": 4421 }, { "epoch": 2.1936204542633733, "grad_norm": 0.07269756927451913, "learning_rate": 4.2522116146264805e-06, "loss": 0.4569, "step": 4422 }, { "epoch": 2.194116916966613, "grad_norm": 0.07206227980041602, "learning_rate": 4.2502789570540775e-06, "loss": 0.447, "step": 4423 }, { "epoch": 2.1946133796698524, "grad_norm": 0.07262743407923547, "learning_rate": 4.248346414064517e-06, "loss": 0.4778, "step": 4424 }, { "epoch": 2.1951098423730917, "grad_norm": 0.07271820913507389, "learning_rate": 4.246413985953154e-06, "loss": 0.491, "step": 4425 }, { "epoch": 2.195606305076331, "grad_norm": 0.0704878390287762, "learning_rate": 4.24448167301533e-06, "loss": 0.4448, "step": 4426 }, { "epoch": 2.1961027677795704, "grad_norm": 0.07215568890408974, "learning_rate": 4.242549475546369e-06, "loss": 0.4624, "step": 4427 }, { "epoch": 2.19659923048281, "grad_norm": 0.07656441422337223, "learning_rate": 4.240617393841578e-06, "loss": 0.4868, "step": 4428 }, { "epoch": 2.1970956931860495, "grad_norm": 0.07123197731052923, "learning_rate": 4.23868542819624e-06, "loss": 0.4561, "step": 4429 }, { "epoch": 2.197592155889289, "grad_norm": 0.07713749872276374, "learning_rate": 4.236753578905627e-06, "loss": 0.4506, "step": 4430 }, { "epoch": 2.198088618592528, "grad_norm": 0.07255814303647512, "learning_rate": 4.234821846264993e-06, "loss": 0.4292, "step": 4431 }, { "epoch": 2.1985850812957675, "grad_norm": 0.0736169066359428, "learning_rate": 4.232890230569573e-06, "loss": 0.5202, "step": 4432 }, { "epoch": 2.1990815439990072, "grad_norm": 0.07229802672521726, "learning_rate": 4.2309587321145795e-06, "loss": 0.4574, "step": 4433 }, { "epoch": 2.1995780067022466, "grad_norm": 0.0753428203681146, "learning_rate": 4.229027351195213e-06, "loss": 0.4579, "step": 4434 }, { "epoch": 2.200074469405486, "grad_norm": 0.07479409648984664, "learning_rate": 4.227096088106655e-06, "loss": 0.4518, "step": 4435 }, { "epoch": 2.2005709321087252, "grad_norm": 0.07571046025431778, "learning_rate": 4.225164943144067e-06, "loss": 0.469, "step": 4436 }, { "epoch": 2.2010673948119646, "grad_norm": 0.07471281694591714, "learning_rate": 4.223233916602593e-06, "loss": 0.4571, "step": 4437 }, { "epoch": 2.2015638575152043, "grad_norm": 0.07239478998206747, "learning_rate": 4.221303008777361e-06, "loss": 0.467, "step": 4438 }, { "epoch": 
2.2020603202184437, "grad_norm": 0.07068125404733483, "learning_rate": 4.219372219963479e-06, "loss": 0.4512, "step": 4439 }, { "epoch": 2.202556782921683, "grad_norm": 0.07725181283625165, "learning_rate": 4.217441550456037e-06, "loss": 0.4959, "step": 4440 }, { "epoch": 2.2030532456249223, "grad_norm": 0.07637029431018237, "learning_rate": 4.215511000550106e-06, "loss": 0.4788, "step": 4441 }, { "epoch": 2.2035497083281617, "grad_norm": 0.07344370158757503, "learning_rate": 4.21358057054074e-06, "loss": 0.4706, "step": 4442 }, { "epoch": 2.2040461710314014, "grad_norm": 0.07304870738294844, "learning_rate": 4.211650260722975e-06, "loss": 0.461, "step": 4443 }, { "epoch": 2.2045426337346408, "grad_norm": 0.07258620076213448, "learning_rate": 4.209720071391827e-06, "loss": 0.4306, "step": 4444 }, { "epoch": 2.20503909643788, "grad_norm": 0.0732693591865333, "learning_rate": 4.207790002842296e-06, "loss": 0.4573, "step": 4445 }, { "epoch": 2.2055355591411194, "grad_norm": 0.07384740915845746, "learning_rate": 4.20586005536936e-06, "loss": 0.4423, "step": 4446 }, { "epoch": 2.2060320218443588, "grad_norm": 0.07183291018678276, "learning_rate": 4.203930229267982e-06, "loss": 0.4744, "step": 4447 }, { "epoch": 2.2065284845475985, "grad_norm": 0.07160026517347787, "learning_rate": 4.2020005248331056e-06, "loss": 0.4652, "step": 4448 }, { "epoch": 2.207024947250838, "grad_norm": 0.073264422607452, "learning_rate": 4.2000709423596515e-06, "loss": 0.476, "step": 4449 }, { "epoch": 2.207521409954077, "grad_norm": 0.06959340873010447, "learning_rate": 4.198141482142529e-06, "loss": 0.4654, "step": 4450 }, { "epoch": 2.2080178726573165, "grad_norm": 0.07551089938013718, "learning_rate": 4.196212144476626e-06, "loss": 0.4698, "step": 4451 }, { "epoch": 2.208514335360556, "grad_norm": 0.07608020844071971, "learning_rate": 4.194282929656806e-06, "loss": 0.4578, "step": 4452 }, { "epoch": 2.2090107980637956, "grad_norm": 0.07294714292734182, "learning_rate": 4.1923538379779224e-06, "loss": 0.4295, "step": 4453 }, { "epoch": 2.209507260767035, "grad_norm": 0.07450752572030765, "learning_rate": 4.190424869734805e-06, "loss": 0.4917, "step": 4454 }, { "epoch": 2.2100037234702743, "grad_norm": 0.08921788801344878, "learning_rate": 4.188496025222266e-06, "loss": 0.481, "step": 4455 }, { "epoch": 2.2105001861735136, "grad_norm": 0.07214432985792273, "learning_rate": 4.186567304735099e-06, "loss": 0.489, "step": 4456 }, { "epoch": 2.210996648876753, "grad_norm": 0.07181054002878612, "learning_rate": 4.184638708568075e-06, "loss": 0.4494, "step": 4457 }, { "epoch": 2.2114931115799927, "grad_norm": 0.07328512025912555, "learning_rate": 4.1827102370159526e-06, "loss": 0.4832, "step": 4458 }, { "epoch": 2.211989574283232, "grad_norm": 0.07298126138675862, "learning_rate": 4.180781890373465e-06, "loss": 0.4687, "step": 4459 }, { "epoch": 2.2124860369864714, "grad_norm": 0.0726929787084046, "learning_rate": 4.178853668935332e-06, "loss": 0.4734, "step": 4460 }, { "epoch": 2.2129824996897107, "grad_norm": 0.07150065845892105, "learning_rate": 4.1769255729962494e-06, "loss": 0.4627, "step": 4461 }, { "epoch": 2.21347896239295, "grad_norm": 0.077463177549223, "learning_rate": 4.174997602850895e-06, "loss": 0.4447, "step": 4462 }, { "epoch": 2.21397542509619, "grad_norm": 0.0724247437106297, "learning_rate": 4.173069758793929e-06, "loss": 0.4667, "step": 4463 }, { "epoch": 2.214471887799429, "grad_norm": 0.07246835564653938, "learning_rate": 4.171142041119994e-06, "loss": 0.4517, "step": 4464 }, { "epoch": 
2.2149683505026685, "grad_norm": 0.06890663228644153, "learning_rate": 4.169214450123706e-06, "loss": 0.4339, "step": 4465 }, { "epoch": 2.215464813205908, "grad_norm": 0.07297905706760326, "learning_rate": 4.167286986099669e-06, "loss": 0.4085, "step": 4466 }, { "epoch": 2.215961275909147, "grad_norm": 0.07651788554616279, "learning_rate": 4.165359649342464e-06, "loss": 0.4529, "step": 4467 }, { "epoch": 2.216457738612387, "grad_norm": 0.071610709788606, "learning_rate": 4.163432440146655e-06, "loss": 0.4552, "step": 4468 }, { "epoch": 2.2169542013156263, "grad_norm": 0.07536894363343133, "learning_rate": 4.161505358806784e-06, "loss": 0.4496, "step": 4469 }, { "epoch": 2.2174506640188656, "grad_norm": 0.07399735928163335, "learning_rate": 4.159578405617375e-06, "loss": 0.4601, "step": 4470 }, { "epoch": 2.217947126722105, "grad_norm": 0.07202784194770909, "learning_rate": 4.15765158087293e-06, "loss": 0.4554, "step": 4471 }, { "epoch": 2.2184435894253443, "grad_norm": 0.075485992762671, "learning_rate": 4.155724884867937e-06, "loss": 0.4814, "step": 4472 }, { "epoch": 2.218940052128584, "grad_norm": 0.07469834057898289, "learning_rate": 4.153798317896857e-06, "loss": 0.4916, "step": 4473 }, { "epoch": 2.2194365148318234, "grad_norm": 0.07480518347076377, "learning_rate": 4.151871880254137e-06, "loss": 0.4586, "step": 4474 }, { "epoch": 2.2199329775350627, "grad_norm": 0.07250194926970677, "learning_rate": 4.1499455722342e-06, "loss": 0.4629, "step": 4475 }, { "epoch": 2.220429440238302, "grad_norm": 0.07107799191740055, "learning_rate": 4.148019394131454e-06, "loss": 0.453, "step": 4476 }, { "epoch": 2.2209259029415414, "grad_norm": 0.07251330487830691, "learning_rate": 4.146093346240284e-06, "loss": 0.465, "step": 4477 }, { "epoch": 2.221422365644781, "grad_norm": 0.07090740682014357, "learning_rate": 4.144167428855053e-06, "loss": 0.4513, "step": 4478 }, { "epoch": 2.2219188283480205, "grad_norm": 0.07642691152132507, "learning_rate": 4.142241642270109e-06, "loss": 0.4221, "step": 4479 }, { "epoch": 2.22241529105126, "grad_norm": 0.07441365762455417, "learning_rate": 4.140315986779778e-06, "loss": 0.4571, "step": 4480 }, { "epoch": 2.222911753754499, "grad_norm": 0.07042826712428749, "learning_rate": 4.138390462678364e-06, "loss": 0.4605, "step": 4481 }, { "epoch": 2.2234082164577385, "grad_norm": 0.07180591373029381, "learning_rate": 4.136465070260151e-06, "loss": 0.4278, "step": 4482 }, { "epoch": 2.2239046791609782, "grad_norm": 0.07131430714390916, "learning_rate": 4.134539809819407e-06, "loss": 0.4639, "step": 4483 }, { "epoch": 2.2244011418642176, "grad_norm": 0.07681011768308688, "learning_rate": 4.1326146816503776e-06, "loss": 0.4602, "step": 4484 }, { "epoch": 2.224897604567457, "grad_norm": 0.07260171427884389, "learning_rate": 4.130689686047286e-06, "loss": 0.4388, "step": 4485 }, { "epoch": 2.225394067270696, "grad_norm": 0.07201246409421869, "learning_rate": 4.1287648233043366e-06, "loss": 0.4125, "step": 4486 }, { "epoch": 2.2258905299739355, "grad_norm": 0.07254989653192677, "learning_rate": 4.126840093715715e-06, "loss": 0.4525, "step": 4487 }, { "epoch": 2.2263869926771753, "grad_norm": 0.07166160294326122, "learning_rate": 4.124915497575587e-06, "loss": 0.4568, "step": 4488 }, { "epoch": 2.2268834553804147, "grad_norm": 0.07122292370884728, "learning_rate": 4.122991035178093e-06, "loss": 0.4559, "step": 4489 }, { "epoch": 2.227379918083654, "grad_norm": 0.07258420031929565, "learning_rate": 4.121066706817357e-06, "loss": 0.4711, "step": 4490 }, { "epoch": 
2.2278763807868933, "grad_norm": 0.07465745398445925, "learning_rate": 4.1191425127874824e-06, "loss": 0.491, "step": 4491 }, { "epoch": 2.2283728434901326, "grad_norm": 0.07335909603079206, "learning_rate": 4.117218453382551e-06, "loss": 0.4785, "step": 4492 }, { "epoch": 2.2288693061933724, "grad_norm": 0.07387175642761908, "learning_rate": 4.115294528896627e-06, "loss": 0.4666, "step": 4493 }, { "epoch": 2.2293657688966118, "grad_norm": 0.07207293863663537, "learning_rate": 4.113370739623746e-06, "loss": 0.451, "step": 4494 }, { "epoch": 2.229862231599851, "grad_norm": 0.07313205287182717, "learning_rate": 4.111447085857932e-06, "loss": 0.4912, "step": 4495 }, { "epoch": 2.2303586943030904, "grad_norm": 0.07288468015896155, "learning_rate": 4.109523567893185e-06, "loss": 0.4716, "step": 4496 }, { "epoch": 2.2308551570063297, "grad_norm": 0.07035718648695975, "learning_rate": 4.107600186023485e-06, "loss": 0.4427, "step": 4497 }, { "epoch": 2.2313516197095695, "grad_norm": 0.07110839782679326, "learning_rate": 4.105676940542785e-06, "loss": 0.4382, "step": 4498 }, { "epoch": 2.231848082412809, "grad_norm": 0.0702316925680538, "learning_rate": 4.103753831745025e-06, "loss": 0.4661, "step": 4499 }, { "epoch": 2.232344545116048, "grad_norm": 0.07315427281933387, "learning_rate": 4.1018308599241245e-06, "loss": 0.4681, "step": 4500 }, { "epoch": 2.2328410078192875, "grad_norm": 0.0709859756454207, "learning_rate": 4.099908025373973e-06, "loss": 0.462, "step": 4501 }, { "epoch": 2.233337470522527, "grad_norm": 0.07390214333546574, "learning_rate": 4.097985328388449e-06, "loss": 0.4674, "step": 4502 }, { "epoch": 2.2338339332257666, "grad_norm": 0.07310495770803771, "learning_rate": 4.096062769261405e-06, "loss": 0.444, "step": 4503 }, { "epoch": 2.234330395929006, "grad_norm": 0.07318819058228206, "learning_rate": 4.094140348286674e-06, "loss": 0.4579, "step": 4504 }, { "epoch": 2.2348268586322453, "grad_norm": 0.07154890926200361, "learning_rate": 4.092218065758065e-06, "loss": 0.4827, "step": 4505 }, { "epoch": 2.2353233213354846, "grad_norm": 0.07123253786534825, "learning_rate": 4.0902959219693695e-06, "loss": 0.4058, "step": 4506 }, { "epoch": 2.235819784038724, "grad_norm": 0.0743483487279238, "learning_rate": 4.088373917214357e-06, "loss": 0.467, "step": 4507 }, { "epoch": 2.2363162467419633, "grad_norm": 0.0739892303014404, "learning_rate": 4.086452051786774e-06, "loss": 0.4671, "step": 4508 }, { "epoch": 2.236812709445203, "grad_norm": 0.07372586400995458, "learning_rate": 4.0845303259803485e-06, "loss": 0.4632, "step": 4509 }, { "epoch": 2.2373091721484424, "grad_norm": 0.07350804599337754, "learning_rate": 4.082608740088782e-06, "loss": 0.4676, "step": 4510 }, { "epoch": 2.2378056348516817, "grad_norm": 0.07078113633327661, "learning_rate": 4.080687294405761e-06, "loss": 0.456, "step": 4511 }, { "epoch": 2.238302097554921, "grad_norm": 0.07305462569174823, "learning_rate": 4.078765989224947e-06, "loss": 0.4667, "step": 4512 }, { "epoch": 2.238798560258161, "grad_norm": 0.07116321137722682, "learning_rate": 4.0768448248399836e-06, "loss": 0.4246, "step": 4513 }, { "epoch": 2.2392950229614, "grad_norm": 0.07662697234502294, "learning_rate": 4.074923801544485e-06, "loss": 0.489, "step": 4514 }, { "epoch": 2.2397914856646395, "grad_norm": 0.07701510571760695, "learning_rate": 4.07300291963205e-06, "loss": 0.4552, "step": 4515 }, { "epoch": 2.240287948367879, "grad_norm": 0.07124722345033124, "learning_rate": 4.071082179396257e-06, "loss": 0.4634, "step": 4516 }, { "epoch": 
2.240784411071118, "grad_norm": 0.07170234298992674, "learning_rate": 4.069161581130658e-06, "loss": 0.4505, "step": 4517 }, { "epoch": 2.2412808737743575, "grad_norm": 0.06941445386725237, "learning_rate": 4.067241125128786e-06, "loss": 0.4254, "step": 4518 }, { "epoch": 2.2417773364775972, "grad_norm": 0.07459034297591921, "learning_rate": 4.065320811684152e-06, "loss": 0.5102, "step": 4519 }, { "epoch": 2.2422737991808366, "grad_norm": 0.07136337958486601, "learning_rate": 4.063400641090246e-06, "loss": 0.4246, "step": 4520 }, { "epoch": 2.242770261884076, "grad_norm": 0.07180395198419941, "learning_rate": 4.0614806136405355e-06, "loss": 0.4437, "step": 4521 }, { "epoch": 2.2432667245873152, "grad_norm": 0.06959895930282148, "learning_rate": 4.059560729628465e-06, "loss": 0.4395, "step": 4522 }, { "epoch": 2.243763187290555, "grad_norm": 0.07319596333666036, "learning_rate": 4.0576409893474566e-06, "loss": 0.4562, "step": 4523 }, { "epoch": 2.2442596499937943, "grad_norm": 0.07184203088659276, "learning_rate": 4.0557213930909146e-06, "loss": 0.4495, "step": 4524 }, { "epoch": 2.2447561126970337, "grad_norm": 0.07533243438464896, "learning_rate": 4.053801941152218e-06, "loss": 0.4814, "step": 4525 }, { "epoch": 2.245252575400273, "grad_norm": 0.0744110741286213, "learning_rate": 4.051882633824723e-06, "loss": 0.4598, "step": 4526 }, { "epoch": 2.2457490381035123, "grad_norm": 0.07628997779462912, "learning_rate": 4.049963471401765e-06, "loss": 0.4827, "step": 4527 }, { "epoch": 2.2462455008067517, "grad_norm": 0.07467086146241933, "learning_rate": 4.048044454176658e-06, "loss": 0.4991, "step": 4528 }, { "epoch": 2.2467419635099914, "grad_norm": 0.07426614710662187, "learning_rate": 4.046125582442695e-06, "loss": 0.4705, "step": 4529 }, { "epoch": 2.2472384262132308, "grad_norm": 0.07379210496441937, "learning_rate": 4.04420685649314e-06, "loss": 0.4504, "step": 4530 }, { "epoch": 2.24773488891647, "grad_norm": 0.07265582803024827, "learning_rate": 4.0422882766212416e-06, "loss": 0.4326, "step": 4531 }, { "epoch": 2.2482313516197094, "grad_norm": 0.07594842599950401, "learning_rate": 4.040369843120226e-06, "loss": 0.458, "step": 4532 }, { "epoch": 2.248727814322949, "grad_norm": 0.06816504846270663, "learning_rate": 4.038451556283292e-06, "loss": 0.4504, "step": 4533 }, { "epoch": 2.2492242770261885, "grad_norm": 0.07282920440283609, "learning_rate": 4.036533416403621e-06, "loss": 0.4792, "step": 4534 }, { "epoch": 2.249720739729428, "grad_norm": 0.07628329549507812, "learning_rate": 4.034615423774369e-06, "loss": 0.498, "step": 4535 }, { "epoch": 2.250217202432667, "grad_norm": 0.07438066243638336, "learning_rate": 4.03269757868867e-06, "loss": 0.4489, "step": 4536 }, { "epoch": 2.250217202432667, "eval_loss": 0.5172906517982483, "eval_runtime": 258.8938, "eval_samples_per_second": 117.241, "eval_steps_per_second": 14.659, "step": 4536 }, { "epoch": 2.2507136651359065, "grad_norm": 0.07331941505145612, "learning_rate": 4.030779881439639e-06, "loss": 0.4721, "step": 4537 }, { "epoch": 2.251210127839146, "grad_norm": 0.07216350901232152, "learning_rate": 4.02886233232036e-06, "loss": 0.4654, "step": 4538 }, { "epoch": 2.2517065905423856, "grad_norm": 0.07347245654808018, "learning_rate": 4.026944931623905e-06, "loss": 0.4839, "step": 4539 }, { "epoch": 2.252203053245625, "grad_norm": 0.07355019456418468, "learning_rate": 4.025027679643314e-06, "loss": 0.4442, "step": 4540 }, { "epoch": 2.2526995159488643, "grad_norm": 0.07176085614756975, "learning_rate": 4.02311057667161e-06, "loss": 
0.4591, "step": 4541 }, { "epoch": 2.2531959786521036, "grad_norm": 0.07193387754107909, "learning_rate": 4.0211936230017915e-06, "loss": 0.4232, "step": 4542 }, { "epoch": 2.2536924413553434, "grad_norm": 0.07306749615179373, "learning_rate": 4.019276818926833e-06, "loss": 0.4709, "step": 4543 }, { "epoch": 2.2541889040585827, "grad_norm": 0.07275645118232545, "learning_rate": 4.017360164739687e-06, "loss": 0.4711, "step": 4544 }, { "epoch": 2.254685366761822, "grad_norm": 0.07207487833978099, "learning_rate": 4.015443660733288e-06, "loss": 0.463, "step": 4545 }, { "epoch": 2.2551818294650614, "grad_norm": 0.07244408374466355, "learning_rate": 4.0135273072005374e-06, "loss": 0.4418, "step": 4546 }, { "epoch": 2.2556782921683007, "grad_norm": 0.07541937868644752, "learning_rate": 4.01161110443432e-06, "loss": 0.4724, "step": 4547 }, { "epoch": 2.25617475487154, "grad_norm": 0.07245802199577502, "learning_rate": 4.009695052727499e-06, "loss": 0.456, "step": 4548 }, { "epoch": 2.25667121757478, "grad_norm": 0.07409608483995006, "learning_rate": 4.00777915237291e-06, "loss": 0.5128, "step": 4549 }, { "epoch": 2.257167680278019, "grad_norm": 0.0720396749298917, "learning_rate": 4.005863403663368e-06, "loss": 0.4677, "step": 4550 }, { "epoch": 2.2576641429812585, "grad_norm": 0.07528407483916275, "learning_rate": 4.0039478068916655e-06, "loss": 0.4787, "step": 4551 }, { "epoch": 2.258160605684498, "grad_norm": 0.07836436675954314, "learning_rate": 4.002032362350571e-06, "loss": 0.5044, "step": 4552 }, { "epoch": 2.2586570683877376, "grad_norm": 0.07308686718367485, "learning_rate": 4.000117070332828e-06, "loss": 0.4483, "step": 4553 }, { "epoch": 2.259153531090977, "grad_norm": 0.07185200327263667, "learning_rate": 3.998201931131159e-06, "loss": 0.4353, "step": 4554 }, { "epoch": 2.2596499937942163, "grad_norm": 0.07071000554552216, "learning_rate": 3.996286945038263e-06, "loss": 0.4598, "step": 4555 }, { "epoch": 2.2601464564974556, "grad_norm": 0.07354071205498476, "learning_rate": 3.9943721123468124e-06, "loss": 0.4366, "step": 4556 }, { "epoch": 2.260642919200695, "grad_norm": 0.07360684253852036, "learning_rate": 3.992457433349461e-06, "loss": 0.4447, "step": 4557 }, { "epoch": 2.2611393819039343, "grad_norm": 0.07349793093279754, "learning_rate": 3.990542908338837e-06, "loss": 0.4723, "step": 4558 }, { "epoch": 2.261635844607174, "grad_norm": 0.07447147137326113, "learning_rate": 3.988628537607544e-06, "loss": 0.4786, "step": 4559 }, { "epoch": 2.2621323073104134, "grad_norm": 0.07332903082275616, "learning_rate": 3.986714321448162e-06, "loss": 0.444, "step": 4560 }, { "epoch": 2.2626287700136527, "grad_norm": 0.0732381531192602, "learning_rate": 3.984800260153251e-06, "loss": 0.4744, "step": 4561 }, { "epoch": 2.263125232716892, "grad_norm": 0.07108143720306151, "learning_rate": 3.982886354015341e-06, "loss": 0.4677, "step": 4562 }, { "epoch": 2.263621695420132, "grad_norm": 0.07661093044858573, "learning_rate": 3.980972603326945e-06, "loss": 0.4656, "step": 4563 }, { "epoch": 2.264118158123371, "grad_norm": 0.07662723476502775, "learning_rate": 3.979059008380547e-06, "loss": 0.4559, "step": 4564 }, { "epoch": 2.2646146208266105, "grad_norm": 0.06964828076274196, "learning_rate": 3.97714556946861e-06, "loss": 0.4762, "step": 4565 }, { "epoch": 2.26511108352985, "grad_norm": 0.07254551987290445, "learning_rate": 3.9752322868835715e-06, "loss": 0.4519, "step": 4566 }, { "epoch": 2.265607546233089, "grad_norm": 0.07699102489984336, "learning_rate": 3.973319160917848e-06, "loss": 0.4897, 
"step": 4567 }, { "epoch": 2.2661040089363285, "grad_norm": 0.07175368111713772, "learning_rate": 3.971406191863829e-06, "loss": 0.4541, "step": 4568 }, { "epoch": 2.2666004716395682, "grad_norm": 0.07336946290089652, "learning_rate": 3.969493380013882e-06, "loss": 0.4348, "step": 4569 }, { "epoch": 2.2670969343428076, "grad_norm": 0.07404276252315657, "learning_rate": 3.967580725660348e-06, "loss": 0.4694, "step": 4570 }, { "epoch": 2.267593397046047, "grad_norm": 0.07209603130099349, "learning_rate": 3.965668229095546e-06, "loss": 0.4415, "step": 4571 }, { "epoch": 2.2680898597492862, "grad_norm": 0.07126832127112002, "learning_rate": 3.963755890611772e-06, "loss": 0.4545, "step": 4572 }, { "epoch": 2.2685863224525256, "grad_norm": 0.07424826158725521, "learning_rate": 3.961843710501294e-06, "loss": 0.4582, "step": 4573 }, { "epoch": 2.2690827851557653, "grad_norm": 0.07626947433796435, "learning_rate": 3.959931689056362e-06, "loss": 0.4714, "step": 4574 }, { "epoch": 2.2695792478590047, "grad_norm": 0.07459916034486681, "learning_rate": 3.958019826569192e-06, "loss": 0.4339, "step": 4575 }, { "epoch": 2.270075710562244, "grad_norm": 0.07557272071979547, "learning_rate": 3.956108123331986e-06, "loss": 0.483, "step": 4576 }, { "epoch": 2.2705721732654833, "grad_norm": 0.07299559261994196, "learning_rate": 3.954196579636918e-06, "loss": 0.4832, "step": 4577 }, { "epoch": 2.2710686359687227, "grad_norm": 0.07190351112626157, "learning_rate": 3.952285195776132e-06, "loss": 0.4256, "step": 4578 }, { "epoch": 2.2715650986719624, "grad_norm": 0.0746426816115579, "learning_rate": 3.950373972041755e-06, "loss": 0.4581, "step": 4579 }, { "epoch": 2.2720615613752018, "grad_norm": 0.0758770115143761, "learning_rate": 3.948462908725888e-06, "loss": 0.4602, "step": 4580 }, { "epoch": 2.272558024078441, "grad_norm": 0.07214617535370865, "learning_rate": 3.946552006120604e-06, "loss": 0.4662, "step": 4581 }, { "epoch": 2.2730544867816804, "grad_norm": 0.0734472665053787, "learning_rate": 3.944641264517956e-06, "loss": 0.4376, "step": 4582 }, { "epoch": 2.2735509494849198, "grad_norm": 0.07316103188106508, "learning_rate": 3.942730684209969e-06, "loss": 0.4521, "step": 4583 }, { "epoch": 2.2740474121881595, "grad_norm": 0.07210985009452146, "learning_rate": 3.940820265488644e-06, "loss": 0.4538, "step": 4584 }, { "epoch": 2.274543874891399, "grad_norm": 0.07222754057585111, "learning_rate": 3.9389100086459604e-06, "loss": 0.4665, "step": 4585 }, { "epoch": 2.275040337594638, "grad_norm": 0.07140562198443162, "learning_rate": 3.936999913973868e-06, "loss": 0.4496, "step": 4586 }, { "epoch": 2.2755368002978775, "grad_norm": 0.07177302930612432, "learning_rate": 3.9350899817642946e-06, "loss": 0.4618, "step": 4587 }, { "epoch": 2.276033263001117, "grad_norm": 0.07207407681313742, "learning_rate": 3.933180212309143e-06, "loss": 0.4576, "step": 4588 }, { "epoch": 2.2765297257043566, "grad_norm": 0.07314155056855802, "learning_rate": 3.93127060590029e-06, "loss": 0.4324, "step": 4589 }, { "epoch": 2.277026188407596, "grad_norm": 0.0750859987810042, "learning_rate": 3.929361162829591e-06, "loss": 0.4596, "step": 4590 }, { "epoch": 2.2775226511108353, "grad_norm": 0.07556163426963661, "learning_rate": 3.9274518833888704e-06, "loss": 0.4658, "step": 4591 }, { "epoch": 2.2780191138140746, "grad_norm": 0.07079600144616138, "learning_rate": 3.925542767869933e-06, "loss": 0.4845, "step": 4592 }, { "epoch": 2.278515576517314, "grad_norm": 0.07579154104447022, "learning_rate": 3.923633816564558e-06, "loss": 0.4973, 
"step": 4593 }, { "epoch": 2.2790120392205537, "grad_norm": 0.07561556923014369, "learning_rate": 3.921725029764494e-06, "loss": 0.4806, "step": 4594 }, { "epoch": 2.279508501923793, "grad_norm": 0.07106848910421479, "learning_rate": 3.91981640776147e-06, "loss": 0.4363, "step": 4595 }, { "epoch": 2.2800049646270324, "grad_norm": 0.07222499569491124, "learning_rate": 3.9179079508471876e-06, "loss": 0.4437, "step": 4596 }, { "epoch": 2.2805014273302717, "grad_norm": 0.07272568112911992, "learning_rate": 3.915999659313328e-06, "loss": 0.462, "step": 4597 }, { "epoch": 2.280997890033511, "grad_norm": 0.0749925291982409, "learning_rate": 3.914091533451537e-06, "loss": 0.4757, "step": 4598 }, { "epoch": 2.281494352736751, "grad_norm": 0.0733452428212777, "learning_rate": 3.9121835735534446e-06, "loss": 0.4628, "step": 4599 }, { "epoch": 2.28199081543999, "grad_norm": 0.07469474773325638, "learning_rate": 3.910275779910651e-06, "loss": 0.4348, "step": 4600 }, { "epoch": 2.2824872781432295, "grad_norm": 0.06954130209101946, "learning_rate": 3.908368152814733e-06, "loss": 0.4267, "step": 4601 }, { "epoch": 2.282983740846469, "grad_norm": 0.07280494243859224, "learning_rate": 3.906460692557239e-06, "loss": 0.464, "step": 4602 }, { "epoch": 2.283480203549708, "grad_norm": 0.07491485368077401, "learning_rate": 3.904553399429695e-06, "loss": 0.4673, "step": 4603 }, { "epoch": 2.283976666252948, "grad_norm": 0.0725255163654333, "learning_rate": 3.902646273723599e-06, "loss": 0.4582, "step": 4604 }, { "epoch": 2.2844731289561873, "grad_norm": 0.07376370932884879, "learning_rate": 3.900739315730426e-06, "loss": 0.4774, "step": 4605 }, { "epoch": 2.2849695916594266, "grad_norm": 0.07147727373362511, "learning_rate": 3.898832525741624e-06, "loss": 0.4488, "step": 4606 }, { "epoch": 2.285466054362666, "grad_norm": 0.0707745677288137, "learning_rate": 3.896925904048614e-06, "loss": 0.4299, "step": 4607 }, { "epoch": 2.2859625170659053, "grad_norm": 0.07233717904719753, "learning_rate": 3.895019450942793e-06, "loss": 0.4501, "step": 4608 }, { "epoch": 2.286458979769145, "grad_norm": 0.07081364354190414, "learning_rate": 3.893113166715533e-06, "loss": 0.4609, "step": 4609 }, { "epoch": 2.2869554424723844, "grad_norm": 0.0724291518918608, "learning_rate": 3.891207051658177e-06, "loss": 0.4768, "step": 4610 }, { "epoch": 2.2874519051756237, "grad_norm": 0.07451520468130642, "learning_rate": 3.889301106062044e-06, "loss": 0.4979, "step": 4611 }, { "epoch": 2.287948367878863, "grad_norm": 0.07300620972050165, "learning_rate": 3.887395330218429e-06, "loss": 0.4579, "step": 4612 }, { "epoch": 2.2884448305821024, "grad_norm": 0.06996007226973054, "learning_rate": 3.885489724418599e-06, "loss": 0.4366, "step": 4613 }, { "epoch": 2.288941293285342, "grad_norm": 0.07132885315218875, "learning_rate": 3.883584288953794e-06, "loss": 0.4526, "step": 4614 }, { "epoch": 2.2894377559885815, "grad_norm": 0.06908714335809077, "learning_rate": 3.88167902411523e-06, "loss": 0.4333, "step": 4615 }, { "epoch": 2.289934218691821, "grad_norm": 0.07239860976789113, "learning_rate": 3.8797739301940965e-06, "loss": 0.4796, "step": 4616 }, { "epoch": 2.29043068139506, "grad_norm": 0.07372826754791378, "learning_rate": 3.877869007481557e-06, "loss": 0.471, "step": 4617 }, { "epoch": 2.2909271440982995, "grad_norm": 0.07371280520150718, "learning_rate": 3.875964256268747e-06, "loss": 0.4527, "step": 4618 }, { "epoch": 2.2914236068015392, "grad_norm": 0.07959721066448272, "learning_rate": 3.874059676846778e-06, "loss": 0.4963, "step": 4619 
}, { "epoch": 2.2919200695047786, "grad_norm": 0.07356143469580767, "learning_rate": 3.8721552695067334e-06, "loss": 0.4834, "step": 4620 }, { "epoch": 2.292416532208018, "grad_norm": 0.07043793579089817, "learning_rate": 3.8702510345396736e-06, "loss": 0.4365, "step": 4621 }, { "epoch": 2.292912994911257, "grad_norm": 0.07389661566696645, "learning_rate": 3.868346972236629e-06, "loss": 0.4515, "step": 4622 }, { "epoch": 2.2934094576144965, "grad_norm": 0.07056702907409622, "learning_rate": 3.866443082888605e-06, "loss": 0.4476, "step": 4623 }, { "epoch": 2.2939059203177363, "grad_norm": 0.07431132634179353, "learning_rate": 3.864539366786579e-06, "loss": 0.4317, "step": 4624 }, { "epoch": 2.2944023830209757, "grad_norm": 0.07411892259856963, "learning_rate": 3.862635824221508e-06, "loss": 0.4711, "step": 4625 }, { "epoch": 2.294898845724215, "grad_norm": 0.07390766032523606, "learning_rate": 3.860732455484314e-06, "loss": 0.4745, "step": 4626 }, { "epoch": 2.2953953084274543, "grad_norm": 0.07129476349377506, "learning_rate": 3.8588292608658964e-06, "loss": 0.4645, "step": 4627 }, { "epoch": 2.2958917711306936, "grad_norm": 0.07595256250205015, "learning_rate": 3.856926240657129e-06, "loss": 0.4909, "step": 4628 }, { "epoch": 2.296388233833933, "grad_norm": 0.07254923696850328, "learning_rate": 3.855023395148858e-06, "loss": 0.4317, "step": 4629 }, { "epoch": 2.2968846965371728, "grad_norm": 0.07350447502657023, "learning_rate": 3.853120724631903e-06, "loss": 0.4751, "step": 4630 }, { "epoch": 2.297381159240412, "grad_norm": 0.07131503869721514, "learning_rate": 3.8512182293970565e-06, "loss": 0.4662, "step": 4631 }, { "epoch": 2.2978776219436514, "grad_norm": 0.06930825093142902, "learning_rate": 3.849315909735084e-06, "loss": 0.4226, "step": 4632 }, { "epoch": 2.2983740846468907, "grad_norm": 0.07264603579286186, "learning_rate": 3.847413765936724e-06, "loss": 0.4282, "step": 4633 }, { "epoch": 2.2988705473501305, "grad_norm": 0.07438691342521027, "learning_rate": 3.845511798292692e-06, "loss": 0.461, "step": 4634 }, { "epoch": 2.29936701005337, "grad_norm": 0.07688398707415792, "learning_rate": 3.843610007093669e-06, "loss": 0.4229, "step": 4635 }, { "epoch": 2.299863472756609, "grad_norm": 0.0719622667679788, "learning_rate": 3.841708392630315e-06, "loss": 0.4517, "step": 4636 }, { "epoch": 2.3003599354598485, "grad_norm": 0.07299102365171561, "learning_rate": 3.839806955193262e-06, "loss": 0.4466, "step": 4637 }, { "epoch": 2.300856398163088, "grad_norm": 0.07375205625152867, "learning_rate": 3.837905695073114e-06, "loss": 0.447, "step": 4638 }, { "epoch": 2.301352860866327, "grad_norm": 0.06837142772677776, "learning_rate": 3.836004612560447e-06, "loss": 0.4203, "step": 4639 }, { "epoch": 2.301849323569567, "grad_norm": 0.07062241886373752, "learning_rate": 3.8341037079458125e-06, "loss": 0.4407, "step": 4640 }, { "epoch": 2.3023457862728063, "grad_norm": 0.07010432830866563, "learning_rate": 3.8322029815197335e-06, "loss": 0.4299, "step": 4641 }, { "epoch": 2.3028422489760456, "grad_norm": 0.07444368732307707, "learning_rate": 3.830302433572704e-06, "loss": 0.472, "step": 4642 }, { "epoch": 2.303338711679285, "grad_norm": 0.07268106792030828, "learning_rate": 3.828402064395191e-06, "loss": 0.4365, "step": 4643 }, { "epoch": 2.3038351743825247, "grad_norm": 0.07458663611375892, "learning_rate": 3.8265018742776374e-06, "loss": 0.4595, "step": 4644 }, { "epoch": 2.304331637085764, "grad_norm": 0.07467767197871673, "learning_rate": 3.824601863510459e-06, "loss": 0.4958, "step": 4645 }, 
{ "epoch": 2.3048280997890034, "grad_norm": 0.07201811203621303, "learning_rate": 3.822702032384038e-06, "loss": 0.4437, "step": 4646 }, { "epoch": 2.3053245624922427, "grad_norm": 0.0692424696265978, "learning_rate": 3.820802381188735e-06, "loss": 0.4488, "step": 4647 }, { "epoch": 2.305821025195482, "grad_norm": 0.07300687048478505, "learning_rate": 3.818902910214881e-06, "loss": 0.4693, "step": 4648 }, { "epoch": 2.3063174878987214, "grad_norm": 0.07596628725546059, "learning_rate": 3.817003619752779e-06, "loss": 0.4634, "step": 4649 }, { "epoch": 2.306813950601961, "grad_norm": 0.07268075534926753, "learning_rate": 3.8151045100927075e-06, "loss": 0.442, "step": 4650 }, { "epoch": 2.3073104133052005, "grad_norm": 0.07167150254029588, "learning_rate": 3.8132055815249127e-06, "loss": 0.4477, "step": 4651 }, { "epoch": 2.30780687600844, "grad_norm": 0.0713520151906273, "learning_rate": 3.811306834339615e-06, "loss": 0.4492, "step": 4652 }, { "epoch": 2.308303338711679, "grad_norm": 0.07299337925654024, "learning_rate": 3.809408268827009e-06, "loss": 0.4426, "step": 4653 }, { "epoch": 2.308799801414919, "grad_norm": 0.07345951629738362, "learning_rate": 3.8075098852772607e-06, "loss": 0.4836, "step": 4654 }, { "epoch": 2.3092962641181582, "grad_norm": 0.07194786674780375, "learning_rate": 3.8056116839805048e-06, "loss": 0.4386, "step": 4655 }, { "epoch": 2.3097927268213976, "grad_norm": 0.07183541026450214, "learning_rate": 3.8037136652268524e-06, "loss": 0.4489, "step": 4656 }, { "epoch": 2.310289189524637, "grad_norm": 0.07199156509588549, "learning_rate": 3.801815829306388e-06, "loss": 0.4482, "step": 4657 }, { "epoch": 2.3107856522278762, "grad_norm": 0.07300167018565622, "learning_rate": 3.7999181765091597e-06, "loss": 0.4555, "step": 4658 }, { "epoch": 2.3112821149311156, "grad_norm": 0.0727637096854206, "learning_rate": 3.7980207071251967e-06, "loss": 0.4541, "step": 4659 }, { "epoch": 2.3117785776343553, "grad_norm": 0.07124656239632847, "learning_rate": 3.7961234214444963e-06, "loss": 0.4468, "step": 4660 }, { "epoch": 2.3122750403375947, "grad_norm": 0.07460627268561644, "learning_rate": 3.7942263197570297e-06, "loss": 0.4975, "step": 4661 }, { "epoch": 2.312771503040834, "grad_norm": 0.06854605918792946, "learning_rate": 3.7923294023527353e-06, "loss": 0.4466, "step": 4662 }, { "epoch": 2.3132679657440733, "grad_norm": 0.07300630381478836, "learning_rate": 3.7904326695215283e-06, "loss": 0.4615, "step": 4663 }, { "epoch": 2.313764428447313, "grad_norm": 0.07456192765025539, "learning_rate": 3.788536121553294e-06, "loss": 0.4685, "step": 4664 }, { "epoch": 2.3142608911505524, "grad_norm": 0.07439993755686335, "learning_rate": 3.786639758737889e-06, "loss": 0.5119, "step": 4665 }, { "epoch": 2.3147573538537918, "grad_norm": 0.0767387688734663, "learning_rate": 3.7847435813651436e-06, "loss": 0.5161, "step": 4666 }, { "epoch": 2.315253816557031, "grad_norm": 0.07332142670152268, "learning_rate": 3.782847589724855e-06, "loss": 0.4265, "step": 4667 }, { "epoch": 2.3157502792602704, "grad_norm": 0.07089426764347762, "learning_rate": 3.7809517841067976e-06, "loss": 0.4339, "step": 4668 }, { "epoch": 2.3162467419635098, "grad_norm": 0.07230696778867758, "learning_rate": 3.7790561648007136e-06, "loss": 0.4289, "step": 4669 }, { "epoch": 2.3167432046667495, "grad_norm": 0.07350340322008529, "learning_rate": 3.777160732096318e-06, "loss": 0.454, "step": 4670 }, { "epoch": 2.317239667369989, "grad_norm": 0.07118246729742177, "learning_rate": 3.7752654862832995e-06, "loss": 0.4501, "step": 
4671 }, { "epoch": 2.317736130073228, "grad_norm": 0.07416497221248512, "learning_rate": 3.773370427651313e-06, "loss": 0.4617, "step": 4672 }, { "epoch": 2.3182325927764675, "grad_norm": 0.07372197345098633, "learning_rate": 3.7714755564899913e-06, "loss": 0.4706, "step": 4673 }, { "epoch": 2.3187290554797073, "grad_norm": 0.07601035801539498, "learning_rate": 3.7695808730889307e-06, "loss": 0.4793, "step": 4674 }, { "epoch": 2.3192255181829466, "grad_norm": 0.07121752187002783, "learning_rate": 3.7676863777377055e-06, "loss": 0.4519, "step": 4675 }, { "epoch": 2.319721980886186, "grad_norm": 0.0754180647916415, "learning_rate": 3.7657920707258588e-06, "loss": 0.4667, "step": 4676 }, { "epoch": 2.3202184435894253, "grad_norm": 0.06998138945634584, "learning_rate": 3.763897952342906e-06, "loss": 0.4435, "step": 4677 }, { "epoch": 2.3207149062926646, "grad_norm": 0.07131847650966808, "learning_rate": 3.7620040228783305e-06, "loss": 0.4423, "step": 4678 }, { "epoch": 2.321211368995904, "grad_norm": 0.07090715276736156, "learning_rate": 3.760110282621591e-06, "loss": 0.4384, "step": 4679 }, { "epoch": 2.3217078316991437, "grad_norm": 0.0734856641174945, "learning_rate": 3.7582167318621136e-06, "loss": 0.4365, "step": 4680 }, { "epoch": 2.322204294402383, "grad_norm": 0.07982924966824814, "learning_rate": 3.7563233708892993e-06, "loss": 0.4773, "step": 4681 }, { "epoch": 2.3227007571056224, "grad_norm": 0.07623726513167152, "learning_rate": 3.7544301999925176e-06, "loss": 0.4559, "step": 4682 }, { "epoch": 2.3231972198088617, "grad_norm": 0.07411457548761642, "learning_rate": 3.7525372194611075e-06, "loss": 0.4538, "step": 4683 }, { "epoch": 2.3236936825121015, "grad_norm": 0.07073415295298938, "learning_rate": 3.750644429584382e-06, "loss": 0.4418, "step": 4684 }, { "epoch": 2.324190145215341, "grad_norm": 0.07123935814247413, "learning_rate": 3.7487518306516237e-06, "loss": 0.4542, "step": 4685 }, { "epoch": 2.32468660791858, "grad_norm": 0.0755939983672755, "learning_rate": 3.7468594229520854e-06, "loss": 0.4836, "step": 4686 }, { "epoch": 2.3251830706218195, "grad_norm": 0.07368993868071555, "learning_rate": 3.744967206774993e-06, "loss": 0.4825, "step": 4687 }, { "epoch": 2.325679533325059, "grad_norm": 0.06850787814380067, "learning_rate": 3.743075182409539e-06, "loss": 0.442, "step": 4688 }, { "epoch": 2.326175996028298, "grad_norm": 0.07061990343981624, "learning_rate": 3.7411833501448924e-06, "loss": 0.4516, "step": 4689 }, { "epoch": 2.326672458731538, "grad_norm": 0.07427819657272731, "learning_rate": 3.7392917102701854e-06, "loss": 0.4821, "step": 4690 }, { "epoch": 2.3271689214347773, "grad_norm": 0.07667376209001088, "learning_rate": 3.737400263074526e-06, "loss": 0.4857, "step": 4691 }, { "epoch": 2.3276653841380166, "grad_norm": 0.07378650706390087, "learning_rate": 3.7355090088469924e-06, "loss": 0.473, "step": 4692 }, { "epoch": 2.328161846841256, "grad_norm": 0.07232148035439803, "learning_rate": 3.733617947876633e-06, "loss": 0.4687, "step": 4693 }, { "epoch": 2.3286583095444957, "grad_norm": 0.07035460992891325, "learning_rate": 3.731727080452464e-06, "loss": 0.4407, "step": 4694 }, { "epoch": 2.329154772247735, "grad_norm": 0.07141269813437955, "learning_rate": 3.7298364068634764e-06, "loss": 0.4538, "step": 4695 }, { "epoch": 2.3296512349509744, "grad_norm": 0.07038667356360034, "learning_rate": 3.727945927398628e-06, "loss": 0.4754, "step": 4696 }, { "epoch": 2.3301476976542137, "grad_norm": 0.07429587927666312, "learning_rate": 3.7260556423468486e-06, "loss": 0.4659, 
"step": 4697 }, { "epoch": 2.330644160357453, "grad_norm": 0.07302518698343577, "learning_rate": 3.72416555199704e-06, "loss": 0.4832, "step": 4698 }, { "epoch": 2.3311406230606924, "grad_norm": 0.07168300385035588, "learning_rate": 3.722275656638068e-06, "loss": 0.483, "step": 4699 }, { "epoch": 2.331637085763932, "grad_norm": 0.07072808980615392, "learning_rate": 3.7203859565587765e-06, "loss": 0.438, "step": 4700 }, { "epoch": 2.3321335484671715, "grad_norm": 0.0735926019372795, "learning_rate": 3.7184964520479737e-06, "loss": 0.4593, "step": 4701 }, { "epoch": 2.332630011170411, "grad_norm": 0.07033503029697981, "learning_rate": 3.7166071433944407e-06, "loss": 0.4483, "step": 4702 }, { "epoch": 2.33312647387365, "grad_norm": 0.07182229172907455, "learning_rate": 3.7147180308869296e-06, "loss": 0.4705, "step": 4703 }, { "epoch": 2.33362293657689, "grad_norm": 0.07143612225361773, "learning_rate": 3.712829114814158e-06, "loss": 0.4254, "step": 4704 }, { "epoch": 2.3341193992801292, "grad_norm": 0.07400848281236902, "learning_rate": 3.7109403954648208e-06, "loss": 0.4588, "step": 4705 }, { "epoch": 2.3346158619833686, "grad_norm": 0.0727514120789118, "learning_rate": 3.7090518731275738e-06, "loss": 0.4498, "step": 4706 }, { "epoch": 2.335112324686608, "grad_norm": 0.07610809200395814, "learning_rate": 3.7071635480910486e-06, "loss": 0.4679, "step": 4707 }, { "epoch": 2.3356087873898472, "grad_norm": 0.07723757088222308, "learning_rate": 3.7052754206438455e-06, "loss": 0.4844, "step": 4708 }, { "epoch": 2.3361052500930866, "grad_norm": 0.07401073014952221, "learning_rate": 3.703387491074536e-06, "loss": 0.4456, "step": 4709 }, { "epoch": 2.3366017127963263, "grad_norm": 0.07439980480315013, "learning_rate": 3.7014997596716596e-06, "loss": 0.4761, "step": 4710 }, { "epoch": 2.3370981754995657, "grad_norm": 0.07226783163233316, "learning_rate": 3.699612226723724e-06, "loss": 0.4856, "step": 4711 }, { "epoch": 2.337594638202805, "grad_norm": 0.07470804406103008, "learning_rate": 3.697724892519209e-06, "loss": 0.4688, "step": 4712 }, { "epoch": 2.3380911009060443, "grad_norm": 0.07314284196363455, "learning_rate": 3.6958377573465643e-06, "loss": 0.4721, "step": 4713 }, { "epoch": 2.3385875636092837, "grad_norm": 0.07246523023838439, "learning_rate": 3.693950821494209e-06, "loss": 0.4408, "step": 4714 }, { "epoch": 2.3390840263125234, "grad_norm": 0.07405405668151005, "learning_rate": 3.692064085250528e-06, "loss": 0.4592, "step": 4715 }, { "epoch": 2.3395804890157628, "grad_norm": 0.07165590250964264, "learning_rate": 3.6901775489038804e-06, "loss": 0.4557, "step": 4716 }, { "epoch": 2.340076951719002, "grad_norm": 0.07382760497377183, "learning_rate": 3.688291212742594e-06, "loss": 0.4756, "step": 4717 }, { "epoch": 2.3405734144222414, "grad_norm": 0.07176344836460424, "learning_rate": 3.686405077054963e-06, "loss": 0.4409, "step": 4718 }, { "epoch": 2.3410698771254808, "grad_norm": 0.07038888718316055, "learning_rate": 3.6845191421292558e-06, "loss": 0.448, "step": 4719 }, { "epoch": 2.3415663398287205, "grad_norm": 0.07348014482842789, "learning_rate": 3.682633408253704e-06, "loss": 0.4652, "step": 4720 }, { "epoch": 2.34206280253196, "grad_norm": 0.07286941695899553, "learning_rate": 3.680747875716514e-06, "loss": 0.4535, "step": 4721 }, { "epoch": 2.342559265235199, "grad_norm": 0.07332682139319671, "learning_rate": 3.6788625448058605e-06, "loss": 0.4762, "step": 4722 }, { "epoch": 2.3430557279384385, "grad_norm": 0.07381530635235646, "learning_rate": 3.676977415809882e-06, "loss": 
0.4707, "step": 4723 }, { "epoch": 2.343552190641678, "grad_norm": 0.06990799813327832, "learning_rate": 3.675092489016693e-06, "loss": 0.4018, "step": 4724 }, { "epoch": 2.3440486533449176, "grad_norm": 0.07394454106338585, "learning_rate": 3.673207764714373e-06, "loss": 0.4408, "step": 4725 }, { "epoch": 2.344545116048157, "grad_norm": 0.07301506376321407, "learning_rate": 3.671323243190974e-06, "loss": 0.4544, "step": 4726 }, { "epoch": 2.3450415787513963, "grad_norm": 0.07588801616599444, "learning_rate": 3.669438924734512e-06, "loss": 0.4637, "step": 4727 }, { "epoch": 2.3455380414546356, "grad_norm": 0.07434133158405588, "learning_rate": 3.6675548096329773e-06, "loss": 0.4667, "step": 4728 }, { "epoch": 2.346034504157875, "grad_norm": 0.07567857678764008, "learning_rate": 3.665670898174325e-06, "loss": 0.4717, "step": 4729 }, { "epoch": 2.3465309668611147, "grad_norm": 0.07114717020994527, "learning_rate": 3.6637871906464834e-06, "loss": 0.4274, "step": 4730 }, { "epoch": 2.347027429564354, "grad_norm": 0.07074817565632668, "learning_rate": 3.6619036873373435e-06, "loss": 0.4247, "step": 4731 }, { "epoch": 2.3475238922675934, "grad_norm": 0.0722416760901901, "learning_rate": 3.660020388534771e-06, "loss": 0.448, "step": 4732 }, { "epoch": 2.3480203549708327, "grad_norm": 0.07185437341829799, "learning_rate": 3.658137294526596e-06, "loss": 0.4598, "step": 4733 }, { "epoch": 2.348516817674072, "grad_norm": 0.07141278433821945, "learning_rate": 3.656254405600621e-06, "loss": 0.4486, "step": 4734 }, { "epoch": 2.349013280377312, "grad_norm": 0.07133360394274396, "learning_rate": 3.654371722044616e-06, "loss": 0.4357, "step": 4735 }, { "epoch": 2.349509743080551, "grad_norm": 0.07194937115926209, "learning_rate": 3.6524892441463166e-06, "loss": 0.4415, "step": 4736 }, { "epoch": 2.3500062057837905, "grad_norm": 0.07480819749333578, "learning_rate": 3.650606972193431e-06, "loss": 0.5018, "step": 4737 }, { "epoch": 2.35050266848703, "grad_norm": 0.07296178767261839, "learning_rate": 3.6487249064736352e-06, "loss": 0.4458, "step": 4738 }, { "epoch": 2.350999131190269, "grad_norm": 0.07008863773692527, "learning_rate": 3.64684304727457e-06, "loss": 0.4185, "step": 4739 }, { "epoch": 2.351495593893509, "grad_norm": 0.07221727150834073, "learning_rate": 3.644961394883848e-06, "loss": 0.4826, "step": 4740 }, { "epoch": 2.3519920565967483, "grad_norm": 0.07125064634096756, "learning_rate": 3.643079949589051e-06, "loss": 0.4761, "step": 4741 }, { "epoch": 2.3524885192999876, "grad_norm": 0.0740146726475471, "learning_rate": 3.641198711677728e-06, "loss": 0.4661, "step": 4742 }, { "epoch": 2.352984982003227, "grad_norm": 0.07190469581219823, "learning_rate": 3.6393176814373944e-06, "loss": 0.485, "step": 4743 }, { "epoch": 2.3534814447064663, "grad_norm": 0.06876822776109015, "learning_rate": 3.6374368591555352e-06, "loss": 0.4461, "step": 4744 }, { "epoch": 2.353977907409706, "grad_norm": 0.07565380640590723, "learning_rate": 3.6355562451196065e-06, "loss": 0.4843, "step": 4745 }, { "epoch": 2.3544743701129454, "grad_norm": 0.07404096345374254, "learning_rate": 3.633675839617028e-06, "loss": 0.4652, "step": 4746 }, { "epoch": 2.3549708328161847, "grad_norm": 0.07180746556237937, "learning_rate": 3.6317956429351906e-06, "loss": 0.4339, "step": 4747 }, { "epoch": 2.355467295519424, "grad_norm": 0.0738081781163325, "learning_rate": 3.6299156553614513e-06, "loss": 0.444, "step": 4748 }, { "epoch": 2.3559637582226634, "grad_norm": 0.07143636633873711, "learning_rate": 3.628035877183136e-06, "loss": 
0.4452, "step": 4749 }, { "epoch": 2.356460220925903, "grad_norm": 0.0750219048239024, "learning_rate": 3.6261563086875396e-06, "loss": 0.4937, "step": 4750 }, { "epoch": 2.3569566836291425, "grad_norm": 0.07145869095204205, "learning_rate": 3.6242769501619245e-06, "loss": 0.4502, "step": 4751 }, { "epoch": 2.357453146332382, "grad_norm": 0.07262263133026953, "learning_rate": 3.622397801893518e-06, "loss": 0.4621, "step": 4752 }, { "epoch": 2.357949609035621, "grad_norm": 0.07474341812925704, "learning_rate": 3.62051886416952e-06, "loss": 0.4569, "step": 4753 }, { "epoch": 2.3584460717388605, "grad_norm": 0.07221546383431686, "learning_rate": 3.618640137277097e-06, "loss": 0.4749, "step": 4754 }, { "epoch": 2.3589425344421002, "grad_norm": 0.07333306403627074, "learning_rate": 3.6167616215033784e-06, "loss": 0.4638, "step": 4755 }, { "epoch": 2.3594389971453396, "grad_norm": 0.07300394997026594, "learning_rate": 3.614883317135467e-06, "loss": 0.4449, "step": 4756 }, { "epoch": 2.359935459848579, "grad_norm": 0.07595971084123176, "learning_rate": 3.613005224460433e-06, "loss": 0.4845, "step": 4757 }, { "epoch": 2.360431922551818, "grad_norm": 0.0744674579731717, "learning_rate": 3.6111273437653114e-06, "loss": 0.4364, "step": 4758 }, { "epoch": 2.3609283852550575, "grad_norm": 0.07356470176802757, "learning_rate": 3.6092496753371064e-06, "loss": 0.4598, "step": 4759 }, { "epoch": 2.3614248479582973, "grad_norm": 0.07331618173536777, "learning_rate": 3.6073722194627893e-06, "loss": 0.4691, "step": 4760 }, { "epoch": 2.3619213106615367, "grad_norm": 0.07269515877497877, "learning_rate": 3.6054949764292996e-06, "loss": 0.4351, "step": 4761 }, { "epoch": 2.362417773364776, "grad_norm": 0.07255848749377546, "learning_rate": 3.6036179465235432e-06, "loss": 0.4852, "step": 4762 }, { "epoch": 2.3629142360680153, "grad_norm": 0.07436795161560912, "learning_rate": 3.6017411300323957e-06, "loss": 0.4641, "step": 4763 }, { "epoch": 2.3634106987712546, "grad_norm": 0.07223596806015442, "learning_rate": 3.599864527242696e-06, "loss": 0.4643, "step": 4764 }, { "epoch": 2.3639071614744944, "grad_norm": 0.07363739183083648, "learning_rate": 3.5979881384412534e-06, "loss": 0.4593, "step": 4765 }, { "epoch": 2.3644036241777338, "grad_norm": 0.07133242037314486, "learning_rate": 3.5961119639148443e-06, "loss": 0.457, "step": 4766 }, { "epoch": 2.364900086880973, "grad_norm": 0.07048145352143227, "learning_rate": 3.5942360039502135e-06, "loss": 0.4448, "step": 4767 }, { "epoch": 2.3653965495842124, "grad_norm": 0.07333678955240325, "learning_rate": 3.592360258834069e-06, "loss": 0.4491, "step": 4768 }, { "epoch": 2.3658930122874517, "grad_norm": 0.07318665298320112, "learning_rate": 3.5904847288530882e-06, "loss": 0.4669, "step": 4769 }, { "epoch": 2.366389474990691, "grad_norm": 0.07357706600034569, "learning_rate": 3.5886094142939195e-06, "loss": 0.479, "step": 4770 }, { "epoch": 2.366885937693931, "grad_norm": 0.07131078314109707, "learning_rate": 3.5867343154431693e-06, "loss": 0.4538, "step": 4771 }, { "epoch": 2.36738240039717, "grad_norm": 0.07233993020557973, "learning_rate": 3.584859432587419e-06, "loss": 0.4595, "step": 4772 }, { "epoch": 2.3678788631004095, "grad_norm": 0.07227522944783686, "learning_rate": 3.582984766013215e-06, "loss": 0.4681, "step": 4773 }, { "epoch": 2.368375325803649, "grad_norm": 0.07378966999244926, "learning_rate": 3.58111031600707e-06, "loss": 0.4467, "step": 4774 }, { "epoch": 2.3688717885068886, "grad_norm": 0.07255376065435246, "learning_rate": 3.5792360828554615e-06, 
"loss": 0.4649, "step": 4775 }, { "epoch": 2.369368251210128, "grad_norm": 0.07349043423441821, "learning_rate": 3.5773620668448384e-06, "loss": 0.4427, "step": 4776 }, { "epoch": 2.3698647139133673, "grad_norm": 0.0737917842996249, "learning_rate": 3.575488268261613e-06, "loss": 0.4484, "step": 4777 }, { "epoch": 2.3703611766166066, "grad_norm": 0.07131005263256235, "learning_rate": 3.5736146873921652e-06, "loss": 0.4636, "step": 4778 }, { "epoch": 2.370857639319846, "grad_norm": 0.07285168554855122, "learning_rate": 3.5717413245228434e-06, "loss": 0.4796, "step": 4779 }, { "epoch": 2.3713541020230853, "grad_norm": 0.07242742265876065, "learning_rate": 3.569868179939958e-06, "loss": 0.4462, "step": 4780 }, { "epoch": 2.371850564726325, "grad_norm": 0.07264450598228168, "learning_rate": 3.567995253929792e-06, "loss": 0.4966, "step": 4781 }, { "epoch": 2.3723470274295644, "grad_norm": 0.07568634838040776, "learning_rate": 3.56612254677859e-06, "loss": 0.4577, "step": 4782 }, { "epoch": 2.3728434901328037, "grad_norm": 0.07437246189255993, "learning_rate": 3.564250058772567e-06, "loss": 0.5073, "step": 4783 }, { "epoch": 2.373339952836043, "grad_norm": 0.07202710745369477, "learning_rate": 3.562377790197903e-06, "loss": 0.4652, "step": 4784 }, { "epoch": 2.373836415539283, "grad_norm": 0.073800591321293, "learning_rate": 3.560505741340742e-06, "loss": 0.441, "step": 4785 }, { "epoch": 2.374332878242522, "grad_norm": 0.0712780849538718, "learning_rate": 3.5586339124871993e-06, "loss": 0.447, "step": 4786 }, { "epoch": 2.3748293409457615, "grad_norm": 0.07491310671013832, "learning_rate": 3.556762303923351e-06, "loss": 0.4816, "step": 4787 }, { "epoch": 2.375325803649001, "grad_norm": 0.07467968796444399, "learning_rate": 3.554890915935244e-06, "loss": 0.4778, "step": 4788 }, { "epoch": 2.37582226635224, "grad_norm": 0.07237031006567743, "learning_rate": 3.5530197488088904e-06, "loss": 0.4571, "step": 4789 }, { "epoch": 2.3763187290554795, "grad_norm": 0.07262263860714888, "learning_rate": 3.5511488028302676e-06, "loss": 0.4385, "step": 4790 }, { "epoch": 2.3768151917587192, "grad_norm": 0.07185368446839807, "learning_rate": 3.5492780782853196e-06, "loss": 0.4311, "step": 4791 }, { "epoch": 2.3773116544619586, "grad_norm": 0.07324880246011775, "learning_rate": 3.547407575459957e-06, "loss": 0.4472, "step": 4792 }, { "epoch": 2.377808117165198, "grad_norm": 0.07446233139953379, "learning_rate": 3.545537294640055e-06, "loss": 0.5045, "step": 4793 }, { "epoch": 2.3783045798684372, "grad_norm": 0.070140983659222, "learning_rate": 3.543667236111458e-06, "loss": 0.4607, "step": 4794 }, { "epoch": 2.378801042571677, "grad_norm": 0.07250620197395204, "learning_rate": 3.541797400159973e-06, "loss": 0.4479, "step": 4795 }, { "epoch": 2.3792975052749163, "grad_norm": 0.07540106712813605, "learning_rate": 3.539927787071375e-06, "loss": 0.5026, "step": 4796 }, { "epoch": 2.3797939679781557, "grad_norm": 0.07498674621009987, "learning_rate": 3.5380583971314043e-06, "loss": 0.457, "step": 4797 }, { "epoch": 2.380290430681395, "grad_norm": 0.07174711237052699, "learning_rate": 3.5361892306257666e-06, "loss": 0.4536, "step": 4798 }, { "epoch": 2.3807868933846343, "grad_norm": 0.0716157727005352, "learning_rate": 3.534320287840135e-06, "loss": 0.4606, "step": 4799 }, { "epoch": 2.3812833560878737, "grad_norm": 0.07398435347159586, "learning_rate": 3.532451569060148e-06, "loss": 0.4672, "step": 4800 }, { "epoch": 2.3817798187911134, "grad_norm": 0.07071056978522963, "learning_rate": 3.530583074571407e-06, 
"loss": 0.4429, "step": 4801 }, { "epoch": 2.3822762814943528, "grad_norm": 0.07240856332363192, "learning_rate": 3.5287148046594847e-06, "loss": 0.4532, "step": 4802 }, { "epoch": 2.382772744197592, "grad_norm": 0.0719354437460463, "learning_rate": 3.5268467596099126e-06, "loss": 0.4594, "step": 4803 }, { "epoch": 2.3832692069008314, "grad_norm": 0.07040324938389426, "learning_rate": 3.524978939708193e-06, "loss": 0.439, "step": 4804 }, { "epoch": 2.383765669604071, "grad_norm": 0.07230438616729792, "learning_rate": 3.5231113452397927e-06, "loss": 0.4264, "step": 4805 }, { "epoch": 2.3842621323073105, "grad_norm": 0.07399897104250126, "learning_rate": 3.5212439764901425e-06, "loss": 0.4619, "step": 4806 }, { "epoch": 2.38475859501055, "grad_norm": 0.07318270307419919, "learning_rate": 3.5193768337446418e-06, "loss": 0.4589, "step": 4807 }, { "epoch": 2.385255057713789, "grad_norm": 0.07315629477062484, "learning_rate": 3.5175099172886507e-06, "loss": 0.4932, "step": 4808 }, { "epoch": 2.3857515204170285, "grad_norm": 0.07225843453148965, "learning_rate": 3.515643227407499e-06, "loss": 0.456, "step": 4809 }, { "epoch": 2.386247983120268, "grad_norm": 0.07421180142061967, "learning_rate": 3.51377676438648e-06, "loss": 0.4508, "step": 4810 }, { "epoch": 2.3867444458235076, "grad_norm": 0.07108575490410428, "learning_rate": 3.511910528510854e-06, "loss": 0.4356, "step": 4811 }, { "epoch": 2.387240908526747, "grad_norm": 0.07724965177245846, "learning_rate": 3.510044520065843e-06, "loss": 0.4847, "step": 4812 }, { "epoch": 2.3877373712299863, "grad_norm": 0.07282260233134959, "learning_rate": 3.5081787393366374e-06, "loss": 0.4386, "step": 4813 }, { "epoch": 2.3882338339332256, "grad_norm": 0.07152279900165055, "learning_rate": 3.506313186608392e-06, "loss": 0.4293, "step": 4814 }, { "epoch": 2.3887302966364654, "grad_norm": 0.0746752837500685, "learning_rate": 3.504447862166227e-06, "loss": 0.4697, "step": 4815 }, { "epoch": 2.3892267593397047, "grad_norm": 0.07436156575975729, "learning_rate": 3.502582766295227e-06, "loss": 0.4888, "step": 4816 }, { "epoch": 2.389723222042944, "grad_norm": 0.07478976244885759, "learning_rate": 3.500717899280442e-06, "loss": 0.4683, "step": 4817 }, { "epoch": 2.3902196847461834, "grad_norm": 0.07037325001387294, "learning_rate": 3.498853261406888e-06, "loss": 0.4378, "step": 4818 }, { "epoch": 2.3907161474494227, "grad_norm": 0.07631373189207717, "learning_rate": 3.4969888529595426e-06, "loss": 0.479, "step": 4819 }, { "epoch": 2.391212610152662, "grad_norm": 0.07426025126427815, "learning_rate": 3.4951246742233517e-06, "loss": 0.4594, "step": 4820 }, { "epoch": 2.391709072855902, "grad_norm": 0.0727151200637013, "learning_rate": 3.4932607254832257e-06, "loss": 0.4694, "step": 4821 }, { "epoch": 2.392205535559141, "grad_norm": 0.07184130765612347, "learning_rate": 3.4913970070240388e-06, "loss": 0.4627, "step": 4822 }, { "epoch": 2.3927019982623805, "grad_norm": 0.07136506615380843, "learning_rate": 3.4895335191306323e-06, "loss": 0.4406, "step": 4823 }, { "epoch": 2.39319846096562, "grad_norm": 0.0831197941937164, "learning_rate": 3.4876702620878072e-06, "loss": 0.5137, "step": 4824 }, { "epoch": 2.3936949236688596, "grad_norm": 0.07098730544237608, "learning_rate": 3.4858072361803347e-06, "loss": 0.4703, "step": 4825 }, { "epoch": 2.394191386372099, "grad_norm": 0.07435224540563783, "learning_rate": 3.483944441692948e-06, "loss": 0.432, "step": 4826 }, { "epoch": 2.3946878490753383, "grad_norm": 0.07156278419670799, "learning_rate": 3.482081878910346e-06, 
"loss": 0.4451, "step": 4827 }, { "epoch": 2.3951843117785776, "grad_norm": 0.07422050116299996, "learning_rate": 3.4802195481171895e-06, "loss": 0.4613, "step": 4828 }, { "epoch": 2.395680774481817, "grad_norm": 0.07199435924853591, "learning_rate": 3.4783574495981075e-06, "loss": 0.4442, "step": 4829 }, { "epoch": 2.3961772371850563, "grad_norm": 0.07585944049032202, "learning_rate": 3.4764955836376924e-06, "loss": 0.4979, "step": 4830 }, { "epoch": 2.396673699888296, "grad_norm": 0.07129212048060686, "learning_rate": 3.4746339505204986e-06, "loss": 0.4557, "step": 4831 }, { "epoch": 2.3971701625915354, "grad_norm": 0.072693573519986, "learning_rate": 3.4727725505310496e-06, "loss": 0.4352, "step": 4832 }, { "epoch": 2.3976666252947747, "grad_norm": 0.07448848764186855, "learning_rate": 3.470911383953828e-06, "loss": 0.44, "step": 4833 }, { "epoch": 2.398163087998014, "grad_norm": 0.07497358791166973, "learning_rate": 3.469050451073287e-06, "loss": 0.4587, "step": 4834 }, { "epoch": 2.398659550701254, "grad_norm": 0.07184294064328557, "learning_rate": 3.467189752173835e-06, "loss": 0.453, "step": 4835 }, { "epoch": 2.399156013404493, "grad_norm": 0.07433389489274306, "learning_rate": 3.4653292875398523e-06, "loss": 0.4821, "step": 4836 }, { "epoch": 2.3996524761077325, "grad_norm": 0.07068190313163356, "learning_rate": 3.4634690574556815e-06, "loss": 0.4549, "step": 4837 }, { "epoch": 2.400148938810972, "grad_norm": 0.07465144485609781, "learning_rate": 3.4616090622056296e-06, "loss": 0.48, "step": 4838 }, { "epoch": 2.400645401514211, "grad_norm": 0.07169302316248427, "learning_rate": 3.459749302073967e-06, "loss": 0.4451, "step": 4839 }, { "epoch": 2.4011418642174505, "grad_norm": 0.07085649592361846, "learning_rate": 3.457889777344926e-06, "loss": 0.4607, "step": 4840 }, { "epoch": 2.4016383269206902, "grad_norm": 0.07329969355902644, "learning_rate": 3.4560304883027072e-06, "loss": 0.4545, "step": 4841 }, { "epoch": 2.4021347896239296, "grad_norm": 0.07310247900538014, "learning_rate": 3.4541714352314726e-06, "loss": 0.4726, "step": 4842 }, { "epoch": 2.402631252327169, "grad_norm": 0.07103939876575813, "learning_rate": 3.4523126184153483e-06, "loss": 0.4501, "step": 4843 }, { "epoch": 2.4031277150304082, "grad_norm": 0.08263769021042183, "learning_rate": 3.4504540381384265e-06, "loss": 0.4879, "step": 4844 }, { "epoch": 2.403624177733648, "grad_norm": 0.07030234205937556, "learning_rate": 3.448595694684758e-06, "loss": 0.4631, "step": 4845 }, { "epoch": 2.4041206404368873, "grad_norm": 0.06845809783951995, "learning_rate": 3.4467375883383638e-06, "loss": 0.4174, "step": 4846 }, { "epoch": 2.4046171031401267, "grad_norm": 0.07368759949851977, "learning_rate": 3.444879719383224e-06, "loss": 0.4655, "step": 4847 }, { "epoch": 2.405113565843366, "grad_norm": 0.06867761594775768, "learning_rate": 3.4430220881032855e-06, "loss": 0.4207, "step": 4848 }, { "epoch": 2.4056100285466053, "grad_norm": 0.07176053293602179, "learning_rate": 3.441164694782456e-06, "loss": 0.4497, "step": 4849 }, { "epoch": 2.4061064912498447, "grad_norm": 0.07308211559699186, "learning_rate": 3.4393075397046105e-06, "loss": 0.4453, "step": 4850 }, { "epoch": 2.4066029539530844, "grad_norm": 0.0729020237538028, "learning_rate": 3.437450623153582e-06, "loss": 0.461, "step": 4851 }, { "epoch": 2.4070994166563238, "grad_norm": 0.06998983752353476, "learning_rate": 3.4355939454131722e-06, "loss": 0.4848, "step": 4852 }, { "epoch": 2.407595879359563, "grad_norm": 0.07186086298195375, "learning_rate": 
3.433737506767144e-06, "loss": 0.4565, "step": 4853 }, { "epoch": 2.4080923420628024, "grad_norm": 0.07480359801968427, "learning_rate": 3.4318813074992253e-06, "loss": 0.453, "step": 4854 }, { "epoch": 2.4085888047660418, "grad_norm": 0.07144531652703363, "learning_rate": 3.430025347893107e-06, "loss": 0.4567, "step": 4855 }, { "epoch": 2.4090852674692815, "grad_norm": 0.07124218226500924, "learning_rate": 3.4281696282324402e-06, "loss": 0.4288, "step": 4856 }, { "epoch": 2.409581730172521, "grad_norm": 0.07058037892685524, "learning_rate": 3.426314148800843e-06, "loss": 0.4511, "step": 4857 }, { "epoch": 2.41007819287576, "grad_norm": 0.06929868897535611, "learning_rate": 3.424458909881897e-06, "loss": 0.4511, "step": 4858 }, { "epoch": 2.4105746555789995, "grad_norm": 0.07525160754840045, "learning_rate": 3.4226039117591443e-06, "loss": 0.5151, "step": 4859 }, { "epoch": 2.411071118282239, "grad_norm": 0.07513115447244399, "learning_rate": 3.420749154716093e-06, "loss": 0.4584, "step": 4860 }, { "epoch": 2.4115675809854786, "grad_norm": 0.07269003207123895, "learning_rate": 3.418894639036211e-06, "loss": 0.4729, "step": 4861 }, { "epoch": 2.412064043688718, "grad_norm": 0.07404592017298095, "learning_rate": 3.4170403650029327e-06, "loss": 0.4696, "step": 4862 }, { "epoch": 2.4125605063919573, "grad_norm": 0.07014295136455598, "learning_rate": 3.415186332899653e-06, "loss": 0.4162, "step": 4863 }, { "epoch": 2.4130569690951966, "grad_norm": 0.07251567829613642, "learning_rate": 3.4133325430097337e-06, "loss": 0.482, "step": 4864 }, { "epoch": 2.413553431798436, "grad_norm": 0.07182370674066571, "learning_rate": 3.411478995616493e-06, "loss": 0.4331, "step": 4865 }, { "epoch": 2.4140498945016757, "grad_norm": 0.07201533809263864, "learning_rate": 3.409625691003221e-06, "loss": 0.4801, "step": 4866 }, { "epoch": 2.414546357204915, "grad_norm": 0.07336794806212467, "learning_rate": 3.407772629453159e-06, "loss": 0.4623, "step": 4867 }, { "epoch": 2.4150428199081544, "grad_norm": 0.07361076825262522, "learning_rate": 3.405919811249522e-06, "loss": 0.4716, "step": 4868 }, { "epoch": 2.4155392826113937, "grad_norm": 0.07468347922530201, "learning_rate": 3.404067236675483e-06, "loss": 0.4764, "step": 4869 }, { "epoch": 2.416035745314633, "grad_norm": 0.07232813832383798, "learning_rate": 3.4022149060141775e-06, "loss": 0.4794, "step": 4870 }, { "epoch": 2.416532208017873, "grad_norm": 0.07258532390558077, "learning_rate": 3.400362819548706e-06, "loss": 0.4761, "step": 4871 }, { "epoch": 2.417028670721112, "grad_norm": 0.07485777438080334, "learning_rate": 3.3985109775621284e-06, "loss": 0.5034, "step": 4872 }, { "epoch": 2.4175251334243515, "grad_norm": 0.07137979655942545, "learning_rate": 3.3966593803374703e-06, "loss": 0.4739, "step": 4873 }, { "epoch": 2.418021596127591, "grad_norm": 0.06859686509924706, "learning_rate": 3.394808028157718e-06, "loss": 0.4281, "step": 4874 }, { "epoch": 2.41851805883083, "grad_norm": 0.07187345846385193, "learning_rate": 3.392956921305821e-06, "loss": 0.4418, "step": 4875 }, { "epoch": 2.41901452153407, "grad_norm": 0.07594593901365239, "learning_rate": 3.3911060600646934e-06, "loss": 0.4575, "step": 4876 }, { "epoch": 2.4195109842373093, "grad_norm": 0.07446272844243572, "learning_rate": 3.3892554447172066e-06, "loss": 0.4414, "step": 4877 }, { "epoch": 2.4200074469405486, "grad_norm": 0.07141489930382675, "learning_rate": 3.3874050755461984e-06, "loss": 0.4506, "step": 4878 }, { "epoch": 2.420503909643788, "grad_norm": 0.07426444294024223, "learning_rate": 
3.385554952834469e-06, "loss": 0.4685, "step": 4879 }, { "epoch": 2.4210003723470273, "grad_norm": 0.06977047817721685, "learning_rate": 3.3837050768647784e-06, "loss": 0.4509, "step": 4880 }, { "epoch": 2.421496835050267, "grad_norm": 0.07200114942417052, "learning_rate": 3.3818554479198532e-06, "loss": 0.461, "step": 4881 }, { "epoch": 2.4219932977535064, "grad_norm": 0.07146768375177204, "learning_rate": 3.380006066282378e-06, "loss": 0.4449, "step": 4882 }, { "epoch": 2.4224897604567457, "grad_norm": 0.07020862149032203, "learning_rate": 3.3781569322350006e-06, "loss": 0.433, "step": 4883 }, { "epoch": 2.422986223159985, "grad_norm": 0.07643393327996241, "learning_rate": 3.3763080460603307e-06, "loss": 0.4977, "step": 4884 }, { "epoch": 2.4234826858632244, "grad_norm": 0.07396500457066017, "learning_rate": 3.374459408040942e-06, "loss": 0.452, "step": 4885 }, { "epoch": 2.423979148566464, "grad_norm": 0.07520521684618664, "learning_rate": 3.3726110184593697e-06, "loss": 0.4555, "step": 4886 }, { "epoch": 2.4244756112697035, "grad_norm": 0.07150715320587676, "learning_rate": 3.3707628775981106e-06, "loss": 0.4581, "step": 4887 }, { "epoch": 2.424972073972943, "grad_norm": 0.07074504126181777, "learning_rate": 3.368914985739622e-06, "loss": 0.4827, "step": 4888 }, { "epoch": 2.425468536676182, "grad_norm": 0.0711238360588336, "learning_rate": 3.367067343166326e-06, "loss": 0.4586, "step": 4889 }, { "epoch": 2.4259649993794214, "grad_norm": 0.07267839426440299, "learning_rate": 3.365219950160603e-06, "loss": 0.4937, "step": 4890 }, { "epoch": 2.4264614620826612, "grad_norm": 0.07170663157707102, "learning_rate": 3.3633728070048e-06, "loss": 0.4666, "step": 4891 }, { "epoch": 2.4269579247859006, "grad_norm": 0.07319404492664315, "learning_rate": 3.3615259139812227e-06, "loss": 0.4579, "step": 4892 }, { "epoch": 2.42745438748914, "grad_norm": 0.07268583391352942, "learning_rate": 3.359679271372138e-06, "loss": 0.4534, "step": 4893 }, { "epoch": 2.427950850192379, "grad_norm": 0.07320504663664425, "learning_rate": 3.357832879459776e-06, "loss": 0.4474, "step": 4894 }, { "epoch": 2.4284473128956185, "grad_norm": 0.07329344087726802, "learning_rate": 3.3559867385263277e-06, "loss": 0.4982, "step": 4895 }, { "epoch": 2.4289437755988583, "grad_norm": 0.0741107539071744, "learning_rate": 3.3541408488539474e-06, "loss": 0.4815, "step": 4896 }, { "epoch": 2.4294402383020977, "grad_norm": 0.07340753028917309, "learning_rate": 3.3522952107247496e-06, "loss": 0.4671, "step": 4897 }, { "epoch": 2.429936701005337, "grad_norm": 0.07199561910628434, "learning_rate": 3.35044982442081e-06, "loss": 0.4743, "step": 4898 }, { "epoch": 2.4304331637085763, "grad_norm": 0.07489732948889798, "learning_rate": 3.3486046902241663e-06, "loss": 0.4826, "step": 4899 }, { "epoch": 2.4309296264118156, "grad_norm": 0.08229476417910944, "learning_rate": 3.346759808416816e-06, "loss": 0.4673, "step": 4900 }, { "epoch": 2.4314260891150554, "grad_norm": 0.0734959805508446, "learning_rate": 3.344915179280722e-06, "loss": 0.4691, "step": 4901 }, { "epoch": 2.4319225518182948, "grad_norm": 0.07588738560643025, "learning_rate": 3.3430708030978055e-06, "loss": 0.4871, "step": 4902 }, { "epoch": 2.432419014521534, "grad_norm": 0.0723640039745302, "learning_rate": 3.3412266801499503e-06, "loss": 0.4723, "step": 4903 }, { "epoch": 2.4329154772247734, "grad_norm": 0.07436716244661329, "learning_rate": 3.339382810719001e-06, "loss": 0.4694, "step": 4904 }, { "epoch": 2.4334119399280127, "grad_norm": 0.07110548824172484, "learning_rate": 
3.337539195086762e-06, "loss": 0.4439, "step": 4905 }, { "epoch": 2.4339084026312525, "grad_norm": 0.07318853233485054, "learning_rate": 3.335695833535001e-06, "loss": 0.4558, "step": 4906 }, { "epoch": 2.434404865334492, "grad_norm": 0.07070760691238002, "learning_rate": 3.3338527263454478e-06, "loss": 0.4672, "step": 4907 }, { "epoch": 2.434901328037731, "grad_norm": 0.07408841815287856, "learning_rate": 3.3320098737997915e-06, "loss": 0.4727, "step": 4908 }, { "epoch": 2.4353977907409705, "grad_norm": 0.07050033637736432, "learning_rate": 3.3301672761796805e-06, "loss": 0.438, "step": 4909 }, { "epoch": 2.43589425344421, "grad_norm": 0.07223651097731444, "learning_rate": 3.328324933766728e-06, "loss": 0.4651, "step": 4910 }, { "epoch": 2.436390716147449, "grad_norm": 0.07259102222113285, "learning_rate": 3.326482846842506e-06, "loss": 0.442, "step": 4911 }, { "epoch": 2.436887178850689, "grad_norm": 0.07238593049477171, "learning_rate": 3.3246410156885477e-06, "loss": 0.4697, "step": 4912 }, { "epoch": 2.4373836415539283, "grad_norm": 0.07343801139786234, "learning_rate": 3.322799440586349e-06, "loss": 0.4807, "step": 4913 }, { "epoch": 2.4378801042571676, "grad_norm": 0.0704308581928645, "learning_rate": 3.3209581218173636e-06, "loss": 0.4603, "step": 4914 }, { "epoch": 2.438376566960407, "grad_norm": 0.07078467248069595, "learning_rate": 3.3191170596630085e-06, "loss": 0.4174, "step": 4915 }, { "epoch": 2.4388730296636467, "grad_norm": 0.07345596267263887, "learning_rate": 3.317276254404659e-06, "loss": 0.4572, "step": 4916 }, { "epoch": 2.439369492366886, "grad_norm": 0.07196161644566479, "learning_rate": 3.315435706323653e-06, "loss": 0.4454, "step": 4917 }, { "epoch": 2.4398659550701254, "grad_norm": 0.07141470733605967, "learning_rate": 3.3135954157012894e-06, "loss": 0.4449, "step": 4918 }, { "epoch": 2.4403624177733647, "grad_norm": 0.07559725227479616, "learning_rate": 3.3117553828188275e-06, "loss": 0.4462, "step": 4919 }, { "epoch": 2.440858880476604, "grad_norm": 0.07448387570855905, "learning_rate": 3.309915607957487e-06, "loss": 0.4733, "step": 4920 }, { "epoch": 2.4413553431798434, "grad_norm": 0.07351317002445866, "learning_rate": 3.3080760913984468e-06, "loss": 0.4597, "step": 4921 }, { "epoch": 2.441851805883083, "grad_norm": 0.06980086573237342, "learning_rate": 3.306236833422848e-06, "loss": 0.4514, "step": 4922 }, { "epoch": 2.4423482685863225, "grad_norm": 0.07197898130409723, "learning_rate": 3.3043978343117916e-06, "loss": 0.4644, "step": 4923 }, { "epoch": 2.442844731289562, "grad_norm": 0.07488286290152556, "learning_rate": 3.3025590943463403e-06, "loss": 0.4903, "step": 4924 }, { "epoch": 2.443341193992801, "grad_norm": 0.07248650394814932, "learning_rate": 3.3007206138075143e-06, "loss": 0.4519, "step": 4925 }, { "epoch": 2.443837656696041, "grad_norm": 0.0690974071655818, "learning_rate": 3.2988823929762965e-06, "loss": 0.4323, "step": 4926 }, { "epoch": 2.4443341193992802, "grad_norm": 0.07407912445276958, "learning_rate": 3.2970444321336294e-06, "loss": 0.4667, "step": 4927 }, { "epoch": 2.4448305821025196, "grad_norm": 0.07280384197326245, "learning_rate": 3.2952067315604162e-06, "loss": 0.4606, "step": 4928 }, { "epoch": 2.445327044805759, "grad_norm": 0.07026914227270502, "learning_rate": 3.2933692915375205e-06, "loss": 0.4274, "step": 4929 }, { "epoch": 2.4458235075089982, "grad_norm": 0.07225020966833783, "learning_rate": 3.2915321123457654e-06, "loss": 0.4325, "step": 4930 }, { "epoch": 2.4463199702122376, "grad_norm": 0.07631532903043024, 
"learning_rate": 3.2896951942659334e-06, "loss": 0.4743, "step": 4931 }, { "epoch": 2.4468164329154773, "grad_norm": 0.07656256169012282, "learning_rate": 3.2878585375787676e-06, "loss": 0.4711, "step": 4932 }, { "epoch": 2.4473128956187167, "grad_norm": 0.0757992804891011, "learning_rate": 3.2860221425649714e-06, "loss": 0.4798, "step": 4933 }, { "epoch": 2.447809358321956, "grad_norm": 0.07241109508407907, "learning_rate": 3.2841860095052096e-06, "loss": 0.4587, "step": 4934 }, { "epoch": 2.4483058210251953, "grad_norm": 0.07277584878584002, "learning_rate": 3.2823501386801055e-06, "loss": 0.4628, "step": 4935 }, { "epoch": 2.448802283728435, "grad_norm": 0.07268028763143215, "learning_rate": 3.2805145303702433e-06, "loss": 0.4536, "step": 4936 }, { "epoch": 2.4492987464316744, "grad_norm": 0.07500003929320131, "learning_rate": 3.278679184856164e-06, "loss": 0.4681, "step": 4937 }, { "epoch": 2.4497952091349138, "grad_norm": 0.07799652555267876, "learning_rate": 3.276844102418372e-06, "loss": 0.51, "step": 4938 }, { "epoch": 2.450291671838153, "grad_norm": 0.07138560918989682, "learning_rate": 3.2750092833373303e-06, "loss": 0.4368, "step": 4939 }, { "epoch": 2.4507881345413924, "grad_norm": 0.07259687502633978, "learning_rate": 3.273174727893463e-06, "loss": 0.4782, "step": 4940 }, { "epoch": 2.4512845972446318, "grad_norm": 0.07348721712699163, "learning_rate": 3.27134043636715e-06, "loss": 0.4433, "step": 4941 }, { "epoch": 2.4517810599478715, "grad_norm": 0.07367500408800275, "learning_rate": 3.2695064090387328e-06, "loss": 0.4806, "step": 4942 }, { "epoch": 2.452277522651111, "grad_norm": 0.0718678944614559, "learning_rate": 3.2676726461885167e-06, "loss": 0.4614, "step": 4943 }, { "epoch": 2.45277398535435, "grad_norm": 0.07364739250647794, "learning_rate": 3.2658391480967594e-06, "loss": 0.4864, "step": 4944 }, { "epoch": 2.4532704480575895, "grad_norm": 0.07305171136004614, "learning_rate": 3.264005915043685e-06, "loss": 0.479, "step": 4945 }, { "epoch": 2.4537669107608293, "grad_norm": 0.07653717036474185, "learning_rate": 3.2621729473094704e-06, "loss": 0.4646, "step": 4946 }, { "epoch": 2.4542633734640686, "grad_norm": 0.07221997080472961, "learning_rate": 3.2603402451742594e-06, "loss": 0.4405, "step": 4947 }, { "epoch": 2.454759836167308, "grad_norm": 0.0737897302199099, "learning_rate": 3.258507808918146e-06, "loss": 0.4587, "step": 4948 }, { "epoch": 2.4552562988705473, "grad_norm": 0.07212895440920399, "learning_rate": 3.2566756388211917e-06, "loss": 0.4522, "step": 4949 }, { "epoch": 2.4557527615737866, "grad_norm": 0.07275303783957517, "learning_rate": 3.254843735163414e-06, "loss": 0.435, "step": 4950 }, { "epoch": 2.456249224277026, "grad_norm": 0.07110446769175349, "learning_rate": 3.253012098224789e-06, "loss": 0.4625, "step": 4951 }, { "epoch": 2.4567456869802657, "grad_norm": 0.07533616154433118, "learning_rate": 3.2511807282852564e-06, "loss": 0.476, "step": 4952 }, { "epoch": 2.457242149683505, "grad_norm": 0.07121508664898368, "learning_rate": 3.2493496256247074e-06, "loss": 0.437, "step": 4953 }, { "epoch": 2.4577386123867444, "grad_norm": 0.07253166936136166, "learning_rate": 3.247518790522999e-06, "loss": 0.4483, "step": 4954 }, { "epoch": 2.4582350750899837, "grad_norm": 0.07591020227064676, "learning_rate": 3.245688223259944e-06, "loss": 0.4883, "step": 4955 }, { "epoch": 2.4587315377932235, "grad_norm": 0.07434210263708153, "learning_rate": 3.2438579241153166e-06, "loss": 0.4957, "step": 4956 }, { "epoch": 2.459228000496463, "grad_norm": 
0.0736921894882178, "learning_rate": 3.242027893368849e-06, "loss": 0.4487, "step": 4957 }, { "epoch": 2.459724463199702, "grad_norm": 0.07417743880067373, "learning_rate": 3.240198131300229e-06, "loss": 0.4582, "step": 4958 }, { "epoch": 2.4602209259029415, "grad_norm": 0.07213475102302708, "learning_rate": 3.2383686381891087e-06, "loss": 0.4626, "step": 4959 }, { "epoch": 2.460717388606181, "grad_norm": 0.07342311279959657, "learning_rate": 3.236539414315096e-06, "loss": 0.4572, "step": 4960 }, { "epoch": 2.46121385130942, "grad_norm": 0.07185612590850873, "learning_rate": 3.234710459957761e-06, "loss": 0.4919, "step": 4961 }, { "epoch": 2.46171031401266, "grad_norm": 0.07275605720944686, "learning_rate": 3.232881775396626e-06, "loss": 0.4511, "step": 4962 }, { "epoch": 2.4622067767158993, "grad_norm": 0.07622583368006376, "learning_rate": 3.2310533609111805e-06, "loss": 0.4777, "step": 4963 }, { "epoch": 2.4627032394191386, "grad_norm": 0.07466851152744547, "learning_rate": 3.229225216780864e-06, "loss": 0.4793, "step": 4964 }, { "epoch": 2.463199702122378, "grad_norm": 0.07152900402073481, "learning_rate": 3.227397343285081e-06, "loss": 0.4191, "step": 4965 }, { "epoch": 2.4636961648256177, "grad_norm": 0.07188262340628947, "learning_rate": 3.2255697407031924e-06, "loss": 0.4366, "step": 4966 }, { "epoch": 2.464192627528857, "grad_norm": 0.07348716146826911, "learning_rate": 3.2237424093145175e-06, "loss": 0.4817, "step": 4967 }, { "epoch": 2.4646890902320964, "grad_norm": 0.0713609536000802, "learning_rate": 3.221915349398337e-06, "loss": 0.4474, "step": 4968 }, { "epoch": 2.4651855529353357, "grad_norm": 0.07659482845157203, "learning_rate": 3.2200885612338846e-06, "loss": 0.4926, "step": 4969 }, { "epoch": 2.465682015638575, "grad_norm": 0.07235133973202737, "learning_rate": 3.2182620451003565e-06, "loss": 0.4976, "step": 4970 }, { "epoch": 2.4661784783418144, "grad_norm": 0.07497447521649205, "learning_rate": 3.216435801276907e-06, "loss": 0.4966, "step": 4971 }, { "epoch": 2.466674941045054, "grad_norm": 0.07042342843861166, "learning_rate": 3.2146098300426485e-06, "loss": 0.4481, "step": 4972 }, { "epoch": 2.4671714037482935, "grad_norm": 0.07471585111012378, "learning_rate": 3.2127841316766515e-06, "loss": 0.4472, "step": 4973 }, { "epoch": 2.467667866451533, "grad_norm": 0.07143622672924964, "learning_rate": 3.210958706457944e-06, "loss": 0.4297, "step": 4974 }, { "epoch": 2.468164329154772, "grad_norm": 0.07066114201680801, "learning_rate": 3.2091335546655124e-06, "loss": 0.458, "step": 4975 }, { "epoch": 2.468660791858012, "grad_norm": 0.07504532464434141, "learning_rate": 3.2073086765783023e-06, "loss": 0.4729, "step": 4976 }, { "epoch": 2.4691572545612512, "grad_norm": 0.07239769361863543, "learning_rate": 3.2054840724752184e-06, "loss": 0.4961, "step": 4977 }, { "epoch": 2.4696537172644906, "grad_norm": 0.07219230294929461, "learning_rate": 3.2036597426351203e-06, "loss": 0.4435, "step": 4978 }, { "epoch": 2.47015017996773, "grad_norm": 0.0758055553305713, "learning_rate": 3.2018356873368307e-06, "loss": 0.4634, "step": 4979 }, { "epoch": 2.4706466426709692, "grad_norm": 0.07221304089912467, "learning_rate": 3.2000119068591227e-06, "loss": 0.4689, "step": 4980 }, { "epoch": 2.4711431053742086, "grad_norm": 0.07256877504850277, "learning_rate": 3.198188401480734e-06, "loss": 0.4632, "step": 4981 }, { "epoch": 2.4716395680774483, "grad_norm": 0.07368479644516852, "learning_rate": 3.196365171480359e-06, "loss": 0.4585, "step": 4982 }, { "epoch": 2.4721360307806877, 
"grad_norm": 0.07170466723428409, "learning_rate": 3.1945422171366482e-06, "loss": 0.4643, "step": 4983 }, { "epoch": 2.472632493483927, "grad_norm": 0.0736886755768893, "learning_rate": 3.192719538728212e-06, "loss": 0.4404, "step": 4984 }, { "epoch": 2.4731289561871663, "grad_norm": 0.07409447169142401, "learning_rate": 3.190897136533615e-06, "loss": 0.4486, "step": 4985 }, { "epoch": 2.473625418890406, "grad_norm": 0.07350626854049384, "learning_rate": 3.189075010831385e-06, "loss": 0.4565, "step": 4986 }, { "epoch": 2.4741218815936454, "grad_norm": 0.07724817239470792, "learning_rate": 3.1872531619000024e-06, "loss": 0.4575, "step": 4987 }, { "epoch": 2.4746183442968848, "grad_norm": 0.07394461490386524, "learning_rate": 3.18543159001791e-06, "loss": 0.465, "step": 4988 }, { "epoch": 2.475114807000124, "grad_norm": 0.07219693102311857, "learning_rate": 3.183610295463505e-06, "loss": 0.4636, "step": 4989 }, { "epoch": 2.4756112697033634, "grad_norm": 0.0755170770173934, "learning_rate": 3.1817892785151426e-06, "loss": 0.4921, "step": 4990 }, { "epoch": 2.4761077324066028, "grad_norm": 0.07125952744942823, "learning_rate": 3.179968539451135e-06, "loss": 0.4562, "step": 4991 }, { "epoch": 2.4766041951098425, "grad_norm": 0.0710903052738868, "learning_rate": 3.1781480785497555e-06, "loss": 0.4521, "step": 4992 }, { "epoch": 2.477100657813082, "grad_norm": 0.07147477701855874, "learning_rate": 3.17632789608923e-06, "loss": 0.4319, "step": 4993 }, { "epoch": 2.477597120516321, "grad_norm": 0.06860598579714546, "learning_rate": 3.174507992347746e-06, "loss": 0.4218, "step": 4994 }, { "epoch": 2.4780935832195605, "grad_norm": 0.06928050623670558, "learning_rate": 3.172688367603447e-06, "loss": 0.4201, "step": 4995 }, { "epoch": 2.4785900459228, "grad_norm": 0.07393072454941838, "learning_rate": 3.170869022134432e-06, "loss": 0.5016, "step": 4996 }, { "epoch": 2.4790865086260396, "grad_norm": 0.07611053154211594, "learning_rate": 3.1690499562187573e-06, "loss": 0.4742, "step": 4997 }, { "epoch": 2.479582971329279, "grad_norm": 0.07172895151999717, "learning_rate": 3.1672311701344404e-06, "loss": 0.44, "step": 4998 }, { "epoch": 2.4800794340325183, "grad_norm": 0.0717638559387684, "learning_rate": 3.165412664159453e-06, "loss": 0.4627, "step": 4999 }, { "epoch": 2.4805758967357576, "grad_norm": 0.07041680276845996, "learning_rate": 3.163594438571725e-06, "loss": 0.4697, "step": 5000 }, { "epoch": 2.481072359438997, "grad_norm": 0.06961263950820218, "learning_rate": 3.161776493649141e-06, "loss": 0.44, "step": 5001 }, { "epoch": 2.4815688221422367, "grad_norm": 0.07677040647683987, "learning_rate": 3.1599588296695476e-06, "loss": 0.4688, "step": 5002 }, { "epoch": 2.482065284845476, "grad_norm": 0.07432966041252718, "learning_rate": 3.158141446910744e-06, "loss": 0.4706, "step": 5003 }, { "epoch": 2.4825617475487154, "grad_norm": 0.070305882460339, "learning_rate": 3.1563243456504877e-06, "loss": 0.4812, "step": 5004 }, { "epoch": 2.4830582102519547, "grad_norm": 0.07137322616898634, "learning_rate": 3.1545075261664954e-06, "loss": 0.4549, "step": 5005 }, { "epoch": 2.483554672955194, "grad_norm": 0.07262431628108891, "learning_rate": 3.1526909887364365e-06, "loss": 0.4325, "step": 5006 }, { "epoch": 2.484051135658434, "grad_norm": 0.07360856338067412, "learning_rate": 3.1508747336379407e-06, "loss": 0.4459, "step": 5007 }, { "epoch": 2.484547598361673, "grad_norm": 0.07116264212561499, "learning_rate": 3.1490587611485936e-06, "loss": 0.4599, "step": 5008 }, { "epoch": 2.4850440610649125, 
"grad_norm": 0.07434591832468768, "learning_rate": 3.1472430715459366e-06, "loss": 0.4683, "step": 5009 }, { "epoch": 2.485540523768152, "grad_norm": 0.0726460286968446, "learning_rate": 3.145427665107471e-06, "loss": 0.4627, "step": 5010 }, { "epoch": 2.486036986471391, "grad_norm": 0.07457139906557984, "learning_rate": 3.1436125421106507e-06, "loss": 0.4867, "step": 5011 }, { "epoch": 2.486533449174631, "grad_norm": 0.07471436808388297, "learning_rate": 3.1417977028328884e-06, "loss": 0.5004, "step": 5012 }, { "epoch": 2.4870299118778703, "grad_norm": 0.07351308940818314, "learning_rate": 3.139983147551552e-06, "loss": 0.4538, "step": 5013 }, { "epoch": 2.4875263745811096, "grad_norm": 0.07325613680703755, "learning_rate": 3.138168876543969e-06, "loss": 0.4745, "step": 5014 }, { "epoch": 2.488022837284349, "grad_norm": 0.07606605052849479, "learning_rate": 3.136354890087421e-06, "loss": 0.4661, "step": 5015 }, { "epoch": 2.4885192999875883, "grad_norm": 0.07037890181768297, "learning_rate": 3.134541188459147e-06, "loss": 0.4309, "step": 5016 }, { "epoch": 2.489015762690828, "grad_norm": 0.07154622198629479, "learning_rate": 3.1327277719363413e-06, "loss": 0.4731, "step": 5017 }, { "epoch": 2.4895122253940674, "grad_norm": 0.07199847674452635, "learning_rate": 3.1309146407961565e-06, "loss": 0.4684, "step": 5018 }, { "epoch": 2.4900086880973067, "grad_norm": 0.0688715415526427, "learning_rate": 3.1291017953157003e-06, "loss": 0.4405, "step": 5019 }, { "epoch": 2.490505150800546, "grad_norm": 0.07243291775387756, "learning_rate": 3.1272892357720376e-06, "loss": 0.466, "step": 5020 }, { "epoch": 2.4910016135037854, "grad_norm": 0.072838356671563, "learning_rate": 3.125476962442189e-06, "loss": 0.456, "step": 5021 }, { "epoch": 2.491498076207025, "grad_norm": 0.0726349062067118, "learning_rate": 3.12366497560313e-06, "loss": 0.461, "step": 5022 }, { "epoch": 2.4919945389102645, "grad_norm": 0.07215929842189774, "learning_rate": 3.121853275531794e-06, "loss": 0.441, "step": 5023 }, { "epoch": 2.492491001613504, "grad_norm": 0.07542042515887382, "learning_rate": 3.120041862505072e-06, "loss": 0.5146, "step": 5024 }, { "epoch": 2.492987464316743, "grad_norm": 0.2562024226795915, "learning_rate": 3.118230736799809e-06, "loss": 0.4881, "step": 5025 }, { "epoch": 2.4934839270199824, "grad_norm": 0.07217630690547586, "learning_rate": 3.1164198986928064e-06, "loss": 0.482, "step": 5026 }, { "epoch": 2.4939803897232222, "grad_norm": 0.07135677306049457, "learning_rate": 3.114609348460821e-06, "loss": 0.4663, "step": 5027 }, { "epoch": 2.4944768524264616, "grad_norm": 0.07086569894661765, "learning_rate": 3.1127990863805668e-06, "loss": 0.4597, "step": 5028 }, { "epoch": 2.494973315129701, "grad_norm": 0.07288462245322094, "learning_rate": 3.110989112728713e-06, "loss": 0.4716, "step": 5029 }, { "epoch": 2.49546977783294, "grad_norm": 0.07127975550983165, "learning_rate": 3.1091794277818845e-06, "loss": 0.4759, "step": 5030 }, { "epoch": 2.4959662405361795, "grad_norm": 0.07251358966959501, "learning_rate": 3.1073700318166638e-06, "loss": 0.47, "step": 5031 }, { "epoch": 2.4964627032394193, "grad_norm": 0.07208491301981003, "learning_rate": 3.1055609251095874e-06, "loss": 0.4532, "step": 5032 }, { "epoch": 2.4969591659426587, "grad_norm": 0.07166247839743241, "learning_rate": 3.1037521079371503e-06, "loss": 0.4669, "step": 5033 }, { "epoch": 2.497455628645898, "grad_norm": 0.07111482822718003, "learning_rate": 3.101943580575798e-06, "loss": 0.4285, "step": 5034 }, { "epoch": 2.4979520913491373, 
"grad_norm": 0.0749071795087903, "learning_rate": 3.1001353433019365e-06, "loss": 0.466, "step": 5035 }, { "epoch": 2.4984485540523766, "grad_norm": 0.07281000376215524, "learning_rate": 3.098327396391926e-06, "loss": 0.4883, "step": 5036 }, { "epoch": 2.4989450167556164, "grad_norm": 0.07552457928843555, "learning_rate": 3.0965197401220824e-06, "loss": 0.4741, "step": 5037 }, { "epoch": 2.4994414794588558, "grad_norm": 0.07389154079442611, "learning_rate": 3.0947123747686756e-06, "loss": 0.4884, "step": 5038 }, { "epoch": 2.499937942162095, "grad_norm": 0.0730632967561261, "learning_rate": 3.0929053006079336e-06, "loss": 0.4806, "step": 5039 }, { "epoch": 2.5004344048653344, "grad_norm": 0.07248646669142482, "learning_rate": 3.091098517916039e-06, "loss": 0.4496, "step": 5040 }, { "epoch": 2.5004344048653344, "eval_loss": 0.5154539942741394, "eval_runtime": 259.0671, "eval_samples_per_second": 117.163, "eval_steps_per_second": 14.649, "step": 5040 }, { "epoch": 2.5009308675685737, "grad_norm": 0.07096380398662457, "learning_rate": 3.0892920269691284e-06, "loss": 0.4433, "step": 5041 }, { "epoch": 2.501427330271813, "grad_norm": 0.0711112506139062, "learning_rate": 3.087485828043296e-06, "loss": 0.4585, "step": 5042 }, { "epoch": 2.501923792975053, "grad_norm": 0.07417792998843728, "learning_rate": 3.085679921414591e-06, "loss": 0.4804, "step": 5043 }, { "epoch": 2.502420255678292, "grad_norm": 0.07232246786213962, "learning_rate": 3.083874307359016e-06, "loss": 0.4652, "step": 5044 }, { "epoch": 2.5029167183815315, "grad_norm": 0.07231894396958254, "learning_rate": 3.0820689861525295e-06, "loss": 0.46, "step": 5045 }, { "epoch": 2.503413181084771, "grad_norm": 0.0741298988460473, "learning_rate": 3.0802639580710465e-06, "loss": 0.4631, "step": 5046 }, { "epoch": 2.5039096437880106, "grad_norm": 0.07126734306557868, "learning_rate": 3.0784592233904363e-06, "loss": 0.458, "step": 5047 }, { "epoch": 2.50440610649125, "grad_norm": 0.07541124816238452, "learning_rate": 3.0766547823865255e-06, "loss": 0.4984, "step": 5048 }, { "epoch": 2.5049025691944893, "grad_norm": 0.07292424807759758, "learning_rate": 3.0748506353350928e-06, "loss": 0.4524, "step": 5049 }, { "epoch": 2.5053990318977286, "grad_norm": 0.07190778953502837, "learning_rate": 3.0730467825118727e-06, "loss": 0.4614, "step": 5050 }, { "epoch": 2.505895494600968, "grad_norm": 0.07507039186827946, "learning_rate": 3.0712432241925547e-06, "loss": 0.4662, "step": 5051 }, { "epoch": 2.5063919573042073, "grad_norm": 0.07226146094163524, "learning_rate": 3.0694399606527853e-06, "loss": 0.4409, "step": 5052 }, { "epoch": 2.506888420007447, "grad_norm": 0.074729432072801, "learning_rate": 3.067636992168165e-06, "loss": 0.4838, "step": 5053 }, { "epoch": 2.5073848827106864, "grad_norm": 0.0733553965599642, "learning_rate": 3.0658343190142454e-06, "loss": 0.453, "step": 5054 }, { "epoch": 2.5078813454139257, "grad_norm": 0.06986856930273888, "learning_rate": 3.064031941466539e-06, "loss": 0.405, "step": 5055 }, { "epoch": 2.508377808117165, "grad_norm": 0.0714472505987162, "learning_rate": 3.0622298598005085e-06, "loss": 0.4381, "step": 5056 }, { "epoch": 2.508874270820405, "grad_norm": 0.07511575062773596, "learning_rate": 3.060428074291575e-06, "loss": 0.4821, "step": 5057 }, { "epoch": 2.509370733523644, "grad_norm": 0.07157582915674207, "learning_rate": 3.058626585215112e-06, "loss": 0.4321, "step": 5058 }, { "epoch": 2.5098671962268835, "grad_norm": 0.07366666603107923, "learning_rate": 3.056825392846449e-06, "loss": 0.4608, "step": 5059 
}, { "epoch": 2.510363658930123, "grad_norm": 0.07254565589833509, "learning_rate": 3.0550244974608675e-06, "loss": 0.473, "step": 5060 }, { "epoch": 2.510860121633362, "grad_norm": 0.07520433230268554, "learning_rate": 3.053223899333605e-06, "loss": 0.493, "step": 5061 }, { "epoch": 2.5113565843366015, "grad_norm": 0.0724140826027767, "learning_rate": 3.0514235987398553e-06, "loss": 0.4469, "step": 5062 }, { "epoch": 2.5118530470398412, "grad_norm": 0.07411108457650745, "learning_rate": 3.049623595954766e-06, "loss": 0.4481, "step": 5063 }, { "epoch": 2.5123495097430806, "grad_norm": 0.07471077841339425, "learning_rate": 3.047823891253438e-06, "loss": 0.5006, "step": 5064 }, { "epoch": 2.51284597244632, "grad_norm": 0.07343300102872428, "learning_rate": 3.046024484910929e-06, "loss": 0.4542, "step": 5065 }, { "epoch": 2.5133424351495592, "grad_norm": 0.0731642469946916, "learning_rate": 3.0442253772022457e-06, "loss": 0.4634, "step": 5066 }, { "epoch": 2.513838897852799, "grad_norm": 0.07152879351921768, "learning_rate": 3.0424265684023556e-06, "loss": 0.4507, "step": 5067 }, { "epoch": 2.5143353605560383, "grad_norm": 0.07093497332741829, "learning_rate": 3.0406280587861775e-06, "loss": 0.453, "step": 5068 }, { "epoch": 2.5148318232592777, "grad_norm": 0.07409077210272415, "learning_rate": 3.038829848628584e-06, "loss": 0.4536, "step": 5069 }, { "epoch": 2.515328285962517, "grad_norm": 0.07212986037709085, "learning_rate": 3.0370319382044046e-06, "loss": 0.474, "step": 5070 }, { "epoch": 2.5158247486657563, "grad_norm": 0.07102190391920636, "learning_rate": 3.035234327788418e-06, "loss": 0.4156, "step": 5071 }, { "epoch": 2.5163212113689957, "grad_norm": 0.0707281924452623, "learning_rate": 3.033437017655363e-06, "loss": 0.433, "step": 5072 }, { "epoch": 2.5168176740722354, "grad_norm": 0.0693389163609712, "learning_rate": 3.031640008079927e-06, "loss": 0.4283, "step": 5073 }, { "epoch": 2.5173141367754748, "grad_norm": 0.07143182707566005, "learning_rate": 3.0298432993367577e-06, "loss": 0.458, "step": 5074 }, { "epoch": 2.517810599478714, "grad_norm": 0.07233793461023176, "learning_rate": 3.02804689170045e-06, "loss": 0.4539, "step": 5075 }, { "epoch": 2.5183070621819534, "grad_norm": 0.07295793425095794, "learning_rate": 3.026250785445558e-06, "loss": 0.4411, "step": 5076 }, { "epoch": 2.518803524885193, "grad_norm": 0.0727848842428524, "learning_rate": 3.024454980846585e-06, "loss": 0.465, "step": 5077 }, { "epoch": 2.5192999875884325, "grad_norm": 0.07399648656532462, "learning_rate": 3.0226594781779926e-06, "loss": 0.4643, "step": 5078 }, { "epoch": 2.519796450291672, "grad_norm": 0.07215847850860996, "learning_rate": 3.0208642777141954e-06, "loss": 0.4373, "step": 5079 }, { "epoch": 2.520292912994911, "grad_norm": 0.07261890968416367, "learning_rate": 3.01906937972956e-06, "loss": 0.4551, "step": 5080 }, { "epoch": 2.5207893756981505, "grad_norm": 0.06909668282131737, "learning_rate": 3.0172747844984098e-06, "loss": 0.4226, "step": 5081 }, { "epoch": 2.52128583840139, "grad_norm": 0.0706356833724616, "learning_rate": 3.0154804922950166e-06, "loss": 0.4314, "step": 5082 }, { "epoch": 2.5217823011046296, "grad_norm": 0.07115984795525102, "learning_rate": 3.01368650339361e-06, "loss": 0.4088, "step": 5083 }, { "epoch": 2.522278763807869, "grad_norm": 0.07209812169556237, "learning_rate": 3.011892818068374e-06, "loss": 0.4479, "step": 5084 }, { "epoch": 2.5227752265111083, "grad_norm": 0.07197949076439182, "learning_rate": 3.0100994365934443e-06, "loss": 0.4529, "step": 5085 }, { 
"epoch": 2.5232716892143476, "grad_norm": 0.07294575988417028, "learning_rate": 3.0083063592429108e-06, "loss": 0.456, "step": 5086 }, { "epoch": 2.5237681519175874, "grad_norm": 0.07085387809339508, "learning_rate": 3.0065135862908147e-06, "loss": 0.4451, "step": 5087 }, { "epoch": 2.5242646146208267, "grad_norm": 0.0737491970059168, "learning_rate": 3.0047211180111537e-06, "loss": 0.448, "step": 5088 }, { "epoch": 2.524761077324066, "grad_norm": 0.07316982337451786, "learning_rate": 3.0029289546778782e-06, "loss": 0.4675, "step": 5089 }, { "epoch": 2.5252575400273054, "grad_norm": 0.07173406586479004, "learning_rate": 3.0011370965648925e-06, "loss": 0.4927, "step": 5090 }, { "epoch": 2.5257540027305447, "grad_norm": 0.06955527651479629, "learning_rate": 2.999345543946052e-06, "loss": 0.4526, "step": 5091 }, { "epoch": 2.526250465433784, "grad_norm": 0.06966454912794548, "learning_rate": 2.997554297095167e-06, "loss": 0.4378, "step": 5092 }, { "epoch": 2.526746928137024, "grad_norm": 0.0720482880545221, "learning_rate": 2.995763356286e-06, "loss": 0.4619, "step": 5093 }, { "epoch": 2.527243390840263, "grad_norm": 0.07265439796787254, "learning_rate": 2.9939727217922685e-06, "loss": 0.4676, "step": 5094 }, { "epoch": 2.5277398535435025, "grad_norm": 0.07214286064337516, "learning_rate": 2.9921823938876426e-06, "loss": 0.4884, "step": 5095 }, { "epoch": 2.528236316246742, "grad_norm": 0.07252007490926463, "learning_rate": 2.990392372845744e-06, "loss": 0.4385, "step": 5096 }, { "epoch": 2.5287327789499816, "grad_norm": 0.07497006099995851, "learning_rate": 2.9886026589401517e-06, "loss": 0.4551, "step": 5097 }, { "epoch": 2.529229241653221, "grad_norm": 0.074559241799246, "learning_rate": 2.986813252444391e-06, "loss": 0.4865, "step": 5098 }, { "epoch": 2.5297257043564603, "grad_norm": 0.07273738797586408, "learning_rate": 2.985024153631946e-06, "loss": 0.4199, "step": 5099 }, { "epoch": 2.5302221670596996, "grad_norm": 0.07055498664081801, "learning_rate": 2.9832353627762513e-06, "loss": 0.4251, "step": 5100 }, { "epoch": 2.530718629762939, "grad_norm": 0.07257439997911404, "learning_rate": 2.9814468801506945e-06, "loss": 0.4616, "step": 5101 }, { "epoch": 2.5312150924661783, "grad_norm": 0.07045107768696802, "learning_rate": 2.979658706028619e-06, "loss": 0.4351, "step": 5102 }, { "epoch": 2.531711555169418, "grad_norm": 0.06993957099507159, "learning_rate": 2.977870840683315e-06, "loss": 0.3987, "step": 5103 }, { "epoch": 2.5322080178726574, "grad_norm": 0.07233526039800486, "learning_rate": 2.976083284388031e-06, "loss": 0.4892, "step": 5104 }, { "epoch": 2.5327044805758967, "grad_norm": 0.07852328921692033, "learning_rate": 2.9742960374159656e-06, "loss": 0.4635, "step": 5105 }, { "epoch": 2.533200943279136, "grad_norm": 0.0724494414963727, "learning_rate": 2.9725091000402716e-06, "loss": 0.4802, "step": 5106 }, { "epoch": 2.533697405982376, "grad_norm": 0.0686244582602942, "learning_rate": 2.9707224725340543e-06, "loss": 0.4251, "step": 5107 }, { "epoch": 2.534193868685615, "grad_norm": 0.06821451041156537, "learning_rate": 2.9689361551703693e-06, "loss": 0.4142, "step": 5108 }, { "epoch": 2.5346903313888545, "grad_norm": 0.07016181940774685, "learning_rate": 2.9671501482222277e-06, "loss": 0.4622, "step": 5109 }, { "epoch": 2.535186794092094, "grad_norm": 0.07390854121226326, "learning_rate": 2.9653644519625915e-06, "loss": 0.4571, "step": 5110 }, { "epoch": 2.535683256795333, "grad_norm": 0.07218976475871981, "learning_rate": 2.963579066664375e-06, "loss": 0.4771, "step": 5111 }, { 
"epoch": 2.5361797194985725, "grad_norm": 0.07368470243297218, "learning_rate": 2.961793992600447e-06, "loss": 0.459, "step": 5112 }, { "epoch": 2.5366761822018122, "grad_norm": 0.07320081493889653, "learning_rate": 2.960009230043628e-06, "loss": 0.4496, "step": 5113 }, { "epoch": 2.5371726449050516, "grad_norm": 0.07304793240187735, "learning_rate": 2.9582247792666876e-06, "loss": 0.4598, "step": 5114 }, { "epoch": 2.537669107608291, "grad_norm": 0.07328829880376828, "learning_rate": 2.956440640542353e-06, "loss": 0.4597, "step": 5115 }, { "epoch": 2.5381655703115302, "grad_norm": 0.07674094919345029, "learning_rate": 2.9546568141433007e-06, "loss": 0.4669, "step": 5116 }, { "epoch": 2.53866203301477, "grad_norm": 0.07259058089747297, "learning_rate": 2.9528733003421597e-06, "loss": 0.4718, "step": 5117 }, { "epoch": 2.5391584957180093, "grad_norm": 0.07046795106722405, "learning_rate": 2.9510900994115125e-06, "loss": 0.4421, "step": 5118 }, { "epoch": 2.5396549584212487, "grad_norm": 0.07287748922294839, "learning_rate": 2.949307211623891e-06, "loss": 0.5054, "step": 5119 }, { "epoch": 2.540151421124488, "grad_norm": 0.07125903898864844, "learning_rate": 2.947524637251782e-06, "loss": 0.4689, "step": 5120 }, { "epoch": 2.5406478838277273, "grad_norm": 0.07329623501191143, "learning_rate": 2.945742376567623e-06, "loss": 0.4722, "step": 5121 }, { "epoch": 2.5411443465309667, "grad_norm": 0.07112535061046521, "learning_rate": 2.943960429843804e-06, "loss": 0.417, "step": 5122 }, { "epoch": 2.5416408092342064, "grad_norm": 0.07322676770404164, "learning_rate": 2.9421787973526694e-06, "loss": 0.4622, "step": 5123 }, { "epoch": 2.5421372719374458, "grad_norm": 0.07091383942713003, "learning_rate": 2.940397479366509e-06, "loss": 0.4664, "step": 5124 }, { "epoch": 2.542633734640685, "grad_norm": 0.07158178260639342, "learning_rate": 2.9386164761575716e-06, "loss": 0.4424, "step": 5125 }, { "epoch": 2.5431301973439244, "grad_norm": 0.07217103078505158, "learning_rate": 2.936835787998053e-06, "loss": 0.4537, "step": 5126 }, { "epoch": 2.543626660047164, "grad_norm": 0.07171812937440548, "learning_rate": 2.935055415160104e-06, "loss": 0.4806, "step": 5127 }, { "epoch": 2.5441231227504035, "grad_norm": 0.07193834390014926, "learning_rate": 2.933275357915826e-06, "loss": 0.4415, "step": 5128 }, { "epoch": 2.544619585453643, "grad_norm": 0.07409938519274292, "learning_rate": 2.9314956165372726e-06, "loss": 0.487, "step": 5129 }, { "epoch": 2.545116048156882, "grad_norm": 0.07535215883909349, "learning_rate": 2.9297161912964476e-06, "loss": 0.4376, "step": 5130 }, { "epoch": 2.5456125108601215, "grad_norm": 0.0740990586903259, "learning_rate": 2.9279370824653087e-06, "loss": 0.4427, "step": 5131 }, { "epoch": 2.546108973563361, "grad_norm": 0.07393544908024881, "learning_rate": 2.926158290315764e-06, "loss": 0.4564, "step": 5132 }, { "epoch": 2.5466054362666006, "grad_norm": 0.07023920417474953, "learning_rate": 2.924379815119672e-06, "loss": 0.4595, "step": 5133 }, { "epoch": 2.54710189896984, "grad_norm": 0.07061916856504286, "learning_rate": 2.9226016571488467e-06, "loss": 0.4635, "step": 5134 }, { "epoch": 2.5475983616730793, "grad_norm": 0.07356084383044202, "learning_rate": 2.9208238166750485e-06, "loss": 0.4317, "step": 5135 }, { "epoch": 2.5480948243763186, "grad_norm": 0.07020373559944719, "learning_rate": 2.9190462939699925e-06, "loss": 0.4249, "step": 5136 }, { "epoch": 2.5485912870795584, "grad_norm": 0.07407304637604707, "learning_rate": 2.917269089305347e-06, "loss": 0.4646, "step": 5137 
}, { "epoch": 2.5490877497827977, "grad_norm": 0.07443631533106954, "learning_rate": 2.915492202952724e-06, "loss": 0.4656, "step": 5138 }, { "epoch": 2.549584212486037, "grad_norm": 0.07372474395212635, "learning_rate": 2.9137156351837005e-06, "loss": 0.4526, "step": 5139 }, { "epoch": 2.5500806751892764, "grad_norm": 0.07683958008697943, "learning_rate": 2.911939386269786e-06, "loss": 0.4832, "step": 5140 }, { "epoch": 2.5505771378925157, "grad_norm": 0.07350654884183572, "learning_rate": 2.9101634564824586e-06, "loss": 0.5004, "step": 5141 }, { "epoch": 2.551073600595755, "grad_norm": 0.07041706999389266, "learning_rate": 2.908387846093138e-06, "loss": 0.4626, "step": 5142 }, { "epoch": 2.551570063298995, "grad_norm": 0.07167233151165785, "learning_rate": 2.9066125553732003e-06, "loss": 0.4389, "step": 5143 }, { "epoch": 2.552066526002234, "grad_norm": 0.07262148199601952, "learning_rate": 2.904837584593968e-06, "loss": 0.4583, "step": 5144 }, { "epoch": 2.5525629887054735, "grad_norm": 0.07309536918324455, "learning_rate": 2.9030629340267165e-06, "loss": 0.4663, "step": 5145 }, { "epoch": 2.553059451408713, "grad_norm": 0.0719409786309271, "learning_rate": 2.9012886039426747e-06, "loss": 0.4438, "step": 5146 }, { "epoch": 2.5535559141119526, "grad_norm": 0.07427692712025832, "learning_rate": 2.8995145946130182e-06, "loss": 0.4592, "step": 5147 }, { "epoch": 2.554052376815192, "grad_norm": 0.07254799565282642, "learning_rate": 2.897740906308879e-06, "loss": 0.4138, "step": 5148 }, { "epoch": 2.5545488395184313, "grad_norm": 0.07232628351087324, "learning_rate": 2.8959675393013353e-06, "loss": 0.4817, "step": 5149 }, { "epoch": 2.5550453022216706, "grad_norm": 0.07168098229006624, "learning_rate": 2.894194493861415e-06, "loss": 0.4674, "step": 5150 }, { "epoch": 2.55554176492491, "grad_norm": 0.07361419756507165, "learning_rate": 2.8924217702601048e-06, "loss": 0.4692, "step": 5151 }, { "epoch": 2.5560382276281493, "grad_norm": 0.07536609973844947, "learning_rate": 2.8906493687683324e-06, "loss": 0.4618, "step": 5152 }, { "epoch": 2.5565346903313886, "grad_norm": 0.07145853364222705, "learning_rate": 2.888877289656985e-06, "loss": 0.424, "step": 5153 }, { "epoch": 2.5570311530346284, "grad_norm": 0.07113786320279357, "learning_rate": 2.887105533196895e-06, "loss": 0.4493, "step": 5154 }, { "epoch": 2.5575276157378677, "grad_norm": 0.07575964297661865, "learning_rate": 2.885334099658844e-06, "loss": 0.4769, "step": 5155 }, { "epoch": 2.558024078441107, "grad_norm": 0.07264729662949794, "learning_rate": 2.8835629893135747e-06, "loss": 0.4821, "step": 5156 }, { "epoch": 2.558520541144347, "grad_norm": 0.07052412346155278, "learning_rate": 2.8817922024317636e-06, "loss": 0.4328, "step": 5157 }, { "epoch": 2.559017003847586, "grad_norm": 0.07054010099118446, "learning_rate": 2.880021739284053e-06, "loss": 0.4454, "step": 5158 }, { "epoch": 2.5595134665508255, "grad_norm": 0.07035637400898442, "learning_rate": 2.8782516001410287e-06, "loss": 0.4498, "step": 5159 }, { "epoch": 2.560009929254065, "grad_norm": 0.07225924124383622, "learning_rate": 2.876481785273225e-06, "loss": 0.4541, "step": 5160 }, { "epoch": 2.560506391957304, "grad_norm": 0.0699158749891402, "learning_rate": 2.874712294951135e-06, "loss": 0.426, "step": 5161 }, { "epoch": 2.5610028546605434, "grad_norm": 0.07178418597986615, "learning_rate": 2.8729431294451926e-06, "loss": 0.4484, "step": 5162 }, { "epoch": 2.561499317363783, "grad_norm": 0.07431620471359664, "learning_rate": 2.87117428902579e-06, "loss": 0.4625, "step": 
5163 }, { "epoch": 2.5619957800670226, "grad_norm": 0.07142951600063215, "learning_rate": 2.869405773963264e-06, "loss": 0.4613, "step": 5164 }, { "epoch": 2.562492242770262, "grad_norm": 0.07148690845215873, "learning_rate": 2.8676375845279013e-06, "loss": 0.4236, "step": 5165 }, { "epoch": 2.562988705473501, "grad_norm": 0.0706622462248769, "learning_rate": 2.8658697209899467e-06, "loss": 0.4556, "step": 5166 }, { "epoch": 2.563485168176741, "grad_norm": 0.07418442942380814, "learning_rate": 2.8641021836195853e-06, "loss": 0.4574, "step": 5167 }, { "epoch": 2.5639816308799803, "grad_norm": 0.07327595477365036, "learning_rate": 2.8623349726869606e-06, "loss": 0.4472, "step": 5168 }, { "epoch": 2.5644780935832197, "grad_norm": 0.07382844367057993, "learning_rate": 2.86056808846216e-06, "loss": 0.4572, "step": 5169 }, { "epoch": 2.564974556286459, "grad_norm": 0.07016839471992978, "learning_rate": 2.8588015312152215e-06, "loss": 0.4435, "step": 5170 }, { "epoch": 2.5654710189896983, "grad_norm": 0.07083415871411265, "learning_rate": 2.85703530121614e-06, "loss": 0.4339, "step": 5171 }, { "epoch": 2.5659674816929376, "grad_norm": 0.0714983644379608, "learning_rate": 2.8552693987348533e-06, "loss": 0.4402, "step": 5172 }, { "epoch": 2.566463944396177, "grad_norm": 0.07685230780477147, "learning_rate": 2.8535038240412503e-06, "loss": 0.4853, "step": 5173 }, { "epoch": 2.5669604070994168, "grad_norm": 0.07383361729449742, "learning_rate": 2.85173857740517e-06, "loss": 0.4739, "step": 5174 }, { "epoch": 2.567456869802656, "grad_norm": 0.07358388092931081, "learning_rate": 2.8499736590964043e-06, "loss": 0.4462, "step": 5175 }, { "epoch": 2.5679533325058954, "grad_norm": 0.07380900887304771, "learning_rate": 2.8482090693846926e-06, "loss": 0.492, "step": 5176 }, { "epoch": 2.568449795209135, "grad_norm": 0.07617068375564527, "learning_rate": 2.8464448085397212e-06, "loss": 0.449, "step": 5177 }, { "epoch": 2.5689462579123745, "grad_norm": 0.07348344159954795, "learning_rate": 2.844680876831133e-06, "loss": 0.4512, "step": 5178 }, { "epoch": 2.569442720615614, "grad_norm": 0.07192285134930039, "learning_rate": 2.8429172745285127e-06, "loss": 0.4458, "step": 5179 }, { "epoch": 2.569939183318853, "grad_norm": 0.0749431146540679, "learning_rate": 2.8411540019014026e-06, "loss": 0.5048, "step": 5180 }, { "epoch": 2.5704356460220925, "grad_norm": 0.07407244688469924, "learning_rate": 2.8393910592192898e-06, "loss": 0.4891, "step": 5181 }, { "epoch": 2.570932108725332, "grad_norm": 0.07213297514571944, "learning_rate": 2.837628446751608e-06, "loss": 0.4697, "step": 5182 }, { "epoch": 2.571428571428571, "grad_norm": 0.0699179683327418, "learning_rate": 2.8358661647677497e-06, "loss": 0.4355, "step": 5183 }, { "epoch": 2.571925034131811, "grad_norm": 0.07765505093200081, "learning_rate": 2.834104213537047e-06, "loss": 0.4939, "step": 5184 }, { "epoch": 2.5724214968350503, "grad_norm": 0.08172733691964389, "learning_rate": 2.8323425933287883e-06, "loss": 0.4986, "step": 5185 }, { "epoch": 2.5729179595382896, "grad_norm": 0.07907186060467485, "learning_rate": 2.83058130441221e-06, "loss": 0.4528, "step": 5186 }, { "epoch": 2.573414422241529, "grad_norm": 0.07227752346027236, "learning_rate": 2.828820347056493e-06, "loss": 0.4412, "step": 5187 }, { "epoch": 2.5739108849447687, "grad_norm": 0.07282996319859109, "learning_rate": 2.827059721530777e-06, "loss": 0.4652, "step": 5188 }, { "epoch": 2.574407347648008, "grad_norm": 0.07180952079077647, "learning_rate": 2.8252994281041392e-06, "loss": 0.4461, "step": 
5189 }, { "epoch": 2.5749038103512474, "grad_norm": 0.07342289227262047, "learning_rate": 2.8235394670456164e-06, "loss": 0.4255, "step": 5190 }, { "epoch": 2.5754002730544867, "grad_norm": 0.07426475934548302, "learning_rate": 2.821779838624188e-06, "loss": 0.4461, "step": 5191 }, { "epoch": 2.575896735757726, "grad_norm": 0.0724320917076261, "learning_rate": 2.8200205431087868e-06, "loss": 0.4669, "step": 5192 }, { "epoch": 2.5763931984609654, "grad_norm": 0.07369682353577538, "learning_rate": 2.8182615807682933e-06, "loss": 0.4788, "step": 5193 }, { "epoch": 2.576889661164205, "grad_norm": 0.07439988616448766, "learning_rate": 2.8165029518715337e-06, "loss": 0.4428, "step": 5194 }, { "epoch": 2.5773861238674445, "grad_norm": 0.07255177311200152, "learning_rate": 2.8147446566872894e-06, "loss": 0.4723, "step": 5195 }, { "epoch": 2.577882586570684, "grad_norm": 0.07390677701238471, "learning_rate": 2.812986695484287e-06, "loss": 0.4552, "step": 5196 }, { "epoch": 2.578379049273923, "grad_norm": 0.07207378742571481, "learning_rate": 2.8112290685312005e-06, "loss": 0.4666, "step": 5197 }, { "epoch": 2.578875511977163, "grad_norm": 0.07190111822941408, "learning_rate": 2.8094717760966584e-06, "loss": 0.427, "step": 5198 }, { "epoch": 2.5793719746804022, "grad_norm": 0.07009977758785436, "learning_rate": 2.80771481844923e-06, "loss": 0.4448, "step": 5199 }, { "epoch": 2.5798684373836416, "grad_norm": 0.0723752660969619, "learning_rate": 2.8059581958574434e-06, "loss": 0.4441, "step": 5200 }, { "epoch": 2.580364900086881, "grad_norm": 0.0751303317079359, "learning_rate": 2.804201908589768e-06, "loss": 0.4811, "step": 5201 }, { "epoch": 2.5808613627901202, "grad_norm": 0.07485829073586375, "learning_rate": 2.8024459569146223e-06, "loss": 0.4675, "step": 5202 }, { "epoch": 2.5813578254933596, "grad_norm": 0.07364181111902501, "learning_rate": 2.800690341100378e-06, "loss": 0.4505, "step": 5203 }, { "epoch": 2.5818542881965993, "grad_norm": 0.07357147393693832, "learning_rate": 2.7989350614153532e-06, "loss": 0.4468, "step": 5204 }, { "epoch": 2.5823507508998387, "grad_norm": 0.07244406818148143, "learning_rate": 2.7971801181278115e-06, "loss": 0.4375, "step": 5205 }, { "epoch": 2.582847213603078, "grad_norm": 0.07097629774725632, "learning_rate": 2.795425511505968e-06, "loss": 0.4429, "step": 5206 }, { "epoch": 2.5833436763063173, "grad_norm": 0.07113223350037606, "learning_rate": 2.793671241817989e-06, "loss": 0.452, "step": 5207 }, { "epoch": 2.583840139009557, "grad_norm": 0.06911189314520064, "learning_rate": 2.791917309331985e-06, "loss": 0.4245, "step": 5208 }, { "epoch": 2.5843366017127964, "grad_norm": 0.07235990203667617, "learning_rate": 2.7901637143160143e-06, "loss": 0.4572, "step": 5209 }, { "epoch": 2.5848330644160358, "grad_norm": 0.07286253379182837, "learning_rate": 2.7884104570380906e-06, "loss": 0.4801, "step": 5210 }, { "epoch": 2.585329527119275, "grad_norm": 0.0729800350875364, "learning_rate": 2.7866575377661654e-06, "loss": 0.4913, "step": 5211 }, { "epoch": 2.5858259898225144, "grad_norm": 0.0706958205684523, "learning_rate": 2.7849049567681496e-06, "loss": 0.4358, "step": 5212 }, { "epoch": 2.5863224525257538, "grad_norm": 0.07357005050554628, "learning_rate": 2.7831527143118954e-06, "loss": 0.4699, "step": 5213 }, { "epoch": 2.5868189152289935, "grad_norm": 0.07152756428761167, "learning_rate": 2.781400810665201e-06, "loss": 0.4756, "step": 5214 }, { "epoch": 2.587315377932233, "grad_norm": 0.07534545721814909, "learning_rate": 2.7796492460958237e-06, "loss": 0.4545, 
"step": 5215 }, { "epoch": 2.587811840635472, "grad_norm": 0.0717316509368772, "learning_rate": 2.7778980208714556e-06, "loss": 0.4268, "step": 5216 }, { "epoch": 2.5883083033387115, "grad_norm": 0.07247442996879212, "learning_rate": 2.7761471352597486e-06, "loss": 0.4506, "step": 5217 }, { "epoch": 2.5888047660419513, "grad_norm": 0.07364335888385559, "learning_rate": 2.7743965895282956e-06, "loss": 0.456, "step": 5218 }, { "epoch": 2.5893012287451906, "grad_norm": 0.07489070055107547, "learning_rate": 2.772646383944636e-06, "loss": 0.4884, "step": 5219 }, { "epoch": 2.58979769144843, "grad_norm": 0.07236646941891323, "learning_rate": 2.7708965187762683e-06, "loss": 0.4646, "step": 5220 }, { "epoch": 2.5902941541516693, "grad_norm": 0.0726014680663615, "learning_rate": 2.769146994290623e-06, "loss": 0.4401, "step": 5221 }, { "epoch": 2.5907906168549086, "grad_norm": 0.07146054387083108, "learning_rate": 2.7673978107550925e-06, "loss": 0.49, "step": 5222 }, { "epoch": 2.591287079558148, "grad_norm": 0.07183695866470052, "learning_rate": 2.7656489684370068e-06, "loss": 0.465, "step": 5223 }, { "epoch": 2.5917835422613877, "grad_norm": 0.07319204412500135, "learning_rate": 2.763900467603654e-06, "loss": 0.4441, "step": 5224 }, { "epoch": 2.592280004964627, "grad_norm": 0.07224797748649779, "learning_rate": 2.7621523085222612e-06, "loss": 0.4376, "step": 5225 }, { "epoch": 2.5927764676678664, "grad_norm": 0.07293475246622837, "learning_rate": 2.760404491460006e-06, "loss": 0.4719, "step": 5226 }, { "epoch": 2.5932729303711057, "grad_norm": 0.07161942837682458, "learning_rate": 2.7586570166840154e-06, "loss": 0.4566, "step": 5227 }, { "epoch": 2.5937693930743455, "grad_norm": 0.07225675300278564, "learning_rate": 2.7569098844613616e-06, "loss": 0.4366, "step": 5228 }, { "epoch": 2.594265855777585, "grad_norm": 0.06933097869876324, "learning_rate": 2.7551630950590686e-06, "loss": 0.44, "step": 5229 }, { "epoch": 2.594762318480824, "grad_norm": 0.07459117930805313, "learning_rate": 2.753416648744103e-06, "loss": 0.4643, "step": 5230 }, { "epoch": 2.5952587811840635, "grad_norm": 0.07090794968370165, "learning_rate": 2.75167054578338e-06, "loss": 0.4572, "step": 5231 }, { "epoch": 2.595755243887303, "grad_norm": 0.06958101880273615, "learning_rate": 2.749924786443766e-06, "loss": 0.4524, "step": 5232 }, { "epoch": 2.596251706590542, "grad_norm": 0.0709070300995474, "learning_rate": 2.7481793709920722e-06, "loss": 0.4306, "step": 5233 }, { "epoch": 2.596748169293782, "grad_norm": 0.07236911324643573, "learning_rate": 2.7464342996950537e-06, "loss": 0.452, "step": 5234 }, { "epoch": 2.5972446319970213, "grad_norm": 0.07405711913344337, "learning_rate": 2.744689572819421e-06, "loss": 0.4733, "step": 5235 }, { "epoch": 2.5977410947002606, "grad_norm": 0.07253420026733598, "learning_rate": 2.742945190631827e-06, "loss": 0.4638, "step": 5236 }, { "epoch": 2.5982375574035, "grad_norm": 0.07081417321177245, "learning_rate": 2.7412011533988707e-06, "loss": 0.4249, "step": 5237 }, { "epoch": 2.5987340201067397, "grad_norm": 0.07403116256998661, "learning_rate": 2.7394574613870995e-06, "loss": 0.4637, "step": 5238 }, { "epoch": 2.599230482809979, "grad_norm": 0.07481669438242297, "learning_rate": 2.7377141148630116e-06, "loss": 0.4659, "step": 5239 }, { "epoch": 2.5997269455132184, "grad_norm": 0.07377666582825643, "learning_rate": 2.735971114093049e-06, "loss": 0.4768, "step": 5240 }, { "epoch": 2.6002234082164577, "grad_norm": 0.06995115542811843, "learning_rate": 2.734228459343598e-06, "loss": 0.4562, 
"step": 5241 }, { "epoch": 2.600719870919697, "grad_norm": 0.0710589217991799, "learning_rate": 2.7324861508810007e-06, "loss": 0.4635, "step": 5242 }, { "epoch": 2.6012163336229364, "grad_norm": 0.07548066876254467, "learning_rate": 2.730744188971536e-06, "loss": 0.4843, "step": 5243 }, { "epoch": 2.601712796326176, "grad_norm": 0.07286147196013953, "learning_rate": 2.72900257388144e-06, "loss": 0.4366, "step": 5244 }, { "epoch": 2.6022092590294155, "grad_norm": 0.07419676476869297, "learning_rate": 2.7272613058768865e-06, "loss": 0.4753, "step": 5245 }, { "epoch": 2.602705721732655, "grad_norm": 0.07254724545442845, "learning_rate": 2.725520385224001e-06, "loss": 0.436, "step": 5246 }, { "epoch": 2.603202184435894, "grad_norm": 0.07388891093945382, "learning_rate": 2.723779812188857e-06, "loss": 0.4783, "step": 5247 }, { "epoch": 2.603698647139134, "grad_norm": 0.07329201727578473, "learning_rate": 2.7220395870374715e-06, "loss": 0.4403, "step": 5248 }, { "epoch": 2.6041951098423732, "grad_norm": 0.07432559254448866, "learning_rate": 2.7202997100358117e-06, "loss": 0.482, "step": 5249 }, { "epoch": 2.6046915725456126, "grad_norm": 0.07208369927428669, "learning_rate": 2.7185601814497897e-06, "loss": 0.4623, "step": 5250 }, { "epoch": 2.605188035248852, "grad_norm": 0.07212394690380779, "learning_rate": 2.7168210015452625e-06, "loss": 0.4224, "step": 5251 }, { "epoch": 2.6056844979520912, "grad_norm": 0.07141617635703298, "learning_rate": 2.7150821705880403e-06, "loss": 0.4435, "step": 5252 }, { "epoch": 2.6061809606553306, "grad_norm": 0.07306470846553159, "learning_rate": 2.7133436888438684e-06, "loss": 0.4748, "step": 5253 }, { "epoch": 2.6066774233585703, "grad_norm": 0.07049611258940773, "learning_rate": 2.711605556578452e-06, "loss": 0.4371, "step": 5254 }, { "epoch": 2.6071738860618097, "grad_norm": 0.07231778349554253, "learning_rate": 2.709867774057433e-06, "loss": 0.4408, "step": 5255 }, { "epoch": 2.607670348765049, "grad_norm": 0.07306994518093786, "learning_rate": 2.708130341546407e-06, "loss": 0.4745, "step": 5256 }, { "epoch": 2.6081668114682883, "grad_norm": 0.07139666450342631, "learning_rate": 2.706393259310911e-06, "loss": 0.4202, "step": 5257 }, { "epoch": 2.608663274171528, "grad_norm": 0.07431580209413276, "learning_rate": 2.7046565276164283e-06, "loss": 0.4874, "step": 5258 }, { "epoch": 2.6091597368747674, "grad_norm": 0.07024347861273901, "learning_rate": 2.7029201467283937e-06, "loss": 0.4098, "step": 5259 }, { "epoch": 2.6096561995780068, "grad_norm": 0.07203456244316435, "learning_rate": 2.7011841169121825e-06, "loss": 0.4504, "step": 5260 }, { "epoch": 2.610152662281246, "grad_norm": 0.07237420715731252, "learning_rate": 2.699448438433122e-06, "loss": 0.4364, "step": 5261 }, { "epoch": 2.6106491249844854, "grad_norm": 0.07079548912149816, "learning_rate": 2.6977131115564814e-06, "loss": 0.4437, "step": 5262 }, { "epoch": 2.6111455876877248, "grad_norm": 0.07229082269715763, "learning_rate": 2.695978136547476e-06, "loss": 0.4594, "step": 5263 }, { "epoch": 2.6116420503909645, "grad_norm": 0.0728641580489102, "learning_rate": 2.694243513671271e-06, "loss": 0.4638, "step": 5264 }, { "epoch": 2.612138513094204, "grad_norm": 0.06905678657408329, "learning_rate": 2.6925092431929734e-06, "loss": 0.4335, "step": 5265 }, { "epoch": 2.612634975797443, "grad_norm": 0.07285435311798948, "learning_rate": 2.690775325377642e-06, "loss": 0.4708, "step": 5266 }, { "epoch": 2.6131314385006825, "grad_norm": 0.07043222760531233, "learning_rate": 2.6890417604902765e-06, "loss": 
0.4315, "step": 5267 }, { "epoch": 2.6136279012039223, "grad_norm": 0.0746756557661863, "learning_rate": 2.687308548795825e-06, "loss": 0.4816, "step": 5268 }, { "epoch": 2.6141243639071616, "grad_norm": 0.07081456837335927, "learning_rate": 2.68557569055918e-06, "loss": 0.4362, "step": 5269 }, { "epoch": 2.614620826610401, "grad_norm": 0.07330130227050775, "learning_rate": 2.6838431860451797e-06, "loss": 0.4465, "step": 5270 }, { "epoch": 2.6151172893136403, "grad_norm": 0.0716011816724277, "learning_rate": 2.682111035518614e-06, "loss": 0.4539, "step": 5271 }, { "epoch": 2.6156137520168796, "grad_norm": 0.06965330881909364, "learning_rate": 2.6803792392442123e-06, "loss": 0.4139, "step": 5272 }, { "epoch": 2.616110214720119, "grad_norm": 0.07124158785595132, "learning_rate": 2.6786477974866494e-06, "loss": 0.437, "step": 5273 }, { "epoch": 2.6166066774233587, "grad_norm": 0.0723706784290423, "learning_rate": 2.676916710510552e-06, "loss": 0.4659, "step": 5274 }, { "epoch": 2.617103140126598, "grad_norm": 0.06963126779958274, "learning_rate": 2.675185978580487e-06, "loss": 0.4257, "step": 5275 }, { "epoch": 2.6175996028298374, "grad_norm": 0.07338333374455405, "learning_rate": 2.6734556019609704e-06, "loss": 0.4481, "step": 5276 }, { "epoch": 2.6180960655330767, "grad_norm": 0.07251896116734101, "learning_rate": 2.6717255809164615e-06, "loss": 0.4472, "step": 5277 }, { "epoch": 2.6185925282363165, "grad_norm": 0.07067789143415738, "learning_rate": 2.6699959157113653e-06, "loss": 0.4428, "step": 5278 }, { "epoch": 2.619088990939556, "grad_norm": 0.07525851620413397, "learning_rate": 2.668266606610036e-06, "loss": 0.4604, "step": 5279 }, { "epoch": 2.619585453642795, "grad_norm": 0.07533316106282821, "learning_rate": 2.6665376538767684e-06, "loss": 0.4754, "step": 5280 }, { "epoch": 2.6200819163460345, "grad_norm": 0.07008424279012973, "learning_rate": 2.664809057775807e-06, "loss": 0.4828, "step": 5281 }, { "epoch": 2.620578379049274, "grad_norm": 0.06972061411105103, "learning_rate": 2.66308081857134e-06, "loss": 0.4342, "step": 5282 }, { "epoch": 2.621074841752513, "grad_norm": 0.07186982715753283, "learning_rate": 2.6613529365274974e-06, "loss": 0.4165, "step": 5283 }, { "epoch": 2.621571304455753, "grad_norm": 0.07285331263904996, "learning_rate": 2.659625411908366e-06, "loss": 0.4799, "step": 5284 }, { "epoch": 2.6220677671589923, "grad_norm": 0.07501163105738619, "learning_rate": 2.657898244977961e-06, "loss": 0.4717, "step": 5285 }, { "epoch": 2.6225642298622316, "grad_norm": 0.07160123786520918, "learning_rate": 2.656171436000258e-06, "loss": 0.4443, "step": 5286 }, { "epoch": 2.623060692565471, "grad_norm": 0.06884269309644556, "learning_rate": 2.6544449852391695e-06, "loss": 0.4373, "step": 5287 }, { "epoch": 2.6235571552687107, "grad_norm": 0.07252049270899412, "learning_rate": 2.652718892958558e-06, "loss": 0.4457, "step": 5288 }, { "epoch": 2.62405361797195, "grad_norm": 0.07301113966245568, "learning_rate": 2.650993159422228e-06, "loss": 0.4817, "step": 5289 }, { "epoch": 2.6245500806751894, "grad_norm": 0.07392352396215351, "learning_rate": 2.649267784893929e-06, "loss": 0.4418, "step": 5290 }, { "epoch": 2.6250465433784287, "grad_norm": 0.07452490832212791, "learning_rate": 2.6475427696373598e-06, "loss": 0.4644, "step": 5291 }, { "epoch": 2.625543006081668, "grad_norm": 0.07118282301353918, "learning_rate": 2.6458181139161564e-06, "loss": 0.463, "step": 5292 }, { "epoch": 2.6260394687849073, "grad_norm": 0.07152366375897204, "learning_rate": 2.644093817993911e-06, "loss": 
0.4285, "step": 5293 }, { "epoch": 2.6265359314881467, "grad_norm": 0.07181298428188997, "learning_rate": 2.642369882134151e-06, "loss": 0.4416, "step": 5294 }, { "epoch": 2.6270323941913865, "grad_norm": 0.0707996904302843, "learning_rate": 2.6406463066003505e-06, "loss": 0.4639, "step": 5295 }, { "epoch": 2.627528856894626, "grad_norm": 0.07691379853284944, "learning_rate": 2.638923091655935e-06, "loss": 0.4563, "step": 5296 }, { "epoch": 2.628025319597865, "grad_norm": 0.07191388401411938, "learning_rate": 2.6372002375642657e-06, "loss": 0.483, "step": 5297 }, { "epoch": 2.628521782301105, "grad_norm": 0.07283482258711875, "learning_rate": 2.635477744588658e-06, "loss": 0.4473, "step": 5298 }, { "epoch": 2.6290182450043442, "grad_norm": 0.07144510285173247, "learning_rate": 2.6337556129923648e-06, "loss": 0.4561, "step": 5299 }, { "epoch": 2.6295147077075836, "grad_norm": 0.07225771158425691, "learning_rate": 2.6320338430385857e-06, "loss": 0.4326, "step": 5300 }, { "epoch": 2.630011170410823, "grad_norm": 0.07092103282667439, "learning_rate": 2.630312434990466e-06, "loss": 0.4432, "step": 5301 }, { "epoch": 2.630507633114062, "grad_norm": 0.0701907480460393, "learning_rate": 2.628591389111095e-06, "loss": 0.4169, "step": 5302 }, { "epoch": 2.6310040958173015, "grad_norm": 0.07194177966806387, "learning_rate": 2.6268707056635077e-06, "loss": 0.4447, "step": 5303 }, { "epoch": 2.631500558520541, "grad_norm": 0.07281610835524266, "learning_rate": 2.625150384910682e-06, "loss": 0.4744, "step": 5304 }, { "epoch": 2.6319970212237807, "grad_norm": 0.07556119554107671, "learning_rate": 2.6234304271155443e-06, "loss": 0.4868, "step": 5305 }, { "epoch": 2.63249348392702, "grad_norm": 0.07372561099256456, "learning_rate": 2.6217108325409594e-06, "loss": 0.4733, "step": 5306 }, { "epoch": 2.6329899466302593, "grad_norm": 0.07215826218897892, "learning_rate": 2.6199916014497396e-06, "loss": 0.4485, "step": 5307 }, { "epoch": 2.633486409333499, "grad_norm": 0.07287009510752426, "learning_rate": 2.618272734104645e-06, "loss": 0.4489, "step": 5308 }, { "epoch": 2.6339828720367384, "grad_norm": 0.07013210868053496, "learning_rate": 2.6165542307683744e-06, "loss": 0.4987, "step": 5309 }, { "epoch": 2.6344793347399778, "grad_norm": 0.07198909527337301, "learning_rate": 2.614836091703572e-06, "loss": 0.4406, "step": 5310 }, { "epoch": 2.634975797443217, "grad_norm": 0.07119081849050292, "learning_rate": 2.6131183171728323e-06, "loss": 0.4628, "step": 5311 }, { "epoch": 2.6354722601464564, "grad_norm": 0.07010495374139804, "learning_rate": 2.611400907438685e-06, "loss": 0.4388, "step": 5312 }, { "epoch": 2.6359687228496957, "grad_norm": 0.07035383373874375, "learning_rate": 2.6096838627636124e-06, "loss": 0.451, "step": 5313 }, { "epoch": 2.636465185552935, "grad_norm": 0.07252865320160067, "learning_rate": 2.6079671834100354e-06, "loss": 0.4461, "step": 5314 }, { "epoch": 2.636961648256175, "grad_norm": 0.07337124382294172, "learning_rate": 2.60625086964032e-06, "loss": 0.4631, "step": 5315 }, { "epoch": 2.637458110959414, "grad_norm": 0.07423342378313234, "learning_rate": 2.6045349217167815e-06, "loss": 0.4961, "step": 5316 }, { "epoch": 2.6379545736626535, "grad_norm": 0.07250274236591756, "learning_rate": 2.6028193399016677e-06, "loss": 0.4608, "step": 5317 }, { "epoch": 2.6384510363658933, "grad_norm": 0.07249481412053288, "learning_rate": 2.6011041244571844e-06, "loss": 0.4269, "step": 5318 }, { "epoch": 2.6389474990691326, "grad_norm": 0.06832436591372723, "learning_rate": 2.5993892756454702e-06, 
"loss": 0.4358, "step": 5319 }, { "epoch": 2.639443961772372, "grad_norm": 0.07458496665240169, "learning_rate": 2.597674793728616e-06, "loss": 0.4899, "step": 5320 }, { "epoch": 2.6399404244756113, "grad_norm": 0.07045333283556546, "learning_rate": 2.595960678968652e-06, "loss": 0.4481, "step": 5321 }, { "epoch": 2.6404368871788506, "grad_norm": 0.07350913336828889, "learning_rate": 2.59424693162755e-06, "loss": 0.4553, "step": 5322 }, { "epoch": 2.64093334988209, "grad_norm": 0.07608854710093517, "learning_rate": 2.5925335519672333e-06, "loss": 0.4687, "step": 5323 }, { "epoch": 2.6414298125853293, "grad_norm": 0.06943444544262166, "learning_rate": 2.5908205402495603e-06, "loss": 0.4295, "step": 5324 }, { "epoch": 2.641926275288569, "grad_norm": 0.0723083110709931, "learning_rate": 2.5891078967363416e-06, "loss": 0.4454, "step": 5325 }, { "epoch": 2.6424227379918084, "grad_norm": 0.07429140563455172, "learning_rate": 2.587395621689325e-06, "loss": 0.4728, "step": 5326 }, { "epoch": 2.6429192006950477, "grad_norm": 0.07137582757890498, "learning_rate": 2.585683715370202e-06, "loss": 0.4362, "step": 5327 }, { "epoch": 2.643415663398287, "grad_norm": 0.07321803169563242, "learning_rate": 2.5839721780406146e-06, "loss": 0.4516, "step": 5328 }, { "epoch": 2.643912126101527, "grad_norm": 0.0709602832592665, "learning_rate": 2.5822610099621402e-06, "loss": 0.4278, "step": 5329 }, { "epoch": 2.644408588804766, "grad_norm": 0.07032601586797163, "learning_rate": 2.5805502113963066e-06, "loss": 0.4457, "step": 5330 }, { "epoch": 2.6449050515080055, "grad_norm": 0.07003931206303413, "learning_rate": 2.5788397826045807e-06, "loss": 0.4407, "step": 5331 }, { "epoch": 2.645401514211245, "grad_norm": 0.07218328091870577, "learning_rate": 2.577129723848373e-06, "loss": 0.4537, "step": 5332 }, { "epoch": 2.645897976914484, "grad_norm": 0.0725217289580609, "learning_rate": 2.57542003538904e-06, "loss": 0.4565, "step": 5333 }, { "epoch": 2.6463944396177235, "grad_norm": 0.07169708777564429, "learning_rate": 2.5737107174878773e-06, "loss": 0.4279, "step": 5334 }, { "epoch": 2.6468909023209632, "grad_norm": 0.071467454593286, "learning_rate": 2.5720017704061307e-06, "loss": 0.4613, "step": 5335 }, { "epoch": 2.6473873650242026, "grad_norm": 0.07225220351631556, "learning_rate": 2.5702931944049816e-06, "loss": 0.4536, "step": 5336 }, { "epoch": 2.647883827727442, "grad_norm": 0.07089284434717677, "learning_rate": 2.5685849897455617e-06, "loss": 0.4357, "step": 5337 }, { "epoch": 2.6483802904306812, "grad_norm": 0.0759453690608335, "learning_rate": 2.5668771566889415e-06, "loss": 0.4604, "step": 5338 }, { "epoch": 2.648876753133921, "grad_norm": 0.07174183489715347, "learning_rate": 2.565169695496134e-06, "loss": 0.4383, "step": 5339 }, { "epoch": 2.6493732158371603, "grad_norm": 0.07130619769369462, "learning_rate": 2.563462606428101e-06, "loss": 0.4544, "step": 5340 }, { "epoch": 2.6498696785403997, "grad_norm": 0.07324873611482309, "learning_rate": 2.5617558897457402e-06, "loss": 0.4446, "step": 5341 }, { "epoch": 2.650366141243639, "grad_norm": 0.07448900607353087, "learning_rate": 2.5600495457098984e-06, "loss": 0.453, "step": 5342 }, { "epoch": 2.6508626039468783, "grad_norm": 0.07312292007238767, "learning_rate": 2.5583435745813624e-06, "loss": 0.4763, "step": 5343 }, { "epoch": 2.6513590666501177, "grad_norm": 0.07299329196320681, "learning_rate": 2.5566379766208602e-06, "loss": 0.4385, "step": 5344 }, { "epoch": 2.6518555293533574, "grad_norm": 0.07218798632859258, "learning_rate": 
2.5549327520890686e-06, "loss": 0.4301, "step": 5345 }, { "epoch": 2.6523519920565968, "grad_norm": 0.08102474836920227, "learning_rate": 2.5532279012466025e-06, "loss": 0.4467, "step": 5346 }, { "epoch": 2.652848454759836, "grad_norm": 0.07313090190592135, "learning_rate": 2.5515234243540186e-06, "loss": 0.4355, "step": 5347 }, { "epoch": 2.6533449174630754, "grad_norm": 0.07443189436453763, "learning_rate": 2.549819321671825e-06, "loss": 0.4887, "step": 5348 }, { "epoch": 2.653841380166315, "grad_norm": 0.07160561955915772, "learning_rate": 2.5481155934604585e-06, "loss": 0.4512, "step": 5349 }, { "epoch": 2.6543378428695545, "grad_norm": 0.07063791790927199, "learning_rate": 2.5464122399803126e-06, "loss": 0.4499, "step": 5350 }, { "epoch": 2.654834305572794, "grad_norm": 0.0737680394360274, "learning_rate": 2.5447092614917128e-06, "loss": 0.4767, "step": 5351 }, { "epoch": 2.655330768276033, "grad_norm": 0.07062001938640819, "learning_rate": 2.5430066582549373e-06, "loss": 0.4576, "step": 5352 }, { "epoch": 2.6558272309792725, "grad_norm": 0.07382855500342596, "learning_rate": 2.5413044305301993e-06, "loss": 0.4667, "step": 5353 }, { "epoch": 2.656323693682512, "grad_norm": 0.07193888010833445, "learning_rate": 2.5396025785776545e-06, "loss": 0.4515, "step": 5354 }, { "epoch": 2.6568201563857516, "grad_norm": 0.07084732634769446, "learning_rate": 2.5379011026574084e-06, "loss": 0.4633, "step": 5355 }, { "epoch": 2.657316619088991, "grad_norm": 0.071847207505773, "learning_rate": 2.536200003029501e-06, "loss": 0.4683, "step": 5356 }, { "epoch": 2.6578130817922303, "grad_norm": 0.07187666934920998, "learning_rate": 2.5344992799539193e-06, "loss": 0.4625, "step": 5357 }, { "epoch": 2.6583095444954696, "grad_norm": 0.07160584760597262, "learning_rate": 2.5327989336905923e-06, "loss": 0.4439, "step": 5358 }, { "epoch": 2.6588060071987094, "grad_norm": 0.07208708458309498, "learning_rate": 2.5310989644993876e-06, "loss": 0.4971, "step": 5359 }, { "epoch": 2.6593024699019487, "grad_norm": 0.07247788488979347, "learning_rate": 2.5293993726401224e-06, "loss": 0.4371, "step": 5360 }, { "epoch": 2.659798932605188, "grad_norm": 0.07266963378543632, "learning_rate": 2.527700158372548e-06, "loss": 0.4542, "step": 5361 }, { "epoch": 2.6602953953084274, "grad_norm": 0.0731112273892814, "learning_rate": 2.5260013219563663e-06, "loss": 0.4674, "step": 5362 }, { "epoch": 2.6607918580116667, "grad_norm": 0.0728325130956322, "learning_rate": 2.5243028636512146e-06, "loss": 0.4788, "step": 5363 }, { "epoch": 2.661288320714906, "grad_norm": 0.07168492677060803, "learning_rate": 2.5226047837166757e-06, "loss": 0.4502, "step": 5364 }, { "epoch": 2.661784783418146, "grad_norm": 0.07164432402739039, "learning_rate": 2.5209070824122733e-06, "loss": 0.4511, "step": 5365 }, { "epoch": 2.662281246121385, "grad_norm": 0.07246429922456672, "learning_rate": 2.519209759997472e-06, "loss": 0.4567, "step": 5366 }, { "epoch": 2.6627777088246245, "grad_norm": 0.07313252363944685, "learning_rate": 2.5175128167316848e-06, "loss": 0.4645, "step": 5367 }, { "epoch": 2.663274171527864, "grad_norm": 0.07246259249132068, "learning_rate": 2.515816252874258e-06, "loss": 0.468, "step": 5368 }, { "epoch": 2.6637706342311036, "grad_norm": 0.07090541004756543, "learning_rate": 2.514120068684488e-06, "loss": 0.4514, "step": 5369 }, { "epoch": 2.664267096934343, "grad_norm": 0.0702345762551842, "learning_rate": 2.5124242644216066e-06, "loss": 0.4468, "step": 5370 }, { "epoch": 2.6647635596375823, "grad_norm": 0.07252724362767601, 
"learning_rate": 2.5107288403447906e-06, "loss": 0.4414, "step": 5371 }, { "epoch": 2.6652600223408216, "grad_norm": 0.07389080736112623, "learning_rate": 2.50903379671316e-06, "loss": 0.4709, "step": 5372 }, { "epoch": 2.665756485044061, "grad_norm": 0.07191391269126059, "learning_rate": 2.5073391337857722e-06, "loss": 0.4651, "step": 5373 }, { "epoch": 2.6662529477473003, "grad_norm": 0.074543193027082, "learning_rate": 2.505644851821633e-06, "loss": 0.4822, "step": 5374 }, { "epoch": 2.66674941045054, "grad_norm": 0.07255691804503774, "learning_rate": 2.5039509510796843e-06, "loss": 0.4574, "step": 5375 }, { "epoch": 2.6672458731537794, "grad_norm": 0.07225389782589921, "learning_rate": 2.50225743181881e-06, "loss": 0.4651, "step": 5376 }, { "epoch": 2.6677423358570187, "grad_norm": 0.07329724006761641, "learning_rate": 2.50056429429784e-06, "loss": 0.4445, "step": 5377 }, { "epoch": 2.668238798560258, "grad_norm": 0.0704766031668799, "learning_rate": 2.4988715387755415e-06, "loss": 0.4381, "step": 5378 }, { "epoch": 2.668735261263498, "grad_norm": 0.07337203960969434, "learning_rate": 2.4971791655106263e-06, "loss": 0.466, "step": 5379 }, { "epoch": 2.669231723966737, "grad_norm": 0.07181577127392384, "learning_rate": 2.4954871747617472e-06, "loss": 0.4701, "step": 5380 }, { "epoch": 2.6697281866699765, "grad_norm": 0.07358688312804215, "learning_rate": 2.493795566787496e-06, "loss": 0.4855, "step": 5381 }, { "epoch": 2.670224649373216, "grad_norm": 0.07195123164241551, "learning_rate": 2.4921043418464085e-06, "loss": 0.4245, "step": 5382 }, { "epoch": 2.670721112076455, "grad_norm": 0.0734381724103889, "learning_rate": 2.4904135001969595e-06, "loss": 0.451, "step": 5383 }, { "epoch": 2.6712175747796945, "grad_norm": 0.07307924081621357, "learning_rate": 2.4887230420975705e-06, "loss": 0.4627, "step": 5384 }, { "epoch": 2.6717140374829342, "grad_norm": 0.07196229922624982, "learning_rate": 2.4870329678065997e-06, "loss": 0.4464, "step": 5385 }, { "epoch": 2.6722105001861736, "grad_norm": 0.07175602703078268, "learning_rate": 2.4853432775823457e-06, "loss": 0.4415, "step": 5386 }, { "epoch": 2.672706962889413, "grad_norm": 0.07118605838847018, "learning_rate": 2.4836539716830533e-06, "loss": 0.4573, "step": 5387 }, { "epoch": 2.6732034255926522, "grad_norm": 0.06966429924987237, "learning_rate": 2.4819650503669035e-06, "loss": 0.4597, "step": 5388 }, { "epoch": 2.673699888295892, "grad_norm": 0.0715539394926452, "learning_rate": 2.4802765138920236e-06, "loss": 0.4165, "step": 5389 }, { "epoch": 2.6741963509991313, "grad_norm": 0.07073444357197714, "learning_rate": 2.478588362516478e-06, "loss": 0.4528, "step": 5390 }, { "epoch": 2.6746928137023707, "grad_norm": 0.0727857459617948, "learning_rate": 2.4769005964982718e-06, "loss": 0.4782, "step": 5391 }, { "epoch": 2.67518927640561, "grad_norm": 0.07476771403087601, "learning_rate": 2.475213216095356e-06, "loss": 0.4949, "step": 5392 }, { "epoch": 2.6756857391088493, "grad_norm": 0.0747474889538692, "learning_rate": 2.473526221565617e-06, "loss": 0.4991, "step": 5393 }, { "epoch": 2.6761822018120887, "grad_norm": 0.072689971266639, "learning_rate": 2.4718396131668877e-06, "loss": 0.485, "step": 5394 }, { "epoch": 2.6766786645153284, "grad_norm": 0.07221036618857145, "learning_rate": 2.4701533911569375e-06, "loss": 0.4839, "step": 5395 }, { "epoch": 2.6771751272185678, "grad_norm": 0.07272155353905288, "learning_rate": 2.4684675557934766e-06, "loss": 0.434, "step": 5396 }, { "epoch": 2.677671589921807, "grad_norm": 0.07156056790757943, 
"learning_rate": 2.4667821073341636e-06, "loss": 0.4596, "step": 5397 }, { "epoch": 2.6781680526250464, "grad_norm": 0.07222255501510809, "learning_rate": 2.4650970460365846e-06, "loss": 0.439, "step": 5398 }, { "epoch": 2.678664515328286, "grad_norm": 0.0757415187469245, "learning_rate": 2.4634123721582804e-06, "loss": 0.4942, "step": 5399 }, { "epoch": 2.6791609780315255, "grad_norm": 0.07333904857592394, "learning_rate": 2.461728085956722e-06, "loss": 0.4576, "step": 5400 }, { "epoch": 2.679657440734765, "grad_norm": 0.07286665550490455, "learning_rate": 2.460044187689328e-06, "loss": 0.4352, "step": 5401 }, { "epoch": 2.680153903438004, "grad_norm": 0.07097365341929482, "learning_rate": 2.458360677613457e-06, "loss": 0.447, "step": 5402 }, { "epoch": 2.6806503661412435, "grad_norm": 0.07406683798365062, "learning_rate": 2.456677555986401e-06, "loss": 0.4562, "step": 5403 }, { "epoch": 2.681146828844483, "grad_norm": 0.0715410713991149, "learning_rate": 2.4549948230654034e-06, "loss": 0.4626, "step": 5404 }, { "epoch": 2.6816432915477226, "grad_norm": 0.07133636588398236, "learning_rate": 2.4533124791076396e-06, "loss": 0.4677, "step": 5405 }, { "epoch": 2.682139754250962, "grad_norm": 0.07290857535557271, "learning_rate": 2.451630524370232e-06, "loss": 0.4533, "step": 5406 }, { "epoch": 2.6826362169542013, "grad_norm": 0.07127985534713971, "learning_rate": 2.4499489591102395e-06, "loss": 0.4893, "step": 5407 }, { "epoch": 2.6831326796574406, "grad_norm": 0.07140839768994581, "learning_rate": 2.448267783584659e-06, "loss": 0.4489, "step": 5408 }, { "epoch": 2.6836291423606804, "grad_norm": 0.07109980383685978, "learning_rate": 2.446586998050436e-06, "loss": 0.4484, "step": 5409 }, { "epoch": 2.6841256050639197, "grad_norm": 0.07442132086297334, "learning_rate": 2.4449066027644473e-06, "loss": 0.485, "step": 5410 }, { "epoch": 2.684622067767159, "grad_norm": 0.07286157873915275, "learning_rate": 2.4432265979835183e-06, "loss": 0.455, "step": 5411 }, { "epoch": 2.6851185304703984, "grad_norm": 0.06958316341672992, "learning_rate": 2.4415469839644094e-06, "loss": 0.4441, "step": 5412 }, { "epoch": 2.6856149931736377, "grad_norm": 0.07471228524829766, "learning_rate": 2.4398677609638228e-06, "loss": 0.4566, "step": 5413 }, { "epoch": 2.686111455876877, "grad_norm": 0.07131159705902516, "learning_rate": 2.4381889292383997e-06, "loss": 0.4298, "step": 5414 }, { "epoch": 2.686607918580117, "grad_norm": 0.0742376202400464, "learning_rate": 2.4365104890447218e-06, "loss": 0.4762, "step": 5415 }, { "epoch": 2.687104381283356, "grad_norm": 0.07242206286086188, "learning_rate": 2.434832440639315e-06, "loss": 0.444, "step": 5416 }, { "epoch": 2.6876008439865955, "grad_norm": 0.07382874049757732, "learning_rate": 2.433154784278638e-06, "loss": 0.5035, "step": 5417 }, { "epoch": 2.688097306689835, "grad_norm": 0.07053618916196587, "learning_rate": 2.4314775202190983e-06, "loss": 0.4455, "step": 5418 }, { "epoch": 2.6885937693930746, "grad_norm": 0.0731789208347564, "learning_rate": 2.429800648717036e-06, "loss": 0.4613, "step": 5419 }, { "epoch": 2.689090232096314, "grad_norm": 0.07283239329695157, "learning_rate": 2.4281241700287334e-06, "loss": 0.4591, "step": 5420 }, { "epoch": 2.6895866947995533, "grad_norm": 0.0725960512541698, "learning_rate": 2.426448084410416e-06, "loss": 0.4716, "step": 5421 }, { "epoch": 2.6900831575027926, "grad_norm": 0.07360807080088544, "learning_rate": 2.424772392118245e-06, "loss": 0.4685, "step": 5422 }, { "epoch": 2.690579620206032, "grad_norm": 0.07326003572886362, 
"learning_rate": 2.4230970934083216e-06, "loss": 0.4794, "step": 5423 }, { "epoch": 2.6910760829092712, "grad_norm": 0.07094010720894862, "learning_rate": 2.4214221885366918e-06, "loss": 0.4601, "step": 5424 }, { "epoch": 2.691572545612511, "grad_norm": 0.07394289944310807, "learning_rate": 2.4197476777593336e-06, "loss": 0.4576, "step": 5425 }, { "epoch": 2.6920690083157504, "grad_norm": 0.0727641219406632, "learning_rate": 2.4180735613321745e-06, "loss": 0.4528, "step": 5426 }, { "epoch": 2.6925654710189897, "grad_norm": 0.07508411585218325, "learning_rate": 2.4163998395110732e-06, "loss": 0.4533, "step": 5427 }, { "epoch": 2.693061933722229, "grad_norm": 0.07197259385452393, "learning_rate": 2.4147265125518292e-06, "loss": 0.4684, "step": 5428 }, { "epoch": 2.693558396425469, "grad_norm": 0.07182235273910088, "learning_rate": 2.4130535807101905e-06, "loss": 0.431, "step": 5429 }, { "epoch": 2.694054859128708, "grad_norm": 0.07238059271016718, "learning_rate": 2.4113810442418293e-06, "loss": 0.4487, "step": 5430 }, { "epoch": 2.6945513218319475, "grad_norm": 0.07061423607281134, "learning_rate": 2.4097089034023726e-06, "loss": 0.464, "step": 5431 }, { "epoch": 2.695047784535187, "grad_norm": 0.0696942313119842, "learning_rate": 2.408037158447375e-06, "loss": 0.434, "step": 5432 }, { "epoch": 2.695544247238426, "grad_norm": 0.07009626749102861, "learning_rate": 2.406365809632341e-06, "loss": 0.4455, "step": 5433 }, { "epoch": 2.6960407099416654, "grad_norm": 0.07490099364686856, "learning_rate": 2.4046948572127077e-06, "loss": 0.459, "step": 5434 }, { "epoch": 2.696537172644905, "grad_norm": 0.07357840038096851, "learning_rate": 2.403024301443851e-06, "loss": 0.4497, "step": 5435 }, { "epoch": 2.6970336353481446, "grad_norm": 0.07322032132576278, "learning_rate": 2.4013541425810916e-06, "loss": 0.4502, "step": 5436 }, { "epoch": 2.697530098051384, "grad_norm": 0.07315206853573321, "learning_rate": 2.3996843808796845e-06, "loss": 0.4657, "step": 5437 }, { "epoch": 2.698026560754623, "grad_norm": 0.07619258559402037, "learning_rate": 2.398015016594828e-06, "loss": 0.501, "step": 5438 }, { "epoch": 2.698523023457863, "grad_norm": 0.07015291482571168, "learning_rate": 2.3963460499816564e-06, "loss": 0.4231, "step": 5439 }, { "epoch": 2.6990194861611023, "grad_norm": 0.07276475840477924, "learning_rate": 2.394677481295243e-06, "loss": 0.4425, "step": 5440 }, { "epoch": 2.6995159488643417, "grad_norm": 0.07276518401473396, "learning_rate": 2.393009310790606e-06, "loss": 0.4847, "step": 5441 }, { "epoch": 2.700012411567581, "grad_norm": 0.07071737375167014, "learning_rate": 2.3913415387226936e-06, "loss": 0.4427, "step": 5442 }, { "epoch": 2.7005088742708203, "grad_norm": 0.07287583155324498, "learning_rate": 2.389674165346402e-06, "loss": 0.4741, "step": 5443 }, { "epoch": 2.7010053369740596, "grad_norm": 0.07055392236578718, "learning_rate": 2.3880071909165607e-06, "loss": 0.4644, "step": 5444 }, { "epoch": 2.701501799677299, "grad_norm": 0.07323683960653447, "learning_rate": 2.386340615687941e-06, "loss": 0.4754, "step": 5445 }, { "epoch": 2.7019982623805388, "grad_norm": 0.07564816505304253, "learning_rate": 2.3846744399152504e-06, "loss": 0.4702, "step": 5446 }, { "epoch": 2.702494725083778, "grad_norm": 0.06768172491094766, "learning_rate": 2.3830086638531367e-06, "loss": 0.4209, "step": 5447 }, { "epoch": 2.7029911877870174, "grad_norm": 0.0717282008086792, "learning_rate": 2.3813432877561903e-06, "loss": 0.4452, "step": 5448 }, { "epoch": 2.703487650490257, "grad_norm": 
0.07271166896844942, "learning_rate": 2.3796783118789335e-06, "loss": 0.4541, "step": 5449 }, { "epoch": 2.7039841131934965, "grad_norm": 0.07021717573627075, "learning_rate": 2.378013736475835e-06, "loss": 0.4052, "step": 5450 }, { "epoch": 2.704480575896736, "grad_norm": 0.07301532107170802, "learning_rate": 2.3763495618012967e-06, "loss": 0.5041, "step": 5451 }, { "epoch": 2.704977038599975, "grad_norm": 0.07150478795652813, "learning_rate": 2.3746857881096586e-06, "loss": 0.4766, "step": 5452 }, { "epoch": 2.7054735013032145, "grad_norm": 0.07278421297777359, "learning_rate": 2.3730224156552063e-06, "loss": 0.4369, "step": 5453 }, { "epoch": 2.705969964006454, "grad_norm": 0.07163528857638941, "learning_rate": 2.3713594446921552e-06, "loss": 0.4496, "step": 5454 }, { "epoch": 2.706466426709693, "grad_norm": 0.07379949986235082, "learning_rate": 2.3696968754746672e-06, "loss": 0.4663, "step": 5455 }, { "epoch": 2.706962889412933, "grad_norm": 0.06835946522496905, "learning_rate": 2.3680347082568396e-06, "loss": 0.4316, "step": 5456 }, { "epoch": 2.7074593521161723, "grad_norm": 0.07097554521865745, "learning_rate": 2.3663729432927034e-06, "loss": 0.4601, "step": 5457 }, { "epoch": 2.7079558148194116, "grad_norm": 0.07192575583390655, "learning_rate": 2.364711580836238e-06, "loss": 0.4741, "step": 5458 }, { "epoch": 2.7084522775226514, "grad_norm": 0.07413193566513956, "learning_rate": 2.363050621141354e-06, "loss": 0.4797, "step": 5459 }, { "epoch": 2.7089487402258907, "grad_norm": 0.07397341842935361, "learning_rate": 2.3613900644619005e-06, "loss": 0.4601, "step": 5460 }, { "epoch": 2.70944520292913, "grad_norm": 0.07104537353838479, "learning_rate": 2.3597299110516718e-06, "loss": 0.4395, "step": 5461 }, { "epoch": 2.7099416656323694, "grad_norm": 0.07220429705497064, "learning_rate": 2.3580701611643896e-06, "loss": 0.4491, "step": 5462 }, { "epoch": 2.7104381283356087, "grad_norm": 0.07237243617685923, "learning_rate": 2.356410815053725e-06, "loss": 0.4477, "step": 5463 }, { "epoch": 2.710934591038848, "grad_norm": 0.07142769983441732, "learning_rate": 2.3547518729732788e-06, "loss": 0.434, "step": 5464 }, { "epoch": 2.7114310537420874, "grad_norm": 0.06938918876377276, "learning_rate": 2.3530933351765967e-06, "loss": 0.4308, "step": 5465 }, { "epoch": 2.711927516445327, "grad_norm": 0.07652269457501713, "learning_rate": 2.351435201917159e-06, "loss": 0.4627, "step": 5466 }, { "epoch": 2.7124239791485665, "grad_norm": 0.0704284583828208, "learning_rate": 2.3497774734483827e-06, "loss": 0.4459, "step": 5467 }, { "epoch": 2.712920441851806, "grad_norm": 0.07163313771334788, "learning_rate": 2.348120150023627e-06, "loss": 0.4592, "step": 5468 }, { "epoch": 2.713416904555045, "grad_norm": 0.07290632848977743, "learning_rate": 2.346463231896186e-06, "loss": 0.4463, "step": 5469 }, { "epoch": 2.713913367258285, "grad_norm": 0.07016113247477145, "learning_rate": 2.3448067193192953e-06, "loss": 0.4487, "step": 5470 }, { "epoch": 2.7144098299615242, "grad_norm": 0.07214476803834971, "learning_rate": 2.3431506125461243e-06, "loss": 0.4741, "step": 5471 }, { "epoch": 2.7149062926647636, "grad_norm": 0.07456185081547756, "learning_rate": 2.341494911829782e-06, "loss": 0.4622, "step": 5472 }, { "epoch": 2.715402755368003, "grad_norm": 0.07237360880431196, "learning_rate": 2.339839617423318e-06, "loss": 0.4471, "step": 5473 }, { "epoch": 2.7158992180712422, "grad_norm": 0.07839016208893054, "learning_rate": 2.338184729579714e-06, "loss": 0.4548, "step": 5474 }, { "epoch": 2.7163956807744816, 
"grad_norm": 0.07457366522480684, "learning_rate": 2.3365302485518966e-06, "loss": 0.4692, "step": 5475 }, { "epoch": 2.7168921434777213, "grad_norm": 0.07175463069898462, "learning_rate": 2.3348761745927258e-06, "loss": 0.4495, "step": 5476 }, { "epoch": 2.7173886061809607, "grad_norm": 0.07097627701034127, "learning_rate": 2.3332225079549995e-06, "loss": 0.4255, "step": 5477 }, { "epoch": 2.7178850688842, "grad_norm": 0.07196217651367964, "learning_rate": 2.3315692488914544e-06, "loss": 0.49, "step": 5478 }, { "epoch": 2.7183815315874393, "grad_norm": 0.07703244837469461, "learning_rate": 2.329916397654763e-06, "loss": 0.4936, "step": 5479 }, { "epoch": 2.718877994290679, "grad_norm": 0.07217612227351226, "learning_rate": 2.32826395449754e-06, "loss": 0.483, "step": 5480 }, { "epoch": 2.7193744569939184, "grad_norm": 0.07172214175782687, "learning_rate": 2.326611919672332e-06, "loss": 0.463, "step": 5481 }, { "epoch": 2.7198709196971578, "grad_norm": 0.07239940889748474, "learning_rate": 2.324960293431629e-06, "loss": 0.4364, "step": 5482 }, { "epoch": 2.720367382400397, "grad_norm": 0.07164091885322517, "learning_rate": 2.3233090760278544e-06, "loss": 0.4499, "step": 5483 }, { "epoch": 2.7208638451036364, "grad_norm": 0.07414212769490823, "learning_rate": 2.3216582677133682e-06, "loss": 0.4659, "step": 5484 }, { "epoch": 2.7213603078068758, "grad_norm": 0.07135247244737274, "learning_rate": 2.3200078687404736e-06, "loss": 0.4467, "step": 5485 }, { "epoch": 2.7218567705101155, "grad_norm": 0.07245678162225738, "learning_rate": 2.3183578793614043e-06, "loss": 0.4637, "step": 5486 }, { "epoch": 2.722353233213355, "grad_norm": 0.0713600463717167, "learning_rate": 2.316708299828338e-06, "loss": 0.4504, "step": 5487 }, { "epoch": 2.722849695916594, "grad_norm": 0.0736838370568765, "learning_rate": 2.3150591303933852e-06, "loss": 0.459, "step": 5488 }, { "epoch": 2.7233461586198335, "grad_norm": 0.07028199554059375, "learning_rate": 2.313410371308592e-06, "loss": 0.4556, "step": 5489 }, { "epoch": 2.7238426213230733, "grad_norm": 0.07112815035167214, "learning_rate": 2.311762022825949e-06, "loss": 0.4603, "step": 5490 }, { "epoch": 2.7243390840263126, "grad_norm": 0.07077727556976685, "learning_rate": 2.3101140851973768e-06, "loss": 0.4589, "step": 5491 }, { "epoch": 2.724835546729552, "grad_norm": 0.07497765247200448, "learning_rate": 2.3084665586747397e-06, "loss": 0.4635, "step": 5492 }, { "epoch": 2.7253320094327913, "grad_norm": 0.07385568530723025, "learning_rate": 2.3068194435098334e-06, "loss": 0.4556, "step": 5493 }, { "epoch": 2.7258284721360306, "grad_norm": 0.07406628874892704, "learning_rate": 2.3051727399543934e-06, "loss": 0.4707, "step": 5494 }, { "epoch": 2.72632493483927, "grad_norm": 0.07091425812135041, "learning_rate": 2.3035264482600915e-06, "loss": 0.4367, "step": 5495 }, { "epoch": 2.7268213975425097, "grad_norm": 0.06863687149117234, "learning_rate": 2.3018805686785362e-06, "loss": 0.4276, "step": 5496 }, { "epoch": 2.727317860245749, "grad_norm": 0.07051110974771434, "learning_rate": 2.300235101461276e-06, "loss": 0.4584, "step": 5497 }, { "epoch": 2.7278143229489884, "grad_norm": 0.071099105480711, "learning_rate": 2.298590046859793e-06, "loss": 0.4293, "step": 5498 }, { "epoch": 2.7283107856522277, "grad_norm": 0.07455777915040125, "learning_rate": 2.2969454051255064e-06, "loss": 0.459, "step": 5499 }, { "epoch": 2.7288072483554675, "grad_norm": 0.07307833101340502, "learning_rate": 2.295301176509776e-06, "loss": 0.4612, "step": 5500 }, { "epoch": 2.729303711058707, 
"grad_norm": 0.07217219742126983, "learning_rate": 2.2936573612638922e-06, "loss": 0.4683, "step": 5501 }, { "epoch": 2.729800173761946, "grad_norm": 0.0744157919215635, "learning_rate": 2.29201395963909e-06, "loss": 0.4659, "step": 5502 }, { "epoch": 2.7302966364651855, "grad_norm": 0.07289841722012438, "learning_rate": 2.2903709718865347e-06, "loss": 0.445, "step": 5503 }, { "epoch": 2.730793099168425, "grad_norm": 0.07298563895816411, "learning_rate": 2.2887283982573287e-06, "loss": 0.4453, "step": 5504 }, { "epoch": 2.731289561871664, "grad_norm": 0.1200957041826658, "learning_rate": 2.2870862390025172e-06, "loss": 0.4756, "step": 5505 }, { "epoch": 2.731786024574904, "grad_norm": 0.0694534964088227, "learning_rate": 2.2854444943730735e-06, "loss": 0.4292, "step": 5506 }, { "epoch": 2.7322824872781433, "grad_norm": 0.07376327414716859, "learning_rate": 2.2838031646199164e-06, "loss": 0.4563, "step": 5507 }, { "epoch": 2.7327789499813826, "grad_norm": 0.07381798743005258, "learning_rate": 2.282162249993895e-06, "loss": 0.4722, "step": 5508 }, { "epoch": 2.733275412684622, "grad_norm": 0.07069931901928059, "learning_rate": 2.280521750745796e-06, "loss": 0.4527, "step": 5509 }, { "epoch": 2.7337718753878617, "grad_norm": 0.07238019601026131, "learning_rate": 2.2788816671263443e-06, "loss": 0.4562, "step": 5510 }, { "epoch": 2.734268338091101, "grad_norm": 0.07535935212905417, "learning_rate": 2.277241999386198e-06, "loss": 0.4729, "step": 5511 }, { "epoch": 2.7347648007943404, "grad_norm": 0.07148930959040346, "learning_rate": 2.2756027477759573e-06, "loss": 0.4691, "step": 5512 }, { "epoch": 2.7352612634975797, "grad_norm": 0.07042681073402034, "learning_rate": 2.2739639125461526e-06, "loss": 0.4502, "step": 5513 }, { "epoch": 2.735757726200819, "grad_norm": 0.0825610958308842, "learning_rate": 2.272325493947257e-06, "loss": 0.446, "step": 5514 }, { "epoch": 2.7362541889040584, "grad_norm": 0.07122332767066362, "learning_rate": 2.2706874922296756e-06, "loss": 0.4481, "step": 5515 }, { "epoch": 2.736750651607298, "grad_norm": 0.07202439783077769, "learning_rate": 2.2690499076437472e-06, "loss": 0.4452, "step": 5516 }, { "epoch": 2.7372471143105375, "grad_norm": 0.0759937883079161, "learning_rate": 2.267412740439755e-06, "loss": 0.4989, "step": 5517 }, { "epoch": 2.737743577013777, "grad_norm": 0.07122852007942947, "learning_rate": 2.2657759908679093e-06, "loss": 0.4392, "step": 5518 }, { "epoch": 2.738240039717016, "grad_norm": 0.0722685401508018, "learning_rate": 2.264139659178366e-06, "loss": 0.4496, "step": 5519 }, { "epoch": 2.738736502420256, "grad_norm": 0.07136737189013136, "learning_rate": 2.2625037456212096e-06, "loss": 0.4319, "step": 5520 }, { "epoch": 2.7392329651234952, "grad_norm": 0.07323565448937264, "learning_rate": 2.2608682504464614e-06, "loss": 0.4462, "step": 5521 }, { "epoch": 2.7397294278267346, "grad_norm": 0.07192415827088411, "learning_rate": 2.259233173904084e-06, "loss": 0.4269, "step": 5522 }, { "epoch": 2.740225890529974, "grad_norm": 0.07138721363420222, "learning_rate": 2.257598516243969e-06, "loss": 0.4625, "step": 5523 }, { "epoch": 2.7407223532332132, "grad_norm": 0.07603992677823823, "learning_rate": 2.2559642777159525e-06, "loss": 0.4848, "step": 5524 }, { "epoch": 2.7412188159364526, "grad_norm": 0.07173016922467354, "learning_rate": 2.2543304585697977e-06, "loss": 0.436, "step": 5525 }, { "epoch": 2.7417152786396923, "grad_norm": 0.0718564409263651, "learning_rate": 2.25269705905521e-06, "loss": 0.4443, "step": 5526 }, { "epoch": 2.7422117413429317, 
"grad_norm": 0.07145531667680151, "learning_rate": 2.2510640794218264e-06, "loss": 0.4714, "step": 5527 }, { "epoch": 2.742708204046171, "grad_norm": 0.0703115660351647, "learning_rate": 2.2494315199192206e-06, "loss": 0.4541, "step": 5528 }, { "epoch": 2.7432046667494103, "grad_norm": 0.07513605107381305, "learning_rate": 2.2477993807969074e-06, "loss": 0.4958, "step": 5529 }, { "epoch": 2.74370112945265, "grad_norm": 0.07026907153785616, "learning_rate": 2.2461676623043283e-06, "loss": 0.4617, "step": 5530 }, { "epoch": 2.7441975921558894, "grad_norm": 0.0731908821401188, "learning_rate": 2.2445363646908698e-06, "loss": 0.5116, "step": 5531 }, { "epoch": 2.7446940548591288, "grad_norm": 0.07507413862164442, "learning_rate": 2.242905488205848e-06, "loss": 0.4828, "step": 5532 }, { "epoch": 2.745190517562368, "grad_norm": 0.07326962669308193, "learning_rate": 2.241275033098513e-06, "loss": 0.4768, "step": 5533 }, { "epoch": 2.7456869802656074, "grad_norm": 0.07446257400543148, "learning_rate": 2.239644999618058e-06, "loss": 0.4586, "step": 5534 }, { "epoch": 2.7461834429688468, "grad_norm": 0.06965894292069011, "learning_rate": 2.2380153880136062e-06, "loss": 0.4433, "step": 5535 }, { "epoch": 2.7466799056720865, "grad_norm": 0.06927299902017811, "learning_rate": 2.2363861985342156e-06, "loss": 0.4458, "step": 5536 }, { "epoch": 2.747176368375326, "grad_norm": 0.06964886413539237, "learning_rate": 2.234757431428885e-06, "loss": 0.4247, "step": 5537 }, { "epoch": 2.747672831078565, "grad_norm": 0.07289524357337542, "learning_rate": 2.233129086946542e-06, "loss": 0.4678, "step": 5538 }, { "epoch": 2.7481692937818045, "grad_norm": 0.07102667782964327, "learning_rate": 2.231501165336057e-06, "loss": 0.456, "step": 5539 }, { "epoch": 2.7486657564850443, "grad_norm": 0.07411113072941954, "learning_rate": 2.229873666846229e-06, "loss": 0.475, "step": 5540 }, { "epoch": 2.7491622191882836, "grad_norm": 0.07161052155330794, "learning_rate": 2.2282465917257952e-06, "loss": 0.4589, "step": 5541 }, { "epoch": 2.749658681891523, "grad_norm": 0.07736293002603781, "learning_rate": 2.2266199402234286e-06, "loss": 0.4569, "step": 5542 }, { "epoch": 2.7501551445947623, "grad_norm": 0.07242961476602337, "learning_rate": 2.224993712587734e-06, "loss": 0.4766, "step": 5543 }, { "epoch": 2.7506516072980016, "grad_norm": 0.07153571234095471, "learning_rate": 2.2233679090672584e-06, "loss": 0.4473, "step": 5544 }, { "epoch": 2.7506516072980016, "eval_loss": 0.5140753388404846, "eval_runtime": 259.2366, "eval_samples_per_second": 117.086, "eval_steps_per_second": 14.639, "step": 5544 }, { "epoch": 2.751148070001241, "grad_norm": 0.07403899048266355, "learning_rate": 2.221742529910477e-06, "loss": 0.4497, "step": 5545 }, { "epoch": 2.7516445327044807, "grad_norm": 0.06701646326960832, "learning_rate": 2.2201175753658048e-06, "loss": 0.4053, "step": 5546 }, { "epoch": 2.75214099540772, "grad_norm": 0.0674570629487927, "learning_rate": 2.2184930456815897e-06, "loss": 0.4052, "step": 5547 }, { "epoch": 2.7526374581109594, "grad_norm": 0.07291171901110415, "learning_rate": 2.2168689411061123e-06, "loss": 0.4813, "step": 5548 }, { "epoch": 2.7531339208141987, "grad_norm": 0.07132406878589202, "learning_rate": 2.2152452618875954e-06, "loss": 0.4408, "step": 5549 }, { "epoch": 2.7536303835174385, "grad_norm": 0.07134727618995322, "learning_rate": 2.2136220082741876e-06, "loss": 0.4308, "step": 5550 }, { "epoch": 2.754126846220678, "grad_norm": 0.07343701614477756, "learning_rate": 2.2119991805139823e-06, "loss": 0.4781, 
"step": 5551 }, { "epoch": 2.754623308923917, "grad_norm": 0.07166201620344755, "learning_rate": 2.2103767788549996e-06, "loss": 0.4588, "step": 5552 }, { "epoch": 2.7551197716271565, "grad_norm": 0.07609433226158972, "learning_rate": 2.208754803545196e-06, "loss": 0.4684, "step": 5553 }, { "epoch": 2.755616234330396, "grad_norm": 0.072588971915586, "learning_rate": 2.2071332548324688e-06, "loss": 0.487, "step": 5554 }, { "epoch": 2.756112697033635, "grad_norm": 0.0728012780360345, "learning_rate": 2.2055121329646416e-06, "loss": 0.4359, "step": 5555 }, { "epoch": 2.756609159736875, "grad_norm": 0.0714482252676178, "learning_rate": 2.20389143818948e-06, "loss": 0.4607, "step": 5556 }, { "epoch": 2.7571056224401143, "grad_norm": 0.07300698715457024, "learning_rate": 2.20227117075468e-06, "loss": 0.4526, "step": 5557 }, { "epoch": 2.7576020851433536, "grad_norm": 0.07058202943928703, "learning_rate": 2.200651330907874e-06, "loss": 0.4424, "step": 5558 }, { "epoch": 2.758098547846593, "grad_norm": 0.07194090382598976, "learning_rate": 2.1990319188966276e-06, "loss": 0.4564, "step": 5559 }, { "epoch": 2.7585950105498327, "grad_norm": 0.07413470061933844, "learning_rate": 2.19741293496844e-06, "loss": 0.4785, "step": 5560 }, { "epoch": 2.759091473253072, "grad_norm": 0.07339601606666014, "learning_rate": 2.1957943793707517e-06, "loss": 0.4804, "step": 5561 }, { "epoch": 2.7595879359563114, "grad_norm": 0.07682989147253744, "learning_rate": 2.1941762523509282e-06, "loss": 0.4757, "step": 5562 }, { "epoch": 2.7600843986595507, "grad_norm": 0.07177187759459464, "learning_rate": 2.192558554156278e-06, "loss": 0.4583, "step": 5563 }, { "epoch": 2.76058086136279, "grad_norm": 0.07238024054866415, "learning_rate": 2.1909412850340395e-06, "loss": 0.4201, "step": 5564 }, { "epoch": 2.7610773240660293, "grad_norm": 0.07107079343533929, "learning_rate": 2.189324445231384e-06, "loss": 0.4708, "step": 5565 }, { "epoch": 2.761573786769269, "grad_norm": 0.07580931353151424, "learning_rate": 2.187708034995423e-06, "loss": 0.4555, "step": 5566 }, { "epoch": 2.7620702494725085, "grad_norm": 0.074047343219353, "learning_rate": 2.186092054573195e-06, "loss": 0.4661, "step": 5567 }, { "epoch": 2.762566712175748, "grad_norm": 0.07350297127509982, "learning_rate": 2.184476504211681e-06, "loss": 0.4955, "step": 5568 }, { "epoch": 2.763063174878987, "grad_norm": 0.07236753011939805, "learning_rate": 2.18286138415779e-06, "loss": 0.465, "step": 5569 }, { "epoch": 2.763559637582227, "grad_norm": 0.07229715160125913, "learning_rate": 2.1812466946583654e-06, "loss": 0.4349, "step": 5570 }, { "epoch": 2.764056100285466, "grad_norm": 0.07448114359837037, "learning_rate": 2.1796324359601896e-06, "loss": 0.4787, "step": 5571 }, { "epoch": 2.7645525629887056, "grad_norm": 0.06942310726010976, "learning_rate": 2.1780186083099746e-06, "loss": 0.4285, "step": 5572 }, { "epoch": 2.765049025691945, "grad_norm": 0.07393983006350734, "learning_rate": 2.176405211954369e-06, "loss": 0.4731, "step": 5573 }, { "epoch": 2.765545488395184, "grad_norm": 0.0713507331251344, "learning_rate": 2.1747922471399517e-06, "loss": 0.4254, "step": 5574 }, { "epoch": 2.7660419510984235, "grad_norm": 0.07469750472023376, "learning_rate": 2.1731797141132425e-06, "loss": 0.5091, "step": 5575 }, { "epoch": 2.766538413801663, "grad_norm": 0.07529947136126779, "learning_rate": 2.1715676131206893e-06, "loss": 0.498, "step": 5576 }, { "epoch": 2.7670348765049027, "grad_norm": 0.0714992330808803, "learning_rate": 2.169955944408674e-06, "loss": 0.4412, "step": 
5577 }, { "epoch": 2.767531339208142, "grad_norm": 0.0730209938522385, "learning_rate": 2.168344708223519e-06, "loss": 0.4711, "step": 5578 }, { "epoch": 2.7680278019113813, "grad_norm": 0.07410312426064489, "learning_rate": 2.166733904811472e-06, "loss": 0.5138, "step": 5579 }, { "epoch": 2.768524264614621, "grad_norm": 0.07265453900182571, "learning_rate": 2.1651235344187183e-06, "loss": 0.4711, "step": 5580 }, { "epoch": 2.7690207273178604, "grad_norm": 0.07219486082807354, "learning_rate": 2.163513597291381e-06, "loss": 0.449, "step": 5581 }, { "epoch": 2.7695171900210998, "grad_norm": 0.07169161843650458, "learning_rate": 2.1619040936755083e-06, "loss": 0.4685, "step": 5582 }, { "epoch": 2.770013652724339, "grad_norm": 0.0716278342968845, "learning_rate": 2.160295023817091e-06, "loss": 0.4806, "step": 5583 }, { "epoch": 2.7705101154275784, "grad_norm": 0.07458597427259725, "learning_rate": 2.1586863879620486e-06, "loss": 0.4651, "step": 5584 }, { "epoch": 2.7710065781308177, "grad_norm": 0.07458008902961828, "learning_rate": 2.1570781863562328e-06, "loss": 0.4538, "step": 5585 }, { "epoch": 2.771503040834057, "grad_norm": 0.07663571921112627, "learning_rate": 2.155470419245435e-06, "loss": 0.4871, "step": 5586 }, { "epoch": 2.771999503537297, "grad_norm": 0.07248567735016598, "learning_rate": 2.153863086875374e-06, "loss": 0.4953, "step": 5587 }, { "epoch": 2.772495966240536, "grad_norm": 0.07208045569274636, "learning_rate": 2.152256189491707e-06, "loss": 0.4117, "step": 5588 }, { "epoch": 2.7729924289437755, "grad_norm": 0.07040926609012249, "learning_rate": 2.1506497273400218e-06, "loss": 0.4512, "step": 5589 }, { "epoch": 2.7734888916470153, "grad_norm": 0.07146556046724373, "learning_rate": 2.1490437006658393e-06, "loss": 0.4704, "step": 5590 }, { "epoch": 2.7739853543502546, "grad_norm": 0.072885103918037, "learning_rate": 2.1474381097146163e-06, "loss": 0.4435, "step": 5591 }, { "epoch": 2.774481817053494, "grad_norm": 0.07419544353699468, "learning_rate": 2.1458329547317384e-06, "loss": 0.4622, "step": 5592 }, { "epoch": 2.7749782797567333, "grad_norm": 0.07108922480782838, "learning_rate": 2.144228235962533e-06, "loss": 0.4458, "step": 5593 }, { "epoch": 2.7754747424599726, "grad_norm": 0.07248340212697033, "learning_rate": 2.1426239536522497e-06, "loss": 0.455, "step": 5594 }, { "epoch": 2.775971205163212, "grad_norm": 0.07119818086272339, "learning_rate": 2.1410201080460837e-06, "loss": 0.4766, "step": 5595 }, { "epoch": 2.7764676678664513, "grad_norm": 0.07272794995230875, "learning_rate": 2.139416699389153e-06, "loss": 0.4557, "step": 5596 }, { "epoch": 2.776964130569691, "grad_norm": 0.07336170551958457, "learning_rate": 2.1378137279265126e-06, "loss": 0.5142, "step": 5597 }, { "epoch": 2.7774605932729304, "grad_norm": 0.07038873630375103, "learning_rate": 2.1362111939031538e-06, "loss": 0.4593, "step": 5598 }, { "epoch": 2.7779570559761697, "grad_norm": 0.07121169984717571, "learning_rate": 2.134609097563995e-06, "loss": 0.4536, "step": 5599 }, { "epoch": 2.7784535186794095, "grad_norm": 0.07275418181501159, "learning_rate": 2.133007439153894e-06, "loss": 0.4929, "step": 5600 }, { "epoch": 2.778949981382649, "grad_norm": 0.0711687503533935, "learning_rate": 2.131406218917637e-06, "loss": 0.4441, "step": 5601 }, { "epoch": 2.779446444085888, "grad_norm": 0.0717822196418287, "learning_rate": 2.129805437099944e-06, "loss": 0.4597, "step": 5602 }, { "epoch": 2.7799429067891275, "grad_norm": 0.07293237039541688, "learning_rate": 2.1282050939454713e-06, "loss": 0.4644, 
"step": 5603 }, { "epoch": 2.780439369492367, "grad_norm": 0.07441525762705241, "learning_rate": 2.126605189698803e-06, "loss": 0.4539, "step": 5604 }, { "epoch": 2.780935832195606, "grad_norm": 0.07075570422249783, "learning_rate": 2.125005724604461e-06, "loss": 0.447, "step": 5605 }, { "epoch": 2.7814322948988455, "grad_norm": 0.07113131933005146, "learning_rate": 2.1234066989068972e-06, "loss": 0.4424, "step": 5606 }, { "epoch": 2.7819287576020852, "grad_norm": 0.07083162859588368, "learning_rate": 2.121808112850497e-06, "loss": 0.4477, "step": 5607 }, { "epoch": 2.7824252203053246, "grad_norm": 0.07373674057526543, "learning_rate": 2.1202099666795783e-06, "loss": 0.4429, "step": 5608 }, { "epoch": 2.782921683008564, "grad_norm": 0.07349052401745303, "learning_rate": 2.118612260638391e-06, "loss": 0.5113, "step": 5609 }, { "epoch": 2.7834181457118032, "grad_norm": 0.07150161174098543, "learning_rate": 2.117014994971121e-06, "loss": 0.4765, "step": 5610 }, { "epoch": 2.783914608415043, "grad_norm": 0.06954490397678757, "learning_rate": 2.115418169921883e-06, "loss": 0.4373, "step": 5611 }, { "epoch": 2.7844110711182823, "grad_norm": 0.06960572970474305, "learning_rate": 2.1138217857347284e-06, "loss": 0.4309, "step": 5612 }, { "epoch": 2.7849075338215217, "grad_norm": 0.07441963347632186, "learning_rate": 2.1122258426536373e-06, "loss": 0.498, "step": 5613 }, { "epoch": 2.785403996524761, "grad_norm": 0.07185784460169722, "learning_rate": 2.1106303409225228e-06, "loss": 0.4486, "step": 5614 }, { "epoch": 2.7859004592280003, "grad_norm": 0.07171485254229928, "learning_rate": 2.1090352807852344e-06, "loss": 0.4587, "step": 5615 }, { "epoch": 2.7863969219312397, "grad_norm": 0.07395003793923444, "learning_rate": 2.1074406624855505e-06, "loss": 0.4222, "step": 5616 }, { "epoch": 2.7868933846344794, "grad_norm": 0.06817105944314727, "learning_rate": 2.1058464862671805e-06, "loss": 0.4311, "step": 5617 }, { "epoch": 2.7873898473377188, "grad_norm": 0.07146224716442358, "learning_rate": 2.1042527523737732e-06, "loss": 0.4587, "step": 5618 }, { "epoch": 2.787886310040958, "grad_norm": 0.07262673582585218, "learning_rate": 2.1026594610489013e-06, "loss": 0.4652, "step": 5619 }, { "epoch": 2.7883827727441974, "grad_norm": 0.07216345014974743, "learning_rate": 2.1010666125360767e-06, "loss": 0.471, "step": 5620 }, { "epoch": 2.788879235447437, "grad_norm": 0.07400589154296823, "learning_rate": 2.09947420707874e-06, "loss": 0.4607, "step": 5621 }, { "epoch": 2.7893756981506765, "grad_norm": 0.0758128860095602, "learning_rate": 2.097882244920264e-06, "loss": 0.4726, "step": 5622 }, { "epoch": 2.789872160853916, "grad_norm": 0.0716427119065037, "learning_rate": 2.096290726303955e-06, "loss": 0.4512, "step": 5623 }, { "epoch": 2.790368623557155, "grad_norm": 0.07378414988935275, "learning_rate": 2.0946996514730494e-06, "loss": 0.4646, "step": 5624 }, { "epoch": 2.7908650862603945, "grad_norm": 0.07321208031026308, "learning_rate": 2.093109020670721e-06, "loss": 0.4619, "step": 5625 }, { "epoch": 2.791361548963634, "grad_norm": 0.07314239115958235, "learning_rate": 2.0915188341400685e-06, "loss": 0.4998, "step": 5626 }, { "epoch": 2.7918580116668736, "grad_norm": 0.07090958314562905, "learning_rate": 2.08992909212413e-06, "loss": 0.4523, "step": 5627 }, { "epoch": 2.792354474370113, "grad_norm": 0.07124752970334998, "learning_rate": 2.0883397948658702e-06, "loss": 0.4463, "step": 5628 }, { "epoch": 2.7928509370733523, "grad_norm": 0.07221734657858735, "learning_rate": 2.086750942608186e-06, "loss": 
0.4478, "step": 5629 }, { "epoch": 2.7933473997765916, "grad_norm": 0.07331998158164842, "learning_rate": 2.0851625355939117e-06, "loss": 0.488, "step": 5630 }, { "epoch": 2.7938438624798314, "grad_norm": 0.07110673575019735, "learning_rate": 2.0835745740658057e-06, "loss": 0.4409, "step": 5631 }, { "epoch": 2.7943403251830707, "grad_norm": 0.06747812385303419, "learning_rate": 2.0819870582665676e-06, "loss": 0.4219, "step": 5632 }, { "epoch": 2.79483678788631, "grad_norm": 0.07053503688375928, "learning_rate": 2.08039998843882e-06, "loss": 0.4516, "step": 5633 }, { "epoch": 2.7953332505895494, "grad_norm": 0.06946889062359742, "learning_rate": 2.0788133648251207e-06, "loss": 0.4344, "step": 5634 }, { "epoch": 2.7958297132927887, "grad_norm": 0.07207456844077999, "learning_rate": 2.0772271876679624e-06, "loss": 0.4659, "step": 5635 }, { "epoch": 2.796326175996028, "grad_norm": 0.07027972391444388, "learning_rate": 2.0756414572097635e-06, "loss": 0.4347, "step": 5636 }, { "epoch": 2.796822638699268, "grad_norm": 0.07229760993502869, "learning_rate": 2.074056173692881e-06, "loss": 0.4492, "step": 5637 }, { "epoch": 2.797319101402507, "grad_norm": 0.07510172712807424, "learning_rate": 2.072471337359599e-06, "loss": 0.467, "step": 5638 }, { "epoch": 2.7978155641057465, "grad_norm": 0.06966540788364817, "learning_rate": 2.070886948452133e-06, "loss": 0.4342, "step": 5639 }, { "epoch": 2.798312026808986, "grad_norm": 0.06957085396117565, "learning_rate": 2.069303007212633e-06, "loss": 0.4387, "step": 5640 }, { "epoch": 2.7988084895122256, "grad_norm": 0.07003352401136151, "learning_rate": 2.067719513883176e-06, "loss": 0.4369, "step": 5641 }, { "epoch": 2.799304952215465, "grad_norm": 0.07038614205771057, "learning_rate": 2.0661364687057772e-06, "loss": 0.4383, "step": 5642 }, { "epoch": 2.7998014149187043, "grad_norm": 0.07017671240403632, "learning_rate": 2.0645538719223767e-06, "loss": 0.4514, "step": 5643 }, { "epoch": 2.8002978776219436, "grad_norm": 0.0747377193940271, "learning_rate": 2.0629717237748526e-06, "loss": 0.4993, "step": 5644 }, { "epoch": 2.800794340325183, "grad_norm": 0.07037477944829895, "learning_rate": 2.0613900245050083e-06, "loss": 0.4285, "step": 5645 }, { "epoch": 2.8012908030284223, "grad_norm": 0.0710517615274243, "learning_rate": 2.0598087743545807e-06, "loss": 0.438, "step": 5646 }, { "epoch": 2.801787265731662, "grad_norm": 0.07301424456740002, "learning_rate": 2.058227973565241e-06, "loss": 0.4335, "step": 5647 }, { "epoch": 2.8022837284349014, "grad_norm": 0.07214885003404842, "learning_rate": 2.0566476223785857e-06, "loss": 0.4445, "step": 5648 }, { "epoch": 2.8027801911381407, "grad_norm": 0.0708736347174934, "learning_rate": 2.0550677210361502e-06, "loss": 0.4649, "step": 5649 }, { "epoch": 2.80327665384138, "grad_norm": 0.07880190272797606, "learning_rate": 2.0534882697793957e-06, "loss": 0.462, "step": 5650 }, { "epoch": 2.80377311654462, "grad_norm": 0.06952515190967841, "learning_rate": 2.0519092688497133e-06, "loss": 0.4126, "step": 5651 }, { "epoch": 2.804269579247859, "grad_norm": 0.07423937996352062, "learning_rate": 2.0503307184884313e-06, "loss": 0.4581, "step": 5652 }, { "epoch": 2.8047660419510985, "grad_norm": 0.07246555572886494, "learning_rate": 2.0487526189368058e-06, "loss": 0.4463, "step": 5653 }, { "epoch": 2.805262504654338, "grad_norm": 0.07132292131720794, "learning_rate": 2.0471749704360218e-06, "loss": 0.4423, "step": 5654 }, { "epoch": 2.805758967357577, "grad_norm": 0.07368385174282674, "learning_rate": 2.045597773227199e-06, 
"loss": 0.4742, "step": 5655 }, { "epoch": 2.8062554300608165, "grad_norm": 0.07227760152949314, "learning_rate": 2.0440210275513845e-06, "loss": 0.4569, "step": 5656 }, { "epoch": 2.8067518927640562, "grad_norm": 0.07272838553171634, "learning_rate": 2.042444733649562e-06, "loss": 0.4589, "step": 5657 }, { "epoch": 2.8072483554672956, "grad_norm": 0.07416783495308385, "learning_rate": 2.0408688917626402e-06, "loss": 0.4945, "step": 5658 }, { "epoch": 2.807744818170535, "grad_norm": 0.07326226133632513, "learning_rate": 2.039293502131463e-06, "loss": 0.4667, "step": 5659 }, { "epoch": 2.8082412808737742, "grad_norm": 0.06972396959337102, "learning_rate": 2.0377185649968036e-06, "loss": 0.467, "step": 5660 }, { "epoch": 2.808737743577014, "grad_norm": 0.07338530096769981, "learning_rate": 2.0361440805993627e-06, "loss": 0.4593, "step": 5661 }, { "epoch": 2.8092342062802533, "grad_norm": 0.07264115187947948, "learning_rate": 2.0345700491797786e-06, "loss": 0.4579, "step": 5662 }, { "epoch": 2.8097306689834927, "grad_norm": 0.07200847901130374, "learning_rate": 2.0329964709786144e-06, "loss": 0.473, "step": 5663 }, { "epoch": 2.810227131686732, "grad_norm": 0.07334938393925998, "learning_rate": 2.0314233462363687e-06, "loss": 0.4704, "step": 5664 }, { "epoch": 2.8107235943899713, "grad_norm": 0.06806632390588159, "learning_rate": 2.029850675193467e-06, "loss": 0.4235, "step": 5665 }, { "epoch": 2.8112200570932107, "grad_norm": 0.07441274494262383, "learning_rate": 2.0282784580902655e-06, "loss": 0.4518, "step": 5666 }, { "epoch": 2.8117165197964504, "grad_norm": 0.07318598053775449, "learning_rate": 2.026706695167055e-06, "loss": 0.4687, "step": 5667 }, { "epoch": 2.8122129824996898, "grad_norm": 0.07264327682527112, "learning_rate": 2.0251353866640515e-06, "loss": 0.4364, "step": 5668 }, { "epoch": 2.812709445202929, "grad_norm": 0.07139704661863898, "learning_rate": 2.0235645328214077e-06, "loss": 0.4434, "step": 5669 }, { "epoch": 2.8132059079061684, "grad_norm": 0.07453629708077483, "learning_rate": 2.0219941338792016e-06, "loss": 0.4633, "step": 5670 }, { "epoch": 2.813702370609408, "grad_norm": 0.07224548451159646, "learning_rate": 2.0204241900774434e-06, "loss": 0.4775, "step": 5671 }, { "epoch": 2.8141988333126475, "grad_norm": 0.07199251613284192, "learning_rate": 2.0188547016560738e-06, "loss": 0.4794, "step": 5672 }, { "epoch": 2.814695296015887, "grad_norm": 0.07081110183530961, "learning_rate": 2.017285668854962e-06, "loss": 0.4782, "step": 5673 }, { "epoch": 2.815191758719126, "grad_norm": 0.073617482185421, "learning_rate": 2.0157170919139137e-06, "loss": 0.462, "step": 5674 }, { "epoch": 2.8156882214223655, "grad_norm": 0.07112186816229596, "learning_rate": 2.0141489710726566e-06, "loss": 0.4551, "step": 5675 }, { "epoch": 2.816184684125605, "grad_norm": 0.07135177337572862, "learning_rate": 2.0125813065708568e-06, "loss": 0.4374, "step": 5676 }, { "epoch": 2.8166811468288446, "grad_norm": 0.07214896219901143, "learning_rate": 2.0110140986481043e-06, "loss": 0.4553, "step": 5677 }, { "epoch": 2.817177609532084, "grad_norm": 0.07258899423829851, "learning_rate": 2.00944734754392e-06, "loss": 0.4495, "step": 5678 }, { "epoch": 2.8176740722353233, "grad_norm": 0.07313560055809436, "learning_rate": 2.007881053497761e-06, "loss": 0.4505, "step": 5679 }, { "epoch": 2.8181705349385626, "grad_norm": 0.07223163063288415, "learning_rate": 2.006315216749006e-06, "loss": 0.4493, "step": 5680 }, { "epoch": 2.8186669976418024, "grad_norm": 0.07237609101988172, "learning_rate": 
2.004749837536972e-06, "loss": 0.446, "step": 5681 }, { "epoch": 2.8191634603450417, "grad_norm": 0.07157138426508139, "learning_rate": 2.0031849161009003e-06, "loss": 0.4389, "step": 5682 }, { "epoch": 2.819659923048281, "grad_norm": 0.06975936253696055, "learning_rate": 2.001620452679962e-06, "loss": 0.4585, "step": 5683 }, { "epoch": 2.8201563857515204, "grad_norm": 0.07346619156749418, "learning_rate": 2.000056447513264e-06, "loss": 0.4805, "step": 5684 }, { "epoch": 2.8206528484547597, "grad_norm": 0.07014287296263343, "learning_rate": 1.998492900839836e-06, "loss": 0.4613, "step": 5685 }, { "epoch": 2.821149311157999, "grad_norm": 0.0706945389906741, "learning_rate": 1.9969298128986457e-06, "loss": 0.4497, "step": 5686 }, { "epoch": 2.821645773861239, "grad_norm": 0.07091160174834128, "learning_rate": 1.9953671839285794e-06, "loss": 0.4453, "step": 5687 }, { "epoch": 2.822142236564478, "grad_norm": 0.0726675916967775, "learning_rate": 1.993805014168465e-06, "loss": 0.5102, "step": 5688 }, { "epoch": 2.8226386992677175, "grad_norm": 0.0717704906392301, "learning_rate": 1.9922433038570544e-06, "loss": 0.4428, "step": 5689 }, { "epoch": 2.823135161970957, "grad_norm": 0.07092607030772675, "learning_rate": 1.9906820532330262e-06, "loss": 0.4959, "step": 5690 }, { "epoch": 2.8236316246741966, "grad_norm": 0.07007383114953025, "learning_rate": 1.9891212625349983e-06, "loss": 0.4435, "step": 5691 }, { "epoch": 2.824128087377436, "grad_norm": 0.0728017778027581, "learning_rate": 1.987560932001509e-06, "loss": 0.4779, "step": 5692 }, { "epoch": 2.8246245500806753, "grad_norm": 0.07280756722503544, "learning_rate": 1.9860010618710286e-06, "loss": 0.4541, "step": 5693 }, { "epoch": 2.8251210127839146, "grad_norm": 0.07308516502105063, "learning_rate": 1.9844416523819622e-06, "loss": 0.431, "step": 5694 }, { "epoch": 2.825617475487154, "grad_norm": 0.07145369691673427, "learning_rate": 1.982882703772636e-06, "loss": 0.4234, "step": 5695 }, { "epoch": 2.8261139381903932, "grad_norm": 0.07453301298395182, "learning_rate": 1.981324216281315e-06, "loss": 0.4628, "step": 5696 }, { "epoch": 2.826610400893633, "grad_norm": 0.07203730077033106, "learning_rate": 1.979766190146187e-06, "loss": 0.48, "step": 5697 }, { "epoch": 2.8271068635968724, "grad_norm": 0.07398813444664835, "learning_rate": 1.978208625605369e-06, "loss": 0.4809, "step": 5698 }, { "epoch": 2.8276033263001117, "grad_norm": 0.06817625019096056, "learning_rate": 1.9766515228969136e-06, "loss": 0.4405, "step": 5699 }, { "epoch": 2.828099789003351, "grad_norm": 0.07373964121100986, "learning_rate": 1.9750948822587955e-06, "loss": 0.4928, "step": 5700 }, { "epoch": 2.828596251706591, "grad_norm": 0.07050400870015364, "learning_rate": 1.973538703928926e-06, "loss": 0.4548, "step": 5701 }, { "epoch": 2.82909271440983, "grad_norm": 0.0746664124741609, "learning_rate": 1.9719829881451396e-06, "loss": 0.4307, "step": 5702 }, { "epoch": 2.8295891771130695, "grad_norm": 0.07130220874136327, "learning_rate": 1.9704277351452028e-06, "loss": 0.4581, "step": 5703 }, { "epoch": 2.830085639816309, "grad_norm": 0.07260529178342427, "learning_rate": 1.9688729451668116e-06, "loss": 0.4329, "step": 5704 }, { "epoch": 2.830582102519548, "grad_norm": 0.07492902420199271, "learning_rate": 1.9673186184475885e-06, "loss": 0.4686, "step": 5705 }, { "epoch": 2.8310785652227874, "grad_norm": 0.0714214093617678, "learning_rate": 1.965764755225091e-06, "loss": 0.4387, "step": 5706 }, { "epoch": 2.831575027926027, "grad_norm": 0.07067957272379431, "learning_rate": 
1.964211355736798e-06, "loss": 0.4279, "step": 5707 }, { "epoch": 2.8320714906292666, "grad_norm": 0.07015666137280017, "learning_rate": 1.9626584202201267e-06, "loss": 0.4546, "step": 5708 }, { "epoch": 2.832567953332506, "grad_norm": 0.07454452830903732, "learning_rate": 1.961105948912415e-06, "loss": 0.4727, "step": 5709 }, { "epoch": 2.833064416035745, "grad_norm": 0.07120708361201979, "learning_rate": 1.9595539420509328e-06, "loss": 0.4443, "step": 5710 }, { "epoch": 2.833560878738985, "grad_norm": 0.07118528149786242, "learning_rate": 1.9580023998728823e-06, "loss": 0.46, "step": 5711 }, { "epoch": 2.8340573414422243, "grad_norm": 0.07069768707561298, "learning_rate": 1.956451322615389e-06, "loss": 0.4881, "step": 5712 }, { "epoch": 2.8345538041454637, "grad_norm": 0.07300010654314026, "learning_rate": 1.9549007105155127e-06, "loss": 0.4538, "step": 5713 }, { "epoch": 2.835050266848703, "grad_norm": 0.07363656559015999, "learning_rate": 1.9533505638102384e-06, "loss": 0.4644, "step": 5714 }, { "epoch": 2.8355467295519423, "grad_norm": 0.07306036415060045, "learning_rate": 1.951800882736479e-06, "loss": 0.4213, "step": 5715 }, { "epoch": 2.8360431922551816, "grad_norm": 0.07289854460238754, "learning_rate": 1.9502516675310836e-06, "loss": 0.4724, "step": 5716 }, { "epoch": 2.836539654958421, "grad_norm": 0.07420132974049734, "learning_rate": 1.9487029184308186e-06, "loss": 0.4756, "step": 5717 }, { "epoch": 2.8370361176616608, "grad_norm": 0.07423329737411025, "learning_rate": 1.947154635672393e-06, "loss": 0.4725, "step": 5718 }, { "epoch": 2.8375325803649, "grad_norm": 0.07415959326109783, "learning_rate": 1.945606819492429e-06, "loss": 0.4674, "step": 5719 }, { "epoch": 2.8380290430681394, "grad_norm": 0.07260651034602289, "learning_rate": 1.9440594701274906e-06, "loss": 0.4796, "step": 5720 }, { "epoch": 2.838525505771379, "grad_norm": 0.07364464260727976, "learning_rate": 1.9425125878140644e-06, "loss": 0.4597, "step": 5721 }, { "epoch": 2.8390219684746185, "grad_norm": 0.07212719334038142, "learning_rate": 1.9409661727885638e-06, "loss": 0.4396, "step": 5722 }, { "epoch": 2.839518431177858, "grad_norm": 0.07636798983029885, "learning_rate": 1.9394202252873377e-06, "loss": 0.4647, "step": 5723 }, { "epoch": 2.840014893881097, "grad_norm": 0.0703000889908455, "learning_rate": 1.9378747455466563e-06, "loss": 0.4367, "step": 5724 }, { "epoch": 2.8405113565843365, "grad_norm": 0.07379106009795416, "learning_rate": 1.9363297338027236e-06, "loss": 0.4513, "step": 5725 }, { "epoch": 2.841007819287576, "grad_norm": 0.07225481677416651, "learning_rate": 1.9347851902916694e-06, "loss": 0.4421, "step": 5726 }, { "epoch": 2.841504281990815, "grad_norm": 0.07090370027965347, "learning_rate": 1.93324111524955e-06, "loss": 0.4678, "step": 5727 }, { "epoch": 2.842000744694055, "grad_norm": 0.07410037463754408, "learning_rate": 1.9316975089123556e-06, "loss": 0.462, "step": 5728 }, { "epoch": 2.8424972073972943, "grad_norm": 0.0731944774344661, "learning_rate": 1.9301543715160014e-06, "loss": 0.4861, "step": 5729 }, { "epoch": 2.8429936701005336, "grad_norm": 0.07017095928272804, "learning_rate": 1.928611703296328e-06, "loss": 0.4635, "step": 5730 }, { "epoch": 2.8434901328037734, "grad_norm": 0.0730360434590913, "learning_rate": 1.927069504489112e-06, "loss": 0.503, "step": 5731 }, { "epoch": 2.8439865955070127, "grad_norm": 0.06964490329233675, "learning_rate": 1.925527775330049e-06, "loss": 0.4186, "step": 5732 }, { "epoch": 2.844483058210252, "grad_norm": 0.07140910322648998, "learning_rate": 
1.923986516054772e-06, "loss": 0.4371, "step": 5733 }, { "epoch": 2.8449795209134914, "grad_norm": 0.07106555542516287, "learning_rate": 1.9224457268988367e-06, "loss": 0.4386, "step": 5734 }, { "epoch": 2.8454759836167307, "grad_norm": 0.07498041184595833, "learning_rate": 1.9209054080977262e-06, "loss": 0.449, "step": 5735 }, { "epoch": 2.84597244631997, "grad_norm": 0.0730042564864519, "learning_rate": 1.9193655598868557e-06, "loss": 0.4714, "step": 5736 }, { "epoch": 2.8464689090232094, "grad_norm": 0.07411614717666876, "learning_rate": 1.9178261825015625e-06, "loss": 0.4519, "step": 5737 }, { "epoch": 2.846965371726449, "grad_norm": 0.06889493988420331, "learning_rate": 1.9162872761771207e-06, "loss": 0.4145, "step": 5738 }, { "epoch": 2.8474618344296885, "grad_norm": 0.07156478169309566, "learning_rate": 1.9147488411487226e-06, "loss": 0.465, "step": 5739 }, { "epoch": 2.847958297132928, "grad_norm": 0.07360117429747257, "learning_rate": 1.9132108776514985e-06, "loss": 0.4448, "step": 5740 }, { "epoch": 2.8484547598361676, "grad_norm": 0.07013544132089725, "learning_rate": 1.9116733859204984e-06, "loss": 0.4219, "step": 5741 }, { "epoch": 2.848951222539407, "grad_norm": 0.07207103402288535, "learning_rate": 1.910136366190702e-06, "loss": 0.4669, "step": 5742 }, { "epoch": 2.8494476852426462, "grad_norm": 0.07340381981525523, "learning_rate": 1.9085998186970215e-06, "loss": 0.4436, "step": 5743 }, { "epoch": 2.8499441479458856, "grad_norm": 0.07207559538977827, "learning_rate": 1.9070637436742905e-06, "loss": 0.471, "step": 5744 }, { "epoch": 2.850440610649125, "grad_norm": 0.07266945487920531, "learning_rate": 1.9055281413572763e-06, "loss": 0.4404, "step": 5745 }, { "epoch": 2.8509370733523642, "grad_norm": 0.07519234782264786, "learning_rate": 1.9039930119806698e-06, "loss": 0.4387, "step": 5746 }, { "epoch": 2.8514335360556036, "grad_norm": 0.07715864442431582, "learning_rate": 1.9024583557790889e-06, "loss": 0.4538, "step": 5747 }, { "epoch": 2.8519299987588433, "grad_norm": 0.07373136234905418, "learning_rate": 1.9009241729870842e-06, "loss": 0.4618, "step": 5748 }, { "epoch": 2.8524264614620827, "grad_norm": 0.07179951947194625, "learning_rate": 1.8993904638391286e-06, "loss": 0.4335, "step": 5749 }, { "epoch": 2.852922924165322, "grad_norm": 0.07371238617587633, "learning_rate": 1.8978572285696296e-06, "loss": 0.4597, "step": 5750 }, { "epoch": 2.8534193868685613, "grad_norm": 0.07073156813998753, "learning_rate": 1.8963244674129104e-06, "loss": 0.4488, "step": 5751 }, { "epoch": 2.853915849571801, "grad_norm": 0.07732984470628916, "learning_rate": 1.894792180603235e-06, "loss": 0.4575, "step": 5752 }, { "epoch": 2.8544123122750404, "grad_norm": 0.07187498679301367, "learning_rate": 1.8932603683747858e-06, "loss": 0.4303, "step": 5753 }, { "epoch": 2.8549087749782798, "grad_norm": 0.07386629483634839, "learning_rate": 1.8917290309616754e-06, "loss": 0.4666, "step": 5754 }, { "epoch": 2.855405237681519, "grad_norm": 0.0722961968794769, "learning_rate": 1.8901981685979464e-06, "loss": 0.4656, "step": 5755 }, { "epoch": 2.8559017003847584, "grad_norm": 0.07270464651085981, "learning_rate": 1.8886677815175642e-06, "loss": 0.4698, "step": 5756 }, { "epoch": 2.8563981630879978, "grad_norm": 0.07533072554757832, "learning_rate": 1.887137869954427e-06, "loss": 0.4578, "step": 5757 }, { "epoch": 2.8568946257912375, "grad_norm": 0.06979861982970094, "learning_rate": 1.8856084341423552e-06, "loss": 0.503, "step": 5758 }, { "epoch": 2.857391088494477, "grad_norm": 0.07449849243741927, 
"learning_rate": 1.884079474315097e-06, "loss": 0.4635, "step": 5759 }, { "epoch": 2.857887551197716, "grad_norm": 0.07483026512075892, "learning_rate": 1.8825509907063328e-06, "loss": 0.4863, "step": 5760 }, { "epoch": 2.8583840139009555, "grad_norm": 0.07179229556946186, "learning_rate": 1.8810229835496635e-06, "loss": 0.4537, "step": 5761 }, { "epoch": 2.8588804766041953, "grad_norm": 0.07270191976343439, "learning_rate": 1.8794954530786242e-06, "loss": 0.4632, "step": 5762 }, { "epoch": 2.8593769393074346, "grad_norm": 0.07087539190876527, "learning_rate": 1.8779683995266712e-06, "loss": 0.4599, "step": 5763 }, { "epoch": 2.859873402010674, "grad_norm": 0.07380883554497368, "learning_rate": 1.8764418231271885e-06, "loss": 0.4351, "step": 5764 }, { "epoch": 2.8603698647139133, "grad_norm": 0.07207762992746394, "learning_rate": 1.8749157241134924e-06, "loss": 0.4574, "step": 5765 }, { "epoch": 2.8608663274171526, "grad_norm": 0.07121802723137254, "learning_rate": 1.8733901027188212e-06, "loss": 0.4715, "step": 5766 }, { "epoch": 2.861362790120392, "grad_norm": 0.07084904663872903, "learning_rate": 1.8718649591763415e-06, "loss": 0.4782, "step": 5767 }, { "epoch": 2.8618592528236317, "grad_norm": 0.07167843014443905, "learning_rate": 1.8703402937191467e-06, "loss": 0.4913, "step": 5768 }, { "epoch": 2.862355715526871, "grad_norm": 0.07151970972114965, "learning_rate": 1.8688161065802563e-06, "loss": 0.4573, "step": 5769 }, { "epoch": 2.8628521782301104, "grad_norm": 0.07035903364506926, "learning_rate": 1.8672923979926205e-06, "loss": 0.4413, "step": 5770 }, { "epoch": 2.8633486409333497, "grad_norm": 0.07578753526967076, "learning_rate": 1.865769168189111e-06, "loss": 0.4756, "step": 5771 }, { "epoch": 2.8638451036365895, "grad_norm": 0.07075700344737111, "learning_rate": 1.8642464174025327e-06, "loss": 0.4449, "step": 5772 }, { "epoch": 2.864341566339829, "grad_norm": 0.07203092781562426, "learning_rate": 1.862724145865611e-06, "loss": 0.4492, "step": 5773 }, { "epoch": 2.864838029043068, "grad_norm": 0.06914479180823499, "learning_rate": 1.8612023538109998e-06, "loss": 0.4561, "step": 5774 }, { "epoch": 2.8653344917463075, "grad_norm": 0.07273419996837144, "learning_rate": 1.8596810414712835e-06, "loss": 0.4722, "step": 5775 }, { "epoch": 2.865830954449547, "grad_norm": 0.07111294951471243, "learning_rate": 1.8581602090789674e-06, "loss": 0.4595, "step": 5776 }, { "epoch": 2.866327417152786, "grad_norm": 0.07008046197731649, "learning_rate": 1.8566398568664896e-06, "loss": 0.4373, "step": 5777 }, { "epoch": 2.866823879856026, "grad_norm": 0.07270960522246615, "learning_rate": 1.8551199850662094e-06, "loss": 0.4753, "step": 5778 }, { "epoch": 2.8673203425592653, "grad_norm": 0.07023865205596509, "learning_rate": 1.8536005939104135e-06, "loss": 0.4712, "step": 5779 }, { "epoch": 2.8678168052625046, "grad_norm": 0.07396879722713033, "learning_rate": 1.8520816836313195e-06, "loss": 0.4589, "step": 5780 }, { "epoch": 2.868313267965744, "grad_norm": 0.07038011700790384, "learning_rate": 1.8505632544610657e-06, "loss": 0.4226, "step": 5781 }, { "epoch": 2.8688097306689837, "grad_norm": 0.06978356890739894, "learning_rate": 1.8490453066317248e-06, "loss": 0.4428, "step": 5782 }, { "epoch": 2.869306193372223, "grad_norm": 0.07286599599417821, "learning_rate": 1.8475278403752833e-06, "loss": 0.4756, "step": 5783 }, { "epoch": 2.8698026560754624, "grad_norm": 0.07421525777471157, "learning_rate": 1.8460108559236673e-06, "loss": 0.4417, "step": 5784 }, { "epoch": 2.8702991187787017, "grad_norm": 
0.07028522021448058, "learning_rate": 1.8444943535087217e-06, "loss": 0.4358, "step": 5785 }, { "epoch": 2.870795581481941, "grad_norm": 0.0764537827742573, "learning_rate": 1.8429783333622176e-06, "loss": 0.4842, "step": 5786 }, { "epoch": 2.8712920441851804, "grad_norm": 0.07188922291547878, "learning_rate": 1.8414627957158577e-06, "loss": 0.4432, "step": 5787 }, { "epoch": 2.87178850688842, "grad_norm": 0.07264028901682451, "learning_rate": 1.8399477408012645e-06, "loss": 0.4695, "step": 5788 }, { "epoch": 2.8722849695916595, "grad_norm": 0.07189418837208039, "learning_rate": 1.8384331688499934e-06, "loss": 0.4258, "step": 5789 }, { "epoch": 2.872781432294899, "grad_norm": 0.07198764033425765, "learning_rate": 1.836919080093521e-06, "loss": 0.4401, "step": 5790 }, { "epoch": 2.873277894998138, "grad_norm": 0.07460868295659728, "learning_rate": 1.8354054747632489e-06, "loss": 0.464, "step": 5791 }, { "epoch": 2.873774357701378, "grad_norm": 0.07080197418491792, "learning_rate": 1.8338923530905112e-06, "loss": 0.4311, "step": 5792 }, { "epoch": 2.8742708204046172, "grad_norm": 0.07043644767035809, "learning_rate": 1.832379715306561e-06, "loss": 0.4416, "step": 5793 }, { "epoch": 2.8747672831078566, "grad_norm": 0.07550125283727059, "learning_rate": 1.8308675616425843e-06, "loss": 0.4471, "step": 5794 }, { "epoch": 2.875263745811096, "grad_norm": 0.07238050144322952, "learning_rate": 1.8293558923296873e-06, "loss": 0.4458, "step": 5795 }, { "epoch": 2.8757602085143352, "grad_norm": 0.07071205447465201, "learning_rate": 1.8278447075989037e-06, "loss": 0.4369, "step": 5796 }, { "epoch": 2.8762566712175746, "grad_norm": 0.07135347695106668, "learning_rate": 1.8263340076811958e-06, "loss": 0.4327, "step": 5797 }, { "epoch": 2.8767531339208143, "grad_norm": 0.07025016632242698, "learning_rate": 1.8248237928074492e-06, "loss": 0.4237, "step": 5798 }, { "epoch": 2.8772495966240537, "grad_norm": 0.07701605183732108, "learning_rate": 1.823314063208476e-06, "loss": 0.4931, "step": 5799 }, { "epoch": 2.877746059327293, "grad_norm": 0.07220406200650448, "learning_rate": 1.8218048191150123e-06, "loss": 0.4396, "step": 5800 }, { "epoch": 2.8782425220305323, "grad_norm": 0.07287019392934202, "learning_rate": 1.8202960607577246e-06, "loss": 0.4656, "step": 5801 }, { "epoch": 2.878738984733772, "grad_norm": 0.07524685673051981, "learning_rate": 1.8187877883672024e-06, "loss": 0.5075, "step": 5802 }, { "epoch": 2.8792354474370114, "grad_norm": 0.07106511484488788, "learning_rate": 1.8172800021739573e-06, "loss": 0.4338, "step": 5803 }, { "epoch": 2.8797319101402508, "grad_norm": 0.07288049698231516, "learning_rate": 1.8157727024084348e-06, "loss": 0.455, "step": 5804 }, { "epoch": 2.88022837284349, "grad_norm": 0.07187959245629297, "learning_rate": 1.8142658893009995e-06, "loss": 0.4552, "step": 5805 }, { "epoch": 2.8807248355467294, "grad_norm": 0.07247759393249832, "learning_rate": 1.8127595630819422e-06, "loss": 0.4909, "step": 5806 }, { "epoch": 2.8812212982499688, "grad_norm": 0.0721473018210314, "learning_rate": 1.8112537239814836e-06, "loss": 0.4628, "step": 5807 }, { "epoch": 2.8817177609532085, "grad_norm": 0.0719175610043289, "learning_rate": 1.8097483722297644e-06, "loss": 0.4486, "step": 5808 }, { "epoch": 2.882214223656448, "grad_norm": 0.07175367367263759, "learning_rate": 1.8082435080568556e-06, "loss": 0.426, "step": 5809 }, { "epoch": 2.882710686359687, "grad_norm": 0.0746276008814608, "learning_rate": 1.8067391316927514e-06, "loss": 0.4519, "step": 5810 }, { "epoch": 2.8832071490629265, 
"grad_norm": 0.06967624512108889, "learning_rate": 1.8052352433673687e-06, "loss": 0.463, "step": 5811 }, { "epoch": 2.8837036117661663, "grad_norm": 0.07322094442708088, "learning_rate": 1.8037318433105566e-06, "loss": 0.4662, "step": 5812 }, { "epoch": 2.8842000744694056, "grad_norm": 0.07187306447088909, "learning_rate": 1.8022289317520826e-06, "loss": 0.4726, "step": 5813 }, { "epoch": 2.884696537172645, "grad_norm": 0.07162376268349631, "learning_rate": 1.800726508921647e-06, "loss": 0.4581, "step": 5814 }, { "epoch": 2.8851929998758843, "grad_norm": 0.0727599031863759, "learning_rate": 1.799224575048865e-06, "loss": 0.4741, "step": 5815 }, { "epoch": 2.8856894625791236, "grad_norm": 0.06846531021709305, "learning_rate": 1.797723130363288e-06, "loss": 0.4339, "step": 5816 }, { "epoch": 2.886185925282363, "grad_norm": 0.0761370981096178, "learning_rate": 1.7962221750943859e-06, "loss": 0.459, "step": 5817 }, { "epoch": 2.8866823879856027, "grad_norm": 0.07505223732737246, "learning_rate": 1.7947217094715536e-06, "loss": 0.4905, "step": 5818 }, { "epoch": 2.887178850688842, "grad_norm": 0.07104653840701323, "learning_rate": 1.793221733724117e-06, "loss": 0.4611, "step": 5819 }, { "epoch": 2.8876753133920814, "grad_norm": 0.07272648271449884, "learning_rate": 1.7917222480813202e-06, "loss": 0.4638, "step": 5820 }, { "epoch": 2.8881717760953207, "grad_norm": 0.07331371305136444, "learning_rate": 1.7902232527723385e-06, "loss": 0.446, "step": 5821 }, { "epoch": 2.8886682387985605, "grad_norm": 0.07126558933581785, "learning_rate": 1.7887247480262677e-06, "loss": 0.4548, "step": 5822 }, { "epoch": 2.8891647015018, "grad_norm": 0.07465013992994197, "learning_rate": 1.7872267340721289e-06, "loss": 0.4363, "step": 5823 }, { "epoch": 2.889661164205039, "grad_norm": 0.0740872483391922, "learning_rate": 1.7857292111388724e-06, "loss": 0.4726, "step": 5824 }, { "epoch": 2.8901576269082785, "grad_norm": 0.07407082014718436, "learning_rate": 1.7842321794553674e-06, "loss": 0.4612, "step": 5825 }, { "epoch": 2.890654089611518, "grad_norm": 0.07602916638072348, "learning_rate": 1.7827356392504142e-06, "loss": 0.4958, "step": 5826 }, { "epoch": 2.891150552314757, "grad_norm": 0.06968809240417105, "learning_rate": 1.781239590752734e-06, "loss": 0.4284, "step": 5827 }, { "epoch": 2.891647015017997, "grad_norm": 0.07346959924100413, "learning_rate": 1.7797440341909716e-06, "loss": 0.4841, "step": 5828 }, { "epoch": 2.8921434777212363, "grad_norm": 0.08088666560812105, "learning_rate": 1.7782489697937027e-06, "loss": 0.5473, "step": 5829 }, { "epoch": 2.8926399404244756, "grad_norm": 0.07151855750187917, "learning_rate": 1.7767543977894198e-06, "loss": 0.4897, "step": 5830 }, { "epoch": 2.893136403127715, "grad_norm": 0.07415823417761293, "learning_rate": 1.7752603184065498e-06, "loss": 0.4661, "step": 5831 }, { "epoch": 2.8936328658309547, "grad_norm": 0.06945222643401203, "learning_rate": 1.7737667318734326e-06, "loss": 0.4221, "step": 5832 }, { "epoch": 2.894129328534194, "grad_norm": 0.07058770550666034, "learning_rate": 1.7722736384183426e-06, "loss": 0.4423, "step": 5833 }, { "epoch": 2.8946257912374334, "grad_norm": 0.07479340645321704, "learning_rate": 1.7707810382694745e-06, "loss": 0.4731, "step": 5834 }, { "epoch": 2.8951222539406727, "grad_norm": 0.07147594140636955, "learning_rate": 1.7692889316549465e-06, "loss": 0.4379, "step": 5835 }, { "epoch": 2.895618716643912, "grad_norm": 0.07253687881245385, "learning_rate": 1.7677973188028069e-06, "loss": 0.4715, "step": 5836 }, { "epoch": 
2.8961151793471513, "grad_norm": 0.07241853848143014, "learning_rate": 1.7663061999410209e-06, "loss": 0.4785, "step": 5837 }, { "epoch": 2.896611642050391, "grad_norm": 0.07371362598436333, "learning_rate": 1.7648155752974848e-06, "loss": 0.4683, "step": 5838 }, { "epoch": 2.8971081047536305, "grad_norm": 0.07216697012999135, "learning_rate": 1.7633254451000164e-06, "loss": 0.447, "step": 5839 }, { "epoch": 2.89760456745687, "grad_norm": 0.07059782522077165, "learning_rate": 1.761835809576356e-06, "loss": 0.4441, "step": 5840 }, { "epoch": 2.898101030160109, "grad_norm": 0.07011250857472014, "learning_rate": 1.7603466689541737e-06, "loss": 0.435, "step": 5841 }, { "epoch": 2.898597492863349, "grad_norm": 0.07215203526460655, "learning_rate": 1.7588580234610592e-06, "loss": 0.4473, "step": 5842 }, { "epoch": 2.899093955566588, "grad_norm": 0.07211326609340975, "learning_rate": 1.7573698733245258e-06, "loss": 0.4371, "step": 5843 }, { "epoch": 2.8995904182698276, "grad_norm": 0.07631968305239617, "learning_rate": 1.755882218772018e-06, "loss": 0.4498, "step": 5844 }, { "epoch": 2.900086880973067, "grad_norm": 0.07312916715153597, "learning_rate": 1.7543950600308957e-06, "loss": 0.4595, "step": 5845 }, { "epoch": 2.900583343676306, "grad_norm": 0.07114981311857127, "learning_rate": 1.7529083973284506e-06, "loss": 0.4413, "step": 5846 }, { "epoch": 2.9010798063795455, "grad_norm": 0.07136003851732914, "learning_rate": 1.7514222308918944e-06, "loss": 0.4493, "step": 5847 }, { "epoch": 2.9015762690827853, "grad_norm": 0.07570826101502601, "learning_rate": 1.7499365609483627e-06, "loss": 0.4815, "step": 5848 }, { "epoch": 2.9020727317860247, "grad_norm": 0.07242856991285997, "learning_rate": 1.748451387724917e-06, "loss": 0.4437, "step": 5849 }, { "epoch": 2.902569194489264, "grad_norm": 0.07259034528913118, "learning_rate": 1.74696671144854e-06, "loss": 0.4513, "step": 5850 }, { "epoch": 2.9030656571925033, "grad_norm": 0.07113950495000526, "learning_rate": 1.745482532346145e-06, "loss": 0.4478, "step": 5851 }, { "epoch": 2.903562119895743, "grad_norm": 0.0720227909235229, "learning_rate": 1.743998850644561e-06, "loss": 0.4631, "step": 5852 }, { "epoch": 2.9040585825989824, "grad_norm": 0.07017499253087572, "learning_rate": 1.7425156665705478e-06, "loss": 0.4327, "step": 5853 }, { "epoch": 2.9045550453022217, "grad_norm": 0.07243409627224551, "learning_rate": 1.741032980350786e-06, "loss": 0.4664, "step": 5854 }, { "epoch": 2.905051508005461, "grad_norm": 0.06974170123821136, "learning_rate": 1.739550792211877e-06, "loss": 0.4444, "step": 5855 }, { "epoch": 2.9055479707087004, "grad_norm": 0.07346891443058372, "learning_rate": 1.7380691023803543e-06, "loss": 0.4645, "step": 5856 }, { "epoch": 2.9060444334119397, "grad_norm": 0.06952419588660884, "learning_rate": 1.7365879110826667e-06, "loss": 0.4535, "step": 5857 }, { "epoch": 2.906540896115179, "grad_norm": 0.0722978135792198, "learning_rate": 1.7351072185451934e-06, "loss": 0.4418, "step": 5858 }, { "epoch": 2.907037358818419, "grad_norm": 0.07169213101467879, "learning_rate": 1.7336270249942333e-06, "loss": 0.4442, "step": 5859 }, { "epoch": 2.907533821521658, "grad_norm": 0.07027625231493947, "learning_rate": 1.7321473306560082e-06, "loss": 0.4422, "step": 5860 }, { "epoch": 2.9080302842248975, "grad_norm": 0.0724082992248681, "learning_rate": 1.7306681357566695e-06, "loss": 0.4418, "step": 5861 }, { "epoch": 2.9085267469281373, "grad_norm": 0.07361412412474029, "learning_rate": 1.7291894405222847e-06, "loss": 0.4546, "step": 5862 }, { 
"epoch": 2.9090232096313766, "grad_norm": 0.07628467050428009, "learning_rate": 1.7277112451788542e-06, "loss": 0.4687, "step": 5863 }, { "epoch": 2.909519672334616, "grad_norm": 0.07302537711850288, "learning_rate": 1.7262335499522886e-06, "loss": 0.4471, "step": 5864 }, { "epoch": 2.9100161350378553, "grad_norm": 0.07516702388732484, "learning_rate": 1.7247563550684366e-06, "loss": 0.4561, "step": 5865 }, { "epoch": 2.9105125977410946, "grad_norm": 0.07184057747732295, "learning_rate": 1.7232796607530606e-06, "loss": 0.4588, "step": 5866 }, { "epoch": 2.911009060444334, "grad_norm": 0.06890078743814726, "learning_rate": 1.7218034672318485e-06, "loss": 0.4434, "step": 5867 }, { "epoch": 2.9115055231475733, "grad_norm": 0.07510715431067853, "learning_rate": 1.7203277747304164e-06, "loss": 0.4912, "step": 5868 }, { "epoch": 2.912001985850813, "grad_norm": 0.07157713937648363, "learning_rate": 1.718852583474297e-06, "loss": 0.4378, "step": 5869 }, { "epoch": 2.9124984485540524, "grad_norm": 0.06864218506193251, "learning_rate": 1.7173778936889523e-06, "loss": 0.4438, "step": 5870 }, { "epoch": 2.9129949112572917, "grad_norm": 0.07437592986907665, "learning_rate": 1.715903705599764e-06, "loss": 0.4704, "step": 5871 }, { "epoch": 2.9134913739605315, "grad_norm": 0.0733699303510351, "learning_rate": 1.7144300194320357e-06, "loss": 0.4618, "step": 5872 }, { "epoch": 2.913987836663771, "grad_norm": 0.0718682272644226, "learning_rate": 1.712956835411001e-06, "loss": 0.4473, "step": 5873 }, { "epoch": 2.91448429936701, "grad_norm": 0.07602899319234606, "learning_rate": 1.7114841537618081e-06, "loss": 0.4771, "step": 5874 }, { "epoch": 2.9149807620702495, "grad_norm": 0.07309674636622461, "learning_rate": 1.7100119747095372e-06, "loss": 0.4348, "step": 5875 }, { "epoch": 2.915477224773489, "grad_norm": 0.07057419622476488, "learning_rate": 1.7085402984791848e-06, "loss": 0.4342, "step": 5876 }, { "epoch": 2.915973687476728, "grad_norm": 0.07101692739372809, "learning_rate": 1.707069125295671e-06, "loss": 0.4413, "step": 5877 }, { "epoch": 2.9164701501799675, "grad_norm": 0.06989251553843452, "learning_rate": 1.7055984553838455e-06, "loss": 0.4089, "step": 5878 }, { "epoch": 2.9169666128832072, "grad_norm": 0.06969315355799537, "learning_rate": 1.7041282889684746e-06, "loss": 0.4518, "step": 5879 }, { "epoch": 2.9174630755864466, "grad_norm": 0.07050586361392111, "learning_rate": 1.702658626274249e-06, "loss": 0.4643, "step": 5880 }, { "epoch": 2.917959538289686, "grad_norm": 0.0703534380162176, "learning_rate": 1.701189467525784e-06, "loss": 0.4701, "step": 5881 }, { "epoch": 2.9184560009929257, "grad_norm": 0.07285478802365085, "learning_rate": 1.6997208129476144e-06, "loss": 0.4818, "step": 5882 }, { "epoch": 2.918952463696165, "grad_norm": 0.0713570994512701, "learning_rate": 1.6982526627642043e-06, "loss": 0.4396, "step": 5883 }, { "epoch": 2.9194489263994043, "grad_norm": 0.07084619268097336, "learning_rate": 1.6967850171999334e-06, "loss": 0.4483, "step": 5884 }, { "epoch": 2.9199453891026437, "grad_norm": 0.07312699322762967, "learning_rate": 1.6953178764791116e-06, "loss": 0.5011, "step": 5885 }, { "epoch": 2.920441851805883, "grad_norm": 0.06913757556808983, "learning_rate": 1.6938512408259655e-06, "loss": 0.4439, "step": 5886 }, { "epoch": 2.9209383145091223, "grad_norm": 0.07112579876663329, "learning_rate": 1.6923851104646461e-06, "loss": 0.4515, "step": 5887 }, { "epoch": 2.9214347772123617, "grad_norm": 0.07114918399378936, "learning_rate": 1.69091948561923e-06, "loss": 0.4366, "step": 
5888 }, { "epoch": 2.9219312399156014, "grad_norm": 0.07312432271511711, "learning_rate": 1.689454366513712e-06, "loss": 0.4458, "step": 5889 }, { "epoch": 2.9224277026188408, "grad_norm": 0.07083664164054572, "learning_rate": 1.6879897533720151e-06, "loss": 0.4653, "step": 5890 }, { "epoch": 2.92292416532208, "grad_norm": 0.07591038722612382, "learning_rate": 1.6865256464179808e-06, "loss": 0.468, "step": 5891 }, { "epoch": 2.9234206280253194, "grad_norm": 0.07074166751616634, "learning_rate": 1.685062045875372e-06, "loss": 0.4627, "step": 5892 }, { "epoch": 2.923917090728559, "grad_norm": 0.07313498112061012, "learning_rate": 1.6835989519678802e-06, "loss": 0.4496, "step": 5893 }, { "epoch": 2.9244135534317985, "grad_norm": 0.0703811251728174, "learning_rate": 1.682136364919112e-06, "loss": 0.4339, "step": 5894 }, { "epoch": 2.924910016135038, "grad_norm": 0.07137372235534829, "learning_rate": 1.6806742849526064e-06, "loss": 0.435, "step": 5895 }, { "epoch": 2.925406478838277, "grad_norm": 0.07235141275487113, "learning_rate": 1.6792127122918116e-06, "loss": 0.5001, "step": 5896 }, { "epoch": 2.9259029415415165, "grad_norm": 0.07145659685400398, "learning_rate": 1.6777516471601103e-06, "loss": 0.4395, "step": 5897 }, { "epoch": 2.926399404244756, "grad_norm": 0.0739168525849856, "learning_rate": 1.6762910897808017e-06, "loss": 0.4632, "step": 5898 }, { "epoch": 2.9268958669479956, "grad_norm": 0.07021567224806549, "learning_rate": 1.6748310403771067e-06, "loss": 0.4623, "step": 5899 }, { "epoch": 2.927392329651235, "grad_norm": 0.07010595995431612, "learning_rate": 1.6733714991721738e-06, "loss": 0.4528, "step": 5900 }, { "epoch": 2.9278887923544743, "grad_norm": 0.07479842189450096, "learning_rate": 1.6719124663890674e-06, "loss": 0.4643, "step": 5901 }, { "epoch": 2.9283852550577136, "grad_norm": 0.07251917648131366, "learning_rate": 1.6704539422507803e-06, "loss": 0.4624, "step": 5902 }, { "epoch": 2.9288817177609534, "grad_norm": 0.07173529330807134, "learning_rate": 1.668995926980223e-06, "loss": 0.4721, "step": 5903 }, { "epoch": 2.9293781804641927, "grad_norm": 0.07271552046391146, "learning_rate": 1.6675384208002275e-06, "loss": 0.473, "step": 5904 }, { "epoch": 2.929874643167432, "grad_norm": 0.07226439840476027, "learning_rate": 1.666081423933555e-06, "loss": 0.4586, "step": 5905 }, { "epoch": 2.9303711058706714, "grad_norm": 0.0714342130930368, "learning_rate": 1.6646249366028788e-06, "loss": 0.4581, "step": 5906 }, { "epoch": 2.9308675685739107, "grad_norm": 0.06945596117253568, "learning_rate": 1.6631689590308049e-06, "loss": 0.4604, "step": 5907 }, { "epoch": 2.93136403127715, "grad_norm": 0.07326856612947807, "learning_rate": 1.661713491439853e-06, "loss": 0.4557, "step": 5908 }, { "epoch": 2.93186049398039, "grad_norm": 0.07050725043012718, "learning_rate": 1.6602585340524669e-06, "loss": 0.4416, "step": 5909 }, { "epoch": 2.932356956683629, "grad_norm": 0.07050986823414582, "learning_rate": 1.658804087091017e-06, "loss": 0.4476, "step": 5910 }, { "epoch": 2.9328534193868685, "grad_norm": 0.07159943014698252, "learning_rate": 1.6573501507777906e-06, "loss": 0.4698, "step": 5911 }, { "epoch": 2.933349882090108, "grad_norm": 0.07083310107065469, "learning_rate": 1.6558967253349983e-06, "loss": 0.464, "step": 5912 }, { "epoch": 2.9338463447933476, "grad_norm": 0.06952388035745817, "learning_rate": 1.654443810984771e-06, "loss": 0.4147, "step": 5913 }, { "epoch": 2.934342807496587, "grad_norm": 0.07059058804372653, "learning_rate": 1.652991407949167e-06, "loss": 0.4576, 
"step": 5914 }, { "epoch": 2.9348392701998263, "grad_norm": 0.07134194094863748, "learning_rate": 1.6515395164501613e-06, "loss": 0.4631, "step": 5915 }, { "epoch": 2.9353357329030656, "grad_norm": 0.07335501990274129, "learning_rate": 1.6500881367096506e-06, "loss": 0.4772, "step": 5916 }, { "epoch": 2.935832195606305, "grad_norm": 0.07235343050752824, "learning_rate": 1.6486372689494573e-06, "loss": 0.4827, "step": 5917 }, { "epoch": 2.9363286583095443, "grad_norm": 0.07111464655511497, "learning_rate": 1.6471869133913232e-06, "loss": 0.4351, "step": 5918 }, { "epoch": 2.936825121012784, "grad_norm": 0.07387778721142053, "learning_rate": 1.6457370702569093e-06, "loss": 0.4706, "step": 5919 }, { "epoch": 2.9373215837160234, "grad_norm": 0.07393542980680054, "learning_rate": 1.6442877397678042e-06, "loss": 0.4569, "step": 5920 }, { "epoch": 2.9378180464192627, "grad_norm": 0.0720364009150225, "learning_rate": 1.6428389221455115e-06, "loss": 0.454, "step": 5921 }, { "epoch": 2.938314509122502, "grad_norm": 0.07412971708528018, "learning_rate": 1.6413906176114636e-06, "loss": 0.4657, "step": 5922 }, { "epoch": 2.938810971825742, "grad_norm": 0.07287513599920468, "learning_rate": 1.6399428263870082e-06, "loss": 0.4298, "step": 5923 }, { "epoch": 2.939307434528981, "grad_norm": 0.07213379859066761, "learning_rate": 1.6384955486934157e-06, "loss": 0.4532, "step": 5924 }, { "epoch": 2.9398038972322205, "grad_norm": 0.07156534659604881, "learning_rate": 1.6370487847518829e-06, "loss": 0.4687, "step": 5925 }, { "epoch": 2.94030035993546, "grad_norm": 0.07205669107853233, "learning_rate": 1.6356025347835209e-06, "loss": 0.4455, "step": 5926 }, { "epoch": 2.940796822638699, "grad_norm": 0.07571569336289337, "learning_rate": 1.6341567990093704e-06, "loss": 0.463, "step": 5927 }, { "epoch": 2.9412932853419385, "grad_norm": 0.07239860011491428, "learning_rate": 1.6327115776503833e-06, "loss": 0.4341, "step": 5928 }, { "epoch": 2.9417897480451782, "grad_norm": 0.0741988580326704, "learning_rate": 1.631266870927442e-06, "loss": 0.4464, "step": 5929 }, { "epoch": 2.9422862107484176, "grad_norm": 0.07056138848741017, "learning_rate": 1.6298226790613464e-06, "loss": 0.459, "step": 5930 }, { "epoch": 2.942782673451657, "grad_norm": 0.07345289330859334, "learning_rate": 1.6283790022728164e-06, "loss": 0.4449, "step": 5931 }, { "epoch": 2.9432791361548962, "grad_norm": 0.07104898966124364, "learning_rate": 1.626935840782497e-06, "loss": 0.4536, "step": 5932 }, { "epoch": 2.943775598858136, "grad_norm": 0.07431978745376211, "learning_rate": 1.6254931948109498e-06, "loss": 0.4765, "step": 5933 }, { "epoch": 2.9442720615613753, "grad_norm": 0.07430155293124209, "learning_rate": 1.6240510645786639e-06, "loss": 0.5009, "step": 5934 }, { "epoch": 2.9447685242646147, "grad_norm": 0.07254532970085129, "learning_rate": 1.622609450306043e-06, "loss": 0.4661, "step": 5935 }, { "epoch": 2.945264986967854, "grad_norm": 0.07190075269617335, "learning_rate": 1.6211683522134136e-06, "loss": 0.4431, "step": 5936 }, { "epoch": 2.9457614496710933, "grad_norm": 0.07219291405916406, "learning_rate": 1.6197277705210278e-06, "loss": 0.4528, "step": 5937 }, { "epoch": 2.9462579123743327, "grad_norm": 0.07069487209787771, "learning_rate": 1.6182877054490526e-06, "loss": 0.4334, "step": 5938 }, { "epoch": 2.9467543750775724, "grad_norm": 0.07109599185929143, "learning_rate": 1.6168481572175814e-06, "loss": 0.454, "step": 5939 }, { "epoch": 2.9472508377808118, "grad_norm": 0.07409010851854753, "learning_rate": 1.6154091260466242e-06, 
"loss": 0.4723, "step": 5940 }, { "epoch": 2.947747300484051, "grad_norm": 0.07691475767500006, "learning_rate": 1.6139706121561133e-06, "loss": 0.4499, "step": 5941 }, { "epoch": 2.9482437631872904, "grad_norm": 0.07057759341189006, "learning_rate": 1.6125326157659048e-06, "loss": 0.4875, "step": 5942 }, { "epoch": 2.94874022589053, "grad_norm": 0.07207202143271935, "learning_rate": 1.6110951370957723e-06, "loss": 0.464, "step": 5943 }, { "epoch": 2.9492366885937695, "grad_norm": 0.07171710857679257, "learning_rate": 1.6096581763654106e-06, "loss": 0.4791, "step": 5944 }, { "epoch": 2.949733151297009, "grad_norm": 0.07025891311039467, "learning_rate": 1.6082217337944357e-06, "loss": 0.4558, "step": 5945 }, { "epoch": 2.950229614000248, "grad_norm": 0.07092578727672147, "learning_rate": 1.6067858096023869e-06, "loss": 0.4495, "step": 5946 }, { "epoch": 2.9507260767034875, "grad_norm": 0.0701539238256779, "learning_rate": 1.6053504040087208e-06, "loss": 0.4527, "step": 5947 }, { "epoch": 2.951222539406727, "grad_norm": 0.07164960976189151, "learning_rate": 1.6039155172328153e-06, "loss": 0.4314, "step": 5948 }, { "epoch": 2.9517190021099666, "grad_norm": 0.07069326099764968, "learning_rate": 1.6024811494939723e-06, "loss": 0.454, "step": 5949 }, { "epoch": 2.952215464813206, "grad_norm": 0.07281057872718284, "learning_rate": 1.601047301011409e-06, "loss": 0.4546, "step": 5950 }, { "epoch": 2.9527119275164453, "grad_norm": 0.07337024104844138, "learning_rate": 1.5996139720042692e-06, "loss": 0.4763, "step": 5951 }, { "epoch": 2.9532083902196846, "grad_norm": 0.07255991546812811, "learning_rate": 1.5981811626916126e-06, "loss": 0.4956, "step": 5952 }, { "epoch": 2.9537048529229244, "grad_norm": 0.07118427601670174, "learning_rate": 1.5967488732924202e-06, "loss": 0.4625, "step": 5953 }, { "epoch": 2.9542013156261637, "grad_norm": 0.0705626273726107, "learning_rate": 1.5953171040255965e-06, "loss": 0.4526, "step": 5954 }, { "epoch": 2.954697778329403, "grad_norm": 0.07193505828742014, "learning_rate": 1.5938858551099639e-06, "loss": 0.4391, "step": 5955 }, { "epoch": 2.9551942410326424, "grad_norm": 0.07037932237490879, "learning_rate": 1.5924551267642641e-06, "loss": 0.4617, "step": 5956 }, { "epoch": 2.9556907037358817, "grad_norm": 0.07160562539684742, "learning_rate": 1.5910249192071637e-06, "loss": 0.4643, "step": 5957 }, { "epoch": 2.956187166439121, "grad_norm": 0.07359941219396972, "learning_rate": 1.5895952326572438e-06, "loss": 0.5014, "step": 5958 }, { "epoch": 2.956683629142361, "grad_norm": 0.07216612036347472, "learning_rate": 1.5881660673330141e-06, "loss": 0.4807, "step": 5959 }, { "epoch": 2.9571800918456, "grad_norm": 0.07016235803755881, "learning_rate": 1.5867374234528938e-06, "loss": 0.4389, "step": 5960 }, { "epoch": 2.9576765545488395, "grad_norm": 0.0743924650931678, "learning_rate": 1.5853093012352317e-06, "loss": 0.4759, "step": 5961 }, { "epoch": 2.958173017252079, "grad_norm": 0.07307681745982117, "learning_rate": 1.5838817008982927e-06, "loss": 0.4666, "step": 5962 }, { "epoch": 2.9586694799553186, "grad_norm": 0.07148307237166242, "learning_rate": 1.5824546226602611e-06, "loss": 0.4297, "step": 5963 }, { "epoch": 2.959165942658558, "grad_norm": 0.0733250074435015, "learning_rate": 1.5810280667392458e-06, "loss": 0.4792, "step": 5964 }, { "epoch": 2.9596624053617973, "grad_norm": 0.07349016981862934, "learning_rate": 1.5796020333532696e-06, "loss": 0.4583, "step": 5965 }, { "epoch": 2.9601588680650366, "grad_norm": 0.07165327067178426, "learning_rate": 
1.5781765227202822e-06, "loss": 0.4659, "step": 5966 }, { "epoch": 2.960655330768276, "grad_norm": 0.07070030003800495, "learning_rate": 1.5767515350581492e-06, "loss": 0.4535, "step": 5967 }, { "epoch": 2.9611517934715152, "grad_norm": 0.06980187457227569, "learning_rate": 1.575327070584654e-06, "loss": 0.3996, "step": 5968 }, { "epoch": 2.961648256174755, "grad_norm": 0.07050217628886313, "learning_rate": 1.5739031295175078e-06, "loss": 0.457, "step": 5969 }, { "epoch": 2.9621447188779944, "grad_norm": 0.07083558784751204, "learning_rate": 1.572479712074333e-06, "loss": 0.4465, "step": 5970 }, { "epoch": 2.9626411815812337, "grad_norm": 0.07305312265469574, "learning_rate": 1.5710568184726799e-06, "loss": 0.4616, "step": 5971 }, { "epoch": 2.963137644284473, "grad_norm": 0.07297673470614084, "learning_rate": 1.569634448930013e-06, "loss": 0.465, "step": 5972 }, { "epoch": 2.963634106987713, "grad_norm": 0.07431334323820772, "learning_rate": 1.5682126036637174e-06, "loss": 0.4777, "step": 5973 }, { "epoch": 2.964130569690952, "grad_norm": 0.07221414404814905, "learning_rate": 1.5667912828911025e-06, "loss": 0.4685, "step": 5974 }, { "epoch": 2.9646270323941915, "grad_norm": 0.0709090572935716, "learning_rate": 1.5653704868293928e-06, "loss": 0.4347, "step": 5975 }, { "epoch": 2.965123495097431, "grad_norm": 0.07158548900446969, "learning_rate": 1.5639502156957337e-06, "loss": 0.457, "step": 5976 }, { "epoch": 2.96561995780067, "grad_norm": 0.06958496316397475, "learning_rate": 1.5625304697071897e-06, "loss": 0.4617, "step": 5977 }, { "epoch": 2.9661164205039094, "grad_norm": 0.06999282640269491, "learning_rate": 1.5611112490807496e-06, "loss": 0.4293, "step": 5978 }, { "epoch": 2.966612883207149, "grad_norm": 0.06936000279838299, "learning_rate": 1.559692554033317e-06, "loss": 0.4229, "step": 5979 }, { "epoch": 2.9671093459103886, "grad_norm": 0.07260037421289114, "learning_rate": 1.5582743847817138e-06, "loss": 0.4839, "step": 5980 }, { "epoch": 2.967605808613628, "grad_norm": 0.07236131524189705, "learning_rate": 1.5568567415426893e-06, "loss": 0.4806, "step": 5981 }, { "epoch": 2.968102271316867, "grad_norm": 0.07341692924894432, "learning_rate": 1.555439624532904e-06, "loss": 0.4724, "step": 5982 }, { "epoch": 2.968598734020107, "grad_norm": 0.07241636608277653, "learning_rate": 1.5540230339689437e-06, "loss": 0.4436, "step": 5983 }, { "epoch": 2.9690951967233463, "grad_norm": 0.07406946952048966, "learning_rate": 1.5526069700673108e-06, "loss": 0.4581, "step": 5984 }, { "epoch": 2.9695916594265857, "grad_norm": 0.07302094613595563, "learning_rate": 1.551191433044426e-06, "loss": 0.4616, "step": 5985 }, { "epoch": 2.970088122129825, "grad_norm": 0.07019743543659121, "learning_rate": 1.549776423116635e-06, "loss": 0.4505, "step": 5986 }, { "epoch": 2.9705845848330643, "grad_norm": 0.07316166105045384, "learning_rate": 1.5483619405001965e-06, "loss": 0.4403, "step": 5987 }, { "epoch": 2.9710810475363036, "grad_norm": 0.07297545044125849, "learning_rate": 1.5469479854112934e-06, "loss": 0.4729, "step": 5988 }, { "epoch": 2.9715775102395434, "grad_norm": 0.07282454195737302, "learning_rate": 1.5455345580660259e-06, "loss": 0.439, "step": 5989 }, { "epoch": 2.9720739729427827, "grad_norm": 0.07479071932810989, "learning_rate": 1.544121658680411e-06, "loss": 0.4816, "step": 5990 }, { "epoch": 2.972570435646022, "grad_norm": 0.07439592386649715, "learning_rate": 1.542709287470393e-06, "loss": 0.4629, "step": 5991 }, { "epoch": 2.9730668983492614, "grad_norm": 0.07067142551209074, 
"learning_rate": 1.5412974446518243e-06, "loss": 0.4465, "step": 5992 }, { "epoch": 2.973563361052501, "grad_norm": 0.07345460653832109, "learning_rate": 1.539886130440486e-06, "loss": 0.4693, "step": 5993 }, { "epoch": 2.9740598237557405, "grad_norm": 0.07332983971617145, "learning_rate": 1.5384753450520739e-06, "loss": 0.4377, "step": 5994 }, { "epoch": 2.97455628645898, "grad_norm": 0.07165864927196039, "learning_rate": 1.537065088702203e-06, "loss": 0.446, "step": 5995 }, { "epoch": 2.975052749162219, "grad_norm": 0.07460898613291474, "learning_rate": 1.5356553616064107e-06, "loss": 0.4678, "step": 5996 }, { "epoch": 2.9755492118654585, "grad_norm": 0.07235821467864646, "learning_rate": 1.5342461639801481e-06, "loss": 0.4672, "step": 5997 }, { "epoch": 2.976045674568698, "grad_norm": 0.07151717029488126, "learning_rate": 1.532837496038792e-06, "loss": 0.4513, "step": 5998 }, { "epoch": 2.976542137271937, "grad_norm": 0.07072519813246403, "learning_rate": 1.531429357997633e-06, "loss": 0.4461, "step": 5999 }, { "epoch": 2.977038599975177, "grad_norm": 0.07209317328064743, "learning_rate": 1.5300217500718806e-06, "loss": 0.4505, "step": 6000 }, { "epoch": 2.9775350626784163, "grad_norm": 0.07019272634029788, "learning_rate": 1.5286146724766681e-06, "loss": 0.441, "step": 6001 }, { "epoch": 2.9780315253816556, "grad_norm": 0.07047003562021524, "learning_rate": 1.5272081254270421e-06, "loss": 0.4696, "step": 6002 }, { "epoch": 2.9785279880848954, "grad_norm": 0.0714175852136517, "learning_rate": 1.5258021091379738e-06, "loss": 0.4593, "step": 6003 }, { "epoch": 2.9790244507881347, "grad_norm": 0.06983063937057955, "learning_rate": 1.5243966238243484e-06, "loss": 0.4244, "step": 6004 }, { "epoch": 2.979520913491374, "grad_norm": 0.07233901190332044, "learning_rate": 1.5229916697009706e-06, "loss": 0.4704, "step": 6005 }, { "epoch": 2.9800173761946134, "grad_norm": 0.0710352377551852, "learning_rate": 1.5215872469825682e-06, "loss": 0.4429, "step": 6006 }, { "epoch": 2.9805138388978527, "grad_norm": 0.07279079307995248, "learning_rate": 1.520183355883783e-06, "loss": 0.4928, "step": 6007 }, { "epoch": 2.981010301601092, "grad_norm": 0.072720773924886, "learning_rate": 1.5187799966191769e-06, "loss": 0.453, "step": 6008 }, { "epoch": 2.9815067643043314, "grad_norm": 0.07541142329483538, "learning_rate": 1.5173771694032296e-06, "loss": 0.4541, "step": 6009 }, { "epoch": 2.982003227007571, "grad_norm": 0.07134543270914809, "learning_rate": 1.5159748744503444e-06, "loss": 0.465, "step": 6010 }, { "epoch": 2.9824996897108105, "grad_norm": 0.07153087313411305, "learning_rate": 1.5145731119748376e-06, "loss": 0.4563, "step": 6011 }, { "epoch": 2.98299615241405, "grad_norm": 0.07466590611705792, "learning_rate": 1.5131718821909435e-06, "loss": 0.4896, "step": 6012 }, { "epoch": 2.9834926151172896, "grad_norm": 0.07143816309455199, "learning_rate": 1.5117711853128225e-06, "loss": 0.4649, "step": 6013 }, { "epoch": 2.983989077820529, "grad_norm": 0.07168287010692334, "learning_rate": 1.5103710215545448e-06, "loss": 0.4644, "step": 6014 }, { "epoch": 2.9844855405237682, "grad_norm": 0.07015268650999319, "learning_rate": 1.5089713911301063e-06, "loss": 0.4485, "step": 6015 }, { "epoch": 2.9849820032270076, "grad_norm": 0.06921333182373515, "learning_rate": 1.5075722942534154e-06, "loss": 0.4314, "step": 6016 }, { "epoch": 2.985478465930247, "grad_norm": 0.07178123073223433, "learning_rate": 1.5061737311383018e-06, "loss": 0.4686, "step": 6017 }, { "epoch": 2.9859749286334862, "grad_norm": 
0.07114264232083874, "learning_rate": 1.5047757019985155e-06, "loss": 0.4572, "step": 6018 }, { "epoch": 2.9864713913367256, "grad_norm": 0.07295002675156483, "learning_rate": 1.5033782070477192e-06, "loss": 0.4481, "step": 6019 }, { "epoch": 2.9869678540399653, "grad_norm": 0.07224442006644777, "learning_rate": 1.5019812464995027e-06, "loss": 0.4652, "step": 6020 }, { "epoch": 2.9874643167432047, "grad_norm": 0.07070678180071457, "learning_rate": 1.5005848205673652e-06, "loss": 0.4544, "step": 6021 }, { "epoch": 2.987960779446444, "grad_norm": 0.06968067830115295, "learning_rate": 1.4991889294647277e-06, "loss": 0.4659, "step": 6022 }, { "epoch": 2.988457242149684, "grad_norm": 0.07194371180524783, "learning_rate": 1.4977935734049342e-06, "loss": 0.4687, "step": 6023 }, { "epoch": 2.988953704852923, "grad_norm": 0.07353135929655469, "learning_rate": 1.4963987526012368e-06, "loss": 0.4295, "step": 6024 }, { "epoch": 2.9894501675561624, "grad_norm": 0.07387419105574315, "learning_rate": 1.495004467266815e-06, "loss": 0.4508, "step": 6025 }, { "epoch": 2.9899466302594018, "grad_norm": 0.07221393034988988, "learning_rate": 1.4936107176147606e-06, "loss": 0.4572, "step": 6026 }, { "epoch": 2.990443092962641, "grad_norm": 0.07335610921440719, "learning_rate": 1.4922175038580894e-06, "loss": 0.4533, "step": 6027 }, { "epoch": 2.9909395556658804, "grad_norm": 0.07038270855175356, "learning_rate": 1.4908248262097292e-06, "loss": 0.4344, "step": 6028 }, { "epoch": 2.9914360183691198, "grad_norm": 0.07300052489851201, "learning_rate": 1.4894326848825275e-06, "loss": 0.4814, "step": 6029 }, { "epoch": 2.9919324810723595, "grad_norm": 0.07050113627752637, "learning_rate": 1.4880410800892541e-06, "loss": 0.4727, "step": 6030 }, { "epoch": 2.992428943775599, "grad_norm": 0.07210413625778277, "learning_rate": 1.4866500120425914e-06, "loss": 0.4464, "step": 6031 }, { "epoch": 2.992925406478838, "grad_norm": 0.07003570060025452, "learning_rate": 1.4852594809551402e-06, "loss": 0.4552, "step": 6032 }, { "epoch": 2.9934218691820775, "grad_norm": 0.06897753877390084, "learning_rate": 1.483869487039425e-06, "loss": 0.4197, "step": 6033 }, { "epoch": 2.9939183318853173, "grad_norm": 0.0721561684411484, "learning_rate": 1.4824800305078797e-06, "loss": 0.4576, "step": 6034 }, { "epoch": 2.9944147945885566, "grad_norm": 0.0706978250471, "learning_rate": 1.4810911115728644e-06, "loss": 0.4262, "step": 6035 }, { "epoch": 2.994911257291796, "grad_norm": 0.07334245092152264, "learning_rate": 1.479702730446651e-06, "loss": 0.4694, "step": 6036 }, { "epoch": 2.9954077199950353, "grad_norm": 0.06934060094770478, "learning_rate": 1.4783148873414305e-06, "loss": 0.4424, "step": 6037 }, { "epoch": 2.9959041826982746, "grad_norm": 0.07370151222123784, "learning_rate": 1.4769275824693146e-06, "loss": 0.4715, "step": 6038 }, { "epoch": 2.996400645401514, "grad_norm": 0.07250380480661225, "learning_rate": 1.4755408160423302e-06, "loss": 0.4568, "step": 6039 }, { "epoch": 2.9968971081047537, "grad_norm": 0.07043634822680875, "learning_rate": 1.4741545882724213e-06, "loss": 0.4634, "step": 6040 }, { "epoch": 2.997393570807993, "grad_norm": 0.0733358803979804, "learning_rate": 1.4727688993714494e-06, "loss": 0.4536, "step": 6041 }, { "epoch": 2.9978900335112324, "grad_norm": 0.07071774708399711, "learning_rate": 1.4713837495511978e-06, "loss": 0.4465, "step": 6042 }, { "epoch": 2.9983864962144717, "grad_norm": 0.0717978688509802, "learning_rate": 1.4699991390233631e-06, "loss": 0.4446, "step": 6043 }, { "epoch": 
2.9988829589177115, "grad_norm": 0.07146905900228022, "learning_rate": 1.4686150679995592e-06, "loss": 0.4412, "step": 6044 }, { "epoch": 2.999379421620951, "grad_norm": 0.07219369507453394, "learning_rate": 1.467231536691322e-06, "loss": 0.4552, "step": 6045 }, { "epoch": 2.99987588432419, "grad_norm": 0.0713535462878617, "learning_rate": 1.4658485453100996e-06, "loss": 0.4784, "step": 6046 }, { "epoch": 3.0, "grad_norm": 0.0713535462878617, "learning_rate": 1.4644660940672628e-06, "loss": 0.1017, "step": 6047 }, { "epoch": 3.0003723470274295, "grad_norm": 0.06977470998602367, "learning_rate": 1.4630841831740955e-06, "loss": 0.335, "step": 6048 }, { "epoch": 3.0003723470274295, "eval_loss": 0.5128746032714844, "eval_runtime": 258.8736, "eval_samples_per_second": 117.25, "eval_steps_per_second": 14.66, "step": 6048 }, { "epoch": 3.0004964627032393, "grad_norm": 0.07879870082790116, "learning_rate": 1.4617028128417993e-06, "loss": 0.4138, "step": 6049 }, { "epoch": 3.0009929254064787, "grad_norm": 0.08185603013604671, "learning_rate": 1.4603219832814968e-06, "loss": 0.4423, "step": 6050 }, { "epoch": 3.0014893881097184, "grad_norm": 0.08009656994151068, "learning_rate": 1.4589416947042234e-06, "loss": 0.4033, "step": 6051 }, { "epoch": 3.0019858508129578, "grad_norm": 0.07847564767768977, "learning_rate": 1.4575619473209373e-06, "loss": 0.4638, "step": 6052 }, { "epoch": 3.002482313516197, "grad_norm": 0.07147341894384611, "learning_rate": 1.4561827413425089e-06, "loss": 0.4149, "step": 6053 }, { "epoch": 3.0029787762194364, "grad_norm": 0.07426846370280647, "learning_rate": 1.4548040769797255e-06, "loss": 0.4317, "step": 6054 }, { "epoch": 3.0034752389226758, "grad_norm": 0.07080595435948296, "learning_rate": 1.4534259544432983e-06, "loss": 0.4281, "step": 6055 }, { "epoch": 3.0039717016259155, "grad_norm": 0.07189277655550798, "learning_rate": 1.452048373943849e-06, "loss": 0.4378, "step": 6056 }, { "epoch": 3.004468164329155, "grad_norm": 0.0735231757191823, "learning_rate": 1.4506713356919184e-06, "loss": 0.4592, "step": 6057 }, { "epoch": 3.004964627032394, "grad_norm": 0.07177815486814404, "learning_rate": 1.4492948398979634e-06, "loss": 0.4519, "step": 6058 }, { "epoch": 3.0054610897356335, "grad_norm": 0.07308116284527004, "learning_rate": 1.447918886772362e-06, "loss": 0.4372, "step": 6059 }, { "epoch": 3.005957552438873, "grad_norm": 0.07352080350431428, "learning_rate": 1.446543476525406e-06, "loss": 0.4313, "step": 6060 }, { "epoch": 3.0064540151421126, "grad_norm": 0.07591613815457766, "learning_rate": 1.4451686093673028e-06, "loss": 0.4694, "step": 6061 }, { "epoch": 3.006950477845352, "grad_norm": 0.07713518741085477, "learning_rate": 1.4437942855081816e-06, "loss": 0.4749, "step": 6062 }, { "epoch": 3.0074469405485913, "grad_norm": 0.07522268953804548, "learning_rate": 1.4424205051580831e-06, "loss": 0.4296, "step": 6063 }, { "epoch": 3.0079434032518306, "grad_norm": 0.07371488261024522, "learning_rate": 1.4410472685269699e-06, "loss": 0.4359, "step": 6064 }, { "epoch": 3.00843986595507, "grad_norm": 0.07325013732702464, "learning_rate": 1.4396745758247189e-06, "loss": 0.4266, "step": 6065 }, { "epoch": 3.0089363286583097, "grad_norm": 0.07395652356856376, "learning_rate": 1.4383024272611217e-06, "loss": 0.4729, "step": 6066 }, { "epoch": 3.009432791361549, "grad_norm": 0.07352284430323344, "learning_rate": 1.4369308230458927e-06, "loss": 0.4499, "step": 6067 }, { "epoch": 3.0099292540647884, "grad_norm": 0.0757223936029539, "learning_rate": 1.4355597633886576e-06, "loss": 
0.4423, "step": 6068 }, { "epoch": 3.0104257167680277, "grad_norm": 0.07337655222505507, "learning_rate": 1.43418924849896e-06, "loss": 0.4469, "step": 6069 }, { "epoch": 3.010922179471267, "grad_norm": 0.07487651129076654, "learning_rate": 1.4328192785862638e-06, "loss": 0.4367, "step": 6070 }, { "epoch": 3.011418642174507, "grad_norm": 0.07221681940111216, "learning_rate": 1.4314498538599437e-06, "loss": 0.4398, "step": 6071 }, { "epoch": 3.011915104877746, "grad_norm": 0.07034556104531757, "learning_rate": 1.4300809745292993e-06, "loss": 0.4346, "step": 6072 }, { "epoch": 3.0124115675809855, "grad_norm": 0.0735456721483014, "learning_rate": 1.4287126408035356e-06, "loss": 0.4478, "step": 6073 }, { "epoch": 3.012908030284225, "grad_norm": 0.07182254899050108, "learning_rate": 1.4273448528917854e-06, "loss": 0.4519, "step": 6074 }, { "epoch": 3.013404492987464, "grad_norm": 0.069424652232987, "learning_rate": 1.425977611003091e-06, "loss": 0.4125, "step": 6075 }, { "epoch": 3.013900955690704, "grad_norm": 0.07186620818532258, "learning_rate": 1.424610915346412e-06, "loss": 0.4109, "step": 6076 }, { "epoch": 3.0143974183939433, "grad_norm": 0.0750254821896234, "learning_rate": 1.4232447661306292e-06, "loss": 0.444, "step": 6077 }, { "epoch": 3.0148938810971826, "grad_norm": 0.07316063569686765, "learning_rate": 1.4218791635645335e-06, "loss": 0.4644, "step": 6078 }, { "epoch": 3.015390343800422, "grad_norm": 0.07325720344215891, "learning_rate": 1.4205141078568384e-06, "loss": 0.4305, "step": 6079 }, { "epoch": 3.0158868065036613, "grad_norm": 0.07186913073023993, "learning_rate": 1.419149599216169e-06, "loss": 0.4305, "step": 6080 }, { "epoch": 3.016383269206901, "grad_norm": 0.07259629560798145, "learning_rate": 1.4177856378510675e-06, "loss": 0.446, "step": 6081 }, { "epoch": 3.0168797319101404, "grad_norm": 0.07347257396391449, "learning_rate": 1.416422223969996e-06, "loss": 0.4521, "step": 6082 }, { "epoch": 3.0173761946133797, "grad_norm": 0.07406415018966424, "learning_rate": 1.4150593577813282e-06, "loss": 0.4268, "step": 6083 }, { "epoch": 3.017872657316619, "grad_norm": 0.07238241289397684, "learning_rate": 1.4136970394933586e-06, "loss": 0.4731, "step": 6084 }, { "epoch": 3.0183691200198584, "grad_norm": 0.06964934760379549, "learning_rate": 1.4123352693142945e-06, "loss": 0.455, "step": 6085 }, { "epoch": 3.018865582723098, "grad_norm": 0.07462793481034907, "learning_rate": 1.4109740474522594e-06, "loss": 0.4637, "step": 6086 }, { "epoch": 3.0193620454263375, "grad_norm": 0.07060949249255187, "learning_rate": 1.4096133741152967e-06, "loss": 0.4375, "step": 6087 }, { "epoch": 3.019858508129577, "grad_norm": 0.07329812020775885, "learning_rate": 1.4082532495113627e-06, "loss": 0.4302, "step": 6088 }, { "epoch": 3.020354970832816, "grad_norm": 0.07481253449274504, "learning_rate": 1.4068936738483302e-06, "loss": 0.4526, "step": 6089 }, { "epoch": 3.0208514335360555, "grad_norm": 0.07216798863248236, "learning_rate": 1.4055346473339865e-06, "loss": 0.4164, "step": 6090 }, { "epoch": 3.0213478962392952, "grad_norm": 0.07267376251676638, "learning_rate": 1.4041761701760414e-06, "loss": 0.4244, "step": 6091 }, { "epoch": 3.0218443589425346, "grad_norm": 0.07297391019769917, "learning_rate": 1.4028182425821135e-06, "loss": 0.4361, "step": 6092 }, { "epoch": 3.022340821645774, "grad_norm": 0.07239707684815393, "learning_rate": 1.4014608647597394e-06, "loss": 0.4369, "step": 6093 }, { "epoch": 3.022837284349013, "grad_norm": 0.07207550735039364, "learning_rate": 1.4001040369163755e-06, 
"loss": 0.4504, "step": 6094 }, { "epoch": 3.0233337470522526, "grad_norm": 0.07489432295105117, "learning_rate": 1.398747759259388e-06, "loss": 0.5066, "step": 6095 }, { "epoch": 3.0238302097554923, "grad_norm": 0.07117973295591797, "learning_rate": 1.3973920319960654e-06, "loss": 0.416, "step": 6096 }, { "epoch": 3.0243266724587317, "grad_norm": 0.07233810281882866, "learning_rate": 1.3960368553336073e-06, "loss": 0.4223, "step": 6097 }, { "epoch": 3.024823135161971, "grad_norm": 0.07396695918371478, "learning_rate": 1.394682229479129e-06, "loss": 0.452, "step": 6098 }, { "epoch": 3.0253195978652103, "grad_norm": 0.0706555544795235, "learning_rate": 1.3933281546396665e-06, "loss": 0.4223, "step": 6099 }, { "epoch": 3.0258160605684497, "grad_norm": 0.07178200800744759, "learning_rate": 1.391974631022166e-06, "loss": 0.4497, "step": 6100 }, { "epoch": 3.0263125232716894, "grad_norm": 0.07286657811058043, "learning_rate": 1.3906216588334936e-06, "loss": 0.4361, "step": 6101 }, { "epoch": 3.0268089859749288, "grad_norm": 0.07299593785310476, "learning_rate": 1.3892692382804295e-06, "loss": 0.4534, "step": 6102 }, { "epoch": 3.027305448678168, "grad_norm": 0.07099933881802412, "learning_rate": 1.3879173695696668e-06, "loss": 0.4137, "step": 6103 }, { "epoch": 3.0278019113814074, "grad_norm": 0.07139228360005229, "learning_rate": 1.3865660529078218e-06, "loss": 0.439, "step": 6104 }, { "epoch": 3.0282983740846467, "grad_norm": 0.07235298810056374, "learning_rate": 1.3852152885014152e-06, "loss": 0.4395, "step": 6105 }, { "epoch": 3.0287948367878865, "grad_norm": 0.0756061099029309, "learning_rate": 1.383865076556895e-06, "loss": 0.4624, "step": 6106 }, { "epoch": 3.029291299491126, "grad_norm": 0.07318085394261264, "learning_rate": 1.3825154172806176e-06, "loss": 0.4427, "step": 6107 }, { "epoch": 3.029787762194365, "grad_norm": 0.06982982435111475, "learning_rate": 1.3811663108788553e-06, "loss": 0.4132, "step": 6108 }, { "epoch": 3.0302842248976045, "grad_norm": 0.07128249031773394, "learning_rate": 1.3798177575577998e-06, "loss": 0.4325, "step": 6109 }, { "epoch": 3.030780687600844, "grad_norm": 0.06998626113303408, "learning_rate": 1.378469757523554e-06, "loss": 0.4446, "step": 6110 }, { "epoch": 3.0312771503040836, "grad_norm": 0.07236540853128076, "learning_rate": 1.3771223109821402e-06, "loss": 0.411, "step": 6111 }, { "epoch": 3.031773613007323, "grad_norm": 0.07183106915540166, "learning_rate": 1.3757754181394921e-06, "loss": 0.4344, "step": 6112 }, { "epoch": 3.0322700757105623, "grad_norm": 0.07249512228100471, "learning_rate": 1.374429079201461e-06, "loss": 0.4312, "step": 6113 }, { "epoch": 3.0327665384138016, "grad_norm": 0.07091446563916892, "learning_rate": 1.3730832943738143e-06, "loss": 0.4271, "step": 6114 }, { "epoch": 3.033263001117041, "grad_norm": 0.07431020907428121, "learning_rate": 1.3717380638622313e-06, "loss": 0.4429, "step": 6115 }, { "epoch": 3.0337594638202807, "grad_norm": 0.07269213005302484, "learning_rate": 1.3703933878723119e-06, "loss": 0.4787, "step": 6116 }, { "epoch": 3.03425592652352, "grad_norm": 0.06960826643523638, "learning_rate": 1.3690492666095672e-06, "loss": 0.4291, "step": 6117 }, { "epoch": 3.0347523892267594, "grad_norm": 0.07072163570955363, "learning_rate": 1.3677057002794226e-06, "loss": 0.403, "step": 6118 }, { "epoch": 3.0352488519299987, "grad_norm": 0.07224535948018528, "learning_rate": 1.3663626890872239e-06, "loss": 0.414, "step": 6119 }, { "epoch": 3.035745314633238, "grad_norm": 0.07340254247977736, "learning_rate": 
1.3650202332382273e-06, "loss": 0.4506, "step": 6120 }, { "epoch": 3.0362417773364774, "grad_norm": 0.07281206895696977, "learning_rate": 1.3636783329376053e-06, "loss": 0.4593, "step": 6121 }, { "epoch": 3.036738240039717, "grad_norm": 0.07520592685480292, "learning_rate": 1.3623369883904447e-06, "loss": 0.4624, "step": 6122 }, { "epoch": 3.0372347027429565, "grad_norm": 0.07005595449616403, "learning_rate": 1.3609961998017519e-06, "loss": 0.4288, "step": 6123 }, { "epoch": 3.037731165446196, "grad_norm": 0.07100814556301673, "learning_rate": 1.3596559673764421e-06, "loss": 0.4118, "step": 6124 }, { "epoch": 3.038227628149435, "grad_norm": 0.07300186961223747, "learning_rate": 1.3583162913193483e-06, "loss": 0.4358, "step": 6125 }, { "epoch": 3.0387240908526745, "grad_norm": 0.07392695568371571, "learning_rate": 1.3569771718352208e-06, "loss": 0.433, "step": 6126 }, { "epoch": 3.0392205535559143, "grad_norm": 0.07423043383981226, "learning_rate": 1.3556386091287193e-06, "loss": 0.447, "step": 6127 }, { "epoch": 3.0397170162591536, "grad_norm": 0.07263636650400927, "learning_rate": 1.3543006034044255e-06, "loss": 0.4255, "step": 6128 }, { "epoch": 3.040213478962393, "grad_norm": 0.07583843572765442, "learning_rate": 1.3529631548668298e-06, "loss": 0.4454, "step": 6129 }, { "epoch": 3.0407099416656322, "grad_norm": 0.0720704890024341, "learning_rate": 1.3516262637203392e-06, "loss": 0.4425, "step": 6130 }, { "epoch": 3.0412064043688716, "grad_norm": 0.07381962200231772, "learning_rate": 1.350289930169278e-06, "loss": 0.4614, "step": 6131 }, { "epoch": 3.0417028670721113, "grad_norm": 0.07144967882171453, "learning_rate": 1.3489541544178808e-06, "loss": 0.4375, "step": 6132 }, { "epoch": 3.0421993297753507, "grad_norm": 0.07133068638464211, "learning_rate": 1.3476189366703024e-06, "loss": 0.4284, "step": 6133 }, { "epoch": 3.04269579247859, "grad_norm": 0.0729834506663787, "learning_rate": 1.3462842771306084e-06, "loss": 0.4435, "step": 6134 }, { "epoch": 3.0431922551818293, "grad_norm": 0.07057614017895736, "learning_rate": 1.3449501760027778e-06, "loss": 0.434, "step": 6135 }, { "epoch": 3.0436887178850687, "grad_norm": 0.0731597201840205, "learning_rate": 1.3436166334907118e-06, "loss": 0.4133, "step": 6136 }, { "epoch": 3.0441851805883084, "grad_norm": 0.07246949898925738, "learning_rate": 1.342283649798215e-06, "loss": 0.4195, "step": 6137 }, { "epoch": 3.044681643291548, "grad_norm": 0.06981040620325808, "learning_rate": 1.3409512251290164e-06, "loss": 0.4159, "step": 6138 }, { "epoch": 3.045178105994787, "grad_norm": 0.07238922242350312, "learning_rate": 1.3396193596867534e-06, "loss": 0.4276, "step": 6139 }, { "epoch": 3.0456745686980264, "grad_norm": 0.0719935930840651, "learning_rate": 1.3382880536749831e-06, "loss": 0.4257, "step": 6140 }, { "epoch": 3.0461710314012658, "grad_norm": 0.07403638629611554, "learning_rate": 1.3369573072971725e-06, "loss": 0.4716, "step": 6141 }, { "epoch": 3.0466674941045055, "grad_norm": 0.07230713050437415, "learning_rate": 1.3356271207567033e-06, "loss": 0.437, "step": 6142 }, { "epoch": 3.047163956807745, "grad_norm": 0.07508824326437552, "learning_rate": 1.334297494256877e-06, "loss": 0.4708, "step": 6143 }, { "epoch": 3.047660419510984, "grad_norm": 0.07437845757042565, "learning_rate": 1.3329684280009032e-06, "loss": 0.4236, "step": 6144 }, { "epoch": 3.0481568822142235, "grad_norm": 0.07369128333228318, "learning_rate": 1.3316399221919075e-06, "loss": 0.4321, "step": 6145 }, { "epoch": 3.048653344917463, "grad_norm": 0.07308115787987783, 
"learning_rate": 1.3303119770329336e-06, "loss": 0.4463, "step": 6146 }, { "epoch": 3.0491498076207026, "grad_norm": 0.07129520875002196, "learning_rate": 1.3289845927269335e-06, "loss": 0.4389, "step": 6147 }, { "epoch": 3.049646270323942, "grad_norm": 0.07374682289602172, "learning_rate": 1.3276577694767794e-06, "loss": 0.454, "step": 6148 }, { "epoch": 3.0501427330271813, "grad_norm": 0.07117621386936727, "learning_rate": 1.326331507485254e-06, "loss": 0.4338, "step": 6149 }, { "epoch": 3.0506391957304206, "grad_norm": 0.07589034699881031, "learning_rate": 1.325005806955053e-06, "loss": 0.4371, "step": 6150 }, { "epoch": 3.05113565843366, "grad_norm": 0.07142220044539167, "learning_rate": 1.323680668088792e-06, "loss": 0.4004, "step": 6151 }, { "epoch": 3.0516321211368997, "grad_norm": 0.07036115905012849, "learning_rate": 1.322356091088996e-06, "loss": 0.4205, "step": 6152 }, { "epoch": 3.052128583840139, "grad_norm": 0.07329646557855506, "learning_rate": 1.3210320761581047e-06, "loss": 0.4436, "step": 6153 }, { "epoch": 3.0526250465433784, "grad_norm": 0.07196844227110943, "learning_rate": 1.3197086234984707e-06, "loss": 0.402, "step": 6154 }, { "epoch": 3.0531215092466177, "grad_norm": 0.07401728202452947, "learning_rate": 1.3183857333123667e-06, "loss": 0.4541, "step": 6155 }, { "epoch": 3.053617971949857, "grad_norm": 0.07027866583372665, "learning_rate": 1.3170634058019733e-06, "loss": 0.4249, "step": 6156 }, { "epoch": 3.054114434653097, "grad_norm": 0.07119927820007768, "learning_rate": 1.3157416411693851e-06, "loss": 0.4274, "step": 6157 }, { "epoch": 3.054610897356336, "grad_norm": 0.07180601201705737, "learning_rate": 1.314420439616616e-06, "loss": 0.4176, "step": 6158 }, { "epoch": 3.0551073600595755, "grad_norm": 0.07158720435499417, "learning_rate": 1.3130998013455875e-06, "loss": 0.4651, "step": 6159 }, { "epoch": 3.055603822762815, "grad_norm": 0.07084365083214748, "learning_rate": 1.3117797265581412e-06, "loss": 0.4343, "step": 6160 }, { "epoch": 3.056100285466054, "grad_norm": 0.07440074991217603, "learning_rate": 1.3104602154560275e-06, "loss": 0.4307, "step": 6161 }, { "epoch": 3.056596748169294, "grad_norm": 0.07341912115141237, "learning_rate": 1.3091412682409104e-06, "loss": 0.4348, "step": 6162 }, { "epoch": 3.0570932108725333, "grad_norm": 0.07307954047079919, "learning_rate": 1.3078228851143743e-06, "loss": 0.4696, "step": 6163 }, { "epoch": 3.0575896735757726, "grad_norm": 0.07098981514671952, "learning_rate": 1.3065050662779088e-06, "loss": 0.4488, "step": 6164 }, { "epoch": 3.058086136279012, "grad_norm": 0.07061819183409725, "learning_rate": 1.3051878119329248e-06, "loss": 0.4005, "step": 6165 }, { "epoch": 3.0585825989822513, "grad_norm": 0.07396280603570082, "learning_rate": 1.303871122280742e-06, "loss": 0.445, "step": 6166 }, { "epoch": 3.059079061685491, "grad_norm": 0.07506077026161759, "learning_rate": 1.3025549975225936e-06, "loss": 0.4275, "step": 6167 }, { "epoch": 3.0595755243887304, "grad_norm": 0.07159324115577584, "learning_rate": 1.3012394378596333e-06, "loss": 0.4265, "step": 6168 }, { "epoch": 3.0600719870919697, "grad_norm": 0.0732539848540184, "learning_rate": 1.2999244434929159e-06, "loss": 0.4668, "step": 6169 }, { "epoch": 3.060568449795209, "grad_norm": 0.07326446643050033, "learning_rate": 1.298610014623423e-06, "loss": 0.4576, "step": 6170 }, { "epoch": 3.0610649124984484, "grad_norm": 0.07468402165767048, "learning_rate": 1.2972961514520411e-06, "loss": 0.4438, "step": 6171 }, { "epoch": 3.061561375201688, "grad_norm": 
0.0731658754962425, "learning_rate": 1.295982854179575e-06, "loss": 0.4258, "step": 6172 }, { "epoch": 3.0620578379049275, "grad_norm": 0.07494630711068176, "learning_rate": 1.2946701230067405e-06, "loss": 0.4398, "step": 6173 }, { "epoch": 3.062554300608167, "grad_norm": 0.07269504657824281, "learning_rate": 1.293357958134166e-06, "loss": 0.4357, "step": 6174 }, { "epoch": 3.063050763311406, "grad_norm": 0.07279870347394367, "learning_rate": 1.2920463597623972e-06, "loss": 0.4661, "step": 6175 }, { "epoch": 3.0635472260146455, "grad_norm": 0.07318599816002383, "learning_rate": 1.2907353280918883e-06, "loss": 0.4121, "step": 6176 }, { "epoch": 3.0640436887178852, "grad_norm": 0.07288523031937426, "learning_rate": 1.2894248633230128e-06, "loss": 0.4415, "step": 6177 }, { "epoch": 3.0645401514211246, "grad_norm": 0.07467560919192302, "learning_rate": 1.2881149656560522e-06, "loss": 0.4657, "step": 6178 }, { "epoch": 3.065036614124364, "grad_norm": 0.07437833595978252, "learning_rate": 1.2868056352912018e-06, "loss": 0.4361, "step": 6179 }, { "epoch": 3.0655330768276032, "grad_norm": 0.06973346690351331, "learning_rate": 1.2854968724285755e-06, "loss": 0.4131, "step": 6180 }, { "epoch": 3.0660295395308426, "grad_norm": 0.07142243005484775, "learning_rate": 1.2841886772681944e-06, "loss": 0.4384, "step": 6181 }, { "epoch": 3.0665260022340823, "grad_norm": 0.07484904761118684, "learning_rate": 1.2828810500099936e-06, "loss": 0.4676, "step": 6182 }, { "epoch": 3.0670224649373217, "grad_norm": 0.07121129131372053, "learning_rate": 1.2815739908538272e-06, "loss": 0.4256, "step": 6183 }, { "epoch": 3.067518927640561, "grad_norm": 0.0737911004124479, "learning_rate": 1.2802674999994553e-06, "loss": 0.4595, "step": 6184 }, { "epoch": 3.0680153903438003, "grad_norm": 0.0733653209074709, "learning_rate": 1.2789615776465547e-06, "loss": 0.4608, "step": 6185 }, { "epoch": 3.0685118530470397, "grad_norm": 0.07226851556953984, "learning_rate": 1.2776562239947133e-06, "loss": 0.4365, "step": 6186 }, { "epoch": 3.0690083157502794, "grad_norm": 0.07582035370251462, "learning_rate": 1.276351439243436e-06, "loss": 0.4519, "step": 6187 }, { "epoch": 3.0695047784535188, "grad_norm": 0.074998501917356, "learning_rate": 1.2750472235921374e-06, "loss": 0.4604, "step": 6188 }, { "epoch": 3.070001241156758, "grad_norm": 0.07647818404479714, "learning_rate": 1.273743577240144e-06, "loss": 0.4659, "step": 6189 }, { "epoch": 3.0704977038599974, "grad_norm": 0.07188538726427558, "learning_rate": 1.2724405003867002e-06, "loss": 0.4307, "step": 6190 }, { "epoch": 3.0709941665632368, "grad_norm": 0.07189098099018701, "learning_rate": 1.2711379932309576e-06, "loss": 0.4576, "step": 6191 }, { "epoch": 3.0714906292664765, "grad_norm": 0.07098272142750643, "learning_rate": 1.2698360559719863e-06, "loss": 0.4266, "step": 6192 }, { "epoch": 3.071987091969716, "grad_norm": 0.07501841044922372, "learning_rate": 1.2685346888087657e-06, "loss": 0.4335, "step": 6193 }, { "epoch": 3.072483554672955, "grad_norm": 0.0699826251700936, "learning_rate": 1.2672338919401866e-06, "loss": 0.4272, "step": 6194 }, { "epoch": 3.0729800173761945, "grad_norm": 0.07246590479610392, "learning_rate": 1.2659336655650583e-06, "loss": 0.4406, "step": 6195 }, { "epoch": 3.073476480079434, "grad_norm": 0.07362225122051128, "learning_rate": 1.2646340098820969e-06, "loss": 0.4413, "step": 6196 }, { "epoch": 3.0739729427826736, "grad_norm": 0.07196618201196071, "learning_rate": 1.2633349250899363e-06, "loss": 0.4322, "step": 6197 }, { "epoch": 
3.074469405485913, "grad_norm": 0.07255732105207222, "learning_rate": 1.2620364113871193e-06, "loss": 0.4427, "step": 6198 }, { "epoch": 3.0749658681891523, "grad_norm": 0.07340975032145909, "learning_rate": 1.2607384689721014e-06, "loss": 0.4338, "step": 6199 }, { "epoch": 3.0754623308923916, "grad_norm": 0.07278936088395885, "learning_rate": 1.2594410980432575e-06, "loss": 0.4512, "step": 6200 }, { "epoch": 3.075958793595631, "grad_norm": 0.07149143967747107, "learning_rate": 1.2581442987988635e-06, "loss": 0.4305, "step": 6201 }, { "epoch": 3.0764552562988707, "grad_norm": 0.07230038614740367, "learning_rate": 1.2568480714371183e-06, "loss": 0.445, "step": 6202 }, { "epoch": 3.07695171900211, "grad_norm": 0.07130612387394257, "learning_rate": 1.2555524161561277e-06, "loss": 0.4493, "step": 6203 }, { "epoch": 3.0774481817053494, "grad_norm": 0.0729240502747821, "learning_rate": 1.2542573331539136e-06, "loss": 0.4409, "step": 6204 }, { "epoch": 3.0779446444085887, "grad_norm": 0.07394585409694572, "learning_rate": 1.252962822628408e-06, "loss": 0.4311, "step": 6205 }, { "epoch": 3.078441107111828, "grad_norm": 0.07284506441536161, "learning_rate": 1.2516688847774545e-06, "loss": 0.4271, "step": 6206 }, { "epoch": 3.078937569815068, "grad_norm": 0.07289169893543256, "learning_rate": 1.2503755197988132e-06, "loss": 0.448, "step": 6207 }, { "epoch": 3.079434032518307, "grad_norm": 0.07072808271669016, "learning_rate": 1.2490827278901513e-06, "loss": 0.4373, "step": 6208 }, { "epoch": 3.0799304952215465, "grad_norm": 0.07481156352728258, "learning_rate": 1.247790509249055e-06, "loss": 0.4403, "step": 6209 }, { "epoch": 3.080426957924786, "grad_norm": 0.07547011321762705, "learning_rate": 1.246498864073017e-06, "loss": 0.4545, "step": 6210 }, { "epoch": 3.080923420628025, "grad_norm": 0.06958365489123573, "learning_rate": 1.2452077925594435e-06, "loss": 0.4153, "step": 6211 }, { "epoch": 3.081419883331265, "grad_norm": 0.07301974008845732, "learning_rate": 1.2439172949056566e-06, "loss": 0.4474, "step": 6212 }, { "epoch": 3.0819163460345043, "grad_norm": 0.07440526375197373, "learning_rate": 1.242627371308886e-06, "loss": 0.431, "step": 6213 }, { "epoch": 3.0824128087377436, "grad_norm": 0.07377167465451313, "learning_rate": 1.2413380219662779e-06, "loss": 0.4352, "step": 6214 }, { "epoch": 3.082909271440983, "grad_norm": 0.0719765132891477, "learning_rate": 1.2400492470748877e-06, "loss": 0.4196, "step": 6215 }, { "epoch": 3.0834057341442223, "grad_norm": 0.07128437563213033, "learning_rate": 1.2387610468316835e-06, "loss": 0.4619, "step": 6216 }, { "epoch": 3.083902196847462, "grad_norm": 0.07342401754003233, "learning_rate": 1.237473421433547e-06, "loss": 0.439, "step": 6217 }, { "epoch": 3.0843986595507014, "grad_norm": 0.07135736719422998, "learning_rate": 1.2361863710772686e-06, "loss": 0.4189, "step": 6218 }, { "epoch": 3.0848951222539407, "grad_norm": 0.07493974809013262, "learning_rate": 1.234899895959557e-06, "loss": 0.4636, "step": 6219 }, { "epoch": 3.08539158495718, "grad_norm": 0.07140821273764963, "learning_rate": 1.233613996277027e-06, "loss": 0.4014, "step": 6220 }, { "epoch": 3.0858880476604194, "grad_norm": 0.07335384354682381, "learning_rate": 1.2323286722262074e-06, "loss": 0.4369, "step": 6221 }, { "epoch": 3.086384510363659, "grad_norm": 0.07320122746466778, "learning_rate": 1.2310439240035415e-06, "loss": 0.4643, "step": 6222 }, { "epoch": 3.0868809730668985, "grad_norm": 0.07273563727400321, "learning_rate": 1.22975975180538e-06, "loss": 0.4441, "step": 6223 }, { 
"epoch": 3.087377435770138, "grad_norm": 0.07338840600727937, "learning_rate": 1.2284761558279901e-06, "loss": 0.452, "step": 6224 }, { "epoch": 3.087873898473377, "grad_norm": 0.07432857851656079, "learning_rate": 1.2271931362675482e-06, "loss": 0.4367, "step": 6225 }, { "epoch": 3.0883703611766165, "grad_norm": 0.06959785661299726, "learning_rate": 1.225910693320142e-06, "loss": 0.3849, "step": 6226 }, { "epoch": 3.0888668238798562, "grad_norm": 0.069439748035569, "learning_rate": 1.224628827181774e-06, "loss": 0.4129, "step": 6227 }, { "epoch": 3.0893632865830956, "grad_norm": 0.07167422324818025, "learning_rate": 1.2233475380483557e-06, "loss": 0.4383, "step": 6228 }, { "epoch": 3.089859749286335, "grad_norm": 0.07409177756432055, "learning_rate": 1.2220668261157132e-06, "loss": 0.4442, "step": 6229 }, { "epoch": 3.090356211989574, "grad_norm": 0.07229680378414288, "learning_rate": 1.2207866915795818e-06, "loss": 0.4375, "step": 6230 }, { "epoch": 3.0908526746928136, "grad_norm": 0.0703528873093919, "learning_rate": 1.2195071346356086e-06, "loss": 0.4116, "step": 6231 }, { "epoch": 3.0913491373960533, "grad_norm": 0.07187634392529822, "learning_rate": 1.2182281554793567e-06, "loss": 0.439, "step": 6232 }, { "epoch": 3.0918456000992927, "grad_norm": 0.07010903570551587, "learning_rate": 1.2169497543062924e-06, "loss": 0.3927, "step": 6233 }, { "epoch": 3.092342062802532, "grad_norm": 0.07399038526407921, "learning_rate": 1.2156719313118026e-06, "loss": 0.4438, "step": 6234 }, { "epoch": 3.0928385255057713, "grad_norm": 0.0717854306834454, "learning_rate": 1.21439468669118e-06, "loss": 0.4301, "step": 6235 }, { "epoch": 3.0933349882090106, "grad_norm": 0.07238415883742463, "learning_rate": 1.2131180206396331e-06, "loss": 0.4317, "step": 6236 }, { "epoch": 3.0938314509122504, "grad_norm": 0.07298178970303208, "learning_rate": 1.211841933352279e-06, "loss": 0.4735, "step": 6237 }, { "epoch": 3.0943279136154898, "grad_norm": 0.07236548395027288, "learning_rate": 1.2105664250241455e-06, "loss": 0.4372, "step": 6238 }, { "epoch": 3.094824376318729, "grad_norm": 0.0703106987227702, "learning_rate": 1.209291495850176e-06, "loss": 0.4474, "step": 6239 }, { "epoch": 3.0953208390219684, "grad_norm": 0.07531809414647281, "learning_rate": 1.208017146025221e-06, "loss": 0.4375, "step": 6240 }, { "epoch": 3.0958173017252077, "grad_norm": 0.06888357618016823, "learning_rate": 1.2067433757440466e-06, "loss": 0.4312, "step": 6241 }, { "epoch": 3.0963137644284475, "grad_norm": 0.07044369668372993, "learning_rate": 1.2054701852013267e-06, "loss": 0.4318, "step": 6242 }, { "epoch": 3.096810227131687, "grad_norm": 0.07449678303052629, "learning_rate": 1.2041975745916474e-06, "loss": 0.4308, "step": 6243 }, { "epoch": 3.097306689834926, "grad_norm": 0.07327395073971159, "learning_rate": 1.202925544109509e-06, "loss": 0.4371, "step": 6244 }, { "epoch": 3.0978031525381655, "grad_norm": 0.07065465741904894, "learning_rate": 1.2016540939493182e-06, "loss": 0.4227, "step": 6245 }, { "epoch": 3.098299615241405, "grad_norm": 0.07317786063065264, "learning_rate": 1.2003832243053987e-06, "loss": 0.4681, "step": 6246 }, { "epoch": 3.0987960779446446, "grad_norm": 0.07502800326692527, "learning_rate": 1.1991129353719816e-06, "loss": 0.4739, "step": 6247 }, { "epoch": 3.099292540647884, "grad_norm": 0.07359521511173439, "learning_rate": 1.1978432273432095e-06, "loss": 0.45, "step": 6248 }, { "epoch": 3.0997890033511233, "grad_norm": 0.07096005437948809, "learning_rate": 1.1965741004131365e-06, "loss": 0.4318, "step": 
6249 }, { "epoch": 3.1002854660543626, "grad_norm": 0.07356071866870935, "learning_rate": 1.195305554775728e-06, "loss": 0.4426, "step": 6250 }, { "epoch": 3.100781928757602, "grad_norm": 0.07342378306599293, "learning_rate": 1.1940375906248635e-06, "loss": 0.4719, "step": 6251 }, { "epoch": 3.1012783914608413, "grad_norm": 0.07146026379036559, "learning_rate": 1.1927702081543279e-06, "loss": 0.4047, "step": 6252 }, { "epoch": 3.101774854164081, "grad_norm": 0.07286765177483547, "learning_rate": 1.191503407557823e-06, "loss": 0.4729, "step": 6253 }, { "epoch": 3.1022713168673204, "grad_norm": 0.07231918414330475, "learning_rate": 1.190237189028957e-06, "loss": 0.433, "step": 6254 }, { "epoch": 3.1027677795705597, "grad_norm": 0.0737948461976323, "learning_rate": 1.188971552761251e-06, "loss": 0.4387, "step": 6255 }, { "epoch": 3.103264242273799, "grad_norm": 0.07951141015270875, "learning_rate": 1.1877064989481396e-06, "loss": 0.5079, "step": 6256 }, { "epoch": 3.103760704977039, "grad_norm": 0.0752100583948817, "learning_rate": 1.186442027782964e-06, "loss": 0.4767, "step": 6257 }, { "epoch": 3.104257167680278, "grad_norm": 0.07290817664959637, "learning_rate": 1.1851781394589774e-06, "loss": 0.4312, "step": 6258 }, { "epoch": 3.1047536303835175, "grad_norm": 0.07437746217306612, "learning_rate": 1.1839148341693473e-06, "loss": 0.4496, "step": 6259 }, { "epoch": 3.105250093086757, "grad_norm": 0.07676748643472597, "learning_rate": 1.1826521121071476e-06, "loss": 0.4541, "step": 6260 }, { "epoch": 3.105746555789996, "grad_norm": 0.074831957192136, "learning_rate": 1.1813899734653673e-06, "loss": 0.4548, "step": 6261 }, { "epoch": 3.1062430184932355, "grad_norm": 0.07346740038768987, "learning_rate": 1.1801284184369022e-06, "loss": 0.426, "step": 6262 }, { "epoch": 3.1067394811964752, "grad_norm": 0.07200241111962989, "learning_rate": 1.1788674472145607e-06, "loss": 0.426, "step": 6263 }, { "epoch": 3.1072359438997146, "grad_norm": 0.0697525499088635, "learning_rate": 1.177607059991065e-06, "loss": 0.4152, "step": 6264 }, { "epoch": 3.107732406602954, "grad_norm": 0.0706362300791092, "learning_rate": 1.1763472569590405e-06, "loss": 0.4381, "step": 6265 }, { "epoch": 3.1082288693061932, "grad_norm": 0.07258543705591866, "learning_rate": 1.1750880383110313e-06, "loss": 0.4223, "step": 6266 }, { "epoch": 3.108725332009433, "grad_norm": 0.0730392166405372, "learning_rate": 1.1738294042394859e-06, "loss": 0.4464, "step": 6267 }, { "epoch": 3.1092217947126723, "grad_norm": 0.07034950376141262, "learning_rate": 1.17257135493677e-06, "loss": 0.4271, "step": 6268 }, { "epoch": 3.1097182574159117, "grad_norm": 0.07318720180760736, "learning_rate": 1.1713138905951538e-06, "loss": 0.4745, "step": 6269 }, { "epoch": 3.110214720119151, "grad_norm": 0.07215315623619104, "learning_rate": 1.1700570114068204e-06, "loss": 0.4306, "step": 6270 }, { "epoch": 3.1107111828223903, "grad_norm": 0.07331765254976616, "learning_rate": 1.1688007175638655e-06, "loss": 0.4265, "step": 6271 }, { "epoch": 3.1112076455256297, "grad_norm": 0.0729633099311388, "learning_rate": 1.1675450092582908e-06, "loss": 0.4234, "step": 6272 }, { "epoch": 3.1117041082288694, "grad_norm": 0.07376419714140751, "learning_rate": 1.1662898866820139e-06, "loss": 0.4218, "step": 6273 }, { "epoch": 3.112200570932109, "grad_norm": 0.076356924762706, "learning_rate": 1.1650353500268592e-06, "loss": 0.4411, "step": 6274 }, { "epoch": 3.112697033635348, "grad_norm": 0.0722139900522232, "learning_rate": 1.1637813994845604e-06, "loss": 0.4335, "step": 
6275 }, { "epoch": 3.1131934963385874, "grad_norm": 0.0735799273371477, "learning_rate": 1.1625280352467676e-06, "loss": 0.4767, "step": 6276 }, { "epoch": 3.1136899590418268, "grad_norm": 0.07136714993097293, "learning_rate": 1.161275257505034e-06, "loss": 0.4209, "step": 6277 }, { "epoch": 3.1141864217450665, "grad_norm": 0.0728243696761246, "learning_rate": 1.1600230664508288e-06, "loss": 0.4442, "step": 6278 }, { "epoch": 3.114682884448306, "grad_norm": 0.07277821478264992, "learning_rate": 1.158771462275529e-06, "loss": 0.4466, "step": 6279 }, { "epoch": 3.115179347151545, "grad_norm": 0.07147557567766659, "learning_rate": 1.1575204451704208e-06, "loss": 0.4269, "step": 6280 }, { "epoch": 3.1156758098547845, "grad_norm": 0.07178507916184856, "learning_rate": 1.1562700153267053e-06, "loss": 0.4215, "step": 6281 }, { "epoch": 3.116172272558024, "grad_norm": 0.07484274630335912, "learning_rate": 1.155020172935486e-06, "loss": 0.4544, "step": 6282 }, { "epoch": 3.1166687352612636, "grad_norm": 0.07133263940129107, "learning_rate": 1.153770918187785e-06, "loss": 0.4419, "step": 6283 }, { "epoch": 3.117165197964503, "grad_norm": 0.07243435975635203, "learning_rate": 1.1525222512745277e-06, "loss": 0.4372, "step": 6284 }, { "epoch": 3.1176616606677423, "grad_norm": 0.07267053715381483, "learning_rate": 1.1512741723865562e-06, "loss": 0.4248, "step": 6285 }, { "epoch": 3.1181581233709816, "grad_norm": 0.0705129375128535, "learning_rate": 1.1500266817146183e-06, "loss": 0.4301, "step": 6286 }, { "epoch": 3.118654586074221, "grad_norm": 0.07294580347031818, "learning_rate": 1.1487797794493704e-06, "loss": 0.4394, "step": 6287 }, { "epoch": 3.1191510487774607, "grad_norm": 0.07004685048181185, "learning_rate": 1.1475334657813858e-06, "loss": 0.4043, "step": 6288 }, { "epoch": 3.1196475114807, "grad_norm": 0.07101860635293346, "learning_rate": 1.1462877409011396e-06, "loss": 0.4585, "step": 6289 }, { "epoch": 3.1201439741839394, "grad_norm": 0.07307878307525968, "learning_rate": 1.1450426049990237e-06, "loss": 0.4548, "step": 6290 }, { "epoch": 3.1206404368871787, "grad_norm": 0.07427427511786734, "learning_rate": 1.1437980582653364e-06, "loss": 0.4561, "step": 6291 }, { "epoch": 3.121136899590418, "grad_norm": 0.07270067758658703, "learning_rate": 1.1425541008902852e-06, "loss": 0.4385, "step": 6292 }, { "epoch": 3.121633362293658, "grad_norm": 0.0730383047366194, "learning_rate": 1.141310733063991e-06, "loss": 0.4318, "step": 6293 }, { "epoch": 3.122129824996897, "grad_norm": 0.07194331961927128, "learning_rate": 1.1400679549764826e-06, "loss": 0.4502, "step": 6294 }, { "epoch": 3.1226262877001365, "grad_norm": 0.07276370816193148, "learning_rate": 1.138825766817696e-06, "loss": 0.4557, "step": 6295 }, { "epoch": 3.123122750403376, "grad_norm": 0.07479660195719427, "learning_rate": 1.1375841687774836e-06, "loss": 0.4412, "step": 6296 }, { "epoch": 3.123619213106615, "grad_norm": 0.07232842889066814, "learning_rate": 1.1363431610456015e-06, "loss": 0.4302, "step": 6297 }, { "epoch": 3.124115675809855, "grad_norm": 0.07297903528073248, "learning_rate": 1.1351027438117185e-06, "loss": 0.4387, "step": 6298 }, { "epoch": 3.1246121385130943, "grad_norm": 0.07194249745343922, "learning_rate": 1.133862917265411e-06, "loss": 0.4487, "step": 6299 }, { "epoch": 3.1251086012163336, "grad_norm": 0.07472724689638927, "learning_rate": 1.1326236815961683e-06, "loss": 0.438, "step": 6300 }, { "epoch": 3.125605063919573, "grad_norm": 0.07327862212617026, "learning_rate": 1.1313850369933875e-06, "loss": 0.4267, 
"step": 6301 }, { "epoch": 3.1261015266228123, "grad_norm": 0.0717658959881318, "learning_rate": 1.1301469836463747e-06, "loss": 0.4662, "step": 6302 }, { "epoch": 3.126597989326052, "grad_norm": 0.06887025184343006, "learning_rate": 1.128909521744348e-06, "loss": 0.416, "step": 6303 }, { "epoch": 3.1270944520292914, "grad_norm": 0.0734880042440317, "learning_rate": 1.1276726514764309e-06, "loss": 0.4409, "step": 6304 }, { "epoch": 3.1275909147325307, "grad_norm": 0.07515049650490668, "learning_rate": 1.1264363730316623e-06, "loss": 0.4584, "step": 6305 }, { "epoch": 3.12808737743577, "grad_norm": 0.0760084427465557, "learning_rate": 1.1252006865989868e-06, "loss": 0.4372, "step": 6306 }, { "epoch": 3.1285838401390094, "grad_norm": 0.07086822307829199, "learning_rate": 1.123965592367257e-06, "loss": 0.419, "step": 6307 }, { "epoch": 3.129080302842249, "grad_norm": 0.06959338016658051, "learning_rate": 1.1227310905252402e-06, "loss": 0.3947, "step": 6308 }, { "epoch": 3.1295767655454885, "grad_norm": 0.07331027924562893, "learning_rate": 1.1214971812616083e-06, "loss": 0.4441, "step": 6309 }, { "epoch": 3.130073228248728, "grad_norm": 0.07242813224198356, "learning_rate": 1.1202638647649456e-06, "loss": 0.4346, "step": 6310 }, { "epoch": 3.130569690951967, "grad_norm": 0.07372477735829013, "learning_rate": 1.1190311412237448e-06, "loss": 0.4342, "step": 6311 }, { "epoch": 3.1310661536552065, "grad_norm": 0.07320995587740181, "learning_rate": 1.117799010826406e-06, "loss": 0.4417, "step": 6312 }, { "epoch": 3.1315626163584462, "grad_norm": 0.07220257005066855, "learning_rate": 1.1165674737612447e-06, "loss": 0.4315, "step": 6313 }, { "epoch": 3.1320590790616856, "grad_norm": 0.07047700088670927, "learning_rate": 1.1153365302164765e-06, "loss": 0.3936, "step": 6314 }, { "epoch": 3.132555541764925, "grad_norm": 0.07480885332612296, "learning_rate": 1.1141061803802344e-06, "loss": 0.485, "step": 6315 }, { "epoch": 3.1330520044681642, "grad_norm": 0.07306069618694938, "learning_rate": 1.1128764244405564e-06, "loss": 0.4415, "step": 6316 }, { "epoch": 3.1335484671714036, "grad_norm": 0.07185953235470903, "learning_rate": 1.111647262585393e-06, "loss": 0.4144, "step": 6317 }, { "epoch": 3.1340449298746433, "grad_norm": 0.07214525618658277, "learning_rate": 1.1104186950026003e-06, "loss": 0.445, "step": 6318 }, { "epoch": 3.1345413925778827, "grad_norm": 0.07463149967030125, "learning_rate": 1.1091907218799442e-06, "loss": 0.4548, "step": 6319 }, { "epoch": 3.135037855281122, "grad_norm": 0.07160401781801154, "learning_rate": 1.107963343405103e-06, "loss": 0.4191, "step": 6320 }, { "epoch": 3.1355343179843613, "grad_norm": 0.07409967039536204, "learning_rate": 1.1067365597656592e-06, "loss": 0.4395, "step": 6321 }, { "epoch": 3.1360307806876007, "grad_norm": 0.07520117080543616, "learning_rate": 1.10551037114911e-06, "loss": 0.4616, "step": 6322 }, { "epoch": 3.1365272433908404, "grad_norm": 0.07411725758901368, "learning_rate": 1.1042847777428573e-06, "loss": 0.4527, "step": 6323 }, { "epoch": 3.1370237060940798, "grad_norm": 0.07384442107466406, "learning_rate": 1.103059779734212e-06, "loss": 0.4346, "step": 6324 }, { "epoch": 3.137520168797319, "grad_norm": 0.07447981354299785, "learning_rate": 1.1018353773103979e-06, "loss": 0.4372, "step": 6325 }, { "epoch": 3.1380166315005584, "grad_norm": 0.07263836411793177, "learning_rate": 1.100611570658543e-06, "loss": 0.441, "step": 6326 }, { "epoch": 3.1385130942037978, "grad_norm": 0.07281104769870578, "learning_rate": 1.0993883599656885e-06, "loss": 
0.44, "step": 6327 }, { "epoch": 3.1390095569070375, "grad_norm": 0.07305992366538848, "learning_rate": 1.0981657454187816e-06, "loss": 0.4222, "step": 6328 }, { "epoch": 3.139506019610277, "grad_norm": 0.07490868994563034, "learning_rate": 1.0969437272046795e-06, "loss": 0.439, "step": 6329 }, { "epoch": 3.140002482313516, "grad_norm": 0.07418711798138108, "learning_rate": 1.0957223055101485e-06, "loss": 0.4421, "step": 6330 }, { "epoch": 3.1404989450167555, "grad_norm": 0.07387558875548761, "learning_rate": 1.0945014805218607e-06, "loss": 0.4777, "step": 6331 }, { "epoch": 3.140995407719995, "grad_norm": 0.07110874455248554, "learning_rate": 1.093281252426403e-06, "loss": 0.431, "step": 6332 }, { "epoch": 3.1414918704232346, "grad_norm": 0.07177204619436062, "learning_rate": 1.0920616214102669e-06, "loss": 0.4225, "step": 6333 }, { "epoch": 3.141988333126474, "grad_norm": 0.07159994867603817, "learning_rate": 1.0908425876598512e-06, "loss": 0.4401, "step": 6334 }, { "epoch": 3.1424847958297133, "grad_norm": 0.07431659814634801, "learning_rate": 1.0896241513614691e-06, "loss": 0.4616, "step": 6335 }, { "epoch": 3.1429812585329526, "grad_norm": 0.07415439237796943, "learning_rate": 1.0884063127013355e-06, "loss": 0.4235, "step": 6336 }, { "epoch": 3.143477721236192, "grad_norm": 0.07277972879207999, "learning_rate": 1.0871890718655815e-06, "loss": 0.437, "step": 6337 }, { "epoch": 3.1439741839394317, "grad_norm": 0.07089605624142527, "learning_rate": 1.08597242904024e-06, "loss": 0.3935, "step": 6338 }, { "epoch": 3.144470646642671, "grad_norm": 0.07232687962567792, "learning_rate": 1.0847563844112552e-06, "loss": 0.4183, "step": 6339 }, { "epoch": 3.1449671093459104, "grad_norm": 0.07480668654229129, "learning_rate": 1.0835409381644819e-06, "loss": 0.4317, "step": 6340 }, { "epoch": 3.1454635720491497, "grad_norm": 0.0718756198950488, "learning_rate": 1.0823260904856791e-06, "loss": 0.4562, "step": 6341 }, { "epoch": 3.145960034752389, "grad_norm": 0.07311229140682408, "learning_rate": 1.0811118415605198e-06, "loss": 0.4423, "step": 6342 }, { "epoch": 3.146456497455629, "grad_norm": 0.07167006067349847, "learning_rate": 1.079898191574581e-06, "loss": 0.4406, "step": 6343 }, { "epoch": 3.146952960158868, "grad_norm": 0.07363214240251828, "learning_rate": 1.078685140713348e-06, "loss": 0.4644, "step": 6344 }, { "epoch": 3.1474494228621075, "grad_norm": 0.07392910711508574, "learning_rate": 1.0774726891622206e-06, "loss": 0.4424, "step": 6345 }, { "epoch": 3.147945885565347, "grad_norm": 0.06997889628056078, "learning_rate": 1.076260837106497e-06, "loss": 0.4269, "step": 6346 }, { "epoch": 3.148442348268586, "grad_norm": 0.07117551469703765, "learning_rate": 1.0750495847313936e-06, "loss": 0.3957, "step": 6347 }, { "epoch": 3.148938810971826, "grad_norm": 0.0733274547222186, "learning_rate": 1.0738389322220276e-06, "loss": 0.4594, "step": 6348 }, { "epoch": 3.1494352736750653, "grad_norm": 0.07420284307941621, "learning_rate": 1.0726288797634316e-06, "loss": 0.4519, "step": 6349 }, { "epoch": 3.1499317363783046, "grad_norm": 0.06961884503816268, "learning_rate": 1.0714194275405399e-06, "loss": 0.4011, "step": 6350 }, { "epoch": 3.150428199081544, "grad_norm": 0.07483189324920517, "learning_rate": 1.0702105757381982e-06, "loss": 0.4443, "step": 6351 }, { "epoch": 3.1509246617847833, "grad_norm": 0.07206434301062702, "learning_rate": 1.0690023245411613e-06, "loss": 0.452, "step": 6352 }, { "epoch": 3.151421124488023, "grad_norm": 0.07105045953167007, "learning_rate": 1.0677946741340888e-06, 
"loss": 0.4057, "step": 6353 }, { "epoch": 3.1519175871912624, "grad_norm": 0.07108778081638781, "learning_rate": 1.0665876247015545e-06, "loss": 0.4222, "step": 6354 }, { "epoch": 3.1524140498945017, "grad_norm": 0.07334785786034573, "learning_rate": 1.0653811764280336e-06, "loss": 0.4428, "step": 6355 }, { "epoch": 3.152910512597741, "grad_norm": 0.07542948877894473, "learning_rate": 1.064175329497912e-06, "loss": 0.4917, "step": 6356 }, { "epoch": 3.1534069753009804, "grad_norm": 0.072007413616065, "learning_rate": 1.0629700840954866e-06, "loss": 0.428, "step": 6357 }, { "epoch": 3.15390343800422, "grad_norm": 0.07136862999002559, "learning_rate": 1.0617654404049566e-06, "loss": 0.4179, "step": 6358 }, { "epoch": 3.1543999007074595, "grad_norm": 0.07166078367580378, "learning_rate": 1.0605613986104357e-06, "loss": 0.4425, "step": 6359 }, { "epoch": 3.154896363410699, "grad_norm": 0.07290498579597421, "learning_rate": 1.0593579588959412e-06, "loss": 0.431, "step": 6360 }, { "epoch": 3.155392826113938, "grad_norm": 0.07492878534177091, "learning_rate": 1.058155121445399e-06, "loss": 0.4495, "step": 6361 }, { "epoch": 3.1558892888171775, "grad_norm": 0.073171418580753, "learning_rate": 1.0569528864426444e-06, "loss": 0.4395, "step": 6362 }, { "epoch": 3.1563857515204172, "grad_norm": 0.07486368150671195, "learning_rate": 1.055751254071417e-06, "loss": 0.4636, "step": 6363 }, { "epoch": 3.1568822142236566, "grad_norm": 0.07233176790796679, "learning_rate": 1.054550224515371e-06, "loss": 0.4491, "step": 6364 }, { "epoch": 3.157378676926896, "grad_norm": 0.07546816185177538, "learning_rate": 1.053349797958061e-06, "loss": 0.4631, "step": 6365 }, { "epoch": 3.157875139630135, "grad_norm": 0.07337190052275312, "learning_rate": 1.052149974582956e-06, "loss": 0.4368, "step": 6366 }, { "epoch": 3.1583716023333746, "grad_norm": 0.07122430623194338, "learning_rate": 1.0509507545734289e-06, "loss": 0.4028, "step": 6367 }, { "epoch": 3.1588680650366143, "grad_norm": 0.0737020536658153, "learning_rate": 1.0497521381127595e-06, "loss": 0.4069, "step": 6368 }, { "epoch": 3.1593645277398537, "grad_norm": 0.07323902157198689, "learning_rate": 1.0485541253841391e-06, "loss": 0.4547, "step": 6369 }, { "epoch": 3.159860990443093, "grad_norm": 0.07346998905499097, "learning_rate": 1.0473567165706643e-06, "loss": 0.4655, "step": 6370 }, { "epoch": 3.1603574531463323, "grad_norm": 0.07434490171632929, "learning_rate": 1.0461599118553383e-06, "loss": 0.431, "step": 6371 }, { "epoch": 3.1608539158495716, "grad_norm": 0.07278054746974955, "learning_rate": 1.0449637114210765e-06, "loss": 0.4377, "step": 6372 }, { "epoch": 3.1613503785528114, "grad_norm": 0.076400465350522, "learning_rate": 1.0437681154506951e-06, "loss": 0.4499, "step": 6373 }, { "epoch": 3.1618468412560508, "grad_norm": 0.07364498414420374, "learning_rate": 1.0425731241269255e-06, "loss": 0.4249, "step": 6374 }, { "epoch": 3.16234330395929, "grad_norm": 0.07177693357710595, "learning_rate": 1.041378737632402e-06, "loss": 0.4007, "step": 6375 }, { "epoch": 3.1628397666625294, "grad_norm": 0.075074506947561, "learning_rate": 1.0401849561496647e-06, "loss": 0.4399, "step": 6376 }, { "epoch": 3.1633362293657687, "grad_norm": 0.07142224911160262, "learning_rate": 1.0389917798611687e-06, "loss": 0.4377, "step": 6377 }, { "epoch": 3.1638326920690085, "grad_norm": 0.07646993575423702, "learning_rate": 1.0377992089492666e-06, "loss": 0.4689, "step": 6378 }, { "epoch": 3.164329154772248, "grad_norm": 0.07544046356558812, "learning_rate": 
1.0366072435962283e-06, "loss": 0.4547, "step": 6379 }, { "epoch": 3.164825617475487, "grad_norm": 0.07212582877183446, "learning_rate": 1.0354158839842226e-06, "loss": 0.4336, "step": 6380 }, { "epoch": 3.1653220801787265, "grad_norm": 0.0717380986515421, "learning_rate": 1.0342251302953332e-06, "loss": 0.4163, "step": 6381 }, { "epoch": 3.165818542881966, "grad_norm": 0.07452498123015722, "learning_rate": 1.0330349827115466e-06, "loss": 0.461, "step": 6382 }, { "epoch": 3.166315005585205, "grad_norm": 0.07269605501628315, "learning_rate": 1.031845441414756e-06, "loss": 0.4466, "step": 6383 }, { "epoch": 3.166811468288445, "grad_norm": 0.07389670081610103, "learning_rate": 1.0306565065867663e-06, "loss": 0.4551, "step": 6384 }, { "epoch": 3.1673079309916843, "grad_norm": 0.07430456948686814, "learning_rate": 1.0294681784092847e-06, "loss": 0.4422, "step": 6385 }, { "epoch": 3.1678043936949236, "grad_norm": 0.07299404342467042, "learning_rate": 1.028280457063931e-06, "loss": 0.4246, "step": 6386 }, { "epoch": 3.168300856398163, "grad_norm": 0.0739483810668814, "learning_rate": 1.0270933427322277e-06, "loss": 0.4374, "step": 6387 }, { "epoch": 3.1687973191014027, "grad_norm": 0.07241633132347361, "learning_rate": 1.0259068355956047e-06, "loss": 0.4226, "step": 6388 }, { "epoch": 3.169293781804642, "grad_norm": 0.07554269802453953, "learning_rate": 1.0247209358354038e-06, "loss": 0.4836, "step": 6389 }, { "epoch": 3.1697902445078814, "grad_norm": 0.07350081967710687, "learning_rate": 1.0235356436328675e-06, "loss": 0.4304, "step": 6390 }, { "epoch": 3.1702867072111207, "grad_norm": 0.07696261227487028, "learning_rate": 1.0223509591691517e-06, "loss": 0.4475, "step": 6391 }, { "epoch": 3.17078316991436, "grad_norm": 0.07744577811871206, "learning_rate": 1.0211668826253147e-06, "loss": 0.5039, "step": 6392 }, { "epoch": 3.1712796326175994, "grad_norm": 0.07504917661298924, "learning_rate": 1.0199834141823244e-06, "loss": 0.4789, "step": 6393 }, { "epoch": 3.171776095320839, "grad_norm": 0.07305119846164813, "learning_rate": 1.0188005540210545e-06, "loss": 0.4571, "step": 6394 }, { "epoch": 3.1722725580240785, "grad_norm": 0.07491863925128799, "learning_rate": 1.0176183023222847e-06, "loss": 0.4501, "step": 6395 }, { "epoch": 3.172769020727318, "grad_norm": 0.07452081497636626, "learning_rate": 1.0164366592667063e-06, "loss": 0.4654, "step": 6396 }, { "epoch": 3.173265483430557, "grad_norm": 0.07618022845511371, "learning_rate": 1.015255625034911e-06, "loss": 0.4584, "step": 6397 }, { "epoch": 3.173761946133797, "grad_norm": 0.07043046185576388, "learning_rate": 1.014075199807405e-06, "loss": 0.3949, "step": 6398 }, { "epoch": 3.1742584088370362, "grad_norm": 0.07227920540362784, "learning_rate": 1.012895383764595e-06, "loss": 0.4514, "step": 6399 }, { "epoch": 3.1747548715402756, "grad_norm": 0.07305956247134797, "learning_rate": 1.0117161770867962e-06, "loss": 0.4134, "step": 6400 }, { "epoch": 3.175251334243515, "grad_norm": 0.07062457812145931, "learning_rate": 1.0105375799542334e-06, "loss": 0.4259, "step": 6401 }, { "epoch": 3.1757477969467542, "grad_norm": 0.07382671423583863, "learning_rate": 1.009359592547034e-06, "loss": 0.4587, "step": 6402 }, { "epoch": 3.1762442596499936, "grad_norm": 0.07164535630823299, "learning_rate": 1.008182215045237e-06, "loss": 0.4309, "step": 6403 }, { "epoch": 3.1767407223532333, "grad_norm": 0.07310659452909928, "learning_rate": 1.007005447628785e-06, "loss": 0.4711, "step": 6404 }, { "epoch": 3.1772371850564727, "grad_norm": 0.07450978246926393, 
"learning_rate": 1.0058292904775257e-06, "loss": 0.4181, "step": 6405 }, { "epoch": 3.177733647759712, "grad_norm": 0.0716823391899861, "learning_rate": 1.0046537437712196e-06, "loss": 0.4086, "step": 6406 }, { "epoch": 3.1782301104629513, "grad_norm": 0.07328278411569006, "learning_rate": 1.003478807689528e-06, "loss": 0.44, "step": 6407 }, { "epoch": 3.178726573166191, "grad_norm": 0.07225344228354333, "learning_rate": 1.0023044824120198e-06, "loss": 0.4034, "step": 6408 }, { "epoch": 3.1792230358694304, "grad_norm": 0.07361715251773729, "learning_rate": 1.001130768118176e-06, "loss": 0.4183, "step": 6409 }, { "epoch": 3.17971949857267, "grad_norm": 0.07047270273695008, "learning_rate": 9.999576649873744e-07, "loss": 0.4054, "step": 6410 }, { "epoch": 3.180215961275909, "grad_norm": 0.07264361495431505, "learning_rate": 9.987851731989096e-07, "loss": 0.4336, "step": 6411 }, { "epoch": 3.1807124239791484, "grad_norm": 0.0726626866759461, "learning_rate": 9.976132929319755e-07, "loss": 0.4264, "step": 6412 }, { "epoch": 3.1812088866823878, "grad_norm": 0.0768203689765329, "learning_rate": 9.96442024365677e-07, "loss": 0.4593, "step": 6413 }, { "epoch": 3.1817053493856275, "grad_norm": 0.07293971639410052, "learning_rate": 9.952713676790227e-07, "loss": 0.4796, "step": 6414 }, { "epoch": 3.182201812088867, "grad_norm": 0.07490695058582275, "learning_rate": 9.941013230509278e-07, "loss": 0.4807, "step": 6415 }, { "epoch": 3.182698274792106, "grad_norm": 0.06927060845757176, "learning_rate": 9.929318906602176e-07, "loss": 0.4096, "step": 6416 }, { "epoch": 3.1831947374953455, "grad_norm": 0.07276995387218554, "learning_rate": 9.91763070685618e-07, "loss": 0.4309, "step": 6417 }, { "epoch": 3.1836912001985853, "grad_norm": 0.07277977198244508, "learning_rate": 9.905948633057666e-07, "loss": 0.4338, "step": 6418 }, { "epoch": 3.1841876629018246, "grad_norm": 0.07314677971605135, "learning_rate": 9.894272686992052e-07, "loss": 0.4528, "step": 6419 }, { "epoch": 3.184684125605064, "grad_norm": 0.07423666704428677, "learning_rate": 9.882602870443796e-07, "loss": 0.4465, "step": 6420 }, { "epoch": 3.1851805883083033, "grad_norm": 0.0732412581872415, "learning_rate": 9.87093918519647e-07, "loss": 0.4428, "step": 6421 }, { "epoch": 3.1856770510115426, "grad_norm": 0.06905045477934818, "learning_rate": 9.859281633032653e-07, "loss": 0.4127, "step": 6422 }, { "epoch": 3.186173513714782, "grad_norm": 0.0720047625731592, "learning_rate": 9.84763021573405e-07, "loss": 0.4335, "step": 6423 }, { "epoch": 3.1866699764180217, "grad_norm": 0.07284181108689597, "learning_rate": 9.835984935081371e-07, "loss": 0.4232, "step": 6424 }, { "epoch": 3.187166439121261, "grad_norm": 0.07034437883600633, "learning_rate": 9.82434579285441e-07, "loss": 0.4335, "step": 6425 }, { "epoch": 3.1876629018245004, "grad_norm": 0.07541412625398493, "learning_rate": 9.812712790832035e-07, "loss": 0.4843, "step": 6426 }, { "epoch": 3.1881593645277397, "grad_norm": 0.07461764739658933, "learning_rate": 9.801085930792138e-07, "loss": 0.4556, "step": 6427 }, { "epoch": 3.188655827230979, "grad_norm": 0.07150957407049094, "learning_rate": 9.789465214511729e-07, "loss": 0.423, "step": 6428 }, { "epoch": 3.189152289934219, "grad_norm": 0.07748019008724999, "learning_rate": 9.777850643766823e-07, "loss": 0.4611, "step": 6429 }, { "epoch": 3.189648752637458, "grad_norm": 0.07366850775212323, "learning_rate": 9.766242220332544e-07, "loss": 0.441, "step": 6430 }, { "epoch": 3.1901452153406975, "grad_norm": 0.07468096275004382, "learning_rate": 
9.754639945983041e-07, "loss": 0.4764, "step": 6431 }, { "epoch": 3.190641678043937, "grad_norm": 0.07281699571452858, "learning_rate": 9.743043822491528e-07, "loss": 0.4225, "step": 6432 }, { "epoch": 3.191138140747176, "grad_norm": 0.07269377638605191, "learning_rate": 9.731453851630308e-07, "loss": 0.4481, "step": 6433 }, { "epoch": 3.191634603450416, "grad_norm": 0.07479476219760563, "learning_rate": 9.719870035170697e-07, "loss": 0.4536, "step": 6434 }, { "epoch": 3.1921310661536553, "grad_norm": 0.06903390753037493, "learning_rate": 9.708292374883121e-07, "loss": 0.4096, "step": 6435 }, { "epoch": 3.1926275288568946, "grad_norm": 0.0752804219093628, "learning_rate": 9.696720872537023e-07, "loss": 0.4711, "step": 6436 }, { "epoch": 3.193123991560134, "grad_norm": 0.07274574408985673, "learning_rate": 9.68515552990092e-07, "loss": 0.4552, "step": 6437 }, { "epoch": 3.1936204542633733, "grad_norm": 0.07134757154327871, "learning_rate": 9.673596348742404e-07, "loss": 0.4354, "step": 6438 }, { "epoch": 3.194116916966613, "grad_norm": 0.07197174710606297, "learning_rate": 9.662043330828086e-07, "loss": 0.4185, "step": 6439 }, { "epoch": 3.1946133796698524, "grad_norm": 0.0757707799900384, "learning_rate": 9.650496477923687e-07, "loss": 0.4806, "step": 6440 }, { "epoch": 3.1951098423730917, "grad_norm": 0.07346692458731068, "learning_rate": 9.638955791793952e-07, "loss": 0.4359, "step": 6441 }, { "epoch": 3.195606305076331, "grad_norm": 0.07060145600998394, "learning_rate": 9.62742127420268e-07, "loss": 0.4094, "step": 6442 }, { "epoch": 3.1961027677795704, "grad_norm": 0.07566580712909188, "learning_rate": 9.615892926912745e-07, "loss": 0.4724, "step": 6443 }, { "epoch": 3.19659923048281, "grad_norm": 0.07197491092931253, "learning_rate": 9.60437075168605e-07, "loss": 0.4514, "step": 6444 }, { "epoch": 3.1970956931860495, "grad_norm": 0.07526582352172888, "learning_rate": 9.592854750283604e-07, "loss": 0.451, "step": 6445 }, { "epoch": 3.197592155889289, "grad_norm": 0.07443960349293295, "learning_rate": 9.58134492446543e-07, "loss": 0.4488, "step": 6446 }, { "epoch": 3.198088618592528, "grad_norm": 0.07285278760288882, "learning_rate": 9.569841275990611e-07, "loss": 0.4734, "step": 6447 }, { "epoch": 3.1985850812957675, "grad_norm": 0.07308057042372908, "learning_rate": 9.558343806617316e-07, "loss": 0.4407, "step": 6448 }, { "epoch": 3.1990815439990072, "grad_norm": 0.07175580604720955, "learning_rate": 9.546852518102723e-07, "loss": 0.4589, "step": 6449 }, { "epoch": 3.1995780067022466, "grad_norm": 0.07329134585274771, "learning_rate": 9.535367412203117e-07, "loss": 0.4387, "step": 6450 }, { "epoch": 3.200074469405486, "grad_norm": 0.07397527035561834, "learning_rate": 9.523888490673805e-07, "loss": 0.4383, "step": 6451 }, { "epoch": 3.2005709321087252, "grad_norm": 0.06990958524922075, "learning_rate": 9.512415755269139e-07, "loss": 0.4369, "step": 6452 }, { "epoch": 3.2010673948119646, "grad_norm": 0.07557786165555054, "learning_rate": 9.500949207742566e-07, "loss": 0.4607, "step": 6453 }, { "epoch": 3.2015638575152043, "grad_norm": 0.07303988553622057, "learning_rate": 9.48948884984654e-07, "loss": 0.4558, "step": 6454 }, { "epoch": 3.2020603202184437, "grad_norm": 0.07264926895563321, "learning_rate": 9.478034683332621e-07, "loss": 0.4355, "step": 6455 }, { "epoch": 3.202556782921683, "grad_norm": 0.07321784380890624, "learning_rate": 9.466586709951381e-07, "loss": 0.4615, "step": 6456 }, { "epoch": 3.2030532456249223, "grad_norm": 0.07183308431742852, "learning_rate": 
9.455144931452459e-07, "loss": 0.4633, "step": 6457 }, { "epoch": 3.2035497083281617, "grad_norm": 0.0760389707040024, "learning_rate": 9.443709349584546e-07, "loss": 0.4849, "step": 6458 }, { "epoch": 3.2040461710314014, "grad_norm": 0.07351781761787028, "learning_rate": 9.432279966095376e-07, "loss": 0.4174, "step": 6459 }, { "epoch": 3.2045426337346408, "grad_norm": 0.07417225150428665, "learning_rate": 9.420856782731774e-07, "loss": 0.4439, "step": 6460 }, { "epoch": 3.20503909643788, "grad_norm": 0.0723067891275983, "learning_rate": 9.409439801239561e-07, "loss": 0.4309, "step": 6461 }, { "epoch": 3.2055355591411194, "grad_norm": 0.07521119417390677, "learning_rate": 9.398029023363664e-07, "loss": 0.4372, "step": 6462 }, { "epoch": 3.2060320218443588, "grad_norm": 0.07288334819556669, "learning_rate": 9.386624450848031e-07, "loss": 0.4368, "step": 6463 }, { "epoch": 3.2065284845475985, "grad_norm": 0.07580814940598336, "learning_rate": 9.375226085435652e-07, "loss": 0.4715, "step": 6464 }, { "epoch": 3.207024947250838, "grad_norm": 0.07768050645172682, "learning_rate": 9.36383392886861e-07, "loss": 0.4533, "step": 6465 }, { "epoch": 3.207521409954077, "grad_norm": 0.07101000923713795, "learning_rate": 9.352447982887986e-07, "loss": 0.4064, "step": 6466 }, { "epoch": 3.2080178726573165, "grad_norm": 0.07276121017150415, "learning_rate": 9.341068249233964e-07, "loss": 0.4469, "step": 6467 }, { "epoch": 3.208514335360556, "grad_norm": 0.07130837808449741, "learning_rate": 9.32969472964575e-07, "loss": 0.4168, "step": 6468 }, { "epoch": 3.2090107980637956, "grad_norm": 0.06987465664506198, "learning_rate": 9.318327425861584e-07, "loss": 0.4124, "step": 6469 }, { "epoch": 3.209507260767035, "grad_norm": 0.07393044314992846, "learning_rate": 9.3069663396188e-07, "loss": 0.4618, "step": 6470 }, { "epoch": 3.2100037234702743, "grad_norm": 0.07386630528663508, "learning_rate": 9.295611472653737e-07, "loss": 0.4777, "step": 6471 }, { "epoch": 3.2105001861735136, "grad_norm": 0.07340938735988556, "learning_rate": 9.284262826701823e-07, "loss": 0.4472, "step": 6472 }, { "epoch": 3.210996648876753, "grad_norm": 0.07400222313268992, "learning_rate": 9.272920403497515e-07, "loss": 0.4424, "step": 6473 }, { "epoch": 3.2114931115799927, "grad_norm": 0.07318685134351284, "learning_rate": 9.26158420477431e-07, "loss": 0.4171, "step": 6474 }, { "epoch": 3.211989574283232, "grad_norm": 0.07338316230220633, "learning_rate": 9.250254232264772e-07, "loss": 0.4351, "step": 6475 }, { "epoch": 3.2124860369864714, "grad_norm": 0.0741471470218761, "learning_rate": 9.238930487700487e-07, "loss": 0.4822, "step": 6476 }, { "epoch": 3.2129824996897107, "grad_norm": 0.07200053996890503, "learning_rate": 9.227612972812139e-07, "loss": 0.4385, "step": 6477 }, { "epoch": 3.21347896239295, "grad_norm": 0.073457939378401, "learning_rate": 9.216301689329393e-07, "loss": 0.4168, "step": 6478 }, { "epoch": 3.21397542509619, "grad_norm": 0.0756181528793698, "learning_rate": 9.204996638981034e-07, "loss": 0.4703, "step": 6479 }, { "epoch": 3.214471887799429, "grad_norm": 0.0714661891520631, "learning_rate": 9.193697823494846e-07, "loss": 0.4305, "step": 6480 }, { "epoch": 3.2149683505026685, "grad_norm": 0.07422591700487427, "learning_rate": 9.182405244597647e-07, "loss": 0.4465, "step": 6481 }, { "epoch": 3.215464813205908, "grad_norm": 0.07131454314173444, "learning_rate": 9.171118904015358e-07, "loss": 0.4495, "step": 6482 }, { "epoch": 3.215961275909147, "grad_norm": 0.07187530227156332, "learning_rate": 
9.159838803472904e-07, "loss": 0.4511, "step": 6483 }, { "epoch": 3.216457738612387, "grad_norm": 0.07481112874301267, "learning_rate": 9.148564944694255e-07, "loss": 0.4203, "step": 6484 }, { "epoch": 3.2169542013156263, "grad_norm": 0.07372505230900529, "learning_rate": 9.137297329402467e-07, "loss": 0.4524, "step": 6485 }, { "epoch": 3.2174506640188656, "grad_norm": 0.07173024709136377, "learning_rate": 9.126035959319579e-07, "loss": 0.464, "step": 6486 }, { "epoch": 3.217947126722105, "grad_norm": 0.0724764831646147, "learning_rate": 9.114780836166748e-07, "loss": 0.4441, "step": 6487 }, { "epoch": 3.2184435894253443, "grad_norm": 0.07239183478303986, "learning_rate": 9.10353196166412e-07, "loss": 0.4182, "step": 6488 }, { "epoch": 3.218940052128584, "grad_norm": 0.0747413146612994, "learning_rate": 9.092289337530907e-07, "loss": 0.462, "step": 6489 }, { "epoch": 3.2194365148318234, "grad_norm": 0.07302171673062939, "learning_rate": 9.081052965485365e-07, "loss": 0.4306, "step": 6490 }, { "epoch": 3.2199329775350627, "grad_norm": 0.07440786317465062, "learning_rate": 9.06982284724478e-07, "loss": 0.4445, "step": 6491 }, { "epoch": 3.220429440238302, "grad_norm": 0.0737803698740409, "learning_rate": 9.058598984525518e-07, "loss": 0.4701, "step": 6492 }, { "epoch": 3.2209259029415414, "grad_norm": 0.07497800702697728, "learning_rate": 9.047381379042941e-07, "loss": 0.5053, "step": 6493 }, { "epoch": 3.221422365644781, "grad_norm": 0.07190727725351215, "learning_rate": 9.03617003251151e-07, "loss": 0.4145, "step": 6494 }, { "epoch": 3.2219188283480205, "grad_norm": 0.07583370749987378, "learning_rate": 9.024964946644682e-07, "loss": 0.4384, "step": 6495 }, { "epoch": 3.22241529105126, "grad_norm": 0.07157005347045454, "learning_rate": 9.013766123154965e-07, "loss": 0.4445, "step": 6496 }, { "epoch": 3.222911753754499, "grad_norm": 0.07665603143646456, "learning_rate": 9.002573563753947e-07, "loss": 0.4546, "step": 6497 }, { "epoch": 3.2234082164577385, "grad_norm": 0.07106104303117836, "learning_rate": 8.991387270152202e-07, "loss": 0.4151, "step": 6498 }, { "epoch": 3.2239046791609782, "grad_norm": 0.07238514243593716, "learning_rate": 8.980207244059402e-07, "loss": 0.4171, "step": 6499 }, { "epoch": 3.2244011418642176, "grad_norm": 0.07173738237603448, "learning_rate": 8.969033487184225e-07, "loss": 0.4273, "step": 6500 }, { "epoch": 3.224897604567457, "grad_norm": 0.07179915383513251, "learning_rate": 8.957866001234383e-07, "loss": 0.4591, "step": 6501 }, { "epoch": 3.225394067270696, "grad_norm": 0.07446357660033671, "learning_rate": 8.946704787916676e-07, "loss": 0.4613, "step": 6502 }, { "epoch": 3.2258905299739355, "grad_norm": 0.07259418059023226, "learning_rate": 8.935549848936887e-07, "loss": 0.4708, "step": 6503 }, { "epoch": 3.2263869926771753, "grad_norm": 0.07650140470117987, "learning_rate": 8.924401185999904e-07, "loss": 0.4464, "step": 6504 }, { "epoch": 3.2268834553804147, "grad_norm": 0.07349494700194908, "learning_rate": 8.913258800809598e-07, "loss": 0.4133, "step": 6505 }, { "epoch": 3.227379918083654, "grad_norm": 0.0741847627217174, "learning_rate": 8.902122695068905e-07, "loss": 0.4239, "step": 6506 }, { "epoch": 3.2278763807868933, "grad_norm": 0.07361488605938472, "learning_rate": 8.890992870479809e-07, "loss": 0.4447, "step": 6507 }, { "epoch": 3.2283728434901326, "grad_norm": 0.07135406931538264, "learning_rate": 8.879869328743306e-07, "loss": 0.4152, "step": 6508 }, { "epoch": 3.2288693061933724, "grad_norm": 0.07102552704377421, "learning_rate": 
8.868752071559478e-07, "loss": 0.3938, "step": 6509 }, { "epoch": 3.2293657688966118, "grad_norm": 0.07214553650346917, "learning_rate": 8.857641100627395e-07, "loss": 0.453, "step": 6510 }, { "epoch": 3.229862231599851, "grad_norm": 0.07070501925694196, "learning_rate": 8.846536417645213e-07, "loss": 0.4183, "step": 6511 }, { "epoch": 3.2303586943030904, "grad_norm": 0.07286313246206055, "learning_rate": 8.835438024310095e-07, "loss": 0.4319, "step": 6512 }, { "epoch": 3.2308551570063297, "grad_norm": 0.07248086505232414, "learning_rate": 8.824345922318234e-07, "loss": 0.4465, "step": 6513 }, { "epoch": 3.2313516197095695, "grad_norm": 0.07204426946399531, "learning_rate": 8.813260113364913e-07, "loss": 0.4534, "step": 6514 }, { "epoch": 3.231848082412809, "grad_norm": 0.07261340576260167, "learning_rate": 8.802180599144394e-07, "loss": 0.4506, "step": 6515 }, { "epoch": 3.232344545116048, "grad_norm": 0.07080102187886099, "learning_rate": 8.791107381350028e-07, "loss": 0.4398, "step": 6516 }, { "epoch": 3.2328410078192875, "grad_norm": 0.0729790380657279, "learning_rate": 8.780040461674161e-07, "loss": 0.4562, "step": 6517 }, { "epoch": 3.233337470522527, "grad_norm": 0.07468382100067729, "learning_rate": 8.768979841808184e-07, "loss": 0.4425, "step": 6518 }, { "epoch": 3.2338339332257666, "grad_norm": 0.0726797100674497, "learning_rate": 8.757925523442562e-07, "loss": 0.4553, "step": 6519 }, { "epoch": 3.234330395929006, "grad_norm": 0.0717963078697041, "learning_rate": 8.74687750826676e-07, "loss": 0.4297, "step": 6520 }, { "epoch": 3.2348268586322453, "grad_norm": 0.07134789754728933, "learning_rate": 8.735835797969272e-07, "loss": 0.4024, "step": 6521 }, { "epoch": 3.2353233213354846, "grad_norm": 0.07157494654779836, "learning_rate": 8.724800394237675e-07, "loss": 0.436, "step": 6522 }, { "epoch": 3.235819784038724, "grad_norm": 0.07397407102075705, "learning_rate": 8.713771298758539e-07, "loss": 0.4178, "step": 6523 }, { "epoch": 3.2363162467419633, "grad_norm": 0.07244233820873923, "learning_rate": 8.702748513217491e-07, "loss": 0.4178, "step": 6524 }, { "epoch": 3.236812709445203, "grad_norm": 0.07271891990618194, "learning_rate": 8.691732039299167e-07, "loss": 0.4409, "step": 6525 }, { "epoch": 3.2373091721484424, "grad_norm": 0.07477147839968602, "learning_rate": 8.680721878687281e-07, "loss": 0.4558, "step": 6526 }, { "epoch": 3.2378056348516817, "grad_norm": 0.07443087408775124, "learning_rate": 8.669718033064556e-07, "loss": 0.4303, "step": 6527 }, { "epoch": 3.238302097554921, "grad_norm": 0.07495800672885247, "learning_rate": 8.658720504112733e-07, "loss": 0.4674, "step": 6528 }, { "epoch": 3.238798560258161, "grad_norm": 0.07210207630859929, "learning_rate": 8.647729293512642e-07, "loss": 0.4576, "step": 6529 }, { "epoch": 3.2392950229614, "grad_norm": 0.07586349592109952, "learning_rate": 8.636744402944075e-07, "loss": 0.45, "step": 6530 }, { "epoch": 3.2397914856646395, "grad_norm": 0.06898888811284278, "learning_rate": 8.625765834085936e-07, "loss": 0.3799, "step": 6531 }, { "epoch": 3.240287948367879, "grad_norm": 0.07370951952665843, "learning_rate": 8.6147935886161e-07, "loss": 0.441, "step": 6532 }, { "epoch": 3.240784411071118, "grad_norm": 0.0726414034311996, "learning_rate": 8.60382766821149e-07, "loss": 0.4383, "step": 6533 }, { "epoch": 3.2412808737743575, "grad_norm": 0.07261925823331064, "learning_rate": 8.5928680745481e-07, "loss": 0.421, "step": 6534 }, { "epoch": 3.2417773364775972, "grad_norm": 0.0731272020451477, "learning_rate": 8.581914809300895e-07, 
"loss": 0.4459, "step": 6535 }, { "epoch": 3.2422737991808366, "grad_norm": 0.07293597481858168, "learning_rate": 8.570967874143937e-07, "loss": 0.4536, "step": 6536 }, { "epoch": 3.242770261884076, "grad_norm": 0.07281065453153535, "learning_rate": 8.560027270750276e-07, "loss": 0.4517, "step": 6537 }, { "epoch": 3.2432667245873152, "grad_norm": 0.07396847550314199, "learning_rate": 8.549093000792008e-07, "loss": 0.4249, "step": 6538 }, { "epoch": 3.243763187290555, "grad_norm": 0.07359192677743913, "learning_rate": 8.538165065940263e-07, "loss": 0.4451, "step": 6539 }, { "epoch": 3.2442596499937943, "grad_norm": 0.07186329185364042, "learning_rate": 8.527243467865176e-07, "loss": 0.4144, "step": 6540 }, { "epoch": 3.2447561126970337, "grad_norm": 0.07531998855534346, "learning_rate": 8.51632820823598e-07, "loss": 0.4572, "step": 6541 }, { "epoch": 3.245252575400273, "grad_norm": 0.07640602680421152, "learning_rate": 8.505419288720862e-07, "loss": 0.4686, "step": 6542 }, { "epoch": 3.2457490381035123, "grad_norm": 0.072281263654525, "learning_rate": 8.494516710987105e-07, "loss": 0.4187, "step": 6543 }, { "epoch": 3.2462455008067517, "grad_norm": 0.07397887512980252, "learning_rate": 8.483620476700977e-07, "loss": 0.471, "step": 6544 }, { "epoch": 3.2467419635099914, "grad_norm": 0.07094087678701722, "learning_rate": 8.472730587527783e-07, "loss": 0.4385, "step": 6545 }, { "epoch": 3.2472384262132308, "grad_norm": 0.07255957063372506, "learning_rate": 8.461847045131894e-07, "loss": 0.4559, "step": 6546 }, { "epoch": 3.24773488891647, "grad_norm": 0.07083546706731952, "learning_rate": 8.450969851176655e-07, "loss": 0.4517, "step": 6547 }, { "epoch": 3.2482313516197094, "grad_norm": 0.06990490031823457, "learning_rate": 8.440099007324498e-07, "loss": 0.4226, "step": 6548 }, { "epoch": 3.248727814322949, "grad_norm": 0.07250659990226761, "learning_rate": 8.429234515236845e-07, "loss": 0.4105, "step": 6549 }, { "epoch": 3.2492242770261885, "grad_norm": 0.07359684298440573, "learning_rate": 8.41837637657415e-07, "loss": 0.4406, "step": 6550 }, { "epoch": 3.249720739729428, "grad_norm": 0.0715269597858152, "learning_rate": 8.40752459299593e-07, "loss": 0.4393, "step": 6551 }, { "epoch": 3.250217202432667, "grad_norm": 0.07355218307599753, "learning_rate": 8.396679166160676e-07, "loss": 0.4328, "step": 6552 }, { "epoch": 3.250217202432667, "eval_loss": 0.5165188908576965, "eval_runtime": 258.8306, "eval_samples_per_second": 117.27, "eval_steps_per_second": 14.662, "step": 6552 }, { "epoch": 3.2507136651359065, "grad_norm": 0.07283223774832169, "learning_rate": 8.385840097725961e-07, "loss": 0.4602, "step": 6553 }, { "epoch": 3.251210127839146, "grad_norm": 0.07311451943838647, "learning_rate": 8.375007389348361e-07, "loss": 0.4147, "step": 6554 }, { "epoch": 3.2517065905423856, "grad_norm": 0.07258731207752103, "learning_rate": 8.364181042683472e-07, "loss": 0.4504, "step": 6555 }, { "epoch": 3.252203053245625, "grad_norm": 0.07437859093988324, "learning_rate": 8.353361059385934e-07, "loss": 0.4295, "step": 6556 }, { "epoch": 3.2526995159488643, "grad_norm": 0.07547480599085805, "learning_rate": 8.342547441109389e-07, "loss": 0.468, "step": 6557 }, { "epoch": 3.2531959786521036, "grad_norm": 0.0736194297795164, "learning_rate": 8.33174018950656e-07, "loss": 0.4362, "step": 6558 }, { "epoch": 3.2536924413553434, "grad_norm": 0.07181393467415445, "learning_rate": 8.320939306229125e-07, "loss": 0.4473, "step": 6559 }, { "epoch": 3.2541889040585827, "grad_norm": 0.07282434869419582, "learning_rate": 
8.310144792927855e-07, "loss": 0.441, "step": 6560 }, { "epoch": 3.254685366761822, "grad_norm": 0.07474077075724365, "learning_rate": 8.299356651252511e-07, "loss": 0.4461, "step": 6561 }, { "epoch": 3.2551818294650614, "grad_norm": 0.0722199749212488, "learning_rate": 8.288574882851874e-07, "loss": 0.4277, "step": 6562 }, { "epoch": 3.2556782921683007, "grad_norm": 0.06972358362863357, "learning_rate": 8.277799489373784e-07, "loss": 0.4003, "step": 6563 }, { "epoch": 3.25617475487154, "grad_norm": 0.07348155810926192, "learning_rate": 8.267030472465077e-07, "loss": 0.4195, "step": 6564 }, { "epoch": 3.25667121757478, "grad_norm": 0.0723048472609885, "learning_rate": 8.256267833771608e-07, "loss": 0.4733, "step": 6565 }, { "epoch": 3.257167680278019, "grad_norm": 0.0700181266923299, "learning_rate": 8.245511574938309e-07, "loss": 0.4238, "step": 6566 }, { "epoch": 3.2576641429812585, "grad_norm": 0.07302398518997803, "learning_rate": 8.234761697609073e-07, "loss": 0.4402, "step": 6567 }, { "epoch": 3.258160605684498, "grad_norm": 0.07275318432187972, "learning_rate": 8.224018203426864e-07, "loss": 0.4229, "step": 6568 }, { "epoch": 3.2586570683877376, "grad_norm": 0.07250942596803592, "learning_rate": 8.213281094033648e-07, "loss": 0.4626, "step": 6569 }, { "epoch": 3.259153531090977, "grad_norm": 0.07289631800331221, "learning_rate": 8.202550371070417e-07, "loss": 0.4587, "step": 6570 }, { "epoch": 3.2596499937942163, "grad_norm": 0.07463255498980362, "learning_rate": 8.191826036177191e-07, "loss": 0.4722, "step": 6571 }, { "epoch": 3.2601464564974556, "grad_norm": 0.07140120891728631, "learning_rate": 8.181108090993001e-07, "loss": 0.4533, "step": 6572 }, { "epoch": 3.260642919200695, "grad_norm": 0.07450462444078626, "learning_rate": 8.170396537155934e-07, "loss": 0.4234, "step": 6573 }, { "epoch": 3.2611393819039343, "grad_norm": 0.0720324864948931, "learning_rate": 8.159691376303059e-07, "loss": 0.4616, "step": 6574 }, { "epoch": 3.261635844607174, "grad_norm": 0.07382569910297018, "learning_rate": 8.148992610070511e-07, "loss": 0.4639, "step": 6575 }, { "epoch": 3.2621323073104134, "grad_norm": 0.07158104231727033, "learning_rate": 8.138300240093411e-07, "loss": 0.4013, "step": 6576 }, { "epoch": 3.2626287700136527, "grad_norm": 0.07524488381765479, "learning_rate": 8.127614268005907e-07, "loss": 0.455, "step": 6577 }, { "epoch": 3.263125232716892, "grad_norm": 0.07346798365093564, "learning_rate": 8.116934695441203e-07, "loss": 0.4535, "step": 6578 }, { "epoch": 3.263621695420132, "grad_norm": 0.07102589429860762, "learning_rate": 8.106261524031473e-07, "loss": 0.4505, "step": 6579 }, { "epoch": 3.264118158123371, "grad_norm": 0.07197763258149469, "learning_rate": 8.095594755407971e-07, "loss": 0.4594, "step": 6580 }, { "epoch": 3.2646146208266105, "grad_norm": 0.07320604094259688, "learning_rate": 8.084934391200916e-07, "loss": 0.4655, "step": 6581 }, { "epoch": 3.26511108352985, "grad_norm": 0.07415582580125131, "learning_rate": 8.074280433039577e-07, "loss": 0.4337, "step": 6582 }, { "epoch": 3.265607546233089, "grad_norm": 0.07681223797920941, "learning_rate": 8.063632882552258e-07, "loss": 0.4595, "step": 6583 }, { "epoch": 3.2661040089363285, "grad_norm": 0.07285440928148708, "learning_rate": 8.05299174136624e-07, "loss": 0.4656, "step": 6584 }, { "epoch": 3.2666004716395682, "grad_norm": 0.07396212889199635, "learning_rate": 8.042357011107877e-07, "loss": 0.437, "step": 6585 }, { "epoch": 3.2670969343428076, "grad_norm": 0.07276458104314697, "learning_rate": 
8.031728693402502e-07, "loss": 0.4507, "step": 6586 }, { "epoch": 3.267593397046047, "grad_norm": 0.07299764124445621, "learning_rate": 8.021106789874494e-07, "loss": 0.461, "step": 6587 }, { "epoch": 3.2680898597492862, "grad_norm": 0.069975440469783, "learning_rate": 8.010491302147227e-07, "loss": 0.4335, "step": 6588 }, { "epoch": 3.2685863224525256, "grad_norm": 0.07246995485880185, "learning_rate": 7.999882231843104e-07, "loss": 0.4561, "step": 6589 }, { "epoch": 3.2690827851557653, "grad_norm": 0.07541022380946706, "learning_rate": 7.989279580583569e-07, "loss": 0.4674, "step": 6590 }, { "epoch": 3.2695792478590047, "grad_norm": 0.07260758175017704, "learning_rate": 7.978683349989052e-07, "loss": 0.4461, "step": 6591 }, { "epoch": 3.270075710562244, "grad_norm": 0.07168818674294791, "learning_rate": 7.968093541679039e-07, "loss": 0.4314, "step": 6592 }, { "epoch": 3.2705721732654833, "grad_norm": 0.07247063626781816, "learning_rate": 7.957510157271991e-07, "loss": 0.4335, "step": 6593 }, { "epoch": 3.2710686359687227, "grad_norm": 0.07147894182938196, "learning_rate": 7.946933198385409e-07, "loss": 0.4468, "step": 6594 }, { "epoch": 3.2715650986719624, "grad_norm": 0.06996799665630328, "learning_rate": 7.936362666635827e-07, "loss": 0.4263, "step": 6595 }, { "epoch": 3.2720615613752018, "grad_norm": 0.07277142907800285, "learning_rate": 7.92579856363876e-07, "loss": 0.4432, "step": 6596 }, { "epoch": 3.272558024078441, "grad_norm": 0.0718025553116523, "learning_rate": 7.915240891008785e-07, "loss": 0.4427, "step": 6597 }, { "epoch": 3.2730544867816804, "grad_norm": 0.07453027337364937, "learning_rate": 7.904689650359465e-07, "loss": 0.4538, "step": 6598 }, { "epoch": 3.2735509494849198, "grad_norm": 0.07212902370180194, "learning_rate": 7.894144843303375e-07, "loss": 0.4375, "step": 6599 }, { "epoch": 3.2740474121881595, "grad_norm": 0.07253003013199788, "learning_rate": 7.883606471452138e-07, "loss": 0.4248, "step": 6600 }, { "epoch": 3.274543874891399, "grad_norm": 0.07345711876328244, "learning_rate": 7.873074536416365e-07, "loss": 0.4647, "step": 6601 }, { "epoch": 3.275040337594638, "grad_norm": 0.0746637762429317, "learning_rate": 7.8625490398057e-07, "loss": 0.4463, "step": 6602 }, { "epoch": 3.2755368002978775, "grad_norm": 0.07196118986595847, "learning_rate": 7.852029983228787e-07, "loss": 0.4165, "step": 6603 }, { "epoch": 3.276033263001117, "grad_norm": 0.0758385444525904, "learning_rate": 7.841517368293289e-07, "loss": 0.4912, "step": 6604 }, { "epoch": 3.2765297257043566, "grad_norm": 0.07158036061870192, "learning_rate": 7.831011196605915e-07, "loss": 0.4542, "step": 6605 }, { "epoch": 3.277026188407596, "grad_norm": 0.07296484306255716, "learning_rate": 7.820511469772341e-07, "loss": 0.4123, "step": 6606 }, { "epoch": 3.2775226511108353, "grad_norm": 0.07231519525271822, "learning_rate": 7.810018189397301e-07, "loss": 0.432, "step": 6607 }, { "epoch": 3.2780191138140746, "grad_norm": 0.07706888568761443, "learning_rate": 7.799531357084517e-07, "loss": 0.4663, "step": 6608 }, { "epoch": 3.278515576517314, "grad_norm": 0.07134553846073277, "learning_rate": 7.789050974436719e-07, "loss": 0.4302, "step": 6609 }, { "epoch": 3.2790120392205537, "grad_norm": 0.07451304099114202, "learning_rate": 7.77857704305569e-07, "loss": 0.4416, "step": 6610 }, { "epoch": 3.279508501923793, "grad_norm": 0.07403930544580954, "learning_rate": 7.768109564542181e-07, "loss": 0.4684, "step": 6611 }, { "epoch": 3.2800049646270324, "grad_norm": 0.07093526312200696, "learning_rate": 
7.757648540495999e-07, "loss": 0.413, "step": 6612 }, { "epoch": 3.2805014273302717, "grad_norm": 0.07084022920084229, "learning_rate": 7.747193972515932e-07, "loss": 0.417, "step": 6613 }, { "epoch": 3.280997890033511, "grad_norm": 0.07213204436676163, "learning_rate": 7.736745862199785e-07, "loss": 0.434, "step": 6614 }, { "epoch": 3.281494352736751, "grad_norm": 0.07241724518413502, "learning_rate": 7.726304211144403e-07, "loss": 0.4143, "step": 6615 }, { "epoch": 3.28199081543999, "grad_norm": 0.07182507666799981, "learning_rate": 7.715869020945604e-07, "loss": 0.4547, "step": 6616 }, { "epoch": 3.2824872781432295, "grad_norm": 0.07260704595384614, "learning_rate": 7.705440293198263e-07, "loss": 0.4552, "step": 6617 }, { "epoch": 3.282983740846469, "grad_norm": 0.0723972933923348, "learning_rate": 7.695018029496232e-07, "loss": 0.4297, "step": 6618 }, { "epoch": 3.283480203549708, "grad_norm": 0.0724527393243901, "learning_rate": 7.684602231432381e-07, "loss": 0.4312, "step": 6619 }, { "epoch": 3.283976666252948, "grad_norm": 0.0716458404206346, "learning_rate": 7.674192900598609e-07, "loss": 0.4388, "step": 6620 }, { "epoch": 3.2844731289561873, "grad_norm": 0.07041881628193564, "learning_rate": 7.663790038585794e-07, "loss": 0.4363, "step": 6621 }, { "epoch": 3.2849695916594266, "grad_norm": 0.07313003855013313, "learning_rate": 7.653393646983875e-07, "loss": 0.4093, "step": 6622 }, { "epoch": 3.285466054362666, "grad_norm": 0.07334277170570097, "learning_rate": 7.643003727381754e-07, "loss": 0.4477, "step": 6623 }, { "epoch": 3.2859625170659053, "grad_norm": 0.07465207208298735, "learning_rate": 7.632620281367376e-07, "loss": 0.4392, "step": 6624 }, { "epoch": 3.286458979769145, "grad_norm": 0.07036024314880787, "learning_rate": 7.622243310527678e-07, "loss": 0.4083, "step": 6625 }, { "epoch": 3.2869554424723844, "grad_norm": 0.070643471682643, "learning_rate": 7.611872816448606e-07, "loss": 0.4264, "step": 6626 }, { "epoch": 3.2874519051756237, "grad_norm": 0.0721032209785513, "learning_rate": 7.601508800715146e-07, "loss": 0.4184, "step": 6627 }, { "epoch": 3.287948367878863, "grad_norm": 0.07275339367069343, "learning_rate": 7.59115126491124e-07, "loss": 0.4532, "step": 6628 }, { "epoch": 3.2884448305821024, "grad_norm": 0.07564767357943082, "learning_rate": 7.580800210619904e-07, "loss": 0.4665, "step": 6629 }, { "epoch": 3.288941293285342, "grad_norm": 0.07434048177123104, "learning_rate": 7.570455639423119e-07, "loss": 0.4294, "step": 6630 }, { "epoch": 3.2894377559885815, "grad_norm": 0.07498662516913521, "learning_rate": 7.560117552901863e-07, "loss": 0.4346, "step": 6631 }, { "epoch": 3.289934218691821, "grad_norm": 0.07379359019856084, "learning_rate": 7.549785952636185e-07, "loss": 0.4166, "step": 6632 }, { "epoch": 3.29043068139506, "grad_norm": 0.07508251711812729, "learning_rate": 7.539460840205076e-07, "loss": 0.4472, "step": 6633 }, { "epoch": 3.2909271440982995, "grad_norm": 0.07424590033382815, "learning_rate": 7.529142217186596e-07, "loss": 0.4505, "step": 6634 }, { "epoch": 3.2914236068015392, "grad_norm": 0.07437842299007032, "learning_rate": 7.518830085157735e-07, "loss": 0.4588, "step": 6635 }, { "epoch": 3.2919200695047786, "grad_norm": 0.07079216645025961, "learning_rate": 7.508524445694577e-07, "loss": 0.4155, "step": 6636 }, { "epoch": 3.292416532208018, "grad_norm": 0.07371760019391377, "learning_rate": 7.498225300372152e-07, "loss": 0.4622, "step": 6637 }, { "epoch": 3.292912994911257, "grad_norm": 0.07168229019197096, "learning_rate": 
7.487932650764523e-07, "loss": 0.4252, "step": 6638 }, { "epoch": 3.2934094576144965, "grad_norm": 0.07461181078661946, "learning_rate": 7.477646498444762e-07, "loss": 0.4851, "step": 6639 }, { "epoch": 3.2939059203177363, "grad_norm": 0.07593723177468553, "learning_rate": 7.467366844984946e-07, "loss": 0.4457, "step": 6640 }, { "epoch": 3.2944023830209757, "grad_norm": 0.07516771486988753, "learning_rate": 7.457093691956136e-07, "loss": 0.4604, "step": 6641 }, { "epoch": 3.294898845724215, "grad_norm": 0.0717665501625767, "learning_rate": 7.446827040928439e-07, "loss": 0.4588, "step": 6642 }, { "epoch": 3.2953953084274543, "grad_norm": 0.07372662891217222, "learning_rate": 7.436566893470937e-07, "loss": 0.4408, "step": 6643 }, { "epoch": 3.2958917711306936, "grad_norm": 0.0732164428085213, "learning_rate": 7.426313251151734e-07, "loss": 0.4303, "step": 6644 }, { "epoch": 3.296388233833933, "grad_norm": 0.07271523459017934, "learning_rate": 7.41606611553794e-07, "loss": 0.4416, "step": 6645 }, { "epoch": 3.2968846965371728, "grad_norm": 0.07027489236127155, "learning_rate": 7.405825488195645e-07, "loss": 0.4408, "step": 6646 }, { "epoch": 3.297381159240412, "grad_norm": 0.0703499925933336, "learning_rate": 7.395591370689992e-07, "loss": 0.4136, "step": 6647 }, { "epoch": 3.2978776219436514, "grad_norm": 0.0732883393369235, "learning_rate": 7.385363764585074e-07, "loss": 0.4365, "step": 6648 }, { "epoch": 3.2983740846468907, "grad_norm": 0.07330766129833696, "learning_rate": 7.375142671444046e-07, "loss": 0.4408, "step": 6649 }, { "epoch": 3.2988705473501305, "grad_norm": 0.07290035642087749, "learning_rate": 7.364928092829021e-07, "loss": 0.4456, "step": 6650 }, { "epoch": 3.29936701005337, "grad_norm": 0.07259967466068204, "learning_rate": 7.354720030301138e-07, "loss": 0.4303, "step": 6651 }, { "epoch": 3.299863472756609, "grad_norm": 0.07300187700640386, "learning_rate": 7.344518485420526e-07, "loss": 0.461, "step": 6652 }, { "epoch": 3.3003599354598485, "grad_norm": 0.07125456429321311, "learning_rate": 7.334323459746329e-07, "loss": 0.4008, "step": 6653 }, { "epoch": 3.300856398163088, "grad_norm": 0.07285745627014652, "learning_rate": 7.32413495483671e-07, "loss": 0.4441, "step": 6654 }, { "epoch": 3.301352860866327, "grad_norm": 0.07707300575756339, "learning_rate": 7.313952972248795e-07, "loss": 0.4527, "step": 6655 }, { "epoch": 3.301849323569567, "grad_norm": 0.0759425918137467, "learning_rate": 7.303777513538762e-07, "loss": 0.4993, "step": 6656 }, { "epoch": 3.3023457862728063, "grad_norm": 0.07405805280796507, "learning_rate": 7.293608580261757e-07, "loss": 0.4367, "step": 6657 }, { "epoch": 3.3028422489760456, "grad_norm": 0.07201013691773817, "learning_rate": 7.28344617397192e-07, "loss": 0.4382, "step": 6658 }, { "epoch": 3.303338711679285, "grad_norm": 0.07353768250813451, "learning_rate": 7.27329029622244e-07, "loss": 0.4627, "step": 6659 }, { "epoch": 3.3038351743825247, "grad_norm": 0.07316616724213289, "learning_rate": 7.263140948565456e-07, "loss": 0.4491, "step": 6660 }, { "epoch": 3.304331637085764, "grad_norm": 0.07104927273550239, "learning_rate": 7.252998132552158e-07, "loss": 0.4536, "step": 6661 }, { "epoch": 3.3048280997890034, "grad_norm": 0.07102527141998924, "learning_rate": 7.242861849732696e-07, "loss": 0.4445, "step": 6662 }, { "epoch": 3.3053245624922427, "grad_norm": 0.07093892808583452, "learning_rate": 7.232732101656231e-07, "loss": 0.4407, "step": 6663 }, { "epoch": 3.305821025195482, "grad_norm": 0.07291337911424048, "learning_rate": 
7.222608889870958e-07, "loss": 0.4391, "step": 6664 }, { "epoch": 3.3063174878987214, "grad_norm": 0.07466482337371662, "learning_rate": 7.212492215924016e-07, "loss": 0.4799, "step": 6665 }, { "epoch": 3.306813950601961, "grad_norm": 0.07150248934206467, "learning_rate": 7.202382081361619e-07, "loss": 0.4479, "step": 6666 }, { "epoch": 3.3073104133052005, "grad_norm": 0.0732130705782928, "learning_rate": 7.192278487728893e-07, "loss": 0.4144, "step": 6667 }, { "epoch": 3.30780687600844, "grad_norm": 0.07400423447607553, "learning_rate": 7.182181436570041e-07, "loss": 0.4268, "step": 6668 }, { "epoch": 3.308303338711679, "grad_norm": 0.07357958751155688, "learning_rate": 7.172090929428221e-07, "loss": 0.4483, "step": 6669 }, { "epoch": 3.308799801414919, "grad_norm": 0.07229389098614548, "learning_rate": 7.162006967845602e-07, "loss": 0.4661, "step": 6670 }, { "epoch": 3.3092962641181582, "grad_norm": 0.07090210708677912, "learning_rate": 7.151929553363368e-07, "loss": 0.4369, "step": 6671 }, { "epoch": 3.3097927268213976, "grad_norm": 0.07275196546922465, "learning_rate": 7.14185868752168e-07, "loss": 0.4386, "step": 6672 }, { "epoch": 3.310289189524637, "grad_norm": 0.07239941321768839, "learning_rate": 7.131794371859724e-07, "loss": 0.4629, "step": 6673 }, { "epoch": 3.3107856522278762, "grad_norm": 0.07150491620273815, "learning_rate": 7.121736607915658e-07, "loss": 0.4281, "step": 6674 }, { "epoch": 3.3112821149311156, "grad_norm": 0.07010195635506701, "learning_rate": 7.111685397226642e-07, "loss": 0.4399, "step": 6675 }, { "epoch": 3.3117785776343553, "grad_norm": 0.07081236027352858, "learning_rate": 7.101640741328858e-07, "loss": 0.4311, "step": 6676 }, { "epoch": 3.3122750403375947, "grad_norm": 0.0705597997075449, "learning_rate": 7.091602641757467e-07, "loss": 0.397, "step": 6677 }, { "epoch": 3.312771503040834, "grad_norm": 0.07351201791505935, "learning_rate": 7.081571100046613e-07, "loss": 0.4216, "step": 6678 }, { "epoch": 3.3132679657440733, "grad_norm": 0.07162297978284737, "learning_rate": 7.071546117729489e-07, "loss": 0.4418, "step": 6679 }, { "epoch": 3.313764428447313, "grad_norm": 0.07072229240363065, "learning_rate": 7.061527696338221e-07, "loss": 0.4229, "step": 6680 }, { "epoch": 3.3142608911505524, "grad_norm": 0.07178441225873806, "learning_rate": 7.051515837403989e-07, "loss": 0.4321, "step": 6681 }, { "epoch": 3.3147573538537918, "grad_norm": 0.07357930531007792, "learning_rate": 7.041510542456936e-07, "loss": 0.4377, "step": 6682 }, { "epoch": 3.315253816557031, "grad_norm": 0.07120147170563958, "learning_rate": 7.03151181302621e-07, "loss": 0.4562, "step": 6683 }, { "epoch": 3.3157502792602704, "grad_norm": 0.07540081844652202, "learning_rate": 7.021519650639952e-07, "loss": 0.4627, "step": 6684 }, { "epoch": 3.3162467419635098, "grad_norm": 0.07113171542314269, "learning_rate": 7.011534056825303e-07, "loss": 0.408, "step": 6685 }, { "epoch": 3.3167432046667495, "grad_norm": 0.07734922712339089, "learning_rate": 7.001555033108414e-07, "loss": 0.4541, "step": 6686 }, { "epoch": 3.317239667369989, "grad_norm": 0.0719354552889049, "learning_rate": 6.991582581014394e-07, "loss": 0.4084, "step": 6687 }, { "epoch": 3.317736130073228, "grad_norm": 0.07473471116857953, "learning_rate": 6.981616702067406e-07, "loss": 0.4333, "step": 6688 }, { "epoch": 3.3182325927764675, "grad_norm": 0.07355981521959422, "learning_rate": 6.97165739779056e-07, "loss": 0.4495, "step": 6689 }, { "epoch": 3.3187290554797073, "grad_norm": 0.0738486895010106, "learning_rate": 
6.96170466970596e-07, "loss": 0.4207, "step": 6690 }, { "epoch": 3.3192255181829466, "grad_norm": 0.07518710674642788, "learning_rate": 6.951758519334745e-07, "loss": 0.4186, "step": 6691 }, { "epoch": 3.319721980886186, "grad_norm": 0.07191266899444419, "learning_rate": 6.941818948197005e-07, "loss": 0.4359, "step": 6692 }, { "epoch": 3.3202184435894253, "grad_norm": 0.07226725932523494, "learning_rate": 6.931885957811862e-07, "loss": 0.4553, "step": 6693 }, { "epoch": 3.3207149062926646, "grad_norm": 0.07825433273516179, "learning_rate": 6.921959549697404e-07, "loss": 0.4912, "step": 6694 }, { "epoch": 3.321211368995904, "grad_norm": 0.07284332065755789, "learning_rate": 6.912039725370717e-07, "loss": 0.4222, "step": 6695 }, { "epoch": 3.3217078316991437, "grad_norm": 0.07377691811710664, "learning_rate": 6.902126486347904e-07, "loss": 0.4481, "step": 6696 }, { "epoch": 3.322204294402383, "grad_norm": 0.0753173718581925, "learning_rate": 6.892219834144032e-07, "loss": 0.4612, "step": 6697 }, { "epoch": 3.3227007571056224, "grad_norm": 0.06991257631606883, "learning_rate": 6.882319770273193e-07, "loss": 0.422, "step": 6698 }, { "epoch": 3.3231972198088617, "grad_norm": 0.07307525817105578, "learning_rate": 6.872426296248413e-07, "loss": 0.4445, "step": 6699 }, { "epoch": 3.3236936825121015, "grad_norm": 0.07331646472148728, "learning_rate": 6.862539413581792e-07, "loss": 0.4462, "step": 6700 }, { "epoch": 3.324190145215341, "grad_norm": 0.07309253081404553, "learning_rate": 6.85265912378436e-07, "loss": 0.466, "step": 6701 }, { "epoch": 3.32468660791858, "grad_norm": 0.07396548326621871, "learning_rate": 6.842785428366161e-07, "loss": 0.4405, "step": 6702 }, { "epoch": 3.3251830706218195, "grad_norm": 0.0730798787215561, "learning_rate": 6.832918328836247e-07, "loss": 0.453, "step": 6703 }, { "epoch": 3.325679533325059, "grad_norm": 0.0728092690289243, "learning_rate": 6.823057826702617e-07, "loss": 0.4485, "step": 6704 }, { "epoch": 3.326175996028298, "grad_norm": 0.07346187407672358, "learning_rate": 6.813203923472328e-07, "loss": 0.44, "step": 6705 }, { "epoch": 3.326672458731538, "grad_norm": 0.07255358778733134, "learning_rate": 6.803356620651364e-07, "loss": 0.4154, "step": 6706 }, { "epoch": 3.3271689214347773, "grad_norm": 0.0705595758422318, "learning_rate": 6.793515919744725e-07, "loss": 0.435, "step": 6707 }, { "epoch": 3.3276653841380166, "grad_norm": 0.07369430638371367, "learning_rate": 6.783681822256433e-07, "loss": 0.4277, "step": 6708 }, { "epoch": 3.328161846841256, "grad_norm": 0.07246376987908641, "learning_rate": 6.773854329689433e-07, "loss": 0.4301, "step": 6709 }, { "epoch": 3.3286583095444957, "grad_norm": 0.07242937121960161, "learning_rate": 6.764033443545737e-07, "loss": 0.4405, "step": 6710 }, { "epoch": 3.329154772247735, "grad_norm": 0.07134622113216685, "learning_rate": 6.754219165326293e-07, "loss": 0.4774, "step": 6711 }, { "epoch": 3.3296512349509744, "grad_norm": 0.07254229774617119, "learning_rate": 6.744411496531045e-07, "loss": 0.4304, "step": 6712 }, { "epoch": 3.3301476976542137, "grad_norm": 0.07269788593095719, "learning_rate": 6.734610438658957e-07, "loss": 0.4559, "step": 6713 }, { "epoch": 3.330644160357453, "grad_norm": 0.07285441368630612, "learning_rate": 6.724815993207956e-07, "loss": 0.4203, "step": 6714 }, { "epoch": 3.3311406230606924, "grad_norm": 0.07401827820352684, "learning_rate": 6.71502816167497e-07, "loss": 0.449, "step": 6715 }, { "epoch": 3.331637085763932, "grad_norm": 0.06939161478859503, "learning_rate": 
6.705246945555905e-07, "loss": 0.4281, "step": 6716 }, { "epoch": 3.3321335484671715, "grad_norm": 0.07253069731870333, "learning_rate": 6.695472346345655e-07, "loss": 0.4227, "step": 6717 }, { "epoch": 3.332630011170411, "grad_norm": 0.07181321712064559, "learning_rate": 6.685704365538132e-07, "loss": 0.4217, "step": 6718 }, { "epoch": 3.33312647387365, "grad_norm": 0.07691835372690169, "learning_rate": 6.67594300462619e-07, "loss": 0.4493, "step": 6719 }, { "epoch": 3.33362293657689, "grad_norm": 0.07082709018052989, "learning_rate": 6.666188265101725e-07, "loss": 0.4322, "step": 6720 }, { "epoch": 3.3341193992801292, "grad_norm": 0.07958434335917278, "learning_rate": 6.656440148455584e-07, "loss": 0.5023, "step": 6721 }, { "epoch": 3.3346158619833686, "grad_norm": 0.07347018470497847, "learning_rate": 6.646698656177591e-07, "loss": 0.4587, "step": 6722 }, { "epoch": 3.335112324686608, "grad_norm": 0.06977330862340675, "learning_rate": 6.636963789756601e-07, "loss": 0.4125, "step": 6723 }, { "epoch": 3.3356087873898472, "grad_norm": 0.06947659199276247, "learning_rate": 6.627235550680411e-07, "loss": 0.4247, "step": 6724 }, { "epoch": 3.3361052500930866, "grad_norm": 0.07559550092142363, "learning_rate": 6.617513940435849e-07, "loss": 0.4608, "step": 6725 }, { "epoch": 3.3366017127963263, "grad_norm": 0.07477526832332422, "learning_rate": 6.607798960508693e-07, "loss": 0.442, "step": 6726 }, { "epoch": 3.3370981754995657, "grad_norm": 0.07208237172360653, "learning_rate": 6.598090612383723e-07, "loss": 0.4387, "step": 6727 }, { "epoch": 3.337594638202805, "grad_norm": 0.07072698804456641, "learning_rate": 6.588388897544707e-07, "loss": 0.4371, "step": 6728 }, { "epoch": 3.3380911009060443, "grad_norm": 0.07388009610832842, "learning_rate": 6.578693817474391e-07, "loss": 0.4517, "step": 6729 }, { "epoch": 3.3385875636092837, "grad_norm": 0.07622744923989354, "learning_rate": 6.569005373654524e-07, "loss": 0.4459, "step": 6730 }, { "epoch": 3.3390840263125234, "grad_norm": 0.07231725065360844, "learning_rate": 6.559323567565828e-07, "loss": 0.4494, "step": 6731 }, { "epoch": 3.3395804890157628, "grad_norm": 0.07271324266775285, "learning_rate": 6.549648400688003e-07, "loss": 0.4261, "step": 6732 }, { "epoch": 3.340076951719002, "grad_norm": 0.07026922793424965, "learning_rate": 6.539979874499747e-07, "loss": 0.4192, "step": 6733 }, { "epoch": 3.3405734144222414, "grad_norm": 0.07098739030810841, "learning_rate": 6.530317990478729e-07, "loss": 0.44, "step": 6734 }, { "epoch": 3.3410698771254808, "grad_norm": 0.07210641928798285, "learning_rate": 6.52066275010163e-07, "loss": 0.4289, "step": 6735 }, { "epoch": 3.3415663398287205, "grad_norm": 0.07414660760570124, "learning_rate": 6.511014154844081e-07, "loss": 0.4556, "step": 6736 }, { "epoch": 3.34206280253196, "grad_norm": 0.07313210011289042, "learning_rate": 6.50137220618074e-07, "loss": 0.4128, "step": 6737 }, { "epoch": 3.342559265235199, "grad_norm": 0.07093195725850568, "learning_rate": 6.491736905585211e-07, "loss": 0.4447, "step": 6738 }, { "epoch": 3.3430557279384385, "grad_norm": 0.07546538508603158, "learning_rate": 6.482108254530078e-07, "loss": 0.4196, "step": 6739 }, { "epoch": 3.343552190641678, "grad_norm": 0.07474759213803385, "learning_rate": 6.472486254486954e-07, "loss": 0.4545, "step": 6740 }, { "epoch": 3.3440486533449176, "grad_norm": 0.07306879951905888, "learning_rate": 6.462870906926389e-07, "loss": 0.4343, "step": 6741 }, { "epoch": 3.344545116048157, "grad_norm": 0.0713618723632236, "learning_rate": 
6.453262213317946e-07, "loss": 0.4405, "step": 6742 }, { "epoch": 3.3450415787513963, "grad_norm": 0.07294681200393098, "learning_rate": 6.443660175130157e-07, "loss": 0.4634, "step": 6743 }, { "epoch": 3.3455380414546356, "grad_norm": 0.07476448744900865, "learning_rate": 6.43406479383053e-07, "loss": 0.4443, "step": 6744 }, { "epoch": 3.346034504157875, "grad_norm": 0.07228777016156072, "learning_rate": 6.424476070885582e-07, "loss": 0.4204, "step": 6745 }, { "epoch": 3.3465309668611147, "grad_norm": 0.07164394178278337, "learning_rate": 6.414894007760769e-07, "loss": 0.4323, "step": 6746 }, { "epoch": 3.347027429564354, "grad_norm": 0.07392067999596949, "learning_rate": 6.405318605920602e-07, "loss": 0.442, "step": 6747 }, { "epoch": 3.3475238922675934, "grad_norm": 0.07339914027298029, "learning_rate": 6.395749866828477e-07, "loss": 0.4214, "step": 6748 }, { "epoch": 3.3480203549708327, "grad_norm": 0.0715534750654056, "learning_rate": 6.386187791946852e-07, "loss": 0.4309, "step": 6749 }, { "epoch": 3.348516817674072, "grad_norm": 0.08010573872207297, "learning_rate": 6.376632382737125e-07, "loss": 0.4804, "step": 6750 }, { "epoch": 3.349013280377312, "grad_norm": 0.07490044375536065, "learning_rate": 6.367083640659682e-07, "loss": 0.4469, "step": 6751 }, { "epoch": 3.349509743080551, "grad_norm": 0.07309406245651992, "learning_rate": 6.35754156717392e-07, "loss": 0.4459, "step": 6752 }, { "epoch": 3.3500062057837905, "grad_norm": 0.07124688102133844, "learning_rate": 6.348006163738174e-07, "loss": 0.4321, "step": 6753 }, { "epoch": 3.35050266848703, "grad_norm": 0.07098792326562689, "learning_rate": 6.338477431809764e-07, "loss": 0.4355, "step": 6754 }, { "epoch": 3.350999131190269, "grad_norm": 0.0716445990956709, "learning_rate": 6.328955372845036e-07, "loss": 0.4388, "step": 6755 }, { "epoch": 3.351495593893509, "grad_norm": 0.0722516039969341, "learning_rate": 6.319439988299253e-07, "loss": 0.418, "step": 6756 }, { "epoch": 3.3519920565967483, "grad_norm": 0.07210491335036923, "learning_rate": 6.309931279626713e-07, "loss": 0.4405, "step": 6757 }, { "epoch": 3.3524885192999876, "grad_norm": 0.07130023795309107, "learning_rate": 6.300429248280659e-07, "loss": 0.4413, "step": 6758 }, { "epoch": 3.352984982003227, "grad_norm": 0.07101581035496832, "learning_rate": 6.29093389571332e-07, "loss": 0.4202, "step": 6759 }, { "epoch": 3.3534814447064663, "grad_norm": 0.07383445946068722, "learning_rate": 6.281445223375921e-07, "loss": 0.4532, "step": 6760 }, { "epoch": 3.353977907409706, "grad_norm": 0.07163606670276983, "learning_rate": 6.271963232718631e-07, "loss": 0.439, "step": 6761 }, { "epoch": 3.3544743701129454, "grad_norm": 0.07111746615527417, "learning_rate": 6.262487925190653e-07, "loss": 0.4341, "step": 6762 }, { "epoch": 3.3549708328161847, "grad_norm": 0.07433150143658057, "learning_rate": 6.253019302240115e-07, "loss": 0.4579, "step": 6763 }, { "epoch": 3.355467295519424, "grad_norm": 0.07299509800751336, "learning_rate": 6.243557365314146e-07, "loss": 0.4544, "step": 6764 }, { "epoch": 3.3559637582226634, "grad_norm": 0.07241430620074621, "learning_rate": 6.234102115858853e-07, "loss": 0.4524, "step": 6765 }, { "epoch": 3.356460220925903, "grad_norm": 0.0735593480259739, "learning_rate": 6.224653555319309e-07, "loss": 0.4345, "step": 6766 }, { "epoch": 3.3569566836291425, "grad_norm": 0.07549715209388864, "learning_rate": 6.215211685139594e-07, "loss": 0.4439, "step": 6767 }, { "epoch": 3.357453146332382, "grad_norm": 0.07636832298287011, "learning_rate": 
6.205776506762729e-07, "loss": 0.4379, "step": 6768 }, { "epoch": 3.357949609035621, "grad_norm": 0.074032413581356, "learning_rate": 6.196348021630749e-07, "loss": 0.439, "step": 6769 }, { "epoch": 3.3584460717388605, "grad_norm": 0.07225979545657661, "learning_rate": 6.186926231184631e-07, "loss": 0.4016, "step": 6770 }, { "epoch": 3.3589425344421002, "grad_norm": 0.07537400284700459, "learning_rate": 6.17751113686434e-07, "loss": 0.4623, "step": 6771 }, { "epoch": 3.3594389971453396, "grad_norm": 0.0738458214689033, "learning_rate": 6.168102740108844e-07, "loss": 0.45, "step": 6772 }, { "epoch": 3.359935459848579, "grad_norm": 0.0728896823511883, "learning_rate": 6.158701042356046e-07, "loss": 0.4309, "step": 6773 }, { "epoch": 3.360431922551818, "grad_norm": 0.07260783854191073, "learning_rate": 6.149306045042858e-07, "loss": 0.4526, "step": 6774 }, { "epoch": 3.3609283852550575, "grad_norm": 0.07493342650916683, "learning_rate": 6.139917749605151e-07, "loss": 0.4618, "step": 6775 }, { "epoch": 3.3614248479582973, "grad_norm": 0.07315163191067617, "learning_rate": 6.130536157477757e-07, "loss": 0.4478, "step": 6776 }, { "epoch": 3.3619213106615367, "grad_norm": 0.07513373087127617, "learning_rate": 6.121161270094533e-07, "loss": 0.4531, "step": 6777 }, { "epoch": 3.362417773364776, "grad_norm": 0.07382894483836297, "learning_rate": 6.111793088888257e-07, "loss": 0.4439, "step": 6778 }, { "epoch": 3.3629142360680153, "grad_norm": 0.07150084158217984, "learning_rate": 6.102431615290727e-07, "loss": 0.4357, "step": 6779 }, { "epoch": 3.3634106987712546, "grad_norm": 0.07094916622144724, "learning_rate": 6.093076850732665e-07, "loss": 0.4007, "step": 6780 }, { "epoch": 3.3639071614744944, "grad_norm": 0.07021291401826622, "learning_rate": 6.083728796643823e-07, "loss": 0.4385, "step": 6781 }, { "epoch": 3.3644036241777338, "grad_norm": 0.07429863866638267, "learning_rate": 6.074387454452891e-07, "loss": 0.4475, "step": 6782 }, { "epoch": 3.364900086880973, "grad_norm": 0.07453176224524409, "learning_rate": 6.06505282558753e-07, "loss": 0.4496, "step": 6783 }, { "epoch": 3.3653965495842124, "grad_norm": 0.07369756215281187, "learning_rate": 6.055724911474415e-07, "loss": 0.41, "step": 6784 }, { "epoch": 3.3658930122874517, "grad_norm": 0.0729390433187085, "learning_rate": 6.04640371353914e-07, "loss": 0.4471, "step": 6785 }, { "epoch": 3.366389474990691, "grad_norm": 0.07105795342109092, "learning_rate": 6.037089233206328e-07, "loss": 0.4204, "step": 6786 }, { "epoch": 3.366885937693931, "grad_norm": 0.07336405749120571, "learning_rate": 6.027781471899535e-07, "loss": 0.4408, "step": 6787 }, { "epoch": 3.36738240039717, "grad_norm": 0.0710225486742565, "learning_rate": 6.01848043104129e-07, "loss": 0.4284, "step": 6788 }, { "epoch": 3.3678788631004095, "grad_norm": 0.07099142741394315, "learning_rate": 6.009186112053134e-07, "loss": 0.4189, "step": 6789 }, { "epoch": 3.368375325803649, "grad_norm": 0.07195270358324399, "learning_rate": 5.99989851635554e-07, "loss": 0.447, "step": 6790 }, { "epoch": 3.3688717885068886, "grad_norm": 0.0718929631105409, "learning_rate": 5.990617645367963e-07, "loss": 0.4543, "step": 6791 }, { "epoch": 3.369368251210128, "grad_norm": 0.0717920961659943, "learning_rate": 5.981343500508846e-07, "loss": 0.4456, "step": 6792 }, { "epoch": 3.3698647139133673, "grad_norm": 0.07208811889172369, "learning_rate": 5.972076083195583e-07, "loss": 0.4165, "step": 6793 }, { "epoch": 3.3703611766166066, "grad_norm": 0.07661792816181558, "learning_rate": 5.962815394844567e-07, 
"loss": 0.465, "step": 6794 }, { "epoch": 3.370857639319846, "grad_norm": 0.0732824276079872, "learning_rate": 5.953561436871135e-07, "loss": 0.4335, "step": 6795 }, { "epoch": 3.3713541020230853, "grad_norm": 0.07402146099841271, "learning_rate": 5.944314210689611e-07, "loss": 0.4125, "step": 6796 }, { "epoch": 3.371850564726325, "grad_norm": 0.07417228224981427, "learning_rate": 5.935073717713274e-07, "loss": 0.4319, "step": 6797 }, { "epoch": 3.3723470274295644, "grad_norm": 0.07261266000053443, "learning_rate": 5.925839959354384e-07, "loss": 0.437, "step": 6798 }, { "epoch": 3.3728434901328037, "grad_norm": 0.07326393118590568, "learning_rate": 5.916612937024191e-07, "loss": 0.4382, "step": 6799 }, { "epoch": 3.373339952836043, "grad_norm": 0.07171944920636655, "learning_rate": 5.907392652132876e-07, "loss": 0.4327, "step": 6800 }, { "epoch": 3.373836415539283, "grad_norm": 0.07266217367510201, "learning_rate": 5.898179106089635e-07, "loss": 0.4184, "step": 6801 }, { "epoch": 3.374332878242522, "grad_norm": 0.07068059522225369, "learning_rate": 5.888972300302598e-07, "loss": 0.4373, "step": 6802 }, { "epoch": 3.3748293409457615, "grad_norm": 0.07446209367229231, "learning_rate": 5.879772236178871e-07, "loss": 0.4597, "step": 6803 }, { "epoch": 3.375325803649001, "grad_norm": 0.07286519517357345, "learning_rate": 5.870578915124547e-07, "loss": 0.4295, "step": 6804 }, { "epoch": 3.37582226635224, "grad_norm": 0.07239552432886055, "learning_rate": 5.861392338544669e-07, "loss": 0.4179, "step": 6805 }, { "epoch": 3.3763187290554795, "grad_norm": 0.07165189079916912, "learning_rate": 5.852212507843274e-07, "loss": 0.4039, "step": 6806 }, { "epoch": 3.3768151917587192, "grad_norm": 0.07292560249677038, "learning_rate": 5.843039424423341e-07, "loss": 0.4455, "step": 6807 }, { "epoch": 3.3773116544619586, "grad_norm": 0.07363371915212126, "learning_rate": 5.833873089686815e-07, "loss": 0.4304, "step": 6808 }, { "epoch": 3.377808117165198, "grad_norm": 0.07100216585939323, "learning_rate": 5.824713505034651e-07, "loss": 0.4214, "step": 6809 }, { "epoch": 3.3783045798684372, "grad_norm": 0.07409743627324886, "learning_rate": 5.815560671866721e-07, "loss": 0.4566, "step": 6810 }, { "epoch": 3.378801042571677, "grad_norm": 0.07281506492211258, "learning_rate": 5.806414591581916e-07, "loss": 0.4604, "step": 6811 }, { "epoch": 3.3792975052749163, "grad_norm": 0.07405617874881373, "learning_rate": 5.797275265578034e-07, "loss": 0.4454, "step": 6812 }, { "epoch": 3.3797939679781557, "grad_norm": 0.07084226874488449, "learning_rate": 5.788142695251897e-07, "loss": 0.4195, "step": 6813 }, { "epoch": 3.380290430681395, "grad_norm": 0.07461650004859605, "learning_rate": 5.779016881999267e-07, "loss": 0.4474, "step": 6814 }, { "epoch": 3.3807868933846343, "grad_norm": 0.0739634619535666, "learning_rate": 5.769897827214871e-07, "loss": 0.4467, "step": 6815 }, { "epoch": 3.3812833560878737, "grad_norm": 0.07162359935116724, "learning_rate": 5.760785532292424e-07, "loss": 0.4381, "step": 6816 }, { "epoch": 3.3817798187911134, "grad_norm": 0.07321476360130366, "learning_rate": 5.751679998624571e-07, "loss": 0.4679, "step": 6817 }, { "epoch": 3.3822762814943528, "grad_norm": 0.07121014109939985, "learning_rate": 5.742581227602978e-07, "loss": 0.421, "step": 6818 }, { "epoch": 3.382772744197592, "grad_norm": 0.0713190118930726, "learning_rate": 5.733489220618232e-07, "loss": 0.4041, "step": 6819 }, { "epoch": 3.3832692069008314, "grad_norm": 0.07061075340016978, "learning_rate": 5.724403979059884e-07, "loss": 
0.4381, "step": 6820 }, { "epoch": 3.383765669604071, "grad_norm": 0.07239685849294974, "learning_rate": 5.715325504316493e-07, "loss": 0.4477, "step": 6821 }, { "epoch": 3.3842621323073105, "grad_norm": 0.07300640702874911, "learning_rate": 5.70625379777554e-07, "loss": 0.4121, "step": 6822 }, { "epoch": 3.38475859501055, "grad_norm": 0.07236966548859305, "learning_rate": 5.69718886082351e-07, "loss": 0.4273, "step": 6823 }, { "epoch": 3.385255057713789, "grad_norm": 0.07644248273040816, "learning_rate": 5.688130694845817e-07, "loss": 0.4504, "step": 6824 }, { "epoch": 3.3857515204170285, "grad_norm": 0.07547400432157439, "learning_rate": 5.679079301226853e-07, "loss": 0.4404, "step": 6825 }, { "epoch": 3.386247983120268, "grad_norm": 0.07373569405446405, "learning_rate": 5.670034681349995e-07, "loss": 0.4498, "step": 6826 }, { "epoch": 3.3867444458235076, "grad_norm": 0.07244341261231016, "learning_rate": 5.66099683659756e-07, "loss": 0.4322, "step": 6827 }, { "epoch": 3.387240908526747, "grad_norm": 0.07268650675591956, "learning_rate": 5.651965768350836e-07, "loss": 0.4398, "step": 6828 }, { "epoch": 3.3877373712299863, "grad_norm": 0.07528774455110801, "learning_rate": 5.642941477990078e-07, "loss": 0.4612, "step": 6829 }, { "epoch": 3.3882338339332256, "grad_norm": 0.07471360863771179, "learning_rate": 5.633923966894495e-07, "loss": 0.4464, "step": 6830 }, { "epoch": 3.3887302966364654, "grad_norm": 0.07306230039019465, "learning_rate": 5.624913236442287e-07, "loss": 0.4333, "step": 6831 }, { "epoch": 3.3892267593397047, "grad_norm": 0.07490854044047122, "learning_rate": 5.615909288010579e-07, "loss": 0.4461, "step": 6832 }, { "epoch": 3.389723222042944, "grad_norm": 0.07476389460652377, "learning_rate": 5.606912122975499e-07, "loss": 0.4588, "step": 6833 }, { "epoch": 3.3902196847461834, "grad_norm": 0.07136672326716272, "learning_rate": 5.597921742712115e-07, "loss": 0.461, "step": 6834 }, { "epoch": 3.3907161474494227, "grad_norm": 0.07352205733020133, "learning_rate": 5.588938148594452e-07, "loss": 0.4571, "step": 6835 }, { "epoch": 3.391212610152662, "grad_norm": 0.0733940024162979, "learning_rate": 5.579961341995521e-07, "loss": 0.4801, "step": 6836 }, { "epoch": 3.391709072855902, "grad_norm": 0.07133932398326485, "learning_rate": 5.570991324287273e-07, "loss": 0.4106, "step": 6837 }, { "epoch": 3.392205535559141, "grad_norm": 0.07094672541772178, "learning_rate": 5.562028096840638e-07, "loss": 0.4385, "step": 6838 }, { "epoch": 3.3927019982623805, "grad_norm": 0.07449748299710027, "learning_rate": 5.553071661025505e-07, "loss": 0.4311, "step": 6839 }, { "epoch": 3.39319846096562, "grad_norm": 0.07301207533923466, "learning_rate": 5.544122018210707e-07, "loss": 0.4501, "step": 6840 }, { "epoch": 3.3936949236688596, "grad_norm": 0.07324337727308855, "learning_rate": 5.535179169764071e-07, "loss": 0.4628, "step": 6841 }, { "epoch": 3.394191386372099, "grad_norm": 0.0727988496068958, "learning_rate": 5.526243117052354e-07, "loss": 0.4393, "step": 6842 }, { "epoch": 3.3946878490753383, "grad_norm": 0.07377019414054901, "learning_rate": 5.517313861441309e-07, "loss": 0.4411, "step": 6843 }, { "epoch": 3.3951843117785776, "grad_norm": 0.07336493283660565, "learning_rate": 5.508391404295593e-07, "loss": 0.4247, "step": 6844 }, { "epoch": 3.395680774481817, "grad_norm": 0.07302798948644608, "learning_rate": 5.499475746978899e-07, "loss": 0.4614, "step": 6845 }, { "epoch": 3.3961772371850563, "grad_norm": 0.07418133519525473, "learning_rate": 5.490566890853822e-07, "loss": 0.4425, 
"step": 6846 }, { "epoch": 3.396673699888296, "grad_norm": 0.07046780669261539, "learning_rate": 5.48166483728193e-07, "loss": 0.4097, "step": 6847 }, { "epoch": 3.3971701625915354, "grad_norm": 0.07472406252686546, "learning_rate": 5.472769587623783e-07, "loss": 0.4413, "step": 6848 }, { "epoch": 3.3976666252947747, "grad_norm": 0.07340409078178213, "learning_rate": 5.463881143238852e-07, "loss": 0.425, "step": 6849 }, { "epoch": 3.398163087998014, "grad_norm": 0.0752792541480088, "learning_rate": 5.454999505485614e-07, "loss": 0.4841, "step": 6850 }, { "epoch": 3.398659550701254, "grad_norm": 0.07103666393741491, "learning_rate": 5.446124675721482e-07, "loss": 0.4576, "step": 6851 }, { "epoch": 3.399156013404493, "grad_norm": 0.07172020968736212, "learning_rate": 5.437256655302814e-07, "loss": 0.4163, "step": 6852 }, { "epoch": 3.3996524761077325, "grad_norm": 0.07079432917083367, "learning_rate": 5.428395445584967e-07, "loss": 0.4185, "step": 6853 }, { "epoch": 3.400148938810972, "grad_norm": 0.07213578508832098, "learning_rate": 5.419541047922217e-07, "loss": 0.44, "step": 6854 }, { "epoch": 3.400645401514211, "grad_norm": 0.07221119180833911, "learning_rate": 5.410693463667827e-07, "loss": 0.441, "step": 6855 }, { "epoch": 3.4011418642174505, "grad_norm": 0.07210326161429813, "learning_rate": 5.401852694174015e-07, "loss": 0.4461, "step": 6856 }, { "epoch": 3.4016383269206902, "grad_norm": 0.07588456212802579, "learning_rate": 5.393018740791928e-07, "loss": 0.4971, "step": 6857 }, { "epoch": 3.4021347896239296, "grad_norm": 0.07226863179149022, "learning_rate": 5.384191604871714e-07, "loss": 0.4659, "step": 6858 }, { "epoch": 3.402631252327169, "grad_norm": 0.07113951574127732, "learning_rate": 5.37537128776246e-07, "loss": 0.4367, "step": 6859 }, { "epoch": 3.4031277150304082, "grad_norm": 0.0699606538530446, "learning_rate": 5.3665577908122e-07, "loss": 0.4076, "step": 6860 }, { "epoch": 3.403624177733648, "grad_norm": 0.07404117594298328, "learning_rate": 5.357751115367927e-07, "loss": 0.4679, "step": 6861 }, { "epoch": 3.4041206404368873, "grad_norm": 0.07338136610437596, "learning_rate": 5.348951262775626e-07, "loss": 0.4255, "step": 6862 }, { "epoch": 3.4046171031401267, "grad_norm": 0.07467626851816644, "learning_rate": 5.340158234380194e-07, "loss": 0.4465, "step": 6863 }, { "epoch": 3.405113565843366, "grad_norm": 0.07499822378987879, "learning_rate": 5.331372031525506e-07, "loss": 0.4405, "step": 6864 }, { "epoch": 3.4056100285466053, "grad_norm": 0.07267811874010739, "learning_rate": 5.322592655554404e-07, "loss": 0.4457, "step": 6865 }, { "epoch": 3.4061064912498447, "grad_norm": 0.07389241397863644, "learning_rate": 5.313820107808665e-07, "loss": 0.4707, "step": 6866 }, { "epoch": 3.4066029539530844, "grad_norm": 0.07501551342292902, "learning_rate": 5.305054389629022e-07, "loss": 0.4596, "step": 6867 }, { "epoch": 3.4070994166563238, "grad_norm": 0.07386900003627883, "learning_rate": 5.296295502355203e-07, "loss": 0.4547, "step": 6868 }, { "epoch": 3.407595879359563, "grad_norm": 0.07222630259324854, "learning_rate": 5.287543447325832e-07, "loss": 0.4221, "step": 6869 }, { "epoch": 3.4080923420628024, "grad_norm": 0.07411540724831181, "learning_rate": 5.278798225878546e-07, "loss": 0.4245, "step": 6870 }, { "epoch": 3.4085888047660418, "grad_norm": 0.072855657027542, "learning_rate": 5.2700598393499e-07, "loss": 0.4279, "step": 6871 }, { "epoch": 3.4090852674692815, "grad_norm": 0.07286142487660482, "learning_rate": 5.261328289075413e-07, "loss": 0.432, "step": 6872 }, { 
"epoch": 3.409581730172521, "grad_norm": 0.07717464831068466, "learning_rate": 5.25260357638957e-07, "loss": 0.5038, "step": 6873 }, { "epoch": 3.41007819287576, "grad_norm": 0.07193552232157858, "learning_rate": 5.243885702625795e-07, "loss": 0.4228, "step": 6874 }, { "epoch": 3.4105746555789995, "grad_norm": 0.07131623721625098, "learning_rate": 5.235174669116499e-07, "loss": 0.4349, "step": 6875 }, { "epoch": 3.411071118282239, "grad_norm": 0.0741642097125669, "learning_rate": 5.226470477192991e-07, "loss": 0.4459, "step": 6876 }, { "epoch": 3.4115675809854786, "grad_norm": 0.07392174863668086, "learning_rate": 5.217773128185582e-07, "loss": 0.4392, "step": 6877 }, { "epoch": 3.412064043688718, "grad_norm": 0.07591407769626606, "learning_rate": 5.209082623423528e-07, "loss": 0.4488, "step": 6878 }, { "epoch": 3.4125605063919573, "grad_norm": 0.0728908444161917, "learning_rate": 5.200398964235015e-07, "loss": 0.4363, "step": 6879 }, { "epoch": 3.4130569690951966, "grad_norm": 0.07095828506917863, "learning_rate": 5.191722151947227e-07, "loss": 0.4358, "step": 6880 }, { "epoch": 3.413553431798436, "grad_norm": 0.07433082837337116, "learning_rate": 5.18305218788625e-07, "loss": 0.437, "step": 6881 }, { "epoch": 3.4140498945016757, "grad_norm": 0.07254351436267927, "learning_rate": 5.174389073377167e-07, "loss": 0.4266, "step": 6882 }, { "epoch": 3.414546357204915, "grad_norm": 0.07367975798141735, "learning_rate": 5.165732809743995e-07, "loss": 0.4083, "step": 6883 }, { "epoch": 3.4150428199081544, "grad_norm": 0.07256116559937056, "learning_rate": 5.157083398309687e-07, "loss": 0.4158, "step": 6884 }, { "epoch": 3.4155392826113937, "grad_norm": 0.0741371545114394, "learning_rate": 5.148440840396191e-07, "loss": 0.4409, "step": 6885 }, { "epoch": 3.416035745314633, "grad_norm": 0.0747069253378787, "learning_rate": 5.139805137324366e-07, "loss": 0.4372, "step": 6886 }, { "epoch": 3.416532208017873, "grad_norm": 0.07110773162800223, "learning_rate": 5.131176290414053e-07, "loss": 0.4526, "step": 6887 }, { "epoch": 3.417028670721112, "grad_norm": 0.06964999302529894, "learning_rate": 5.122554300984028e-07, "loss": 0.41, "step": 6888 }, { "epoch": 3.4175251334243515, "grad_norm": 0.07556264542454232, "learning_rate": 5.113939170352012e-07, "loss": 0.4554, "step": 6889 }, { "epoch": 3.418021596127591, "grad_norm": 0.07304702051349063, "learning_rate": 5.105330899834715e-07, "loss": 0.4468, "step": 6890 }, { "epoch": 3.41851805883083, "grad_norm": 0.07193806001341835, "learning_rate": 5.096729490747754e-07, "loss": 0.4452, "step": 6891 }, { "epoch": 3.41901452153407, "grad_norm": 0.07089186955516436, "learning_rate": 5.08813494440572e-07, "loss": 0.4214, "step": 6892 }, { "epoch": 3.4195109842373093, "grad_norm": 0.07265896324265066, "learning_rate": 5.079547262122147e-07, "loss": 0.4419, "step": 6893 }, { "epoch": 3.4200074469405486, "grad_norm": 0.07104510900451609, "learning_rate": 5.07096644520954e-07, "loss": 0.3984, "step": 6894 }, { "epoch": 3.420503909643788, "grad_norm": 0.07132578371343347, "learning_rate": 5.062392494979329e-07, "loss": 0.4506, "step": 6895 }, { "epoch": 3.4210003723470273, "grad_norm": 0.07208462645684753, "learning_rate": 5.053825412741892e-07, "loss": 0.4283, "step": 6896 }, { "epoch": 3.421496835050267, "grad_norm": 0.0736822957018377, "learning_rate": 5.045265199806599e-07, "loss": 0.4546, "step": 6897 }, { "epoch": 3.4219932977535064, "grad_norm": 0.0744193833215387, "learning_rate": 5.036711857481713e-07, "loss": 0.4315, "step": 6898 }, { "epoch": 
3.4224897604567457, "grad_norm": 0.07275794000618309, "learning_rate": 5.028165387074496e-07, "loss": 0.4233, "step": 6899 }, { "epoch": 3.422986223159985, "grad_norm": 0.07341302980067879, "learning_rate": 5.019625789891136e-07, "loss": 0.4438, "step": 6900 }, { "epoch": 3.4234826858632244, "grad_norm": 0.07474191570866216, "learning_rate": 5.011093067236756e-07, "loss": 0.3999, "step": 6901 }, { "epoch": 3.423979148566464, "grad_norm": 0.07628483834441871, "learning_rate": 5.002567220415467e-07, "loss": 0.4371, "step": 6902 }, { "epoch": 3.4244756112697035, "grad_norm": 0.07391342730693265, "learning_rate": 4.994048250730299e-07, "loss": 0.4255, "step": 6903 }, { "epoch": 3.424972073972943, "grad_norm": 0.07509989493312617, "learning_rate": 4.985536159483234e-07, "loss": 0.4223, "step": 6904 }, { "epoch": 3.425468536676182, "grad_norm": 0.07190502503603871, "learning_rate": 4.97703094797522e-07, "loss": 0.4394, "step": 6905 }, { "epoch": 3.4259649993794214, "grad_norm": 0.07336221763620508, "learning_rate": 4.968532617506133e-07, "loss": 0.4376, "step": 6906 }, { "epoch": 3.4264614620826612, "grad_norm": 0.07212699547695878, "learning_rate": 4.960041169374824e-07, "loss": 0.4522, "step": 6907 }, { "epoch": 3.4269579247859006, "grad_norm": 0.07207613536735617, "learning_rate": 4.951556604879049e-07, "loss": 0.4439, "step": 6908 }, { "epoch": 3.42745438748914, "grad_norm": 0.07366223517554166, "learning_rate": 4.943078925315553e-07, "loss": 0.4529, "step": 6909 }, { "epoch": 3.427950850192379, "grad_norm": 0.07002804566268289, "learning_rate": 4.934608131980012e-07, "loss": 0.4486, "step": 6910 }, { "epoch": 3.4284473128956185, "grad_norm": 0.07405308706476917, "learning_rate": 4.926144226167045e-07, "loss": 0.4445, "step": 6911 }, { "epoch": 3.4289437755988583, "grad_norm": 0.0730847232323671, "learning_rate": 4.917687209170235e-07, "loss": 0.4352, "step": 6912 }, { "epoch": 3.4294402383020977, "grad_norm": 0.07075714549242612, "learning_rate": 4.909237082282081e-07, "loss": 0.4322, "step": 6913 }, { "epoch": 3.429936701005337, "grad_norm": 0.07218842619876654, "learning_rate": 4.900793846794077e-07, "loss": 0.456, "step": 6914 }, { "epoch": 3.4304331637085763, "grad_norm": 0.07554674078927479, "learning_rate": 4.892357503996625e-07, "loss": 0.454, "step": 6915 }, { "epoch": 3.4309296264118156, "grad_norm": 0.07090126832846179, "learning_rate": 4.883928055179072e-07, "loss": 0.4395, "step": 6916 }, { "epoch": 3.4314260891150554, "grad_norm": 0.07396293062611849, "learning_rate": 4.87550550162974e-07, "loss": 0.4567, "step": 6917 }, { "epoch": 3.4319225518182948, "grad_norm": 0.0734029474539442, "learning_rate": 4.867089844635875e-07, "loss": 0.4442, "step": 6918 }, { "epoch": 3.432419014521534, "grad_norm": 0.07125295252218236, "learning_rate": 4.85868108548368e-07, "loss": 0.4483, "step": 6919 }, { "epoch": 3.4329154772247734, "grad_norm": 0.07308178603947592, "learning_rate": 4.850279225458293e-07, "loss": 0.4446, "step": 6920 }, { "epoch": 3.4334119399280127, "grad_norm": 0.07120202733530115, "learning_rate": 4.841884265843799e-07, "loss": 0.4461, "step": 6921 }, { "epoch": 3.4339084026312525, "grad_norm": 0.07536856568384741, "learning_rate": 4.83349620792325e-07, "loss": 0.4696, "step": 6922 }, { "epoch": 3.434404865334492, "grad_norm": 0.07182558717594947, "learning_rate": 4.825115052978613e-07, "loss": 0.4384, "step": 6923 }, { "epoch": 3.434901328037731, "grad_norm": 0.07693529697183651, "learning_rate": 4.816740802290814e-07, "loss": 0.4412, "step": 6924 }, { "epoch": 
3.4353977907409705, "grad_norm": 0.07251550677690793, "learning_rate": 4.80837345713972e-07, "loss": 0.4171, "step": 6925 }, { "epoch": 3.43589425344421, "grad_norm": 0.0728053609741973, "learning_rate": 4.800013018804156e-07, "loss": 0.4297, "step": 6926 }, { "epoch": 3.436390716147449, "grad_norm": 0.07209631348497708, "learning_rate": 4.791659488561878e-07, "loss": 0.4595, "step": 6927 }, { "epoch": 3.436887178850689, "grad_norm": 0.07294278149924639, "learning_rate": 4.783312867689577e-07, "loss": 0.4695, "step": 6928 }, { "epoch": 3.4373836415539283, "grad_norm": 0.07281794317221295, "learning_rate": 4.77497315746292e-07, "loss": 0.4364, "step": 6929 }, { "epoch": 3.4378801042571676, "grad_norm": 0.07092418301279209, "learning_rate": 4.766640359156477e-07, "loss": 0.4218, "step": 6930 }, { "epoch": 3.438376566960407, "grad_norm": 0.07039331739060295, "learning_rate": 4.7583144740438015e-07, "loss": 0.4332, "step": 6931 }, { "epoch": 3.4388730296636467, "grad_norm": 0.07087669079862956, "learning_rate": 4.74999550339737e-07, "loss": 0.405, "step": 6932 }, { "epoch": 3.439369492366886, "grad_norm": 0.0710694632443892, "learning_rate": 4.741683448488582e-07, "loss": 0.4212, "step": 6933 }, { "epoch": 3.4398659550701254, "grad_norm": 0.07324992738813729, "learning_rate": 4.733378310587827e-07, "loss": 0.4639, "step": 6934 }, { "epoch": 3.4403624177733647, "grad_norm": 0.07189516683464231, "learning_rate": 4.72508009096439e-07, "loss": 0.4173, "step": 6935 }, { "epoch": 3.440858880476604, "grad_norm": 0.07210803887462554, "learning_rate": 4.716788790886545e-07, "loss": 0.442, "step": 6936 }, { "epoch": 3.4413553431798434, "grad_norm": 0.07444855352560995, "learning_rate": 4.708504411621473e-07, "loss": 0.4557, "step": 6937 }, { "epoch": 3.441851805883083, "grad_norm": 0.07405575332699776, "learning_rate": 4.7002269544352996e-07, "loss": 0.4949, "step": 6938 }, { "epoch": 3.4423482685863225, "grad_norm": 0.07091669568635707, "learning_rate": 4.6919564205931244e-07, "loss": 0.431, "step": 6939 }, { "epoch": 3.442844731289562, "grad_norm": 0.07403138375012017, "learning_rate": 4.683692811358936e-07, "loss": 0.4486, "step": 6940 }, { "epoch": 3.443341193992801, "grad_norm": 0.07295796845958885, "learning_rate": 4.6754361279957193e-07, "loss": 0.4425, "step": 6941 }, { "epoch": 3.443837656696041, "grad_norm": 0.07398747244263879, "learning_rate": 4.667186371765364e-07, "loss": 0.4608, "step": 6942 }, { "epoch": 3.4443341193992802, "grad_norm": 0.07362634337316348, "learning_rate": 4.658943543928707e-07, "loss": 0.4205, "step": 6943 }, { "epoch": 3.4448305821025196, "grad_norm": 0.07217553443395912, "learning_rate": 4.6507076457455445e-07, "loss": 0.458, "step": 6944 }, { "epoch": 3.445327044805759, "grad_norm": 0.0729885922925346, "learning_rate": 4.6424786784745936e-07, "loss": 0.4691, "step": 6945 }, { "epoch": 3.4458235075089982, "grad_norm": 0.07252264968726019, "learning_rate": 4.634256643373536e-07, "loss": 0.4258, "step": 6946 }, { "epoch": 3.4463199702122376, "grad_norm": 0.07482809179360157, "learning_rate": 4.6260415416989613e-07, "loss": 0.4692, "step": 6947 }, { "epoch": 3.4468164329154773, "grad_norm": 0.07322451229011788, "learning_rate": 4.617833374706415e-07, "loss": 0.4517, "step": 6948 }, { "epoch": 3.4473128956187167, "grad_norm": 0.07452852578412698, "learning_rate": 4.6096321436504e-07, "loss": 0.4663, "step": 6949 }, { "epoch": 3.447809358321956, "grad_norm": 0.07774629413412555, "learning_rate": 4.601437849784318e-07, "loss": 0.4618, "step": 6950 }, { "epoch": 
3.4483058210251953, "grad_norm": 0.07401524988974062, "learning_rate": 4.593250494360563e-07, "loss": 0.4552, "step": 6951 }, { "epoch": 3.448802283728435, "grad_norm": 0.07257419698991296, "learning_rate": 4.585070078630427e-07, "loss": 0.4465, "step": 6952 }, { "epoch": 3.4492987464316744, "grad_norm": 0.0749883510950849, "learning_rate": 4.57689660384415e-07, "loss": 0.4798, "step": 6953 }, { "epoch": 3.4497952091349138, "grad_norm": 0.07509551841042462, "learning_rate": 4.568730071250926e-07, "loss": 0.4721, "step": 6954 }, { "epoch": 3.450291671838153, "grad_norm": 0.07320749923185751, "learning_rate": 4.560570482098875e-07, "loss": 0.4258, "step": 6955 }, { "epoch": 3.4507881345413924, "grad_norm": 0.0732758502467655, "learning_rate": 4.5524178376350703e-07, "loss": 0.4442, "step": 6956 }, { "epoch": 3.4512845972446318, "grad_norm": 0.0738737475124385, "learning_rate": 4.544272139105488e-07, "loss": 0.4272, "step": 6957 }, { "epoch": 3.4517810599478715, "grad_norm": 0.07500508451311125, "learning_rate": 4.536133387755093e-07, "loss": 0.4261, "step": 6958 }, { "epoch": 3.452277522651111, "grad_norm": 0.06947963423355868, "learning_rate": 4.528001584827746e-07, "loss": 0.3949, "step": 6959 }, { "epoch": 3.45277398535435, "grad_norm": 0.07202057902623601, "learning_rate": 4.519876731566264e-07, "loss": 0.4467, "step": 6960 }, { "epoch": 3.4532704480575895, "grad_norm": 0.07415885781551207, "learning_rate": 4.511758829212415e-07, "loss": 0.4295, "step": 6961 }, { "epoch": 3.4537669107608293, "grad_norm": 0.07532471992904242, "learning_rate": 4.5036478790068673e-07, "loss": 0.4598, "step": 6962 }, { "epoch": 3.4542633734640686, "grad_norm": 0.07091409618049023, "learning_rate": 4.495543882189274e-07, "loss": 0.4336, "step": 6963 }, { "epoch": 3.454759836167308, "grad_norm": 0.07498174921876814, "learning_rate": 4.487446839998194e-07, "loss": 0.4352, "step": 6964 }, { "epoch": 3.4552562988705473, "grad_norm": 0.07424799522056441, "learning_rate": 4.47935675367111e-07, "loss": 0.4472, "step": 6965 }, { "epoch": 3.4557527615737866, "grad_norm": 0.07135977542277129, "learning_rate": 4.4712736244444943e-07, "loss": 0.437, "step": 6966 }, { "epoch": 3.456249224277026, "grad_norm": 0.07419910319307793, "learning_rate": 4.4631974535536914e-07, "loss": 0.4626, "step": 6967 }, { "epoch": 3.4567456869802657, "grad_norm": 0.07283909548202254, "learning_rate": 4.455128242233042e-07, "loss": 0.4374, "step": 6968 }, { "epoch": 3.457242149683505, "grad_norm": 0.0740400261684498, "learning_rate": 4.4470659917157877e-07, "loss": 0.4847, "step": 6969 }, { "epoch": 3.4577386123867444, "grad_norm": 0.07116917467308426, "learning_rate": 4.439010703234098e-07, "loss": 0.4155, "step": 6970 }, { "epoch": 3.4582350750899837, "grad_norm": 0.07270125442975392, "learning_rate": 4.4309623780191214e-07, "loss": 0.4087, "step": 6971 }, { "epoch": 3.4587315377932235, "grad_norm": 0.07391297126366274, "learning_rate": 4.4229210173008964e-07, "loss": 0.4322, "step": 6972 }, { "epoch": 3.459228000496463, "grad_norm": 0.07503149368964877, "learning_rate": 4.414886622308423e-07, "loss": 0.466, "step": 6973 }, { "epoch": 3.459724463199702, "grad_norm": 0.07132078609972732, "learning_rate": 4.406859194269619e-07, "loss": 0.4556, "step": 6974 }, { "epoch": 3.4602209259029415, "grad_norm": 0.07621463735331618, "learning_rate": 4.3988387344113647e-07, "loss": 0.446, "step": 6975 }, { "epoch": 3.460717388606181, "grad_norm": 0.07167692072989904, "learning_rate": 4.390825243959451e-07, "loss": 0.4559, "step": 6976 }, { "epoch": 
3.46121385130942, "grad_norm": 0.07245327069025828, "learning_rate": 4.3828187241385987e-07, "loss": 0.4435, "step": 6977 }, { "epoch": 3.46171031401266, "grad_norm": 0.07229654905654437, "learning_rate": 4.374819176172501e-07, "loss": 0.4376, "step": 6978 }, { "epoch": 3.4622067767158993, "grad_norm": 0.07266118777505072, "learning_rate": 4.3668266012837523e-07, "loss": 0.4354, "step": 6979 }, { "epoch": 3.4627032394191386, "grad_norm": 0.07247688735375024, "learning_rate": 4.35884100069387e-07, "loss": 0.4195, "step": 6980 }, { "epoch": 3.463199702122378, "grad_norm": 0.07168303899184163, "learning_rate": 4.350862375623349e-07, "loss": 0.428, "step": 6981 }, { "epoch": 3.4636961648256177, "grad_norm": 0.07207520542686019, "learning_rate": 4.3428907272915823e-07, "loss": 0.424, "step": 6982 }, { "epoch": 3.464192627528857, "grad_norm": 0.07432618035966075, "learning_rate": 4.334926056916916e-07, "loss": 0.4574, "step": 6983 }, { "epoch": 3.4646890902320964, "grad_norm": 0.07394453174221209, "learning_rate": 4.326968365716622e-07, "loss": 0.4408, "step": 6984 }, { "epoch": 3.4651855529353357, "grad_norm": 0.07210933065781855, "learning_rate": 4.319017654906887e-07, "loss": 0.4131, "step": 6985 }, { "epoch": 3.465682015638575, "grad_norm": 0.07351614551970871, "learning_rate": 4.31107392570288e-07, "loss": 0.4648, "step": 6986 }, { "epoch": 3.4661784783418144, "grad_norm": 0.0727033243995911, "learning_rate": 4.303137179318645e-07, "loss": 0.4454, "step": 6987 }, { "epoch": 3.466674941045054, "grad_norm": 0.0726353997298416, "learning_rate": 4.2952074169672175e-07, "loss": 0.4401, "step": 6988 }, { "epoch": 3.4671714037482935, "grad_norm": 0.07393255753739562, "learning_rate": 4.287284639860495e-07, "loss": 0.4799, "step": 6989 }, { "epoch": 3.467667866451533, "grad_norm": 0.07015546639264951, "learning_rate": 4.279368849209381e-07, "loss": 0.4463, "step": 6990 }, { "epoch": 3.468164329154772, "grad_norm": 0.07314951227584136, "learning_rate": 4.271460046223663e-07, "loss": 0.4375, "step": 6991 }, { "epoch": 3.468660791858012, "grad_norm": 0.07076454092546122, "learning_rate": 4.263558232112064e-07, "loss": 0.4472, "step": 6992 }, { "epoch": 3.4691572545612512, "grad_norm": 0.0718955639453092, "learning_rate": 4.255663408082272e-07, "loss": 0.4359, "step": 6993 }, { "epoch": 3.4696537172644906, "grad_norm": 0.07275376732319178, "learning_rate": 4.2477755753408625e-07, "loss": 0.4332, "step": 6994 }, { "epoch": 3.47015017996773, "grad_norm": 0.07130960107957575, "learning_rate": 4.239894735093386e-07, "loss": 0.439, "step": 6995 }, { "epoch": 3.4706466426709692, "grad_norm": 0.0714544722893366, "learning_rate": 4.2320208885442917e-07, "loss": 0.4646, "step": 6996 }, { "epoch": 3.4711431053742086, "grad_norm": 0.0740861914603814, "learning_rate": 4.2241540368969604e-07, "loss": 0.4442, "step": 6997 }, { "epoch": 3.4716395680774483, "grad_norm": 0.07174384340724754, "learning_rate": 4.216294181353736e-07, "loss": 0.4337, "step": 6998 }, { "epoch": 3.4721360307806877, "grad_norm": 0.07355509238233153, "learning_rate": 4.2084413231158473e-07, "loss": 0.4593, "step": 6999 }, { "epoch": 3.472632493483927, "grad_norm": 0.0712277631637039, "learning_rate": 4.2005954633835055e-07, "loss": 0.4212, "step": 7000 }, { "epoch": 3.4731289561871663, "grad_norm": 0.07241974680475426, "learning_rate": 4.1927566033558075e-07, "loss": 0.4101, "step": 7001 }, { "epoch": 3.473625418890406, "grad_norm": 0.07326424285200016, "learning_rate": 4.184924744230784e-07, "loss": 0.446, "step": 7002 }, { "epoch": 
3.4741218815936454, "grad_norm": 0.07283954065452414, "learning_rate": 4.1770998872054436e-07, "loss": 0.4473, "step": 7003 }, { "epoch": 3.4746183442968848, "grad_norm": 0.07298687385051912, "learning_rate": 4.169282033475663e-07, "loss": 0.4355, "step": 7004 }, { "epoch": 3.475114807000124, "grad_norm": 0.06950575035689337, "learning_rate": 4.1614711842362876e-07, "loss": 0.4087, "step": 7005 }, { "epoch": 3.4756112697033634, "grad_norm": 0.07351689625797672, "learning_rate": 4.153667340681067e-07, "loss": 0.441, "step": 7006 }, { "epoch": 3.4761077324066028, "grad_norm": 0.07164469420788898, "learning_rate": 4.1458705040027135e-07, "loss": 0.4304, "step": 7007 }, { "epoch": 3.4766041951098425, "grad_norm": 0.07412787005202179, "learning_rate": 4.138080675392836e-07, "loss": 0.4427, "step": 7008 }, { "epoch": 3.477100657813082, "grad_norm": 0.07512842436789635, "learning_rate": 4.130297856041976e-07, "loss": 0.418, "step": 7009 }, { "epoch": 3.477597120516321, "grad_norm": 0.07599135302617657, "learning_rate": 4.1225220471396376e-07, "loss": 0.4542, "step": 7010 }, { "epoch": 3.4780935832195605, "grad_norm": 0.07028592568982106, "learning_rate": 4.1147532498742035e-07, "loss": 0.4326, "step": 7011 }, { "epoch": 3.4785900459228, "grad_norm": 0.0723766237238303, "learning_rate": 4.1069914654330357e-07, "loss": 0.4337, "step": 7012 }, { "epoch": 3.4790865086260396, "grad_norm": 0.07060596015984845, "learning_rate": 4.099236695002379e-07, "loss": 0.4206, "step": 7013 }, { "epoch": 3.479582971329279, "grad_norm": 0.07124772160966228, "learning_rate": 4.0914889397674243e-07, "loss": 0.4288, "step": 7014 }, { "epoch": 3.4800794340325183, "grad_norm": 0.07437517655189398, "learning_rate": 4.0837482009123017e-07, "loss": 0.4377, "step": 7015 }, { "epoch": 3.4805758967357576, "grad_norm": 0.07370022244994137, "learning_rate": 4.0760144796200605e-07, "loss": 0.4463, "step": 7016 }, { "epoch": 3.481072359438997, "grad_norm": 0.07031536555381257, "learning_rate": 4.06828777707266e-07, "loss": 0.4149, "step": 7017 }, { "epoch": 3.4815688221422367, "grad_norm": 0.07604069978172456, "learning_rate": 4.060568094451023e-07, "loss": 0.4738, "step": 7018 }, { "epoch": 3.482065284845476, "grad_norm": 0.0701995822374433, "learning_rate": 4.05285543293496e-07, "loss": 0.4036, "step": 7019 }, { "epoch": 3.4825617475487154, "grad_norm": 0.07424172134755505, "learning_rate": 4.045149793703257e-07, "loss": 0.4318, "step": 7020 }, { "epoch": 3.4830582102519547, "grad_norm": 0.07285730183823594, "learning_rate": 4.03745117793356e-07, "loss": 0.4597, "step": 7021 }, { "epoch": 3.483554672955194, "grad_norm": 0.07295991960910402, "learning_rate": 4.0297595868025065e-07, "loss": 0.4283, "step": 7022 }, { "epoch": 3.484051135658434, "grad_norm": 0.0723258436974514, "learning_rate": 4.022075021485622e-07, "loss": 0.424, "step": 7023 }, { "epoch": 3.484547598361673, "grad_norm": 0.07298322109584299, "learning_rate": 4.014397483157362e-07, "loss": 0.423, "step": 7024 }, { "epoch": 3.4850440610649125, "grad_norm": 0.07205478016555279, "learning_rate": 4.0067269729911316e-07, "loss": 0.4437, "step": 7025 }, { "epoch": 3.485540523768152, "grad_norm": 0.07307966961163839, "learning_rate": 3.999063492159233e-07, "loss": 0.452, "step": 7026 }, { "epoch": 3.486036986471391, "grad_norm": 0.07439585552315807, "learning_rate": 3.9914070418329123e-07, "loss": 0.4404, "step": 7027 }, { "epoch": 3.486533449174631, "grad_norm": 0.07114088102753215, "learning_rate": 3.983757623182338e-07, "loss": 0.4224, "step": 7028 }, { "epoch": 
3.4870299118778703, "grad_norm": 0.07336142749381283, "learning_rate": 3.9761152373765875e-07, "loss": 0.4412, "step": 7029 }, { "epoch": 3.4875263745811096, "grad_norm": 0.07072524821639858, "learning_rate": 3.968479885583698e-07, "loss": 0.4458, "step": 7030 }, { "epoch": 3.488022837284349, "grad_norm": 0.07262332837310295, "learning_rate": 3.960851568970586e-07, "loss": 0.4425, "step": 7031 }, { "epoch": 3.4885192999875883, "grad_norm": 0.07243265698774211, "learning_rate": 3.953230288703136e-07, "loss": 0.4293, "step": 7032 }, { "epoch": 3.489015762690828, "grad_norm": 0.07096323367904923, "learning_rate": 3.945616045946138e-07, "loss": 0.4238, "step": 7033 }, { "epoch": 3.4895122253940674, "grad_norm": 0.07077384511779652, "learning_rate": 3.938008841863289e-07, "loss": 0.4137, "step": 7034 }, { "epoch": 3.4900086880973067, "grad_norm": 0.07527318638659324, "learning_rate": 3.9304086776172535e-07, "loss": 0.4349, "step": 7035 }, { "epoch": 3.490505150800546, "grad_norm": 0.06952756070762323, "learning_rate": 3.9228155543695803e-07, "loss": 0.4102, "step": 7036 }, { "epoch": 3.4910016135037854, "grad_norm": 0.07224901371189203, "learning_rate": 3.915229473280757e-07, "loss": 0.4401, "step": 7037 }, { "epoch": 3.491498076207025, "grad_norm": 0.07204345824938724, "learning_rate": 3.907650435510185e-07, "loss": 0.4125, "step": 7038 }, { "epoch": 3.4919945389102645, "grad_norm": 0.07180263804640478, "learning_rate": 3.900078442216221e-07, "loss": 0.4066, "step": 7039 }, { "epoch": 3.492491001613504, "grad_norm": 0.0722475836347384, "learning_rate": 3.8925134945561107e-07, "loss": 0.4146, "step": 7040 }, { "epoch": 3.492987464316743, "grad_norm": 0.06995833245259021, "learning_rate": 3.8849555936860296e-07, "loss": 0.4489, "step": 7041 }, { "epoch": 3.4934839270199824, "grad_norm": 0.07158725913239221, "learning_rate": 3.877404740761093e-07, "loss": 0.4211, "step": 7042 }, { "epoch": 3.4939803897232222, "grad_norm": 0.07263199309595254, "learning_rate": 3.86986093693531e-07, "loss": 0.4258, "step": 7043 }, { "epoch": 3.4944768524264616, "grad_norm": 0.07346394269935339, "learning_rate": 3.8623241833616543e-07, "loss": 0.4543, "step": 7044 }, { "epoch": 3.494973315129701, "grad_norm": 0.0726445809581451, "learning_rate": 3.854794481191987e-07, "loss": 0.435, "step": 7045 }, { "epoch": 3.49546977783294, "grad_norm": 0.07285345373233935, "learning_rate": 3.847271831577093e-07, "loss": 0.4339, "step": 7046 }, { "epoch": 3.4959662405361795, "grad_norm": 0.07409485080802107, "learning_rate": 3.8397562356667026e-07, "loss": 0.446, "step": 7047 }, { "epoch": 3.4964627032394193, "grad_norm": 0.07012737357757955, "learning_rate": 3.832247694609442e-07, "loss": 0.4208, "step": 7048 }, { "epoch": 3.4969591659426587, "grad_norm": 0.07193086143076212, "learning_rate": 3.824746209552882e-07, "loss": 0.4454, "step": 7049 }, { "epoch": 3.497455628645898, "grad_norm": 0.07795249327587464, "learning_rate": 3.817251781643505e-07, "loss": 0.431, "step": 7050 }, { "epoch": 3.4979520913491373, "grad_norm": 0.0716902556228763, "learning_rate": 3.8097644120266954e-07, "loss": 0.4214, "step": 7051 }, { "epoch": 3.4984485540523766, "grad_norm": 0.07223590607474627, "learning_rate": 3.8022841018468147e-07, "loss": 0.4416, "step": 7052 }, { "epoch": 3.4989450167556164, "grad_norm": 0.07436690378938308, "learning_rate": 3.794810852247066e-07, "loss": 0.457, "step": 7053 }, { "epoch": 3.4994414794588558, "grad_norm": 0.07112956963476424, "learning_rate": 3.787344664369641e-07, "loss": 0.4184, "step": 7054 }, { "epoch": 
3.499937942162095, "grad_norm": 0.07104327637378509, "learning_rate": 3.779885539355621e-07, "loss": 0.4098, "step": 7055 }, { "epoch": 3.5004344048653344, "grad_norm": 0.07306606499748441, "learning_rate": 3.7724334783450054e-07, "loss": 0.4592, "step": 7056 }, { "epoch": 3.5004344048653344, "eval_loss": 0.5162577629089355, "eval_runtime": 259.1404, "eval_samples_per_second": 117.13, "eval_steps_per_second": 14.645, "step": 7056 }, { "epoch": 3.5009308675685737, "grad_norm": 0.07203781559335148, "learning_rate": 3.764988482476739e-07, "loss": 0.4234, "step": 7057 }, { "epoch": 3.501427330271813, "grad_norm": 0.07546147841789971, "learning_rate": 3.75755055288865e-07, "loss": 0.4316, "step": 7058 }, { "epoch": 3.501923792975053, "grad_norm": 0.07488137656265763, "learning_rate": 3.7501196907175297e-07, "loss": 0.4289, "step": 7059 }, { "epoch": 3.502420255678292, "grad_norm": 0.07033502879304558, "learning_rate": 3.742695897099052e-07, "loss": 0.403, "step": 7060 }, { "epoch": 3.5029167183815315, "grad_norm": 0.07491132145559015, "learning_rate": 3.7352791731678164e-07, "loss": 0.4701, "step": 7061 }, { "epoch": 3.503413181084771, "grad_norm": 0.07385912091891961, "learning_rate": 3.7278695200573754e-07, "loss": 0.4438, "step": 7062 }, { "epoch": 3.5039096437880106, "grad_norm": 0.07340689241571162, "learning_rate": 3.720466938900147e-07, "loss": 0.4538, "step": 7063 }, { "epoch": 3.50440610649125, "grad_norm": 0.07295251691291561, "learning_rate": 3.7130714308275196e-07, "loss": 0.4239, "step": 7064 }, { "epoch": 3.5049025691944893, "grad_norm": 0.07080688080671606, "learning_rate": 3.705682996969773e-07, "loss": 0.4475, "step": 7065 }, { "epoch": 3.5053990318977286, "grad_norm": 0.07369474252686231, "learning_rate": 3.6983016384560975e-07, "loss": 0.449, "step": 7066 }, { "epoch": 3.505895494600968, "grad_norm": 0.07286840009039136, "learning_rate": 3.6909273564146366e-07, "loss": 0.4371, "step": 7067 }, { "epoch": 3.5063919573042073, "grad_norm": 0.07326717587537919, "learning_rate": 3.683560151972415e-07, "loss": 0.4451, "step": 7068 }, { "epoch": 3.506888420007447, "grad_norm": 0.0730059610532952, "learning_rate": 3.6762000262554e-07, "loss": 0.4496, "step": 7069 }, { "epoch": 3.5073848827106864, "grad_norm": 0.0725619435516716, "learning_rate": 3.668846980388452e-07, "loss": 0.4546, "step": 7070 }, { "epoch": 3.5078813454139257, "grad_norm": 0.07202927835061003, "learning_rate": 3.661501015495389e-07, "loss": 0.4223, "step": 7071 }, { "epoch": 3.508377808117165, "grad_norm": 0.06993486720630124, "learning_rate": 3.6541621326989183e-07, "loss": 0.4114, "step": 7072 }, { "epoch": 3.508874270820405, "grad_norm": 0.07257818917447946, "learning_rate": 3.6468303331206546e-07, "loss": 0.4011, "step": 7073 }, { "epoch": 3.509370733523644, "grad_norm": 0.07288833908931067, "learning_rate": 3.6395056178811725e-07, "loss": 0.4481, "step": 7074 }, { "epoch": 3.5098671962268835, "grad_norm": 0.07092586904115811, "learning_rate": 3.632187988099906e-07, "loss": 0.4481, "step": 7075 }, { "epoch": 3.510363658930123, "grad_norm": 0.07214368911148672, "learning_rate": 3.6248774448952695e-07, "loss": 0.4237, "step": 7076 }, { "epoch": 3.510860121633362, "grad_norm": 0.0722242659165623, "learning_rate": 3.617573989384543e-07, "loss": 0.4321, "step": 7077 }, { "epoch": 3.5113565843366015, "grad_norm": 0.06952702140904304, "learning_rate": 3.6102776226839386e-07, "loss": 0.4119, "step": 7078 }, { "epoch": 3.5118530470398412, "grad_norm": 0.07076164720383553, "learning_rate": 3.602988345908609e-07, "loss": 
0.4208, "step": 7079 }, { "epoch": 3.5123495097430806, "grad_norm": 0.07340766789593617, "learning_rate": 3.5957061601725797e-07, "loss": 0.4519, "step": 7080 }, { "epoch": 3.51284597244632, "grad_norm": 0.07459512778773453, "learning_rate": 3.588431066588832e-07, "loss": 0.4399, "step": 7081 }, { "epoch": 3.5133424351495592, "grad_norm": 0.0714307806848993, "learning_rate": 3.58116306626925e-07, "loss": 0.4329, "step": 7082 }, { "epoch": 3.513838897852799, "grad_norm": 0.07363200094283813, "learning_rate": 3.5739021603246104e-07, "loss": 0.4387, "step": 7083 }, { "epoch": 3.5143353605560383, "grad_norm": 0.07259395908343577, "learning_rate": 3.56664834986466e-07, "loss": 0.4516, "step": 7084 }, { "epoch": 3.5148318232592777, "grad_norm": 0.07184213899212576, "learning_rate": 3.5594016359979886e-07, "loss": 0.4273, "step": 7085 }, { "epoch": 3.515328285962517, "grad_norm": 0.07294685594344688, "learning_rate": 3.552162019832167e-07, "loss": 0.4387, "step": 7086 }, { "epoch": 3.5158247486657563, "grad_norm": 0.07441906850778905, "learning_rate": 3.5449295024736374e-07, "loss": 0.4337, "step": 7087 }, { "epoch": 3.5163212113689957, "grad_norm": 0.07368084403298997, "learning_rate": 3.5377040850277935e-07, "loss": 0.4512, "step": 7088 }, { "epoch": 3.5168176740722354, "grad_norm": 0.0713595992730785, "learning_rate": 3.5304857685989125e-07, "loss": 0.4433, "step": 7089 }, { "epoch": 3.5173141367754748, "grad_norm": 0.071877961919271, "learning_rate": 3.52327455429019e-07, "loss": 0.4355, "step": 7090 }, { "epoch": 3.517810599478714, "grad_norm": 0.07338459914123707, "learning_rate": 3.5160704432037616e-07, "loss": 0.4804, "step": 7091 }, { "epoch": 3.5183070621819534, "grad_norm": 0.06996501674731023, "learning_rate": 3.5088734364406573e-07, "loss": 0.4079, "step": 7092 }, { "epoch": 3.518803524885193, "grad_norm": 0.07605308956893601, "learning_rate": 3.5016835351008083e-07, "loss": 0.4442, "step": 7093 }, { "epoch": 3.5192999875884325, "grad_norm": 0.0721478352591386, "learning_rate": 3.4945007402830964e-07, "loss": 0.4262, "step": 7094 }, { "epoch": 3.519796450291672, "grad_norm": 0.07622812677268571, "learning_rate": 3.487325053085283e-07, "loss": 0.465, "step": 7095 }, { "epoch": 3.520292912994911, "grad_norm": 0.07288424413448973, "learning_rate": 3.480156474604063e-07, "loss": 0.4483, "step": 7096 }, { "epoch": 3.5207893756981505, "grad_norm": 0.07327517647672152, "learning_rate": 3.472995005935037e-07, "loss": 0.4246, "step": 7097 }, { "epoch": 3.52128583840139, "grad_norm": 0.07373687082587846, "learning_rate": 3.465840648172719e-07, "loss": 0.4583, "step": 7098 }, { "epoch": 3.5217823011046296, "grad_norm": 0.07400232258061422, "learning_rate": 3.45869340241054e-07, "loss": 0.4386, "step": 7099 }, { "epoch": 3.522278763807869, "grad_norm": 0.07429626916320163, "learning_rate": 3.451553269740848e-07, "loss": 0.4362, "step": 7100 }, { "epoch": 3.5227752265111083, "grad_norm": 0.07327933612968662, "learning_rate": 3.4444202512548874e-07, "loss": 0.4365, "step": 7101 }, { "epoch": 3.5232716892143476, "grad_norm": 0.07178454799022094, "learning_rate": 3.437294348042819e-07, "loss": 0.4208, "step": 7102 }, { "epoch": 3.5237681519175874, "grad_norm": 0.07363781565375832, "learning_rate": 3.4301755611937435e-07, "loss": 0.419, "step": 7103 }, { "epoch": 3.5242646146208267, "grad_norm": 0.07279288571151912, "learning_rate": 3.423063891795647e-07, "loss": 0.4364, "step": 7104 }, { "epoch": 3.524761077324066, "grad_norm": 0.07073245127069415, "learning_rate": 3.415959340935415e-07, "loss": 
0.4227, "step": 7105 }, { "epoch": 3.5252575400273054, "grad_norm": 0.07027468642692776, "learning_rate": 3.408861909698896e-07, "loss": 0.4351, "step": 7106 }, { "epoch": 3.5257540027305447, "grad_norm": 0.07470813023985937, "learning_rate": 3.401771599170789e-07, "loss": 0.4514, "step": 7107 }, { "epoch": 3.526250465433784, "grad_norm": 0.07204009719364936, "learning_rate": 3.3946884104347543e-07, "loss": 0.438, "step": 7108 }, { "epoch": 3.526746928137024, "grad_norm": 0.07185790187129391, "learning_rate": 3.3876123445733376e-07, "loss": 0.4442, "step": 7109 }, { "epoch": 3.527243390840263, "grad_norm": 0.07484063183218079, "learning_rate": 3.380543402667996e-07, "loss": 0.4577, "step": 7110 }, { "epoch": 3.5277398535435025, "grad_norm": 0.07228052121532155, "learning_rate": 3.3734815857991155e-07, "loss": 0.4441, "step": 7111 }, { "epoch": 3.528236316246742, "grad_norm": 0.07094925816469587, "learning_rate": 3.366426895045966e-07, "loss": 0.4406, "step": 7112 }, { "epoch": 3.5287327789499816, "grad_norm": 0.06883784070072635, "learning_rate": 3.359379331486762e-07, "loss": 0.424, "step": 7113 }, { "epoch": 3.529229241653221, "grad_norm": 0.07220676053662756, "learning_rate": 3.352338896198598e-07, "loss": 0.4723, "step": 7114 }, { "epoch": 3.5297257043564603, "grad_norm": 0.07167224348889963, "learning_rate": 3.3453055902574915e-07, "loss": 0.4114, "step": 7115 }, { "epoch": 3.5302221670596996, "grad_norm": 0.07542897494207551, "learning_rate": 3.3382794147383877e-07, "loss": 0.4556, "step": 7116 }, { "epoch": 3.530718629762939, "grad_norm": 0.07316883893388977, "learning_rate": 3.3312603707151006e-07, "loss": 0.4451, "step": 7117 }, { "epoch": 3.5312150924661783, "grad_norm": 0.07512076327321777, "learning_rate": 3.324248459260393e-07, "loss": 0.4601, "step": 7118 }, { "epoch": 3.531711555169418, "grad_norm": 0.0720588479676324, "learning_rate": 3.317243681445914e-07, "loss": 0.427, "step": 7119 }, { "epoch": 3.5322080178726574, "grad_norm": 0.07030183758489095, "learning_rate": 3.310246038342246e-07, "loss": 0.4007, "step": 7120 }, { "epoch": 3.5327044805758967, "grad_norm": 0.07426185203798406, "learning_rate": 3.3032555310188566e-07, "loss": 0.4381, "step": 7121 }, { "epoch": 3.533200943279136, "grad_norm": 0.07266412590830498, "learning_rate": 3.2962721605441227e-07, "loss": 0.4422, "step": 7122 }, { "epoch": 3.533697405982376, "grad_norm": 0.07337165886257081, "learning_rate": 3.289295927985364e-07, "loss": 0.4564, "step": 7123 }, { "epoch": 3.534193868685615, "grad_norm": 0.07660405077180722, "learning_rate": 3.282326834408761e-07, "loss": 0.4615, "step": 7124 }, { "epoch": 3.5346903313888545, "grad_norm": 0.07351730627486232, "learning_rate": 3.2753648808794505e-07, "loss": 0.4277, "step": 7125 }, { "epoch": 3.535186794092094, "grad_norm": 0.0716057122840003, "learning_rate": 3.268410068461447e-07, "loss": 0.4415, "step": 7126 }, { "epoch": 3.535683256795333, "grad_norm": 0.07415164949108556, "learning_rate": 3.261462398217674e-07, "loss": 0.4648, "step": 7127 }, { "epoch": 3.5361797194985725, "grad_norm": 0.06981491738014588, "learning_rate": 3.254521871209981e-07, "loss": 0.4273, "step": 7128 }, { "epoch": 3.5366761822018122, "grad_norm": 0.07090923551472, "learning_rate": 3.247588488499115e-07, "loss": 0.417, "step": 7129 }, { "epoch": 3.5371726449050516, "grad_norm": 0.07266912902613439, "learning_rate": 3.240662251144727e-07, "loss": 0.4403, "step": 7130 }, { "epoch": 3.537669107608291, "grad_norm": 0.07248447075293059, "learning_rate": 3.233743160205388e-07, "loss": 
0.4516, "step": 7131 }, { "epoch": 3.5381655703115302, "grad_norm": 0.0741972809931787, "learning_rate": 3.2268312167385687e-07, "loss": 0.4377, "step": 7132 }, { "epoch": 3.53866203301477, "grad_norm": 0.07189591452664673, "learning_rate": 3.2199264218006453e-07, "loss": 0.4497, "step": 7133 }, { "epoch": 3.5391584957180093, "grad_norm": 0.07147144486299516, "learning_rate": 3.213028776446903e-07, "loss": 0.4289, "step": 7134 }, { "epoch": 3.5396549584212487, "grad_norm": 0.07361435471493433, "learning_rate": 3.206138281731547e-07, "loss": 0.4726, "step": 7135 }, { "epoch": 3.540151421124488, "grad_norm": 0.07246314148121354, "learning_rate": 3.1992549387076685e-07, "loss": 0.4396, "step": 7136 }, { "epoch": 3.5406478838277273, "grad_norm": 0.072942581284602, "learning_rate": 3.1923787484272717e-07, "loss": 0.4445, "step": 7137 }, { "epoch": 3.5411443465309667, "grad_norm": 0.07465515413450105, "learning_rate": 3.1855097119412924e-07, "loss": 0.4613, "step": 7138 }, { "epoch": 3.5416408092342064, "grad_norm": 0.07283341180650725, "learning_rate": 3.1786478302995305e-07, "loss": 0.4536, "step": 7139 }, { "epoch": 3.5421372719374458, "grad_norm": 0.07424965634235413, "learning_rate": 3.17179310455073e-07, "loss": 0.4871, "step": 7140 }, { "epoch": 3.542633734640685, "grad_norm": 0.07378328684390752, "learning_rate": 3.164945535742525e-07, "loss": 0.489, "step": 7141 }, { "epoch": 3.5431301973439244, "grad_norm": 0.07472723192771431, "learning_rate": 3.15810512492144e-07, "loss": 0.4809, "step": 7142 }, { "epoch": 3.543626660047164, "grad_norm": 0.07196375675496666, "learning_rate": 3.151271873132944e-07, "loss": 0.466, "step": 7143 }, { "epoch": 3.5441231227504035, "grad_norm": 0.07180812934665125, "learning_rate": 3.1444457814213736e-07, "loss": 0.4587, "step": 7144 }, { "epoch": 3.544619585453643, "grad_norm": 0.07119979957545244, "learning_rate": 3.13762685083e-07, "loss": 0.4392, "step": 7145 }, { "epoch": 3.545116048156882, "grad_norm": 0.07433975376299118, "learning_rate": 3.1308150824009785e-07, "loss": 0.4415, "step": 7146 }, { "epoch": 3.5456125108601215, "grad_norm": 0.07140407776914781, "learning_rate": 3.1240104771753765e-07, "loss": 0.4345, "step": 7147 }, { "epoch": 3.546108973563361, "grad_norm": 0.07269789822773806, "learning_rate": 3.1172130361931894e-07, "loss": 0.4347, "step": 7148 }, { "epoch": 3.5466054362666006, "grad_norm": 0.07110519917373652, "learning_rate": 3.1104227604932644e-07, "loss": 0.4149, "step": 7149 }, { "epoch": 3.54710189896984, "grad_norm": 0.07206882812848532, "learning_rate": 3.10363965111341e-07, "loss": 0.4277, "step": 7150 }, { "epoch": 3.5475983616730793, "grad_norm": 0.0732811113576226, "learning_rate": 3.096863709090303e-07, "loss": 0.4377, "step": 7151 }, { "epoch": 3.5480948243763186, "grad_norm": 0.0750770669128101, "learning_rate": 3.0900949354595535e-07, "loss": 0.4593, "step": 7152 }, { "epoch": 3.5485912870795584, "grad_norm": 0.07486470898140467, "learning_rate": 3.0833333312556446e-07, "loss": 0.4725, "step": 7153 }, { "epoch": 3.5490877497827977, "grad_norm": 0.07116823477747528, "learning_rate": 3.076578897511978e-07, "loss": 0.4339, "step": 7154 }, { "epoch": 3.549584212486037, "grad_norm": 0.07283679758350045, "learning_rate": 3.069831635260878e-07, "loss": 0.4172, "step": 7155 }, { "epoch": 3.5500806751892764, "grad_norm": 0.07304141500792913, "learning_rate": 3.0630915455335365e-07, "loss": 0.4604, "step": 7156 }, { "epoch": 3.5505771378925157, "grad_norm": 0.07196716548331811, "learning_rate": 3.0563586293600846e-07, "loss": 
0.4383, "step": 7157 }, { "epoch": 3.551073600595755, "grad_norm": 0.07490926531623762, "learning_rate": 3.049632887769527e-07, "loss": 0.4298, "step": 7158 }, { "epoch": 3.551570063298995, "grad_norm": 0.07673976524598099, "learning_rate": 3.0429143217897863e-07, "loss": 0.484, "step": 7159 }, { "epoch": 3.552066526002234, "grad_norm": 0.0718037382316523, "learning_rate": 3.0362029324477015e-07, "loss": 0.4102, "step": 7160 }, { "epoch": 3.5525629887054735, "grad_norm": 0.07090278460304152, "learning_rate": 3.0294987207689805e-07, "loss": 0.443, "step": 7161 }, { "epoch": 3.553059451408713, "grad_norm": 0.0701796762434687, "learning_rate": 3.022801687778276e-07, "loss": 0.4085, "step": 7162 }, { "epoch": 3.5535559141119526, "grad_norm": 0.07463259506007328, "learning_rate": 3.0161118344991083e-07, "loss": 0.4718, "step": 7163 }, { "epoch": 3.554052376815192, "grad_norm": 0.07268649175220862, "learning_rate": 3.0094291619539084e-07, "loss": 0.4477, "step": 7164 }, { "epoch": 3.5545488395184313, "grad_norm": 0.07293852406475182, "learning_rate": 3.0027536711640436e-07, "loss": 0.4708, "step": 7165 }, { "epoch": 3.5550453022216706, "grad_norm": 0.07040439954277569, "learning_rate": 2.99608536314972e-07, "loss": 0.4195, "step": 7166 }, { "epoch": 3.55554176492491, "grad_norm": 0.07409754906539101, "learning_rate": 2.9894242389301053e-07, "loss": 0.4622, "step": 7167 }, { "epoch": 3.5560382276281493, "grad_norm": 0.07423989038436342, "learning_rate": 2.982770299523241e-07, "loss": 0.4518, "step": 7168 }, { "epoch": 3.5565346903313886, "grad_norm": 0.07487884963289175, "learning_rate": 2.976123545946064e-07, "loss": 0.4503, "step": 7169 }, { "epoch": 3.5570311530346284, "grad_norm": 0.07367120972684456, "learning_rate": 2.969483979214438e-07, "loss": 0.4417, "step": 7170 }, { "epoch": 3.5575276157378677, "grad_norm": 0.07103468441793306, "learning_rate": 2.9628516003430974e-07, "loss": 0.4461, "step": 7171 }, { "epoch": 3.558024078441107, "grad_norm": 0.07187761985962852, "learning_rate": 2.9562264103457196e-07, "loss": 0.4685, "step": 7172 }, { "epoch": 3.558520541144347, "grad_norm": 0.07424526932350758, "learning_rate": 2.9496084102348443e-07, "loss": 0.4502, "step": 7173 }, { "epoch": 3.559017003847586, "grad_norm": 0.07072411570855557, "learning_rate": 2.942997601021924e-07, "loss": 0.4137, "step": 7174 }, { "epoch": 3.5595134665508255, "grad_norm": 0.0724840565932664, "learning_rate": 2.936393983717323e-07, "loss": 0.4323, "step": 7175 }, { "epoch": 3.560009929254065, "grad_norm": 0.07234915777182487, "learning_rate": 2.929797559330283e-07, "loss": 0.4621, "step": 7176 }, { "epoch": 3.560506391957304, "grad_norm": 0.07094228491783852, "learning_rate": 2.9232083288689814e-07, "loss": 0.4311, "step": 7177 }, { "epoch": 3.5610028546605434, "grad_norm": 0.07285222821523794, "learning_rate": 2.916626293340474e-07, "loss": 0.4496, "step": 7178 }, { "epoch": 3.561499317363783, "grad_norm": 0.073098707674849, "learning_rate": 2.910051453750701e-07, "loss": 0.4304, "step": 7179 }, { "epoch": 3.5619957800670226, "grad_norm": 0.07327812181997499, "learning_rate": 2.9034838111045406e-07, "loss": 0.4383, "step": 7180 }, { "epoch": 3.562492242770262, "grad_norm": 0.07343319803974091, "learning_rate": 2.896923366405746e-07, "loss": 0.4452, "step": 7181 }, { "epoch": 3.562988705473501, "grad_norm": 0.07205469997223532, "learning_rate": 2.890370120656971e-07, "loss": 0.419, "step": 7182 }, { "epoch": 3.563485168176741, "grad_norm": 0.07112166085797217, "learning_rate": 2.8838240748597757e-07, "loss": 
0.4192, "step": 7183 }, { "epoch": 3.5639816308799803, "grad_norm": 0.0724456402776545, "learning_rate": 2.87728523001462e-07, "loss": 0.4247, "step": 7184 }, { "epoch": 3.5644780935832197, "grad_norm": 0.07155775449072983, "learning_rate": 2.8707535871208667e-07, "loss": 0.4436, "step": 7185 }, { "epoch": 3.564974556286459, "grad_norm": 0.0722910340184464, "learning_rate": 2.864229147176761e-07, "loss": 0.4576, "step": 7186 }, { "epoch": 3.5654710189896983, "grad_norm": 0.07227263041241058, "learning_rate": 2.8577119111794725e-07, "loss": 0.4493, "step": 7187 }, { "epoch": 3.5659674816929376, "grad_norm": 0.0742062560955642, "learning_rate": 2.851201880125043e-07, "loss": 0.4319, "step": 7188 }, { "epoch": 3.566463944396177, "grad_norm": 0.07020998189715937, "learning_rate": 2.8446990550084373e-07, "loss": 0.4276, "step": 7189 }, { "epoch": 3.5669604070994168, "grad_norm": 0.0719695731390821, "learning_rate": 2.8382034368235003e-07, "loss": 0.4354, "step": 7190 }, { "epoch": 3.567456869802656, "grad_norm": 0.07144612364466402, "learning_rate": 2.8317150265629813e-07, "loss": 0.4308, "step": 7191 }, { "epoch": 3.5679533325058954, "grad_norm": 0.07389313275934502, "learning_rate": 2.825233825218543e-07, "loss": 0.4377, "step": 7192 }, { "epoch": 3.568449795209135, "grad_norm": 0.07368487689346809, "learning_rate": 2.818759833780721e-07, "loss": 0.456, "step": 7193 }, { "epoch": 3.5689462579123745, "grad_norm": 0.07254773669872457, "learning_rate": 2.8122930532389683e-07, "loss": 0.4253, "step": 7194 }, { "epoch": 3.569442720615614, "grad_norm": 0.07141227637028748, "learning_rate": 2.8058334845816214e-07, "loss": 0.4265, "step": 7195 }, { "epoch": 3.569939183318853, "grad_norm": 0.07158291382565189, "learning_rate": 2.799381128795925e-07, "loss": 0.4584, "step": 7196 }, { "epoch": 3.5704356460220925, "grad_norm": 0.07470124461409806, "learning_rate": 2.7929359868680283e-07, "loss": 0.464, "step": 7197 }, { "epoch": 3.570932108725332, "grad_norm": 0.07348846639386283, "learning_rate": 2.7864980597829495e-07, "loss": 0.46, "step": 7198 }, { "epoch": 3.571428571428571, "grad_norm": 0.07182543905438588, "learning_rate": 2.7800673485246343e-07, "loss": 0.462, "step": 7199 }, { "epoch": 3.571925034131811, "grad_norm": 0.07144046247551303, "learning_rate": 2.7736438540759026e-07, "loss": 0.44, "step": 7200 }, { "epoch": 3.5724214968350503, "grad_norm": 0.07192453287481038, "learning_rate": 2.7672275774184967e-07, "loss": 0.4412, "step": 7201 }, { "epoch": 3.5729179595382896, "grad_norm": 0.0729370214192708, "learning_rate": 2.760818519533037e-07, "loss": 0.4589, "step": 7202 }, { "epoch": 3.573414422241529, "grad_norm": 0.07311855870542439, "learning_rate": 2.754416681399041e-07, "loss": 0.4523, "step": 7203 }, { "epoch": 3.5739108849447687, "grad_norm": 0.07161270410308067, "learning_rate": 2.748022063994932e-07, "loss": 0.4334, "step": 7204 }, { "epoch": 3.574407347648008, "grad_norm": 0.0741000290693234, "learning_rate": 2.7416346682980264e-07, "loss": 0.4461, "step": 7205 }, { "epoch": 3.5749038103512474, "grad_norm": 0.07146630300410399, "learning_rate": 2.7352544952845226e-07, "loss": 0.4439, "step": 7206 }, { "epoch": 3.5754002730544867, "grad_norm": 0.07101804224421061, "learning_rate": 2.728881545929546e-07, "loss": 0.4301, "step": 7207 }, { "epoch": 3.575896735757726, "grad_norm": 0.07617485536695313, "learning_rate": 2.722515821207078e-07, "loss": 0.4331, "step": 7208 }, { "epoch": 3.5763931984609654, "grad_norm": 0.07175749391331662, "learning_rate": 2.716157322090041e-07, "loss": 
0.4423, "step": 7209 }, { "epoch": 3.576889661164205, "grad_norm": 0.07385981567472476, "learning_rate": 2.709806049550218e-07, "loss": 0.4132, "step": 7210 }, { "epoch": 3.5773861238674445, "grad_norm": 0.07213037434962473, "learning_rate": 2.7034620045582937e-07, "loss": 0.416, "step": 7211 }, { "epoch": 3.577882586570684, "grad_norm": 0.07190591120086846, "learning_rate": 2.6971251880838657e-07, "loss": 0.4106, "step": 7212 }, { "epoch": 3.578379049273923, "grad_norm": 0.07414440862138535, "learning_rate": 2.6907956010954086e-07, "loss": 0.4741, "step": 7213 }, { "epoch": 3.578875511977163, "grad_norm": 0.07271101396039249, "learning_rate": 2.684473244560298e-07, "loss": 0.4323, "step": 7214 }, { "epoch": 3.5793719746804022, "grad_norm": 0.07000619994876894, "learning_rate": 2.678158119444796e-07, "loss": 0.4145, "step": 7215 }, { "epoch": 3.5798684373836416, "grad_norm": 0.0720654558912617, "learning_rate": 2.6718502267140844e-07, "loss": 0.4412, "step": 7216 }, { "epoch": 3.580364900086881, "grad_norm": 0.07265652672638535, "learning_rate": 2.6655495673322205e-07, "loss": 0.4383, "step": 7217 }, { "epoch": 3.5808613627901202, "grad_norm": 0.07569100909955953, "learning_rate": 2.659256142262145e-07, "loss": 0.4671, "step": 7218 }, { "epoch": 3.5813578254933596, "grad_norm": 0.0734469350012236, "learning_rate": 2.652969952465728e-07, "loss": 0.411, "step": 7219 }, { "epoch": 3.5818542881965993, "grad_norm": 0.07126251992525097, "learning_rate": 2.646690998903689e-07, "loss": 0.4144, "step": 7220 }, { "epoch": 3.5823507508998387, "grad_norm": 0.07256713526228342, "learning_rate": 2.640419282535689e-07, "loss": 0.4074, "step": 7221 }, { "epoch": 3.582847213603078, "grad_norm": 0.0753410981599593, "learning_rate": 2.634154804320249e-07, "loss": 0.4538, "step": 7222 }, { "epoch": 3.5833436763063173, "grad_norm": 0.07215766584838977, "learning_rate": 2.6278975652147875e-07, "loss": 0.438, "step": 7223 }, { "epoch": 3.583840139009557, "grad_norm": 0.07323954472644308, "learning_rate": 2.6216475661756336e-07, "loss": 0.4397, "step": 7224 }, { "epoch": 3.5843366017127964, "grad_norm": 0.0764315762368446, "learning_rate": 2.6154048081579897e-07, "loss": 0.4896, "step": 7225 }, { "epoch": 3.5848330644160358, "grad_norm": 0.07373114519490154, "learning_rate": 2.6091692921159705e-07, "loss": 0.4582, "step": 7226 }, { "epoch": 3.585329527119275, "grad_norm": 0.07338166822291779, "learning_rate": 2.602941019002575e-07, "loss": 0.4599, "step": 7227 }, { "epoch": 3.5858259898225144, "grad_norm": 0.07156011806994722, "learning_rate": 2.5967199897696806e-07, "loss": 0.4445, "step": 7228 }, { "epoch": 3.5863224525257538, "grad_norm": 0.07461471442375366, "learning_rate": 2.5905062053680984e-07, "loss": 0.4621, "step": 7229 }, { "epoch": 3.5868189152289935, "grad_norm": 0.07397449325671485, "learning_rate": 2.584299666747475e-07, "loss": 0.4622, "step": 7230 }, { "epoch": 3.587315377932233, "grad_norm": 0.0733072439245948, "learning_rate": 2.578100374856402e-07, "loss": 0.4317, "step": 7231 }, { "epoch": 3.587811840635472, "grad_norm": 0.07337499878496877, "learning_rate": 2.571908330642325e-07, "loss": 0.4868, "step": 7232 }, { "epoch": 3.5883083033387115, "grad_norm": 0.0713814077159316, "learning_rate": 2.5657235350516175e-07, "loss": 0.4651, "step": 7233 }, { "epoch": 3.5888047660419513, "grad_norm": 0.07316923475034787, "learning_rate": 2.5595459890295106e-07, "loss": 0.4265, "step": 7234 }, { "epoch": 3.5893012287451906, "grad_norm": 0.07245152311967211, "learning_rate": 2.553375693520149e-07, 
"loss": 0.4354, "step": 7235 }, { "epoch": 3.58979769144843, "grad_norm": 0.07337580274838068, "learning_rate": 2.547212649466568e-07, "loss": 0.4466, "step": 7236 }, { "epoch": 3.5902941541516693, "grad_norm": 0.07299768932189318, "learning_rate": 2.541056857810681e-07, "loss": 0.4236, "step": 7237 }, { "epoch": 3.5907906168549086, "grad_norm": 0.07410300395331985, "learning_rate": 2.534908319493312e-07, "loss": 0.4335, "step": 7238 }, { "epoch": 3.591287079558148, "grad_norm": 0.07202380812272681, "learning_rate": 2.528767035454166e-07, "loss": 0.4323, "step": 7239 }, { "epoch": 3.5917835422613877, "grad_norm": 0.07133969448762374, "learning_rate": 2.522633006631825e-07, "loss": 0.4409, "step": 7240 }, { "epoch": 3.592280004964627, "grad_norm": 0.07511748521712497, "learning_rate": 2.516506233963795e-07, "loss": 0.4438, "step": 7241 }, { "epoch": 3.5927764676678664, "grad_norm": 0.07436474096664406, "learning_rate": 2.5103867183864425e-07, "loss": 0.433, "step": 7242 }, { "epoch": 3.5932729303711057, "grad_norm": 0.07322180245682137, "learning_rate": 2.504274460835038e-07, "loss": 0.409, "step": 7243 }, { "epoch": 3.5937693930743455, "grad_norm": 0.07174823455141795, "learning_rate": 2.4981694622437546e-07, "loss": 0.4248, "step": 7244 }, { "epoch": 3.594265855777585, "grad_norm": 0.07219829398726958, "learning_rate": 2.492071723545636e-07, "loss": 0.4469, "step": 7245 }, { "epoch": 3.594762318480824, "grad_norm": 0.07416423979824675, "learning_rate": 2.4859812456726195e-07, "loss": 0.4398, "step": 7246 }, { "epoch": 3.5952587811840635, "grad_norm": 0.072503482237734, "learning_rate": 2.479898029555533e-07, "loss": 0.4255, "step": 7247 }, { "epoch": 3.595755243887303, "grad_norm": 0.07210104204463237, "learning_rate": 2.473822076124116e-07, "loss": 0.4718, "step": 7248 }, { "epoch": 3.596251706590542, "grad_norm": 0.07646935551434451, "learning_rate": 2.4677533863069705e-07, "loss": 0.4709, "step": 7249 }, { "epoch": 3.596748169293782, "grad_norm": 0.07040667756464702, "learning_rate": 2.461691961031587e-07, "loss": 0.4347, "step": 7250 }, { "epoch": 3.5972446319970213, "grad_norm": 0.07550119093373847, "learning_rate": 2.4556378012243807e-07, "loss": 0.4966, "step": 7251 }, { "epoch": 3.5977410947002606, "grad_norm": 0.07199950299189856, "learning_rate": 2.449590907810612e-07, "loss": 0.4439, "step": 7252 }, { "epoch": 3.5982375574035, "grad_norm": 0.0702318610232438, "learning_rate": 2.4435512817144625e-07, "loss": 0.4417, "step": 7253 }, { "epoch": 3.5987340201067397, "grad_norm": 0.07549606737474215, "learning_rate": 2.4375189238589945e-07, "loss": 0.4632, "step": 7254 }, { "epoch": 3.599230482809979, "grad_norm": 0.07479633717728829, "learning_rate": 2.4314938351661486e-07, "loss": 0.4317, "step": 7255 }, { "epoch": 3.5997269455132184, "grad_norm": 0.07362432336615526, "learning_rate": 2.425476016556766e-07, "loss": 0.47, "step": 7256 }, { "epoch": 3.6002234082164577, "grad_norm": 0.07466644313513754, "learning_rate": 2.4194654689505716e-07, "loss": 0.4256, "step": 7257 }, { "epoch": 3.600719870919697, "grad_norm": 0.07352553344538089, "learning_rate": 2.4134621932661916e-07, "loss": 0.4307, "step": 7258 }, { "epoch": 3.6012163336229364, "grad_norm": 0.07137696369934685, "learning_rate": 2.407466190421126e-07, "loss": 0.4212, "step": 7259 }, { "epoch": 3.601712796326176, "grad_norm": 0.07388793112030832, "learning_rate": 2.4014774613317525e-07, "loss": 0.4695, "step": 7260 }, { "epoch": 3.6022092590294155, "grad_norm": 0.07553122083868365, "learning_rate": 2.3954960069133837e-07, 
"loss": 0.4511, "step": 7261 }, { "epoch": 3.602705721732655, "grad_norm": 0.07526863616354296, "learning_rate": 2.3895218280801547e-07, "loss": 0.432, "step": 7262 }, { "epoch": 3.603202184435894, "grad_norm": 0.07314299399999061, "learning_rate": 2.3835549257451408e-07, "loss": 0.4385, "step": 7263 }, { "epoch": 3.603698647139134, "grad_norm": 0.07240540018544343, "learning_rate": 2.3775953008202847e-07, "loss": 0.4319, "step": 7264 }, { "epoch": 3.6041951098423732, "grad_norm": 0.07352272917174481, "learning_rate": 2.3716429542164244e-07, "loss": 0.4563, "step": 7265 }, { "epoch": 3.6046915725456126, "grad_norm": 0.07016741575846668, "learning_rate": 2.365697886843271e-07, "loss": 0.4203, "step": 7266 }, { "epoch": 3.605188035248852, "grad_norm": 0.0725771299653585, "learning_rate": 2.359760099609437e-07, "loss": 0.4473, "step": 7267 }, { "epoch": 3.6056844979520912, "grad_norm": 0.07475172685609481, "learning_rate": 2.353829593422424e-07, "loss": 0.4599, "step": 7268 }, { "epoch": 3.6061809606553306, "grad_norm": 0.0750639449163914, "learning_rate": 2.3479063691886018e-07, "loss": 0.4261, "step": 7269 }, { "epoch": 3.6066774233585703, "grad_norm": 0.07078876083450575, "learning_rate": 2.3419904278132565e-07, "loss": 0.4384, "step": 7270 }, { "epoch": 3.6071738860618097, "grad_norm": 0.07311201285095503, "learning_rate": 2.3360817702005323e-07, "loss": 0.4862, "step": 7271 }, { "epoch": 3.607670348765049, "grad_norm": 0.07579752198532307, "learning_rate": 2.330180397253473e-07, "loss": 0.457, "step": 7272 }, { "epoch": 3.6081668114682883, "grad_norm": 0.07207424841476477, "learning_rate": 2.3242863098740187e-07, "loss": 0.4296, "step": 7273 }, { "epoch": 3.608663274171528, "grad_norm": 0.07384130182890736, "learning_rate": 2.3183995089629707e-07, "loss": 0.4589, "step": 7274 }, { "epoch": 3.6091597368747674, "grad_norm": 0.07191996401035634, "learning_rate": 2.3125199954200482e-07, "loss": 0.4498, "step": 7275 }, { "epoch": 3.6096561995780068, "grad_norm": 0.07344068487583766, "learning_rate": 2.3066477701438383e-07, "loss": 0.435, "step": 7276 }, { "epoch": 3.610152662281246, "grad_norm": 0.07526670786555902, "learning_rate": 2.3007828340318117e-07, "loss": 0.4632, "step": 7277 }, { "epoch": 3.6106491249844854, "grad_norm": 0.07578434183870061, "learning_rate": 2.2949251879803236e-07, "loss": 0.4655, "step": 7278 }, { "epoch": 3.6111455876877248, "grad_norm": 0.0706605039902432, "learning_rate": 2.28907483288463e-07, "loss": 0.4075, "step": 7279 }, { "epoch": 3.6116420503909645, "grad_norm": 0.07303631071464331, "learning_rate": 2.2832317696388607e-07, "loss": 0.4445, "step": 7280 }, { "epoch": 3.612138513094204, "grad_norm": 0.070072324686116, "learning_rate": 2.2773959991360394e-07, "loss": 0.439, "step": 7281 }, { "epoch": 3.612634975797443, "grad_norm": 0.07386234745655829, "learning_rate": 2.2715675222680588e-07, "loss": 0.4522, "step": 7282 }, { "epoch": 3.6131314385006825, "grad_norm": 0.0719746863855503, "learning_rate": 2.265746339925723e-07, "loss": 0.4513, "step": 7283 }, { "epoch": 3.6136279012039223, "grad_norm": 0.07171417706809774, "learning_rate": 2.2599324529986866e-07, "loss": 0.4085, "step": 7284 }, { "epoch": 3.6141243639071616, "grad_norm": 0.07465108871234516, "learning_rate": 2.2541258623755334e-07, "loss": 0.4803, "step": 7285 }, { "epoch": 3.614620826610401, "grad_norm": 0.07537298779351806, "learning_rate": 2.2483265689436929e-07, "loss": 0.4569, "step": 7286 }, { "epoch": 3.6151172893136403, "grad_norm": 0.07750838694730808, "learning_rate": 
2.2425345735894888e-07, "loss": 0.4839, "step": 7287 }, { "epoch": 3.6156137520168796, "grad_norm": 0.07299486180670603, "learning_rate": 2.2367498771981522e-07, "loss": 0.4417, "step": 7288 }, { "epoch": 3.616110214720119, "grad_norm": 0.07301228623195481, "learning_rate": 2.230972480653759e-07, "loss": 0.4334, "step": 7289 }, { "epoch": 3.6166066774233587, "grad_norm": 0.07226158965409316, "learning_rate": 2.2252023848393144e-07, "loss": 0.4418, "step": 7290 }, { "epoch": 3.617103140126598, "grad_norm": 0.07130018174773109, "learning_rate": 2.219439590636674e-07, "loss": 0.4359, "step": 7291 }, { "epoch": 3.6175996028298374, "grad_norm": 0.07284789034528366, "learning_rate": 2.213684098926583e-07, "loss": 0.4229, "step": 7292 }, { "epoch": 3.6180960655330767, "grad_norm": 0.07327554168996199, "learning_rate": 2.2079359105886989e-07, "loss": 0.4299, "step": 7293 }, { "epoch": 3.6185925282363165, "grad_norm": 0.07272115811559865, "learning_rate": 2.202195026501508e-07, "loss": 0.4454, "step": 7294 }, { "epoch": 3.619088990939556, "grad_norm": 0.07328239249201607, "learning_rate": 2.1964614475424306e-07, "loss": 0.4301, "step": 7295 }, { "epoch": 3.619585453642795, "grad_norm": 0.07206975572008621, "learning_rate": 2.1907351745877437e-07, "loss": 0.4209, "step": 7296 }, { "epoch": 3.6200819163460345, "grad_norm": 0.07177592916609533, "learning_rate": 2.1850162085126303e-07, "loss": 0.4084, "step": 7297 }, { "epoch": 3.620578379049274, "grad_norm": 0.07030122376900125, "learning_rate": 2.179304550191136e-07, "loss": 0.4225, "step": 7298 }, { "epoch": 3.621074841752513, "grad_norm": 0.07258135447311445, "learning_rate": 2.1736002004961898e-07, "loss": 0.4344, "step": 7299 }, { "epoch": 3.621571304455753, "grad_norm": 0.07397369089030137, "learning_rate": 2.167903160299617e-07, "loss": 0.4452, "step": 7300 }, { "epoch": 3.6220677671589923, "grad_norm": 0.06982375268398766, "learning_rate": 2.1622134304721098e-07, "loss": 0.4241, "step": 7301 }, { "epoch": 3.6225642298622316, "grad_norm": 0.07186374761971, "learning_rate": 2.156531011883267e-07, "loss": 0.4109, "step": 7302 }, { "epoch": 3.623060692565471, "grad_norm": 0.07247529471597118, "learning_rate": 2.15085590540155e-07, "loss": 0.4511, "step": 7303 }, { "epoch": 3.6235571552687107, "grad_norm": 0.07458540665068539, "learning_rate": 2.1451881118942975e-07, "loss": 0.4617, "step": 7304 }, { "epoch": 3.62405361797195, "grad_norm": 0.07582139648778306, "learning_rate": 2.1395276322277504e-07, "loss": 0.4627, "step": 7305 }, { "epoch": 3.6245500806751894, "grad_norm": 0.07402453362459176, "learning_rate": 2.1338744672670165e-07, "loss": 0.4924, "step": 7306 }, { "epoch": 3.6250465433784287, "grad_norm": 0.07619031513083764, "learning_rate": 2.1282286178761046e-07, "loss": 0.4432, "step": 7307 }, { "epoch": 3.625543006081668, "grad_norm": 0.07527533962278696, "learning_rate": 2.1225900849178804e-07, "loss": 0.4662, "step": 7308 }, { "epoch": 3.6260394687849073, "grad_norm": 0.07256160181172347, "learning_rate": 2.11695886925411e-07, "loss": 0.4579, "step": 7309 }, { "epoch": 3.6265359314881467, "grad_norm": 0.07459927144810276, "learning_rate": 2.1113349717454267e-07, "loss": 0.4597, "step": 7310 }, { "epoch": 3.6270323941913865, "grad_norm": 0.07210064448559347, "learning_rate": 2.105718393251349e-07, "loss": 0.4384, "step": 7311 }, { "epoch": 3.627528856894626, "grad_norm": 0.07283451452803122, "learning_rate": 2.100109134630296e-07, "loss": 0.4451, "step": 7312 }, { "epoch": 3.628025319597865, "grad_norm": 0.0768403360547824, 
"learning_rate": 2.094507196739537e-07, "loss": 0.4738, "step": 7313 }, { "epoch": 3.628521782301105, "grad_norm": 0.0744758874009114, "learning_rate": 2.0889125804352595e-07, "loss": 0.4502, "step": 7314 }, { "epoch": 3.6290182450043442, "grad_norm": 0.0716895568860105, "learning_rate": 2.0833252865724907e-07, "loss": 0.4421, "step": 7315 }, { "epoch": 3.6295147077075836, "grad_norm": 0.0726724197567022, "learning_rate": 2.077745316005164e-07, "loss": 0.4341, "step": 7316 }, { "epoch": 3.630011170410823, "grad_norm": 0.07434405417937585, "learning_rate": 2.0721726695860977e-07, "loss": 0.4293, "step": 7317 }, { "epoch": 3.630507633114062, "grad_norm": 0.07143412055686629, "learning_rate": 2.0666073481669714e-07, "loss": 0.4097, "step": 7318 }, { "epoch": 3.6310040958173015, "grad_norm": 0.07276002545567115, "learning_rate": 2.0610493525983544e-07, "loss": 0.4411, "step": 7319 }, { "epoch": 3.631500558520541, "grad_norm": 0.07075461918486434, "learning_rate": 2.0554986837297064e-07, "loss": 0.427, "step": 7320 }, { "epoch": 3.6319970212237807, "grad_norm": 0.07420935677934005, "learning_rate": 2.049955342409349e-07, "loss": 0.426, "step": 7321 }, { "epoch": 3.63249348392702, "grad_norm": 0.06984495924090936, "learning_rate": 2.044419329484504e-07, "loss": 0.4552, "step": 7322 }, { "epoch": 3.6329899466302593, "grad_norm": 0.06939603455821021, "learning_rate": 2.0388906458012503e-07, "loss": 0.4255, "step": 7323 }, { "epoch": 3.633486409333499, "grad_norm": 0.07035364818165041, "learning_rate": 2.0333692922045623e-07, "loss": 0.4284, "step": 7324 }, { "epoch": 3.6339828720367384, "grad_norm": 0.07382774656321925, "learning_rate": 2.0278552695383036e-07, "loss": 0.4388, "step": 7325 }, { "epoch": 3.6344793347399778, "grad_norm": 0.07532508040952224, "learning_rate": 2.022348578645178e-07, "loss": 0.4571, "step": 7326 }, { "epoch": 3.634975797443217, "grad_norm": 0.07251073324334695, "learning_rate": 2.0168492203668122e-07, "loss": 0.4427, "step": 7327 }, { "epoch": 3.6354722601464564, "grad_norm": 0.0706357298880477, "learning_rate": 2.0113571955436895e-07, "loss": 0.4272, "step": 7328 }, { "epoch": 3.6359687228496957, "grad_norm": 0.07216767093440042, "learning_rate": 2.0058725050151828e-07, "loss": 0.4375, "step": 7329 }, { "epoch": 3.636465185552935, "grad_norm": 0.07330320428541527, "learning_rate": 2.0003951496195385e-07, "loss": 0.4575, "step": 7330 }, { "epoch": 3.636961648256175, "grad_norm": 0.07144970418486908, "learning_rate": 1.9949251301938756e-07, "loss": 0.4409, "step": 7331 }, { "epoch": 3.637458110959414, "grad_norm": 0.07503845282729875, "learning_rate": 1.9894624475742086e-07, "loss": 0.4707, "step": 7332 }, { "epoch": 3.6379545736626535, "grad_norm": 0.07117560793351785, "learning_rate": 1.9840071025954089e-07, "loss": 0.4067, "step": 7333 }, { "epoch": 3.6384510363658933, "grad_norm": 0.07277799392519127, "learning_rate": 1.9785590960912538e-07, "loss": 0.4395, "step": 7334 }, { "epoch": 3.6389474990691326, "grad_norm": 0.07147057359788965, "learning_rate": 1.9731184288943772e-07, "loss": 0.4275, "step": 7335 }, { "epoch": 3.639443961772372, "grad_norm": 0.11171374930937528, "learning_rate": 1.9676851018362865e-07, "loss": 0.4633, "step": 7336 }, { "epoch": 3.6399404244756113, "grad_norm": 0.07646171474556615, "learning_rate": 1.9622591157473946e-07, "loss": 0.4648, "step": 7337 }, { "epoch": 3.6404368871788506, "grad_norm": 0.07372439939792098, "learning_rate": 1.9568404714569666e-07, "loss": 0.453, "step": 7338 }, { "epoch": 3.64093334988209, "grad_norm": 
0.07240599601324296, "learning_rate": 1.951429169793162e-07, "loss": 0.4366, "step": 7339 }, { "epoch": 3.6414298125853293, "grad_norm": 0.072422143521238, "learning_rate": 1.9460252115830137e-07, "loss": 0.4423, "step": 7340 }, { "epoch": 3.641926275288569, "grad_norm": 0.07145807098907717, "learning_rate": 1.940628597652422e-07, "loss": 0.4506, "step": 7341 }, { "epoch": 3.6424227379918084, "grad_norm": 0.07421632819020778, "learning_rate": 1.9352393288261717e-07, "loss": 0.4659, "step": 7342 }, { "epoch": 3.6429192006950477, "grad_norm": 0.07249759916938332, "learning_rate": 1.9298574059279263e-07, "loss": 0.4437, "step": 7343 }, { "epoch": 3.643415663398287, "grad_norm": 0.07546293310432843, "learning_rate": 1.9244828297802386e-07, "loss": 0.4802, "step": 7344 }, { "epoch": 3.643912126101527, "grad_norm": 0.07424234347976809, "learning_rate": 1.919115601204513e-07, "loss": 0.4367, "step": 7345 }, { "epoch": 3.644408588804766, "grad_norm": 0.07359058194638433, "learning_rate": 1.9137557210210544e-07, "loss": 0.4675, "step": 7346 }, { "epoch": 3.6449050515080055, "grad_norm": 0.07322009392359763, "learning_rate": 1.9084031900490297e-07, "loss": 0.4305, "step": 7347 }, { "epoch": 3.645401514211245, "grad_norm": 0.07376178653192299, "learning_rate": 1.9030580091064787e-07, "loss": 0.4631, "step": 7348 }, { "epoch": 3.645897976914484, "grad_norm": 0.07327292909717786, "learning_rate": 1.8977201790103428e-07, "loss": 0.424, "step": 7349 }, { "epoch": 3.6463944396177235, "grad_norm": 0.07114499977321155, "learning_rate": 1.892389700576408e-07, "loss": 0.426, "step": 7350 }, { "epoch": 3.6468909023209632, "grad_norm": 0.069110037267425, "learning_rate": 1.8870665746193672e-07, "loss": 0.4088, "step": 7351 }, { "epoch": 3.6473873650242026, "grad_norm": 0.07252850374903859, "learning_rate": 1.8817508019527696e-07, "loss": 0.4345, "step": 7352 }, { "epoch": 3.647883827727442, "grad_norm": 0.07267951521063351, "learning_rate": 1.8764423833890434e-07, "loss": 0.4111, "step": 7353 }, { "epoch": 3.6483802904306812, "grad_norm": 0.07179521032898407, "learning_rate": 1.8711413197394944e-07, "loss": 0.4216, "step": 7354 }, { "epoch": 3.648876753133921, "grad_norm": 0.07389459033510468, "learning_rate": 1.8658476118143086e-07, "loss": 0.4613, "step": 7355 }, { "epoch": 3.6493732158371603, "grad_norm": 0.0723475247866929, "learning_rate": 1.8605612604225388e-07, "loss": 0.4221, "step": 7356 }, { "epoch": 3.6498696785403997, "grad_norm": 0.07341268249466791, "learning_rate": 1.8552822663721382e-07, "loss": 0.4354, "step": 7357 }, { "epoch": 3.650366141243639, "grad_norm": 0.07049114822555134, "learning_rate": 1.850010630469884e-07, "loss": 0.4192, "step": 7358 }, { "epoch": 3.6508626039468783, "grad_norm": 0.07264265024902371, "learning_rate": 1.8447463535214872e-07, "loss": 0.4275, "step": 7359 }, { "epoch": 3.6513590666501177, "grad_norm": 0.06929144716295033, "learning_rate": 1.839489436331493e-07, "loss": 0.433, "step": 7360 }, { "epoch": 3.6518555293533574, "grad_norm": 0.07198194923441253, "learning_rate": 1.8342398797033479e-07, "loss": 0.4572, "step": 7361 }, { "epoch": 3.6523519920565968, "grad_norm": 0.07200270067094541, "learning_rate": 1.8289976844393599e-07, "loss": 0.4322, "step": 7362 }, { "epoch": 3.652848454759836, "grad_norm": 0.06941931192926973, "learning_rate": 1.8237628513407046e-07, "loss": 0.4091, "step": 7363 }, { "epoch": 3.6533449174630754, "grad_norm": 0.07070649329985829, "learning_rate": 1.818535381207459e-07, "loss": 0.4351, "step": 7364 }, { "epoch": 3.653841380166315, 
"grad_norm": 0.07445472823058359, "learning_rate": 1.8133152748385397e-07, "loss": 0.4628, "step": 7365 }, { "epoch": 3.6543378428695545, "grad_norm": 0.07447413719165662, "learning_rate": 1.8081025330317748e-07, "loss": 0.4695, "step": 7366 }, { "epoch": 3.654834305572794, "grad_norm": 0.07359381894990621, "learning_rate": 1.8028971565838381e-07, "loss": 0.437, "step": 7367 }, { "epoch": 3.655330768276033, "grad_norm": 0.07046676873498374, "learning_rate": 1.7976991462902827e-07, "loss": 0.447, "step": 7368 }, { "epoch": 3.6558272309792725, "grad_norm": 0.07269725462449012, "learning_rate": 1.7925085029455558e-07, "loss": 0.4503, "step": 7369 }, { "epoch": 3.656323693682512, "grad_norm": 0.07852134438582638, "learning_rate": 1.787325227342951e-07, "loss": 0.4914, "step": 7370 }, { "epoch": 3.6568201563857516, "grad_norm": 0.07509607274320566, "learning_rate": 1.7821493202746565e-07, "loss": 0.4357, "step": 7371 }, { "epoch": 3.657316619088991, "grad_norm": 0.07142845307398488, "learning_rate": 1.7769807825317232e-07, "loss": 0.4394, "step": 7372 }, { "epoch": 3.6578130817922303, "grad_norm": 0.07178759846116196, "learning_rate": 1.77181961490408e-07, "loss": 0.4345, "step": 7373 }, { "epoch": 3.6583095444954696, "grad_norm": 0.07158932483763085, "learning_rate": 1.7666658181805295e-07, "loss": 0.434, "step": 7374 }, { "epoch": 3.6588060071987094, "grad_norm": 0.07280743494373647, "learning_rate": 1.7615193931487417e-07, "loss": 0.4337, "step": 7375 }, { "epoch": 3.6593024699019487, "grad_norm": 0.07453278405125927, "learning_rate": 1.7563803405952761e-07, "loss": 0.4368, "step": 7376 }, { "epoch": 3.659798932605188, "grad_norm": 0.07010280774336348, "learning_rate": 1.751248661305538e-07, "loss": 0.4187, "step": 7377 }, { "epoch": 3.6602953953084274, "grad_norm": 0.0726927043570313, "learning_rate": 1.7461243560638442e-07, "loss": 0.4559, "step": 7378 }, { "epoch": 3.6607918580116667, "grad_norm": 0.07233361327364231, "learning_rate": 1.741007425653346e-07, "loss": 0.4505, "step": 7379 }, { "epoch": 3.661288320714906, "grad_norm": 0.07363675822337719, "learning_rate": 1.7358978708560848e-07, "loss": 0.4361, "step": 7380 }, { "epoch": 3.661784783418146, "grad_norm": 0.07199309976813066, "learning_rate": 1.73079569245298e-07, "loss": 0.4289, "step": 7381 }, { "epoch": 3.662281246121385, "grad_norm": 0.07317353186147292, "learning_rate": 1.7257008912238138e-07, "loss": 0.4487, "step": 7382 }, { "epoch": 3.6627777088246245, "grad_norm": 0.07372588029874809, "learning_rate": 1.720613467947252e-07, "loss": 0.4389, "step": 7383 }, { "epoch": 3.663274171527864, "grad_norm": 0.07320921827857442, "learning_rate": 1.715533423400817e-07, "loss": 0.4193, "step": 7384 }, { "epoch": 3.6637706342311036, "grad_norm": 0.07522855796845176, "learning_rate": 1.7104607583609157e-07, "loss": 0.4581, "step": 7385 }, { "epoch": 3.664267096934343, "grad_norm": 0.0734689312284619, "learning_rate": 1.7053954736028222e-07, "loss": 0.4576, "step": 7386 }, { "epoch": 3.6647635596375823, "grad_norm": 0.07075618371467093, "learning_rate": 1.700337569900684e-07, "loss": 0.4227, "step": 7387 }, { "epoch": 3.6652600223408216, "grad_norm": 0.06987389893800902, "learning_rate": 1.6952870480275273e-07, "loss": 0.4298, "step": 7388 }, { "epoch": 3.665756485044061, "grad_norm": 0.07197270042011136, "learning_rate": 1.6902439087552402e-07, "loss": 0.4571, "step": 7389 }, { "epoch": 3.6662529477473003, "grad_norm": 0.07106276503884373, "learning_rate": 1.6852081528545838e-07, "loss": 0.4175, "step": 7390 }, { "epoch": 
3.66674941045054, "grad_norm": 0.07285710553611387, "learning_rate": 1.680179781095187e-07, "loss": 0.4527, "step": 7391 }, { "epoch": 3.6672458731537794, "grad_norm": 0.0737065341456534, "learning_rate": 1.6751587942455627e-07, "loss": 0.4609, "step": 7392 }, { "epoch": 3.6677423358570187, "grad_norm": 0.07056335676130286, "learning_rate": 1.670145193073086e-07, "loss": 0.4241, "step": 7393 }, { "epoch": 3.668238798560258, "grad_norm": 0.07405405366597342, "learning_rate": 1.665138978344011e-07, "loss": 0.4321, "step": 7394 }, { "epoch": 3.668735261263498, "grad_norm": 0.07170622222913631, "learning_rate": 1.6601401508234417e-07, "loss": 0.4267, "step": 7395 }, { "epoch": 3.669231723966737, "grad_norm": 0.07178680400866755, "learning_rate": 1.6551487112753893e-07, "loss": 0.4264, "step": 7396 }, { "epoch": 3.6697281866699765, "grad_norm": 0.0721285124574552, "learning_rate": 1.6501646604626997e-07, "loss": 0.4489, "step": 7397 }, { "epoch": 3.670224649373216, "grad_norm": 0.07082806212736886, "learning_rate": 1.6451879991471186e-07, "loss": 0.4538, "step": 7398 }, { "epoch": 3.670721112076455, "grad_norm": 0.0729535155041323, "learning_rate": 1.640218728089238e-07, "loss": 0.4467, "step": 7399 }, { "epoch": 3.6712175747796945, "grad_norm": 0.07230176500116002, "learning_rate": 1.6352568480485277e-07, "loss": 0.4326, "step": 7400 }, { "epoch": 3.6717140374829342, "grad_norm": 0.07345615326458771, "learning_rate": 1.6303023597833478e-07, "loss": 0.462, "step": 7401 }, { "epoch": 3.6722105001861736, "grad_norm": 0.07151370299052442, "learning_rate": 1.6253552640508985e-07, "loss": 0.4369, "step": 7402 }, { "epoch": 3.672706962889413, "grad_norm": 0.07148248573684546, "learning_rate": 1.6204155616072693e-07, "loss": 0.4302, "step": 7403 }, { "epoch": 3.6732034255926522, "grad_norm": 0.07043625217762424, "learning_rate": 1.615483253207417e-07, "loss": 0.4506, "step": 7404 }, { "epoch": 3.673699888295892, "grad_norm": 0.0728731177103944, "learning_rate": 1.610558339605156e-07, "loss": 0.4342, "step": 7405 }, { "epoch": 3.6741963509991313, "grad_norm": 0.07369450751879054, "learning_rate": 1.6056408215532005e-07, "loss": 0.4324, "step": 7406 }, { "epoch": 3.6746928137023707, "grad_norm": 0.07185327188054835, "learning_rate": 1.600730699803088e-07, "loss": 0.4597, "step": 7407 }, { "epoch": 3.67518927640561, "grad_norm": 0.07372288477305014, "learning_rate": 1.5958279751052686e-07, "loss": 0.4277, "step": 7408 }, { "epoch": 3.6756857391088493, "grad_norm": 0.07364912954293501, "learning_rate": 1.5909326482090371e-07, "loss": 0.4667, "step": 7409 }, { "epoch": 3.6761822018120887, "grad_norm": 0.07528543823766569, "learning_rate": 1.5860447198625784e-07, "loss": 0.4458, "step": 7410 }, { "epoch": 3.6766786645153284, "grad_norm": 0.07312472664500712, "learning_rate": 1.5811641908129226e-07, "loss": 0.4478, "step": 7411 }, { "epoch": 3.6771751272185678, "grad_norm": 0.07443070106907346, "learning_rate": 1.576291061805979e-07, "loss": 0.4292, "step": 7412 }, { "epoch": 3.677671589921807, "grad_norm": 0.07226946918899885, "learning_rate": 1.571425333586535e-07, "loss": 0.4423, "step": 7413 }, { "epoch": 3.6781680526250464, "grad_norm": 0.07206464093796312, "learning_rate": 1.5665670068982286e-07, "loss": 0.4505, "step": 7414 }, { "epoch": 3.678664515328286, "grad_norm": 0.07188587481501309, "learning_rate": 1.5617160824835942e-07, "loss": 0.4254, "step": 7415 }, { "epoch": 3.6791609780315255, "grad_norm": 0.07265685993339371, "learning_rate": 1.556872561084005e-07, "loss": 0.415, "step": 7416 }, { 
"epoch": 3.679657440734765, "grad_norm": 0.07162493038184804, "learning_rate": 1.552036443439714e-07, "loss": 0.4041, "step": 7417 }, { "epoch": 3.680153903438004, "grad_norm": 0.07155610729032282, "learning_rate": 1.5472077302898515e-07, "loss": 0.4076, "step": 7418 }, { "epoch": 3.6806503661412435, "grad_norm": 0.07355344416089545, "learning_rate": 1.542386422372405e-07, "loss": 0.4398, "step": 7419 }, { "epoch": 3.681146828844483, "grad_norm": 0.07116724820478201, "learning_rate": 1.5375725204242407e-07, "loss": 0.4096, "step": 7420 }, { "epoch": 3.6816432915477226, "grad_norm": 0.07241791000339823, "learning_rate": 1.532766025181076e-07, "loss": 0.4413, "step": 7421 }, { "epoch": 3.682139754250962, "grad_norm": 0.07047095111897257, "learning_rate": 1.5279669373775118e-07, "loss": 0.4271, "step": 7422 }, { "epoch": 3.6826362169542013, "grad_norm": 0.07313074541716791, "learning_rate": 1.523175257747017e-07, "loss": 0.4202, "step": 7423 }, { "epoch": 3.6831326796574406, "grad_norm": 0.0728777056119168, "learning_rate": 1.518390987021906e-07, "loss": 0.4475, "step": 7424 }, { "epoch": 3.6836291423606804, "grad_norm": 0.07442455746276211, "learning_rate": 1.5136141259333992e-07, "loss": 0.4543, "step": 7425 }, { "epoch": 3.6841256050639197, "grad_norm": 0.07267058678491543, "learning_rate": 1.5088446752115403e-07, "loss": 0.4238, "step": 7426 }, { "epoch": 3.684622067767159, "grad_norm": 0.07096583850524037, "learning_rate": 1.504082635585291e-07, "loss": 0.4389, "step": 7427 }, { "epoch": 3.6851185304703984, "grad_norm": 0.07447238809266303, "learning_rate": 1.49932800778243e-07, "loss": 0.4358, "step": 7428 }, { "epoch": 3.6856149931736377, "grad_norm": 0.0726313401883301, "learning_rate": 1.494580792529632e-07, "loss": 0.4654, "step": 7429 }, { "epoch": 3.686111455876877, "grad_norm": 0.07211262857098678, "learning_rate": 1.4898409905524436e-07, "loss": 0.4304, "step": 7430 }, { "epoch": 3.686607918580117, "grad_norm": 0.07363037113750746, "learning_rate": 1.4851086025752525e-07, "loss": 0.4148, "step": 7431 }, { "epoch": 3.687104381283356, "grad_norm": 0.07434935712528255, "learning_rate": 1.4803836293213303e-07, "loss": 0.4506, "step": 7432 }, { "epoch": 3.6876008439865955, "grad_norm": 0.07194422077130175, "learning_rate": 1.4756660715128267e-07, "loss": 0.4413, "step": 7433 }, { "epoch": 3.688097306689835, "grad_norm": 0.07294248103909735, "learning_rate": 1.4709559298707265e-07, "loss": 0.4236, "step": 7434 }, { "epoch": 3.6885937693930746, "grad_norm": 0.07226694533909107, "learning_rate": 1.4662532051149149e-07, "loss": 0.4567, "step": 7435 }, { "epoch": 3.689090232096314, "grad_norm": 0.07333144120478627, "learning_rate": 1.4615578979641164e-07, "loss": 0.4091, "step": 7436 }, { "epoch": 3.6895866947995533, "grad_norm": 0.07599661651446776, "learning_rate": 1.4568700091359412e-07, "loss": 0.4766, "step": 7437 }, { "epoch": 3.6900831575027926, "grad_norm": 0.07267348396613163, "learning_rate": 1.45218953934686e-07, "loss": 0.4541, "step": 7438 }, { "epoch": 3.690579620206032, "grad_norm": 0.07154268790112295, "learning_rate": 1.4475164893121952e-07, "loss": 0.445, "step": 7439 }, { "epoch": 3.6910760829092712, "grad_norm": 0.07736575888556468, "learning_rate": 1.4428508597461587e-07, "loss": 0.4725, "step": 7440 }, { "epoch": 3.691572545612511, "grad_norm": 0.07338375042541488, "learning_rate": 1.4381926513618139e-07, "loss": 0.4707, "step": 7441 }, { "epoch": 3.6920690083157504, "grad_norm": 0.07366882489429853, "learning_rate": 1.4335418648710907e-07, "loss": 0.4544, "step": 
7442 }, { "epoch": 3.6925654710189897, "grad_norm": 0.07366490558844847, "learning_rate": 1.4288985009847932e-07, "loss": 0.4729, "step": 7443 }, { "epoch": 3.693061933722229, "grad_norm": 0.07561083067989416, "learning_rate": 1.4242625604125758e-07, "loss": 0.4583, "step": 7444 }, { "epoch": 3.693558396425469, "grad_norm": 0.07254485416644309, "learning_rate": 1.4196340438629774e-07, "loss": 0.4202, "step": 7445 }, { "epoch": 3.694054859128708, "grad_norm": 0.07400616431660505, "learning_rate": 1.415012952043382e-07, "loss": 0.4553, "step": 7446 }, { "epoch": 3.6945513218319475, "grad_norm": 0.07556294028601898, "learning_rate": 1.4103992856600634e-07, "loss": 0.4469, "step": 7447 }, { "epoch": 3.695047784535187, "grad_norm": 0.07015152787078588, "learning_rate": 1.4057930454181412e-07, "loss": 0.4226, "step": 7448 }, { "epoch": 3.695544247238426, "grad_norm": 0.07176355041550919, "learning_rate": 1.4011942320215964e-07, "loss": 0.4476, "step": 7449 }, { "epoch": 3.6960407099416654, "grad_norm": 0.0723388702967807, "learning_rate": 1.396602846173295e-07, "loss": 0.4106, "step": 7450 }, { "epoch": 3.696537172644905, "grad_norm": 0.07139271451498826, "learning_rate": 1.3920188885749475e-07, "loss": 0.4268, "step": 7451 }, { "epoch": 3.6970336353481446, "grad_norm": 0.0730447884583585, "learning_rate": 1.3874423599271435e-07, "loss": 0.4533, "step": 7452 }, { "epoch": 3.697530098051384, "grad_norm": 0.07339909768171726, "learning_rate": 1.3828732609293404e-07, "loss": 0.4243, "step": 7453 }, { "epoch": 3.698026560754623, "grad_norm": 0.07116727161227962, "learning_rate": 1.378311592279835e-07, "loss": 0.4267, "step": 7454 }, { "epoch": 3.698523023457863, "grad_norm": 0.07034248657623826, "learning_rate": 1.3737573546758198e-07, "loss": 0.4024, "step": 7455 }, { "epoch": 3.6990194861611023, "grad_norm": 0.07127374652005736, "learning_rate": 1.3692105488133211e-07, "loss": 0.4129, "step": 7456 }, { "epoch": 3.6995159488643417, "grad_norm": 0.07375826456205288, "learning_rate": 1.364671175387261e-07, "loss": 0.4344, "step": 7457 }, { "epoch": 3.700012411567581, "grad_norm": 0.07357945355081973, "learning_rate": 1.3601392350913957e-07, "loss": 0.4299, "step": 7458 }, { "epoch": 3.7005088742708203, "grad_norm": 0.06876318428670228, "learning_rate": 1.3556147286183762e-07, "loss": 0.4066, "step": 7459 }, { "epoch": 3.7010053369740596, "grad_norm": 0.07282356150500176, "learning_rate": 1.3510976566596946e-07, "loss": 0.4544, "step": 7460 }, { "epoch": 3.701501799677299, "grad_norm": 0.07378253520368942, "learning_rate": 1.346588019905698e-07, "loss": 0.4738, "step": 7461 }, { "epoch": 3.7019982623805388, "grad_norm": 0.07330034917523164, "learning_rate": 1.3420858190456353e-07, "loss": 0.4493, "step": 7462 }, { "epoch": 3.702494725083778, "grad_norm": 0.07280228471411862, "learning_rate": 1.3375910547675785e-07, "loss": 0.4559, "step": 7463 }, { "epoch": 3.7029911877870174, "grad_norm": 0.07247121072667194, "learning_rate": 1.333103727758489e-07, "loss": 0.4429, "step": 7464 }, { "epoch": 3.703487650490257, "grad_norm": 0.07188303430763324, "learning_rate": 1.328623838704185e-07, "loss": 0.4578, "step": 7465 }, { "epoch": 3.7039841131934965, "grad_norm": 0.07198733038911835, "learning_rate": 1.3241513882893297e-07, "loss": 0.4456, "step": 7466 }, { "epoch": 3.704480575896736, "grad_norm": 0.07444942706026461, "learning_rate": 1.3196863771974877e-07, "loss": 0.4482, "step": 7467 }, { "epoch": 3.704977038599975, "grad_norm": 0.07331451044578703, "learning_rate": 1.3152288061110518e-07, "loss": 
0.4722, "step": 7468 }, { "epoch": 3.7054735013032145, "grad_norm": 0.0711213868596691, "learning_rate": 1.3107786757112827e-07, "loss": 0.4449, "step": 7469 }, { "epoch": 3.705969964006454, "grad_norm": 0.07371066642583825, "learning_rate": 1.3063359866783365e-07, "loss": 0.4485, "step": 7470 }, { "epoch": 3.706466426709693, "grad_norm": 0.07218421529119273, "learning_rate": 1.3019007396911809e-07, "loss": 0.4218, "step": 7471 }, { "epoch": 3.706962889412933, "grad_norm": 0.07372364917278044, "learning_rate": 1.297472935427685e-07, "loss": 0.4183, "step": 7472 }, { "epoch": 3.7074593521161723, "grad_norm": 0.0728757938451939, "learning_rate": 1.2930525745645572e-07, "loss": 0.4456, "step": 7473 }, { "epoch": 3.7079558148194116, "grad_norm": 0.07611551431202836, "learning_rate": 1.2886396577773963e-07, "loss": 0.4598, "step": 7474 }, { "epoch": 3.7084522775226514, "grad_norm": 0.07087427054206022, "learning_rate": 1.284234185740635e-07, "loss": 0.4217, "step": 7475 }, { "epoch": 3.7089487402258907, "grad_norm": 0.07179631403204342, "learning_rate": 1.2798361591275788e-07, "loss": 0.4418, "step": 7476 }, { "epoch": 3.70944520292913, "grad_norm": 0.07054594755591478, "learning_rate": 1.2754455786104015e-07, "loss": 0.4274, "step": 7477 }, { "epoch": 3.7099416656323694, "grad_norm": 0.0753189956312603, "learning_rate": 1.2710624448601216e-07, "loss": 0.4701, "step": 7478 }, { "epoch": 3.7104381283356087, "grad_norm": 0.074245085561885, "learning_rate": 1.2666867585466426e-07, "loss": 0.4503, "step": 7479 }, { "epoch": 3.710934591038848, "grad_norm": 0.07609399666272167, "learning_rate": 1.2623185203387124e-07, "loss": 0.4751, "step": 7480 }, { "epoch": 3.7114310537420874, "grad_norm": 0.0737868894777158, "learning_rate": 1.2579577309039416e-07, "loss": 0.4444, "step": 7481 }, { "epoch": 3.711927516445327, "grad_norm": 0.07286497148501907, "learning_rate": 1.253604390908819e-07, "loss": 0.4249, "step": 7482 }, { "epoch": 3.7124239791485665, "grad_norm": 0.07058913943698754, "learning_rate": 1.249258501018674e-07, "loss": 0.4333, "step": 7483 }, { "epoch": 3.712920441851806, "grad_norm": 0.07179867347108665, "learning_rate": 1.2449200618977087e-07, "loss": 0.407, "step": 7484 }, { "epoch": 3.713416904555045, "grad_norm": 0.07661219061733442, "learning_rate": 1.2405890742089866e-07, "loss": 0.4703, "step": 7485 }, { "epoch": 3.713913367258285, "grad_norm": 0.0743605137172351, "learning_rate": 1.2362655386144285e-07, "loss": 0.469, "step": 7486 }, { "epoch": 3.7144098299615242, "grad_norm": 0.07115634995620582, "learning_rate": 1.2319494557748112e-07, "loss": 0.3979, "step": 7487 }, { "epoch": 3.7149062926647636, "grad_norm": 0.07471501232365835, "learning_rate": 1.2276408263497796e-07, "loss": 0.4321, "step": 7488 }, { "epoch": 3.715402755368003, "grad_norm": 0.07250546434057126, "learning_rate": 1.2233396509978513e-07, "loss": 0.4548, "step": 7489 }, { "epoch": 3.7158992180712422, "grad_norm": 0.07214773644289035, "learning_rate": 1.2190459303763723e-07, "loss": 0.4651, "step": 7490 }, { "epoch": 3.7163956807744816, "grad_norm": 0.0718965188875489, "learning_rate": 1.2147596651415906e-07, "loss": 0.4139, "step": 7491 }, { "epoch": 3.7168921434777213, "grad_norm": 0.07277477056759525, "learning_rate": 1.2104808559485758e-07, "loss": 0.4387, "step": 7492 }, { "epoch": 3.7173886061809607, "grad_norm": 0.07317195671805793, "learning_rate": 1.2062095034512832e-07, "loss": 0.4226, "step": 7493 }, { "epoch": 3.7178850688842, "grad_norm": 0.0733610221756854, "learning_rate": 1.2019456083025184e-07, 
"loss": 0.4299, "step": 7494 }, { "epoch": 3.7183815315874393, "grad_norm": 0.07031093760031494, "learning_rate": 1.1976891711539485e-07, "loss": 0.4241, "step": 7495 }, { "epoch": 3.718877994290679, "grad_norm": 0.0732484652938987, "learning_rate": 1.193440192656109e-07, "loss": 0.4521, "step": 7496 }, { "epoch": 3.7193744569939184, "grad_norm": 0.0736922672961387, "learning_rate": 1.1891986734583805e-07, "loss": 0.4912, "step": 7497 }, { "epoch": 3.7198709196971578, "grad_norm": 0.07318471954689998, "learning_rate": 1.1849646142090054e-07, "loss": 0.4442, "step": 7498 }, { "epoch": 3.720367382400397, "grad_norm": 0.07406641358059599, "learning_rate": 1.1807380155551107e-07, "loss": 0.4461, "step": 7499 }, { "epoch": 3.7208638451036364, "grad_norm": 0.07588040681542361, "learning_rate": 1.1765188781426406e-07, "loss": 0.4549, "step": 7500 }, { "epoch": 3.7213603078068758, "grad_norm": 0.07167415013929496, "learning_rate": 1.1723072026164462e-07, "loss": 0.4408, "step": 7501 }, { "epoch": 3.7218567705101155, "grad_norm": 0.07461061552940207, "learning_rate": 1.1681029896202011e-07, "loss": 0.4485, "step": 7502 }, { "epoch": 3.722353233213355, "grad_norm": 0.07238913693556961, "learning_rate": 1.163906239796453e-07, "loss": 0.4396, "step": 7503 }, { "epoch": 3.722849695916594, "grad_norm": 0.07380424841271917, "learning_rate": 1.1597169537866104e-07, "loss": 0.461, "step": 7504 }, { "epoch": 3.7233461586198335, "grad_norm": 0.07294741460152432, "learning_rate": 1.1555351322309339e-07, "loss": 0.419, "step": 7505 }, { "epoch": 3.7238426213230733, "grad_norm": 0.07201224370610604, "learning_rate": 1.1513607757685508e-07, "loss": 0.4185, "step": 7506 }, { "epoch": 3.7243390840263126, "grad_norm": 0.07345882234607866, "learning_rate": 1.1471938850374509e-07, "loss": 0.4589, "step": 7507 }, { "epoch": 3.724835546729552, "grad_norm": 0.07540856132370888, "learning_rate": 1.1430344606744581e-07, "loss": 0.4805, "step": 7508 }, { "epoch": 3.7253320094327913, "grad_norm": 0.069747428732833, "learning_rate": 1.1388825033152973e-07, "loss": 0.4207, "step": 7509 }, { "epoch": 3.7258284721360306, "grad_norm": 0.0723745653939202, "learning_rate": 1.1347380135945108e-07, "loss": 0.4215, "step": 7510 }, { "epoch": 3.72632493483927, "grad_norm": 0.07205562171665118, "learning_rate": 1.1306009921455307e-07, "loss": 0.4283, "step": 7511 }, { "epoch": 3.7268213975425097, "grad_norm": 0.07103586324604899, "learning_rate": 1.1264714396006238e-07, "loss": 0.4365, "step": 7512 }, { "epoch": 3.727317860245749, "grad_norm": 0.07359648786780269, "learning_rate": 1.1223493565909238e-07, "loss": 0.426, "step": 7513 }, { "epoch": 3.7278143229489884, "grad_norm": 0.0731775813226635, "learning_rate": 1.1182347437464436e-07, "loss": 0.3941, "step": 7514 }, { "epoch": 3.7283107856522277, "grad_norm": 0.07398743238728402, "learning_rate": 1.1141276016960134e-07, "loss": 0.4517, "step": 7515 }, { "epoch": 3.7288072483554675, "grad_norm": 0.07149681628651035, "learning_rate": 1.110027931067359e-07, "loss": 0.4327, "step": 7516 }, { "epoch": 3.729303711058707, "grad_norm": 0.07604656685426203, "learning_rate": 1.1059357324870456e-07, "loss": 0.5081, "step": 7517 }, { "epoch": 3.729800173761946, "grad_norm": 0.07339999849617872, "learning_rate": 1.1018510065804954e-07, "loss": 0.422, "step": 7518 }, { "epoch": 3.7302966364651855, "grad_norm": 0.07454026734027142, "learning_rate": 1.097773753971998e-07, "loss": 0.4468, "step": 7519 }, { "epoch": 3.730793099168425, "grad_norm": 0.07104200483397377, "learning_rate": 
1.0937039752846934e-07, "loss": 0.4052, "step": 7520 }, { "epoch": 3.731289561871664, "grad_norm": 0.07364091604071907, "learning_rate": 1.0896416711405844e-07, "loss": 0.476, "step": 7521 }, { "epoch": 3.731786024574904, "grad_norm": 0.07229456530948636, "learning_rate": 1.0855868421605242e-07, "loss": 0.4509, "step": 7522 }, { "epoch": 3.7322824872781433, "grad_norm": 0.07751160754378096, "learning_rate": 1.0815394889642339e-07, "loss": 0.4764, "step": 7523 }, { "epoch": 3.7327789499813826, "grad_norm": 0.07244200225598892, "learning_rate": 1.0774996121702907e-07, "loss": 0.4451, "step": 7524 }, { "epoch": 3.733275412684622, "grad_norm": 0.07498395699972732, "learning_rate": 1.073467212396112e-07, "loss": 0.4413, "step": 7525 }, { "epoch": 3.7337718753878617, "grad_norm": 0.073491623792054, "learning_rate": 1.0694422902579937e-07, "loss": 0.4481, "step": 7526 }, { "epoch": 3.734268338091101, "grad_norm": 0.07329100270406572, "learning_rate": 1.0654248463710826e-07, "loss": 0.4361, "step": 7527 }, { "epoch": 3.7347648007943404, "grad_norm": 0.07118864173737534, "learning_rate": 1.0614148813493764e-07, "loss": 0.4608, "step": 7528 }, { "epoch": 3.7352612634975797, "grad_norm": 0.0706601013127105, "learning_rate": 1.0574123958057347e-07, "loss": 0.4538, "step": 7529 }, { "epoch": 3.735757726200819, "grad_norm": 0.07220402969613111, "learning_rate": 1.053417390351874e-07, "loss": 0.4513, "step": 7530 }, { "epoch": 3.7362541889040584, "grad_norm": 0.07169252185126151, "learning_rate": 1.049429865598367e-07, "loss": 0.4262, "step": 7531 }, { "epoch": 3.736750651607298, "grad_norm": 0.073324508656316, "learning_rate": 1.0454498221546372e-07, "loss": 0.4238, "step": 7532 }, { "epoch": 3.7372471143105375, "grad_norm": 0.07198044107510718, "learning_rate": 1.0414772606289814e-07, "loss": 0.4454, "step": 7533 }, { "epoch": 3.737743577013777, "grad_norm": 0.07118370980297188, "learning_rate": 1.0375121816285361e-07, "loss": 0.4057, "step": 7534 }, { "epoch": 3.738240039717016, "grad_norm": 0.07160281716815883, "learning_rate": 1.0335545857592999e-07, "loss": 0.421, "step": 7535 }, { "epoch": 3.738736502420256, "grad_norm": 0.07145401626454782, "learning_rate": 1.0296044736261279e-07, "loss": 0.4177, "step": 7536 }, { "epoch": 3.7392329651234952, "grad_norm": 0.07322743586878572, "learning_rate": 1.025661845832726e-07, "loss": 0.4592, "step": 7537 }, { "epoch": 3.7397294278267346, "grad_norm": 0.07544486411820084, "learning_rate": 1.0217267029816736e-07, "loss": 0.4377, "step": 7538 }, { "epoch": 3.740225890529974, "grad_norm": 0.07049498098803661, "learning_rate": 1.0177990456743835e-07, "loss": 0.4324, "step": 7539 }, { "epoch": 3.7407223532332132, "grad_norm": 0.07235376143354762, "learning_rate": 1.0138788745111427e-07, "loss": 0.4507, "step": 7540 }, { "epoch": 3.7412188159364526, "grad_norm": 0.07288344961458237, "learning_rate": 1.0099661900910829e-07, "loss": 0.4552, "step": 7541 }, { "epoch": 3.7417152786396923, "grad_norm": 0.07193444729394458, "learning_rate": 1.0060609930121923e-07, "loss": 0.4278, "step": 7542 }, { "epoch": 3.7422117413429317, "grad_norm": 0.07227753500713993, "learning_rate": 1.0021632838713213e-07, "loss": 0.4435, "step": 7543 }, { "epoch": 3.742708204046171, "grad_norm": 0.07170481264345718, "learning_rate": 9.982730632641768e-08, "loss": 0.4204, "step": 7544 }, { "epoch": 3.7432046667494103, "grad_norm": 0.07643936687734741, "learning_rate": 9.943903317853055e-08, "loss": 0.457, "step": 7545 }, { "epoch": 3.74370112945265, "grad_norm": 0.07330117834237307, 
"learning_rate": 9.905150900281325e-08, "loss": 0.4408, "step": 7546 }, { "epoch": 3.7441975921558894, "grad_norm": 0.07001701440144002, "learning_rate": 9.866473385849117e-08, "loss": 0.3988, "step": 7547 }, { "epoch": 3.7446940548591288, "grad_norm": 0.07348218035906592, "learning_rate": 9.827870780467819e-08, "loss": 0.4546, "step": 7548 }, { "epoch": 3.745190517562368, "grad_norm": 0.07149853576907274, "learning_rate": 9.789343090037207e-08, "loss": 0.4354, "step": 7549 }, { "epoch": 3.7456869802656074, "grad_norm": 0.07237011239351258, "learning_rate": 9.750890320445517e-08, "loss": 0.4282, "step": 7550 }, { "epoch": 3.7461834429688468, "grad_norm": 0.069848514948421, "learning_rate": 9.712512477569713e-08, "loss": 0.4113, "step": 7551 }, { "epoch": 3.7466799056720865, "grad_norm": 0.07450332694516261, "learning_rate": 9.674209567275161e-08, "loss": 0.4375, "step": 7552 }, { "epoch": 3.747176368375326, "grad_norm": 0.07318998690584826, "learning_rate": 9.635981595415955e-08, "loss": 0.4656, "step": 7553 }, { "epoch": 3.747672831078565, "grad_norm": 0.07210166747045929, "learning_rate": 9.597828567834589e-08, "loss": 0.4431, "step": 7554 }, { "epoch": 3.7481692937818045, "grad_norm": 0.07136638830380382, "learning_rate": 9.559750490362063e-08, "loss": 0.4189, "step": 7555 }, { "epoch": 3.7486657564850443, "grad_norm": 0.07291966530697377, "learning_rate": 9.521747368818112e-08, "loss": 0.4365, "step": 7556 }, { "epoch": 3.7491622191882836, "grad_norm": 0.07022951970666523, "learning_rate": 9.483819209010813e-08, "loss": 0.4128, "step": 7557 }, { "epoch": 3.749658681891523, "grad_norm": 0.07527555997003625, "learning_rate": 9.445966016736974e-08, "loss": 0.496, "step": 7558 }, { "epoch": 3.7501551445947623, "grad_norm": 0.0759036245963917, "learning_rate": 9.408187797781743e-08, "loss": 0.4411, "step": 7559 }, { "epoch": 3.7506516072980016, "grad_norm": 0.07328521345891655, "learning_rate": 9.370484557919002e-08, "loss": 0.4489, "step": 7560 }, { "epoch": 3.7506516072980016, "eval_loss": 0.5161105394363403, "eval_runtime": 258.8004, "eval_samples_per_second": 117.283, "eval_steps_per_second": 14.664, "step": 7560 }, { "epoch": 3.751148070001241, "grad_norm": 0.07384410860926094, "learning_rate": 9.33285630291103e-08, "loss": 0.4631, "step": 7561 }, { "epoch": 3.7516445327044807, "grad_norm": 0.07337397188656725, "learning_rate": 9.295303038508729e-08, "loss": 0.4382, "step": 7562 }, { "epoch": 3.75214099540772, "grad_norm": 0.07247752832773147, "learning_rate": 9.257824770451507e-08, "loss": 0.4332, "step": 7563 }, { "epoch": 3.7526374581109594, "grad_norm": 0.07263876716289892, "learning_rate": 9.22042150446728e-08, "loss": 0.461, "step": 7564 }, { "epoch": 3.7531339208141987, "grad_norm": 0.07229693840018254, "learning_rate": 9.183093246272645e-08, "loss": 0.4416, "step": 7565 }, { "epoch": 3.7536303835174385, "grad_norm": 0.07183061018007368, "learning_rate": 9.145840001572537e-08, "loss": 0.4415, "step": 7566 }, { "epoch": 3.754126846220678, "grad_norm": 0.07461055074980504, "learning_rate": 9.108661776060568e-08, "loss": 0.4713, "step": 7567 }, { "epoch": 3.754623308923917, "grad_norm": 0.07409143152983287, "learning_rate": 9.071558575418749e-08, "loss": 0.451, "step": 7568 }, { "epoch": 3.7551197716271565, "grad_norm": 0.07229577276590579, "learning_rate": 9.034530405317765e-08, "loss": 0.4364, "step": 7569 }, { "epoch": 3.755616234330396, "grad_norm": 0.07086020583436162, "learning_rate": 8.997577271416758e-08, "loss": 0.4171, "step": 7570 }, { "epoch": 3.756112697033635, 
"grad_norm": 0.07011907855229332, "learning_rate": 8.96069917936343e-08, "loss": 0.421, "step": 7571 }, { "epoch": 3.756609159736875, "grad_norm": 0.07276329677023631, "learning_rate": 8.923896134794053e-08, "loss": 0.4565, "step": 7572 }, { "epoch": 3.7571056224401143, "grad_norm": 0.07401906530715995, "learning_rate": 8.887168143333402e-08, "loss": 0.43, "step": 7573 }, { "epoch": 3.7576020851433536, "grad_norm": 0.0726548623776061, "learning_rate": 8.850515210594601e-08, "loss": 0.4225, "step": 7574 }, { "epoch": 3.758098547846593, "grad_norm": 0.07209307721520765, "learning_rate": 8.813937342179613e-08, "loss": 0.3859, "step": 7575 }, { "epoch": 3.7585950105498327, "grad_norm": 0.07235868664387755, "learning_rate": 8.777434543678687e-08, "loss": 0.4178, "step": 7576 }, { "epoch": 3.759091473253072, "grad_norm": 0.07384631212067913, "learning_rate": 8.741006820670805e-08, "loss": 0.4502, "step": 7577 }, { "epoch": 3.7595879359563114, "grad_norm": 0.07160271493652788, "learning_rate": 8.704654178723293e-08, "loss": 0.4224, "step": 7578 }, { "epoch": 3.7600843986595507, "grad_norm": 0.07175008162985384, "learning_rate": 8.668376623391983e-08, "loss": 0.4291, "step": 7579 }, { "epoch": 3.76058086136279, "grad_norm": 0.071081695755974, "learning_rate": 8.632174160221496e-08, "loss": 0.4432, "step": 7580 }, { "epoch": 3.7610773240660293, "grad_norm": 0.07161415164284515, "learning_rate": 8.596046794744683e-08, "loss": 0.447, "step": 7581 }, { "epoch": 3.761573786769269, "grad_norm": 0.07457076039250243, "learning_rate": 8.559994532483074e-08, "loss": 0.4739, "step": 7582 }, { "epoch": 3.7620702494725085, "grad_norm": 0.0705624068638462, "learning_rate": 8.524017378946592e-08, "loss": 0.4159, "step": 7583 }, { "epoch": 3.762566712175748, "grad_norm": 0.07177507462194573, "learning_rate": 8.48811533963384e-08, "loss": 0.435, "step": 7584 }, { "epoch": 3.763063174878987, "grad_norm": 0.07268354722345835, "learning_rate": 8.452288420031929e-08, "loss": 0.4301, "step": 7585 }, { "epoch": 3.763559637582227, "grad_norm": 0.07380471538302204, "learning_rate": 8.416536625616312e-08, "loss": 0.4226, "step": 7586 }, { "epoch": 3.764056100285466, "grad_norm": 0.07641290940010709, "learning_rate": 8.380859961851174e-08, "loss": 0.444, "step": 7587 }, { "epoch": 3.7645525629887056, "grad_norm": 0.07167215516692764, "learning_rate": 8.345258434189041e-08, "loss": 0.44, "step": 7588 }, { "epoch": 3.765049025691945, "grad_norm": 0.07420714066871849, "learning_rate": 8.309732048071062e-08, "loss": 0.449, "step": 7589 }, { "epoch": 3.765545488395184, "grad_norm": 0.07177597297596648, "learning_rate": 8.274280808926893e-08, "loss": 0.404, "step": 7590 }, { "epoch": 3.7660419510984235, "grad_norm": 0.07439229401969116, "learning_rate": 8.238904722174701e-08, "loss": 0.4384, "step": 7591 }, { "epoch": 3.766538413801663, "grad_norm": 0.07206472800094606, "learning_rate": 8.203603793221104e-08, "loss": 0.4363, "step": 7592 }, { "epoch": 3.7670348765049027, "grad_norm": 0.07751943428429821, "learning_rate": 8.168378027461343e-08, "loss": 0.4822, "step": 7593 }, { "epoch": 3.767531339208142, "grad_norm": 0.07116288444682155, "learning_rate": 8.133227430279055e-08, "loss": 0.4153, "step": 7594 }, { "epoch": 3.7680278019113813, "grad_norm": 0.07418819356407474, "learning_rate": 8.0981520070465e-08, "loss": 0.4585, "step": 7595 }, { "epoch": 3.768524264614621, "grad_norm": 0.0733105625391331, "learning_rate": 8.063151763124332e-08, "loss": 0.4455, "step": 7596 }, { "epoch": 3.7690207273178604, "grad_norm": 
0.07300590027724989, "learning_rate": 8.02822670386183e-08, "loss": 0.4423, "step": 7597 }, { "epoch": 3.7695171900210998, "grad_norm": 0.07357361670187289, "learning_rate": 7.993376834596722e-08, "loss": 0.4414, "step": 7598 }, { "epoch": 3.770013652724339, "grad_norm": 0.07297895745552392, "learning_rate": 7.958602160655193e-08, "loss": 0.4465, "step": 7599 }, { "epoch": 3.7705101154275784, "grad_norm": 0.0743257455513542, "learning_rate": 7.923902687352103e-08, "loss": 0.4503, "step": 7600 }, { "epoch": 3.7710065781308177, "grad_norm": 0.07258222433746735, "learning_rate": 7.889278419990598e-08, "loss": 0.4757, "step": 7601 }, { "epoch": 3.771503040834057, "grad_norm": 0.07186680285906648, "learning_rate": 7.854729363862502e-08, "loss": 0.4384, "step": 7602 }, { "epoch": 3.771999503537297, "grad_norm": 0.07320648320061995, "learning_rate": 7.820255524248032e-08, "loss": 0.4455, "step": 7603 }, { "epoch": 3.772495966240536, "grad_norm": 0.07359622206653958, "learning_rate": 7.78585690641609e-08, "loss": 0.4377, "step": 7604 }, { "epoch": 3.7729924289437755, "grad_norm": 0.07749700381908971, "learning_rate": 7.7515335156238e-08, "loss": 0.4363, "step": 7605 }, { "epoch": 3.7734888916470153, "grad_norm": 0.07211493802132227, "learning_rate": 7.717285357117022e-08, "loss": 0.4297, "step": 7606 }, { "epoch": 3.7739853543502546, "grad_norm": 0.07104624651291085, "learning_rate": 7.68311243613007e-08, "loss": 0.4409, "step": 7607 }, { "epoch": 3.774481817053494, "grad_norm": 0.0726414983389291, "learning_rate": 7.649014757885597e-08, "loss": 0.4284, "step": 7608 }, { "epoch": 3.7749782797567333, "grad_norm": 0.07053503146406671, "learning_rate": 7.614992327594994e-08, "loss": 0.4136, "step": 7609 }, { "epoch": 3.7754747424599726, "grad_norm": 0.0748476111263376, "learning_rate": 7.581045150458099e-08, "loss": 0.4532, "step": 7610 }, { "epoch": 3.775971205163212, "grad_norm": 0.07253655365457137, "learning_rate": 7.547173231663041e-08, "loss": 0.4345, "step": 7611 }, { "epoch": 3.7764676678664513, "grad_norm": 0.0714135317327364, "learning_rate": 7.513376576386678e-08, "loss": 0.4414, "step": 7612 }, { "epoch": 3.776964130569691, "grad_norm": 0.07205257695786416, "learning_rate": 7.479655189794266e-08, "loss": 0.4374, "step": 7613 }, { "epoch": 3.7774605932729304, "grad_norm": 0.07257848974462944, "learning_rate": 7.446009077039629e-08, "loss": 0.4548, "step": 7614 }, { "epoch": 3.7779570559761697, "grad_norm": 0.07242154741962721, "learning_rate": 7.41243824326504e-08, "loss": 0.4307, "step": 7615 }, { "epoch": 3.7784535186794095, "grad_norm": 0.0730945396679734, "learning_rate": 7.378942693601177e-08, "loss": 0.4675, "step": 7616 }, { "epoch": 3.778949981382649, "grad_norm": 0.07274778188236991, "learning_rate": 7.345522433167385e-08, "loss": 0.4497, "step": 7617 }, { "epoch": 3.779446444085888, "grad_norm": 0.07027696943602645, "learning_rate": 7.312177467071302e-08, "loss": 0.4272, "step": 7618 }, { "epoch": 3.7799429067891275, "grad_norm": 0.07173677048643133, "learning_rate": 7.278907800409296e-08, "loss": 0.4569, "step": 7619 }, { "epoch": 3.780439369492367, "grad_norm": 0.0713370611452363, "learning_rate": 7.245713438266022e-08, "loss": 0.4181, "step": 7620 }, { "epoch": 3.780935832195606, "grad_norm": 0.07240382585870658, "learning_rate": 7.212594385714755e-08, "loss": 0.4274, "step": 7621 }, { "epoch": 3.7814322948988455, "grad_norm": 0.0729393270005386, "learning_rate": 7.179550647817224e-08, "loss": 0.4517, "step": 7622 }, { "epoch": 3.7819287576020852, "grad_norm": 
0.07434877364398472, "learning_rate": 7.1465822296235e-08, "loss": 0.4713, "step": 7623 }, { "epoch": 3.7824252203053246, "grad_norm": 0.07110157033397635, "learning_rate": 7.113689136172441e-08, "loss": 0.4262, "step": 7624 }, { "epoch": 3.782921683008564, "grad_norm": 0.07200827074812058, "learning_rate": 7.080871372491193e-08, "loss": 0.4307, "step": 7625 }, { "epoch": 3.7834181457118032, "grad_norm": 0.07128597824580814, "learning_rate": 7.048128943595356e-08, "loss": 0.4464, "step": 7626 }, { "epoch": 3.783914608415043, "grad_norm": 0.07378865528886532, "learning_rate": 7.015461854489148e-08, "loss": 0.4447, "step": 7627 }, { "epoch": 3.7844110711182823, "grad_norm": 0.07477304509027059, "learning_rate": 6.982870110165185e-08, "loss": 0.4768, "step": 7628 }, { "epoch": 3.7849075338215217, "grad_norm": 0.07292149129680923, "learning_rate": 6.950353715604597e-08, "loss": 0.4242, "step": 7629 }, { "epoch": 3.785403996524761, "grad_norm": 0.07333502503957316, "learning_rate": 6.917912675777016e-08, "loss": 0.4362, "step": 7630 }, { "epoch": 3.7859004592280003, "grad_norm": 0.07190268268817648, "learning_rate": 6.885546995640479e-08, "loss": 0.4507, "step": 7631 }, { "epoch": 3.7863969219312397, "grad_norm": 0.07061760849169589, "learning_rate": 6.85325668014164e-08, "loss": 0.4109, "step": 7632 }, { "epoch": 3.7868933846344794, "grad_norm": 0.07427625826680745, "learning_rate": 6.821041734215438e-08, "loss": 0.4956, "step": 7633 }, { "epoch": 3.7873898473377188, "grad_norm": 0.07358870722147563, "learning_rate": 6.788902162785549e-08, "loss": 0.4449, "step": 7634 }, { "epoch": 3.787886310040958, "grad_norm": 0.0720882277469726, "learning_rate": 6.756837970763875e-08, "loss": 0.4546, "step": 7635 }, { "epoch": 3.7883827727441974, "grad_norm": 0.07358476569511135, "learning_rate": 6.724849163050995e-08, "loss": 0.4937, "step": 7636 }, { "epoch": 3.788879235447437, "grad_norm": 0.07112676903545612, "learning_rate": 6.692935744535889e-08, "loss": 0.4274, "step": 7637 }, { "epoch": 3.7893756981506765, "grad_norm": 0.07339375287303061, "learning_rate": 6.661097720095877e-08, "loss": 0.4433, "step": 7638 }, { "epoch": 3.789872160853916, "grad_norm": 0.07241100188549718, "learning_rate": 6.629335094597067e-08, "loss": 0.4727, "step": 7639 }, { "epoch": 3.790368623557155, "grad_norm": 0.07495755848213913, "learning_rate": 6.597647872893798e-08, "loss": 0.4613, "step": 7640 }, { "epoch": 3.7908650862603945, "grad_norm": 0.07058571062118439, "learning_rate": 6.566036059828918e-08, "loss": 0.4577, "step": 7641 }, { "epoch": 3.791361548963634, "grad_norm": 0.07198596199162723, "learning_rate": 6.53449966023384e-08, "loss": 0.4468, "step": 7642 }, { "epoch": 3.7918580116668736, "grad_norm": 0.07094089700135145, "learning_rate": 6.50303867892832e-08, "loss": 0.4782, "step": 7643 }, { "epoch": 3.792354474370113, "grad_norm": 0.0722320181545589, "learning_rate": 6.471653120720733e-08, "loss": 0.4428, "step": 7644 }, { "epoch": 3.7928509370733523, "grad_norm": 0.07167870597974875, "learning_rate": 6.440342990407856e-08, "loss": 0.4252, "step": 7645 }, { "epoch": 3.7933473997765916, "grad_norm": 0.07092657802659981, "learning_rate": 6.409108292774912e-08, "loss": 0.4242, "step": 7646 }, { "epoch": 3.7938438624798314, "grad_norm": 0.0755376186724099, "learning_rate": 6.377949032595699e-08, "loss": 0.4451, "step": 7647 }, { "epoch": 3.7943403251830707, "grad_norm": 0.07150643420264564, "learning_rate": 6.346865214632292e-08, "loss": 0.4109, "step": 7648 }, { "epoch": 3.79483678788631, "grad_norm": 
0.07421329049090035, "learning_rate": 6.315856843635449e-08, "loss": 0.4141, "step": 7649 }, { "epoch": 3.7953332505895494, "grad_norm": 0.07365889529760761, "learning_rate": 6.28492392434421e-08, "loss": 0.4379, "step": 7650 }, { "epoch": 3.7958297132927887, "grad_norm": 0.07265832592753993, "learning_rate": 6.254066461486241e-08, "loss": 0.4228, "step": 7651 }, { "epoch": 3.796326175996028, "grad_norm": 0.07236918113007618, "learning_rate": 6.223284459777601e-08, "loss": 0.4388, "step": 7652 }, { "epoch": 3.796822638699268, "grad_norm": 0.07135326402955866, "learning_rate": 6.19257792392286e-08, "loss": 0.4314, "step": 7653 }, { "epoch": 3.797319101402507, "grad_norm": 0.07192211849326689, "learning_rate": 6.161946858614931e-08, "loss": 0.4284, "step": 7654 }, { "epoch": 3.7978155641057465, "grad_norm": 0.07425977862800254, "learning_rate": 6.131391268535347e-08, "loss": 0.4783, "step": 7655 }, { "epoch": 3.798312026808986, "grad_norm": 0.07256891885712644, "learning_rate": 6.100911158354039e-08, "loss": 0.4139, "step": 7656 }, { "epoch": 3.7988084895122256, "grad_norm": 0.07132789042499738, "learning_rate": 6.070506532729393e-08, "loss": 0.4487, "step": 7657 }, { "epoch": 3.799304952215465, "grad_norm": 0.071378541512784, "learning_rate": 6.040177396308244e-08, "loss": 0.4264, "step": 7658 }, { "epoch": 3.7998014149187043, "grad_norm": 0.0725960156414164, "learning_rate": 6.009923753725999e-08, "loss": 0.4477, "step": 7659 }, { "epoch": 3.8002978776219436, "grad_norm": 0.0736973528236666, "learning_rate": 5.979745609606291e-08, "loss": 0.4299, "step": 7660 }, { "epoch": 3.800794340325183, "grad_norm": 0.07299851087260749, "learning_rate": 5.949642968561542e-08, "loss": 0.4331, "step": 7661 }, { "epoch": 3.8012908030284223, "grad_norm": 0.07473707812315147, "learning_rate": 5.9196158351923496e-08, "loss": 0.4818, "step": 7662 }, { "epoch": 3.801787265731662, "grad_norm": 0.07336642968051621, "learning_rate": 5.889664214087876e-08, "loss": 0.4387, "step": 7663 }, { "epoch": 3.8022837284349014, "grad_norm": 0.07245571259074714, "learning_rate": 5.8597881098257924e-08, "loss": 0.441, "step": 7664 }, { "epoch": 3.8027801911381407, "grad_norm": 0.07236547353401815, "learning_rate": 5.829987526972114e-08, "loss": 0.4435, "step": 7665 }, { "epoch": 3.80327665384138, "grad_norm": 0.07503876585403813, "learning_rate": 5.8002624700814744e-08, "loss": 0.4763, "step": 7666 }, { "epoch": 3.80377311654462, "grad_norm": 0.0732106784277791, "learning_rate": 5.770612943696741e-08, "loss": 0.4082, "step": 7667 }, { "epoch": 3.804269579247859, "grad_norm": 0.07331334362720807, "learning_rate": 5.741038952349565e-08, "loss": 0.435, "step": 7668 }, { "epoch": 3.8047660419510985, "grad_norm": 0.07081532219477497, "learning_rate": 5.71154050055972e-08, "loss": 0.4333, "step": 7669 }, { "epoch": 3.805262504654338, "grad_norm": 0.0729540189307146, "learning_rate": 5.682117592835545e-08, "loss": 0.439, "step": 7670 }, { "epoch": 3.805758967357577, "grad_norm": 0.07107821559478245, "learning_rate": 5.652770233673943e-08, "loss": 0.4316, "step": 7671 }, { "epoch": 3.8062554300608165, "grad_norm": 0.0712561932838854, "learning_rate": 5.623498427560159e-08, "loss": 0.4298, "step": 7672 }, { "epoch": 3.8067518927640562, "grad_norm": 0.07389039590125981, "learning_rate": 5.594302178967892e-08, "loss": 0.4223, "step": 7673 }, { "epoch": 3.8072483554672956, "grad_norm": 0.07327860460759891, "learning_rate": 5.565181492359406e-08, "loss": 0.4317, "step": 7674 }, { "epoch": 3.807744818170535, "grad_norm": 
0.07032477492674219, "learning_rate": 5.536136372185197e-08, "loss": 0.418, "step": 7675 }, { "epoch": 3.8082412808737742, "grad_norm": 0.07096786204176252, "learning_rate": 5.507166822884435e-08, "loss": 0.4008, "step": 7676 }, { "epoch": 3.808737743577014, "grad_norm": 0.07408752101095577, "learning_rate": 5.478272848884636e-08, "loss": 0.4616, "step": 7677 }, { "epoch": 3.8092342062802533, "grad_norm": 0.0728659361480815, "learning_rate": 5.4494544546018216e-08, "loss": 0.4349, "step": 7678 }, { "epoch": 3.8097306689834927, "grad_norm": 0.07048801473333352, "learning_rate": 5.420711644440357e-08, "loss": 0.4175, "step": 7679 }, { "epoch": 3.810227131686732, "grad_norm": 0.07360760755959432, "learning_rate": 5.392044422793119e-08, "loss": 0.4632, "step": 7680 }, { "epoch": 3.8107235943899713, "grad_norm": 0.07173217601575797, "learning_rate": 5.3634527940414903e-08, "loss": 0.4181, "step": 7681 }, { "epoch": 3.8112200570932107, "grad_norm": 0.07142680853692407, "learning_rate": 5.334936762555198e-08, "loss": 0.4561, "step": 7682 }, { "epoch": 3.8117165197964504, "grad_norm": 0.07222635525660184, "learning_rate": 5.3064963326924235e-08, "loss": 0.4435, "step": 7683 }, { "epoch": 3.8122129824996898, "grad_norm": 0.07122565629989039, "learning_rate": 5.2781315087999106e-08, "loss": 0.4317, "step": 7684 }, { "epoch": 3.812709445202929, "grad_norm": 0.07193918182893863, "learning_rate": 5.249842295212748e-08, "loss": 0.4184, "step": 7685 }, { "epoch": 3.8132059079061684, "grad_norm": 0.06929577784349823, "learning_rate": 5.221628696254477e-08, "loss": 0.3988, "step": 7686 }, { "epoch": 3.813702370609408, "grad_norm": 0.07224933484530108, "learning_rate": 5.1934907162370374e-08, "loss": 0.441, "step": 7687 }, { "epoch": 3.8141988333126475, "grad_norm": 0.07296837062153164, "learning_rate": 5.165428359460989e-08, "loss": 0.4522, "step": 7688 }, { "epoch": 3.814695296015887, "grad_norm": 0.0737214515339532, "learning_rate": 5.137441630215123e-08, "loss": 0.4502, "step": 7689 }, { "epoch": 3.815191758719126, "grad_norm": 0.07404136133329889, "learning_rate": 5.1095305327767965e-08, "loss": 0.469, "step": 7690 }, { "epoch": 3.8156882214223655, "grad_norm": 0.07376348125016616, "learning_rate": 5.081695071411763e-08, "loss": 0.4454, "step": 7691 }, { "epoch": 3.816184684125605, "grad_norm": 0.07453116586826475, "learning_rate": 5.0539352503741756e-08, "loss": 0.4473, "step": 7692 }, { "epoch": 3.8166811468288446, "grad_norm": 0.07315513997164763, "learning_rate": 5.026251073906807e-08, "loss": 0.4248, "step": 7693 }, { "epoch": 3.817177609532084, "grad_norm": 0.0753470001876728, "learning_rate": 4.998642546240606e-08, "loss": 0.4602, "step": 7694 }, { "epoch": 3.8176740722353233, "grad_norm": 0.07377153988075652, "learning_rate": 4.9711096715951977e-08, "loss": 0.4182, "step": 7695 }, { "epoch": 3.8181705349385626, "grad_norm": 0.07252299098795054, "learning_rate": 4.9436524541784384e-08, "loss": 0.4297, "step": 7696 }, { "epoch": 3.8186669976418024, "grad_norm": 0.0750530077316559, "learning_rate": 4.9162708981868034e-08, "loss": 0.4418, "step": 7697 }, { "epoch": 3.8191634603450417, "grad_norm": 0.07255075825129673, "learning_rate": 4.888965007805113e-08, "loss": 0.4417, "step": 7698 }, { "epoch": 3.819659923048281, "grad_norm": 0.07321503165402111, "learning_rate": 4.861734787206529e-08, "loss": 0.4585, "step": 7699 }, { "epoch": 3.8201563857515204, "grad_norm": 0.0747663686249205, "learning_rate": 4.834580240552944e-08, "loss": 0.4267, "step": 7700 }, { "epoch": 3.8206528484547597, "grad_norm": 
0.070900126955607, "learning_rate": 4.807501371994372e-08, "loss": 0.4307, "step": 7701 }, { "epoch": 3.821149311157999, "grad_norm": 0.07040814280606325, "learning_rate": 4.780498185669391e-08, "loss": 0.4374, "step": 7702 }, { "epoch": 3.821645773861239, "grad_norm": 0.06980323783933094, "learning_rate": 4.753570685705033e-08, "loss": 0.4215, "step": 7703 }, { "epoch": 3.822142236564478, "grad_norm": 0.0737712431960967, "learning_rate": 4.726718876216674e-08, "loss": 0.4528, "step": 7704 }, { "epoch": 3.8226386992677175, "grad_norm": 0.06996481879965229, "learning_rate": 4.699942761308307e-08, "loss": 0.4095, "step": 7705 }, { "epoch": 3.823135161970957, "grad_norm": 0.07212443730436509, "learning_rate": 4.67324234507216e-08, "loss": 0.4208, "step": 7706 }, { "epoch": 3.8236316246741966, "grad_norm": 0.0701011409340594, "learning_rate": 4.646617631588912e-08, "loss": 0.4538, "step": 7707 }, { "epoch": 3.824128087377436, "grad_norm": 0.07139643476891598, "learning_rate": 4.620068624927754e-08, "loss": 0.4357, "step": 7708 }, { "epoch": 3.8246245500806753, "grad_norm": 0.07197096113145784, "learning_rate": 4.593595329146327e-08, "loss": 0.4173, "step": 7709 }, { "epoch": 3.8251210127839146, "grad_norm": 0.07144844528918787, "learning_rate": 4.567197748290619e-08, "loss": 0.4693, "step": 7710 }, { "epoch": 3.825617475487154, "grad_norm": 0.07178497814452507, "learning_rate": 4.5408758863950685e-08, "loss": 0.4556, "step": 7711 }, { "epoch": 3.8261139381903932, "grad_norm": 0.07187992019343, "learning_rate": 4.5146297474825684e-08, "loss": 0.4478, "step": 7712 }, { "epoch": 3.826610400893633, "grad_norm": 0.0696093030235306, "learning_rate": 4.48845933556441e-08, "loss": 0.409, "step": 7713 }, { "epoch": 3.8271068635968724, "grad_norm": 0.07230955744554561, "learning_rate": 4.462364654640283e-08, "loss": 0.4346, "step": 7714 }, { "epoch": 3.8276033263001117, "grad_norm": 0.0731872540090233, "learning_rate": 4.4363457086984416e-08, "loss": 0.4461, "step": 7715 }, { "epoch": 3.828099789003351, "grad_norm": 0.0733655897084953, "learning_rate": 4.4104025017153165e-08, "loss": 0.4661, "step": 7716 }, { "epoch": 3.828596251706591, "grad_norm": 0.06899392563017782, "learning_rate": 4.384535037656068e-08, "loss": 0.4181, "step": 7717 }, { "epoch": 3.82909271440983, "grad_norm": 0.07049518740890774, "learning_rate": 4.3587433204739795e-08, "loss": 0.4303, "step": 7718 }, { "epoch": 3.8295891771130695, "grad_norm": 0.0746969522914595, "learning_rate": 4.333027354111008e-08, "loss": 0.4639, "step": 7719 }, { "epoch": 3.830085639816309, "grad_norm": 0.07287444592387665, "learning_rate": 4.307387142497399e-08, "loss": 0.465, "step": 7720 }, { "epoch": 3.830582102519548, "grad_norm": 0.07368450889542699, "learning_rate": 4.281822689551795e-08, "loss": 0.4309, "step": 7721 }, { "epoch": 3.8310785652227874, "grad_norm": 0.07167812835385413, "learning_rate": 4.256333999181406e-08, "loss": 0.422, "step": 7722 }, { "epoch": 3.831575027926027, "grad_norm": 0.07185303282623809, "learning_rate": 4.230921075281724e-08, "loss": 0.4336, "step": 7723 }, { "epoch": 3.8320714906292666, "grad_norm": 0.07492267375306622, "learning_rate": 4.205583921736644e-08, "loss": 0.4489, "step": 7724 }, { "epoch": 3.832567953332506, "grad_norm": 0.0735973347845139, "learning_rate": 4.18032254241868e-08, "loss": 0.4628, "step": 7725 }, { "epoch": 3.833064416035745, "grad_norm": 0.07490391828605589, "learning_rate": 4.155136941188465e-08, "loss": 0.4698, "step": 7726 }, { "epoch": 3.833560878738985, "grad_norm": 0.07323970805963602, 
"learning_rate": 4.130027121895419e-08, "loss": 0.4248, "step": 7727 }, { "epoch": 3.8340573414422243, "grad_norm": 0.07439139948354823, "learning_rate": 4.104993088376974e-08, "loss": 0.4538, "step": 7728 }, { "epoch": 3.8345538041454637, "grad_norm": 0.07164520350912759, "learning_rate": 4.0800348444592354e-08, "loss": 0.4484, "step": 7729 }, { "epoch": 3.835050266848703, "grad_norm": 0.07438774813373158, "learning_rate": 4.0551523939567626e-08, "loss": 0.4545, "step": 7730 }, { "epoch": 3.8355467295519423, "grad_norm": 0.07505882519637529, "learning_rate": 4.030345740672348e-08, "loss": 0.4427, "step": 7731 }, { "epoch": 3.8360431922551816, "grad_norm": 0.07161856615140008, "learning_rate": 4.005614888397347e-08, "loss": 0.399, "step": 7732 }, { "epoch": 3.836539654958421, "grad_norm": 0.07156914586364191, "learning_rate": 3.980959840911402e-08, "loss": 0.4366, "step": 7733 }, { "epoch": 3.8370361176616608, "grad_norm": 0.07024940982721835, "learning_rate": 3.956380601982668e-08, "loss": 0.4231, "step": 7734 }, { "epoch": 3.8375325803649, "grad_norm": 0.0721047312960411, "learning_rate": 3.931877175367749e-08, "loss": 0.4326, "step": 7735 }, { "epoch": 3.8380290430681394, "grad_norm": 0.07392564844521746, "learning_rate": 3.9074495648115384e-08, "loss": 0.4199, "step": 7736 }, { "epoch": 3.838525505771379, "grad_norm": 0.07194179417323361, "learning_rate": 3.88309777404744e-08, "loss": 0.4501, "step": 7737 }, { "epoch": 3.8390219684746185, "grad_norm": 0.07293171801245724, "learning_rate": 3.8588218067972526e-08, "loss": 0.4632, "step": 7738 }, { "epoch": 3.839518431177858, "grad_norm": 0.0728548840052456, "learning_rate": 3.8346216667710653e-08, "loss": 0.4458, "step": 7739 }, { "epoch": 3.840014893881097, "grad_norm": 0.07218780052939112, "learning_rate": 3.8104973576675863e-08, "loss": 0.4098, "step": 7740 }, { "epoch": 3.8405113565843365, "grad_norm": 0.07300164587754282, "learning_rate": 3.786448883173755e-08, "loss": 0.4407, "step": 7741 }, { "epoch": 3.841007819287576, "grad_norm": 0.07402710952167288, "learning_rate": 3.762476246965019e-08, "loss": 0.4467, "step": 7742 }, { "epoch": 3.841504281990815, "grad_norm": 0.0741556939673995, "learning_rate": 3.738579452705282e-08, "loss": 0.4528, "step": 7743 }, { "epoch": 3.842000744694055, "grad_norm": 0.07379438521836251, "learning_rate": 3.7147585040467336e-08, "loss": 0.4341, "step": 7744 }, { "epoch": 3.8424972073972943, "grad_norm": 0.07020108294788051, "learning_rate": 3.691013404630017e-08, "loss": 0.4323, "step": 7745 }, { "epoch": 3.8429936701005336, "grad_norm": 0.0708905708801741, "learning_rate": 3.667344158084118e-08, "loss": 0.4259, "step": 7746 }, { "epoch": 3.8434901328037734, "grad_norm": 0.07450166176521068, "learning_rate": 3.643750768026644e-08, "loss": 0.447, "step": 7747 }, { "epoch": 3.8439865955070127, "grad_norm": 0.07581994277980696, "learning_rate": 3.620233238063375e-08, "loss": 0.4254, "step": 7748 }, { "epoch": 3.844483058210252, "grad_norm": 0.07482993843850519, "learning_rate": 3.596791571788605e-08, "loss": 0.4533, "step": 7749 }, { "epoch": 3.8449795209134914, "grad_norm": 0.0745584907129804, "learning_rate": 3.573425772785077e-08, "loss": 0.4275, "step": 7750 }, { "epoch": 3.8454759836167307, "grad_norm": 0.07337402169038784, "learning_rate": 3.55013584462377e-08, "loss": 0.4518, "step": 7751 }, { "epoch": 3.84597244631997, "grad_norm": 0.0741595197126386, "learning_rate": 3.526921790864224e-08, "loss": 0.4334, "step": 7752 }, { "epoch": 3.8464689090232094, "grad_norm": 0.07075236082454928, 
"learning_rate": 3.503783615054324e-08, "loss": 0.4057, "step": 7753 }, { "epoch": 3.846965371726449, "grad_norm": 0.07066552410408557, "learning_rate": 3.4807213207304624e-08, "loss": 0.43, "step": 7754 }, { "epoch": 3.8474618344296885, "grad_norm": 0.07297525480330684, "learning_rate": 3.45773491141721e-08, "loss": 0.4413, "step": 7755 }, { "epoch": 3.847958297132928, "grad_norm": 0.07206069774315897, "learning_rate": 3.4348243906277554e-08, "loss": 0.4347, "step": 7756 }, { "epoch": 3.8484547598361676, "grad_norm": 0.07137753032462671, "learning_rate": 3.411989761863577e-08, "loss": 0.4156, "step": 7757 }, { "epoch": 3.848951222539407, "grad_norm": 0.07449584893291657, "learning_rate": 3.389231028614548e-08, "loss": 0.4427, "step": 7758 }, { "epoch": 3.8494476852426462, "grad_norm": 0.07267303968637863, "learning_rate": 3.3665481943590536e-08, "loss": 0.436, "step": 7759 }, { "epoch": 3.8499441479458856, "grad_norm": 0.0733567394109518, "learning_rate": 3.34394126256371e-08, "loss": 0.4537, "step": 7760 }, { "epoch": 3.850440610649125, "grad_norm": 0.07014161013435151, "learning_rate": 3.3214102366836974e-08, "loss": 0.4138, "step": 7761 }, { "epoch": 3.8509370733523642, "grad_norm": 0.07137409206356522, "learning_rate": 3.2989551201624836e-08, "loss": 0.4311, "step": 7762 }, { "epoch": 3.8514335360556036, "grad_norm": 0.07205143541112742, "learning_rate": 3.27657591643199e-08, "loss": 0.416, "step": 7763 }, { "epoch": 3.8519299987588433, "grad_norm": 0.0706412399665651, "learning_rate": 3.2542726289124804e-08, "loss": 0.4325, "step": 7764 }, { "epoch": 3.8524264614620827, "grad_norm": 0.07095456941872033, "learning_rate": 3.232045261012728e-08, "loss": 0.4188, "step": 7765 }, { "epoch": 3.852922924165322, "grad_norm": 0.07211475153795083, "learning_rate": 3.2098938161297945e-08, "loss": 0.4464, "step": 7766 }, { "epoch": 3.8534193868685613, "grad_norm": 0.0701709114389938, "learning_rate": 3.1878182976491366e-08, "loss": 0.4042, "step": 7767 }, { "epoch": 3.853915849571801, "grad_norm": 0.07153435007874302, "learning_rate": 3.165818708944668e-08, "loss": 0.4543, "step": 7768 }, { "epoch": 3.8544123122750404, "grad_norm": 0.0712219761262089, "learning_rate": 3.143895053378698e-08, "loss": 0.4205, "step": 7769 }, { "epoch": 3.8549087749782798, "grad_norm": 0.06959699545376342, "learning_rate": 3.122047334301881e-08, "loss": 0.4077, "step": 7770 }, { "epoch": 3.855405237681519, "grad_norm": 0.07165436362760237, "learning_rate": 3.100275555053323e-08, "loss": 0.4264, "step": 7771 }, { "epoch": 3.8559017003847584, "grad_norm": 0.0696543457598655, "learning_rate": 3.0785797189604725e-08, "loss": 0.4359, "step": 7772 }, { "epoch": 3.8563981630879978, "grad_norm": 0.07115194392658761, "learning_rate": 3.0569598293391235e-08, "loss": 0.4155, "step": 7773 }, { "epoch": 3.8568946257912375, "grad_norm": 0.07347182112534703, "learning_rate": 3.035415889493631e-08, "loss": 0.4571, "step": 7774 }, { "epoch": 3.857391088494477, "grad_norm": 0.07383870491369782, "learning_rate": 3.0139479027165855e-08, "loss": 0.4633, "step": 7775 }, { "epoch": 3.857887551197716, "grad_norm": 0.07222560472754942, "learning_rate": 2.992555872289082e-08, "loss": 0.4567, "step": 7776 }, { "epoch": 3.8583840139009555, "grad_norm": 0.07311389448186495, "learning_rate": 2.971239801480452e-08, "loss": 0.4393, "step": 7777 }, { "epoch": 3.8588804766041953, "grad_norm": 0.07205166481588295, "learning_rate": 2.949999693548533e-08, "loss": 0.4118, "step": 7778 }, { "epoch": 3.8593769393074346, "grad_norm": 0.07175495025624617, 
"learning_rate": 2.9288355517396726e-08, "loss": 0.4252, "step": 7779 }, { "epoch": 3.859873402010674, "grad_norm": 0.07242985283004692, "learning_rate": 2.9077473792882837e-08, "loss": 0.4192, "step": 7780 }, { "epoch": 3.8603698647139133, "grad_norm": 0.07324306870128854, "learning_rate": 2.8867351794174547e-08, "loss": 0.4543, "step": 7781 }, { "epoch": 3.8608663274171526, "grad_norm": 0.07133057449898085, "learning_rate": 2.8657989553385614e-08, "loss": 0.4362, "step": 7782 }, { "epoch": 3.861362790120392, "grad_norm": 0.07255116932077542, "learning_rate": 2.844938710251377e-08, "loss": 0.4327, "step": 7783 }, { "epoch": 3.8618592528236317, "grad_norm": 0.07084270382910844, "learning_rate": 2.8241544473440185e-08, "loss": 0.4079, "step": 7784 }, { "epoch": 3.862355715526871, "grad_norm": 0.06919189412064118, "learning_rate": 2.8034461697930005e-08, "loss": 0.3972, "step": 7785 }, { "epoch": 3.8628521782301104, "grad_norm": 0.07340701474171342, "learning_rate": 2.7828138807633465e-08, "loss": 0.4318, "step": 7786 }, { "epoch": 3.8633486409333497, "grad_norm": 0.06966816916473437, "learning_rate": 2.762257583408312e-08, "loss": 0.4434, "step": 7787 }, { "epoch": 3.8638451036365895, "grad_norm": 0.07393027151147268, "learning_rate": 2.7417772808696065e-08, "loss": 0.446, "step": 7788 }, { "epoch": 3.864341566339829, "grad_norm": 0.0725801905832486, "learning_rate": 2.7213729762773366e-08, "loss": 0.4501, "step": 7789 }, { "epoch": 3.864838029043068, "grad_norm": 0.07182022009850554, "learning_rate": 2.7010446727498974e-08, "loss": 0.4327, "step": 7790 }, { "epoch": 3.8653344917463075, "grad_norm": 0.07026862262470093, "learning_rate": 2.6807923733942474e-08, "loss": 0.4239, "step": 7791 }, { "epoch": 3.865830954449547, "grad_norm": 0.0700408038106669, "learning_rate": 2.6606160813055225e-08, "loss": 0.4386, "step": 7792 }, { "epoch": 3.866327417152786, "grad_norm": 0.07021156797431176, "learning_rate": 2.640515799567478e-08, "loss": 0.4227, "step": 7793 }, { "epoch": 3.866823879856026, "grad_norm": 0.07378239177728425, "learning_rate": 2.6204915312519898e-08, "loss": 0.4347, "step": 7794 }, { "epoch": 3.8673203425592653, "grad_norm": 0.0709349128055407, "learning_rate": 2.6005432794194985e-08, "loss": 0.404, "step": 7795 }, { "epoch": 3.8678168052625046, "grad_norm": 0.07371643425293671, "learning_rate": 2.5806710471187323e-08, "loss": 0.4593, "step": 7796 }, { "epoch": 3.868313267965744, "grad_norm": 0.07127165318042873, "learning_rate": 2.5608748373869285e-08, "loss": 0.4214, "step": 7797 }, { "epoch": 3.8688097306689837, "grad_norm": 0.07447420956274946, "learning_rate": 2.5411546532496113e-08, "loss": 0.4593, "step": 7798 }, { "epoch": 3.869306193372223, "grad_norm": 0.07438841127488162, "learning_rate": 2.5215104977205918e-08, "loss": 0.4456, "step": 7799 }, { "epoch": 3.8698026560754624, "grad_norm": 0.07230538935773687, "learning_rate": 2.5019423738022464e-08, "loss": 0.4437, "step": 7800 }, { "epoch": 3.8702991187787017, "grad_norm": 0.07139035223489404, "learning_rate": 2.4824502844852938e-08, "loss": 0.4292, "step": 7801 }, { "epoch": 3.870795581481941, "grad_norm": 0.07179684932883365, "learning_rate": 2.463034232748629e-08, "loss": 0.4277, "step": 7802 }, { "epoch": 3.8712920441851804, "grad_norm": 0.07397952129422569, "learning_rate": 2.443694221559878e-08, "loss": 0.4437, "step": 7803 }, { "epoch": 3.87178850688842, "grad_norm": 0.07249524619609739, "learning_rate": 2.4244302538746765e-08, "loss": 0.4558, "step": 7804 }, { "epoch": 3.8722849695916595, "grad_norm": 
0.07270288733195596, "learning_rate": 2.4052423326373364e-08, "loss": 0.4578, "step": 7805 }, { "epoch": 3.872781432294899, "grad_norm": 0.0726698326519595, "learning_rate": 2.3861304607804005e-08, "loss": 0.4179, "step": 7806 }, { "epoch": 3.873277894998138, "grad_norm": 0.07307247352030227, "learning_rate": 2.367094641224754e-08, "loss": 0.425, "step": 7807 }, { "epoch": 3.873774357701378, "grad_norm": 0.06997583746928546, "learning_rate": 2.348134876879793e-08, "loss": 0.4366, "step": 7808 }, { "epoch": 3.8742708204046172, "grad_norm": 0.07301178424469724, "learning_rate": 2.3292511706431432e-08, "loss": 0.4397, "step": 7809 }, { "epoch": 3.8747672831078566, "grad_norm": 0.07162155795689573, "learning_rate": 2.3104435254008852e-08, "loss": 0.4414, "step": 7810 }, { "epoch": 3.875263745811096, "grad_norm": 0.07393752464541645, "learning_rate": 2.2917119440275524e-08, "loss": 0.4407, "step": 7811 }, { "epoch": 3.8757602085143352, "grad_norm": 0.07452520362796672, "learning_rate": 2.273056429385856e-08, "loss": 0.4543, "step": 7812 }, { "epoch": 3.8762566712175746, "grad_norm": 0.07272499079941327, "learning_rate": 2.25447698432707e-08, "loss": 0.4385, "step": 7813 }, { "epoch": 3.8767531339208143, "grad_norm": 0.07405819111966294, "learning_rate": 2.2359736116907006e-08, "loss": 0.4587, "step": 7814 }, { "epoch": 3.8772495966240537, "grad_norm": 0.07297572414669824, "learning_rate": 2.2175463143047636e-08, "loss": 0.4528, "step": 7815 }, { "epoch": 3.877746059327293, "grad_norm": 0.072798895598875, "learning_rate": 2.1991950949855067e-08, "loss": 0.4457, "step": 7816 }, { "epoch": 3.8782425220305323, "grad_norm": 0.07416129028729428, "learning_rate": 2.1809199565376305e-08, "loss": 0.4544, "step": 7817 }, { "epoch": 3.878738984733772, "grad_norm": 0.07049086209545863, "learning_rate": 2.162720901754234e-08, "loss": 0.4315, "step": 7818 }, { "epoch": 3.8792354474370114, "grad_norm": 0.07035749557978963, "learning_rate": 2.144597933416759e-08, "loss": 0.4339, "step": 7819 }, { "epoch": 3.8797319101402508, "grad_norm": 0.07354958199948448, "learning_rate": 2.1265510542949895e-08, "loss": 0.4473, "step": 7820 }, { "epoch": 3.88022837284349, "grad_norm": 0.07115599998557434, "learning_rate": 2.1085802671470533e-08, "loss": 0.4474, "step": 7821 }, { "epoch": 3.8807248355467294, "grad_norm": 0.07502940181703499, "learning_rate": 2.0906855747195864e-08, "loss": 0.4541, "step": 7822 }, { "epoch": 3.8812212982499688, "grad_norm": 0.07376974132135386, "learning_rate": 2.0728669797474565e-08, "loss": 0.4585, "step": 7823 }, { "epoch": 3.8817177609532085, "grad_norm": 0.07225019533734907, "learning_rate": 2.05512448495393e-08, "loss": 0.4423, "step": 7824 }, { "epoch": 3.882214223656448, "grad_norm": 0.07441920631682299, "learning_rate": 2.037458093050726e-08, "loss": 0.4407, "step": 7825 }, { "epoch": 3.882710686359687, "grad_norm": 0.07540349843464134, "learning_rate": 2.0198678067377965e-08, "loss": 0.4355, "step": 7826 }, { "epoch": 3.8832071490629265, "grad_norm": 0.07116267191584182, "learning_rate": 2.002353628703546e-08, "loss": 0.4147, "step": 7827 }, { "epoch": 3.8837036117661663, "grad_norm": 0.07237432491317197, "learning_rate": 1.984915561624834e-08, "loss": 0.4307, "step": 7828 }, { "epoch": 3.8842000744694056, "grad_norm": 0.0712437413651589, "learning_rate": 1.9675536081666392e-08, "loss": 0.4266, "step": 7829 }, { "epoch": 3.884696537172645, "grad_norm": 0.07248638071409952, "learning_rate": 1.9502677709825613e-08, "loss": 0.4531, "step": 7830 }, { "epoch": 3.8851929998758843, 
"grad_norm": 0.07092717694139716, "learning_rate": 1.9330580527144315e-08, "loss": 0.432, "step": 7831 }, { "epoch": 3.8856894625791236, "grad_norm": 0.06979256151546342, "learning_rate": 1.9159244559924795e-08, "loss": 0.4214, "step": 7832 }, { "epoch": 3.886185925282363, "grad_norm": 0.0717442354707785, "learning_rate": 1.8988669834352767e-08, "loss": 0.4483, "step": 7833 }, { "epoch": 3.8866823879856027, "grad_norm": 0.07453108762459253, "learning_rate": 1.8818856376498497e-08, "loss": 0.4479, "step": 7834 }, { "epoch": 3.887178850688842, "grad_norm": 0.07154902113522246, "learning_rate": 1.8649804212315103e-08, "loss": 0.4168, "step": 7835 }, { "epoch": 3.8876753133920814, "grad_norm": 0.07514204973247401, "learning_rate": 1.8481513367638593e-08, "loss": 0.4346, "step": 7836 }, { "epoch": 3.8881717760953207, "grad_norm": 0.07279588195975031, "learning_rate": 1.831398386819061e-08, "loss": 0.4302, "step": 7837 }, { "epoch": 3.8886682387985605, "grad_norm": 0.07083433789230371, "learning_rate": 1.8147215739575118e-08, "loss": 0.4343, "step": 7838 }, { "epoch": 3.8891647015018, "grad_norm": 0.07264082852536816, "learning_rate": 1.7981209007278956e-08, "loss": 0.4373, "step": 7839 }, { "epoch": 3.889661164205039, "grad_norm": 0.07218971964839162, "learning_rate": 1.7815963696675153e-08, "loss": 0.4335, "step": 7840 }, { "epoch": 3.8901576269082785, "grad_norm": 0.07279006779354186, "learning_rate": 1.765147983301796e-08, "loss": 0.4327, "step": 7841 }, { "epoch": 3.890654089611518, "grad_norm": 0.07169296606291194, "learning_rate": 1.7487757441446152e-08, "loss": 0.457, "step": 7842 }, { "epoch": 3.891150552314757, "grad_norm": 0.07436589540232992, "learning_rate": 1.7324796546981937e-08, "loss": 0.4483, "step": 7843 }, { "epoch": 3.891647015017997, "grad_norm": 0.0746696408772963, "learning_rate": 1.7162597174531503e-08, "loss": 0.4514, "step": 7844 }, { "epoch": 3.8921434777212363, "grad_norm": 0.07588571149435935, "learning_rate": 1.7001159348884466e-08, "loss": 0.4471, "step": 7845 }, { "epoch": 3.8926399404244756, "grad_norm": 0.07245883306650445, "learning_rate": 1.6840483094713867e-08, "loss": 0.4646, "step": 7846 }, { "epoch": 3.893136403127715, "grad_norm": 0.0705640191588085, "learning_rate": 1.6680568436576726e-08, "loss": 0.4345, "step": 7847 }, { "epoch": 3.8936328658309547, "grad_norm": 0.07208993186469582, "learning_rate": 1.6521415398912942e-08, "loss": 0.434, "step": 7848 }, { "epoch": 3.894129328534194, "grad_norm": 0.07197941174858101, "learning_rate": 1.6363024006046945e-08, "loss": 0.4348, "step": 7849 }, { "epoch": 3.8946257912374334, "grad_norm": 0.07302725303638385, "learning_rate": 1.6205394282186037e-08, "loss": 0.4585, "step": 7850 }, { "epoch": 3.8951222539406727, "grad_norm": 0.07479123755996081, "learning_rate": 1.6048526251421502e-08, "loss": 0.4697, "step": 7851 }, { "epoch": 3.895618716643912, "grad_norm": 0.0714460020972371, "learning_rate": 1.589241993772861e-08, "loss": 0.4547, "step": 7852 }, { "epoch": 3.8961151793471513, "grad_norm": 0.06989138523119054, "learning_rate": 1.573707536496494e-08, "loss": 0.4448, "step": 7853 }, { "epoch": 3.896611642050391, "grad_norm": 0.07408706106103906, "learning_rate": 1.5582492556872608e-08, "loss": 0.4332, "step": 7854 }, { "epoch": 3.8971081047536305, "grad_norm": 0.07365674527289237, "learning_rate": 1.5428671537077168e-08, "loss": 0.4464, "step": 7855 }, { "epoch": 3.89760456745687, "grad_norm": 0.07171203833147921, "learning_rate": 1.527561232908814e-08, "loss": 0.4478, "step": 7856 }, { "epoch": 
3.898101030160109, "grad_norm": 0.07137804282473109, "learning_rate": 1.5123314956297375e-08, "loss": 0.4175, "step": 7857 }, { "epoch": 3.898597492863349, "grad_norm": 0.07128942518964267, "learning_rate": 1.4971779441981804e-08, "loss": 0.4415, "step": 7858 }, { "epoch": 3.899093955566588, "grad_norm": 0.07515082405610274, "learning_rate": 1.4821005809300681e-08, "loss": 0.4675, "step": 7859 }, { "epoch": 3.8995904182698276, "grad_norm": 0.07434300726999438, "learning_rate": 1.4670994081297796e-08, "loss": 0.4691, "step": 7860 }, { "epoch": 3.900086880973067, "grad_norm": 0.07227320230556432, "learning_rate": 1.4521744280899808e-08, "loss": 0.4602, "step": 7861 }, { "epoch": 3.900583343676306, "grad_norm": 0.07159390508729566, "learning_rate": 1.4373256430916805e-08, "loss": 0.4324, "step": 7862 }, { "epoch": 3.9010798063795455, "grad_norm": 0.07127322741170937, "learning_rate": 1.4225530554043409e-08, "loss": 0.4531, "step": 7863 }, { "epoch": 3.9015762690827853, "grad_norm": 0.07397431432659492, "learning_rate": 1.4078566672856564e-08, "loss": 0.4535, "step": 7864 }, { "epoch": 3.9020727317860247, "grad_norm": 0.07726727419027592, "learning_rate": 1.3932364809817745e-08, "loss": 0.4815, "step": 7865 }, { "epoch": 3.902569194489264, "grad_norm": 0.07289550295671977, "learning_rate": 1.3786924987271299e-08, "loss": 0.4266, "step": 7866 }, { "epoch": 3.9030656571925033, "grad_norm": 0.07308451132181172, "learning_rate": 1.3642247227446114e-08, "loss": 0.4157, "step": 7867 }, { "epoch": 3.903562119895743, "grad_norm": 0.07590479426568846, "learning_rate": 1.3498331552452837e-08, "loss": 0.4762, "step": 7868 }, { "epoch": 3.9040585825989824, "grad_norm": 0.06921959548820725, "learning_rate": 1.3355177984287205e-08, "loss": 0.4036, "step": 7869 }, { "epoch": 3.9045550453022217, "grad_norm": 0.07118954201021534, "learning_rate": 1.3212786544827828e-08, "loss": 0.4435, "step": 7870 }, { "epoch": 3.905051508005461, "grad_norm": 0.07403036754736678, "learning_rate": 1.3071157255836742e-08, "loss": 0.4469, "step": 7871 }, { "epoch": 3.9055479707087004, "grad_norm": 0.07467206775977153, "learning_rate": 1.2930290138960522e-08, "loss": 0.4566, "step": 7872 }, { "epoch": 3.9060444334119397, "grad_norm": 0.07396114789472796, "learning_rate": 1.2790185215727501e-08, "loss": 0.4306, "step": 7873 }, { "epoch": 3.906540896115179, "grad_norm": 0.07240021119883902, "learning_rate": 1.2650842507550554e-08, "loss": 0.4314, "step": 7874 }, { "epoch": 3.907037358818419, "grad_norm": 0.07189908371677901, "learning_rate": 1.251226203572653e-08, "loss": 0.4538, "step": 7875 }, { "epoch": 3.907533821521658, "grad_norm": 0.07336991053621658, "learning_rate": 1.2374443821435156e-08, "loss": 0.4604, "step": 7876 }, { "epoch": 3.9080302842248975, "grad_norm": 0.0720966340794103, "learning_rate": 1.2237387885739582e-08, "loss": 0.4556, "step": 7877 }, { "epoch": 3.9085267469281373, "grad_norm": 0.07273980475231961, "learning_rate": 1.2101094249585832e-08, "loss": 0.4215, "step": 7878 }, { "epoch": 3.9090232096313766, "grad_norm": 0.0743906178360643, "learning_rate": 1.1965562933805575e-08, "loss": 0.4663, "step": 7879 }, { "epoch": 3.909519672334616, "grad_norm": 0.07354772952353111, "learning_rate": 1.1830793959112241e-08, "loss": 0.4241, "step": 7880 }, { "epoch": 3.9100161350378553, "grad_norm": 0.07363233034652644, "learning_rate": 1.1696787346102134e-08, "loss": 0.4644, "step": 7881 }, { "epoch": 3.9105125977410946, "grad_norm": 0.0752133492026765, "learning_rate": 1.1563543115257203e-08, "loss": 0.4649, "step": 
7882 }, { "epoch": 3.911009060444334, "grad_norm": 0.07207085299674393, "learning_rate": 1.1431061286941159e-08, "loss": 0.438, "step": 7883 }, { "epoch": 3.9115055231475733, "grad_norm": 0.07235157876028925, "learning_rate": 1.1299341881401693e-08, "loss": 0.4298, "step": 7884 }, { "epoch": 3.912001985850813, "grad_norm": 0.06988345938969277, "learning_rate": 1.1168384918769926e-08, "loss": 0.4237, "step": 7885 }, { "epoch": 3.9124984485540524, "grad_norm": 0.07545700325415562, "learning_rate": 1.1038190419060957e-08, "loss": 0.4584, "step": 7886 }, { "epoch": 3.9129949112572917, "grad_norm": 0.07367556891620859, "learning_rate": 1.090875840217276e-08, "loss": 0.4421, "step": 7887 }, { "epoch": 3.9134913739605315, "grad_norm": 0.07316765299975153, "learning_rate": 1.078008888788673e-08, "loss": 0.4282, "step": 7888 }, { "epoch": 3.913987836663771, "grad_norm": 0.07406156551740832, "learning_rate": 1.0652181895867697e-08, "loss": 0.4556, "step": 7889 }, { "epoch": 3.91448429936701, "grad_norm": 0.07194509976912991, "learning_rate": 1.052503744566502e-08, "loss": 0.4311, "step": 7890 }, { "epoch": 3.9149807620702495, "grad_norm": 0.07084174131860045, "learning_rate": 1.0398655556709824e-08, "loss": 0.4387, "step": 7891 }, { "epoch": 3.915477224773489, "grad_norm": 0.07592270875366632, "learning_rate": 1.0273036248318325e-08, "loss": 0.4898, "step": 7892 }, { "epoch": 3.915973687476728, "grad_norm": 0.07159994780886023, "learning_rate": 1.0148179539689051e-08, "loss": 0.4386, "step": 7893 }, { "epoch": 3.9164701501799675, "grad_norm": 0.07249131497488212, "learning_rate": 1.0024085449903964e-08, "loss": 0.459, "step": 7894 }, { "epoch": 3.9169666128832072, "grad_norm": 0.07215634562957392, "learning_rate": 9.900753997929557e-09, "loss": 0.4351, "step": 7895 }, { "epoch": 3.9174630755864466, "grad_norm": 0.07306229934044586, "learning_rate": 9.778185202614643e-09, "loss": 0.4542, "step": 7896 }, { "epoch": 3.917959538289686, "grad_norm": 0.07464935978086107, "learning_rate": 9.656379082692014e-09, "loss": 0.4696, "step": 7897 }, { "epoch": 3.9184560009929257, "grad_norm": 0.06959882315202705, "learning_rate": 9.535335656777333e-09, "loss": 0.4045, "step": 7898 }, { "epoch": 3.918952463696165, "grad_norm": 0.07189424265721825, "learning_rate": 9.41505494337136e-09, "loss": 0.444, "step": 7899 }, { "epoch": 3.9194489263994043, "grad_norm": 0.07512861736253441, "learning_rate": 9.295536960856055e-09, "loss": 0.4615, "step": 7900 }, { "epoch": 3.9199453891026437, "grad_norm": 0.07184217453560283, "learning_rate": 9.176781727497919e-09, "loss": 0.449, "step": 7901 }, { "epoch": 3.920441851805883, "grad_norm": 0.0692935480523205, "learning_rate": 9.058789261446876e-09, "loss": 0.4199, "step": 7902 }, { "epoch": 3.9209383145091223, "grad_norm": 0.07158569849881712, "learning_rate": 8.94155958073628e-09, "loss": 0.4446, "step": 7903 }, { "epoch": 3.9214347772123617, "grad_norm": 0.07128555959690813, "learning_rate": 8.825092703282912e-09, "loss": 0.4417, "step": 7904 }, { "epoch": 3.9219312399156014, "grad_norm": 0.07386013608300344, "learning_rate": 8.709388646886419e-09, "loss": 0.4383, "step": 7905 }, { "epoch": 3.9224277026188408, "grad_norm": 0.07077271842701878, "learning_rate": 8.59444742923099e-09, "loss": 0.4483, "step": 7906 }, { "epoch": 3.92292416532208, "grad_norm": 0.07214791377825751, "learning_rate": 8.480269067882574e-09, "loss": 0.4524, "step": 7907 }, { "epoch": 3.9234206280253194, "grad_norm": 0.07498863312685188, "learning_rate": 8.366853580292767e-09, "loss": 0.4706, "step": 
7908 }, { "epoch": 3.923917090728559, "grad_norm": 0.07559995398531838, "learning_rate": 8.254200983794369e-09, "loss": 0.4325, "step": 7909 }, { "epoch": 3.9244135534317985, "grad_norm": 0.07226155025300252, "learning_rate": 8.14231129560472e-09, "loss": 0.4312, "step": 7910 }, { "epoch": 3.924910016135038, "grad_norm": 0.07127750193403352, "learning_rate": 8.031184532824588e-09, "loss": 0.4491, "step": 7911 }, { "epoch": 3.925406478838277, "grad_norm": 0.07415159903823959, "learning_rate": 7.920820712437604e-09, "loss": 0.4306, "step": 7912 }, { "epoch": 3.9259029415415165, "grad_norm": 0.07378377183619132, "learning_rate": 7.811219851311392e-09, "loss": 0.4494, "step": 7913 }, { "epoch": 3.926399404244756, "grad_norm": 0.07275277293764515, "learning_rate": 7.702381966196437e-09, "loss": 0.4556, "step": 7914 }, { "epoch": 3.9268958669479956, "grad_norm": 0.0746244877378339, "learning_rate": 7.594307073727214e-09, "loss": 0.434, "step": 7915 }, { "epoch": 3.927392329651235, "grad_norm": 0.07584813299523505, "learning_rate": 7.48699519042051e-09, "loss": 0.4589, "step": 7916 }, { "epoch": 3.9278887923544743, "grad_norm": 0.07282040520441282, "learning_rate": 7.380446332678204e-09, "loss": 0.4531, "step": 7917 }, { "epoch": 3.9283852550577136, "grad_norm": 0.07229666000116906, "learning_rate": 7.274660516783938e-09, "loss": 0.4335, "step": 7918 }, { "epoch": 3.9288817177609534, "grad_norm": 0.07213635388094723, "learning_rate": 7.16963775890589e-09, "loss": 0.4284, "step": 7919 }, { "epoch": 3.9293781804641927, "grad_norm": 0.0734265619388905, "learning_rate": 7.0653780750945534e-09, "loss": 0.4937, "step": 7920 }, { "epoch": 3.929874643167432, "grad_norm": 0.07242700143100422, "learning_rate": 6.961881481284405e-09, "loss": 0.4483, "step": 7921 }, { "epoch": 3.9303711058706714, "grad_norm": 0.07132732254668113, "learning_rate": 6.859147993293347e-09, "loss": 0.4248, "step": 7922 }, { "epoch": 3.9308675685739107, "grad_norm": 0.0735034320107039, "learning_rate": 6.757177626822709e-09, "loss": 0.4509, "step": 7923 }, { "epoch": 3.93136403127715, "grad_norm": 0.07314542671884339, "learning_rate": 6.655970397457245e-09, "loss": 0.4424, "step": 7924 }, { "epoch": 3.93186049398039, "grad_norm": 0.0711518739203714, "learning_rate": 6.555526320664029e-09, "loss": 0.4151, "step": 7925 }, { "epoch": 3.932356956683629, "grad_norm": 0.07142566153152452, "learning_rate": 6.455845411795225e-09, "loss": 0.4423, "step": 7926 }, { "epoch": 3.9328534193868685, "grad_norm": 0.07246197419030367, "learning_rate": 6.356927686084757e-09, "loss": 0.4286, "step": 7927 }, { "epoch": 3.933349882090108, "grad_norm": 0.0735996074037667, "learning_rate": 6.258773158650533e-09, "loss": 0.4461, "step": 7928 }, { "epoch": 3.9338463447933476, "grad_norm": 0.07195374563717467, "learning_rate": 6.161381844494995e-09, "loss": 0.404, "step": 7929 }, { "epoch": 3.934342807496587, "grad_norm": 0.07236629384914703, "learning_rate": 6.0647537585017956e-09, "loss": 0.447, "step": 7930 }, { "epoch": 3.9348392701998263, "grad_norm": 0.07330477178938324, "learning_rate": 5.968888915439675e-09, "loss": 0.4312, "step": 7931 }, { "epoch": 3.9353357329030656, "grad_norm": 0.07297599221536687, "learning_rate": 5.873787329959135e-09, "loss": 0.4588, "step": 7932 }, { "epoch": 3.935832195606305, "grad_norm": 0.0725622873374946, "learning_rate": 5.779449016595773e-09, "loss": 0.4248, "step": 7933 }, { "epoch": 3.9363286583095443, "grad_norm": 0.07113219855206085, "learning_rate": 5.685873989767499e-09, "loss": 0.414, "step": 7934 }, { 
"epoch": 3.936825121012784, "grad_norm": 0.07443343638570427, "learning_rate": 5.593062263775095e-09, "loss": 0.4223, "step": 7935 }, { "epoch": 3.9373215837160234, "grad_norm": 0.07409075328129311, "learning_rate": 5.501013852804438e-09, "loss": 0.4621, "step": 7936 }, { "epoch": 3.9378180464192627, "grad_norm": 0.07382741390740961, "learning_rate": 5.409728770923162e-09, "loss": 0.4646, "step": 7937 }, { "epoch": 3.938314509122502, "grad_norm": 0.07174126251091134, "learning_rate": 5.31920703208233e-09, "loss": 0.4315, "step": 7938 }, { "epoch": 3.938810971825742, "grad_norm": 0.07545133094403188, "learning_rate": 5.2294486501175415e-09, "loss": 0.4343, "step": 7939 }, { "epoch": 3.939307434528981, "grad_norm": 0.0738239885507223, "learning_rate": 5.140453638746156e-09, "loss": 0.434, "step": 7940 }, { "epoch": 3.9398038972322205, "grad_norm": 0.07164997663670053, "learning_rate": 5.052222011570074e-09, "loss": 0.4165, "step": 7941 }, { "epoch": 3.94030035993546, "grad_norm": 0.07283733967363415, "learning_rate": 4.964753782073506e-09, "loss": 0.4388, "step": 7942 }, { "epoch": 3.940796822638699, "grad_norm": 0.0739064888698837, "learning_rate": 4.878048963625759e-09, "loss": 0.4847, "step": 7943 }, { "epoch": 3.9412932853419385, "grad_norm": 0.07381267380019754, "learning_rate": 4.792107569476789e-09, "loss": 0.4413, "step": 7944 }, { "epoch": 3.9417897480451782, "grad_norm": 0.07393797317527814, "learning_rate": 4.706929612762756e-09, "loss": 0.4765, "step": 7945 }, { "epoch": 3.9422862107484176, "grad_norm": 0.07426952582219104, "learning_rate": 4.6225151065004695e-09, "loss": 0.474, "step": 7946 }, { "epoch": 3.942782673451657, "grad_norm": 0.07352326988482331, "learning_rate": 4.5388640635923855e-09, "loss": 0.4547, "step": 7947 }, { "epoch": 3.9432791361548962, "grad_norm": 0.07224179319878705, "learning_rate": 4.455976496822723e-09, "loss": 0.4379, "step": 7948 }, { "epoch": 3.943775598858136, "grad_norm": 0.07259290717504588, "learning_rate": 4.373852418859681e-09, "loss": 0.467, "step": 7949 }, { "epoch": 3.9442720615613753, "grad_norm": 0.07293484053231977, "learning_rate": 4.292491842254331e-09, "loss": 0.4172, "step": 7950 }, { "epoch": 3.9447685242646147, "grad_norm": 0.07146552534187618, "learning_rate": 4.211894779441727e-09, "loss": 0.4531, "step": 7951 }, { "epoch": 3.945264986967854, "grad_norm": 0.07296431475149752, "learning_rate": 4.1320612427397935e-09, "loss": 0.429, "step": 7952 }, { "epoch": 3.9457614496710933, "grad_norm": 0.07229619087165308, "learning_rate": 4.0529912443493246e-09, "loss": 0.445, "step": 7953 }, { "epoch": 3.9462579123743327, "grad_norm": 0.07254327033190938, "learning_rate": 3.974684796355099e-09, "loss": 0.4445, "step": 7954 }, { "epoch": 3.9467543750775724, "grad_norm": 0.07171593333450604, "learning_rate": 3.897141910725321e-09, "loss": 0.4538, "step": 7955 }, { "epoch": 3.9472508377808118, "grad_norm": 0.07196674522189404, "learning_rate": 3.820362599311067e-09, "loss": 0.4271, "step": 7956 }, { "epoch": 3.947747300484051, "grad_norm": 0.07219094545893444, "learning_rate": 3.744346873846838e-09, "loss": 0.4191, "step": 7957 }, { "epoch": 3.9482437631872904, "grad_norm": 0.07204485023562479, "learning_rate": 3.669094745950008e-09, "loss": 0.4072, "step": 7958 }, { "epoch": 3.94874022589053, "grad_norm": 0.07363711105033145, "learning_rate": 3.594606227121933e-09, "loss": 0.4216, "step": 7959 }, { "epoch": 3.9492366885937695, "grad_norm": 0.07456848670081903, "learning_rate": 3.520881328747394e-09, "loss": 0.4366, "step": 7960 }, { "epoch": 
3.949733151297009, "grad_norm": 0.07378103181258867, "learning_rate": 3.4479200620934904e-09, "loss": 0.4531, "step": 7961 }, { "epoch": 3.950229614000248, "grad_norm": 0.07390658868890232, "learning_rate": 3.375722438311302e-09, "loss": 0.4638, "step": 7962 }, { "epoch": 3.9507260767034875, "grad_norm": 0.07300728961397632, "learning_rate": 3.304288468435335e-09, "loss": 0.4514, "step": 7963 }, { "epoch": 3.951222539406727, "grad_norm": 0.07023027048085913, "learning_rate": 3.2336181633829676e-09, "loss": 0.4128, "step": 7964 }, { "epoch": 3.9517190021099666, "grad_norm": 0.07505027569492374, "learning_rate": 3.16371153395445e-09, "loss": 0.4764, "step": 7965 }, { "epoch": 3.952215464813206, "grad_norm": 0.073450603120353, "learning_rate": 3.094568590835123e-09, "loss": 0.4575, "step": 7966 }, { "epoch": 3.9527119275164453, "grad_norm": 0.07273796979259504, "learning_rate": 3.0261893445915346e-09, "loss": 0.4542, "step": 7967 }, { "epoch": 3.9532083902196846, "grad_norm": 0.07073293634698218, "learning_rate": 2.958573805674214e-09, "loss": 0.4398, "step": 7968 }, { "epoch": 3.9537048529229244, "grad_norm": 0.07475951665447415, "learning_rate": 2.8917219844176724e-09, "loss": 0.4416, "step": 7969 }, { "epoch": 3.9542013156261637, "grad_norm": 0.07299123908045202, "learning_rate": 2.825633891039292e-09, "loss": 0.466, "step": 7970 }, { "epoch": 3.954697778329403, "grad_norm": 0.0726272322838916, "learning_rate": 2.760309535638772e-09, "loss": 0.4298, "step": 7971 }, { "epoch": 3.9551942410326424, "grad_norm": 0.07061022132016685, "learning_rate": 2.6957489281997927e-09, "loss": 0.4039, "step": 7972 }, { "epoch": 3.9556907037358817, "grad_norm": 0.07486006728424369, "learning_rate": 2.631952078590572e-09, "loss": 0.4831, "step": 7973 }, { "epoch": 3.956187166439121, "grad_norm": 0.0732371630222974, "learning_rate": 2.568918996560532e-09, "loss": 0.4318, "step": 7974 }, { "epoch": 3.956683629142361, "grad_norm": 0.07101131445407827, "learning_rate": 2.5066496917436346e-09, "loss": 0.4261, "step": 7975 }, { "epoch": 3.9571800918456, "grad_norm": 0.06824274012821278, "learning_rate": 2.4451441736567106e-09, "loss": 0.4106, "step": 7976 }, { "epoch": 3.9576765545488395, "grad_norm": 0.07220652300245425, "learning_rate": 2.3844024516994634e-09, "loss": 0.4322, "step": 7977 }, { "epoch": 3.958173017252079, "grad_norm": 0.07315711417550587, "learning_rate": 2.3244245351561334e-09, "loss": 0.4572, "step": 7978 }, { "epoch": 3.9586694799553186, "grad_norm": 0.07367506621638123, "learning_rate": 2.2652104331921664e-09, "loss": 0.4404, "step": 7979 }, { "epoch": 3.959165942658558, "grad_norm": 0.0733571536697855, "learning_rate": 2.206760154858656e-09, "loss": 0.4415, "step": 7980 }, { "epoch": 3.9596624053617973, "grad_norm": 0.07267523211050667, "learning_rate": 2.149073709088456e-09, "loss": 0.4413, "step": 7981 }, { "epoch": 3.9601588680650366, "grad_norm": 0.07303058441794463, "learning_rate": 2.0921511046978482e-09, "loss": 0.4389, "step": 7982 }, { "epoch": 3.960655330768276, "grad_norm": 0.07206286293004117, "learning_rate": 2.0359923503859844e-09, "loss": 0.4429, "step": 7983 }, { "epoch": 3.9611517934715152, "grad_norm": 0.07229480870759866, "learning_rate": 1.98059745473711e-09, "loss": 0.4496, "step": 7984 }, { "epoch": 3.961648256174755, "grad_norm": 0.07330560265167406, "learning_rate": 1.9259664262166745e-09, "loss": 0.4469, "step": 7985 }, { "epoch": 3.9621447188779944, "grad_norm": 0.07347536565439004, "learning_rate": 1.8720992731741104e-09, "loss": 0.4503, "step": 7986 }, { "epoch": 
3.9626411815812337, "grad_norm": 0.06805756827226955, "learning_rate": 1.818996003842277e-09, "loss": 0.4025, "step": 7987 }, { "epoch": 3.963137644284473, "grad_norm": 0.07707538276879311, "learning_rate": 1.7666566263374596e-09, "loss": 0.5152, "step": 7988 }, { "epoch": 3.963634106987713, "grad_norm": 0.07205588748513372, "learning_rate": 1.7150811486582597e-09, "loss": 0.4058, "step": 7989 }, { "epoch": 3.964130569690952, "grad_norm": 0.07140882741949466, "learning_rate": 1.664269578687261e-09, "loss": 0.4337, "step": 7990 }, { "epoch": 3.9646270323941915, "grad_norm": 0.07212114622615089, "learning_rate": 1.6142219241910284e-09, "loss": 0.4594, "step": 7991 }, { "epoch": 3.965123495097431, "grad_norm": 0.07371435390405398, "learning_rate": 1.5649381928173335e-09, "loss": 0.4353, "step": 7992 }, { "epoch": 3.96561995780067, "grad_norm": 0.07281281216816414, "learning_rate": 1.5164183920995946e-09, "loss": 0.4208, "step": 7993 }, { "epoch": 3.9661164205039094, "grad_norm": 0.07163297655742279, "learning_rate": 1.4686625294524359e-09, "loss": 0.4264, "step": 7994 }, { "epoch": 3.966612883207149, "grad_norm": 0.07175362399757045, "learning_rate": 1.421670612175019e-09, "loss": 0.4193, "step": 7995 }, { "epoch": 3.9671093459103886, "grad_norm": 0.07426390382442767, "learning_rate": 1.3754426474488213e-09, "loss": 0.4553, "step": 7996 }, { "epoch": 3.967605808613628, "grad_norm": 0.07549104779425102, "learning_rate": 1.3299786423393025e-09, "loss": 0.434, "step": 7997 }, { "epoch": 3.968102271316867, "grad_norm": 0.07037254959583114, "learning_rate": 1.285278603795348e-09, "loss": 0.4229, "step": 7998 }, { "epoch": 3.968598734020107, "grad_norm": 0.07276741874477172, "learning_rate": 1.2413425386481604e-09, "loss": 0.4719, "step": 7999 }, { "epoch": 3.9690951967233463, "grad_norm": 0.07322622193035082, "learning_rate": 1.1981704536129234e-09, "loss": 0.4335, "step": 8000 }, { "epoch": 3.9695916594265857, "grad_norm": 0.07225963682092588, "learning_rate": 1.1557623552871378e-09, "loss": 0.4515, "step": 8001 }, { "epoch": 3.970088122129825, "grad_norm": 0.06965915089959167, "learning_rate": 1.1141182501533954e-09, "loss": 0.4038, "step": 8002 }, { "epoch": 3.9705845848330643, "grad_norm": 0.0702457996988929, "learning_rate": 1.0732381445749396e-09, "loss": 0.4679, "step": 8003 }, { "epoch": 3.9710810475363036, "grad_norm": 0.07355844745120076, "learning_rate": 1.0331220448006606e-09, "loss": 0.4619, "step": 8004 }, { "epoch": 3.9715775102395434, "grad_norm": 0.07306605760434766, "learning_rate": 9.937699569617654e-10, "loss": 0.4395, "step": 8005 }, { "epoch": 3.9720739729427827, "grad_norm": 0.07274946485326253, "learning_rate": 9.551818870712214e-10, "loss": 0.4733, "step": 8006 }, { "epoch": 3.972570435646022, "grad_norm": 0.07309474485727571, "learning_rate": 9.173578410281992e-10, "loss": 0.4631, "step": 8007 }, { "epoch": 3.9730668983492614, "grad_norm": 0.0725914055312676, "learning_rate": 8.802978246130745e-10, "loss": 0.4182, "step": 8008 }, { "epoch": 3.973563361052501, "grad_norm": 0.0739704011459376, "learning_rate": 8.440018434890951e-10, "loss": 0.4589, "step": 8009 }, { "epoch": 3.9740598237557405, "grad_norm": 0.07236480524788812, "learning_rate": 8.084699032040455e-10, "loss": 0.4535, "step": 8010 }, { "epoch": 3.97455628645898, "grad_norm": 0.07415395830044395, "learning_rate": 7.737020091885816e-10, "loss": 0.4642, "step": 8011 }, { "epoch": 3.975052749162219, "grad_norm": 0.07422221753321762, "learning_rate": 7.396981667562308e-10, "loss": 0.47, "step": 8012 }, { "epoch": 
3.9755492118654585, "grad_norm": 0.07579519395700349, "learning_rate": 7.064583811039472e-10, "loss": 0.4164, "step": 8013 }, { "epoch": 3.976045674568698, "grad_norm": 0.07189369806693677, "learning_rate": 6.739826573121111e-10, "loss": 0.4366, "step": 8014 }, { "epoch": 3.976542137271937, "grad_norm": 0.07405554121755314, "learning_rate": 6.422710003439747e-10, "loss": 0.4445, "step": 8015 }, { "epoch": 3.977038599975177, "grad_norm": 0.07085365042841145, "learning_rate": 6.113234150462166e-10, "loss": 0.4333, "step": 8016 }, { "epoch": 3.9775350626784163, "grad_norm": 0.07432592377837327, "learning_rate": 5.811399061478318e-10, "loss": 0.4393, "step": 8017 }, { "epoch": 3.9780315253816556, "grad_norm": 0.07604342053211292, "learning_rate": 5.517204782634622e-10, "loss": 0.428, "step": 8018 }, { "epoch": 3.9785279880848954, "grad_norm": 0.07578805782078543, "learning_rate": 5.230651358878458e-10, "loss": 0.4992, "step": 8019 }, { "epoch": 3.9790244507881347, "grad_norm": 0.07353741636356836, "learning_rate": 4.951738834019226e-10, "loss": 0.4536, "step": 8020 }, { "epoch": 3.979520913491374, "grad_norm": 0.07402907843843444, "learning_rate": 4.680467250672837e-10, "loss": 0.4799, "step": 8021 }, { "epoch": 3.9800173761946134, "grad_norm": 0.07183089412737431, "learning_rate": 4.416836650300571e-10, "loss": 0.44, "step": 8022 }, { "epoch": 3.9805138388978527, "grad_norm": 0.07147423691021713, "learning_rate": 4.160847073203522e-10, "loss": 0.4499, "step": 8023 }, { "epoch": 3.981010301601092, "grad_norm": 0.07213831950971986, "learning_rate": 3.912498558494848e-10, "loss": 0.4279, "step": 8024 }, { "epoch": 3.9815067643043314, "grad_norm": 0.0728362024466595, "learning_rate": 3.671791144133074e-10, "loss": 0.4278, "step": 8025 }, { "epoch": 3.982003227007571, "grad_norm": 0.07470742291399306, "learning_rate": 3.438724866910992e-10, "loss": 0.4686, "step": 8026 }, { "epoch": 3.9824996897108105, "grad_norm": 0.07293936423797455, "learning_rate": 3.213299762444555e-10, "loss": 0.4369, "step": 8027 }, { "epoch": 3.98299615241405, "grad_norm": 0.07378429528311958, "learning_rate": 2.995515865183984e-10, "loss": 0.426, "step": 8028 }, { "epoch": 3.9834926151172896, "grad_norm": 0.07632103507293397, "learning_rate": 2.7853732084248687e-10, "loss": 0.4461, "step": 8029 }, { "epoch": 3.983989077820529, "grad_norm": 0.0726281969225929, "learning_rate": 2.5828718242693064e-10, "loss": 0.4328, "step": 8030 }, { "epoch": 3.9844855405237682, "grad_norm": 0.07534194717846308, "learning_rate": 2.3880117436814176e-10, "loss": 0.4417, "step": 8031 }, { "epoch": 3.9849820032270076, "grad_norm": 0.0765137544246794, "learning_rate": 2.200792996431833e-10, "loss": 0.4883, "step": 8032 }, { "epoch": 3.985478465930247, "grad_norm": 0.07125139382665711, "learning_rate": 2.0212156111365512e-10, "loss": 0.4274, "step": 8033 }, { "epoch": 3.9859749286334862, "grad_norm": 0.07279854809314834, "learning_rate": 1.849279615240285e-10, "loss": 0.4246, "step": 8034 }, { "epoch": 3.9864713913367256, "grad_norm": 0.07217756295907013, "learning_rate": 1.6849850350275643e-10, "loss": 0.4515, "step": 8035 }, { "epoch": 3.9869678540399653, "grad_norm": 0.07128111812935736, "learning_rate": 1.528331895600532e-10, "loss": 0.4098, "step": 8036 }, { "epoch": 3.9874643167432047, "grad_norm": 0.07351939432494661, "learning_rate": 1.3793202209011481e-10, "loss": 0.4502, "step": 8037 }, { "epoch": 3.987960779446444, "grad_norm": 0.07180638250150356, "learning_rate": 1.2379500337056372e-10, "loss": 0.4431, "step": 8038 }, { "epoch": 
3.988457242149684, "grad_norm": 0.07389007991886898, "learning_rate": 1.1042213556244907e-10, "loss": 0.4535, "step": 8039 }, { "epoch": 3.988953704852923, "grad_norm": 0.07322600834668609, "learning_rate": 9.781342070913635e-11, "loss": 0.4267, "step": 8040 }, { "epoch": 3.9894501675561624, "grad_norm": 0.07292913398722783, "learning_rate": 8.596886073741761e-11, "loss": 0.4277, "step": 8041 }, { "epoch": 3.9899466302594018, "grad_norm": 0.07133143471334574, "learning_rate": 7.48884574575115e-11, "loss": 0.4277, "step": 8042 }, { "epoch": 3.990443092962641, "grad_norm": 0.07206100534353532, "learning_rate": 6.457221256361834e-11, "loss": 0.4427, "step": 8043 }, { "epoch": 3.9909395556658804, "grad_norm": 0.06961673178845708, "learning_rate": 5.502012763225484e-11, "loss": 0.4377, "step": 8044 }, { "epoch": 3.9914360183691198, "grad_norm": 0.07234539655461314, "learning_rate": 4.623220412280916e-11, "loss": 0.4542, "step": 8045 }, { "epoch": 3.9919324810723595, "grad_norm": 0.07365297569379811, "learning_rate": 3.820844337865115e-11, "loss": 0.4656, "step": 8046 }, { "epoch": 3.992428943775599, "grad_norm": 0.07182593616667256, "learning_rate": 3.094884662602215e-11, "loss": 0.4482, "step": 8047 }, { "epoch": 3.992925406478838, "grad_norm": 0.07176683987772409, "learning_rate": 2.4453414974034972e-11, "loss": 0.428, "step": 8048 }, { "epoch": 3.9934218691820775, "grad_norm": 0.07117401113949938, "learning_rate": 1.872214941633921e-11, "loss": 0.4418, "step": 8049 }, { "epoch": 3.9939183318853173, "grad_norm": 0.07501244871652403, "learning_rate": 1.3755050828345717e-11, "loss": 0.4443, "step": 8050 }, { "epoch": 3.9944147945885566, "grad_norm": 0.071852960560573, "learning_rate": 9.552119968891937e-12, "loss": 0.445, "step": 8051 }, { "epoch": 3.994911257291796, "grad_norm": 0.070757041234484, "learning_rate": 6.1133574807969995e-12, "loss": 0.4219, "step": 8052 }, { "epoch": 3.9954077199950353, "grad_norm": 0.07282539751959509, "learning_rate": 3.4387638897515065e-12, "loss": 0.4521, "step": 8053 }, { "epoch": 3.9959041826982746, "grad_norm": 0.07154126903717001, "learning_rate": 1.5283396037624188e-12, "loss": 0.4225, "step": 8054 }, { "epoch": 3.996400645401514, "grad_norm": 0.07092968323664142, "learning_rate": 3.82084915373504e-13, "loss": 0.4317, "step": 8055 }, { "epoch": 3.9968971081047537, "grad_norm": 0.07410447458205609, "learning_rate": 0.0, "loss": 0.4481, "step": 8056 } ], "logging_steps": 1, "max_steps": 8056, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 2014, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6115693053345792.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }